All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 3/3] whpx: Added support for breakpoints and stepping
@ 2022-02-23  5:25 Ivan Shcherbakov
  2022-02-23  9:36 ` Paolo Bonzini
  0 siblings, 1 reply; 10+ messages in thread
From: Ivan Shcherbakov @ 2022-02-23  5:25 UTC (permalink / raw)
  To: qemu-devel; +Cc: armbru, mst

This adds support for breakpoints and stepping when debugging
WHPX-accelerated guests with gdb.
It enables reliable debugging of the Linux kernel in both single-CPU and SMP
modes.

Signed-off-by: Ivan Shcherbakov <ivan@sysprogs.com>
---
 gdbstub.c                        |  10 +
 include/exec/gdbstub.h           |   8 +
 target/i386/whpx/whpx-all.c      | 689 ++++++++++++++++++++++++++++++-
 target/i386/whpx/whpx-internal.h |  29 ++
 4 files changed, 721 insertions(+), 15 deletions(-)

diff --git a/gdbstub.c b/gdbstub.c
index 3c14c6a038..d30cbfa478 100644
--- a/gdbstub.c
+++ b/gdbstub.c
@@ -373,6 +373,12 @@ typedef struct GDBState {
 } GDBState;
 
 static GDBState gdbserver_state;
+static bool gdbserver_is_connected;
+
+bool gdb_is_connected(void)
+{
+    return gdbserver_is_connected;
+}
 
 static void init_gdbserver_state(void)
 {
@@ -3410,6 +3416,10 @@ static void gdb_chr_event(void *opaque, QEMUChrEvent
event)
         vm_stop(RUN_STATE_PAUSED);
         replay_gdb_attached();
         gdb_has_xml = false;
+        gdbserver_is_connected = true;
+        break;
+    case CHR_EVENT_CLOSED:
+        gdbserver_is_connected = false;
         break;
     default:
         break;
diff --git a/include/exec/gdbstub.h b/include/exec/gdbstub.h
index a024a0350d..0ef54cdeb5 100644
--- a/include/exec/gdbstub.h
+++ b/include/exec/gdbstub.h
@@ -188,4 +188,12 @@ extern bool gdb_has_xml;
 /* in gdbstub-xml.c, generated by scripts/feature_to_c.sh */
 extern const char *const xml_builtin[][2];
 
+/**
+ * gdb_is_connected: Check whether gdb is currently connected.
+ * This function is used to determine if gdb is currently connected to
qemu.
+ * It is used by the WHPX engine to enable interception of debug-related
+ * exceptions, when debugging with gdb, and pass them to the guest
otherwise.
+ */
+bool gdb_is_connected(void);
+
 #endif
diff --git a/target/i386/whpx/whpx-all.c b/target/i386/whpx/whpx-all.c
index 8a8b5d55d1..030988fa51 100644
--- a/target/i386/whpx/whpx-all.c
+++ b/target/i386/whpx/whpx-all.c
@@ -12,6 +12,7 @@
 #include "cpu.h"
 #include "exec/address-spaces.h"
 #include "exec/ioport.h"
+#include "exec/gdbstub.h"
 #include "qemu-common.h"
 #include "qemu/accel.h"
 #include "sysemu/whpx.h"
@@ -148,6 +149,12 @@ struct whpx_register_set {
     WHV_REGISTER_VALUE values[RTL_NUMBER_OF(whpx_register_names)];
 };
 
+enum whpx_step_mode {
+    whpx_step_none = 0,
+    /* Halt other VCPUs */
+    whpx_step_exclusive,
+};
+
 struct whpx_vcpu {
     WHV_EMULATOR_HANDLE emulator;
     bool window_registered;
@@ -156,7 +163,6 @@ struct whpx_vcpu {
     uint64_t tpr;
     uint64_t apic_base;
     bool interruption_pending;
-
     /* Must be the last field as it may have a tail */
     WHV_RUN_VP_EXIT_CONTEXT exit_ctx;
 };
@@ -793,6 +799,515 @@ static int whpx_handle_portio(CPUState *cpu,
     return 0;
 }
 
+/*
+ * Controls whether we should intercept various exceptions on the guest,
+ * namely breakpoint/single-step events.
+ *
+ * The 'exceptions' argument accepts a bitmask, e.g:
+ * (1 << WHvX64ExceptionTypeDebugTrapOrFault) | (...)
+ */
+static HRESULT whpx_set_exception_exit_bitmap(UINT64 exceptions)
+{
+    struct whpx_state *whpx = &whpx_global;
+    WHV_PARTITION_PROPERTY prop = { 0, };
+    HRESULT hr;
+
+    if (exceptions == whpx->exception_exit_bitmap) {
+        return S_OK;
+    }
+
+    prop.ExceptionExitBitmap = exceptions;
+
+    hr = whp_dispatch.WHvSetPartitionProperty(
+        whpx->partition,
+        WHvPartitionPropertyCodeExceptionExitBitmap,
+        &prop,
+        sizeof(WHV_PARTITION_PROPERTY));
+
+    if (SUCCEEDED(hr)) {
+        whpx->exception_exit_bitmap = exceptions;
+    }
+
+    return hr;
+}
+
+
+/*
+ * This function is called before/after stepping over a single instruction.
+ * It will update the CPU registers to arm/disarm the instruction stepping
+ * accordingly.
+ */
+static HRESULT whpx_vcpu_configure_single_stepping(CPUState *cpu,
+    bool set,
+    uint64_t *exit_context_rflags)
+{
+    WHV_REGISTER_NAME reg_name;
+    WHV_REGISTER_VALUE reg_value;
+    HRESULT hr;
+    struct whpx_state *whpx = &whpx_global;
+
+    /*
+     * If we are trying to step over a single instruction, we need to set
the
+     * TF bit in rflags. Otherwise, clear it.
+     */
+    reg_name = WHvX64RegisterRflags;
+    hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
+        whpx->partition,
+        cpu->cpu_index,
+        &reg_name,
+        1,
+        &reg_value);
+
+    if (FAILED(hr)) {
+        error_report("WHPX: Failed to get rflags, hr=%08lx", hr);
+        return hr;
+    }
+
+    if (exit_context_rflags) {
+        assert(*exit_context_rflags == reg_value.Reg64);
+    }
+
+    if (set) {
+        /* Raise WHvX64ExceptionTypeDebugTrapOrFault after each instruction
*/
+        reg_value.Reg64 |= TF_MASK;
+    } else {
+        reg_value.Reg64 &= ~TF_MASK;
+    }
+
+    if (exit_context_rflags) {
+        *exit_context_rflags = reg_value.Reg64;
+    }
+
+    hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
+        whpx->partition,
+        cpu->cpu_index,
+        &reg_name,
+        1,
+        &reg_value);
+
+    if (FAILED(hr)) {
+        error_report("WHPX: Failed to set rflags,"
+            " hr=%08lx",
+            hr);
+        return hr;
+    }
+
+    reg_name = WHvRegisterInterruptState;
+    reg_value.Reg64 = 0;
+
+    /* Suspend delivery of hardware interrupts during single-stepping. */
+    reg_value.InterruptState.InterruptShadow = set != 0;
+
+    hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
+    whpx->partition,
+        cpu->cpu_index,
+        &reg_name,
+        1,
+        &reg_value);
+
+    if (FAILED(hr)) {
+        error_report("WHPX: Failed to set InterruptState,"
+            " hr=%08lx",
+            hr);
+        return hr;
+    }
+
+    if (!set) {
+        /*
+         * We have just finished stepping over a single instruction,
+         * and intercepted the INT1 generated by it.
+         * We need to now hide the INT1 from the guest,
+         * as it would not be expecting it.
+         */
+
+        reg_name = WHvX64RegisterPendingDebugException;
+        hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
+        whpx->partition,
+            cpu->cpu_index,
+            &reg_name,
+            1,
+            &reg_value);
+
+        if (FAILED(hr)) {
+            error_report("WHPX: Failed to get pending debug exceptions,"
+                         "hr=%08lx", hr);
+            return hr;
+        }
+
+        if (reg_value.PendingDebugException.SingleStep) {
+            reg_value.PendingDebugException.SingleStep = 0;
+
+            hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
+                whpx->partition,
+                cpu->cpu_index,
+                &reg_name,
+                1,
+                &reg_value);
+
+            if (FAILED(hr)) {
+                error_report("WHPX: Failed to clear pending debug
exceptions,"
+                             "hr=%08lx", hr);
+             return hr;
+            }
+        }
+
+    }
+
+    return S_OK;
+}
+
+/* Tries to find a breakpoint at the specified address. */
+static struct whpx_breakpoint *whpx_lookup_breakpoint_by_addr(uint64_t
address)
+{
+    struct whpx_state *whpx = &whpx_global;
+    int i;
+
+    if (whpx->breakpoints.breakpoints) {
+        for (i = 0; i < whpx->breakpoints.breakpoints->used; i++) {
+            if (address == whpx->breakpoints.breakpoints->data[i].address)
{
+                return &whpx->breakpoints.breakpoints->data[i];
+            }
+        }
+    }
+
+    return NULL;
+}
+
+/*
+ * Linux uses int3 (0xCC) during startup (see int3_selftest()) and for
+ * debugging user-mode applications. Since the WHPX API does not offer
+ * an easy way to pass the intercepted exception back to the guest, we
+ * resort to using INT1 instead, and let the guest always handle INT3.
+ */
+static const uint8_t whpx_breakpoint_instruction = 0xF1;
+
+/*
+ * The WHPX QEMU backend implements breakpoints by writing the INT1
+ * instruction into memory (ignoring the DRx registers). This raises a few
+ * issues that need to be carefully handled:
+ *
+ * 1. Although unlikely, other parts of QEMU may set multiple breakpoints
+ *    at the same location, and later remove them in arbitrary order.
+ *    This should not cause memory corruption, and should only remove the
+ *    physical breakpoint instruction when the last QEMU breakpoint is
gone.
+ *
+ * 2. Writing arbitrary virtual memory may fail if it's not mapped to a
valid
+ *    physical location. Hence, physically adding/removing a breakpoint can
+ *    theoretically fail at any time. We need to keep track of it.
+ *
+ * The function below rebuilds a list of low-level breakpoints (one per
+ * address, tracking the original instruction and any errors) from the list
of
+ * high-level breakpoints (set via cpu_breakpoint_insert()).
+ *
+ * In order to optimize performance, this function stores the list of
+ * high-level breakpoints (a.k.a. CPU breakpoints) used to compute the
+ * low-level ones, so that it won't be re-invoked until these breakpoints
+ * change.
+ *
+ * Note that this function decides which breakpoints should be inserted
into,
+ * memory, but doesn't actually do it. The memory accessing is done in
+ * whpx_apply_breakpoints().
+ */
+static void whpx_translate_cpu_breakpoints(
+    struct whpx_breakpoints *breakpoints,
+    CPUState *cpu,
+    int cpu_breakpoint_count)
+{
+    CPUBreakpoint *bp;
+    int cpu_bp_index = 0;
+
+    breakpoints->original_addresses =
+        g_renew(vaddr, breakpoints->original_addresses,
cpu_breakpoint_count);
+
+    breakpoints->original_address_count = cpu_breakpoint_count;
+
+    int max_breakpoints = cpu_breakpoint_count +
+        (breakpoints->breakpoints ? breakpoints->breakpoints->used : 0);
+
+    struct whpx_breakpoint_collection *new_breakpoints =
+        (struct whpx_breakpoint_collection *)g_malloc0(
+        sizeof(struct whpx_breakpoint_collection) +
+            max_breakpoints * sizeof(struct whpx_breakpoint));
+
+    new_breakpoints->allocated = max_breakpoints;
+    new_breakpoints->used = 0;
+
+    /*
+     * 1. Preserve all old breakpoints that could not be automatically
+     * cleared when the CPU got stopped.
+     */
+    if (breakpoints->breakpoints) {
+        int i;
+        for (i = 0; i < breakpoints->breakpoints->used; i++) {
+            if (breakpoints->breakpoints->data[i].state != whpx_bp_cleared)
{
+                new_breakpoints->data[new_breakpoints->used++] =
+                    breakpoints->breakpoints->data[i];
+            }
+        }
+    }
+
+    /* 2. Map all CPU breakpoints to WHPX breakpoints */
+    QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
+        int i;
+        bool found = false;
+
+        /* This will be used to detect changed CPU breakpoints later. */
+        breakpoints->original_addresses[cpu_bp_index++] = bp->pc;
+
+        for (i = 0; i < new_breakpoints->used; i++) {
+            /*
+             * WARNING: This loop has O(N^2) complexity, where N is the
+             * number of breakpoints. It should not be a bottleneck in
+             * real-world scenarios, since it only needs to run once after
+             * the breakpoints have been modified.
+             * If this ever becomes a concern, it can be optimized by
storing
+             * high-level breakpoint objects in a tree or hash map.
+             */
+
+            if (new_breakpoints->data[i].address == bp->pc) {
+                /* There was already a breakpoint at this address. */
+                if (new_breakpoints->data[i].state ==
whpx_bp_clear_pending) {
+                    new_breakpoints->data[i].state = whpx_bp_set;
+                } else if (new_breakpoints->data[i].state == whpx_bp_set) {
+                    new_breakpoints->data[i].state = whpx_bp_set_pending;
+                }
+
+                found = true;
+                break;
+            }
+        }
+
+        if (!found && new_breakpoints->used < new_breakpoints->allocated) {
+            /* No WHPX breakpoint at this address. Create one. */
+            new_breakpoints->data[new_breakpoints->used].address = bp->pc;
+            new_breakpoints->data[new_breakpoints->used].state =
+                whpx_bp_set_pending;
+            new_breakpoints->used++;
+        }
+    }
+
+    if (breakpoints->breakpoints) {
+        /*
+         * Free the previous breakpoint list. This can be optimized by
keeping
+         * it as shadow buffer for the next computation instead of freeing
+         * it immediately.
+         */
+        g_free(breakpoints->breakpoints);
+    }
+
+    breakpoints->breakpoints = new_breakpoints;
+}
+
+/*
+ * Physically inserts/removes the breakpoints by reading and writing the
+ * physical memory, keeping a track of the failed attempts.
+ *
+ * Passing resuming=true  will try to set all previously unset breakpoints.
+ * Passing resuming=false will remove all inserted ones.
+ */
+static void whpx_apply_breakpoints(
+    struct whpx_breakpoint_collection *breakpoints,
+    CPUState *cpu,
+    bool resuming)
+{
+    int i, rc;
+    if (!breakpoints) {
+        return;
+    }
+
+    for (i = 0; i < breakpoints->used; i++) {
+        /* Decide what to do right now based on the last known state. */
+        enum whpx_breakpoint_state state = breakpoints->data[i].state;
+        switch (state) {
+        case whpx_bp_cleared:
+            if (resuming) {
+                state = whpx_bp_set_pending;
+            }
+            break;
+        case whpx_bp_set_pending:
+            if (!resuming) {
+                state = whpx_bp_cleared;
+            }
+            break;
+        case whpx_bp_set:
+            if (!resuming) {
+                state = whpx_bp_clear_pending;
+            }
+            break;
+        case whpx_bp_clear_pending:
+            if (resuming) {
+                state = whpx_bp_set;
+            }
+            break;
+        }
+
+        if (state == whpx_bp_set_pending) {
+            /* Remember the original instruction. */
+            rc = cpu_memory_rw_debug(cpu,
+                breakpoints->data[i].address,
+                &breakpoints->data[i].original_instruction,
+                1,
+                false);
+
+            if (!rc) {
+                /* Write the breakpoint instruction. */
+                rc = cpu_memory_rw_debug(cpu,
+                    breakpoints->data[i].address,
+                    (void *)&whpx_breakpoint_instruction,
+                    1,
+                    true);
+            }
+
+            if (!rc) {
+                state = whpx_bp_set;
+            }
+
+        }
+
+        if (state == whpx_bp_clear_pending) {
+            /* Restore the original instruction. */
+            rc = cpu_memory_rw_debug(cpu,
+                breakpoints->data[i].address,
+                &breakpoints->data[i].original_instruction,
+                1,
+                true);
+
+            if (!rc) {
+                state = whpx_bp_cleared;
+            }
+        }
+
+        breakpoints->data[i].state = state;
+    }
+}
+
+/*
+ * This function is called when the a VCPU is about to start and no other
+ * VCPUs have been started so far. Since the VCPU start order could be
+ * arbitrary, it doesn't have to be VCPU#0.
+ *
+ * It is used to commit the breakpoints into memory, and configure WHPX
+ * to intercept debug exceptions.
+ *
+ * Note that whpx_set_exception_exit_bitmap() cannot be called if one or
+ * more VCPUs are already running, so this is the best place to do it.
+ */
+static int whpx_first_vcpu_starting(CPUState *cpu)
+{
+    struct whpx_state *whpx = &whpx_global;
+    HRESULT hr;
+
+    g_assert(qemu_mutex_iothread_locked());
+
+    if (!QTAILQ_EMPTY(&cpu->breakpoints) ||
+            (whpx->breakpoints.breakpoints &&
+             whpx->breakpoints.breakpoints->used)) {
+        CPUBreakpoint *bp;
+        int i = 0;
+        bool update_pending = false;
+
+        QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
+            if (i >= whpx->breakpoints.original_address_count ||
+                bp->pc != whpx->breakpoints.original_addresses[i]) {
+                update_pending = true;
+            }
+
+            i++;
+        }
+
+        if (i != whpx->breakpoints.original_address_count) {
+            update_pending = true;
+        }
+
+        if (update_pending) {
+            /*
+             * The CPU breakpoints have changed since the last call to
+             * whpx_translate_cpu_breakpoints(). WHPX breakpoints must
+             * now be recomputed.
+             */
+            whpx_translate_cpu_breakpoints(&whpx->breakpoints, cpu, i);
+        }
+
+        /* Actually insert the breakpoints into the memory. */
+        whpx_apply_breakpoints(whpx->breakpoints.breakpoints, cpu, true);
+    }
+
+    uint64_t exception_mask;
+    if (gdb_is_connected()) {
+        /*
+         * GDB is connected. Intercept breakpoint/step events.
+         * Since we are using INT1 rather than INT3 for breakpoints,
+         * we don't need to intercept WHvX64ExceptionTypeBreakpointTrap.
+         */
+
+        exception_mask = 1UL << WHvX64ExceptionTypeDebugTrapOrFault;
+    } else {
+        /* GDB is not connected. Let the guest handle all exceptions. */
+        exception_mask = 0;
+    }
+
+    hr = whpx_set_exception_exit_bitmap(exception_mask);
+    if (!SUCCEEDED(hr)) {
+        error_report("WHPX: Failed to update exception exit mask,"
+                     "hr=%08lx.", hr);
+        return 1;
+    }
+
+    return 0;
+}
+
+/*
+ * This function is called when the last VCPU has finished running.
+ * It is used to remove any previously set breakpoints from memory.
+ */
+static int whpx_last_vcpu_stopping(CPUState *cpu)
+{
+    whpx_apply_breakpoints(whpx_global.breakpoints.breakpoints, cpu,
false);
+    return 0;
+}
+
+/* Returns the address of the next instruction that is about to be
executed. */
+static vaddr whpx_vcpu_get_pc(CPUState *cpu, bool exit_context_valid)
+{
+    if (cpu->vcpu_dirty) {
+        /* The CPU registers have been modified by other parts of QEMU. */
+        struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
+        return env->eip;
+    } else if (exit_context_valid) {
+        /*
+         * The CPU registers have not been modified by neither other parts
+         * of QEMU, nor this port by calling
WHvSetVirtualProcessorRegisters().
+         * This is the most common case.
+         */
+        struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
+        return vcpu->exit_ctx.VpContext.Rip;
+    } else {
+        /*
+         * The CPU registers have been modified by a call to
+         * WHvSetVirtualProcessorRegisters() and must be re-queried from
+         * the target.
+         */
+        WHV_REGISTER_VALUE reg_value;
+        WHV_REGISTER_NAME reg_name = WHvX64RegisterRip;
+        HRESULT hr;
+        struct whpx_state *whpx = &whpx_global;
+
+        hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
+            whpx->partition,
+            cpu->cpu_index,
+            &reg_name,
+            1,
+            &reg_value);
+
+        if (FAILED(hr)) {
+            error_report("WHPX: Failed to get PC, hr=%08lx", hr);
+            return 0;
+        }
+
+        return reg_value.Reg64;
+    }
+}
+
 static int whpx_handle_halt(CPUState *cpu)
 {
     struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
@@ -1004,17 +1519,75 @@ static int whpx_vcpu_run(CPUState *cpu)
     HRESULT hr;
     struct whpx_state *whpx = &whpx_global;
     struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
+    struct whpx_breakpoint *stepped_over_bp = NULL;
+    enum whpx_step_mode exclusive_step_mode = whpx_step_none;
     int ret;
 
-    whpx_vcpu_process_async_events(cpu);
-    if (cpu->halted && !whpx_apic_in_platform()) {
-        cpu->exception_index = EXCP_HLT;
-        qatomic_set(&cpu->exit_request, false);
-        return 0;
+    g_assert(qemu_mutex_iothread_locked());
+
+    if (whpx->running_cpus++ == 0) {
+        /* Insert breakpoints into memory, update exception exit bitmap. */
+        ret = whpx_first_vcpu_starting(cpu);
+        if (ret != 0) {
+            return ret;
+        }
+    }
+
+    if (whpx->breakpoints.breakpoints &&
+        whpx->breakpoints.breakpoints->used > 0)
+    {
+        uint64_t pc = whpx_vcpu_get_pc(cpu, true);
+        stepped_over_bp = whpx_lookup_breakpoint_by_addr(pc);
+        if (stepped_over_bp && stepped_over_bp->state != whpx_bp_set) {
+            stepped_over_bp = NULL;
+        }
+
+        if (stepped_over_bp) {
+            /*
+             * We are trying to run the instruction overwritten by an
active
+             * breakpoint. We will temporarily disable the breakpoint,
suspend
+             * other CPUs, and step over the instruction.
+             */
+            exclusive_step_mode = whpx_step_exclusive;
+        }
+    }
+
+    if (exclusive_step_mode == whpx_step_none) {
+        whpx_vcpu_process_async_events(cpu);
+        if (cpu->halted && !whpx_apic_in_platform()) {
+            cpu->exception_index = EXCP_HLT;
+            qatomic_set(&cpu->exit_request, false);
+            return 0;
+        }
     }
 
     qemu_mutex_unlock_iothread();
-    cpu_exec_start(cpu);
+
+    if (exclusive_step_mode != whpx_step_none) {
+        start_exclusive();
+        g_assert(cpu == current_cpu);
+        g_assert(!cpu->running);
+        cpu->running = true;
+
+        hr = whpx_set_exception_exit_bitmap(
+            1UL << WHvX64ExceptionTypeDebugTrapOrFault);
+        if (!SUCCEEDED(hr)) {
+            error_report("WHPX: Failed to update exception exit mask, "
+                         "hr=%08lx.", hr);
+            return 1;
+        }
+
+        if (stepped_over_bp) {
+            /* Temporarily disable the triggered breakpoint. */
+            cpu_memory_rw_debug(cpu,
+                stepped_over_bp->address,
+                &stepped_over_bp->original_instruction,
+                1,
+                true);
+        }
+    } else {
+        cpu_exec_start(cpu);
+    }
 
     do {
         if (cpu->vcpu_dirty) {
@@ -1022,10 +1595,16 @@ static int whpx_vcpu_run(CPUState *cpu)
             cpu->vcpu_dirty = false;
         }
 
-        whpx_vcpu_pre_run(cpu);
+        if (exclusive_step_mode == whpx_step_none) {
+            whpx_vcpu_pre_run(cpu);
+
+            if (qatomic_read(&cpu->exit_request)) {
+                whpx_vcpu_kick(cpu);
+            }
+        }
 
-        if (qatomic_read(&cpu->exit_request)) {
-            whpx_vcpu_kick(cpu);
+        if (exclusive_step_mode != whpx_step_none ||
cpu->singlestep_enabled) {
+            whpx_vcpu_configure_single_stepping(cpu, true, NULL);
         }
 
         hr = whp_dispatch.WHvRunVirtualProcessor(
@@ -1039,6 +1618,12 @@ static int whpx_vcpu_run(CPUState *cpu)
             break;
         }
 
+        if (exclusive_step_mode != whpx_step_none ||
cpu->singlestep_enabled) {
+            whpx_vcpu_configure_single_stepping(cpu,
+                false,
+                &vcpu->exit_ctx.VpContext.Rflags);
+        }
+
         whpx_vcpu_post_run(cpu);
 
         switch (vcpu->exit_ctx.ExitReason) {
@@ -1062,6 +1647,10 @@ static int whpx_vcpu_run(CPUState *cpu)
             break;
 
         case WHvRunVpExitReasonX64Halt:
+            /*
+             * WARNING: as of build 19043.1526 (21H1), this exit reason is
no
+             * longer used.
+             */
             ret = whpx_handle_halt(cpu);
             break;
 
@@ -1160,10 +1749,19 @@ static int whpx_vcpu_run(CPUState *cpu)
         }
 
         case WHvRunVpExitReasonCanceled:
-            cpu->exception_index = EXCP_INTERRUPT;
-            ret = 1;
+            if (exclusive_step_mode != whpx_step_none) {
+                /*
+                 * We are trying to step over a single instruction, and
+                 * likely got a request to stop from another thread.
+                 * Delay it until we are done stepping
+                 * over.
+                 */
+                ret = 0;
+            } else {
+                cpu->exception_index = EXCP_INTERRUPT;
+                ret = 1;
+            }
             break;
-
         case WHvRunVpExitReasonX64MsrAccess: {
             WHV_REGISTER_VALUE reg_values[3] = {0};
             WHV_REGISTER_NAME reg_names[3];
@@ -1267,11 +1865,36 @@ static int whpx_vcpu_run(CPUState *cpu)
             ret = 0;
             break;
         }
+        case WHvRunVpExitReasonException:
+            whpx_get_registers(cpu);
+
+            if ((vcpu->exit_ctx.VpException.ExceptionType ==
+                 WHvX64ExceptionTypeDebugTrapOrFault) &&
+                (vcpu->exit_ctx.VpException.InstructionByteCount >= 1) &&
+                (vcpu->exit_ctx.VpException.InstructionBytes[0] ==
+                 whpx_breakpoint_instruction)) {
+                /* Stopped at a software breakpoint. */
+                cpu->exception_index = EXCP_DEBUG;
+            } else if ((vcpu->exit_ctx.VpException.ExceptionType ==
+                        WHvX64ExceptionTypeDebugTrapOrFault) &&
+                       !cpu->singlestep_enabled) {
+                /*
+                 * Just finished stepping over a breakpoint, but the
+                 * gdb does not expect us to do single-stepping.
+                 * Don't do anything special.
+                 */
+                cpu->exception_index = EXCP_INTERRUPT;
+            } else {
+                /* Another exception or debug event. Report it to GDB. */
+                cpu->exception_index = EXCP_DEBUG;
+            }
+
+            ret = 1;
+            break;
         case WHvRunVpExitReasonNone:
         case WHvRunVpExitReasonUnrecoverableException:
         case WHvRunVpExitReasonInvalidVpRegisterValue:
         case WHvRunVpExitReasonUnsupportedFeature:
-        case WHvRunVpExitReasonException:
         default:
             error_report("WHPX: Unexpected VP exit code %d",
                          vcpu->exit_ctx.ExitReason);
@@ -1284,10 +1907,32 @@ static int whpx_vcpu_run(CPUState *cpu)
 
     } while (!ret);
 
-    cpu_exec_end(cpu);
+    if (stepped_over_bp) {
+        /* Restore the breakpoint we stepped over */
+        cpu_memory_rw_debug(cpu,
+            stepped_over_bp->address,
+            (void *)&whpx_breakpoint_instruction,
+            1,
+            true);
+    }
+
+    if (exclusive_step_mode != whpx_step_none) {
+        g_assert(cpu_in_exclusive_context(cpu));
+        cpu->running = false;
+        end_exclusive();
+
+        exclusive_step_mode = whpx_step_none;
+    } else {
+        cpu_exec_end(cpu);
+    }
+
     qemu_mutex_lock_iothread();
     current_cpu = cpu;
 
+    if (--whpx->running_cpus == 0) {
+        whpx_last_vcpu_stopping(cpu);
+    }
+
     qatomic_set(&cpu->exit_request, false);
 
     return ret < 0;
@@ -1846,6 +2491,7 @@ static int whpx_accel_init(MachineState *ms)
     memset(&prop, 0, sizeof(WHV_PARTITION_PROPERTY));
     prop.ExtendedVmExits.X64MsrExit = 1;
     prop.ExtendedVmExits.X64CpuidExit = 1;
+    prop.ExtendedVmExits.ExceptionExit = 1;
     if (whpx_apic_in_platform()) {
         prop.ExtendedVmExits.X64ApicInitSipiExitTrap = 1;
     }
@@ -1874,6 +2520,19 @@ static int whpx_accel_init(MachineState *ms)
         goto error;
     }
 
+    /*
+     * We do not want to intercept any exceptions from the guest,
+     * until we actually start debugging with gdb.
+     */
+    whpx->exception_exit_bitmap = -1;
+    hr = whpx_set_exception_exit_bitmap(0);
+
+    if (FAILED(hr)) {
+        error_report("WHPX: Failed to set exception exit bitmap, hr=%08lx",
hr);
+        ret = -EINVAL;
+        goto error;
+    }
+
     hr = whp_dispatch.WHvSetupPartition(whpx->partition);
     if (FAILED(hr)) {
         error_report("WHPX: Failed to setup partition, hr=%08lx", hr);
diff --git a/target/i386/whpx/whpx-internal.h
b/target/i386/whpx/whpx-internal.h
index 908ababf6d..8f8e02cf9a 100644
--- a/target/i386/whpx/whpx-internal.h
+++ b/target/i386/whpx/whpx-internal.h
@@ -5,9 +5,38 @@
 #include <WinHvPlatform.h>
 #include <WinHvEmulation.h>
 
+enum whpx_breakpoint_state {
+    whpx_bp_cleared = 0,
+    whpx_bp_set_pending,
+    whpx_bp_set,
+    whpx_bp_clear_pending,
+};
+
+struct whpx_breakpoint {
+    vaddr address;
+    enum whpx_breakpoint_state state;
+    uint8_t original_instruction;
+};
+
+struct whpx_breakpoint_collection {
+    int allocated, used;
+    struct whpx_breakpoint data[0];
+};
+
+struct whpx_breakpoints {
+    int original_address_count;
+    vaddr *original_addresses;
+
+    struct whpx_breakpoint_collection *breakpoints;
+};
+
 struct whpx_state {
     uint64_t mem_quota;
     WHV_PARTITION_HANDLE partition;
+    uint64_t exception_exit_bitmap;
+    int32_t running_cpus;
+    struct whpx_breakpoints breakpoints;
+
     bool kernel_irqchip_allowed;
     bool kernel_irqchip_required;
     bool apic_in_platform;
-- 




^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH 3/3] whpx: Added support for breakpoints and stepping
  2022-02-23  5:25 [PATCH 3/3] whpx: Added support for breakpoints and stepping Ivan Shcherbakov
@ 2022-02-23  9:36 ` Paolo Bonzini
  2022-02-23 20:05   ` Ivan Shcherbakov
  0 siblings, 1 reply; 10+ messages in thread
From: Paolo Bonzini @ 2022-02-23  9:36 UTC (permalink / raw)
  To: Ivan Shcherbakov, qemu-devel; +Cc: armbru, mst

On 2/23/22 06:25, Ivan Shcherbakov wrote:
> This adds support for breakpoints and stepping when debugging
> WHPX-accelerated guests with gdb.
> It enables reliable debugging of the Linux kernel in both single-CPU and SMP
> modes.
> 
> Signed-off-by: Ivan Shcherbakov <ivan@sysprogs.com>

Hi,

in general this patch is really good work, thanks for contributing it!

Just a couple notes:

> +enum whpx_step_mode {
> +    whpx_step_none = 0,
> +    /* Halt other VCPUs */
> +    whpx_step_exclusive,
> +};

Please use

typedef enum WhpxStepMode {
     WHPX_STEP_NONE,
     WHPX_STEP_EXCLUSIVE,
} WhpxStepMode;

and likewise for WhpxBreakpointState.  (In the case of WhpxStepMode I 
would also consider simply a "bool exclusive" in whpx_cpu_run).

>   struct whpx_vcpu {
>       WHV_EMULATOR_HANDLE emulator;
>       bool window_registered;
> @@ -156,7 +163,6 @@ struct whpx_vcpu {
>       uint64_t tpr;
>       uint64_t apic_base;
>       bool interruption_pending;
> -

Please leave the empty line.

> +    if (set) {
> +        /* Raise WHvX64ExceptionTypeDebugTrapOrFault after each instruction
> */
> +        reg_value.Reg64 |= TF_MASK;
> +    } else {
> +        reg_value.Reg64 &= ~TF_MASK;
> +    }

Out of curiosity, does the guest see TF=1 if it single steps through a 
PUSHF (and then break horribly on POPF :))?

> +/*
> + * Linux uses int3 (0xCC) during startup (see int3_selftest()) and for
> + * debugging user-mode applications. Since the WHPX API does not offer
> + * an easy way to pass the intercepted exception back to the guest, we
> + * resort to using INT1 instead, and let the guest always handle INT3.
> + */
> +static const uint8_t whpx_breakpoint_instruction = 0xF1;

Makes sense.

> +    breakpoints->original_addresses =
> +        g_renew(vaddr, breakpoints->original_addresses,
> cpu_breakpoint_count);
> +
> +    breakpoints->original_address_count = cpu_breakpoint_count;
> +
> +    int max_breakpoints = cpu_breakpoint_count +
> +        (breakpoints->breakpoints ? breakpoints->breakpoints->used : 0);
> +
> +    struct whpx_breakpoint_collection *new_breakpoints =
> +        (struct whpx_breakpoint_collection *)g_malloc0(
> +        sizeof(struct whpx_breakpoint_collection) +
> +            max_breakpoints * sizeof(struct whpx_breakpoint));

> +    new_breakpoints->allocated = max_breakpoints;

Why separate the original addresses in a different array (and why the 
different logic, with used/allocated for one array and an exact size for 
the other)

> +        enum whpx_breakpoint_state state = breakpoints->data[i].state;

Same comment on coding style applies to this enum.

I would have done most changes for you, but I didn't really understand 
the breakpoints vs breakpoint collection part, so I would like your 
input on that.

I have queued the first two patches already.

Thanks!

Paolo


^ permalink raw reply	[flat|nested] 10+ messages in thread

* RE: [PATCH 3/3] whpx: Added support for breakpoints and stepping
  2022-02-23  9:36 ` Paolo Bonzini
@ 2022-02-23 20:05   ` Ivan Shcherbakov
  2022-02-24  9:35     ` Peter Maydell
  0 siblings, 1 reply; 10+ messages in thread
From: Ivan Shcherbakov @ 2022-02-23 20:05 UTC (permalink / raw)
  To: 'Paolo Bonzini', qemu-devel; +Cc: armbru, mst

Hi Paolo,

Thanks for getting back to me. Please see my comments below:

>Please use WhpxStepMode and likewise for WhpxBreakpointState.
No problem, I have updated the patch.

>(In the case of WhpxStepMode I would also consider simply a "bool exclusive" in whpx_cpu_run).
This is a leftover from prior experiments with stepping into the exception handlers 
that involves reading the IDT/GDT, writing INT3 into the original handler, disabling INT1 interception
and instead intercepting INT3. It would be implemented via a third step mode. I ended up removing
it due to the unbelievable complexity of properly handling all corner cases, but I thought it would make
sense to keep it as an enum if someone else decides to add anything similar later.

>Please leave the empty line.
Oops, leftover from other experiments. Thanks for pointing out.

>Out of curiosity, does the guest see TF=1 if it single steps through a PUSHF (and then break horribly on POPF :))?
This is a very good point. It would indeed get pushed into the stack and popped back with POPF, raising another INT1.
I shouldn't be too horrible though: it would report another stop to gdb, and doing a step again would clear it.
A somewhat bigger issue would be that stepping through POPF or LAHF could reset the TF to 0, effectively ending the
single-stepping mode.

It could be addressed by emulating PUSHF/POPF, but there are a few corner cases (alignment checks, page faults on RSP,
flag translation for the virtual 8086 mode), that could make things more complicated. Also, PUSHF/POPF it not the only
special case. Stepping over IRET, into page fault handlers, and when the guest itself is running a debugger that wants
to do single-stepping would also require rather non-trivial workarounds.

I have added a detailed comment outlining the limitations of the current stepping logic and possible workarounds above
the definition of WhpxStepMode. The current implementation works like a charm for debugging real-world kernel modules
on x64 Ubuntu, and if some of these limitations end up breaking other debug scenarios, I would much more prefer
addressing specific narrow issues, than adding another layer of complexity proactively, if that's OK with you. 

I have taken special care to make sure the new functionality won't cause any regressions when not debugging.
The intercepted exception mask is set to 0 when gdb is not connected, 100% matching the behavior prior to the patch.

>Why separate the original addresses in a different array
This accommodates the case when different parts of QEMU would set multiple breakpoints at the same location, or when
a breakpoint removed on the QEMU level could not be removed from RAM (e.g. the page got swapped out). 
Synchronizing the QEMU breakpoint list with the RAM breakpoint list currently has O(N^2) complexity. However, in many
cases (e.g. stopping to handle timers), the QEMU breakpoints won't change between the invocations, so we can just do a
quick O(N) comparison of the old breakpoint list vs. new breakpoint list, and avoid the O(N^2) part. You can find this logic
by searching for the 'update_pending' variable.

This could be avoided if cpu_breakpoint_insert() or gdb_breakpoint_insert() could invoke a WHPX-specific handler,
similar to what is currently done with kvm_insert_breakpoint(), or a generic callback via AccelOpsClass, that could just
mark the RAM breakpoint list dirty. But I didn't want to introduce unnecessary changes outside the WHPX module.

>(and why the different logic, with used/allocated for one array and an exact size for the other)
When we are rebuilding the memory breakpoint list, we don't know how many of the CPU breakpoints will refer to the same
memory addresses, or overlap with the breakpoints that could not be removed. So, we allocate for the worst case, and keep
a track of the actually created entries in the 'used' field.

Updated patch below. Let me know if you have any further concerns and I will be happy to address them.

This adds support for breakpoints and stepping when debugging WHPX-accelerated guests with gdb.
It enables reliable debugging of the Linux kernel in both single-CPU and SMP modes.

Signed-off-by: Ivan Shcherbakov <ivan@sysprogs.com>
---
 gdbstub.c                        |  10 +
 include/exec/gdbstub.h           |   8 +
 target/i386/whpx/whpx-all.c      | 763 ++++++++++++++++++++++++++++++-
 target/i386/whpx/whpx-internal.h |  29 ++
 4 files changed, 796 insertions(+), 14 deletions(-)

diff --git a/gdbstub.c b/gdbstub.c
index 3c14c6a038..d30cbfa478 100644
--- a/gdbstub.c
+++ b/gdbstub.c
@@ -373,6 +373,12 @@ typedef struct GDBState {
 } GDBState;
 
 static GDBState gdbserver_state;
+static bool gdbserver_is_connected;
+
+bool gdb_is_connected(void)
+{
+    return gdbserver_is_connected;
+}
 
 static void init_gdbserver_state(void)
 {
@@ -3410,6 +3416,10 @@ static void gdb_chr_event(void *opaque, QEMUChrEvent event)
         vm_stop(RUN_STATE_PAUSED);
         replay_gdb_attached();
         gdb_has_xml = false;
+        gdbserver_is_connected = true;
+        break;
+    case CHR_EVENT_CLOSED:
+        gdbserver_is_connected = false;
         break;
     default:
         break;
diff --git a/include/exec/gdbstub.h b/include/exec/gdbstub.h
index a024a0350d..0ef54cdeb5 100644
--- a/include/exec/gdbstub.h
+++ b/include/exec/gdbstub.h
@@ -188,4 +188,12 @@ extern bool gdb_has_xml;
 /* in gdbstub-xml.c, generated by scripts/feature_to_c.sh */
 extern const char *const xml_builtin[][2];
 
+/**
+ * gdb_is_connected: Check whether gdb is currently connected.
+ * This function is used to determine if gdb is currently connected to qemu.
+ * It is used by the WHPX engine to enable interception of debug-related
+ * exceptions, when debugging with gdb, and pass them to the guest otherwise.
+ */
+bool gdb_is_connected(void);
+
 #endif
diff --git a/target/i386/whpx/whpx-all.c b/target/i386/whpx/whpx-all.c
index 8a8b5d55d1..df47fb60b6 100644
--- a/target/i386/whpx/whpx-all.c
+++ b/target/i386/whpx/whpx-all.c
@@ -12,6 +12,7 @@
 #include "cpu.h"
 #include "exec/address-spaces.h"
 #include "exec/ioport.h"
+#include "exec/gdbstub.h"
 #include "qemu-common.h"
 #include "qemu/accel.h"
 #include "sysemu/whpx.h"
@@ -148,6 +149,87 @@ struct whpx_register_set {
     WHV_REGISTER_VALUE values[RTL_NUMBER_OF(whpx_register_names)];
 };
 
+/*
+ * The current implementation of instruction stepping sets the TF flag
+ * in RFLAGS, causing the CPU to raise an INT1 after each instruction.
+ * This corresponds to the WHvX64ExceptionTypeDebugTrapOrFault exception.
+ *
+ * This approach has a few limitations:
+ *     1. Stepping over a PUSHF/SAHF instruction will save the TF flag
+ *        along with the other flags, possibly restoring it later. It would
+ *        result in another INT1 when the flags are restored, triggering
+ *        a stop in gdb that could be cleared by doing another step.
+ *
+ *        Stepping over a POPF/LAHF instruction will let it overwrite the
+ *        TF flags, ending the stepping mode.
+ *
+ *     2. Stepping over an instruction raising an exception (e.g. INT, DIV,
+ *        or anything that could result in a page fault) will save the flags
+ *        to the stack, clear the TF flag, and let the guest execute the
+ *        handler. Normally, the guest will restore the original flags,
+ *        that will continue single-stepping.
+ *
+ *     3. Debuggers running on the guest may wish to set TF to do instruction
+ *        stepping. INT1 events generated by it would be intercepted by us,
+ *        as long as the gdb is connected to QEMU.
+ *
+ * In practice this means that:
+ *     1. Stepping through flags-modifying instructions may cause gdb to
+ *        continue or stop in unexpected places. This will be fully recoverable
+ *        and will not crash the target.
+ *
+ *     2. Stepping over an instruction that triggers an exception will step
+ *        over the exception handler, not into it.
+ *
+ *     3. Debugging the guest via gdb, while running debugger on the guest
+ *        at the same time may lead to unexpected effects. Detaching gdb from
+ *        QEMU will prevent any further interference with the guest-level
+ *        debuggers.
+ *
+ * The limitations can be addressed as shown below:
+ *     1. PUSHF/SAHF/POPF/LAHF/IRET instructions can be emulated instead of
+ *        stepping through them. The exact semantics of the instructions is
+ *        defined in the "Combined Volume Set of Intel 64 and IA-32 
+ *        Architectures Software Developer's Manuals", however it involves a
+ *        fair amount of corner cases due to compatibility with real mode,
+ *        virtual 8086 mode, and differences between 64-bit and 32-bit modes.
+ *
+ *     2. We could step into the guest's exception handlers using the following
+ *        sequence:
+ *          a. Temporarily enable catching of all exception types via
+ *             whpx_set_exception_exit_bitmap().
+ *          b. Once an exception is intercepted, read the IDT/GDT and locate
+ *             the original handler.
+ *          c. Patch the original handler, injecting an INT3 at the beginning.
+ *          d. Update the exception exit bitmap to only catch the
+ *             WHvX64ExceptionTypeBreakpointTrap exception.
+ *          e. Let the affected CPU run in the exclusive mode.
+ *          f. Restore the original handler and the exception exit bitmap.
+ *        Note that handling all corner cases related to IDT/GDT is harder 
+ *        than it may seem. See x86_cpu_get_phys_page_attrs_debug() for a 
+ *        rough idea.
+ *
+ *     3. In order to properly support guest-level debugging in parallel with
+ *        the QEMU-level debugging, we would need to be able to pass some INT1
+ *        events to the guest. This could be done via the following methods:
+ *          a. Using the WHvRegisterPendingEvent register. As of Windows 21H1,
+ *             it seems to only work for interrupts and not software
+ *             exceptions.
+ *          b. Locating and patching the original handler by parsing IDT/GDT.
+ *             This involves relatively complex logic outlined in the previous
+ *             paragraph.
+ *          c. Emulating the exception invocation (i.e. manually updating RIP,
+ *             RFLAGS, and pushing the old values to stack). This is even more
+ *             complicated than the previous option, since it involves checking
+ *             CPL, gate attributes, and doing various adjustments depending
+ *             on the current CPU mode, whether the CPL is changing, etc.
+ */
+typedef enum WhpxStepMode {
+    WHPX_STEP_NONE = 0,
+    /* Halt other VCPUs */
+    WHPX_STEP_EXCLUSIVE,
+} WhpxStepMode;
+
 struct whpx_vcpu {
     WHV_EMULATOR_HANDLE emulator;
     bool window_registered;
@@ -793,6 +875,515 @@ static int whpx_handle_portio(CPUState *cpu,
     return 0;
 }
 
+/*
+ * Controls whether we should intercept various exceptions on the guest,
+ * namely breakpoint/single-step events.
+ *
+ * The 'exceptions' argument accepts a bitmask, e.g:
+ * (1 << WHvX64ExceptionTypeDebugTrapOrFault) | (...)
+ */
+static HRESULT whpx_set_exception_exit_bitmap(UINT64 exceptions)
+{
+    struct whpx_state *whpx = &whpx_global;
+    WHV_PARTITION_PROPERTY prop = { 0, };
+    HRESULT hr;
+
+    if (exceptions == whpx->exception_exit_bitmap) {
+        return S_OK;
+    }
+
+    prop.ExceptionExitBitmap = exceptions;
+
+    hr = whp_dispatch.WHvSetPartitionProperty(
+        whpx->partition,
+        WHvPartitionPropertyCodeExceptionExitBitmap,
+        &prop,
+        sizeof(WHV_PARTITION_PROPERTY));
+
+    if (SUCCEEDED(hr)) {
+        whpx->exception_exit_bitmap = exceptions;
+    }
+
+    return hr;
+}
+
+
+/*
+ * This function is called before/after stepping over a single instruction.
+ * It will update the CPU registers to arm/disarm the instruction stepping
+ * accordingly.
+ */
+static HRESULT whpx_vcpu_configure_single_stepping(CPUState *cpu,
+    bool set,
+    uint64_t *exit_context_rflags)
+{
+    WHV_REGISTER_NAME reg_name;
+    WHV_REGISTER_VALUE reg_value;
+    HRESULT hr;
+    struct whpx_state *whpx = &whpx_global;
+
+    /*
+     * If we are trying to step over a single instruction, we need to set the
+     * TF bit in rflags. Otherwise, clear it.
+     */
+    reg_name = WHvX64RegisterRflags;
+    hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
+        whpx->partition,
+        cpu->cpu_index,
+        &reg_name,
+        1,
+        &reg_value);
+
+    if (FAILED(hr)) {
+        error_report("WHPX: Failed to get rflags, hr=%08lx", hr);
+        return hr;
+    }
+
+    if (exit_context_rflags) {
+        assert(*exit_context_rflags == reg_value.Reg64);
+    }
+
+    if (set) {
+        /* Raise WHvX64ExceptionTypeDebugTrapOrFault after each instruction */
+        reg_value.Reg64 |= TF_MASK;
+    } else {
+        reg_value.Reg64 &= ~TF_MASK;
+    }
+
+    if (exit_context_rflags) {
+        *exit_context_rflags = reg_value.Reg64;
+    }
+
+    hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
+        whpx->partition,
+        cpu->cpu_index,
+        &reg_name,
+        1,
+        &reg_value);
+
+    if (FAILED(hr)) {
+        error_report("WHPX: Failed to set rflags,"
+            " hr=%08lx",
+            hr);
+        return hr;
+    }
+
+    reg_name = WHvRegisterInterruptState;
+    reg_value.Reg64 = 0;
+
+    /* Suspend delivery of hardware interrupts during single-stepping. */
+    reg_value.InterruptState.InterruptShadow = set != 0;
+
+    hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
+    whpx->partition,
+        cpu->cpu_index,
+        &reg_name,
+        1,
+        &reg_value);
+
+    if (FAILED(hr)) {
+        error_report("WHPX: Failed to set InterruptState,"
+            " hr=%08lx",
+            hr);
+        return hr;
+    }
+
+    if (!set) {
+        /*
+         * We have just finished stepping over a single instruction,
+         * and intercepted the INT1 generated by it.
+         * We need to now hide the INT1 from the guest,
+         * as it would not be expecting it.
+         */
+
+        reg_name = WHvX64RegisterPendingDebugException;
+        hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
+        whpx->partition,
+            cpu->cpu_index,
+            &reg_name,
+            1,
+            &reg_value);
+
+        if (FAILED(hr)) {
+            error_report("WHPX: Failed to get pending debug exceptions,"
+                         "hr=%08lx", hr);
+            return hr;
+        }
+
+        if (reg_value.PendingDebugException.SingleStep) {
+            reg_value.PendingDebugException.SingleStep = 0;
+
+            hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
+                whpx->partition,
+                cpu->cpu_index,
+                &reg_name,
+                1,
+                &reg_value);
+
+            if (FAILED(hr)) {
+                error_report("WHPX: Failed to clear pending debug exceptions,"
+                             "hr=%08lx", hr);
+             return hr;
+            }
+        }
+
+    }
+
+    return S_OK;
+}
+
+/* Tries to find a breakpoint at the specified address. */
+static struct whpx_breakpoint *whpx_lookup_breakpoint_by_addr(uint64_t address)
+{
+    struct whpx_state *whpx = &whpx_global;
+    int i;
+
+    if (whpx->breakpoints.breakpoints) {
+        for (i = 0; i < whpx->breakpoints.breakpoints->used; i++) {
+            if (address == whpx->breakpoints.breakpoints->data[i].address) {
+                return &whpx->breakpoints.breakpoints->data[i];
+            }
+        }
+    }
+
+    return NULL;
+}
+
+/*
+ * Linux uses int3 (0xCC) during startup (see int3_selftest()) and for
+ * debugging user-mode applications. Since the WHPX API does not offer
+ * an easy way to pass the intercepted exception back to the guest, we
+ * resort to using INT1 instead, and let the guest always handle INT3.
+ */
+static const uint8_t whpx_breakpoint_instruction = 0xF1;
+
+/*
+ * The WHPX QEMU backend implements breakpoints by writing the INT1
+ * instruction into memory (ignoring the DRx registers). This raises a few
+ * issues that need to be carefully handled:
+ *
+ * 1. Although unlikely, other parts of QEMU may set multiple breakpoints
+ *    at the same location, and later remove them in arbitrary order.
+ *    This should not cause memory corruption, and should only remove the
+ *    physical breakpoint instruction when the last QEMU breakpoint is gone.
+ *
+ * 2. Writing arbitrary virtual memory may fail if it's not mapped to a valid
+ *    physical location. Hence, physically adding/removing a breakpoint can
+ *    theoretically fail at any time. We need to keep track of it.
+ *
+ * The function below rebuilds a list of low-level breakpoints (one per
+ * address, tracking the original instruction and any errors) from the list of
+ * high-level breakpoints (set via cpu_breakpoint_insert()).
+ *
+ * In order to optimize performance, this function stores the list of
+ * high-level breakpoints (a.k.a. CPU breakpoints) used to compute the
+ * low-level ones, so that it won't be re-invoked until these breakpoints
+ * change.
+ *
+ * Note that this function decides which breakpoints should be inserted into,
+ * memory, but doesn't actually do it. The memory accessing is done in
+ * whpx_apply_breakpoints().
+ */
+static void whpx_translate_cpu_breakpoints(
+    struct whpx_breakpoints *breakpoints,
+    CPUState *cpu,
+    int cpu_breakpoint_count)
+{
+    CPUBreakpoint *bp;
+    int cpu_bp_index = 0;
+
+    breakpoints->original_addresses =
+        g_renew(vaddr, breakpoints->original_addresses, cpu_breakpoint_count);
+
+    breakpoints->original_address_count = cpu_breakpoint_count;
+
+    int max_breakpoints = cpu_breakpoint_count +
+        (breakpoints->breakpoints ? breakpoints->breakpoints->used : 0);
+
+    struct whpx_breakpoint_collection *new_breakpoints =
+        (struct whpx_breakpoint_collection *)g_malloc0(
+        sizeof(struct whpx_breakpoint_collection) +
+            max_breakpoints * sizeof(struct whpx_breakpoint));
+
+    new_breakpoints->allocated = max_breakpoints;
+    new_breakpoints->used = 0;
+
+    /*
+     * 1. Preserve all old breakpoints that could not be automatically
+     * cleared when the CPU got stopped.
+     */
+    if (breakpoints->breakpoints) {
+        int i;
+        for (i = 0; i < breakpoints->breakpoints->used; i++) {
+            if (breakpoints->breakpoints->data[i].state != WHPX_BP_CLEARED) {
+                new_breakpoints->data[new_breakpoints->used++] =
+                    breakpoints->breakpoints->data[i];
+            }
+        }
+    }
+
+    /* 2. Map all CPU breakpoints to WHPX breakpoints */
+    QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
+        int i;
+        bool found = false;
+
+        /* This will be used to detect changed CPU breakpoints later. */
+        breakpoints->original_addresses[cpu_bp_index++] = bp->pc;
+
+        for (i = 0; i < new_breakpoints->used; i++) {
+            /*
+             * WARNING: This loop has O(N^2) complexity, where N is the
+             * number of breakpoints. It should not be a bottleneck in
+             * real-world scenarios, since it only needs to run once after
+             * the breakpoints have been modified.
+             * If this ever becomes a concern, it can be optimized by storing
+             * high-level breakpoint objects in a tree or hash map.
+             */
+
+            if (new_breakpoints->data[i].address == bp->pc) {
+                /* There was already a breakpoint at this address. */
+                if (new_breakpoints->data[i].state == WHPX_BP_CLEAR_PENDING) {
+                    new_breakpoints->data[i].state = WHPX_BP_SET;
+                } else if (new_breakpoints->data[i].state == WHPX_BP_SET) {
+                    new_breakpoints->data[i].state = WHPX_BP_SET_PENDING;
+                }
+
+                found = true;
+                break;
+            }
+        }
+
+        if (!found && new_breakpoints->used < new_breakpoints->allocated) {
+            /* No WHPX breakpoint at this address. Create one. */
+            new_breakpoints->data[new_breakpoints->used].address = bp->pc;
+            new_breakpoints->data[new_breakpoints->used].state =
+                WHPX_BP_SET_PENDING;
+            new_breakpoints->used++;
+        }
+    }
+
+    if (breakpoints->breakpoints) {
+        /*
+         * Free the previous breakpoint list. This can be optimized by keeping
+         * it as shadow buffer for the next computation instead of freeing
+         * it immediately.
+         */
+        g_free(breakpoints->breakpoints);
+    }
+
+    breakpoints->breakpoints = new_breakpoints;
+}
+
+/*
+ * Physically inserts/removes the breakpoints by reading and writing the
+ * physical memory, keeping a track of the failed attempts.
+ *
+ * Passing resuming=true  will try to set all previously unset breakpoints.
+ * Passing resuming=false will remove all inserted ones.
+ */
+static void whpx_apply_breakpoints(
+    struct whpx_breakpoint_collection *breakpoints,
+    CPUState *cpu,
+    bool resuming)
+{
+    int i, rc;
+    if (!breakpoints) {
+        return;
+    }
+
+    for (i = 0; i < breakpoints->used; i++) {
+        /* Decide what to do right now based on the last known state. */
+        WhpxBreakpointState state = breakpoints->data[i].state;
+        switch (state) {
+        case WHPX_BP_CLEARED:
+            if (resuming) {
+                state = WHPX_BP_SET_PENDING;
+            }
+            break;
+        case WHPX_BP_SET_PENDING:
+            if (!resuming) {
+                state = WHPX_BP_CLEARED;
+            }
+            break;
+        case WHPX_BP_SET:
+            if (!resuming) {
+                state = WHPX_BP_CLEAR_PENDING;
+            }
+            break;
+        case WHPX_BP_CLEAR_PENDING:
+            if (resuming) {
+                state = WHPX_BP_SET;
+            }
+            break;
+        }
+
+        if (state == WHPX_BP_SET_PENDING) {
+            /* Remember the original instruction. */
+            rc = cpu_memory_rw_debug(cpu,
+                breakpoints->data[i].address,
+                &breakpoints->data[i].original_instruction,
+                1,
+                false);
+
+            if (!rc) {
+                /* Write the breakpoint instruction. */
+                rc = cpu_memory_rw_debug(cpu,
+                    breakpoints->data[i].address,
+                    (void *)&whpx_breakpoint_instruction,
+                    1,
+                    true);
+            }
+
+            if (!rc) {
+                state = WHPX_BP_SET;
+            }
+
+        }
+
+        if (state == WHPX_BP_CLEAR_PENDING) {
+            /* Restore the original instruction. */
+            rc = cpu_memory_rw_debug(cpu,
+                breakpoints->data[i].address,
+                &breakpoints->data[i].original_instruction,
+                1,
+                true);
+
+            if (!rc) {
+                state = WHPX_BP_CLEARED;
+            }
+        }
+
+        breakpoints->data[i].state = state;
+    }
+}
+
+/*
+ * This function is called when the a VCPU is about to start and no other
+ * VCPUs have been started so far. Since the VCPU start order could be
+ * arbitrary, it doesn't have to be VCPU#0.
+ *
+ * It is used to commit the breakpoints into memory, and configure WHPX
+ * to intercept debug exceptions.
+ *
+ * Note that whpx_set_exception_exit_bitmap() cannot be called if one or
+ * more VCPUs are already running, so this is the best place to do it.
+ */
+static int whpx_first_vcpu_starting(CPUState *cpu)
+{
+    struct whpx_state *whpx = &whpx_global;
+    HRESULT hr;
+
+    g_assert(qemu_mutex_iothread_locked());
+
+    if (!QTAILQ_EMPTY(&cpu->breakpoints) ||
+            (whpx->breakpoints.breakpoints &&
+             whpx->breakpoints.breakpoints->used)) {
+        CPUBreakpoint *bp;
+        int i = 0;
+        bool update_pending = false;
+
+        QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
+            if (i >= whpx->breakpoints.original_address_count ||
+                bp->pc != whpx->breakpoints.original_addresses[i]) {
+                update_pending = true;
+            }
+
+            i++;
+        }
+
+        if (i != whpx->breakpoints.original_address_count) {
+            update_pending = true;
+        }
+
+        if (update_pending) {
+            /*
+             * The CPU breakpoints have changed since the last call to
+             * whpx_translate_cpu_breakpoints(). WHPX breakpoints must
+             * now be recomputed.
+             */
+            whpx_translate_cpu_breakpoints(&whpx->breakpoints, cpu, i);
+        }
+
+        /* Actually insert the breakpoints into the memory. */
+        whpx_apply_breakpoints(whpx->breakpoints.breakpoints, cpu, true);
+    }
+
+    uint64_t exception_mask;
+    if (gdb_is_connected()) {
+        /*
+         * GDB is connected. Intercept breakpoint/step events.
+         * Since we are using INT1 rather than INT3 for breakpoints,
+         * we don't need to intercept WHvX64ExceptionTypeBreakpointTrap.
+         */
+
+        exception_mask = 1UL << WHvX64ExceptionTypeDebugTrapOrFault;
+    } else {
+        /* GDB is not connected. Let the guest handle all exceptions. */
+        exception_mask = 0;
+    }
+
+    hr = whpx_set_exception_exit_bitmap(exception_mask);
+    if (!SUCCEEDED(hr)) {
+        error_report("WHPX: Failed to update exception exit mask,"
+                     "hr=%08lx.", hr);
+        return 1;
+    }
+
+    return 0;
+}
+
+/*
+ * This function is called when the last VCPU has finished running.
+ * It is used to remove any previously set breakpoints from memory.
+ */
+static int whpx_last_vcpu_stopping(CPUState *cpu)
+{
+    whpx_apply_breakpoints(whpx_global.breakpoints.breakpoints, cpu, false);
+    return 0;
+}
+
+/* Returns the address of the next instruction that is about to be executed. */
+static vaddr whpx_vcpu_get_pc(CPUState *cpu, bool exit_context_valid)
+{
+    if (cpu->vcpu_dirty) {
+        /* The CPU registers have been modified by other parts of QEMU. */
+        struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
+        return env->eip;
+    } else if (exit_context_valid) {
+        /*
+         * The CPU registers have not been modified by neither other parts
+         * of QEMU, nor this port by calling WHvSetVirtualProcessorRegisters().
+         * This is the most common case.
+         */
+        struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
+        return vcpu->exit_ctx.VpContext.Rip;
+    } else {
+        /*
+         * The CPU registers have been modified by a call to
+         * WHvSetVirtualProcessorRegisters() and must be re-queried from
+         * the target.
+         */
+        WHV_REGISTER_VALUE reg_value;
+        WHV_REGISTER_NAME reg_name = WHvX64RegisterRip;
+        HRESULT hr;
+        struct whpx_state *whpx = &whpx_global;
+
+        hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
+            whpx->partition,
+            cpu->cpu_index,
+            &reg_name,
+            1,
+            &reg_value);
+
+        if (FAILED(hr)) {
+            error_report("WHPX: Failed to get PC, hr=%08lx", hr);
+            return 0;
+        }
+
+        return reg_value.Reg64;
+    }
+}
+
 static int whpx_handle_halt(CPUState *cpu)
 {
     struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
@@ -1004,17 +1595,75 @@ static int whpx_vcpu_run(CPUState *cpu)
     HRESULT hr;
     struct whpx_state *whpx = &whpx_global;
     struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
+    struct whpx_breakpoint *stepped_over_bp = NULL;
+    WhpxStepMode exclusive_step_mode = WHPX_STEP_NONE;
     int ret;
 
-    whpx_vcpu_process_async_events(cpu);
-    if (cpu->halted && !whpx_apic_in_platform()) {
-        cpu->exception_index = EXCP_HLT;
-        qatomic_set(&cpu->exit_request, false);
-        return 0;
+    g_assert(qemu_mutex_iothread_locked());
+
+    if (whpx->running_cpus++ == 0) {
+        /* Insert breakpoints into memory, update exception exit bitmap. */
+        ret = whpx_first_vcpu_starting(cpu);
+        if (ret != 0) {
+            return ret;
+        }
+    }
+
+    if (whpx->breakpoints.breakpoints &&
+        whpx->breakpoints.breakpoints->used > 0)
+    {
+        uint64_t pc = whpx_vcpu_get_pc(cpu, true);
+        stepped_over_bp = whpx_lookup_breakpoint_by_addr(pc);
+        if (stepped_over_bp && stepped_over_bp->state != WHPX_BP_SET) {
+            stepped_over_bp = NULL;
+        }
+
+        if (stepped_over_bp) {
+            /*
+             * We are trying to run the instruction overwritten by an active
+             * breakpoint. We will temporarily disable the breakpoint, suspend
+             * other CPUs, and step over the instruction.
+             */
+            exclusive_step_mode = WHPX_STEP_EXCLUSIVE;
+        }
+    }
+
+    if (exclusive_step_mode == WHPX_STEP_NONE) {
+        whpx_vcpu_process_async_events(cpu);
+        if (cpu->halted && !whpx_apic_in_platform()) {
+            cpu->exception_index = EXCP_HLT;
+            qatomic_set(&cpu->exit_request, false);
+            return 0;
+        }
     }
 
     qemu_mutex_unlock_iothread();
-    cpu_exec_start(cpu);
+
+    if (exclusive_step_mode != WHPX_STEP_NONE) {
+        start_exclusive();
+        g_assert(cpu == current_cpu);
+        g_assert(!cpu->running);
+        cpu->running = true;
+
+        hr = whpx_set_exception_exit_bitmap(
+            1UL << WHvX64ExceptionTypeDebugTrapOrFault);
+        if (!SUCCEEDED(hr)) {
+            error_report("WHPX: Failed to update exception exit mask, "
+                         "hr=%08lx.", hr);
+            return 1;
+        }
+
+        if (stepped_over_bp) {
+            /* Temporarily disable the triggered breakpoint. */
+            cpu_memory_rw_debug(cpu,
+                stepped_over_bp->address,
+                &stepped_over_bp->original_instruction,
+                1,
+                true);
+        }
+    } else {
+        cpu_exec_start(cpu);
+    }
 
     do {
         if (cpu->vcpu_dirty) {
@@ -1022,10 +1671,16 @@ static int whpx_vcpu_run(CPUState *cpu)
             cpu->vcpu_dirty = false;
         }
 
-        whpx_vcpu_pre_run(cpu);
+        if (exclusive_step_mode == WHPX_STEP_NONE) {
+            whpx_vcpu_pre_run(cpu);
 
-        if (qatomic_read(&cpu->exit_request)) {
-            whpx_vcpu_kick(cpu);
+            if (qatomic_read(&cpu->exit_request)) {
+                whpx_vcpu_kick(cpu);
+            }
+        }
+
+        if (exclusive_step_mode != WHPX_STEP_NONE || cpu->singlestep_enabled) {
+            whpx_vcpu_configure_single_stepping(cpu, true, NULL);
         }
 
         hr = whp_dispatch.WHvRunVirtualProcessor(
@@ -1039,6 +1694,12 @@ static int whpx_vcpu_run(CPUState *cpu)
             break;
         }
 
+        if (exclusive_step_mode != WHPX_STEP_NONE || cpu->singlestep_enabled) {
+            whpx_vcpu_configure_single_stepping(cpu,
+                false,
+                &vcpu->exit_ctx.VpContext.Rflags);
+        }
+
         whpx_vcpu_post_run(cpu);
 
         switch (vcpu->exit_ctx.ExitReason) {
@@ -1062,6 +1723,10 @@ static int whpx_vcpu_run(CPUState *cpu)
             break;
 
         case WHvRunVpExitReasonX64Halt:
+            /*
+             * WARNING: as of build 19043.1526 (21H1), this exit reason is no
+             * longer used.
+             */
             ret = whpx_handle_halt(cpu);
             break;
 
@@ -1160,10 +1825,19 @@ static int whpx_vcpu_run(CPUState *cpu)
         }
 
         case WHvRunVpExitReasonCanceled:
-            cpu->exception_index = EXCP_INTERRUPT;
-            ret = 1;
+            if (exclusive_step_mode != WHPX_STEP_NONE) {
+                /*
+                 * We are trying to step over a single instruction, and
+                 * likely got a request to stop from another thread.
+                 * Delay it until we are done stepping
+                 * over.
+                 */
+                ret = 0;
+            } else {
+                cpu->exception_index = EXCP_INTERRUPT;
+                ret = 1;
+            }
             break;
-
         case WHvRunVpExitReasonX64MsrAccess: {
             WHV_REGISTER_VALUE reg_values[3] = {0};
             WHV_REGISTER_NAME reg_names[3];
@@ -1267,11 +1941,36 @@ static int whpx_vcpu_run(CPUState *cpu)
             ret = 0;
             break;
         }
+        case WHvRunVpExitReasonException:
+            whpx_get_registers(cpu);
+
+            if ((vcpu->exit_ctx.VpException.ExceptionType ==
+                 WHvX64ExceptionTypeDebugTrapOrFault) &&
+                (vcpu->exit_ctx.VpException.InstructionByteCount >= 1) &&
+                (vcpu->exit_ctx.VpException.InstructionBytes[0] ==
+                 whpx_breakpoint_instruction)) {
+                /* Stopped at a software breakpoint. */
+                cpu->exception_index = EXCP_DEBUG;
+            } else if ((vcpu->exit_ctx.VpException.ExceptionType ==
+                        WHvX64ExceptionTypeDebugTrapOrFault) &&
+                       !cpu->singlestep_enabled) {
+                /*
+                 * Just finished stepping over a breakpoint, but the
+                 * gdb does not expect us to do single-stepping.
+                 * Don't do anything special.
+                 */
+                cpu->exception_index = EXCP_INTERRUPT;
+            } else {
+                /* Another exception or debug event. Report it to GDB. */
+                cpu->exception_index = EXCP_DEBUG;
+            }
+
+            ret = 1;
+            break;
         case WHvRunVpExitReasonNone:
         case WHvRunVpExitReasonUnrecoverableException:
         case WHvRunVpExitReasonInvalidVpRegisterValue:
         case WHvRunVpExitReasonUnsupportedFeature:
-        case WHvRunVpExitReasonException:
         default:
             error_report("WHPX: Unexpected VP exit code %d",
                          vcpu->exit_ctx.ExitReason);
@@ -1284,10 +1983,32 @@ static int whpx_vcpu_run(CPUState *cpu)
 
     } while (!ret);
 
-    cpu_exec_end(cpu);
+    if (stepped_over_bp) {
+        /* Restore the breakpoint we stepped over */
+        cpu_memory_rw_debug(cpu,
+            stepped_over_bp->address,
+            (void *)&whpx_breakpoint_instruction,
+            1,
+            true);
+    }
+
+    if (exclusive_step_mode != WHPX_STEP_NONE) {
+        g_assert(cpu_in_exclusive_context(cpu));
+        cpu->running = false;
+        end_exclusive();
+
+        exclusive_step_mode = WHPX_STEP_NONE;
+    } else {
+        cpu_exec_end(cpu);
+    }
+
     qemu_mutex_lock_iothread();
     current_cpu = cpu;
 
+    if (--whpx->running_cpus == 0) {
+        whpx_last_vcpu_stopping(cpu);
+    }
+
     qatomic_set(&cpu->exit_request, false);
 
     return ret < 0;
@@ -1846,6 +2567,7 @@ static int whpx_accel_init(MachineState *ms)
     memset(&prop, 0, sizeof(WHV_PARTITION_PROPERTY));
     prop.ExtendedVmExits.X64MsrExit = 1;
     prop.ExtendedVmExits.X64CpuidExit = 1;
+    prop.ExtendedVmExits.ExceptionExit = 1;
     if (whpx_apic_in_platform()) {
         prop.ExtendedVmExits.X64ApicInitSipiExitTrap = 1;
     }
@@ -1874,6 +2596,19 @@ static int whpx_accel_init(MachineState *ms)
         goto error;
     }
 
+    /*
+     * We do not want to intercept any exceptions from the guest,
+     * until we actually start debugging with gdb.
+     */
+    whpx->exception_exit_bitmap = -1;
+    hr = whpx_set_exception_exit_bitmap(0);
+
+    if (FAILED(hr)) {
+        error_report("WHPX: Failed to set exception exit bitmap, hr=%08lx", hr);
+        ret = -EINVAL;
+        goto error;
+    }
+
     hr = whp_dispatch.WHvSetupPartition(whpx->partition);
     if (FAILED(hr)) {
         error_report("WHPX: Failed to setup partition, hr=%08lx", hr);
diff --git a/target/i386/whpx/whpx-internal.h b/target/i386/whpx/whpx-internal.h
index 908ababf6d..1d505d37fe 100644
--- a/target/i386/whpx/whpx-internal.h
+++ b/target/i386/whpx/whpx-internal.h
@@ -5,9 +5,38 @@
 #include <WinHvPlatform.h>
 #include <WinHvEmulation.h>
 
+typedef enum WhpxBreakpointState {
+    WHPX_BP_CLEARED = 0,
+    WHPX_BP_SET_PENDING,
+    WHPX_BP_SET,
+    WHPX_BP_CLEAR_PENDING,
+} WhpxBreakpointState;
+
+struct whpx_breakpoint {
+    vaddr address;
+    WhpxBreakpointState state;
+    uint8_t original_instruction;
+};
+
+struct whpx_breakpoint_collection {
+    int allocated, used;
+    struct whpx_breakpoint data[0];
+};
+
+struct whpx_breakpoints {
+    int original_address_count;
+    vaddr *original_addresses;
+
+    struct whpx_breakpoint_collection *breakpoints;
+};
+
 struct whpx_state {
     uint64_t mem_quota;
     WHV_PARTITION_HANDLE partition;
+    uint64_t exception_exit_bitmap;
+    int32_t running_cpus;
+    struct whpx_breakpoints breakpoints;
+
     bool kernel_irqchip_allowed;
     bool kernel_irqchip_required;
     bool apic_in_platform;
-- 
2.29.2.windows.3





^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH 3/3] whpx: Added support for breakpoints and stepping
  2022-02-23 20:05   ` Ivan Shcherbakov
@ 2022-02-24  9:35     ` Peter Maydell
  2022-02-24 11:22       ` Alex Bennée
  0 siblings, 1 reply; 10+ messages in thread
From: Peter Maydell @ 2022-02-24  9:35 UTC (permalink / raw)
  To: Ivan Shcherbakov; +Cc: Paolo Bonzini, mst, qemu-devel, armbru

On Wed, 23 Feb 2022 at 20:08, Ivan Shcherbakov <ivan@sysprogs.com> wrote:
>

>  static GDBState gdbserver_state;
> +static bool gdbserver_is_connected;
> +
> +bool gdb_is_connected(void)
> +{
> +    return gdbserver_is_connected;
> +}

I haven't looked at the rest of the patch -- but can you explain where
whpx is different from how other accelerators handle debug such that
it needs to know whether gdb is connected or not ?

thanks
-- PMM


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 3/3] whpx: Added support for breakpoints and stepping
  2022-02-24  9:35     ` Peter Maydell
@ 2022-02-24 11:22       ` Alex Bennée
  2022-02-24 15:54         ` Ivan Shcherbakov
  0 siblings, 1 reply; 10+ messages in thread
From: Alex Bennée @ 2022-02-24 11:22 UTC (permalink / raw)
  To: Peter Maydell; +Cc: Paolo Bonzini, Ivan Shcherbakov, qemu-devel, armbru, mst


Peter Maydell <peter.maydell@linaro.org> writes:

> On Wed, 23 Feb 2022 at 20:08, Ivan Shcherbakov <ivan@sysprogs.com> wrote:
>>
>
>>  static GDBState gdbserver_state;
>> +static bool gdbserver_is_connected;
>> +
>> +bool gdb_is_connected(void)
>> +{
>> +    return gdbserver_is_connected;
>> +}
>
> I haven't looked at the rest of the patch -- but can you explain where
> whpx is different from how other accelerators handle debug such that
> it needs to know whether gdb is connected or not ?

It's backwards from how it should work. I don't know about the other
accelerators but in the KVM case the gdbserver handles the requests for
installing breakpoints and watchpoints and if KVM is enabled passes them
onto the arch specific code.

When QEMU prepares to enter the run loop it checks for the presence of
those things and then can make the arch specific decision about how to
invoke KVM (see kvm_update_guest_debug and friends).

Just the fact you have connected to the gdbserver shouldn't affect how
you run WHPX up until the point there are things you need to trap - i.e.
handling installed breakpoints.

FWIW we rejected a similar patch a few weeks ago that wanted to change
emulation behaviour based on gdbserver connection status. 

>
> thanks
> -- PMM


-- 
Alex Bennée


^ permalink raw reply	[flat|nested] 10+ messages in thread

* RE: [PATCH 3/3] whpx: Added support for breakpoints and stepping
  2022-02-24 11:22       ` Alex Bennée
@ 2022-02-24 15:54         ` Ivan Shcherbakov
  2022-02-28  4:31           ` Ivan Shcherbakov
  0 siblings, 1 reply; 10+ messages in thread
From: Ivan Shcherbakov @ 2022-02-24 15:54 UTC (permalink / raw)
  To: 'Alex Bennée', 'Peter Maydell'
  Cc: 'Paolo Bonzini', qemu-devel, armbru, mst

> I haven't looked at the rest of the patch -- but can you explain where 
> whpx is different from how other accelerators handle debug such that 
> it needs to know whether gdb is connected or not ?
This mainly comes from the way single-stepping is handled. WHPX needs to know whether you want to trap INT1 before starting the first VCPU. The current gdbstub implementation doesn’t make it easy. Assume the scenario:

1. You have 2 VCPUs. You run the first one and step the second one.
2. gdb_continue_partial() calls cpu_resume(0)
3. gdb_continue_partial() calls cpu_single_step(1).

WHPX needs to know whether to trap INT1 at step #2 (starting the first CPU), but it won't know it until step #3. So, the current logic simply checks if gdb is connected at all in step #2.

>Just the fact you have connected to the gdbserver shouldn't affect how you run WHPX up until the point there are things you need to trap - i.e.
>handling installed breakpoints.

This can be addressed by adding a "bool stepping_expected" argument to vm_prepare_start(). It will be set to true if gdb_continue_partial() expects ANY thread to be stepped, and will be false otherwise. It will also require a new callback in AccelOpsClass (e.g. on_vm_starting(bool stepping_expected)) that will be called from vm_prepare_start(). The WHPX implementation will then check if any breakpoints are set, and if the last call to this function expected stepping, and use it to decide whether to trap INT1.

Let me know if this will work better.

Best,
Ivan



^ permalink raw reply	[flat|nested] 10+ messages in thread

* RE: [PATCH 3/3] whpx: Added support for breakpoints and stepping
  2022-02-24 15:54         ` Ivan Shcherbakov
@ 2022-02-28  4:31           ` Ivan Shcherbakov
  2022-02-28 10:28             ` Alex Bennée
  0 siblings, 1 reply; 10+ messages in thread
From: Ivan Shcherbakov @ 2022-02-28  4:31 UTC (permalink / raw)
  To: 'Alex Bennée', 'Peter Maydell'
  Cc: 'Paolo Bonzini', mst, qemu-devel, armbru

Hi Alex and Peter,

It would be great to hear your feedback on handling the WHPX stepping via an added argument to vm_prepare_start(). It is only called from 2 places, so the changes will be minimal. I this works for you, I will gladly send an updated patch.

I am also fully open to other suggestions. You obviously know the QEMU codebase much better, so I'm happy to refactor it so that it blends in well with the rest of the code.

Best,
Ivan

-----Original Message-----
From: Qemu-devel <qemu-devel-bounces+ivan=sysprogs.com@nongnu.org> On Behalf Of Ivan Shcherbakov
Sent: Thursday, February 24, 2022 7:54 AM
To: 'Alex Bennée' <alex.bennee@linaro.org>; 'Peter Maydell' <peter.maydell@linaro.org>
Cc: 'Paolo Bonzini' <pbonzini@redhat.com>; qemu-devel@nongnu.org; armbru@redhat.com; mst@redhat.com
Subject: RE: [PATCH 3/3] whpx: Added support for breakpoints and stepping

> I haven't looked at the rest of the patch -- but can you explain where 
> whpx is different from how other accelerators handle debug such that 
> it needs to know whether gdb is connected or not ?
This mainly comes from the way single-stepping is handled. WHPX needs to know whether you want to trap INT1 before starting the first VCPU. The current gdbstub implementation doesn’t make it easy. Assume the scenario:

1. You have 2 VCPUs. You run the first one and step the second one.
2. gdb_continue_partial() calls cpu_resume(0) 3. gdb_continue_partial() calls cpu_single_step(1).

WHPX needs to know whether to trap INT1 at step #2 (starting the first CPU), but it won't know it until step #3. So, the current logic simply checks if gdb is connected at all in step #2.

>Just the fact you have connected to the gdbserver shouldn't affect how you run WHPX up until the point there are things you need to trap - i.e.
>handling installed breakpoints.

This can be addressed by adding a "bool stepping_expected" argument to vm_prepare_start(). It will be set to true if gdb_continue_partial() expects ANY thread to be stepped, and will be false otherwise. It will also require a new callback in AccelOpsClass (e.g. on_vm_starting(bool stepping_expected)) that will be called from vm_prepare_start(). The WHPX implementation will then check if any breakpoints are set, and if the last call to this function expected stepping, and use it to decide whether to trap INT1.

Let me know if this will work better.

Best,
Ivan



^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 3/3] whpx: Added support for breakpoints and stepping
  2022-02-28  4:31           ` Ivan Shcherbakov
@ 2022-02-28 10:28             ` Alex Bennée
  2022-03-01  2:08               ` Ivan Shcherbakov
  0 siblings, 1 reply; 10+ messages in thread
From: Alex Bennée @ 2022-02-28 10:28 UTC (permalink / raw)
  To: Ivan Shcherbakov
  Cc: 'Peter Maydell',
	mst, qemu-devel, armbru, 'Paolo Bonzini'


"Ivan Shcherbakov" <ivan@sysprogs.com> writes:

> Hi Alex and Peter,
>
> It would be great to hear your feedback on handling the WHPX stepping
> via an added argument to vm_prepare_start(). It is only called from 2
> places, so the changes will be minimal. I this works for you, I will
> gladly send an updated patch.

Is the limitation that whpx_set_exception_exit_bitmap needs to be set
before any vCPU can be run or that it cannot be set if any vCPUs are
currently running?

If it is the later wouldn't adding a hook into the vm_change_state_head
callbacks be enough?

> I am also fully open to other suggestions. You obviously know the QEMU
> codebase much better, so I'm happy to refactor it so that it blends in
> well with the rest of the code.
>
> Best,
> Ivan
>
> -----Original Message-----
> From: Qemu-devel <qemu-devel-bounces+ivan=sysprogs.com@nongnu.org> On Behalf Of Ivan Shcherbakov
> Sent: Thursday, February 24, 2022 7:54 AM
> To: 'Alex Bennée' <alex.bennee@linaro.org>; 'Peter Maydell' <peter.maydell@linaro.org>
> Cc: 'Paolo Bonzini' <pbonzini@redhat.com>; qemu-devel@nongnu.org; armbru@redhat.com; mst@redhat.com
> Subject: RE: [PATCH 3/3] whpx: Added support for breakpoints and stepping
>
>> I haven't looked at the rest of the patch -- but can you explain where 
>> whpx is different from how other accelerators handle debug such that 
>> it needs to know whether gdb is connected or not ?
> This mainly comes from the way single-stepping is handled. WHPX needs to know whether you want to trap INT1 before starting the first VCPU. The current gdbstub implementation doesn’t make it easy. Assume the scenario:
>
> 1. You have 2 VCPUs. You run the first one and step the second one.
> 2. gdb_continue_partial() calls cpu_resume(0) 3. gdb_continue_partial() calls cpu_single_step(1).
>
> WHPX needs to know whether to trap INT1 at step #2 (starting the first CPU), but it won't know it until step #3. So, the current logic simply checks if gdb is connected at all in step #2.
>
>>Just the fact you have connected to the gdbserver shouldn't affect how you run WHPX up until the point there are things you need to trap - i.e.
>>handling installed breakpoints.
>
> This can be addressed by adding a "bool stepping_expected" argument to
>> vm_prepare_start(). It will be set to true if gdb_continue_partial()
>> expects ANY thread to be stepped, and will be false otherwise. It
>> will also require a new callback in AccelOpsClass (e.g.
>> on_vm_starting(bool stepping_expected)) that will be called from
>> vm_prepare_start(). The WHPX implementation will then check if any
>> breakpoints are set, and if the last call to this function expected
>> stepping, and use it to decide whether to trap INT1.
>
> Let me know if this will work better.
>
> Best,
> Ivan


-- 
Alex Bennée


^ permalink raw reply	[flat|nested] 10+ messages in thread

* RE: [PATCH 3/3] whpx: Added support for breakpoints and stepping
  2022-02-28 10:28             ` Alex Bennée
@ 2022-03-01  2:08               ` Ivan Shcherbakov
  2022-03-02  2:06                 ` Ivan Shcherbakov
  0 siblings, 1 reply; 10+ messages in thread
From: Ivan Shcherbakov @ 2022-03-01  2:08 UTC (permalink / raw)
  To: 'Alex Bennée'
  Cc: 'Peter Maydell',
	mst, qemu-devel, armbru, 'Paolo Bonzini'

Hi Alex,

Thanks for getting back to me. It is definitely the latter case (i.e. it is possible to change it while the target is stopped at a breakpoint before resuming any VCPUs).
vm_state_notify() does look like it's intended for this type of notifications, but unfortunately, it doesn't make a distinction between stepping and running normally.
Below is the relevant code from gdbstub.c:

>static int gdb_continue_partial(char *newstates)
>{
>    int flag = 0;
>
>    /* Various corner cases omitted for brevity  */
>        if (vm_prepare_start()) {
>            return 0;
>        }
>    CPU_FOREACH(cpu) {
>        switch (newstates[cpu->cpu_index]) {
>        case 's':
>            trace_gdbstub_op_stepping(cpu->cpu_index);
>            cpu_single_step(cpu, gdbserver_state.sstep_flags);
>            cpu_resume(cpu);
>            flag = 1;
>            break;
>        case 'c':
>            trace_gdbstub_op_continue_cpu(cpu->cpu_index);
>            cpu_resume(cpu);
>            flag = 1;
>            break;
>        default:
>            res = -1;
>            break;
>        }
>    }
>}

Also:

>int vm_prepare_start(void)
>{
>    runstate_set(RUN_STATE_RUNNING);
>    vm_state_notify(1, RUN_STATE_RUNNING);
>    return 0;
>}

and:

>void vm_state_notify(bool running, RunState state);

So, currently, vm_prepare_start() has no way of distinguishing between single-stepping and normal running unless gdb_continue_partial() scans the 'newstates' array before calling it, and passes some extra argument to vm_prepare_start(), indicating whether a step request was present anywhere in the array.

I couldn't find any existing run state that would match single-stepping, and adding another run state looks like a very non-trivial change that can easily backfire. Adding another argument to vm_state_notify() could be easier, but I am still afraid it could break some other part of QEMU, so I thought adding a new member to AccelOpsClass would be a safer bet. But again, if you think another way to do it is better, I am very open to it.

Best regards,
Ivan

-----Original Message-----
From: Alex Bennée <alex.bennee@linaro.org> 
Sent: Monday, February 28, 2022 2:28 AM
To: Ivan Shcherbakov <ivan@sysprogs.com>
Cc: 'Peter Maydell' <peter.maydell@linaro.org>; 'Paolo Bonzini' <pbonzini@redhat.com>; qemu-devel@nongnu.org; armbru@redhat.com; mst@redhat.com
Subject: Re: [PATCH 3/3] whpx: Added support for breakpoints and stepping

Is the limitation that whpx_set_exception_exit_bitmap needs to be set before any vCPU can be run or that it cannot be set if any vCPUs are currently running?
If it is the later wouldn't adding a hook into the vm_change_state_head callbacks be enough?



^ permalink raw reply	[flat|nested] 10+ messages in thread

* RE: [PATCH 3/3] whpx: Added support for breakpoints and stepping
  2022-03-01  2:08               ` Ivan Shcherbakov
@ 2022-03-02  2:06                 ` Ivan Shcherbakov
  0 siblings, 0 replies; 10+ messages in thread
From: Ivan Shcherbakov @ 2022-03-02  2:06 UTC (permalink / raw)
  To: 'Alex Bennée'
  Cc: 'Peter Maydell', 'Paolo Bonzini',
	qemu-devel, armbru, mst

Hi Alex,

Is there anything I could do to get the WHPX debugging support accepted into QEMU? Would the proposed callback AccelOpsClass work for you, or would you prefer another approach?

Best,
Ivan

-----Original Message-----
From: Qemu-devel <qemu-devel-bounces+ivan=sysprogs.com@nongnu.org> On Behalf Of Ivan Shcherbakov
Sent: Monday, February 28, 2022 6:09 PM
To: 'Alex Bennée' <alex.bennee@linaro.org>
Cc: 'Peter Maydell' <peter.maydell@linaro.org>; mst@redhat.com; qemu-devel@nongnu.org; armbru@redhat.com; 'Paolo Bonzini' <pbonzini@redhat.com>
Subject: RE: [PATCH 3/3] whpx: Added support for breakpoints and stepping

Hi Alex,

Thanks for getting back to me. It is definitely the latter case (i.e. it is possible to change it while the target is stopped at a breakpoint before resuming any VCPUs).
vm_state_notify() does look like it's intended for this type of notifications, but unfortunately, it doesn't make a distinction between stepping and running normally.
Below is the relevant code from gdbstub.c:

>static int gdb_continue_partial(char *newstates) {
>    int flag = 0;
>
>    /* Various corner cases omitted for brevity  */
>        if (vm_prepare_start()) {
>            return 0;
>        }
>    CPU_FOREACH(cpu) {
>        switch (newstates[cpu->cpu_index]) {
>        case 's':
>            trace_gdbstub_op_stepping(cpu->cpu_index);
>            cpu_single_step(cpu, gdbserver_state.sstep_flags);
>            cpu_resume(cpu);
>            flag = 1;
>            break;
>        case 'c':
>            trace_gdbstub_op_continue_cpu(cpu->cpu_index);
>            cpu_resume(cpu);
>            flag = 1;
>            break;
>        default:
>            res = -1;
>            break;
>        }
>    }
>}

Also:

>int vm_prepare_start(void)
>{
>    runstate_set(RUN_STATE_RUNNING);
>    vm_state_notify(1, RUN_STATE_RUNNING);
>    return 0;
>}

and:

>void vm_state_notify(bool running, RunState state);

So, currently, vm_prepare_start() has no way of distinguishing between single-stepping and normal running unless gdb_continue_partial() scans the 'newstates' array before calling it, and passes some extra argument to vm_prepare_start(), indicating whether a step request was present anywhere in the array.

I couldn't find any existing run state that would match single-stepping, and adding another run state looks like a very non-trivial change that can easily backfire. Adding another argument to vm_state_notify() could be easier, but I am still afraid it could break some other part of QEMU, so I thought adding a new member to AccelOpsClass would be a safer bet. But again, if you think another way to do it is better, I am very open to it.

Best regards,
Ivan

-----Original Message-----
From: Alex Bennée <alex.bennee@linaro.org>
Sent: Monday, February 28, 2022 2:28 AM
To: Ivan Shcherbakov <ivan@sysprogs.com>
Cc: 'Peter Maydell' <peter.maydell@linaro.org>; 'Paolo Bonzini' <pbonzini@redhat.com>; qemu-devel@nongnu.org; armbru@redhat.com; mst@redhat.com
Subject: Re: [PATCH 3/3] whpx: Added support for breakpoints and stepping

Is the limitation that whpx_set_exception_exit_bitmap needs to be set before any vCPU can be run or that it cannot be set if any vCPUs are currently running?
If it is the later wouldn't adding a hook into the vm_change_state_head callbacks be enough?



^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2022-03-02  2:07 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-02-23  5:25 [PATCH 3/3] whpx: Added support for breakpoints and stepping Ivan Shcherbakov
2022-02-23  9:36 ` Paolo Bonzini
2022-02-23 20:05   ` Ivan Shcherbakov
2022-02-24  9:35     ` Peter Maydell
2022-02-24 11:22       ` Alex Bennée
2022-02-24 15:54         ` Ivan Shcherbakov
2022-02-28  4:31           ` Ivan Shcherbakov
2022-02-28 10:28             ` Alex Bennée
2022-03-01  2:08               ` Ivan Shcherbakov
2022-03-02  2:06                 ` Ivan Shcherbakov

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.