All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 1/2] arm/hvf: Optimize and simplify WFI handling
@ 2020-12-01 21:00 Peter Collingbourne via
  2020-12-01 21:00 ` [PATCH v2 2/2] arm/hvf: Stop setting current_cpu Peter Collingbourne via
  2020-12-01 23:24 ` [PATCH v2 1/2] arm/hvf: Optimize and simplify WFI handling Alexander Graf
  0 siblings, 2 replies; 10+ messages in thread
From: Peter Collingbourne via @ 2020-12-01 21:00 UTC (permalink / raw)
  To: Alexander Graf
  Cc: Peter Collingbourne, Frank Yang, Roman Bolshakov, Peter Maydell,
	Eduardo Habkost, Richard Henderson, qemu-devel, Cameron Esfahani,
	qemu-arm, Claudio Fontana, Paolo Bonzini

Sleep on WFx until the VTIMER is due but allow ourselves to be woken
up on IPI.

Signed-off-by: Peter Collingbourne <pcc@google.com>
---
v2:
- simplify locking further
- wait indefinitely on disabled or masked timers

 accel/hvf/hvf-cpus.c     |   5 +-
 include/sysemu/hvf_int.h |   3 +-
 target/arm/hvf/hvf.c     | 116 ++++++++++++++-------------------------
 3 files changed, 43 insertions(+), 81 deletions(-)

diff --git a/accel/hvf/hvf-cpus.c b/accel/hvf/hvf-cpus.c
index 4360f64671..b2c8fb57f6 100644
--- a/accel/hvf/hvf-cpus.c
+++ b/accel/hvf/hvf-cpus.c
@@ -344,9 +344,8 @@ static int hvf_init_vcpu(CPUState *cpu)
     sigact.sa_handler = dummy_signal;
     sigaction(SIG_IPI, &sigact, NULL);
 
-    pthread_sigmask(SIG_BLOCK, NULL, &set);
-    sigdelset(&set, SIG_IPI);
-    pthread_sigmask(SIG_SETMASK, &set, NULL);
+    pthread_sigmask(SIG_BLOCK, NULL, &cpu->hvf->unblock_ipi_mask);
+    sigdelset(&cpu->hvf->unblock_ipi_mask, SIG_IPI);
 
 #ifdef __aarch64__
     r = hv_vcpu_create(&cpu->hvf->fd, (hv_vcpu_exit_t **)&cpu->hvf->exit, NULL);
diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h
index c56baa3ae8..13adf6ea77 100644
--- a/include/sysemu/hvf_int.h
+++ b/include/sysemu/hvf_int.h
@@ -62,8 +62,7 @@ extern HVFState *hvf_state;
 struct hvf_vcpu_state {
     uint64_t fd;
     void *exit;
-    struct timespec ts;
-    bool sleeping;
+    sigset_t unblock_ipi_mask;
 };
 
 void assert_hvf_ok(hv_return_t ret);
diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
index 8fe10966d2..3321d48aa2 100644
--- a/target/arm/hvf/hvf.c
+++ b/target/arm/hvf/hvf.c
@@ -2,6 +2,7 @@
  * QEMU Hypervisor.framework support for Apple Silicon
 
  * Copyright 2020 Alexander Graf <agraf@csgraf.de>
+ * Copyright 2020 Google LLC
  *
  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  * See the COPYING file in the top-level directory.
@@ -18,6 +19,7 @@
 #include "sysemu/hw_accel.h"
 
 #include <Hypervisor/Hypervisor.h>
+#include <mach/mach_time.h>
 
 #include "exec/address-spaces.h"
 #include "hw/irq.h"
@@ -320,18 +322,8 @@ int hvf_arch_init_vcpu(CPUState *cpu)
 
 void hvf_kick_vcpu_thread(CPUState *cpu)
 {
-    if (cpu->hvf->sleeping) {
-        /*
-         * When sleeping, make sure we always send signals. Also, clear the
-         * timespec, so that an IPI that arrives between setting hvf->sleeping
-         * and the nanosleep syscall still aborts the sleep.
-         */
-        cpu->thread_kicked = false;
-        cpu->hvf->ts = (struct timespec){ };
-        cpus_kick_thread(cpu);
-    } else {
-        hv_vcpus_exit(&cpu->hvf->fd, 1);
-    }
+    cpus_kick_thread(cpu);
+    hv_vcpus_exit(&cpu->hvf->fd, 1);
 }
 
 static int hvf_inject_interrupts(CPUState *cpu)
@@ -349,6 +341,18 @@ static int hvf_inject_interrupts(CPUState *cpu)
     return 0;
 }
 
+static void hvf_wait_for_ipi(CPUState *cpu, struct timespec *ts)
+{
+    /*
+     * Use pselect to sleep so that other threads can IPI us while we're
+     * sleeping.
+     */
+    qatomic_mb_set(&cpu->thread_kicked, false);
+    qemu_mutex_unlock_iothread();
+    pselect(0, 0, 0, 0, ts, &cpu->hvf->unblock_ipi_mask);
+    qemu_mutex_lock_iothread();
+}
+
 int hvf_vcpu_exec(CPUState *cpu)
 {
     ARMCPU *arm_cpu = ARM_CPU(cpu);
@@ -357,15 +361,11 @@ int hvf_vcpu_exec(CPUState *cpu)
     hv_return_t r;
     int ret = 0;
 
-    qemu_mutex_unlock_iothread();
-
     do {
         bool advance_pc = false;
 
-        qemu_mutex_lock_iothread();
         current_cpu = cpu;
         qemu_wait_io_event_common(cpu);
-        qemu_mutex_unlock_iothread();
 
         flush_cpu_state(cpu);
 
@@ -374,10 +374,10 @@ int hvf_vcpu_exec(CPUState *cpu)
         }
 
         if (cpu->halted) {
-            qemu_mutex_lock_iothread();
             return EXCP_HLT;
         }
 
+        qemu_mutex_unlock_iothread();
         assert_hvf_ok(hv_vcpu_run(cpu->hvf->fd));
 
         /* handle VMEXIT */
@@ -385,15 +385,14 @@ int hvf_vcpu_exec(CPUState *cpu)
         uint64_t syndrome = hvf_exit->exception.syndrome;
         uint32_t ec = syn_get_ec(syndrome);
 
+        qemu_mutex_lock_iothread();
         switch (exit_reason) {
         case HV_EXIT_REASON_EXCEPTION:
             /* This is the main one, handle below. */
             break;
         case HV_EXIT_REASON_VTIMER_ACTIVATED:
-            qemu_mutex_lock_iothread();
             current_cpu = cpu;
             qemu_set_irq(arm_cpu->gt_timer_outputs[GTIMER_VIRT], 1);
-            qemu_mutex_unlock_iothread();
             continue;
         case HV_EXIT_REASON_CANCELED:
             /* we got kicked, no exit to process */
@@ -413,7 +412,6 @@ int hvf_vcpu_exec(CPUState *cpu)
             uint32_t srt = (syndrome >> 16) & 0x1f;
             uint64_t val = 0;
 
-            qemu_mutex_lock_iothread();
             current_cpu = cpu;
 
             DPRINTF("data abort: [pc=0x%llx va=0x%016llx pa=0x%016llx isv=%x "
@@ -446,8 +444,6 @@ int hvf_vcpu_exec(CPUState *cpu)
                 hvf_set_reg(cpu, srt, val);
             }
 
-            qemu_mutex_unlock_iothread();
-
             advance_pc = true;
             break;
         }
@@ -493,68 +489,40 @@ int hvf_vcpu_exec(CPUState *cpu)
         case EC_WFX_TRAP:
             if (!(syndrome & WFX_IS_WFE) && !(cpu->interrupt_request &
                 (CPU_INTERRUPT_HARD | CPU_INTERRUPT_FIQ))) {
-                uint64_t cval, ctl, val, diff, now;
+                advance_pc = true;
 
-                /* Set up a local timer for vtimer if necessary ... */
-                r = hv_vcpu_get_sys_reg(cpu->hvf->fd, HV_SYS_REG_CNTV_CTL_EL0, &ctl);
-                assert_hvf_ok(r);
-                r = hv_vcpu_get_sys_reg(cpu->hvf->fd, HV_SYS_REG_CNTV_CVAL_EL0, &cval);
+                uint64_t ctl;
+                r = hv_vcpu_get_sys_reg(cpu->hvf->fd, HV_SYS_REG_CNTV_CTL_EL0,
+                                        &ctl);
                 assert_hvf_ok(r);
 
-                asm volatile("mrs %0, cntvct_el0" : "=r"(val));
-                diff = cval - val;
-
-                now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) /
-                      gt_cntfrq_period_ns(arm_cpu);
-
-                /* Timer disabled or masked, just wait for long */
                 if (!(ctl & 1) || (ctl & 2)) {
-                    diff = (120 * NANOSECONDS_PER_SECOND) /
-                           gt_cntfrq_period_ns(arm_cpu);
+                    /* Timer disabled or masked, just wait for an IPI. */
+                    hvf_wait_for_ipi(cpu, NULL);
+                    break;
                 }
 
-                if (diff < INT64_MAX) {
-                    uint64_t ns = diff * gt_cntfrq_period_ns(arm_cpu);
-                    struct timespec *ts = &cpu->hvf->ts;
-
-                    *ts = (struct timespec){
-                        .tv_sec = ns / NANOSECONDS_PER_SECOND,
-                        .tv_nsec = ns % NANOSECONDS_PER_SECOND,
-                    };
-
-                    /*
-                     * Waking up easily takes 1ms, don't go to sleep for smaller
-                     * time periods than 2ms.
-                     */
-                    if (!ts->tv_sec && (ts->tv_nsec < (SCALE_MS * 2))) {
-                        advance_pc = true;
-                        break;
-                    }
-
-                    /* Set cpu->hvf->sleeping so that we get a SIG_IPI signal. */
-                    cpu->hvf->sleeping = true;
-                    smp_mb();
-
-                    /* Bail out if we received an IRQ meanwhile */
-                    if (cpu->thread_kicked || (cpu->interrupt_request &
-                        (CPU_INTERRUPT_HARD | CPU_INTERRUPT_FIQ))) {
-                        cpu->hvf->sleeping = false;
-                        break;
-                    }
-
-                    /* nanosleep returns on signal, so we wake up on kick. */
-                    nanosleep(ts, NULL);
-
-                    /* Out of sleep - either naturally or because of a kick */
-                    cpu->hvf->sleeping = false;
+                uint64_t cval;
+                r = hv_vcpu_get_sys_reg(cpu->hvf->fd, HV_SYS_REG_CNTV_CVAL_EL0,
+                                        &cval);
+                assert_hvf_ok(r);
+
+                int64_t ticks_to_sleep = cval - mach_absolute_time();
+                if (ticks_to_sleep < 0) {
+                    break;
                 }
 
-                advance_pc = true;
+                uint64_t seconds = ticks_to_sleep / arm_cpu->gt_cntfrq_hz;
+                uint64_t nanos =
+                    (ticks_to_sleep - arm_cpu->gt_cntfrq_hz * seconds) *
+                    1000000000 / arm_cpu->gt_cntfrq_hz;
+                struct timespec ts = { seconds, nanos };
+
+                hvf_wait_for_ipi(cpu, &ts);
             }
             break;
         case EC_AA64_HVC:
             cpu_synchronize_state(cpu);
-            qemu_mutex_lock_iothread();
             current_cpu = cpu;
             if (arm_is_psci_call(arm_cpu, EXCP_HVC)) {
                 arm_handle_psci_call(arm_cpu);
@@ -562,11 +530,9 @@ int hvf_vcpu_exec(CPUState *cpu)
                 DPRINTF("unknown HVC! %016llx", env->xregs[0]);
                 env->xregs[0] = -1;
             }
-            qemu_mutex_unlock_iothread();
             break;
         case EC_AA64_SMC:
             cpu_synchronize_state(cpu);
-            qemu_mutex_lock_iothread();
             current_cpu = cpu;
             if (arm_is_psci_call(arm_cpu, EXCP_SMC)) {
                 arm_handle_psci_call(arm_cpu);
@@ -575,7 +541,6 @@ int hvf_vcpu_exec(CPUState *cpu)
                 env->xregs[0] = -1;
                 env->pc += 4;
             }
-            qemu_mutex_unlock_iothread();
             break;
         default:
             cpu_synchronize_state(cpu);
@@ -596,7 +561,6 @@ int hvf_vcpu_exec(CPUState *cpu)
         }
     } while (ret == 0);
 
-    qemu_mutex_lock_iothread();
     current_cpu = cpu;
 
     return ret;
-- 
2.29.2.454.gaff20da3a2-goog



^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH v2 2/2] arm/hvf: Stop setting current_cpu
  2020-12-01 21:00 [PATCH v2 1/2] arm/hvf: Optimize and simplify WFI handling Peter Collingbourne via
@ 2020-12-01 21:00 ` Peter Collingbourne via
  2020-12-01 22:11   ` Alexander Graf
  2020-12-01 23:24 ` [PATCH v2 1/2] arm/hvf: Optimize and simplify WFI handling Alexander Graf
  1 sibling, 1 reply; 10+ messages in thread
From: Peter Collingbourne via @ 2020-12-01 21:00 UTC (permalink / raw)
  To: Alexander Graf
  Cc: Peter Collingbourne, Frank Yang, Roman Bolshakov, Peter Maydell,
	Eduardo Habkost, Richard Henderson, qemu-devel, Cameron Esfahani,
	qemu-arm, Claudio Fontana, Paolo Bonzini

This variable is already being set by the generic HVF code and it's a
thread-local variable so I don't see how it can be overwritten.

Signed-off-by: Peter Collingbourne <pcc@google.com>
---
 target/arm/hvf/hvf.c | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
index 3321d48aa2..40984fcf4d 100644
--- a/target/arm/hvf/hvf.c
+++ b/target/arm/hvf/hvf.c
@@ -364,7 +364,6 @@ int hvf_vcpu_exec(CPUState *cpu)
     do {
         bool advance_pc = false;
 
-        current_cpu = cpu;
         qemu_wait_io_event_common(cpu);
 
         flush_cpu_state(cpu);
@@ -391,7 +390,6 @@ int hvf_vcpu_exec(CPUState *cpu)
             /* This is the main one, handle below. */
             break;
         case HV_EXIT_REASON_VTIMER_ACTIVATED:
-            current_cpu = cpu;
             qemu_set_irq(arm_cpu->gt_timer_outputs[GTIMER_VIRT], 1);
             continue;
         case HV_EXIT_REASON_CANCELED:
@@ -412,8 +410,6 @@ int hvf_vcpu_exec(CPUState *cpu)
             uint32_t srt = (syndrome >> 16) & 0x1f;
             uint64_t val = 0;
 
-            current_cpu = cpu;
-
             DPRINTF("data abort: [pc=0x%llx va=0x%016llx pa=0x%016llx isv=%x "
                     "iswrite=%x s1ptw=%x len=%d srt=%d]\n",
                     env->pc, hvf_exit->exception.virtual_address,
@@ -523,7 +519,6 @@ int hvf_vcpu_exec(CPUState *cpu)
             break;
         case EC_AA64_HVC:
             cpu_synchronize_state(cpu);
-            current_cpu = cpu;
             if (arm_is_psci_call(arm_cpu, EXCP_HVC)) {
                 arm_handle_psci_call(arm_cpu);
             } else {
@@ -533,7 +528,6 @@ int hvf_vcpu_exec(CPUState *cpu)
             break;
         case EC_AA64_SMC:
             cpu_synchronize_state(cpu);
-            current_cpu = cpu;
             if (arm_is_psci_call(arm_cpu, EXCP_SMC)) {
                 arm_handle_psci_call(arm_cpu);
             } else {
@@ -561,7 +555,5 @@ int hvf_vcpu_exec(CPUState *cpu)
         }
     } while (ret == 0);
 
-    current_cpu = cpu;
-
     return ret;
 }
-- 
2.29.2.454.gaff20da3a2-goog



^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 2/2] arm/hvf: Stop setting current_cpu
  2020-12-01 21:00 ` [PATCH v2 2/2] arm/hvf: Stop setting current_cpu Peter Collingbourne via
@ 2020-12-01 22:11   ` Alexander Graf
  2020-12-02  0:13     ` Peter Collingbourne
  0 siblings, 1 reply; 10+ messages in thread
From: Alexander Graf @ 2020-12-01 22:11 UTC (permalink / raw)
  To: Peter Collingbourne
  Cc: Peter Maydell, Eduardo Habkost, Richard Henderson, qemu-devel,
	Cameron Esfahani, Roman Bolshakov, qemu-arm, Claudio Fontana,
	Frank Yang, Paolo Bonzini


On 01.12.20 22:00, Peter Collingbourne wrote:
> This variable is already being set by the generic HVF code and it's a
> thread-local variable so I don't see how it can be overwritten.
>
> Signed-off-by: Peter Collingbourne <pcc@google.com>


Yikes :). Yes, absolutely!

Would you mind if I squash this straight into my patch?


Thanks,

Alex


> ---
>   target/arm/hvf/hvf.c | 8 --------
>   1 file changed, 8 deletions(-)
>
> diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
> index 3321d48aa2..40984fcf4d 100644
> --- a/target/arm/hvf/hvf.c
> +++ b/target/arm/hvf/hvf.c
> @@ -364,7 +364,6 @@ int hvf_vcpu_exec(CPUState *cpu)
>       do {
>           bool advance_pc = false;
>   
> -        current_cpu = cpu;
>           qemu_wait_io_event_common(cpu);
>   
>           flush_cpu_state(cpu);
> @@ -391,7 +390,6 @@ int hvf_vcpu_exec(CPUState *cpu)
>               /* This is the main one, handle below. */
>               break;
>           case HV_EXIT_REASON_VTIMER_ACTIVATED:
> -            current_cpu = cpu;
>               qemu_set_irq(arm_cpu->gt_timer_outputs[GTIMER_VIRT], 1);
>               continue;
>           case HV_EXIT_REASON_CANCELED:
> @@ -412,8 +410,6 @@ int hvf_vcpu_exec(CPUState *cpu)
>               uint32_t srt = (syndrome >> 16) & 0x1f;
>               uint64_t val = 0;
>   
> -            current_cpu = cpu;
> -
>               DPRINTF("data abort: [pc=0x%llx va=0x%016llx pa=0x%016llx isv=%x "
>                       "iswrite=%x s1ptw=%x len=%d srt=%d]\n",
>                       env->pc, hvf_exit->exception.virtual_address,
> @@ -523,7 +519,6 @@ int hvf_vcpu_exec(CPUState *cpu)
>               break;
>           case EC_AA64_HVC:
>               cpu_synchronize_state(cpu);
> -            current_cpu = cpu;
>               if (arm_is_psci_call(arm_cpu, EXCP_HVC)) {
>                   arm_handle_psci_call(arm_cpu);
>               } else {
> @@ -533,7 +528,6 @@ int hvf_vcpu_exec(CPUState *cpu)
>               break;
>           case EC_AA64_SMC:
>               cpu_synchronize_state(cpu);
> -            current_cpu = cpu;
>               if (arm_is_psci_call(arm_cpu, EXCP_SMC)) {
>                   arm_handle_psci_call(arm_cpu);
>               } else {
> @@ -561,7 +555,5 @@ int hvf_vcpu_exec(CPUState *cpu)
>           }
>       } while (ret == 0);
>   
> -    current_cpu = cpu;
> -
>       return ret;
>   }


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 1/2] arm/hvf: Optimize and simplify WFI handling
  2020-12-01 21:00 [PATCH v2 1/2] arm/hvf: Optimize and simplify WFI handling Peter Collingbourne via
  2020-12-01 21:00 ` [PATCH v2 2/2] arm/hvf: Stop setting current_cpu Peter Collingbourne via
@ 2020-12-01 23:24 ` Alexander Graf
  2020-12-02  1:32   ` Peter Collingbourne
  1 sibling, 1 reply; 10+ messages in thread
From: Alexander Graf @ 2020-12-01 23:24 UTC (permalink / raw)
  To: Peter Collingbourne
  Cc: Peter Maydell, Eduardo Habkost, Richard Henderson, qemu-devel,
	Cameron Esfahani, Roman Bolshakov, qemu-arm, Claudio Fontana,
	Frank Yang, Paolo Bonzini


On 01.12.20 22:00, Peter Collingbourne wrote:
> Sleep on WFx until the VTIMER is due but allow ourselves to be woken
> up on IPI.
>
> Signed-off-by: Peter Collingbourne <pcc@google.com>
> ---
> v2:
> - simplify locking further
> - wait indefinitely on disabled or masked timers
>
>   accel/hvf/hvf-cpus.c     |   5 +-
>   include/sysemu/hvf_int.h |   3 +-
>   target/arm/hvf/hvf.c     | 116 ++++++++++++++-------------------------
>   3 files changed, 43 insertions(+), 81 deletions(-)
>
> diff --git a/accel/hvf/hvf-cpus.c b/accel/hvf/hvf-cpus.c
> index 4360f64671..b2c8fb57f6 100644
> --- a/accel/hvf/hvf-cpus.c
> +++ b/accel/hvf/hvf-cpus.c
> @@ -344,9 +344,8 @@ static int hvf_init_vcpu(CPUState *cpu)
>       sigact.sa_handler = dummy_signal;
>       sigaction(SIG_IPI, &sigact, NULL);
>   
> -    pthread_sigmask(SIG_BLOCK, NULL, &set);
> -    sigdelset(&set, SIG_IPI);
> -    pthread_sigmask(SIG_SETMASK, &set, NULL);
> +    pthread_sigmask(SIG_BLOCK, NULL, &cpu->hvf->unblock_ipi_mask);
> +    sigdelset(&cpu->hvf->unblock_ipi_mask, SIG_IPI);
>   
>   #ifdef __aarch64__
>       r = hv_vcpu_create(&cpu->hvf->fd, (hv_vcpu_exit_t **)&cpu->hvf->exit, NULL);
> diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h
> index c56baa3ae8..13adf6ea77 100644
> --- a/include/sysemu/hvf_int.h
> +++ b/include/sysemu/hvf_int.h
> @@ -62,8 +62,7 @@ extern HVFState *hvf_state;
>   struct hvf_vcpu_state {
>       uint64_t fd;
>       void *exit;
> -    struct timespec ts;
> -    bool sleeping;
> +    sigset_t unblock_ipi_mask;
>   };
>   
>   void assert_hvf_ok(hv_return_t ret);
> diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
> index 8fe10966d2..3321d48aa2 100644
> --- a/target/arm/hvf/hvf.c
> +++ b/target/arm/hvf/hvf.c
> @@ -2,6 +2,7 @@
>    * QEMU Hypervisor.framework support for Apple Silicon
>   
>    * Copyright 2020 Alexander Graf <agraf@csgraf.de>
> + * Copyright 2020 Google LLC
>    *
>    * This work is licensed under the terms of the GNU GPL, version 2 or later.
>    * See the COPYING file in the top-level directory.
> @@ -18,6 +19,7 @@
>   #include "sysemu/hw_accel.h"
>   
>   #include <Hypervisor/Hypervisor.h>
> +#include <mach/mach_time.h>
>   
>   #include "exec/address-spaces.h"
>   #include "hw/irq.h"
> @@ -320,18 +322,8 @@ int hvf_arch_init_vcpu(CPUState *cpu)
>   
>   void hvf_kick_vcpu_thread(CPUState *cpu)
>   {
> -    if (cpu->hvf->sleeping) {
> -        /*
> -         * When sleeping, make sure we always send signals. Also, clear the
> -         * timespec, so that an IPI that arrives between setting hvf->sleeping
> -         * and the nanosleep syscall still aborts the sleep.
> -         */
> -        cpu->thread_kicked = false;
> -        cpu->hvf->ts = (struct timespec){ };
> -        cpus_kick_thread(cpu);
> -    } else {
> -        hv_vcpus_exit(&cpu->hvf->fd, 1);
> -    }
> +    cpus_kick_thread(cpu);
> +    hv_vcpus_exit(&cpu->hvf->fd, 1);
>   }
>   
>   static int hvf_inject_interrupts(CPUState *cpu)
> @@ -349,6 +341,18 @@ static int hvf_inject_interrupts(CPUState *cpu)
>       return 0;
>   }
>   
> +static void hvf_wait_for_ipi(CPUState *cpu, struct timespec *ts)
> +{
> +    /*
> +     * Use pselect to sleep so that other threads can IPI us while we're
> +     * sleeping.
> +     */
> +    qatomic_mb_set(&cpu->thread_kicked, false);
> +    qemu_mutex_unlock_iothread();
> +    pselect(0, 0, 0, 0, ts, &cpu->hvf->unblock_ipi_mask);
> +    qemu_mutex_lock_iothread();
> +}
> +
>   int hvf_vcpu_exec(CPUState *cpu)
>   {
>       ARMCPU *arm_cpu = ARM_CPU(cpu);
> @@ -357,15 +361,11 @@ int hvf_vcpu_exec(CPUState *cpu)
>       hv_return_t r;
>       int ret = 0;
>   
> -    qemu_mutex_unlock_iothread();
> -
>       do {
>           bool advance_pc = false;
>   
> -        qemu_mutex_lock_iothread();
>           current_cpu = cpu;
>           qemu_wait_io_event_common(cpu);
> -        qemu_mutex_unlock_iothread();
>   
>           flush_cpu_state(cpu);
>   
> @@ -374,10 +374,10 @@ int hvf_vcpu_exec(CPUState *cpu)
>           }
>   
>           if (cpu->halted) {
> -            qemu_mutex_lock_iothread();
>               return EXCP_HLT;
>           }
>   
> +        qemu_mutex_unlock_iothread();
>           assert_hvf_ok(hv_vcpu_run(cpu->hvf->fd));
>   
>           /* handle VMEXIT */
> @@ -385,15 +385,14 @@ int hvf_vcpu_exec(CPUState *cpu)
>           uint64_t syndrome = hvf_exit->exception.syndrome;
>           uint32_t ec = syn_get_ec(syndrome);
>   
> +        qemu_mutex_lock_iothread();
>           switch (exit_reason) {
>           case HV_EXIT_REASON_EXCEPTION:
>               /* This is the main one, handle below. */
>               break;
>           case HV_EXIT_REASON_VTIMER_ACTIVATED:
> -            qemu_mutex_lock_iothread();
>               current_cpu = cpu;
>               qemu_set_irq(arm_cpu->gt_timer_outputs[GTIMER_VIRT], 1);
> -            qemu_mutex_unlock_iothread();
>               continue;
>           case HV_EXIT_REASON_CANCELED:
>               /* we got kicked, no exit to process */
> @@ -413,7 +412,6 @@ int hvf_vcpu_exec(CPUState *cpu)
>               uint32_t srt = (syndrome >> 16) & 0x1f;
>               uint64_t val = 0;
>   
> -            qemu_mutex_lock_iothread();
>               current_cpu = cpu;
>   
>               DPRINTF("data abort: [pc=0x%llx va=0x%016llx pa=0x%016llx isv=%x "
> @@ -446,8 +444,6 @@ int hvf_vcpu_exec(CPUState *cpu)
>                   hvf_set_reg(cpu, srt, val);
>               }
>   
> -            qemu_mutex_unlock_iothread();
> -
>               advance_pc = true;
>               break;
>           }
> @@ -493,68 +489,40 @@ int hvf_vcpu_exec(CPUState *cpu)
>           case EC_WFX_TRAP:
>               if (!(syndrome & WFX_IS_WFE) && !(cpu->interrupt_request &
>                   (CPU_INTERRUPT_HARD | CPU_INTERRUPT_FIQ))) {
> -                uint64_t cval, ctl, val, diff, now;
> +                advance_pc = true;
>   
> -                /* Set up a local timer for vtimer if necessary ... */
> -                r = hv_vcpu_get_sys_reg(cpu->hvf->fd, HV_SYS_REG_CNTV_CTL_EL0, &ctl);
> -                assert_hvf_ok(r);
> -                r = hv_vcpu_get_sys_reg(cpu->hvf->fd, HV_SYS_REG_CNTV_CVAL_EL0, &cval);
> +                uint64_t ctl;
> +                r = hv_vcpu_get_sys_reg(cpu->hvf->fd, HV_SYS_REG_CNTV_CTL_EL0,
> +                                        &ctl);
>                   assert_hvf_ok(r);
>   
> -                asm volatile("mrs %0, cntvct_el0" : "=r"(val));
> -                diff = cval - val;
> -
> -                now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) /
> -                      gt_cntfrq_period_ns(arm_cpu);
> -
> -                /* Timer disabled or masked, just wait for long */
>                   if (!(ctl & 1) || (ctl & 2)) {
> -                    diff = (120 * NANOSECONDS_PER_SECOND) /
> -                           gt_cntfrq_period_ns(arm_cpu);
> +                    /* Timer disabled or masked, just wait for an IPI. */
> +                    hvf_wait_for_ipi(cpu, NULL);
> +                    break;
>                   }
>   
> -                if (diff < INT64_MAX) {
> -                    uint64_t ns = diff * gt_cntfrq_period_ns(arm_cpu);
> -                    struct timespec *ts = &cpu->hvf->ts;
> -
> -                    *ts = (struct timespec){
> -                        .tv_sec = ns / NANOSECONDS_PER_SECOND,
> -                        .tv_nsec = ns % NANOSECONDS_PER_SECOND,
> -                    };
> -
> -                    /*
> -                     * Waking up easily takes 1ms, don't go to sleep for smaller
> -                     * time periods than 2ms.
> -                     */
> -                    if (!ts->tv_sec && (ts->tv_nsec < (SCALE_MS * 2))) {
> -                        advance_pc = true;
> -                        break;
> -                    }
> -
> -                    /* Set cpu->hvf->sleeping so that we get a SIG_IPI signal. */
> -                    cpu->hvf->sleeping = true;
> -                    smp_mb();
> -
> -                    /* Bail out if we received an IRQ meanwhile */
> -                    if (cpu->thread_kicked || (cpu->interrupt_request &
> -                        (CPU_INTERRUPT_HARD | CPU_INTERRUPT_FIQ))) {
> -                        cpu->hvf->sleeping = false;
> -                        break;
> -                    }
> -
> -                    /* nanosleep returns on signal, so we wake up on kick. */
> -                    nanosleep(ts, NULL);
> -
> -                    /* Out of sleep - either naturally or because of a kick */
> -                    cpu->hvf->sleeping = false;
> +                uint64_t cval;
> +                r = hv_vcpu_get_sys_reg(cpu->hvf->fd, HV_SYS_REG_CNTV_CVAL_EL0,
> +                                        &cval);
> +                assert_hvf_ok(r);
> +
> +                int64_t ticks_to_sleep = cval - mach_absolute_time();


I think you touched on it in a previous thread, but would you mind 
explaining again why mach_absolute_time() is the right thing to check 
cval against? If I read the headers correctly, the CNTVOFF register 
should be 0, so cntvct should be the reference time, no?

Also, can you please split the patch into one that I can squash into my 
existing one (removing the WFI handling altogether), an individual one to 
revive the global io lock (happy to squash that too unless you think it's 
useful to keep separate), and then this one?


Alex




^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 2/2] arm/hvf: Stop setting current_cpu
  2020-12-01 22:11   ` Alexander Graf
@ 2020-12-02  0:13     ` Peter Collingbourne
  0 siblings, 0 replies; 10+ messages in thread
From: Peter Collingbourne @ 2020-12-02  0:13 UTC (permalink / raw)
  To: Alexander Graf
  Cc: Frank Yang, Roman Bolshakov, Peter Maydell, Eduardo Habkost,
	Richard Henderson, qemu-devel, Cameron Esfahani, qemu-arm,
	Claudio Fontana, Paolo Bonzini

On Tue, Dec 1, 2020 at 2:11 PM Alexander Graf <agraf@csgraf.de> wrote:
>
>
> On 01.12.20 22:00, Peter Collingbourne wrote:
> > This variable is already being set by the generic HVF code and it's a
> > thread-local variable so I don't see how it can be overwritten.
> >
> > Signed-off-by: Peter Collingbourne <pcc@google.com>
>
>
> Yikes :). Yes, absolutely!
>
> Would you mind if I squash this straight into my patch?

Sure, please go ahead.

Peter

>
>
> Thanks,
>
> Alex
>
>
> > ---
> >   target/arm/hvf/hvf.c | 8 --------
> >   1 file changed, 8 deletions(-)
> >
> > diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
> > index 3321d48aa2..40984fcf4d 100644
> > --- a/target/arm/hvf/hvf.c
> > +++ b/target/arm/hvf/hvf.c
> > @@ -364,7 +364,6 @@ int hvf_vcpu_exec(CPUState *cpu)
> >       do {
> >           bool advance_pc = false;
> >
> > -        current_cpu = cpu;
> >           qemu_wait_io_event_common(cpu);
> >
> >           flush_cpu_state(cpu);
> > @@ -391,7 +390,6 @@ int hvf_vcpu_exec(CPUState *cpu)
> >               /* This is the main one, handle below. */
> >               break;
> >           case HV_EXIT_REASON_VTIMER_ACTIVATED:
> > -            current_cpu = cpu;
> >               qemu_set_irq(arm_cpu->gt_timer_outputs[GTIMER_VIRT], 1);
> >               continue;
> >           case HV_EXIT_REASON_CANCELED:
> > @@ -412,8 +410,6 @@ int hvf_vcpu_exec(CPUState *cpu)
> >               uint32_t srt = (syndrome >> 16) & 0x1f;
> >               uint64_t val = 0;
> >
> > -            current_cpu = cpu;
> > -
> >               DPRINTF("data abort: [pc=0x%llx va=0x%016llx pa=0x%016llx isv=%x "
> >                       "iswrite=%x s1ptw=%x len=%d srt=%d]\n",
> >                       env->pc, hvf_exit->exception.virtual_address,
> > @@ -523,7 +519,6 @@ int hvf_vcpu_exec(CPUState *cpu)
> >               break;
> >           case EC_AA64_HVC:
> >               cpu_synchronize_state(cpu);
> > -            current_cpu = cpu;
> >               if (arm_is_psci_call(arm_cpu, EXCP_HVC)) {
> >                   arm_handle_psci_call(arm_cpu);
> >               } else {
> > @@ -533,7 +528,6 @@ int hvf_vcpu_exec(CPUState *cpu)
> >               break;
> >           case EC_AA64_SMC:
> >               cpu_synchronize_state(cpu);
> > -            current_cpu = cpu;
> >               if (arm_is_psci_call(arm_cpu, EXCP_SMC)) {
> >                   arm_handle_psci_call(arm_cpu);
> >               } else {
> > @@ -561,7 +555,5 @@ int hvf_vcpu_exec(CPUState *cpu)
> >           }
> >       } while (ret == 0);
> >
> > -    current_cpu = cpu;
> > -
> >       return ret;
> >   }


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 1/2] arm/hvf: Optimize and simplify WFI handling
  2020-12-01 23:24 ` [PATCH v2 1/2] arm/hvf: Optimize and simplify WFI handling Alexander Graf
@ 2020-12-02  1:32   ` Peter Collingbourne
  2020-12-02  1:39     ` Alexander Graf
  0 siblings, 1 reply; 10+ messages in thread
From: Peter Collingbourne @ 2020-12-02  1:32 UTC (permalink / raw)
  To: Alexander Graf
  Cc: Frank Yang, Roman Bolshakov, Peter Maydell, Eduardo Habkost,
	Richard Henderson, qemu-devel, Cameron Esfahani, qemu-arm,
	Claudio Fontana, Paolo Bonzini

On Tue, Dec 1, 2020 at 3:24 PM Alexander Graf <agraf@csgraf.de> wrote:
>
>
> On 01.12.20 22:00, Peter Collingbourne wrote:
> > Sleep on WFx until the VTIMER is due but allow ourselves to be woken
> > up on IPI.
> >
> > Signed-off-by: Peter Collingbourne <pcc@google.com>
> > ---
> > v2:
> > - simplify locking further
> > - wait indefinitely on disabled or masked timers
> >
> >   accel/hvf/hvf-cpus.c     |   5 +-
> >   include/sysemu/hvf_int.h |   3 +-
> >   target/arm/hvf/hvf.c     | 116 ++++++++++++++-------------------------
> >   3 files changed, 43 insertions(+), 81 deletions(-)
> >
> > diff --git a/accel/hvf/hvf-cpus.c b/accel/hvf/hvf-cpus.c
> > index 4360f64671..b2c8fb57f6 100644
> > --- a/accel/hvf/hvf-cpus.c
> > +++ b/accel/hvf/hvf-cpus.c
> > @@ -344,9 +344,8 @@ static int hvf_init_vcpu(CPUState *cpu)
> >       sigact.sa_handler = dummy_signal;
> >       sigaction(SIG_IPI, &sigact, NULL);
> >
> > -    pthread_sigmask(SIG_BLOCK, NULL, &set);
> > -    sigdelset(&set, SIG_IPI);
> > -    pthread_sigmask(SIG_SETMASK, &set, NULL);
> > +    pthread_sigmask(SIG_BLOCK, NULL, &cpu->hvf->unblock_ipi_mask);
> > +    sigdelset(&cpu->hvf->unblock_ipi_mask, SIG_IPI);
> >
> >   #ifdef __aarch64__
> >       r = hv_vcpu_create(&cpu->hvf->fd, (hv_vcpu_exit_t **)&cpu->hvf->exit, NULL);
> > diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h
> > index c56baa3ae8..13adf6ea77 100644
> > --- a/include/sysemu/hvf_int.h
> > +++ b/include/sysemu/hvf_int.h
> > @@ -62,8 +62,7 @@ extern HVFState *hvf_state;
> >   struct hvf_vcpu_state {
> >       uint64_t fd;
> >       void *exit;
> > -    struct timespec ts;
> > -    bool sleeping;
> > +    sigset_t unblock_ipi_mask;
> >   };
> >
> >   void assert_hvf_ok(hv_return_t ret);
> > diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
> > index 8fe10966d2..3321d48aa2 100644
> > --- a/target/arm/hvf/hvf.c
> > +++ b/target/arm/hvf/hvf.c
> > @@ -2,6 +2,7 @@
> >    * QEMU Hypervisor.framework support for Apple Silicon
> >
> >    * Copyright 2020 Alexander Graf <agraf@csgraf.de>
> > + * Copyright 2020 Google LLC
> >    *
> >    * This work is licensed under the terms of the GNU GPL, version 2 or later.
> >    * See the COPYING file in the top-level directory.
> > @@ -18,6 +19,7 @@
> >   #include "sysemu/hw_accel.h"
> >
> >   #include <Hypervisor/Hypervisor.h>
> > +#include <mach/mach_time.h>
> >
> >   #include "exec/address-spaces.h"
> >   #include "hw/irq.h"
> > @@ -320,18 +322,8 @@ int hvf_arch_init_vcpu(CPUState *cpu)
> >
> >   void hvf_kick_vcpu_thread(CPUState *cpu)
> >   {
> > -    if (cpu->hvf->sleeping) {
> > -        /*
> > -         * When sleeping, make sure we always send signals. Also, clear the
> > -         * timespec, so that an IPI that arrives between setting hvf->sleeping
> > -         * and the nanosleep syscall still aborts the sleep.
> > -         */
> > -        cpu->thread_kicked = false;
> > -        cpu->hvf->ts = (struct timespec){ };
> > -        cpus_kick_thread(cpu);
> > -    } else {
> > -        hv_vcpus_exit(&cpu->hvf->fd, 1);
> > -    }
> > +    cpus_kick_thread(cpu);
> > +    hv_vcpus_exit(&cpu->hvf->fd, 1);
> >   }
> >
> >   static int hvf_inject_interrupts(CPUState *cpu)
> > @@ -349,6 +341,18 @@ static int hvf_inject_interrupts(CPUState *cpu)
> >       return 0;
> >   }
> >
> > +static void hvf_wait_for_ipi(CPUState *cpu, struct timespec *ts)
> > +{
> > +    /*
> > +     * Use pselect to sleep so that other threads can IPI us while we're
> > +     * sleeping.
> > +     */
> > +    qatomic_mb_set(&cpu->thread_kicked, false);
> > +    qemu_mutex_unlock_iothread();
> > +    pselect(0, 0, 0, 0, ts, &cpu->hvf->unblock_ipi_mask);
> > +    qemu_mutex_lock_iothread();
> > +}
> > +
> >   int hvf_vcpu_exec(CPUState *cpu)
> >   {
> >       ARMCPU *arm_cpu = ARM_CPU(cpu);
> > @@ -357,15 +361,11 @@ int hvf_vcpu_exec(CPUState *cpu)
> >       hv_return_t r;
> >       int ret = 0;
> >
> > -    qemu_mutex_unlock_iothread();
> > -
> >       do {
> >           bool advance_pc = false;
> >
> > -        qemu_mutex_lock_iothread();
> >           current_cpu = cpu;
> >           qemu_wait_io_event_common(cpu);
> > -        qemu_mutex_unlock_iothread();
> >
> >           flush_cpu_state(cpu);
> >
> > @@ -374,10 +374,10 @@ int hvf_vcpu_exec(CPUState *cpu)
> >           }
> >
> >           if (cpu->halted) {
> > -            qemu_mutex_lock_iothread();
> >               return EXCP_HLT;
> >           }
> >
> > +        qemu_mutex_unlock_iothread();
> >           assert_hvf_ok(hv_vcpu_run(cpu->hvf->fd));
> >
> >           /* handle VMEXIT */
> > @@ -385,15 +385,14 @@ int hvf_vcpu_exec(CPUState *cpu)
> >           uint64_t syndrome = hvf_exit->exception.syndrome;
> >           uint32_t ec = syn_get_ec(syndrome);
> >
> > +        qemu_mutex_lock_iothread();
> >           switch (exit_reason) {
> >           case HV_EXIT_REASON_EXCEPTION:
> >               /* This is the main one, handle below. */
> >               break;
> >           case HV_EXIT_REASON_VTIMER_ACTIVATED:
> > -            qemu_mutex_lock_iothread();
> >               current_cpu = cpu;
> >               qemu_set_irq(arm_cpu->gt_timer_outputs[GTIMER_VIRT], 1);
> > -            qemu_mutex_unlock_iothread();
> >               continue;
> >           case HV_EXIT_REASON_CANCELED:
> >               /* we got kicked, no exit to process */
> > @@ -413,7 +412,6 @@ int hvf_vcpu_exec(CPUState *cpu)
> >               uint32_t srt = (syndrome >> 16) & 0x1f;
> >               uint64_t val = 0;
> >
> > -            qemu_mutex_lock_iothread();
> >               current_cpu = cpu;
> >
> >               DPRINTF("data abort: [pc=0x%llx va=0x%016llx pa=0x%016llx isv=%x "
> > @@ -446,8 +444,6 @@ int hvf_vcpu_exec(CPUState *cpu)
> >                   hvf_set_reg(cpu, srt, val);
> >               }
> >
> > -            qemu_mutex_unlock_iothread();
> > -
> >               advance_pc = true;
> >               break;
> >           }
> > @@ -493,68 +489,40 @@ int hvf_vcpu_exec(CPUState *cpu)
> >           case EC_WFX_TRAP:
> >               if (!(syndrome & WFX_IS_WFE) && !(cpu->interrupt_request &
> >                   (CPU_INTERRUPT_HARD | CPU_INTERRUPT_FIQ))) {
> > -                uint64_t cval, ctl, val, diff, now;
> > +                advance_pc = true;
> >
> > -                /* Set up a local timer for vtimer if necessary ... */
> > -                r = hv_vcpu_get_sys_reg(cpu->hvf->fd, HV_SYS_REG_CNTV_CTL_EL0, &ctl);
> > -                assert_hvf_ok(r);
> > -                r = hv_vcpu_get_sys_reg(cpu->hvf->fd, HV_SYS_REG_CNTV_CVAL_EL0, &cval);
> > +                uint64_t ctl;
> > +                r = hv_vcpu_get_sys_reg(cpu->hvf->fd, HV_SYS_REG_CNTV_CTL_EL0,
> > +                                        &ctl);
> >                   assert_hvf_ok(r);
> >
> > -                asm volatile("mrs %0, cntvct_el0" : "=r"(val));
> > -                diff = cval - val;
> > -
> > -                now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) /
> > -                      gt_cntfrq_period_ns(arm_cpu);
> > -
> > -                /* Timer disabled or masked, just wait for long */
> >                   if (!(ctl & 1) || (ctl & 2)) {
> > -                    diff = (120 * NANOSECONDS_PER_SECOND) /
> > -                           gt_cntfrq_period_ns(arm_cpu);
> > +                    /* Timer disabled or masked, just wait for an IPI. */
> > +                    hvf_wait_for_ipi(cpu, NULL);
> > +                    break;
> >                   }
> >
> > -                if (diff < INT64_MAX) {
> > -                    uint64_t ns = diff * gt_cntfrq_period_ns(arm_cpu);
> > -                    struct timespec *ts = &cpu->hvf->ts;
> > -
> > -                    *ts = (struct timespec){
> > -                        .tv_sec = ns / NANOSECONDS_PER_SECOND,
> > -                        .tv_nsec = ns % NANOSECONDS_PER_SECOND,
> > -                    };
> > -
> > -                    /*
> > -                     * Waking up easily takes 1ms, don't go to sleep for smaller
> > -                     * time periods than 2ms.
> > -                     */
> > -                    if (!ts->tv_sec && (ts->tv_nsec < (SCALE_MS * 2))) {
> > -                        advance_pc = true;
> > -                        break;
> > -                    }
> > -
> > -                    /* Set cpu->hvf->sleeping so that we get a SIG_IPI signal. */
> > -                    cpu->hvf->sleeping = true;
> > -                    smp_mb();
> > -
> > -                    /* Bail out if we received an IRQ meanwhile */
> > -                    if (cpu->thread_kicked || (cpu->interrupt_request &
> > -                        (CPU_INTERRUPT_HARD | CPU_INTERRUPT_FIQ))) {
> > -                        cpu->hvf->sleeping = false;
> > -                        break;
> > -                    }
> > -
> > -                    /* nanosleep returns on signal, so we wake up on kick. */
> > -                    nanosleep(ts, NULL);
> > -
> > -                    /* Out of sleep - either naturally or because of a kick */
> > -                    cpu->hvf->sleeping = false;
> > +                uint64_t cval;
> > +                r = hv_vcpu_get_sys_reg(cpu->hvf->fd, HV_SYS_REG_CNTV_CVAL_EL0,
> > +                                        &cval);
> > +                assert_hvf_ok(r);
> > +
> > +                int64_t ticks_to_sleep = cval - mach_absolute_time();
>
>
> I think you touched base on it in a previous thread, but would you mind
> explaining again why mach_absolute_time() is the right thing to check
> cval against? If I read the headers correctly, the CNTVOFF register
> should be 0, so CNTVCT should be the reference time, no?

In my experiments I've found that CNTPCT_EL0 and CNTVCT_EL0 are the
same when read on the host (i.e. host CNTVOFF_EL2 = 0). When we look
at the guest, we see that guest CNTPCT_EL0 corresponds to
mach_absolute_time() on the host and not to host CNTPCT_EL0 (if you
look at the XNU kernel sources you will see that mach_absolute_time()
reads CNTPCT_EL0 and adds a constant corresponding to the amount of
time that the machine spends asleep), so I think that what's going on
at the hypervisor level is that guest CNTPOFF_EL2 is being set to the
same constant to make it correspond to mach_absolute_time().

> Also, can you please split the patch into one that I can squash into my
> existing one (remove WFI handling altogether), an individual one to
> revive the global io lock (happy to squash too unless you think it's
> useful to keep separate) and then this one?

Sure, I'll do that.

Peter


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 1/2] arm/hvf: Optimize and simplify WFI handling
  2020-12-02  1:32   ` Peter Collingbourne
@ 2020-12-02  1:39     ` Alexander Graf
  2020-12-02  1:52       ` Peter Collingbourne
  0 siblings, 1 reply; 10+ messages in thread
From: Alexander Graf @ 2020-12-02  1:39 UTC (permalink / raw)
  To: Peter Collingbourne
  Cc: Peter Maydell, Eduardo Habkost, Richard Henderson, qemu-devel,
	Cameron Esfahani, Roman Bolshakov, qemu-arm, Claudio Fontana,
	Frank Yang, Paolo Bonzini


On 02.12.20 02:32, Peter Collingbourne wrote:
> On Tue, Dec 1, 2020 at 3:24 PM Alexander Graf <agraf@csgraf.de> wrote:
>>
>> On 01.12.20 22:00, Peter Collingbourne wrote:
>>> Sleep on WFx until the VTIMER is due but allow ourselves to be woken
>>> up on IPI.
>>>
>>> Signed-off-by: Peter Collingbourne <pcc@google.com>
>>> ---
>>> v2:
>>> - simplify locking further
>>> - wait indefinitely on disabled or masked timers
>>>
>>>    accel/hvf/hvf-cpus.c     |   5 +-
>>>    include/sysemu/hvf_int.h |   3 +-
>>>    target/arm/hvf/hvf.c     | 116 ++++++++++++++-------------------------
>>>    3 files changed, 43 insertions(+), 81 deletions(-)
>>>
>>> diff --git a/accel/hvf/hvf-cpus.c b/accel/hvf/hvf-cpus.c
>>> index 4360f64671..b2c8fb57f6 100644
>>> --- a/accel/hvf/hvf-cpus.c
>>> +++ b/accel/hvf/hvf-cpus.c
>>> @@ -344,9 +344,8 @@ static int hvf_init_vcpu(CPUState *cpu)
>>>        sigact.sa_handler = dummy_signal;
>>>        sigaction(SIG_IPI, &sigact, NULL);
>>>
>>> -    pthread_sigmask(SIG_BLOCK, NULL, &set);
>>> -    sigdelset(&set, SIG_IPI);
>>> -    pthread_sigmask(SIG_SETMASK, &set, NULL);
>>> +    pthread_sigmask(SIG_BLOCK, NULL, &cpu->hvf->unblock_ipi_mask);
>>> +    sigdelset(&cpu->hvf->unblock_ipi_mask, SIG_IPI);
>>>
>>>    #ifdef __aarch64__
>>>        r = hv_vcpu_create(&cpu->hvf->fd, (hv_vcpu_exit_t **)&cpu->hvf->exit, NULL);
>>> diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h
>>> index c56baa3ae8..13adf6ea77 100644
>>> --- a/include/sysemu/hvf_int.h
>>> +++ b/include/sysemu/hvf_int.h
>>> @@ -62,8 +62,7 @@ extern HVFState *hvf_state;
>>>    struct hvf_vcpu_state {
>>>        uint64_t fd;
>>>        void *exit;
>>> -    struct timespec ts;
>>> -    bool sleeping;
>>> +    sigset_t unblock_ipi_mask;
>>>    };
>>>
>>>    void assert_hvf_ok(hv_return_t ret);
>>> diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
>>> index 8fe10966d2..3321d48aa2 100644
>>> --- a/target/arm/hvf/hvf.c
>>> +++ b/target/arm/hvf/hvf.c
>>> @@ -2,6 +2,7 @@
>>>     * QEMU Hypervisor.framework support for Apple Silicon
>>>
>>>     * Copyright 2020 Alexander Graf <agraf@csgraf.de>
>>> + * Copyright 2020 Google LLC
>>>     *
>>>     * This work is licensed under the terms of the GNU GPL, version 2 or later.
>>>     * See the COPYING file in the top-level directory.
>>> @@ -18,6 +19,7 @@
>>>    #include "sysemu/hw_accel.h"
>>>
>>>    #include <Hypervisor/Hypervisor.h>
>>> +#include <mach/mach_time.h>
>>>
>>>    #include "exec/address-spaces.h"
>>>    #include "hw/irq.h"
>>> @@ -320,18 +322,8 @@ int hvf_arch_init_vcpu(CPUState *cpu)
>>>
>>>    void hvf_kick_vcpu_thread(CPUState *cpu)
>>>    {
>>> -    if (cpu->hvf->sleeping) {
>>> -        /*
>>> -         * When sleeping, make sure we always send signals. Also, clear the
>>> -         * timespec, so that an IPI that arrives between setting hvf->sleeping
>>> -         * and the nanosleep syscall still aborts the sleep.
>>> -         */
>>> -        cpu->thread_kicked = false;
>>> -        cpu->hvf->ts = (struct timespec){ };
>>> -        cpus_kick_thread(cpu);
>>> -    } else {
>>> -        hv_vcpus_exit(&cpu->hvf->fd, 1);
>>> -    }
>>> +    cpus_kick_thread(cpu);
>>> +    hv_vcpus_exit(&cpu->hvf->fd, 1);
>>>    }
>>>
>>>    static int hvf_inject_interrupts(CPUState *cpu)
>>> @@ -349,6 +341,18 @@ static int hvf_inject_interrupts(CPUState *cpu)
>>>        return 0;
>>>    }
>>>
>>> +static void hvf_wait_for_ipi(CPUState *cpu, struct timespec *ts)
>>> +{
>>> +    /*
>>> +     * Use pselect to sleep so that other threads can IPI us while we're
>>> +     * sleeping.
>>> +     */
>>> +    qatomic_mb_set(&cpu->thread_kicked, false);
>>> +    qemu_mutex_unlock_iothread();
>>> +    pselect(0, 0, 0, 0, ts, &cpu->hvf->unblock_ipi_mask);
>>> +    qemu_mutex_lock_iothread();
>>> +}
>>> +
>>>    int hvf_vcpu_exec(CPUState *cpu)
>>>    {
>>>        ARMCPU *arm_cpu = ARM_CPU(cpu);
>>> @@ -357,15 +361,11 @@ int hvf_vcpu_exec(CPUState *cpu)
>>>        hv_return_t r;
>>>        int ret = 0;
>>>
>>> -    qemu_mutex_unlock_iothread();
>>> -
>>>        do {
>>>            bool advance_pc = false;
>>>
>>> -        qemu_mutex_lock_iothread();
>>>            current_cpu = cpu;
>>>            qemu_wait_io_event_common(cpu);
>>> -        qemu_mutex_unlock_iothread();
>>>
>>>            flush_cpu_state(cpu);
>>>
>>> @@ -374,10 +374,10 @@ int hvf_vcpu_exec(CPUState *cpu)
>>>            }
>>>
>>>            if (cpu->halted) {
>>> -            qemu_mutex_lock_iothread();
>>>                return EXCP_HLT;
>>>            }
>>>
>>> +        qemu_mutex_unlock_iothread();
>>>            assert_hvf_ok(hv_vcpu_run(cpu->hvf->fd));
>>>
>>>            /* handle VMEXIT */
>>> @@ -385,15 +385,14 @@ int hvf_vcpu_exec(CPUState *cpu)
>>>            uint64_t syndrome = hvf_exit->exception.syndrome;
>>>            uint32_t ec = syn_get_ec(syndrome);
>>>
>>> +        qemu_mutex_lock_iothread();
>>>            switch (exit_reason) {
>>>            case HV_EXIT_REASON_EXCEPTION:
>>>                /* This is the main one, handle below. */
>>>                break;
>>>            case HV_EXIT_REASON_VTIMER_ACTIVATED:
>>> -            qemu_mutex_lock_iothread();
>>>                current_cpu = cpu;
>>>                qemu_set_irq(arm_cpu->gt_timer_outputs[GTIMER_VIRT], 1);
>>> -            qemu_mutex_unlock_iothread();
>>>                continue;
>>>            case HV_EXIT_REASON_CANCELED:
>>>                /* we got kicked, no exit to process */
>>> @@ -413,7 +412,6 @@ int hvf_vcpu_exec(CPUState *cpu)
>>>                uint32_t srt = (syndrome >> 16) & 0x1f;
>>>                uint64_t val = 0;
>>>
>>> -            qemu_mutex_lock_iothread();
>>>                current_cpu = cpu;
>>>
>>>                DPRINTF("data abort: [pc=0x%llx va=0x%016llx pa=0x%016llx isv=%x "
>>> @@ -446,8 +444,6 @@ int hvf_vcpu_exec(CPUState *cpu)
>>>                    hvf_set_reg(cpu, srt, val);
>>>                }
>>>
>>> -            qemu_mutex_unlock_iothread();
>>> -
>>>                advance_pc = true;
>>>                break;
>>>            }
>>> @@ -493,68 +489,40 @@ int hvf_vcpu_exec(CPUState *cpu)
>>>            case EC_WFX_TRAP:
>>>                if (!(syndrome & WFX_IS_WFE) && !(cpu->interrupt_request &
>>>                    (CPU_INTERRUPT_HARD | CPU_INTERRUPT_FIQ))) {
>>> -                uint64_t cval, ctl, val, diff, now;
>>> +                advance_pc = true;
>>>
>>> -                /* Set up a local timer for vtimer if necessary ... */
>>> -                r = hv_vcpu_get_sys_reg(cpu->hvf->fd, HV_SYS_REG_CNTV_CTL_EL0, &ctl);
>>> -                assert_hvf_ok(r);
>>> -                r = hv_vcpu_get_sys_reg(cpu->hvf->fd, HV_SYS_REG_CNTV_CVAL_EL0, &cval);
>>> +                uint64_t ctl;
>>> +                r = hv_vcpu_get_sys_reg(cpu->hvf->fd, HV_SYS_REG_CNTV_CTL_EL0,
>>> +                                        &ctl);
>>>                    assert_hvf_ok(r);
>>>
>>> -                asm volatile("mrs %0, cntvct_el0" : "=r"(val));
>>> -                diff = cval - val;
>>> -
>>> -                now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) /
>>> -                      gt_cntfrq_period_ns(arm_cpu);
>>> -
>>> -                /* Timer disabled or masked, just wait for long */
>>>                    if (!(ctl & 1) || (ctl & 2)) {
>>> -                    diff = (120 * NANOSECONDS_PER_SECOND) /
>>> -                           gt_cntfrq_period_ns(arm_cpu);
>>> +                    /* Timer disabled or masked, just wait for an IPI. */
>>> +                    hvf_wait_for_ipi(cpu, NULL);
>>> +                    break;
>>>                    }
>>>
>>> -                if (diff < INT64_MAX) {
>>> -                    uint64_t ns = diff * gt_cntfrq_period_ns(arm_cpu);
>>> -                    struct timespec *ts = &cpu->hvf->ts;
>>> -
>>> -                    *ts = (struct timespec){
>>> -                        .tv_sec = ns / NANOSECONDS_PER_SECOND,
>>> -                        .tv_nsec = ns % NANOSECONDS_PER_SECOND,
>>> -                    };
>>> -
>>> -                    /*
>>> -                     * Waking up easily takes 1ms, don't go to sleep for smaller
>>> -                     * time periods than 2ms.
>>> -                     */
>>> -                    if (!ts->tv_sec && (ts->tv_nsec < (SCALE_MS * 2))) {
>>> -                        advance_pc = true;
>>> -                        break;
>>> -                    }
>>> -
>>> -                    /* Set cpu->hvf->sleeping so that we get a SIG_IPI signal. */
>>> -                    cpu->hvf->sleeping = true;
>>> -                    smp_mb();
>>> -
>>> -                    /* Bail out if we received an IRQ meanwhile */
>>> -                    if (cpu->thread_kicked || (cpu->interrupt_request &
>>> -                        (CPU_INTERRUPT_HARD | CPU_INTERRUPT_FIQ))) {
>>> -                        cpu->hvf->sleeping = false;
>>> -                        break;
>>> -                    }
>>> -
>>> -                    /* nanosleep returns on signal, so we wake up on kick. */
>>> -                    nanosleep(ts, NULL);
>>> -
>>> -                    /* Out of sleep - either naturally or because of a kick */
>>> -                    cpu->hvf->sleeping = false;
>>> +                uint64_t cval;
>>> +                r = hv_vcpu_get_sys_reg(cpu->hvf->fd, HV_SYS_REG_CNTV_CVAL_EL0,
>>> +                                        &cval);
>>> +                assert_hvf_ok(r);
>>> +
>>> +                int64_t ticks_to_sleep = cval - mach_absolute_time();
>>
>> I think you touched based on it in a previous thread, but would you mind
>> to explain again why mach_absolute_time() is the right thing to check
>> cval against? If I read the headers correctly, the cnvt_off register
>> should be 0, so cntvct should be the reference time, no?
> In my experiments I've found that CNTPCT_EL0 and CNTVCT_EL0 are the
> same when read on the host (i.e. host CNTVOFF_EL2 = 0). When we look
> at the guest we see that CNTPCT_EL0 corresponds to
> mach_absolute_time() on the host and not host CNTPCT_EL0 (if you look
> at XNU kernel sources you will see that mach_absolute_time() reads
> CNTPCT_EL0 and adds a constant corresponding to the amount of time
> that the machine spends asleep) so I think that what's going on at the
> hypervisor level is that guest CNTPOFF_EL2 is being set to the same
> constant to make it correspond to mach_absolute_time().


Yes, I can absolutely see how it's different from CNTPCT, but it should 
be identical to CNTVCT_EL0 inside QEMU, no?


Alex




^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 1/2] arm/hvf: Optimize and simplify WFI handling
  2020-12-02  1:39     ` Alexander Graf
@ 2020-12-02  1:52       ` Peter Collingbourne
  2020-12-02  1:54         ` Alexander Graf
  0 siblings, 1 reply; 10+ messages in thread
From: Peter Collingbourne @ 2020-12-02  1:52 UTC (permalink / raw)
  To: Alexander Graf
  Cc: Frank Yang, Roman Bolshakov, Peter Maydell, Eduardo Habkost,
	Richard Henderson, qemu-devel, Cameron Esfahani, qemu-arm,
	Claudio Fontana, Paolo Bonzini

On Tue, Dec 1, 2020 at 5:39 PM Alexander Graf <agraf@csgraf.de> wrote:
>
>
> On 02.12.20 02:32, Peter Collingbourne wrote:
> > On Tue, Dec 1, 2020 at 3:24 PM Alexander Graf <agraf@csgraf.de> wrote:
> >>
> >> On 01.12.20 22:00, Peter Collingbourne wrote:
> >>> Sleep on WFx until the VTIMER is due but allow ourselves to be woken
> >>> up on IPI.
> >>>
> >>> Signed-off-by: Peter Collingbourne <pcc@google.com>
> >>> ---
> >>> v2:
> >>> - simplify locking further
> >>> - wait indefinitely on disabled or masked timers
> >>>
> >>>    accel/hvf/hvf-cpus.c     |   5 +-
> >>>    include/sysemu/hvf_int.h |   3 +-
> >>>    target/arm/hvf/hvf.c     | 116 ++++++++++++++-------------------------
> >>>    3 files changed, 43 insertions(+), 81 deletions(-)
> >>>
> >>> diff --git a/accel/hvf/hvf-cpus.c b/accel/hvf/hvf-cpus.c
> >>> index 4360f64671..b2c8fb57f6 100644
> >>> --- a/accel/hvf/hvf-cpus.c
> >>> +++ b/accel/hvf/hvf-cpus.c
> >>> @@ -344,9 +344,8 @@ static int hvf_init_vcpu(CPUState *cpu)
> >>>        sigact.sa_handler = dummy_signal;
> >>>        sigaction(SIG_IPI, &sigact, NULL);
> >>>
> >>> -    pthread_sigmask(SIG_BLOCK, NULL, &set);
> >>> -    sigdelset(&set, SIG_IPI);
> >>> -    pthread_sigmask(SIG_SETMASK, &set, NULL);
> >>> +    pthread_sigmask(SIG_BLOCK, NULL, &cpu->hvf->unblock_ipi_mask);
> >>> +    sigdelset(&cpu->hvf->unblock_ipi_mask, SIG_IPI);
> >>>
> >>>    #ifdef __aarch64__
> >>>        r = hv_vcpu_create(&cpu->hvf->fd, (hv_vcpu_exit_t **)&cpu->hvf->exit, NULL);
> >>> diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h
> >>> index c56baa3ae8..13adf6ea77 100644
> >>> --- a/include/sysemu/hvf_int.h
> >>> +++ b/include/sysemu/hvf_int.h
> >>> @@ -62,8 +62,7 @@ extern HVFState *hvf_state;
> >>>    struct hvf_vcpu_state {
> >>>        uint64_t fd;
> >>>        void *exit;
> >>> -    struct timespec ts;
> >>> -    bool sleeping;
> >>> +    sigset_t unblock_ipi_mask;
> >>>    };
> >>>
> >>>    void assert_hvf_ok(hv_return_t ret);
> >>> diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
> >>> index 8fe10966d2..3321d48aa2 100644
> >>> --- a/target/arm/hvf/hvf.c
> >>> +++ b/target/arm/hvf/hvf.c
> >>> @@ -2,6 +2,7 @@
> >>>     * QEMU Hypervisor.framework support for Apple Silicon
> >>>
> >>>     * Copyright 2020 Alexander Graf <agraf@csgraf.de>
> >>> + * Copyright 2020 Google LLC
> >>>     *
> >>>     * This work is licensed under the terms of the GNU GPL, version 2 or later.
> >>>     * See the COPYING file in the top-level directory.
> >>> @@ -18,6 +19,7 @@
> >>>    #include "sysemu/hw_accel.h"
> >>>
> >>>    #include <Hypervisor/Hypervisor.h>
> >>> +#include <mach/mach_time.h>
> >>>
> >>>    #include "exec/address-spaces.h"
> >>>    #include "hw/irq.h"
> >>> @@ -320,18 +322,8 @@ int hvf_arch_init_vcpu(CPUState *cpu)
> >>>
> >>>    void hvf_kick_vcpu_thread(CPUState *cpu)
> >>>    {
> >>> -    if (cpu->hvf->sleeping) {
> >>> -        /*
> >>> -         * When sleeping, make sure we always send signals. Also, clear the
> >>> -         * timespec, so that an IPI that arrives between setting hvf->sleeping
> >>> -         * and the nanosleep syscall still aborts the sleep.
> >>> -         */
> >>> -        cpu->thread_kicked = false;
> >>> -        cpu->hvf->ts = (struct timespec){ };
> >>> -        cpus_kick_thread(cpu);
> >>> -    } else {
> >>> -        hv_vcpus_exit(&cpu->hvf->fd, 1);
> >>> -    }
> >>> +    cpus_kick_thread(cpu);
> >>> +    hv_vcpus_exit(&cpu->hvf->fd, 1);
> >>>    }
> >>>
> >>>    static int hvf_inject_interrupts(CPUState *cpu)
> >>> @@ -349,6 +341,18 @@ static int hvf_inject_interrupts(CPUState *cpu)
> >>>        return 0;
> >>>    }
> >>>
> >>> +static void hvf_wait_for_ipi(CPUState *cpu, struct timespec *ts)
> >>> +{
> >>> +    /*
> >>> +     * Use pselect to sleep so that other threads can IPI us while we're
> >>> +     * sleeping.
> >>> +     */
> >>> +    qatomic_mb_set(&cpu->thread_kicked, false);
> >>> +    qemu_mutex_unlock_iothread();
> >>> +    pselect(0, 0, 0, 0, ts, &cpu->hvf->unblock_ipi_mask);
> >>> +    qemu_mutex_lock_iothread();
> >>> +}
> >>> +
> >>>    int hvf_vcpu_exec(CPUState *cpu)
> >>>    {
> >>>        ARMCPU *arm_cpu = ARM_CPU(cpu);
> >>> @@ -357,15 +361,11 @@ int hvf_vcpu_exec(CPUState *cpu)
> >>>        hv_return_t r;
> >>>        int ret = 0;
> >>>
> >>> -    qemu_mutex_unlock_iothread();
> >>> -
> >>>        do {
> >>>            bool advance_pc = false;
> >>>
> >>> -        qemu_mutex_lock_iothread();
> >>>            current_cpu = cpu;
> >>>            qemu_wait_io_event_common(cpu);
> >>> -        qemu_mutex_unlock_iothread();
> >>>
> >>>            flush_cpu_state(cpu);
> >>>
> >>> @@ -374,10 +374,10 @@ int hvf_vcpu_exec(CPUState *cpu)
> >>>            }
> >>>
> >>>            if (cpu->halted) {
> >>> -            qemu_mutex_lock_iothread();
> >>>                return EXCP_HLT;
> >>>            }
> >>>
> >>> +        qemu_mutex_unlock_iothread();
> >>>            assert_hvf_ok(hv_vcpu_run(cpu->hvf->fd));
> >>>
> >>>            /* handle VMEXIT */
> >>> @@ -385,15 +385,14 @@ int hvf_vcpu_exec(CPUState *cpu)
> >>>            uint64_t syndrome = hvf_exit->exception.syndrome;
> >>>            uint32_t ec = syn_get_ec(syndrome);
> >>>
> >>> +        qemu_mutex_lock_iothread();
> >>>            switch (exit_reason) {
> >>>            case HV_EXIT_REASON_EXCEPTION:
> >>>                /* This is the main one, handle below. */
> >>>                break;
> >>>            case HV_EXIT_REASON_VTIMER_ACTIVATED:
> >>> -            qemu_mutex_lock_iothread();
> >>>                current_cpu = cpu;
> >>>                qemu_set_irq(arm_cpu->gt_timer_outputs[GTIMER_VIRT], 1);
> >>> -            qemu_mutex_unlock_iothread();
> >>>                continue;
> >>>            case HV_EXIT_REASON_CANCELED:
> >>>                /* we got kicked, no exit to process */
> >>> @@ -413,7 +412,6 @@ int hvf_vcpu_exec(CPUState *cpu)
> >>>                uint32_t srt = (syndrome >> 16) & 0x1f;
> >>>                uint64_t val = 0;
> >>>
> >>> -            qemu_mutex_lock_iothread();
> >>>                current_cpu = cpu;
> >>>
> >>>                DPRINTF("data abort: [pc=0x%llx va=0x%016llx pa=0x%016llx isv=%x "
> >>> @@ -446,8 +444,6 @@ int hvf_vcpu_exec(CPUState *cpu)
> >>>                    hvf_set_reg(cpu, srt, val);
> >>>                }
> >>>
> >>> -            qemu_mutex_unlock_iothread();
> >>> -
> >>>                advance_pc = true;
> >>>                break;
> >>>            }
> >>> @@ -493,68 +489,40 @@ int hvf_vcpu_exec(CPUState *cpu)
> >>>            case EC_WFX_TRAP:
> >>>                if (!(syndrome & WFX_IS_WFE) && !(cpu->interrupt_request &
> >>>                    (CPU_INTERRUPT_HARD | CPU_INTERRUPT_FIQ))) {
> >>> -                uint64_t cval, ctl, val, diff, now;
> >>> +                advance_pc = true;
> >>>
> >>> -                /* Set up a local timer for vtimer if necessary ... */
> >>> -                r = hv_vcpu_get_sys_reg(cpu->hvf->fd, HV_SYS_REG_CNTV_CTL_EL0, &ctl);
> >>> -                assert_hvf_ok(r);
> >>> -                r = hv_vcpu_get_sys_reg(cpu->hvf->fd, HV_SYS_REG_CNTV_CVAL_EL0, &cval);
> >>> +                uint64_t ctl;
> >>> +                r = hv_vcpu_get_sys_reg(cpu->hvf->fd, HV_SYS_REG_CNTV_CTL_EL0,
> >>> +                                        &ctl);
> >>>                    assert_hvf_ok(r);
> >>>
> >>> -                asm volatile("mrs %0, cntvct_el0" : "=r"(val));
> >>> -                diff = cval - val;
> >>> -
> >>> -                now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) /
> >>> -                      gt_cntfrq_period_ns(arm_cpu);
> >>> -
> >>> -                /* Timer disabled or masked, just wait for long */
> >>>                    if (!(ctl & 1) || (ctl & 2)) {
> >>> -                    diff = (120 * NANOSECONDS_PER_SECOND) /
> >>> -                           gt_cntfrq_period_ns(arm_cpu);
> >>> +                    /* Timer disabled or masked, just wait for an IPI. */
> >>> +                    hvf_wait_for_ipi(cpu, NULL);
> >>> +                    break;
> >>>                    }
> >>>
> >>> -                if (diff < INT64_MAX) {
> >>> -                    uint64_t ns = diff * gt_cntfrq_period_ns(arm_cpu);
> >>> -                    struct timespec *ts = &cpu->hvf->ts;
> >>> -
> >>> -                    *ts = (struct timespec){
> >>> -                        .tv_sec = ns / NANOSECONDS_PER_SECOND,
> >>> -                        .tv_nsec = ns % NANOSECONDS_PER_SECOND,
> >>> -                    };
> >>> -
> >>> -                    /*
> >>> -                     * Waking up easily takes 1ms, don't go to sleep for smaller
> >>> -                     * time periods than 2ms.
> >>> -                     */
> >>> -                    if (!ts->tv_sec && (ts->tv_nsec < (SCALE_MS * 2))) {
> >>> -                        advance_pc = true;
> >>> -                        break;
> >>> -                    }
> >>> -
> >>> -                    /* Set cpu->hvf->sleeping so that we get a SIG_IPI signal. */
> >>> -                    cpu->hvf->sleeping = true;
> >>> -                    smp_mb();
> >>> -
> >>> -                    /* Bail out if we received an IRQ meanwhile */
> >>> -                    if (cpu->thread_kicked || (cpu->interrupt_request &
> >>> -                        (CPU_INTERRUPT_HARD | CPU_INTERRUPT_FIQ))) {
> >>> -                        cpu->hvf->sleeping = false;
> >>> -                        break;
> >>> -                    }
> >>> -
> >>> -                    /* nanosleep returns on signal, so we wake up on kick. */
> >>> -                    nanosleep(ts, NULL);
> >>> -
> >>> -                    /* Out of sleep - either naturally or because of a kick */
> >>> -                    cpu->hvf->sleeping = false;
> >>> +                uint64_t cval;
> >>> +                r = hv_vcpu_get_sys_reg(cpu->hvf->fd, HV_SYS_REG_CNTV_CVAL_EL0,
> >>> +                                        &cval);
> >>> +                assert_hvf_ok(r);
> >>> +
> >>> +                int64_t ticks_to_sleep = cval - mach_absolute_time();
> >>
> >> I think you touched based on it in a previous thread, but would you mind
> >> to explain again why mach_absolute_time() is the right thing to check
> >> cval against? If I read the headers correctly, the cnvt_off register
> >> should be 0, so cntvct should be the reference time, no?
> > In my experiments I've found that CNTPCT_EL0 and CNTVCT_EL0 are the
> > same when read on the host (i.e. host CNTVOFF_EL2 = 0). When we look
> > at the guest we see that CNTPCT_EL0 corresponds to
> > mach_absolute_time() on the host and not host CNTPCT_EL0 (if you look
> > at XNU kernel sources you will see that mach_absolute_time() reads
> > CNTPCT_EL0 and adds a constant corresponding to the amount of time
> > that the machine spends asleep) so I think that what's going on at the
> > hypervisor level is that guest CNTPOFF_EL2 is being set to the same
> > constant to make it correspond to mach_absolute_time().
>
>
> Yes, I can absolutely see how it's different from CNTPCT, but it should
> be identical to CNTVCT_EL0 inside QEMU, no?

Not necessarily. It's possible for the guest to observe a different
CNTVCT_EL0 either because CNTPOFF_EL2 is set to a non-zero value or
because the hypervisor is secretly adding the "time spent asleep"
constant to CNTVOFF_EL2 on top of the value that we set it to via the
API.

Peter


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 1/2] arm/hvf: Optimize and simplify WFI handling
  2020-12-02  1:52       ` Peter Collingbourne
@ 2020-12-02  1:54         ` Alexander Graf
  2020-12-02  1:57           ` Peter Collingbourne
  0 siblings, 1 reply; 10+ messages in thread
From: Alexander Graf @ 2020-12-02  1:54 UTC (permalink / raw)
  To: Peter Collingbourne
  Cc: Peter Maydell, Eduardo Habkost, Richard Henderson, qemu-devel,
	Cameron Esfahani, Roman Bolshakov, qemu-arm, Claudio Fontana,
	Frank Yang, Paolo Bonzini


On 02.12.20 02:52, Peter Collingbourne wrote:
> On Tue, Dec 1, 2020 at 5:39 PM Alexander Graf <agraf@csgraf.de> wrote:
>>
>> On 02.12.20 02:32, Peter Collingbourne wrote:
>>> On Tue, Dec 1, 2020 at 3:24 PM Alexander Graf <agraf@csgraf.de> wrote:
>>>> On 01.12.20 22:00, Peter Collingbourne wrote:
>>>>> Sleep on WFx until the VTIMER is due but allow ourselves to be woken
>>>>> up on IPI.
>>>>>
>>>>> Signed-off-by: Peter Collingbourne <pcc@google.com>
>>>>> ---
>>>>> v2:
>>>>> - simplify locking further
>>>>> - wait indefinitely on disabled or masked timers
>>>>>
>>>>>     accel/hvf/hvf-cpus.c     |   5 +-
>>>>>     include/sysemu/hvf_int.h |   3 +-
>>>>>     target/arm/hvf/hvf.c     | 116 ++++++++++++++-------------------------
>>>>>     3 files changed, 43 insertions(+), 81 deletions(-)
>>>>>
>>>>> diff --git a/accel/hvf/hvf-cpus.c b/accel/hvf/hvf-cpus.c
>>>>> index 4360f64671..b2c8fb57f6 100644
>>>>> --- a/accel/hvf/hvf-cpus.c
>>>>> +++ b/accel/hvf/hvf-cpus.c
>>>>> @@ -344,9 +344,8 @@ static int hvf_init_vcpu(CPUState *cpu)
>>>>>         sigact.sa_handler = dummy_signal;
>>>>>         sigaction(SIG_IPI, &sigact, NULL);
>>>>>
>>>>> -    pthread_sigmask(SIG_BLOCK, NULL, &set);
>>>>> -    sigdelset(&set, SIG_IPI);
>>>>> -    pthread_sigmask(SIG_SETMASK, &set, NULL);
>>>>> +    pthread_sigmask(SIG_BLOCK, NULL, &cpu->hvf->unblock_ipi_mask);
>>>>> +    sigdelset(&cpu->hvf->unblock_ipi_mask, SIG_IPI);
>>>>>
>>>>>     #ifdef __aarch64__
>>>>>         r = hv_vcpu_create(&cpu->hvf->fd, (hv_vcpu_exit_t **)&cpu->hvf->exit, NULL);
>>>>> diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h
>>>>> index c56baa3ae8..13adf6ea77 100644
>>>>> --- a/include/sysemu/hvf_int.h
>>>>> +++ b/include/sysemu/hvf_int.h
>>>>> @@ -62,8 +62,7 @@ extern HVFState *hvf_state;
>>>>>     struct hvf_vcpu_state {
>>>>>         uint64_t fd;
>>>>>         void *exit;
>>>>> -    struct timespec ts;
>>>>> -    bool sleeping;
>>>>> +    sigset_t unblock_ipi_mask;
>>>>>     };
>>>>>
>>>>>     void assert_hvf_ok(hv_return_t ret);
>>>>> diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
>>>>> index 8fe10966d2..3321d48aa2 100644
>>>>> --- a/target/arm/hvf/hvf.c
>>>>> +++ b/target/arm/hvf/hvf.c
>>>>> @@ -2,6 +2,7 @@
>>>>>      * QEMU Hypervisor.framework support for Apple Silicon
>>>>>
>>>>>      * Copyright 2020 Alexander Graf <agraf@csgraf.de>
>>>>> + * Copyright 2020 Google LLC
>>>>>      *
>>>>>      * This work is licensed under the terms of the GNU GPL, version 2 or later.
>>>>>      * See the COPYING file in the top-level directory.
>>>>> @@ -18,6 +19,7 @@
>>>>>     #include "sysemu/hw_accel.h"
>>>>>
>>>>>     #include <Hypervisor/Hypervisor.h>
>>>>> +#include <mach/mach_time.h>
>>>>>
>>>>>     #include "exec/address-spaces.h"
>>>>>     #include "hw/irq.h"
>>>>> @@ -320,18 +322,8 @@ int hvf_arch_init_vcpu(CPUState *cpu)
>>>>>
>>>>>     void hvf_kick_vcpu_thread(CPUState *cpu)
>>>>>     {
>>>>> -    if (cpu->hvf->sleeping) {
>>>>> -        /*
>>>>> -         * When sleeping, make sure we always send signals. Also, clear the
>>>>> -         * timespec, so that an IPI that arrives between setting hvf->sleeping
>>>>> -         * and the nanosleep syscall still aborts the sleep.
>>>>> -         */
>>>>> -        cpu->thread_kicked = false;
>>>>> -        cpu->hvf->ts = (struct timespec){ };
>>>>> -        cpus_kick_thread(cpu);
>>>>> -    } else {
>>>>> -        hv_vcpus_exit(&cpu->hvf->fd, 1);
>>>>> -    }
>>>>> +    cpus_kick_thread(cpu);
>>>>> +    hv_vcpus_exit(&cpu->hvf->fd, 1);
>>>>>     }
>>>>>
>>>>>     static int hvf_inject_interrupts(CPUState *cpu)
>>>>> @@ -349,6 +341,18 @@ static int hvf_inject_interrupts(CPUState *cpu)
>>>>>         return 0;
>>>>>     }
>>>>>
>>>>> +static void hvf_wait_for_ipi(CPUState *cpu, struct timespec *ts)
>>>>> +{
>>>>> +    /*
>>>>> +     * Use pselect to sleep so that other threads can IPI us while we're
>>>>> +     * sleeping.
>>>>> +     */
>>>>> +    qatomic_mb_set(&cpu->thread_kicked, false);
>>>>> +    qemu_mutex_unlock_iothread();
>>>>> +    pselect(0, 0, 0, 0, ts, &cpu->hvf->unblock_ipi_mask);
>>>>> +    qemu_mutex_lock_iothread();
>>>>> +}
>>>>> +
>>>>>     int hvf_vcpu_exec(CPUState *cpu)
>>>>>     {
>>>>>         ARMCPU *arm_cpu = ARM_CPU(cpu);
>>>>> @@ -357,15 +361,11 @@ int hvf_vcpu_exec(CPUState *cpu)
>>>>>         hv_return_t r;
>>>>>         int ret = 0;
>>>>>
>>>>> -    qemu_mutex_unlock_iothread();
>>>>> -
>>>>>         do {
>>>>>             bool advance_pc = false;
>>>>>
>>>>> -        qemu_mutex_lock_iothread();
>>>>>             current_cpu = cpu;
>>>>>             qemu_wait_io_event_common(cpu);
>>>>> -        qemu_mutex_unlock_iothread();
>>>>>
>>>>>             flush_cpu_state(cpu);
>>>>>
>>>>> @@ -374,10 +374,10 @@ int hvf_vcpu_exec(CPUState *cpu)
>>>>>             }
>>>>>
>>>>>             if (cpu->halted) {
>>>>> -            qemu_mutex_lock_iothread();
>>>>>                 return EXCP_HLT;
>>>>>             }
>>>>>
>>>>> +        qemu_mutex_unlock_iothread();
>>>>>             assert_hvf_ok(hv_vcpu_run(cpu->hvf->fd));
>>>>>
>>>>>             /* handle VMEXIT */
>>>>> @@ -385,15 +385,14 @@ int hvf_vcpu_exec(CPUState *cpu)
>>>>>             uint64_t syndrome = hvf_exit->exception.syndrome;
>>>>>             uint32_t ec = syn_get_ec(syndrome);
>>>>>
>>>>> +        qemu_mutex_lock_iothread();
>>>>>             switch (exit_reason) {
>>>>>             case HV_EXIT_REASON_EXCEPTION:
>>>>>                 /* This is the main one, handle below. */
>>>>>                 break;
>>>>>             case HV_EXIT_REASON_VTIMER_ACTIVATED:
>>>>> -            qemu_mutex_lock_iothread();
>>>>>                 current_cpu = cpu;
>>>>>                 qemu_set_irq(arm_cpu->gt_timer_outputs[GTIMER_VIRT], 1);
>>>>> -            qemu_mutex_unlock_iothread();
>>>>>                 continue;
>>>>>             case HV_EXIT_REASON_CANCELED:
>>>>>                 /* we got kicked, no exit to process */
>>>>> @@ -413,7 +412,6 @@ int hvf_vcpu_exec(CPUState *cpu)
>>>>>                 uint32_t srt = (syndrome >> 16) & 0x1f;
>>>>>                 uint64_t val = 0;
>>>>>
>>>>> -            qemu_mutex_lock_iothread();
>>>>>                 current_cpu = cpu;
>>>>>
>>>>>                 DPRINTF("data abort: [pc=0x%llx va=0x%016llx pa=0x%016llx isv=%x "
>>>>> @@ -446,8 +444,6 @@ int hvf_vcpu_exec(CPUState *cpu)
>>>>>                     hvf_set_reg(cpu, srt, val);
>>>>>                 }
>>>>>
>>>>> -            qemu_mutex_unlock_iothread();
>>>>> -
>>>>>                 advance_pc = true;
>>>>>                 break;
>>>>>             }
>>>>> @@ -493,68 +489,40 @@ int hvf_vcpu_exec(CPUState *cpu)
>>>>>             case EC_WFX_TRAP:
>>>>>                 if (!(syndrome & WFX_IS_WFE) && !(cpu->interrupt_request &
>>>>>                     (CPU_INTERRUPT_HARD | CPU_INTERRUPT_FIQ))) {
>>>>> -                uint64_t cval, ctl, val, diff, now;
>>>>> +                advance_pc = true;
>>>>>
>>>>> -                /* Set up a local timer for vtimer if necessary ... */
>>>>> -                r = hv_vcpu_get_sys_reg(cpu->hvf->fd, HV_SYS_REG_CNTV_CTL_EL0, &ctl);
>>>>> -                assert_hvf_ok(r);
>>>>> -                r = hv_vcpu_get_sys_reg(cpu->hvf->fd, HV_SYS_REG_CNTV_CVAL_EL0, &cval);
>>>>> +                uint64_t ctl;
>>>>> +                r = hv_vcpu_get_sys_reg(cpu->hvf->fd, HV_SYS_REG_CNTV_CTL_EL0,
>>>>> +                                        &ctl);
>>>>>                     assert_hvf_ok(r);
>>>>>
>>>>> -                asm volatile("mrs %0, cntvct_el0" : "=r"(val));
>>>>> -                diff = cval - val;
>>>>> -
>>>>> -                now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) /
>>>>> -                      gt_cntfrq_period_ns(arm_cpu);
>>>>> -
>>>>> -                /* Timer disabled or masked, just wait for long */
>>>>>                     if (!(ctl & 1) || (ctl & 2)) {
>>>>> -                    diff = (120 * NANOSECONDS_PER_SECOND) /
>>>>> -                           gt_cntfrq_period_ns(arm_cpu);
>>>>> +                    /* Timer disabled or masked, just wait for an IPI. */
>>>>> +                    hvf_wait_for_ipi(cpu, NULL);
>>>>> +                    break;
>>>>>                     }
>>>>>
>>>>> -                if (diff < INT64_MAX) {
>>>>> -                    uint64_t ns = diff * gt_cntfrq_period_ns(arm_cpu);
>>>>> -                    struct timespec *ts = &cpu->hvf->ts;
>>>>> -
>>>>> -                    *ts = (struct timespec){
>>>>> -                        .tv_sec = ns / NANOSECONDS_PER_SECOND,
>>>>> -                        .tv_nsec = ns % NANOSECONDS_PER_SECOND,
>>>>> -                    };
>>>>> -
>>>>> -                    /*
>>>>> -                     * Waking up easily takes 1ms, don't go to sleep for smaller
>>>>> -                     * time periods than 2ms.
>>>>> -                     */
>>>>> -                    if (!ts->tv_sec && (ts->tv_nsec < (SCALE_MS * 2))) {
>>>>> -                        advance_pc = true;
>>>>> -                        break;
>>>>> -                    }
>>>>> -
>>>>> -                    /* Set cpu->hvf->sleeping so that we get a SIG_IPI signal. */
>>>>> -                    cpu->hvf->sleeping = true;
>>>>> -                    smp_mb();
>>>>> -
>>>>> -                    /* Bail out if we received an IRQ meanwhile */
>>>>> -                    if (cpu->thread_kicked || (cpu->interrupt_request &
>>>>> -                        (CPU_INTERRUPT_HARD | CPU_INTERRUPT_FIQ))) {
>>>>> -                        cpu->hvf->sleeping = false;
>>>>> -                        break;
>>>>> -                    }
>>>>> -
>>>>> -                    /* nanosleep returns on signal, so we wake up on kick. */
>>>>> -                    nanosleep(ts, NULL);
>>>>> -
>>>>> -                    /* Out of sleep - either naturally or because of a kick */
>>>>> -                    cpu->hvf->sleeping = false;
>>>>> +                uint64_t cval;
>>>>> +                r = hv_vcpu_get_sys_reg(cpu->hvf->fd, HV_SYS_REG_CNTV_CVAL_EL0,
>>>>> +                                        &cval);
>>>>> +                assert_hvf_ok(r);
>>>>> +
>>>>> +                int64_t ticks_to_sleep = cval - mach_absolute_time();
> >>>> I think you touched on it in a previous thread, but would you mind
> >>>> to explain again why mach_absolute_time() is the right thing to check
> >>>> cval against? If I read the headers correctly, the cntvoff register
> >>>> should be 0, so cntvct should be the reference time, no?
>>> In my experiments I've found that CNTPCT_EL0 and CNTVCT_EL0 are the
>>> same when read on the host (i.e. host CNTVOFF_EL2 = 0). When we look
>>> at the guest we see that CNTPCT_EL0 corresponds to
>>> mach_absolute_time() on the host and not host CNTPCT_EL0 (if you look
>>> at XNU kernel sources you will see that mach_absolute_time() reads
>>> CNTPCT_EL0 and adds a constant corresponding to the amount of time
>>> that the machine spends asleep) so I think that what's going on at the
>>> hypervisor level is that guest CNTPOFF_EL2 is being set to the same
>>> constant to make it correspond to mach_absolute_time().
>>
>> Yes, I can absolutely see how it's different from CNTPCT, but it should
>> be identical to CNTVCT_EL0 inside QEMU, no?
> Not necessarily. It's possible for the guest to observe a different
> CNTVCT_EL0 either because CNTPOFF_EL2 is set to a non-zero value or
> because the hypervisor is secretly adding the "time spent asleep"
> constant to CNTVOFF_EL2 on top of the value that we set it to via the
> API.


Yes, it could be doing the same thing with mach_absolute_time(). What 
I'm asking is whether calling that over just pulling CNTVCT directly 
gives you any benefit.


Alex




^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 1/2] arm/hvf: Optimize and simplify WFI handling
  2020-12-02  1:54         ` Alexander Graf
@ 2020-12-02  1:57           ` Peter Collingbourne
  0 siblings, 0 replies; 10+ messages in thread
From: Peter Collingbourne @ 2020-12-02  1:57 UTC (permalink / raw)
  To: Alexander Graf
  Cc: Frank Yang, Roman Bolshakov, Peter Maydell, Eduardo Habkost,
	Richard Henderson, qemu-devel, Cameron Esfahani, qemu-arm,
	Claudio Fontana, Paolo Bonzini

On Tue, Dec 1, 2020 at 5:54 PM Alexander Graf <agraf@csgraf.de> wrote:
>
>
> On 02.12.20 02:52, Peter Collingbourne wrote:
> > On Tue, Dec 1, 2020 at 5:39 PM Alexander Graf <agraf@csgraf.de> wrote:
> >>
> >> On 02.12.20 02:32, Peter Collingbourne wrote:
> >>> On Tue, Dec 1, 2020 at 3:24 PM Alexander Graf <agraf@csgraf.de> wrote:
> >>>> On 01.12.20 22:00, Peter Collingbourne wrote:
> >>>>> Sleep on WFx until the VTIMER is due but allow ourselves to be woken
> >>>>> up on IPI.
> >>>>>
> >>>>> Signed-off-by: Peter Collingbourne <pcc@google.com>
> >>>>> ---
> >>>>> v2:
> >>>>> - simplify locking further
> >>>>> - wait indefinitely on disabled or masked timers
> >>>>>
> >>>>>     accel/hvf/hvf-cpus.c     |   5 +-
> >>>>>     include/sysemu/hvf_int.h |   3 +-
> >>>>>     target/arm/hvf/hvf.c     | 116 ++++++++++++++-------------------------
> >>>>>     3 files changed, 43 insertions(+), 81 deletions(-)
> >>>>>
> >>>>> diff --git a/accel/hvf/hvf-cpus.c b/accel/hvf/hvf-cpus.c
> >>>>> index 4360f64671..b2c8fb57f6 100644
> >>>>> --- a/accel/hvf/hvf-cpus.c
> >>>>> +++ b/accel/hvf/hvf-cpus.c
> >>>>> @@ -344,9 +344,8 @@ static int hvf_init_vcpu(CPUState *cpu)
> >>>>>         sigact.sa_handler = dummy_signal;
> >>>>>         sigaction(SIG_IPI, &sigact, NULL);
> >>>>>
> >>>>> -    pthread_sigmask(SIG_BLOCK, NULL, &set);
> >>>>> -    sigdelset(&set, SIG_IPI);
> >>>>> -    pthread_sigmask(SIG_SETMASK, &set, NULL);
> >>>>> +    pthread_sigmask(SIG_BLOCK, NULL, &cpu->hvf->unblock_ipi_mask);
> >>>>> +    sigdelset(&cpu->hvf->unblock_ipi_mask, SIG_IPI);
> >>>>>
> >>>>>     #ifdef __aarch64__
> >>>>>         r = hv_vcpu_create(&cpu->hvf->fd, (hv_vcpu_exit_t **)&cpu->hvf->exit, NULL);
> >>>>> diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h
> >>>>> index c56baa3ae8..13adf6ea77 100644
> >>>>> --- a/include/sysemu/hvf_int.h
> >>>>> +++ b/include/sysemu/hvf_int.h
> >>>>> @@ -62,8 +62,7 @@ extern HVFState *hvf_state;
> >>>>>     struct hvf_vcpu_state {
> >>>>>         uint64_t fd;
> >>>>>         void *exit;
> >>>>> -    struct timespec ts;
> >>>>> -    bool sleeping;
> >>>>> +    sigset_t unblock_ipi_mask;
> >>>>>     };
> >>>>>
> >>>>>     void assert_hvf_ok(hv_return_t ret);
> >>>>> diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
> >>>>> index 8fe10966d2..3321d48aa2 100644
> >>>>> --- a/target/arm/hvf/hvf.c
> >>>>> +++ b/target/arm/hvf/hvf.c
> >>>>> @@ -2,6 +2,7 @@
> >>>>>      * QEMU Hypervisor.framework support for Apple Silicon
> >>>>>
> >>>>>      * Copyright 2020 Alexander Graf <agraf@csgraf.de>
> >>>>> + * Copyright 2020 Google LLC
> >>>>>      *
> >>>>>      * This work is licensed under the terms of the GNU GPL, version 2 or later.
> >>>>>      * See the COPYING file in the top-level directory.
> >>>>> @@ -18,6 +19,7 @@
> >>>>>     #include "sysemu/hw_accel.h"
> >>>>>
> >>>>>     #include <Hypervisor/Hypervisor.h>
> >>>>> +#include <mach/mach_time.h>
> >>>>>
> >>>>>     #include "exec/address-spaces.h"
> >>>>>     #include "hw/irq.h"
> >>>>> @@ -320,18 +322,8 @@ int hvf_arch_init_vcpu(CPUState *cpu)
> >>>>>
> >>>>>     void hvf_kick_vcpu_thread(CPUState *cpu)
> >>>>>     {
> >>>>> -    if (cpu->hvf->sleeping) {
> >>>>> -        /*
> >>>>> -         * When sleeping, make sure we always send signals. Also, clear the
> >>>>> -         * timespec, so that an IPI that arrives between setting hvf->sleeping
> >>>>> -         * and the nanosleep syscall still aborts the sleep.
> >>>>> -         */
> >>>>> -        cpu->thread_kicked = false;
> >>>>> -        cpu->hvf->ts = (struct timespec){ };
> >>>>> -        cpus_kick_thread(cpu);
> >>>>> -    } else {
> >>>>> -        hv_vcpus_exit(&cpu->hvf->fd, 1);
> >>>>> -    }
> >>>>> +    cpus_kick_thread(cpu);
> >>>>> +    hv_vcpus_exit(&cpu->hvf->fd, 1);
> >>>>>     }
> >>>>>
> >>>>>     static int hvf_inject_interrupts(CPUState *cpu)
> >>>>> @@ -349,6 +341,18 @@ static int hvf_inject_interrupts(CPUState *cpu)
> >>>>>         return 0;
> >>>>>     }
> >>>>>
> >>>>> +static void hvf_wait_for_ipi(CPUState *cpu, struct timespec *ts)
> >>>>> +{
> >>>>> +    /*
> >>>>> +     * Use pselect to sleep so that other threads can IPI us while we're
> >>>>> +     * sleeping.
> >>>>> +     */
> >>>>> +    qatomic_mb_set(&cpu->thread_kicked, false);
> >>>>> +    qemu_mutex_unlock_iothread();
> >>>>> +    pselect(0, 0, 0, 0, ts, &cpu->hvf->unblock_ipi_mask);
> >>>>> +    qemu_mutex_lock_iothread();
> >>>>> +}
> >>>>> +
> >>>>>     int hvf_vcpu_exec(CPUState *cpu)
> >>>>>     {
> >>>>>         ARMCPU *arm_cpu = ARM_CPU(cpu);
> >>>>> @@ -357,15 +361,11 @@ int hvf_vcpu_exec(CPUState *cpu)
> >>>>>         hv_return_t r;
> >>>>>         int ret = 0;
> >>>>>
> >>>>> -    qemu_mutex_unlock_iothread();
> >>>>> -
> >>>>>         do {
> >>>>>             bool advance_pc = false;
> >>>>>
> >>>>> -        qemu_mutex_lock_iothread();
> >>>>>             current_cpu = cpu;
> >>>>>             qemu_wait_io_event_common(cpu);
> >>>>> -        qemu_mutex_unlock_iothread();
> >>>>>
> >>>>>             flush_cpu_state(cpu);
> >>>>>
> >>>>> @@ -374,10 +374,10 @@ int hvf_vcpu_exec(CPUState *cpu)
> >>>>>             }
> >>>>>
> >>>>>             if (cpu->halted) {
> >>>>> -            qemu_mutex_lock_iothread();
> >>>>>                 return EXCP_HLT;
> >>>>>             }
> >>>>>
> >>>>> +        qemu_mutex_unlock_iothread();
> >>>>>             assert_hvf_ok(hv_vcpu_run(cpu->hvf->fd));
> >>>>>
> >>>>>             /* handle VMEXIT */
> >>>>> @@ -385,15 +385,14 @@ int hvf_vcpu_exec(CPUState *cpu)
> >>>>>             uint64_t syndrome = hvf_exit->exception.syndrome;
> >>>>>             uint32_t ec = syn_get_ec(syndrome);
> >>>>>
> >>>>> +        qemu_mutex_lock_iothread();
> >>>>>             switch (exit_reason) {
> >>>>>             case HV_EXIT_REASON_EXCEPTION:
> >>>>>                 /* This is the main one, handle below. */
> >>>>>                 break;
> >>>>>             case HV_EXIT_REASON_VTIMER_ACTIVATED:
> >>>>> -            qemu_mutex_lock_iothread();
> >>>>>                 current_cpu = cpu;
> >>>>>                 qemu_set_irq(arm_cpu->gt_timer_outputs[GTIMER_VIRT], 1);
> >>>>> -            qemu_mutex_unlock_iothread();
> >>>>>                 continue;
> >>>>>             case HV_EXIT_REASON_CANCELED:
> >>>>>                 /* we got kicked, no exit to process */
> >>>>> @@ -413,7 +412,6 @@ int hvf_vcpu_exec(CPUState *cpu)
> >>>>>                 uint32_t srt = (syndrome >> 16) & 0x1f;
> >>>>>                 uint64_t val = 0;
> >>>>>
> >>>>> -            qemu_mutex_lock_iothread();
> >>>>>                 current_cpu = cpu;
> >>>>>
> >>>>>                 DPRINTF("data abort: [pc=0x%llx va=0x%016llx pa=0x%016llx isv=%x "
> >>>>> @@ -446,8 +444,6 @@ int hvf_vcpu_exec(CPUState *cpu)
> >>>>>                     hvf_set_reg(cpu, srt, val);
> >>>>>                 }
> >>>>>
> >>>>> -            qemu_mutex_unlock_iothread();
> >>>>> -
> >>>>>                 advance_pc = true;
> >>>>>                 break;
> >>>>>             }
> >>>>> @@ -493,68 +489,40 @@ int hvf_vcpu_exec(CPUState *cpu)
> >>>>>             case EC_WFX_TRAP:
> >>>>>                 if (!(syndrome & WFX_IS_WFE) && !(cpu->interrupt_request &
> >>>>>                     (CPU_INTERRUPT_HARD | CPU_INTERRUPT_FIQ))) {
> >>>>> -                uint64_t cval, ctl, val, diff, now;
> >>>>> +                advance_pc = true;
> >>>>>
> >>>>> -                /* Set up a local timer for vtimer if necessary ... */
> >>>>> -                r = hv_vcpu_get_sys_reg(cpu->hvf->fd, HV_SYS_REG_CNTV_CTL_EL0, &ctl);
> >>>>> -                assert_hvf_ok(r);
> >>>>> -                r = hv_vcpu_get_sys_reg(cpu->hvf->fd, HV_SYS_REG_CNTV_CVAL_EL0, &cval);
> >>>>> +                uint64_t ctl;
> >>>>> +                r = hv_vcpu_get_sys_reg(cpu->hvf->fd, HV_SYS_REG_CNTV_CTL_EL0,
> >>>>> +                                        &ctl);
> >>>>>                     assert_hvf_ok(r);
> >>>>>
> >>>>> -                asm volatile("mrs %0, cntvct_el0" : "=r"(val));
> >>>>> -                diff = cval - val;
> >>>>> -
> >>>>> -                now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) /
> >>>>> -                      gt_cntfrq_period_ns(arm_cpu);
> >>>>> -
> >>>>> -                /* Timer disabled or masked, just wait for long */
> >>>>>                     if (!(ctl & 1) || (ctl & 2)) {
> >>>>> -                    diff = (120 * NANOSECONDS_PER_SECOND) /
> >>>>> -                           gt_cntfrq_period_ns(arm_cpu);
> >>>>> +                    /* Timer disabled or masked, just wait for an IPI. */
> >>>>> +                    hvf_wait_for_ipi(cpu, NULL);
> >>>>> +                    break;
> >>>>>                     }
> >>>>>
> >>>>> -                if (diff < INT64_MAX) {
> >>>>> -                    uint64_t ns = diff * gt_cntfrq_period_ns(arm_cpu);
> >>>>> -                    struct timespec *ts = &cpu->hvf->ts;
> >>>>> -
> >>>>> -                    *ts = (struct timespec){
> >>>>> -                        .tv_sec = ns / NANOSECONDS_PER_SECOND,
> >>>>> -                        .tv_nsec = ns % NANOSECONDS_PER_SECOND,
> >>>>> -                    };
> >>>>> -
> >>>>> -                    /*
> >>>>> -                     * Waking up easily takes 1ms, don't go to sleep for smaller
> >>>>> -                     * time periods than 2ms.
> >>>>> -                     */
> >>>>> -                    if (!ts->tv_sec && (ts->tv_nsec < (SCALE_MS * 2))) {
> >>>>> -                        advance_pc = true;
> >>>>> -                        break;
> >>>>> -                    }
> >>>>> -
> >>>>> -                    /* Set cpu->hvf->sleeping so that we get a SIG_IPI signal. */
> >>>>> -                    cpu->hvf->sleeping = true;
> >>>>> -                    smp_mb();
> >>>>> -
> >>>>> -                    /* Bail out if we received an IRQ meanwhile */
> >>>>> -                    if (cpu->thread_kicked || (cpu->interrupt_request &
> >>>>> -                        (CPU_INTERRUPT_HARD | CPU_INTERRUPT_FIQ))) {
> >>>>> -                        cpu->hvf->sleeping = false;
> >>>>> -                        break;
> >>>>> -                    }
> >>>>> -
> >>>>> -                    /* nanosleep returns on signal, so we wake up on kick. */
> >>>>> -                    nanosleep(ts, NULL);
> >>>>> -
> >>>>> -                    /* Out of sleep - either naturally or because of a kick */
> >>>>> -                    cpu->hvf->sleeping = false;
> >>>>> +                uint64_t cval;
> >>>>> +                r = hv_vcpu_get_sys_reg(cpu->hvf->fd, HV_SYS_REG_CNTV_CVAL_EL0,
> >>>>> +                                        &cval);
> >>>>> +                assert_hvf_ok(r);
> >>>>> +
> >>>>> +                int64_t ticks_to_sleep = cval - mach_absolute_time();
> >>>> I think you touched on it in a previous thread, but would you mind
> >>>> to explain again why mach_absolute_time() is the right thing to check
> >>>> cval against? If I read the headers correctly, the cntvoff register
> >>>> should be 0, so cntvct should be the reference time, no?
> >>> In my experiments I've found that CNTPCT_EL0 and CNTVCT_EL0 are the
> >>> same when read on the host (i.e. host CNTVOFF_EL2 = 0). When we look
> >>> at the guest we see that CNTPCT_EL0 corresponds to
> >>> mach_absolute_time() on the host and not host CNTPCT_EL0 (if you look
> >>> at XNU kernel sources you will see that mach_absolute_time() reads
> >>> CNTPCT_EL0 and adds a constant corresponding to the amount of time
> >>> that the machine spends asleep) so I think that what's going on at the
> >>> hypervisor level is that guest CNTPOFF_EL2 is being set to the same
> >>> constant to make it correspond to mach_absolute_time().
> >>
> >> Yes, I can absolutely see how it's different from CNTPCT, but it should
> >> be identical to CNTVCT_EL0 inside QEMU, no?
> > Not necessarily. It's possible for the guest to observe a different
> > CNTVCT_EL0 either because CNTPOFF_EL2 is set to a non-zero value or
> > because the hypervisor is secretly adding the "time spent asleep"
> > constant to CNTVOFF_EL2 on top of the value that we set it to via the
> > API.
>
>
> Yes, it could be doing the same thing with mach_absolute_time(). What
> I'm asking is whether calling that over just pulling CNTVCT directly
> gives you any benefit.

If you run this program you will see that mach_absolute_time() may
give you a different result to CNTVCT.

pcc@pac-mini ~> cat time.c
#include <stdio.h>
#include <mach/mach_time.h>

int main() {
  uint64_t cntpct, cntvct;
  __asm__ __volatile__("mrs %0, cntpct_el0" : "=r"(cntpct));
  __asm__ __volatile__("mrs %0, cntvct_el0" : "=r"(cntvct));

  printf("cntpct = %lx cntvct = %lx mat = %lx\n", cntpct, cntvct,
mach_absolute_time());
}
pcc@pac-mini ~> ./time
cntpct = d714e2217d0 cntvct = d714e2217d0 mat = 9259a829290

Peter


^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2020-12-02  2:04 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-12-01 21:00 [PATCH v2 1/2] arm/hvf: Optimize and simplify WFI handling Peter Collingbourne via
2020-12-01 21:00 ` [PATCH v2 2/2] arm/hvf: Stop setting current_cpu Peter Collingbourne via
2020-12-01 22:11   ` Alexander Graf
2020-12-02  0:13     ` Peter Collingbourne
2020-12-01 23:24 ` [PATCH v2 1/2] arm/hvf: Optimize and simplify WFI handling Alexander Graf
2020-12-02  1:32   ` Peter Collingbourne
2020-12-02  1:39     ` Alexander Graf
2020-12-02  1:52       ` Peter Collingbourne
2020-12-02  1:54         ` Alexander Graf
2020-12-02  1:57           ` Peter Collingbourne

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.