Xen-Devel Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH RFC 2] [DO NOT APPLY] introduce VCPUOP_register_runstate_phys_memory_area hypercall
@ 2019-05-24 18:12 Andrii Anisov
  2019-05-24 18:12 ` [Xen-devel] " Andrii Anisov
                   ` (4 more replies)
  0 siblings, 5 replies; 51+ messages in thread
From: Andrii Anisov @ 2019-05-24 18:12 UTC (permalink / raw)
  Cc: Stefano Stabellini, Andrii Anisov, Konrad Rzeszutek Wilk,
	George Dunlap, Andrew Cooper, Ian Jackson, Tim Deegan,
	Julien Grall, Jan Beulich, xen-devel, Wei Liu

From: Andrii Anisov <andrii_anisov@epam.com>

An RFC version of the runstate registration with phys address.
Runstate area access is implemented with mapping on each update once for
all accesses.

Signed-off-by: Andrii Anisov <andrii_anisov@epam.com>
---
 xen/arch/arm/domain.c     |  63 ++++++++++++++++++++++++++---
 xen/common/domain.c       | 101 ++++++++++++++++++++++++++++++++++++++++++++--
 xen/include/public/vcpu.h |  15 +++++++
 xen/include/xen/sched.h   |  28 +++++++++----
 4 files changed, 190 insertions(+), 17 deletions(-)

diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
index a9f7ff5..04c4cff 100644
--- a/xen/arch/arm/domain.c
+++ b/xen/arch/arm/domain.c
@@ -274,17 +274,15 @@ static void ctxt_switch_to(struct vcpu *n)
     virt_timer_restore(n);
 }
 
-/* Update per-VCPU guest runstate shared memory area (if registered). */
-static void update_runstate_area(struct vcpu *v)
+static void update_runstate_by_gvaddr(struct vcpu *v)
 {
     void __user *guest_handle = NULL;
 
-    if ( guest_handle_is_null(runstate_guest(v)) )
-        return;
+    ASSERT(!guest_handle_is_null(runstate_guest_virt(v)));
 
     if ( VM_ASSIST(v->domain, runstate_update_flag) )
     {
-        guest_handle = &v->runstate_guest.p->state_entry_time + 1;
+        guest_handle = &v->runstate_guest.virt.p->state_entry_time + 1;
         guest_handle--;
         v->runstate.state_entry_time |= XEN_RUNSTATE_UPDATE;
         __raw_copy_to_guest(guest_handle,
@@ -292,7 +290,7 @@ static void update_runstate_area(struct vcpu *v)
         smp_wmb();
     }
 
-    __copy_to_guest(runstate_guest(v), &v->runstate, 1);
+    __copy_to_guest(runstate_guest_virt(v), &v->runstate, 1);
 
     if ( guest_handle )
     {
@@ -303,6 +301,58 @@ static void update_runstate_area(struct vcpu *v)
     }
 }
 
+extern int map_runstate_area(struct vcpu *v, struct vcpu_runstate_info **area);
+extern void unmap_runstate_area(struct vcpu_runstate_info *area);
+
+static void update_runstate_by_gpaddr(struct vcpu *v)
+{
+    struct vcpu_runstate_info *runstate;
+
+    if ( map_runstate_area(v, &runstate) )
+        return;
+
+    if ( VM_ASSIST(v->domain, runstate_update_flag) )
+    {
+        runstate->state_entry_time |= XEN_RUNSTATE_UPDATE;
+        smp_wmb();
+        v->runstate.state_entry_time |= XEN_RUNSTATE_UPDATE;
+    }
+
+    memcpy(runstate, &v->runstate, sizeof(v->runstate));
+
+    if ( VM_ASSIST(v->domain, runstate_update_flag) )
+    {
+        runstate->state_entry_time &= ~XEN_RUNSTATE_UPDATE;
+        smp_wmb();
+        v->runstate.state_entry_time &= ~XEN_RUNSTATE_UPDATE;
+    }
+
+    unmap_runstate_area(runstate);
+}
+
+/* Update per-VCPU guest runstate shared memory area (if registered). */
+static void update_runstate_area(struct vcpu *v)
+{
+    if ( xchg(&v->runstate_in_use, 1) )
+        return;
+
+    switch ( v->runstate_guest_type )
+    {
+    case RUNSTATE_NONE:
+       break;
+
+    case RUNSTATE_VADDR:
+       update_runstate_by_gvaddr(v);
+       break;
+
+    case RUNSTATE_PADDR:
+       update_runstate_by_gpaddr(v);
+       break;
+    }
+
+    xchg(&v->runstate_in_use, 0);
+}
+
 static void schedule_tail(struct vcpu *prev)
 {
     ctxt_switch_from(prev);
@@ -998,6 +1048,7 @@ long do_arm_vcpu_op(int cmd, unsigned int vcpuid, XEN_GUEST_HANDLE_PARAM(void) a
     {
         case VCPUOP_register_vcpu_info:
         case VCPUOP_register_runstate_memory_area:
+        case VCPUOP_register_runstate_phys_memory_area:
             return do_vcpu_op(cmd, vcpuid, arg);
         default:
             return -EINVAL;
diff --git a/xen/common/domain.c b/xen/common/domain.c
index 32bca8d..f167a68 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -700,6 +700,68 @@ int rcu_lock_live_remote_domain_by_id(domid_t dom, struct domain **d)
     return 0;
 }
 
+void unmap_runstate_area(struct vcpu_runstate_info *area)
+{
+    mfn_t mfn;
+
+    ASSERT(area != NULL);
+
+    mfn = _mfn(domain_page_map_to_mfn(area));
+
+    unmap_domain_page_global((void *)
+                             ((unsigned long)area &
+                              PAGE_MASK));
+
+    put_page_and_type(mfn_to_page(mfn));
+}
+
+int map_runstate_area(struct vcpu *v, struct vcpu_runstate_info **area)
+{
+    unsigned long offset = v->runstate_guest.phys & ~PAGE_MASK;
+    gfn_t gfn = gaddr_to_gfn(v->runstate_guest.phys);
+    struct domain *d = v->domain;
+    void *mapping;
+    struct page_info *page;
+    size_t size = sizeof(struct vcpu_runstate_info);
+
+    if ( offset > (PAGE_SIZE - size) )
+        return -EINVAL;
+
+    page = get_page_from_gfn(d, gfn_x(gfn), NULL, P2M_ALLOC);
+    if ( !page )
+        return -EINVAL;
+
+    if ( !get_page_type(page, PGT_writable_page) )
+    {
+        put_page(page);
+        return -EINVAL;
+    }
+
+    mapping = __map_domain_page_global(page);
+
+    if ( mapping == NULL )
+    {
+        put_page_and_type(page);
+        return -ENOMEM;
+    }
+
+    *area = mapping + offset;
+
+    return 0;
+}
+
+static void discard_runstate_area(struct vcpu *v)
+{
+    v->runstate_guest_type = RUNSTATE_NONE;
+}
+
+static void discard_runstate_area_locked(struct vcpu *v)
+{
+    while ( xchg(&v->runstate_in_use, 1) );
+    discard_runstate_area(v);
+    xchg(&v->runstate_in_use, 0);
+}
+
 int domain_kill(struct domain *d)
 {
     int rc = 0;
@@ -738,7 +800,10 @@ int domain_kill(struct domain *d)
         if ( cpupool_move_domain(d, cpupool0) )
             return -ERESTART;
         for_each_vcpu ( d, v )
+        {
+            discard_runstate_area_locked(v);
             unmap_vcpu_info(v);
+        }
         d->is_dying = DOMDYING_dead;
         /* Mem event cleanup has to go here because the rings 
          * have to be put before we call put_domain. */
@@ -1192,7 +1257,7 @@ int domain_soft_reset(struct domain *d)
 
     for_each_vcpu ( d, v )
     {
-        set_xen_guest_handle(runstate_guest(v), NULL);
+        discard_runstate_area_locked(v);
         unmap_vcpu_info(v);
     }
 
@@ -1520,18 +1585,46 @@ long do_vcpu_op(int cmd, unsigned int vcpuid, XEN_GUEST_HANDLE_PARAM(void) arg)
             break;
 
         rc = 0;
-        runstate_guest(v) = area.addr.h;
+
+        while( xchg(&v->runstate_in_use, 1) == 0);
+
+        discard_runstate_area(v);
+
+        runstate_guest_virt(v) = area.addr.h;
+        v->runstate_guest_type = RUNSTATE_VADDR;
 
         if ( v == current )
         {
-            __copy_to_guest(runstate_guest(v), &v->runstate, 1);
+            __copy_to_guest(runstate_guest_virt(v), &v->runstate, 1);
         }
         else
         {
             vcpu_runstate_get(v, &runstate);
-            __copy_to_guest(runstate_guest(v), &runstate, 1);
+            __copy_to_guest(runstate_guest_virt(v), &runstate, 1);
         }
 
+        xchg(&v->runstate_in_use, 0);
+
+        break;
+    }
+
+    case VCPUOP_register_runstate_phys_memory_area:
+    {
+        struct vcpu_register_runstate_memory_area area;
+
+        rc = -EFAULT;
+        if ( copy_from_guest(&area, arg, 1) )
+             break;
+
+        while( xchg(&v->runstate_in_use, 1) == 0);
+
+        discard_runstate_area(v);
+        v->runstate_guest.phys = area.addr.p;
+        v->runstate_guest_type = RUNSTATE_PADDR;
+
+        xchg(&v->runstate_in_use, 0);
+        rc = 0;
+
         break;
     }
 
diff --git a/xen/include/public/vcpu.h b/xen/include/public/vcpu.h
index 3623af9..d7da4a3 100644
--- a/xen/include/public/vcpu.h
+++ b/xen/include/public/vcpu.h
@@ -235,6 +235,21 @@ struct vcpu_register_time_memory_area {
 typedef struct vcpu_register_time_memory_area vcpu_register_time_memory_area_t;
 DEFINE_XEN_GUEST_HANDLE(vcpu_register_time_memory_area_t);
 
+/*
+ * Register a shared memory area from which the guest may obtain its own
+ * runstate information without needing to execute a hypercall.
+ * Notes:
+ *  1. The registered address must be guest's physical address.
+ *  2. The registered runstate area should not cross page boundary.
+ *  3. Only one shared area may be registered per VCPU. The shared area is
+ *     updated by the hypervisor each time the VCPU is scheduled. Thus
+ *     runstate.state will always be RUNSTATE_running and
+ *     runstate.state_entry_time will indicate the system time at which the
+ *     VCPU was last scheduled to run.
+ * @extra_arg == pointer to vcpu_register_runstate_memory_area structure.
+ */
+#define VCPUOP_register_runstate_phys_memory_area 14
+
 #endif /* __XEN_PUBLIC_VCPU_H__ */
 
 /*
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index edee52d..8ac597b 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -163,17 +163,31 @@ struct vcpu
     void            *sched_priv;    /* scheduler-specific data */
 
     struct vcpu_runstate_info runstate;
+
+    enum {
+        RUNSTATE_NONE = 0,
+        RUNSTATE_PADDR = 1,
+        RUNSTATE_VADDR = 2,
+    } runstate_guest_type;
+
+    unsigned long runstate_in_use;
+
+    union
+    {
 #ifndef CONFIG_COMPAT
-# define runstate_guest(v) ((v)->runstate_guest)
-    XEN_GUEST_HANDLE(vcpu_runstate_info_t) runstate_guest; /* guest address */
+# define runstate_guest_virt(v) ((v)->runstate_guest.virt)
+           XEN_GUEST_HANDLE(vcpu_runstate_info_t) virt; /* guest address */
 #else
-# define runstate_guest(v) ((v)->runstate_guest.native)
-    union {
-        XEN_GUEST_HANDLE(vcpu_runstate_info_t) native;
-        XEN_GUEST_HANDLE(vcpu_runstate_info_compat_t) compat;
-    } runstate_guest; /* guest address */
+# define runstate_guest_virt(v) ((v)->runstate_guest.virt.native)
+           union {
+               XEN_GUEST_HANDLE(vcpu_runstate_info_t) native;
+               XEN_GUEST_HANDLE(vcpu_runstate_info_compat_t) compat;
+           } virt; /* guest address */
 #endif
 
+        paddr_t phys;
+    } runstate_guest;
+
     /* last time when vCPU is scheduled out */
     uint64_t last_run_time;
 
-- 
2.7.4


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

* [Xen-devel] [PATCH RFC 2] [DO NOT APPLY] introduce VCPUOP_register_runstate_phys_memory_area hypercall
  2019-05-24 18:12 [PATCH RFC 2] [DO NOT APPLY] introduce VCPUOP_register_runstate_phys_memory_area hypercall Andrii Anisov
@ 2019-05-24 18:12 ` " Andrii Anisov
  2019-05-24 18:12 ` [PATCH v3] Introduce runstate area registration with phys address Andrii Anisov
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 51+ messages in thread
From: Andrii Anisov @ 2019-05-24 18:12 UTC (permalink / raw)
  Cc: Stefano Stabellini, Andrii Anisov, Konrad Rzeszutek Wilk,
	George Dunlap, Andrew Cooper, Ian Jackson, Tim Deegan,
	Julien Grall, Jan Beulich, xen-devel, Wei Liu

From: Andrii Anisov <andrii_anisov@epam.com>

An RFC version of the runstate registration with phys address.
Runstate area access is implemented with mapping on each update once for
all accesses.

Signed-off-by: Andrii Anisov <andrii_anisov@epam.com>
---
 xen/arch/arm/domain.c     |  63 ++++++++++++++++++++++++++---
 xen/common/domain.c       | 101 ++++++++++++++++++++++++++++++++++++++++++++--
 xen/include/public/vcpu.h |  15 +++++++
 xen/include/xen/sched.h   |  28 +++++++++----
 4 files changed, 190 insertions(+), 17 deletions(-)

diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
index a9f7ff5..04c4cff 100644
--- a/xen/arch/arm/domain.c
+++ b/xen/arch/arm/domain.c
@@ -274,17 +274,15 @@ static void ctxt_switch_to(struct vcpu *n)
     virt_timer_restore(n);
 }
 
-/* Update per-VCPU guest runstate shared memory area (if registered). */
-static void update_runstate_area(struct vcpu *v)
+static void update_runstate_by_gvaddr(struct vcpu *v)
 {
     void __user *guest_handle = NULL;
 
-    if ( guest_handle_is_null(runstate_guest(v)) )
-        return;
+    ASSERT(!guest_handle_is_null(runstate_guest_virt(v)));
 
     if ( VM_ASSIST(v->domain, runstate_update_flag) )
     {
-        guest_handle = &v->runstate_guest.p->state_entry_time + 1;
+        guest_handle = &v->runstate_guest.virt.p->state_entry_time + 1;
         guest_handle--;
         v->runstate.state_entry_time |= XEN_RUNSTATE_UPDATE;
         __raw_copy_to_guest(guest_handle,
@@ -292,7 +290,7 @@ static void update_runstate_area(struct vcpu *v)
         smp_wmb();
     }
 
-    __copy_to_guest(runstate_guest(v), &v->runstate, 1);
+    __copy_to_guest(runstate_guest_virt(v), &v->runstate, 1);
 
     if ( guest_handle )
     {
@@ -303,6 +301,58 @@ static void update_runstate_area(struct vcpu *v)
     }
 }
 
+extern int map_runstate_area(struct vcpu *v, struct vcpu_runstate_info **area);
+extern void unmap_runstate_area(struct vcpu_runstate_info *area);
+
+static void update_runstate_by_gpaddr(struct vcpu *v)
+{
+    struct vcpu_runstate_info *runstate;
+
+    if ( map_runstate_area(v, &runstate) )
+        return;
+
+    if ( VM_ASSIST(v->domain, runstate_update_flag) )
+    {
+        runstate->state_entry_time |= XEN_RUNSTATE_UPDATE;
+        smp_wmb();
+        v->runstate.state_entry_time |= XEN_RUNSTATE_UPDATE;
+    }
+
+    memcpy(runstate, &v->runstate, sizeof(v->runstate));
+
+    if ( VM_ASSIST(v->domain, runstate_update_flag) )
+    {
+        runstate->state_entry_time &= ~XEN_RUNSTATE_UPDATE;
+        smp_wmb();
+        v->runstate.state_entry_time &= ~XEN_RUNSTATE_UPDATE;
+    }
+
+    unmap_runstate_area(runstate);
+}
+
+/* Update per-VCPU guest runstate shared memory area (if registered). */
+static void update_runstate_area(struct vcpu *v)
+{
+    if ( xchg(&v->runstate_in_use, 1) )
+        return;
+
+    switch ( v->runstate_guest_type )
+    {
+    case RUNSTATE_NONE:
+       break;
+
+    case RUNSTATE_VADDR:
+       update_runstate_by_gvaddr(v);
+       break;
+
+    case RUNSTATE_PADDR:
+       update_runstate_by_gpaddr(v);
+       break;
+    }
+
+    xchg(&v->runstate_in_use, 0);
+}
+
 static void schedule_tail(struct vcpu *prev)
 {
     ctxt_switch_from(prev);
@@ -998,6 +1048,7 @@ long do_arm_vcpu_op(int cmd, unsigned int vcpuid, XEN_GUEST_HANDLE_PARAM(void) a
     {
         case VCPUOP_register_vcpu_info:
         case VCPUOP_register_runstate_memory_area:
+        case VCPUOP_register_runstate_phys_memory_area:
             return do_vcpu_op(cmd, vcpuid, arg);
         default:
             return -EINVAL;
diff --git a/xen/common/domain.c b/xen/common/domain.c
index 32bca8d..f167a68 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -700,6 +700,68 @@ int rcu_lock_live_remote_domain_by_id(domid_t dom, struct domain **d)
     return 0;
 }
 
+void unmap_runstate_area(struct vcpu_runstate_info *area)
+{
+    mfn_t mfn;
+
+    ASSERT(area != NULL);
+
+    mfn = _mfn(domain_page_map_to_mfn(area));
+
+    unmap_domain_page_global((void *)
+                             ((unsigned long)area &
+                              PAGE_MASK));
+
+    put_page_and_type(mfn_to_page(mfn));
+}
+
+int map_runstate_area(struct vcpu *v, struct vcpu_runstate_info **area)
+{
+    unsigned long offset = v->runstate_guest.phys & ~PAGE_MASK;
+    gfn_t gfn = gaddr_to_gfn(v->runstate_guest.phys);
+    struct domain *d = v->domain;
+    void *mapping;
+    struct page_info *page;
+    size_t size = sizeof(struct vcpu_runstate_info);
+
+    if ( offset > (PAGE_SIZE - size) )
+        return -EINVAL;
+
+    page = get_page_from_gfn(d, gfn_x(gfn), NULL, P2M_ALLOC);
+    if ( !page )
+        return -EINVAL;
+
+    if ( !get_page_type(page, PGT_writable_page) )
+    {
+        put_page(page);
+        return -EINVAL;
+    }
+
+    mapping = __map_domain_page_global(page);
+
+    if ( mapping == NULL )
+    {
+        put_page_and_type(page);
+        return -ENOMEM;
+    }
+
+    *area = mapping + offset;
+
+    return 0;
+}
+
+static void discard_runstate_area(struct vcpu *v)
+{
+    v->runstate_guest_type = RUNSTATE_NONE;
+}
+
+static void discard_runstate_area_locked(struct vcpu *v)
+{
+    while ( xchg(&v->runstate_in_use, 1) );
+    discard_runstate_area(v);
+    xchg(&v->runstate_in_use, 0);
+}
+
 int domain_kill(struct domain *d)
 {
     int rc = 0;
@@ -738,7 +800,10 @@ int domain_kill(struct domain *d)
         if ( cpupool_move_domain(d, cpupool0) )
             return -ERESTART;
         for_each_vcpu ( d, v )
+        {
+            discard_runstate_area_locked(v);
             unmap_vcpu_info(v);
+        }
         d->is_dying = DOMDYING_dead;
         /* Mem event cleanup has to go here because the rings 
          * have to be put before we call put_domain. */
@@ -1192,7 +1257,7 @@ int domain_soft_reset(struct domain *d)
 
     for_each_vcpu ( d, v )
     {
-        set_xen_guest_handle(runstate_guest(v), NULL);
+        discard_runstate_area_locked(v);
         unmap_vcpu_info(v);
     }
 
@@ -1520,18 +1585,46 @@ long do_vcpu_op(int cmd, unsigned int vcpuid, XEN_GUEST_HANDLE_PARAM(void) arg)
             break;
 
         rc = 0;
-        runstate_guest(v) = area.addr.h;
+
+        while( xchg(&v->runstate_in_use, 1) == 0);
+
+        discard_runstate_area(v);
+
+        runstate_guest_virt(v) = area.addr.h;
+        v->runstate_guest_type = RUNSTATE_VADDR;
 
         if ( v == current )
         {
-            __copy_to_guest(runstate_guest(v), &v->runstate, 1);
+            __copy_to_guest(runstate_guest_virt(v), &v->runstate, 1);
         }
         else
         {
             vcpu_runstate_get(v, &runstate);
-            __copy_to_guest(runstate_guest(v), &runstate, 1);
+            __copy_to_guest(runstate_guest_virt(v), &runstate, 1);
         }
 
+        xchg(&v->runstate_in_use, 0);
+
+        break;
+    }
+
+    case VCPUOP_register_runstate_phys_memory_area:
+    {
+        struct vcpu_register_runstate_memory_area area;
+
+        rc = -EFAULT;
+        if ( copy_from_guest(&area, arg, 1) )
+             break;
+
+        while( xchg(&v->runstate_in_use, 1) == 0);
+
+        discard_runstate_area(v);
+        v->runstate_guest.phys = area.addr.p;
+        v->runstate_guest_type = RUNSTATE_PADDR;
+
+        xchg(&v->runstate_in_use, 0);
+        rc = 0;
+
         break;
     }
 
diff --git a/xen/include/public/vcpu.h b/xen/include/public/vcpu.h
index 3623af9..d7da4a3 100644
--- a/xen/include/public/vcpu.h
+++ b/xen/include/public/vcpu.h
@@ -235,6 +235,21 @@ struct vcpu_register_time_memory_area {
 typedef struct vcpu_register_time_memory_area vcpu_register_time_memory_area_t;
 DEFINE_XEN_GUEST_HANDLE(vcpu_register_time_memory_area_t);
 
+/*
+ * Register a shared memory area from which the guest may obtain its own
+ * runstate information without needing to execute a hypercall.
+ * Notes:
+ *  1. The registered address must be guest's physical address.
+ *  2. The registered runstate area should not cross page boundary.
+ *  3. Only one shared area may be registered per VCPU. The shared area is
+ *     updated by the hypervisor each time the VCPU is scheduled. Thus
+ *     runstate.state will always be RUNSTATE_running and
+ *     runstate.state_entry_time will indicate the system time at which the
+ *     VCPU was last scheduled to run.
+ * @extra_arg == pointer to vcpu_register_runstate_memory_area structure.
+ */
+#define VCPUOP_register_runstate_phys_memory_area 14
+
 #endif /* __XEN_PUBLIC_VCPU_H__ */
 
 /*
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index edee52d..8ac597b 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -163,17 +163,31 @@ struct vcpu
     void            *sched_priv;    /* scheduler-specific data */
 
     struct vcpu_runstate_info runstate;
+
+    enum {
+        RUNSTATE_NONE = 0,
+        RUNSTATE_PADDR = 1,
+        RUNSTATE_VADDR = 2,
+    } runstate_guest_type;
+
+    unsigned long runstate_in_use;
+
+    union
+    {
 #ifndef CONFIG_COMPAT
-# define runstate_guest(v) ((v)->runstate_guest)
-    XEN_GUEST_HANDLE(vcpu_runstate_info_t) runstate_guest; /* guest address */
+# define runstate_guest_virt(v) ((v)->runstate_guest.virt)
+           XEN_GUEST_HANDLE(vcpu_runstate_info_t) virt; /* guest address */
 #else
-# define runstate_guest(v) ((v)->runstate_guest.native)
-    union {
-        XEN_GUEST_HANDLE(vcpu_runstate_info_t) native;
-        XEN_GUEST_HANDLE(vcpu_runstate_info_compat_t) compat;
-    } runstate_guest; /* guest address */
+# define runstate_guest_virt(v) ((v)->runstate_guest.virt.native)
+           union {
+               XEN_GUEST_HANDLE(vcpu_runstate_info_t) native;
+               XEN_GUEST_HANDLE(vcpu_runstate_info_compat_t) compat;
+           } virt; /* guest address */
 #endif
 
+        paddr_t phys;
+    } runstate_guest;
+
     /* last time when vCPU is scheduled out */
     uint64_t last_run_time;
 
-- 
2.7.4


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

* [PATCH v3] Introduce runstate area registration with phys address
  2019-05-24 18:12 [PATCH RFC 2] [DO NOT APPLY] introduce VCPUOP_register_runstate_phys_memory_area hypercall Andrii Anisov
  2019-05-24 18:12 ` [Xen-devel] " Andrii Anisov
@ 2019-05-24 18:12 ` Andrii Anisov
  2019-05-24 18:12   ` [Xen-devel] " Andrii Anisov
  2019-05-24 18:12 ` [PATCH v3] xen: introduce VCPUOP_register_runstate_phys_memory_area hypercall Andrii Anisov
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 51+ messages in thread
From: Andrii Anisov @ 2019-05-24 18:12 UTC (permalink / raw)
  To: xen-devel; +Cc: Julien Grall, Andrii Anisov, Jan Beulich, Roger Pau Monné

From: Andrii Anisov <andrii_anisov@epam.com>

Following discussion [1] it is introduced and implemented a runstate
registration interface which uses guest's phys address instead of a virtual one.
The new hypercall employes the same data structures as a predecessor, but
expects the vcpu_runstate_info structure to not cross a page boundary.
The interface is implemented in a way vcpu_runstate_info structure is mapped to
the hypervisor on the hypercall processing and is directly accessed during its
updates. This runstate area mapping follows vcpu_info structure registration.

Permanent mapping of runstate area would consume vmap area on arm32 what is
limited to 1G. Though it might be OK because it would be possible to increase 
the ARM32 virtual address area by reworking the address space. 

The series is tested for ARM64. Build tested for x86. I'd appreciate if someone
could check it with x86.
The Linux kernel patch is here [2]. Though it is for 4.14. It is not still
convinced the absolute correctness of that patch, yet this should be better
aligned with linux community.

Changes in:

  v3: This version again implements runstate mapping on init approach.
      Patches are squashed and refactored in order to not allow virt and phys
      interfaces function simultaneously but replace one another on init.
      In order to measure performance impact of permanent mapping vs mapping on
      access there written two RFC patches which follow mapping on access
      approach with the little difference: 
       - RFC 1 - using copy_to_guest_phys_flush_dcache() for each access to
         runstate area.
       - RFC 2 - mapping runstate area before all update manipulations and unmap
         after.

      RFC patches were implemented for ARM only, because performance measurements
      were done on ARM64 machine.

      There were made performance measurements of approaches (runstate mapped on
      access vs mapped on registration). The test setups are as following:
     
      Thin Dom0 (Linux with intiramfs) with DomD running rich Yocto Linux. In
      DomD 3d benchmark numbers are compared. The benchmark is GlMark2. GLMark2
      is ran with different resolutions in order to emit different irq load, 
      where 320x240 emits high IRQ load, but 1920x1080 emits low irq load.
      Separately tested baking DomD benchmark run with primitive Dom0 CPU burn
      (dd), in order to stimulate VCPU(dX)->VCPU(dY) switches rather than
      VCPU(dX)->idle->VCPU(dX).
      with following results:

                            mapped         mapped         mapped
                            on init        on access      on update
                                           RFC 1          RFC 2
      GLMark2 320x240       2906           2856 (-2%)     2903 (0)
          +Dom0 CPUBurn     2166           2106 (-3%)     2134 (1%)
      GLMark2 800x600       2396           2367 (-1%)     2393 (0%)
          +Dom0 CPUBurn     1958           1911 (-2%)     1942 (-1%)
      GLMark2 1920x1080     939            936  (0%)      935  (0%)
          +Dom0 CPUBurn     909            901  (-1%)     907  (0%)

      Also it was checked IRQ latency difference using TBM in a setup similar to
      [5]. Please note that the IRQ rate is one in 30 seconds, and only
      VCPU->idle->VCPU use-case is considered. With following results (in ns,
      the timer granularity 120ns):

      mapped on init:
          max=10080 warm_max=8760 min=6600 avg=6699
      mapped on update (RFC1):
          max=10440 warm_max=7560 min=7320 avg=7419
      mapped on access (RFC2)
          max=11520 warm_max=7920 min=7200 avg=7299

  v2: It was reconsidered the new runstate interface implementation. The new 
      interface is made independent of the old one. Do not share runstate_area
      field, and consequently avoid excessive concurrency with the old runstate
      interface usage.
      Introduced locks in order to resolve possible concurrency between runstate
      area registration and usage. 
      Addressed comments from Jan Beulich [3][4] about coding style nits. Though
      some of them become obsolete with refactoring and few are picked into this
      thread for further discussion.

      There were made performance measurements of approaches (runstate mapped on
      access vs mapped on registration). The test setups are as following:
     
      Thin Dom0 (Linux with intiramfs) with DomD running rich Yocto Linux. In
      DomD 3d benchmark numbers are compared. The benchmark is GlMark2. GLMark2
      is ran with different resolutions in order to emit different irq load, 
      where 320x240 emits high IRQ load, but 1920x1080 emits low irq load.
      Separately tested baking DomD benchmark run with primitive Dom0 CPU burn
      (dd), in order to stimulate VCPU(dX)->VCPU(dY) switches rather than
      VCPU(dX)->idle->VCPU(dX).
      with following results:

                            mapped               mapped
                            on access            on init
      GLMark2 320x240       2852                 2877          +0.8%
          +Dom0 CPUBurn     2088                 2094          +0.2%
      GLMark2 800x600       2368                 2375          +0.3%
          +Dom0 CPUBurn     1868                 1921          +2.8%
      GLMark2 1920x1080     931                  931            0%
          +Dom0 CPUBurn     892                  894           +0.2%

      Please note that "mapped on access" means using the old runstate
      registering interface. And runstate update in this case still often fails
      to map runstate area like [5], despite the fact that our Linux kernel
      does not have KPTI enabled. So runstate area update, in this case, is
      really shortened.


      Also it was checked IRQ latency difference using TBM in a setup similar to
      [5]. Please note that the IRQ rate is one in 30 seconds, and only
      VCPU->idle->VCPU use-case is considered. With following results (in ns,
      the timer granularity 120ns):

      mapped on access:
          max=9960 warm_max=8640 min=7200 avg=7626
      mapped on init:
          max=9480 warm_max=8400 min=7080 avg=7341

      Unfortunately there are no consitent results yet from profiling using
      Lauterbach PowerTrace. Still in communication with the tracer vendor in
      order to setup the proper configuration.



[1] https://lists.xenproject.org/archives/html/xen-devel/2019-02/msg00416.html
[2] https://github.com/aanisov/linux/commit/ba34d2780f57ea43f81810cd695aace7b55c0f29
[3] https://lists.xenproject.org/archives/html/xen-devel/2019-03/msg00936.html
[4] https://lists.xenproject.org/archives/html/xen-devel/2019-03/msg00934.html
[5] https://lists.xenproject.org/archives/html/xen-devel/2019-01/msg02369.html
[6] https://lists.xenproject.org/archives/html/xen-devel/2018-12/msg02297.html


Andrii Anisov (1):
  xen: introduce VCPUOP_register_runstate_phys_memory_area hypercall

 xen/arch/arm/domain.c        |  58 ++++++++++++++++++---
 xen/arch/x86/domain.c        |  99 ++++++++++++++++++++++++++++++++---
 xen/arch/x86/x86_64/domain.c |  16 +++++-
 xen/common/domain.c          | 121 +++++++++++++++++++++++++++++++++++++++----
 xen/include/public/vcpu.h    |  15 ++++++
 xen/include/xen/sched.h      |  28 +++++++---
 6 files changed, 306 insertions(+), 31 deletions(-)

-- 
2.7.4


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

* [Xen-devel] [PATCH v3] Introduce runstate area registration with phys address
  2019-05-24 18:12 ` [PATCH v3] Introduce runstate area registration with phys address Andrii Anisov
@ 2019-05-24 18:12   ` " Andrii Anisov
  0 siblings, 0 replies; 51+ messages in thread
From: Andrii Anisov @ 2019-05-24 18:12 UTC (permalink / raw)
  To: xen-devel; +Cc: Julien Grall, Andrii Anisov, Jan Beulich, Roger Pau Monné

From: Andrii Anisov <andrii_anisov@epam.com>

Following discussion [1] it is introduced and implemented a runstate
registration interface which uses guest's phys address instead of a virtual one.
The new hypercall employes the same data structures as a predecessor, but
expects the vcpu_runstate_info structure to not cross a page boundary.
The interface is implemented in a way vcpu_runstate_info structure is mapped to
the hypervisor on the hypercall processing and is directly accessed during its
updates. This runstate area mapping follows vcpu_info structure registration.

Permanent mapping of runstate area would consume vmap area on arm32 what is
limited to 1G. Though it might be OK because it would be possible to increase 
the ARM32 virtual address area by reworking the address space. 

The series is tested for ARM64. Build tested for x86. I'd appreciate if someone
could check it with x86.
The Linux kernel patch is here [2]. Though it is for 4.14. It is not still
convinced the absolute correctness of that patch, yet this should be better
aligned with linux community.

Changes in:

  v3: This version again implements runstate mapping on init approach.
      Patches are squashed and refactored in order to not allow virt and phys
      interfaces function simultaneously but replace one another on init.
      In order to measure performance impact of permanent mapping vs mapping on
      access there written two RFC patches which follow mapping on access
      approach with the little difference: 
       - RFC 1 - using copy_to_guest_phys_flush_dcache() for each access to
         runstate area.
       - RFC 2 - mapping runstate area before all update manipulations and unmap
         after.

      RFC patches were implemented for ARM only, because performance measurements
      were done on ARM64 machine.

      There were made performance measurements of approaches (runstate mapped on
      access vs mapped on registration). The test setups are as following:
     
      Thin Dom0 (Linux with intiramfs) with DomD running rich Yocto Linux. In
      DomD 3d benchmark numbers are compared. The benchmark is GlMark2. GLMark2
      is ran with different resolutions in order to emit different irq load, 
      where 320x240 emits high IRQ load, but 1920x1080 emits low irq load.
      Separately tested baking DomD benchmark run with primitive Dom0 CPU burn
      (dd), in order to stimulate VCPU(dX)->VCPU(dY) switches rather than
      VCPU(dX)->idle->VCPU(dX).
      with following results:

                            mapped         mapped         mapped
                            on init        on access      on update
                                           RFC 1          RFC 2
      GLMark2 320x240       2906           2856 (-2%)     2903 (0)
          +Dom0 CPUBurn     2166           2106 (-3%)     2134 (1%)
      GLMark2 800x600       2396           2367 (-1%)     2393 (0%)
          +Dom0 CPUBurn     1958           1911 (-2%)     1942 (-1%)
      GLMark2 1920x1080     939            936  (0%)      935  (0%)
          +Dom0 CPUBurn     909            901  (-1%)     907  (0%)

      Also it was checked IRQ latency difference using TBM in a setup similar to
      [5]. Please note that the IRQ rate is one in 30 seconds, and only
      VCPU->idle->VCPU use-case is considered. With following results (in ns,
      the timer granularity 120ns):

      mapped on init:
          max=10080 warm_max=8760 min=6600 avg=6699
      mapped on update (RFC1):
          max=10440 warm_max=7560 min=7320 avg=7419
      mapped on access (RFC2)
          max=11520 warm_max=7920 min=7200 avg=7299

  v2: It was reconsidered the new runstate interface implementation. The new 
      interface is made independent of the old one. Do not share runstate_area
      field, and consequently avoid excessive concurrency with the old runstate
      interface usage.
      Introduced locks in order to resolve possible concurrency between runstate
      area registration and usage. 
      Addressed comments from Jan Beulich [3][4] about coding style nits. Though
      some of them become obsolete with refactoring and few are picked into this
      thread for further discussion.

      There were made performance measurements of approaches (runstate mapped on
      access vs mapped on registration). The test setups are as following:
     
      Thin Dom0 (Linux with intiramfs) with DomD running rich Yocto Linux. In
      DomD 3d benchmark numbers are compared. The benchmark is GlMark2. GLMark2
      is ran with different resolutions in order to emit different irq load, 
      where 320x240 emits high IRQ load, but 1920x1080 emits low irq load.
      Separately tested baking DomD benchmark run with primitive Dom0 CPU burn
      (dd), in order to stimulate VCPU(dX)->VCPU(dY) switches rather than
      VCPU(dX)->idle->VCPU(dX).
      with following results:

                            mapped               mapped
                            on access            on init
      GLMark2 320x240       2852                 2877          +0.8%
          +Dom0 CPUBurn     2088                 2094          +0.2%
      GLMark2 800x600       2368                 2375          +0.3%
          +Dom0 CPUBurn     1868                 1921          +2.8%
      GLMark2 1920x1080     931                  931            0%
          +Dom0 CPUBurn     892                  894           +0.2%

      Please note that "mapped on access" means using the old runstate
      registering interface. And runstate update in this case still often fails
      to map runstate area like [5], despite the fact that our Linux kernel
      does not have KPTI enabled. So runstate area update, in this case, is
      really shortened.


      Also it was checked IRQ latency difference using TBM in a setup similar to
      [5]. Please note that the IRQ rate is one in 30 seconds, and only
      VCPU->idle->VCPU use-case is considered. With following results (in ns,
      the timer granularity 120ns):

      mapped on access:
          max=9960 warm_max=8640 min=7200 avg=7626
      mapped on init:
          max=9480 warm_max=8400 min=7080 avg=7341

      Unfortunately there are no consitent results yet from profiling using
      Lauterbach PowerTrace. Still in communication with the tracer vendor in
      order to setup the proper configuration.



[1] https://lists.xenproject.org/archives/html/xen-devel/2019-02/msg00416.html
[2] https://github.com/aanisov/linux/commit/ba34d2780f57ea43f81810cd695aace7b55c0f29
[3] https://lists.xenproject.org/archives/html/xen-devel/2019-03/msg00936.html
[4] https://lists.xenproject.org/archives/html/xen-devel/2019-03/msg00934.html
[5] https://lists.xenproject.org/archives/html/xen-devel/2019-01/msg02369.html
[6] https://lists.xenproject.org/archives/html/xen-devel/2018-12/msg02297.html


Andrii Anisov (1):
  xen: introduce VCPUOP_register_runstate_phys_memory_area hypercall

 xen/arch/arm/domain.c        |  58 ++++++++++++++++++---
 xen/arch/x86/domain.c        |  99 ++++++++++++++++++++++++++++++++---
 xen/arch/x86/x86_64/domain.c |  16 +++++-
 xen/common/domain.c          | 121 +++++++++++++++++++++++++++++++++++++++----
 xen/include/public/vcpu.h    |  15 ++++++
 xen/include/xen/sched.h      |  28 +++++++---
 6 files changed, 306 insertions(+), 31 deletions(-)

-- 
2.7.4


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

* [PATCH v3] xen: introduce VCPUOP_register_runstate_phys_memory_area hypercall
  2019-05-24 18:12 [PATCH RFC 2] [DO NOT APPLY] introduce VCPUOP_register_runstate_phys_memory_area hypercall Andrii Anisov
  2019-05-24 18:12 ` [Xen-devel] " Andrii Anisov
  2019-05-24 18:12 ` [PATCH v3] Introduce runstate area registration with phys address Andrii Anisov
@ 2019-05-24 18:12 ` Andrii Anisov
  2019-05-24 18:12   ` [Xen-devel] " Andrii Anisov
  2019-06-07 14:23   ` Jan Beulich
  2019-05-24 18:12 ` [PATCH RFC 1] [DO NOT APPLY] " Andrii Anisov
  2019-05-28  8:59 ` [PATCH RFC 2] " Julien Grall
  4 siblings, 2 replies; 51+ messages in thread
From: Andrii Anisov @ 2019-05-24 18:12 UTC (permalink / raw)
  To: xen-devel
  Cc: Stefano Stabellini, Andrii Anisov, Konrad Rzeszutek Wilk,
	George Dunlap, Andrew Cooper, Ian Jackson, Tim Deegan,
	Julien Grall, Jan Beulich, Wei Liu, Roger Pau Monné

From: Andrii Anisov <andrii_anisov@epam.com>

Existing interface to register runstate are with its virtual address
is prone to issues which became more obvious with KPTI enablement in
guests. The nature of those issues is the fact that the guest could
be interrupted by the hypervisor at any time, and there is no guarantee
to have the registered virtual address translated with the currently
available guest's page tables. Before the KPTI such a situation was
possible in case the guest is caught in the middle of PT processing
(e.g. superpage shattering). With the KPTI this happens also when the
guest runs userspace, so has a pretty high probability.
So it was agreed to register runstate with the guest's physical address
so that its mapping is permanent from the hypervisor point of view. [1]

The hypercall employs the same vcpu_register_runstate_memory_area
structure for the interface, but requires a registered area to not
cross a page boundary.

[1] https://lists.xenproject.org/archives/html/xen-devel/2019-02/msg00416.html

Signed-off-by: Andrii Anisov <andrii_anisov@epam.com>
---
 xen/arch/arm/domain.c        |  58 ++++++++++++++++++---
 xen/arch/x86/domain.c        |  99 ++++++++++++++++++++++++++++++++---
 xen/arch/x86/x86_64/domain.c |  16 +++++-
 xen/common/domain.c          | 121 +++++++++++++++++++++++++++++++++++++++----
 xen/include/public/vcpu.h    |  15 ++++++
 xen/include/xen/sched.h      |  28 +++++++---
 6 files changed, 306 insertions(+), 31 deletions(-)

diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
index ff330b3..ecedf1c 100644
--- a/xen/arch/arm/domain.c
+++ b/xen/arch/arm/domain.c
@@ -274,17 +274,15 @@ static void ctxt_switch_to(struct vcpu *n)
     virt_timer_restore(n);
 }
 
-/* Update per-VCPU guest runstate shared memory area (if registered). */
-static void update_runstate_area(struct vcpu *v)
+static void update_runstate_by_gvaddr(struct vcpu *v)
 {
     void __user *guest_handle = NULL;
 
-    if ( guest_handle_is_null(runstate_guest(v)) )
-        return;
+    ASSERT(!guest_handle_is_null(runstate_guest_virt(v)));
 
     if ( VM_ASSIST(v->domain, runstate_update_flag) )
     {
-        guest_handle = &v->runstate_guest.p->state_entry_time + 1;
+        guest_handle = &v->runstate_guest.virt.p->state_entry_time + 1;
         guest_handle--;
         v->runstate.state_entry_time |= XEN_RUNSTATE_UPDATE;
         __raw_copy_to_guest(guest_handle,
@@ -292,7 +290,7 @@ static void update_runstate_area(struct vcpu *v)
         smp_wmb();
     }
 
-    __copy_to_guest(runstate_guest(v), &v->runstate, 1);
+    __copy_to_guest(runstate_guest_virt(v), &v->runstate, 1);
 
     if ( guest_handle )
     {
@@ -303,6 +301,53 @@ static void update_runstate_area(struct vcpu *v)
     }
 }
 
+static void update_runstate_by_gpaddr(struct vcpu *v)
+{
+    struct vcpu_runstate_info *runstate =
+            (struct vcpu_runstate_info *)v->runstate_guest.phys;
+
+    ASSERT(runstate != NULL);
+
+    if ( VM_ASSIST(v->domain, runstate_update_flag) )
+    {
+        runstate->state_entry_time |= XEN_RUNSTATE_UPDATE;
+        smp_wmb();
+        v->runstate.state_entry_time |= XEN_RUNSTATE_UPDATE;
+    }
+
+    memcpy(v->runstate_guest.phys, &v->runstate, sizeof(v->runstate));
+
+    if ( VM_ASSIST(v->domain, runstate_update_flag) )
+    {
+        runstate->state_entry_time &= ~XEN_RUNSTATE_UPDATE;
+        smp_wmb();
+        v->runstate.state_entry_time &= ~XEN_RUNSTATE_UPDATE;
+    }
+}
+
+/* Update per-VCPU guest runstate shared memory area (if registered). */
+static void update_runstate_area(struct vcpu *v)
+{
+    if ( xchg(&v->runstate_in_use, 1) )
+        return;
+
+    switch ( v->runstate_guest_type )
+    {
+    case RUNSTATE_NONE:
+        break;
+
+    case RUNSTATE_VADDR:
+        update_runstate_by_gvaddr(v);
+        break;
+
+    case RUNSTATE_PADDR:
+        update_runstate_by_gpaddr(v);
+        break;
+    }
+
+    xchg(&v->runstate_in_use, 0);
+}
+
 static void schedule_tail(struct vcpu *prev)
 {
     ASSERT(prev != current);
@@ -998,6 +1043,7 @@ long do_arm_vcpu_op(int cmd, unsigned int vcpuid, XEN_GUEST_HANDLE_PARAM(void) a
     {
         case VCPUOP_register_vcpu_info:
         case VCPUOP_register_runstate_memory_area:
+        case VCPUOP_register_runstate_phys_memory_area:
             return do_vcpu_op(cmd, vcpuid, arg);
         default:
             return -EINVAL;
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index ac960dd..fe71776 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1566,22 +1566,21 @@ void paravirt_ctxt_switch_to(struct vcpu *v)
 }
 
 /* Update per-VCPU guest runstate shared memory area (if registered). */
-bool update_runstate_area(struct vcpu *v)
+static bool update_runstate_by_gvaddr(struct vcpu *v)
 {
     bool rc;
     struct guest_memory_policy policy = { .nested_guest_mode = false };
     void __user *guest_handle = NULL;
 
-    if ( guest_handle_is_null(runstate_guest(v)) )
-        return true;
+    ASSERT(!guest_handle_is_null(runstate_guest_virt(v)));
 
     update_guest_memory_policy(v, &policy);
 
     if ( VM_ASSIST(v->domain, runstate_update_flag) )
     {
         guest_handle = has_32bit_shinfo(v->domain)
-            ? &v->runstate_guest.compat.p->state_entry_time + 1
-            : &v->runstate_guest.native.p->state_entry_time + 1;
+            ? &v->runstate_guest.virt.compat.p->state_entry_time + 1
+            : &v->runstate_guest.virt.native.p->state_entry_time + 1;
         guest_handle--;
         v->runstate.state_entry_time |= XEN_RUNSTATE_UPDATE;
         __raw_copy_to_guest(guest_handle,
@@ -1594,11 +1593,11 @@ bool update_runstate_area(struct vcpu *v)
         struct compat_vcpu_runstate_info info;
 
         XLAT_vcpu_runstate_info(&info, &v->runstate);
-        __copy_to_guest(v->runstate_guest.compat, &info, 1);
+        __copy_to_guest(v->runstate_guest.virt.compat, &info, 1);
         rc = true;
     }
     else
-        rc = __copy_to_guest(runstate_guest(v), &v->runstate, 1) !=
+        rc = __copy_to_guest(runstate_guest_virt(v), &v->runstate, 1) !=
              sizeof(v->runstate);
 
     if ( guest_handle )
@@ -1614,6 +1613,92 @@ bool update_runstate_area(struct vcpu *v)
     return rc;
 }
 
+static bool update_runstate_by_gpaddr_native(struct vcpu *v)
+{
+    struct vcpu_runstate_info *runstate =
+            (struct vcpu_runstate_info *)v->runstate_guest.phys;
+
+    ASSERT(runstate != NULL);
+
+    if ( VM_ASSIST(v->domain, runstate_update_flag) )
+    {
+        runstate->state_entry_time |= XEN_RUNSTATE_UPDATE;
+        smp_wmb();
+        v->runstate.state_entry_time |= XEN_RUNSTATE_UPDATE;
+    }
+
+    memcpy(v->runstate_guest.phys, &v->runstate, sizeof(v->runstate));
+
+    if ( VM_ASSIST(v->domain, runstate_update_flag) )
+    {
+        runstate->state_entry_time &= ~XEN_RUNSTATE_UPDATE;
+        smp_wmb();
+        v->runstate.state_entry_time &= ~XEN_RUNSTATE_UPDATE;
+    }
+
+    return true;
+}
+
+static bool update_runstate_by_gpaddr_compat(struct vcpu *v)
+{
+    struct compat_vcpu_runstate_info *runstate =
+            (struct compat_vcpu_runstate_info *)v->runstate_guest.phys;
+
+    ASSERT(runstate != NULL);
+
+    if ( VM_ASSIST(v->domain, runstate_update_flag) )
+    {
+        runstate->state_entry_time |= XEN_RUNSTATE_UPDATE;
+        smp_wmb();
+        v->runstate.state_entry_time |= XEN_RUNSTATE_UPDATE;
+    }
+
+    {
+        struct compat_vcpu_runstate_info info;
+        XLAT_vcpu_runstate_info(&info, &v->runstate);
+        memcpy(v->runstate_guest.phys, &info, sizeof(info));
+    }
+    else
+        memcpy(v->runstate_guest.phys, &v->runstate, sizeof(v->runstate));
+
+    if ( VM_ASSIST(v->domain, runstate_update_flag) )
+    {
+        runstate->state_entry_time &= ~XEN_RUNSTATE_UPDATE;
+        smp_wmb();
+        v->runstate.state_entry_time &= ~XEN_RUNSTATE_UPDATE;
+    }
+
+    return true;
+}
+
+bool update_runstate_area(struct vcpu *v)
+{
+    bool rc = true;
+
+    if ( xchg(&v->runstate_in_use, 1) )
+        return rc;
+
+    switch ( v->runstate_guest_type )
+    {
+    case RUNSTATE_NONE:
+        break;
+
+    case RUNSTATE_VADDR:
+        rc = update_runstate_by_gvaddr(v);
+        break;
+
+    case RUNSTATE_PADDR:
+        if ( has_32bit_shinfo(v->domain) )
+            rc = update_runstate_by_gpaddr_compat(v);
+        else
+            rc = update_runstate_by_gpaddr_native(v);
+        break;
+    }
+
+    xchg(&v->runstate_in_use, 0);
+    return rc;
+}
+
 static void _update_runstate_area(struct vcpu *v)
 {
     if ( !update_runstate_area(v) && is_pv_vcpu(v) &&
diff --git a/xen/arch/x86/x86_64/domain.c b/xen/arch/x86/x86_64/domain.c
index c46dccc..85d0072 100644
--- a/xen/arch/x86/x86_64/domain.c
+++ b/xen/arch/x86/x86_64/domain.c
@@ -12,6 +12,8 @@
 CHECK_vcpu_get_physid;
 #undef xen_vcpu_get_physid
 
+extern void discard_runstate_area(struct vcpu *v);
+
 int
 arch_compat_vcpu_op(
     int cmd, struct vcpu *v, XEN_GUEST_HANDLE_PARAM(void) arg)
@@ -35,8 +37,16 @@ arch_compat_vcpu_op(
              !compat_handle_okay(area.addr.h, 1) )
             break;
 
+        while( xchg(&v->runstate_in_use, 1) == 0);
+
+        discard_runstate_area(v);
+
         rc = 0;
-        guest_from_compat_handle(v->runstate_guest.compat, area.addr.h);
+
+        guest_from_compat_handle(v->runstate_guest.virt.compat,
+                                 area.addr.h);
+
+        v->runstate_guest_type = RUNSTATE_VADDR;
 
         if ( v == current )
         {
@@ -49,7 +59,9 @@ arch_compat_vcpu_op(
             vcpu_runstate_get(v, &runstate);
             XLAT_vcpu_runstate_info(&info, &runstate);
         }
-        __copy_to_guest(v->runstate_guest.compat, &info, 1);
+        __copy_to_guest(v->runstate_guest.virt.compat, &info, 1);
+
+        xchg(&v->runstate_in_use, 0);
 
         break;
     }
diff --git a/xen/common/domain.c b/xen/common/domain.c
index 90c6607..d276b87 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -698,6 +698,74 @@ int rcu_lock_live_remote_domain_by_id(domid_t dom, struct domain **d)
     return 0;
 }
 
+static void unmap_runstate_area(struct vcpu *v)
+{
+    mfn_t mfn;
+
+    if ( ! v->runstate_guest.phys )
+        return;
+
+    mfn = domain_page_map_to_mfn(v->runstate_guest.phys);
+
+    unmap_domain_page_global((void *)
+                             ((unsigned long)v->runstate_guest.phys &
+                              PAGE_MASK));
+
+    v->runstate_guest.phys = NULL;
+    put_page_and_type(mfn_to_page(mfn));
+}
+
+static int map_runstate_area(struct vcpu *v,
+                      struct vcpu_register_runstate_memory_area *area)
+{
+    unsigned long offset = area->addr.p & ~PAGE_MASK;
+    gfn_t gfn = gaddr_to_gfn(area->addr.p);
+    struct domain *d = v->domain;
+    void *mapping;
+    struct page_info *page;
+    size_t size = sizeof(struct vcpu_runstate_info);
+
+    if ( offset > (PAGE_SIZE - size) )
+        return -EINVAL;
+
+    page = get_page_from_gfn(d, gfn_x(gfn), NULL, P2M_ALLOC);
+    if ( !page )
+        return -EINVAL;
+
+    if ( !get_page_type(page, PGT_writable_page) )
+    {
+        put_page(page);
+        return -EINVAL;
+    }
+
+    mapping = __map_domain_page_global(page);
+
+    if ( mapping == NULL )
+    {
+        put_page_and_type(page);
+        return -ENOMEM;
+    }
+
+    v->runstate_guest.phys = mapping + offset;
+
+    return 0;
+}
+
+void discard_runstate_area(struct vcpu *v)
+{
+    if ( v->runstate_guest_type == RUNSTATE_PADDR )
+        unmap_runstate_area(v);
+
+    v->runstate_guest_type = RUNSTATE_NONE;
+}
+
+static void discard_runstate_area_locked(struct vcpu *v)
+{
+    while ( xchg(&v->runstate_in_use, 1) );
+    discard_runstate_area(v);
+    xchg(&v->runstate_in_use, 0);
+}
+
 int domain_kill(struct domain *d)
 {
     int rc = 0;
@@ -734,7 +802,10 @@ int domain_kill(struct domain *d)
         if ( cpupool_move_domain(d, cpupool0) )
             return -ERESTART;
         for_each_vcpu ( d, v )
+        {
+            discard_runstate_area_locked(v);
             unmap_vcpu_info(v);
+        }
         d->is_dying = DOMDYING_dead;
         /* Mem event cleanup has to go here because the rings 
          * have to be put before we call put_domain. */
@@ -1188,7 +1259,7 @@ int domain_soft_reset(struct domain *d)
 
     for_each_vcpu ( d, v )
     {
-        set_xen_guest_handle(runstate_guest(v), NULL);
+        discard_runstate_area_locked(v);
         unmap_vcpu_info(v);
     }
 
@@ -1518,18 +1589,50 @@ long do_vcpu_op(int cmd, unsigned int vcpuid, XEN_GUEST_HANDLE_PARAM(void) arg)
             break;
 
         rc = 0;
-        runstate_guest(v) = area.addr.h;
 
-        if ( v == current )
-        {
-            __copy_to_guest(runstate_guest(v), &v->runstate, 1);
-        }
-        else
+        while( xchg(&v->runstate_in_use, 1) == 0);
+
+        discard_runstate_area(v);
+
+        if ( !guest_handle_is_null(runstate_guest_virt(v)) )
         {
-            vcpu_runstate_get(v, &runstate);
-            __copy_to_guest(runstate_guest(v), &runstate, 1);
+            runstate_guest_virt(v) = area.addr.h;
+            v->runstate_guest_type = RUNSTATE_VADDR;
+
+            if ( v == current )
+            {
+                __copy_to_guest(runstate_guest_virt(v), &v->runstate, 1);
+            }
+            else
+            {
+                vcpu_runstate_get(v, &runstate);
+                __copy_to_guest(runstate_guest_virt(v), &runstate, 1);
+            }
         }
 
+        xchg(&v->runstate_in_use, 0);
+
+        break;
+    }
+
+    case VCPUOP_register_runstate_phys_memory_area:
+    {
+        struct vcpu_register_runstate_memory_area area;
+
+        rc = -EFAULT;
+        if ( copy_from_guest(&area, arg, 1) )
+             break;
+
+        while( xchg(&v->runstate_in_use, 1) == 0);
+
+        discard_runstate_area(v);
+
+        rc = map_runstate_area(v, &area);
+        if ( !rc )
+            v->runstate_guest_type = RUNSTATE_PADDR;
+
+        xchg(&v->runstate_in_use, 0);
+
         break;
     }
 
diff --git a/xen/include/public/vcpu.h b/xen/include/public/vcpu.h
index 3623af9..d7da4a3 100644
--- a/xen/include/public/vcpu.h
+++ b/xen/include/public/vcpu.h
@@ -235,6 +235,21 @@ struct vcpu_register_time_memory_area {
 typedef struct vcpu_register_time_memory_area vcpu_register_time_memory_area_t;
 DEFINE_XEN_GUEST_HANDLE(vcpu_register_time_memory_area_t);
 
+/*
+ * Register a shared memory area from which the guest may obtain its own
+ * runstate information without needing to execute a hypercall.
+ * Notes:
+ *  1. The registered address must be guest's physical address.
+ *  2. The registered runstate area should not cross page boundary.
+ *  3. Only one shared area may be registered per VCPU. The shared area is
+ *     updated by the hypervisor each time the VCPU is scheduled. Thus
+ *     runstate.state will always be RUNSTATE_running and
+ *     runstate.state_entry_time will indicate the system time at which the
+ *     VCPU was last scheduled to run.
+ * @extra_arg == pointer to vcpu_register_runstate_memory_area structure.
+ */
+#define VCPUOP_register_runstate_phys_memory_area 14
+
 #endif /* __XEN_PUBLIC_VCPU_H__ */
 
 /*
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 2201fac..6c8de8f 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -163,17 +163,31 @@ struct vcpu
     void            *sched_priv;    /* scheduler-specific data */
 
     struct vcpu_runstate_info runstate;
+
+    enum {
+        RUNSTATE_NONE = 0,
+        RUNSTATE_PADDR = 1,
+        RUNSTATE_VADDR = 2,
+    } runstate_guest_type;
+
+    unsigned long runstate_in_use;
+
+    union
+    {
 #ifndef CONFIG_COMPAT
-# define runstate_guest(v) ((v)->runstate_guest)
-    XEN_GUEST_HANDLE(vcpu_runstate_info_t) runstate_guest; /* guest address */
+# define runstate_guest_virt(v) ((v)->runstate_guest.virt)
+           XEN_GUEST_HANDLE(vcpu_runstate_info_t) virt; /* guest address */
 #else
-# define runstate_guest(v) ((v)->runstate_guest.native)
-    union {
-        XEN_GUEST_HANDLE(vcpu_runstate_info_t) native;
-        XEN_GUEST_HANDLE(vcpu_runstate_info_compat_t) compat;
-    } runstate_guest; /* guest address */
+# define runstate_guest_virt(v) ((v)->runstate_guest.virt.native)
+           union {
+               XEN_GUEST_HANDLE(vcpu_runstate_info_t) native;
+               XEN_GUEST_HANDLE(vcpu_runstate_info_compat_t) compat;
+           } virt; /* guest address */
 #endif
 
+        void*   phys;
+    } runstate_guest;
+
     /* last time when vCPU is scheduled out */
     uint64_t last_run_time;
 
-- 
2.7.4


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

* [Xen-devel] [PATCH v3] xen: introduce VCPUOP_register_runstate_phys_memory_area hypercall
  2019-05-24 18:12 ` [PATCH v3] xen: introduce VCPUOP_register_runstate_phys_memory_area hypercall Andrii Anisov
@ 2019-05-24 18:12   ` " Andrii Anisov
  2019-06-07 14:23   ` Jan Beulich
  1 sibling, 0 replies; 51+ messages in thread
From: Andrii Anisov @ 2019-05-24 18:12 UTC (permalink / raw)
  To: xen-devel
  Cc: Stefano Stabellini, Andrii Anisov, Konrad Rzeszutek Wilk,
	George Dunlap, Andrew Cooper, Ian Jackson, Tim Deegan,
	Julien Grall, Jan Beulich, Wei Liu, Roger Pau Monné

From: Andrii Anisov <andrii_anisov@epam.com>

Existing interface to register runstate are with its virtual address
is prone to issues which became more obvious with KPTI enablement in
guests. The nature of those issues is the fact that the guest could
be interrupted by the hypervisor at any time, and there is no guarantee
to have the registered virtual address translated with the currently
available guest's page tables. Before the KPTI such a situation was
possible in case the guest is caught in the middle of PT processing
(e.g. superpage shattering). With the KPTI this happens also when the
guest runs userspace, so has a pretty high probability.
So it was agreed to register runstate with the guest's physical address
so that its mapping is permanent from the hypervisor point of view. [1]

The hypercall employs the same vcpu_register_runstate_memory_area
structure for the interface, but requires a registered area to not
cross a page boundary.

[1] https://lists.xenproject.org/archives/html/xen-devel/2019-02/msg00416.html

Signed-off-by: Andrii Anisov <andrii_anisov@epam.com>
---
 xen/arch/arm/domain.c        |  58 ++++++++++++++++++---
 xen/arch/x86/domain.c        |  99 ++++++++++++++++++++++++++++++++---
 xen/arch/x86/x86_64/domain.c |  16 +++++-
 xen/common/domain.c          | 121 +++++++++++++++++++++++++++++++++++++++----
 xen/include/public/vcpu.h    |  15 ++++++
 xen/include/xen/sched.h      |  28 +++++++---
 6 files changed, 306 insertions(+), 31 deletions(-)

diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
index ff330b3..ecedf1c 100644
--- a/xen/arch/arm/domain.c
+++ b/xen/arch/arm/domain.c
@@ -274,17 +274,15 @@ static void ctxt_switch_to(struct vcpu *n)
     virt_timer_restore(n);
 }
 
-/* Update per-VCPU guest runstate shared memory area (if registered). */
-static void update_runstate_area(struct vcpu *v)
+static void update_runstate_by_gvaddr(struct vcpu *v)
 {
     void __user *guest_handle = NULL;
 
-    if ( guest_handle_is_null(runstate_guest(v)) )
-        return;
+    ASSERT(!guest_handle_is_null(runstate_guest_virt(v)));
 
     if ( VM_ASSIST(v->domain, runstate_update_flag) )
     {
-        guest_handle = &v->runstate_guest.p->state_entry_time + 1;
+        guest_handle = &v->runstate_guest.virt.p->state_entry_time + 1;
         guest_handle--;
         v->runstate.state_entry_time |= XEN_RUNSTATE_UPDATE;
         __raw_copy_to_guest(guest_handle,
@@ -292,7 +290,7 @@ static void update_runstate_area(struct vcpu *v)
         smp_wmb();
     }
 
-    __copy_to_guest(runstate_guest(v), &v->runstate, 1);
+    __copy_to_guest(runstate_guest_virt(v), &v->runstate, 1);
 
     if ( guest_handle )
     {
@@ -303,6 +301,53 @@ static void update_runstate_area(struct vcpu *v)
     }
 }
 
+static void update_runstate_by_gpaddr(struct vcpu *v)
+{
+    struct vcpu_runstate_info *runstate =
+            (struct vcpu_runstate_info *)v->runstate_guest.phys;
+
+    ASSERT(runstate != NULL);
+
+    if ( VM_ASSIST(v->domain, runstate_update_flag) )
+    {
+        runstate->state_entry_time |= XEN_RUNSTATE_UPDATE;
+        smp_wmb();
+        v->runstate.state_entry_time |= XEN_RUNSTATE_UPDATE;
+    }
+
+    memcpy(v->runstate_guest.phys, &v->runstate, sizeof(v->runstate));
+
+    if ( VM_ASSIST(v->domain, runstate_update_flag) )
+    {
+        runstate->state_entry_time &= ~XEN_RUNSTATE_UPDATE;
+        smp_wmb();
+        v->runstate.state_entry_time &= ~XEN_RUNSTATE_UPDATE;
+    }
+}
+
+/* Update per-VCPU guest runstate shared memory area (if registered). */
+static void update_runstate_area(struct vcpu *v)
+{
+    if ( xchg(&v->runstate_in_use, 1) )
+        return;
+
+    switch ( v->runstate_guest_type )
+    {
+    case RUNSTATE_NONE:
+        break;
+
+    case RUNSTATE_VADDR:
+        update_runstate_by_gvaddr(v);
+        break;
+
+    case RUNSTATE_PADDR:
+        update_runstate_by_gpaddr(v);
+        break;
+    }
+
+    xchg(&v->runstate_in_use, 0);
+}
+
 static void schedule_tail(struct vcpu *prev)
 {
     ASSERT(prev != current);
@@ -998,6 +1043,7 @@ long do_arm_vcpu_op(int cmd, unsigned int vcpuid, XEN_GUEST_HANDLE_PARAM(void) a
     {
         case VCPUOP_register_vcpu_info:
         case VCPUOP_register_runstate_memory_area:
+        case VCPUOP_register_runstate_phys_memory_area:
             return do_vcpu_op(cmd, vcpuid, arg);
         default:
             return -EINVAL;
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index ac960dd..fe71776 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1566,22 +1566,21 @@ void paravirt_ctxt_switch_to(struct vcpu *v)
 }
 
 /* Update per-VCPU guest runstate shared memory area (if registered). */
-bool update_runstate_area(struct vcpu *v)
+static bool update_runstate_by_gvaddr(struct vcpu *v)
 {
     bool rc;
     struct guest_memory_policy policy = { .nested_guest_mode = false };
     void __user *guest_handle = NULL;
 
-    if ( guest_handle_is_null(runstate_guest(v)) )
-        return true;
+    ASSERT(!guest_handle_is_null(runstate_guest_virt(v)));
 
     update_guest_memory_policy(v, &policy);
 
     if ( VM_ASSIST(v->domain, runstate_update_flag) )
     {
         guest_handle = has_32bit_shinfo(v->domain)
-            ? &v->runstate_guest.compat.p->state_entry_time + 1
-            : &v->runstate_guest.native.p->state_entry_time + 1;
+            ? &v->runstate_guest.virt.compat.p->state_entry_time + 1
+            : &v->runstate_guest.virt.native.p->state_entry_time + 1;
         guest_handle--;
         v->runstate.state_entry_time |= XEN_RUNSTATE_UPDATE;
         __raw_copy_to_guest(guest_handle,
@@ -1594,11 +1593,11 @@ bool update_runstate_area(struct vcpu *v)
         struct compat_vcpu_runstate_info info;
 
         XLAT_vcpu_runstate_info(&info, &v->runstate);
-        __copy_to_guest(v->runstate_guest.compat, &info, 1);
+        __copy_to_guest(v->runstate_guest.virt.compat, &info, 1);
         rc = true;
     }
     else
-        rc = __copy_to_guest(runstate_guest(v), &v->runstate, 1) !=
+        rc = __copy_to_guest(runstate_guest_virt(v), &v->runstate, 1) !=
              sizeof(v->runstate);
 
     if ( guest_handle )
@@ -1614,6 +1613,92 @@ bool update_runstate_area(struct vcpu *v)
     return rc;
 }
 
+static bool update_runstate_by_gpaddr_native(struct vcpu *v)
+{
+    struct vcpu_runstate_info *runstate =
+            (struct vcpu_runstate_info *)v->runstate_guest.phys;
+
+    ASSERT(runstate != NULL);
+
+    if ( VM_ASSIST(v->domain, runstate_update_flag) )
+    {
+        runstate->state_entry_time |= XEN_RUNSTATE_UPDATE;
+        smp_wmb();
+        v->runstate.state_entry_time |= XEN_RUNSTATE_UPDATE;
+    }
+
+    memcpy(v->runstate_guest.phys, &v->runstate, sizeof(v->runstate));
+
+    if ( VM_ASSIST(v->domain, runstate_update_flag) )
+    {
+        runstate->state_entry_time &= ~XEN_RUNSTATE_UPDATE;
+        smp_wmb();
+        v->runstate.state_entry_time &= ~XEN_RUNSTATE_UPDATE;
+    }
+
+    return true;
+}
+
+static bool update_runstate_by_gpaddr_compat(struct vcpu *v)
+{
+    struct compat_vcpu_runstate_info *runstate =
+            (struct compat_vcpu_runstate_info *)v->runstate_guest.phys;
+
+    ASSERT(runstate != NULL);
+
+    if ( VM_ASSIST(v->domain, runstate_update_flag) )
+    {
+        runstate->state_entry_time |= XEN_RUNSTATE_UPDATE;
+        smp_wmb();
+        v->runstate.state_entry_time |= XEN_RUNSTATE_UPDATE;
+    }
+
+    {
+        struct compat_vcpu_runstate_info info;
+        XLAT_vcpu_runstate_info(&info, &v->runstate);
+        memcpy(v->runstate_guest.phys, &info, sizeof(info));
+    }
+    else
+        memcpy(v->runstate_guest.phys, &v->runstate, sizeof(v->runstate));
+
+    if ( VM_ASSIST(v->domain, runstate_update_flag) )
+    {
+        runstate->state_entry_time &= ~XEN_RUNSTATE_UPDATE;
+        smp_wmb();
+        v->runstate.state_entry_time &= ~XEN_RUNSTATE_UPDATE;
+    }
+
+    return true;
+}
+
+bool update_runstate_area(struct vcpu *v)
+{
+    bool rc = true;
+
+    if ( xchg(&v->runstate_in_use, 1) )
+        return rc;
+
+    switch ( v->runstate_guest_type )
+    {
+    case RUNSTATE_NONE:
+        break;
+
+    case RUNSTATE_VADDR:
+        rc = update_runstate_by_gvaddr(v);
+        break;
+
+    case RUNSTATE_PADDR:
+        if ( has_32bit_shinfo(v->domain) )
+            rc = update_runstate_by_gpaddr_compat(v);
+        else
+            rc = update_runstate_by_gpaddr_native(v);
+        break;
+    }
+
+    xchg(&v->runstate_in_use, 0);
+    return rc;
+}
+
 static void _update_runstate_area(struct vcpu *v)
 {
     if ( !update_runstate_area(v) && is_pv_vcpu(v) &&
diff --git a/xen/arch/x86/x86_64/domain.c b/xen/arch/x86/x86_64/domain.c
index c46dccc..85d0072 100644
--- a/xen/arch/x86/x86_64/domain.c
+++ b/xen/arch/x86/x86_64/domain.c
@@ -12,6 +12,8 @@
 CHECK_vcpu_get_physid;
 #undef xen_vcpu_get_physid
 
+extern void discard_runstate_area(struct vcpu *v);
+
 int
 arch_compat_vcpu_op(
     int cmd, struct vcpu *v, XEN_GUEST_HANDLE_PARAM(void) arg)
@@ -35,8 +37,16 @@ arch_compat_vcpu_op(
              !compat_handle_okay(area.addr.h, 1) )
             break;
 
+        while( xchg(&v->runstate_in_use, 1) == 0);
+
+        discard_runstate_area(v);
+
         rc = 0;
-        guest_from_compat_handle(v->runstate_guest.compat, area.addr.h);
+
+        guest_from_compat_handle(v->runstate_guest.virt.compat,
+                                 area.addr.h);
+
+        v->runstate_guest_type = RUNSTATE_VADDR;
 
         if ( v == current )
         {
@@ -49,7 +59,9 @@ arch_compat_vcpu_op(
             vcpu_runstate_get(v, &runstate);
             XLAT_vcpu_runstate_info(&info, &runstate);
         }
-        __copy_to_guest(v->runstate_guest.compat, &info, 1);
+        __copy_to_guest(v->runstate_guest.virt.compat, &info, 1);
+
+        xchg(&v->runstate_in_use, 0);
 
         break;
     }
diff --git a/xen/common/domain.c b/xen/common/domain.c
index 90c6607..d276b87 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -698,6 +698,74 @@ int rcu_lock_live_remote_domain_by_id(domid_t dom, struct domain **d)
     return 0;
 }
 
+static void unmap_runstate_area(struct vcpu *v)
+{
+    mfn_t mfn;
+
+    if ( ! v->runstate_guest.phys )
+        return;
+
+    mfn = domain_page_map_to_mfn(v->runstate_guest.phys);
+
+    unmap_domain_page_global((void *)
+                             ((unsigned long)v->runstate_guest.phys &
+                              PAGE_MASK));
+
+    v->runstate_guest.phys = NULL;
+    put_page_and_type(mfn_to_page(mfn));
+}
+
+static int map_runstate_area(struct vcpu *v,
+                      struct vcpu_register_runstate_memory_area *area)
+{
+    unsigned long offset = area->addr.p & ~PAGE_MASK;
+    gfn_t gfn = gaddr_to_gfn(area->addr.p);
+    struct domain *d = v->domain;
+    void *mapping;
+    struct page_info *page;
+    size_t size = sizeof(struct vcpu_runstate_info);
+
+    if ( offset > (PAGE_SIZE - size) )
+        return -EINVAL;
+
+    page = get_page_from_gfn(d, gfn_x(gfn), NULL, P2M_ALLOC);
+    if ( !page )
+        return -EINVAL;
+
+    if ( !get_page_type(page, PGT_writable_page) )
+    {
+        put_page(page);
+        return -EINVAL;
+    }
+
+    mapping = __map_domain_page_global(page);
+
+    if ( mapping == NULL )
+    {
+        put_page_and_type(page);
+        return -ENOMEM;
+    }
+
+    v->runstate_guest.phys = mapping + offset;
+
+    return 0;
+}
+
+void discard_runstate_area(struct vcpu *v)
+{
+    if ( v->runstate_guest_type == RUNSTATE_PADDR )
+        unmap_runstate_area(v);
+
+    v->runstate_guest_type = RUNSTATE_NONE;
+}
+
+static void discard_runstate_area_locked(struct vcpu *v)
+{
+    while ( xchg(&v->runstate_in_use, 1) );
+    discard_runstate_area(v);
+    xchg(&v->runstate_in_use, 0);
+}
+
 int domain_kill(struct domain *d)
 {
     int rc = 0;
@@ -734,7 +802,10 @@ int domain_kill(struct domain *d)
         if ( cpupool_move_domain(d, cpupool0) )
             return -ERESTART;
         for_each_vcpu ( d, v )
+        {
+            discard_runstate_area_locked(v);
             unmap_vcpu_info(v);
+        }
         d->is_dying = DOMDYING_dead;
         /* Mem event cleanup has to go here because the rings 
          * have to be put before we call put_domain. */
@@ -1188,7 +1259,7 @@ int domain_soft_reset(struct domain *d)
 
     for_each_vcpu ( d, v )
     {
-        set_xen_guest_handle(runstate_guest(v), NULL);
+        discard_runstate_area_locked(v);
         unmap_vcpu_info(v);
     }
 
@@ -1518,18 +1589,50 @@ long do_vcpu_op(int cmd, unsigned int vcpuid, XEN_GUEST_HANDLE_PARAM(void) arg)
             break;
 
         rc = 0;
-        runstate_guest(v) = area.addr.h;
 
-        if ( v == current )
-        {
-            __copy_to_guest(runstate_guest(v), &v->runstate, 1);
-        }
-        else
+        while( xchg(&v->runstate_in_use, 1) == 0);
+
+        discard_runstate_area(v);
+
+        if ( !guest_handle_is_null(runstate_guest_virt(v)) )
         {
-            vcpu_runstate_get(v, &runstate);
-            __copy_to_guest(runstate_guest(v), &runstate, 1);
+            runstate_guest_virt(v) = area.addr.h;
+            v->runstate_guest_type = RUNSTATE_VADDR;
+
+            if ( v == current )
+            {
+                __copy_to_guest(runstate_guest_virt(v), &v->runstate, 1);
+            }
+            else
+            {
+                vcpu_runstate_get(v, &runstate);
+                __copy_to_guest(runstate_guest_virt(v), &runstate, 1);
+            }
         }
 
+        xchg(&v->runstate_in_use, 0);
+
+        break;
+    }
+
+    case VCPUOP_register_runstate_phys_memory_area:
+    {
+        struct vcpu_register_runstate_memory_area area;
+
+        rc = -EFAULT;
+        if ( copy_from_guest(&area, arg, 1) )
+             break;
+
+        while( xchg(&v->runstate_in_use, 1) == 0);
+
+        discard_runstate_area(v);
+
+        rc = map_runstate_area(v, &area);
+        if ( !rc )
+            v->runstate_guest_type = RUNSTATE_PADDR;
+
+        xchg(&v->runstate_in_use, 0);
+
         break;
     }
 
diff --git a/xen/include/public/vcpu.h b/xen/include/public/vcpu.h
index 3623af9..d7da4a3 100644
--- a/xen/include/public/vcpu.h
+++ b/xen/include/public/vcpu.h
@@ -235,6 +235,21 @@ struct vcpu_register_time_memory_area {
 typedef struct vcpu_register_time_memory_area vcpu_register_time_memory_area_t;
 DEFINE_XEN_GUEST_HANDLE(vcpu_register_time_memory_area_t);
 
+/*
+ * Register a shared memory area from which the guest may obtain its own
+ * runstate information without needing to execute a hypercall.
+ * Notes:
+ *  1. The registered address must be guest's physical address.
+ *  2. The registered runstate area should not cross page boundary.
+ *  3. Only one shared area may be registered per VCPU. The shared area is
+ *     updated by the hypervisor each time the VCPU is scheduled. Thus
+ *     runstate.state will always be RUNSTATE_running and
+ *     runstate.state_entry_time will indicate the system time at which the
+ *     VCPU was last scheduled to run.
+ * @extra_arg == pointer to vcpu_register_runstate_memory_area structure.
+ */
+#define VCPUOP_register_runstate_phys_memory_area 14
+
 #endif /* __XEN_PUBLIC_VCPU_H__ */
 
 /*
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 2201fac..6c8de8f 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -163,17 +163,31 @@ struct vcpu
     void            *sched_priv;    /* scheduler-specific data */
 
     struct vcpu_runstate_info runstate;
+
+    enum {
+        RUNSTATE_NONE = 0,
+        RUNSTATE_PADDR = 1,
+        RUNSTATE_VADDR = 2,
+    } runstate_guest_type;
+
+    unsigned long runstate_in_use;
+
+    union
+    {
 #ifndef CONFIG_COMPAT
-# define runstate_guest(v) ((v)->runstate_guest)
-    XEN_GUEST_HANDLE(vcpu_runstate_info_t) runstate_guest; /* guest address */
+# define runstate_guest_virt(v) ((v)->runstate_guest.virt)
+           XEN_GUEST_HANDLE(vcpu_runstate_info_t) virt; /* guest address */
 #else
-# define runstate_guest(v) ((v)->runstate_guest.native)
-    union {
-        XEN_GUEST_HANDLE(vcpu_runstate_info_t) native;
-        XEN_GUEST_HANDLE(vcpu_runstate_info_compat_t) compat;
-    } runstate_guest; /* guest address */
+# define runstate_guest_virt(v) ((v)->runstate_guest.virt.native)
+           union {
+               XEN_GUEST_HANDLE(vcpu_runstate_info_t) native;
+               XEN_GUEST_HANDLE(vcpu_runstate_info_compat_t) compat;
+           } virt; /* guest address */
 #endif
 
+        void*   phys;
+    } runstate_guest;
+
     /* last time when vCPU is scheduled out */
     uint64_t last_run_time;
 
-- 
2.7.4


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

* [PATCH RFC 1] [DO NOT APPLY] introduce VCPUOP_register_runstate_phys_memory_area hypercall
  2019-05-24 18:12 [PATCH RFC 2] [DO NOT APPLY] introduce VCPUOP_register_runstate_phys_memory_area hypercall Andrii Anisov
                   ` (2 preceding siblings ...)
  2019-05-24 18:12 ` [PATCH v3] xen: introduce VCPUOP_register_runstate_phys_memory_area hypercall Andrii Anisov
@ 2019-05-24 18:12 ` " Andrii Anisov
  2019-05-24 18:12   ` [Xen-devel] " Andrii Anisov
  2019-05-28  8:59 ` [PATCH RFC 2] " Julien Grall
  4 siblings, 1 reply; 51+ messages in thread
From: Andrii Anisov @ 2019-05-24 18:12 UTC (permalink / raw)
  Cc: Stefano Stabellini, Andrii Anisov, Konrad Rzeszutek Wilk,
	George Dunlap, Andrew Cooper, Ian Jackson, Tim Deegan,
	Julien Grall, Jan Beulich, xen-devel, Wei Liu

From: Andrii Anisov <andrii_anisov@epam.com>

An RFC version of the runstate registration with phys address.
Runstate area access is implemented with mapping on each access, like
old interface did.

Signed-off-by: Andrii Anisov <andrii_anisov@epam.com>
---
 xen/arch/arm/domain.c     | 63 ++++++++++++++++++++++++++++++++++++++++++-----
 xen/common/domain.c       | 51 +++++++++++++++++++++++++++++++++++---
 xen/include/public/vcpu.h | 15 +++++++++++
 xen/include/xen/sched.h   | 28 +++++++++++++++------
 4 files changed, 140 insertions(+), 17 deletions(-)

diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
index a9f7ff5..610957a 100644
--- a/xen/arch/arm/domain.c
+++ b/xen/arch/arm/domain.c
@@ -274,17 +274,15 @@ static void ctxt_switch_to(struct vcpu *n)
     virt_timer_restore(n);
 }
 
-/* Update per-VCPU guest runstate shared memory area (if registered). */
-static void update_runstate_area(struct vcpu *v)
+static void update_runstate_by_gvaddr(struct vcpu *v)
 {
     void __user *guest_handle = NULL;
 
-    if ( guest_handle_is_null(runstate_guest(v)) )
-        return;
+    ASSERT(!guest_handle_is_null(runstate_guest_virt(v)));
 
     if ( VM_ASSIST(v->domain, runstate_update_flag) )
     {
-        guest_handle = &v->runstate_guest.p->state_entry_time + 1;
+        guest_handle = &v->runstate_guest.virt.p->state_entry_time + 1;
         guest_handle--;
         v->runstate.state_entry_time |= XEN_RUNSTATE_UPDATE;
         __raw_copy_to_guest(guest_handle,
@@ -292,7 +290,7 @@ static void update_runstate_area(struct vcpu *v)
         smp_wmb();
     }
 
-    __copy_to_guest(runstate_guest(v), &v->runstate, 1);
+    __copy_to_guest(runstate_guest_virt(v), &v->runstate, 1);
 
     if ( guest_handle )
     {
@@ -303,6 +301,58 @@ static void update_runstate_area(struct vcpu *v)
     }
 }
 
+extern int map_runstate_area(struct vcpu *v, struct vcpu_runstate_info **area);
+extern void unmap_runstate_area(struct vcpu_runstate_info *area);
+
+static void update_runstate_by_gpaddr(struct vcpu *v)
+{
+    struct domain *d = v->domain;
+    paddr_t gpaddr = 0;
+
+
+    if ( VM_ASSIST(v->domain, runstate_update_flag) )
+    {
+        gpaddr = v->runstate_guest.phys + offsetof(struct vcpu_runstate_info, state_entry_time) + sizeof(uint64_t) - 1;
+        v->runstate.state_entry_time |= XEN_RUNSTATE_UPDATE;
+        copy_to_guest_phys_flush_dcache (d, gpaddr,
+                                         (void *)(&v->runstate.state_entry_time + 1) - 1, 1);
+        smp_wmb();
+    }
+
+    copy_to_guest_phys_flush_dcache (d, v->runstate_guest.phys, &v->runstate, sizeof(struct vcpu_runstate_info));
+
+    if ( gpaddr )
+    {
+        v->runstate.state_entry_time &= ~XEN_RUNSTATE_UPDATE;
+        smp_wmb();
+        copy_to_guest_phys_flush_dcache (d, gpaddr,
+                                         (void *)(&v->runstate.state_entry_time + 1) - 1, 1);
+    }
+}
+
+/* Update per-VCPU guest runstate shared memory area (if registered). */
+static void update_runstate_area(struct vcpu *v)
+{
+    if ( xchg(&v->runstate_in_use, 1) )
+        return;
+
+    switch ( v->runstate_guest_type )
+    {
+    case RUNSTATE_NONE:
+       break;
+
+    case RUNSTATE_VADDR:
+       update_runstate_by_gvaddr(v);
+       break;
+
+    case RUNSTATE_PADDR:
+       update_runstate_by_gpaddr(v);
+       break;
+    }
+
+    xchg(&v->runstate_in_use, 0);
+}
+
 static void schedule_tail(struct vcpu *prev)
 {
     ctxt_switch_from(prev);
@@ -998,6 +1048,7 @@ long do_arm_vcpu_op(int cmd, unsigned int vcpuid, XEN_GUEST_HANDLE_PARAM(void) a
     {
         case VCPUOP_register_vcpu_info:
         case VCPUOP_register_runstate_memory_area:
+        case VCPUOP_register_runstate_phys_memory_area:
             return do_vcpu_op(cmd, vcpuid, arg);
         default:
             return -EINVAL;
diff --git a/xen/common/domain.c b/xen/common/domain.c
index 32bca8d..b58d6dd 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -700,6 +700,18 @@ int rcu_lock_live_remote_domain_by_id(domid_t dom, struct domain **d)
     return 0;
 }
 
+static void discard_runstate_area(struct vcpu *v)
+{
+    v->runstate_guest_type = RUNSTATE_NONE;
+}
+
+static void discard_runstate_area_locked(struct vcpu *v)
+{
+    while ( xchg(&v->runstate_in_use, 1) );
+    discard_runstate_area(v);
+    xchg(&v->runstate_in_use, 0);
+}
+
 int domain_kill(struct domain *d)
 {
     int rc = 0;
@@ -738,7 +750,10 @@ int domain_kill(struct domain *d)
         if ( cpupool_move_domain(d, cpupool0) )
             return -ERESTART;
         for_each_vcpu ( d, v )
+        {
+            discard_runstate_area_locked(v);
             unmap_vcpu_info(v);
+        }
         d->is_dying = DOMDYING_dead;
         /* Mem event cleanup has to go here because the rings 
          * have to be put before we call put_domain. */
@@ -1192,7 +1207,7 @@ int domain_soft_reset(struct domain *d)
 
     for_each_vcpu ( d, v )
     {
-        set_xen_guest_handle(runstate_guest(v), NULL);
+        discard_runstate_area_locked(v);
         unmap_vcpu_info(v);
     }
 
@@ -1520,18 +1535,46 @@ long do_vcpu_op(int cmd, unsigned int vcpuid, XEN_GUEST_HANDLE_PARAM(void) arg)
             break;
 
         rc = 0;
-        runstate_guest(v) = area.addr.h;
+
+        while( xchg(&v->runstate_in_use, 1) == 0);
+
+        discard_runstate_area(v);
+
+        runstate_guest_virt(v) = area.addr.h;
+        v->runstate_guest_type = RUNSTATE_VADDR;
 
         if ( v == current )
         {
-            __copy_to_guest(runstate_guest(v), &v->runstate, 1);
+            __copy_to_guest(runstate_guest_virt(v), &v->runstate, 1);
         }
         else
         {
             vcpu_runstate_get(v, &runstate);
-            __copy_to_guest(runstate_guest(v), &runstate, 1);
+            __copy_to_guest(runstate_guest_virt(v), &runstate, 1);
         }
 
+        xchg(&v->runstate_in_use, 0);
+
+        break;
+    }
+
+    case VCPUOP_register_runstate_phys_memory_area:
+    {
+        struct vcpu_register_runstate_memory_area area;
+
+        rc = -EFAULT;
+        if ( copy_from_guest(&area, arg, 1) )
+             break;
+
+        while( xchg(&v->runstate_in_use, 1) == 0);
+
+        discard_runstate_area(v);
+        v->runstate_guest.phys = area.addr.p;
+        v->runstate_guest_type = RUNSTATE_PADDR;
+
+        xchg(&v->runstate_in_use, 0);
+        rc = 0;
+
         break;
     }
 
diff --git a/xen/include/public/vcpu.h b/xen/include/public/vcpu.h
index 3623af9..d7da4a3 100644
--- a/xen/include/public/vcpu.h
+++ b/xen/include/public/vcpu.h
@@ -235,6 +235,21 @@ struct vcpu_register_time_memory_area {
 typedef struct vcpu_register_time_memory_area vcpu_register_time_memory_area_t;
 DEFINE_XEN_GUEST_HANDLE(vcpu_register_time_memory_area_t);
 
+/*
+ * Register a shared memory area from which the guest may obtain its own
+ * runstate information without needing to execute a hypercall.
+ * Notes:
+ *  1. The registered address must be guest's physical address.
+ *  2. The registered runstate area should not cross page boundary.
+ *  3. Only one shared area may be registered per VCPU. The shared area is
+ *     updated by the hypervisor each time the VCPU is scheduled. Thus
+ *     runstate.state will always be RUNSTATE_running and
+ *     runstate.state_entry_time will indicate the system time at which the
+ *     VCPU was last scheduled to run.
+ * @extra_arg == pointer to vcpu_register_runstate_memory_area structure.
+ */
+#define VCPUOP_register_runstate_phys_memory_area 14
+
 #endif /* __XEN_PUBLIC_VCPU_H__ */
 
 /*
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index edee52d..8ac597b 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -163,17 +163,31 @@ struct vcpu
     void            *sched_priv;    /* scheduler-specific data */
 
     struct vcpu_runstate_info runstate;
+
+    enum {
+        RUNSTATE_NONE = 0,
+        RUNSTATE_PADDR = 1,
+        RUNSTATE_VADDR = 2,
+    } runstate_guest_type;
+
+    unsigned long runstate_in_use;
+
+    union
+    {
 #ifndef CONFIG_COMPAT
-# define runstate_guest(v) ((v)->runstate_guest)
-    XEN_GUEST_HANDLE(vcpu_runstate_info_t) runstate_guest; /* guest address */
+# define runstate_guest_virt(v) ((v)->runstate_guest.virt)
+           XEN_GUEST_HANDLE(vcpu_runstate_info_t) virt; /* guest address */
 #else
-# define runstate_guest(v) ((v)->runstate_guest.native)
-    union {
-        XEN_GUEST_HANDLE(vcpu_runstate_info_t) native;
-        XEN_GUEST_HANDLE(vcpu_runstate_info_compat_t) compat;
-    } runstate_guest; /* guest address */
+# define runstate_guest_virt(v) ((v)->runstate_guest.virt.native)
+           union {
+               XEN_GUEST_HANDLE(vcpu_runstate_info_t) native;
+               XEN_GUEST_HANDLE(vcpu_runstate_info_compat_t) compat;
+           } virt; /* guest address */
 #endif
 
+        paddr_t phys;
+    } runstate_guest;
+
     /* last time when vCPU is scheduled out */
     uint64_t last_run_time;
 
-- 
2.7.4


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

* [Xen-devel] [PATCH RFC 1] [DO NOT APPLY] introduce VCPUOP_register_runstate_phys_memory_area hypercall
  2019-05-24 18:12 ` [PATCH RFC 1] [DO NOT APPLY] " Andrii Anisov
@ 2019-05-24 18:12   ` " Andrii Anisov
  0 siblings, 0 replies; 51+ messages in thread
From: Andrii Anisov @ 2019-05-24 18:12 UTC (permalink / raw)
  Cc: Stefano Stabellini, Andrii Anisov, Konrad Rzeszutek Wilk,
	George Dunlap, Andrew Cooper, Ian Jackson, Tim Deegan,
	Julien Grall, Jan Beulich, xen-devel, Wei Liu

From: Andrii Anisov <andrii_anisov@epam.com>

An RFC version of the runstate registration with phys address.
Runstate area access is implemented with mapping on each access, like
old interface did.

Signed-off-by: Andrii Anisov <andrii_anisov@epam.com>
---
 xen/arch/arm/domain.c     | 63 ++++++++++++++++++++++++++++++++++++++++++-----
 xen/common/domain.c       | 51 +++++++++++++++++++++++++++++++++++---
 xen/include/public/vcpu.h | 15 +++++++++++
 xen/include/xen/sched.h   | 28 +++++++++++++++------
 4 files changed, 140 insertions(+), 17 deletions(-)

diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
index a9f7ff5..610957a 100644
--- a/xen/arch/arm/domain.c
+++ b/xen/arch/arm/domain.c
@@ -274,17 +274,15 @@ static void ctxt_switch_to(struct vcpu *n)
     virt_timer_restore(n);
 }
 
-/* Update per-VCPU guest runstate shared memory area (if registered). */
-static void update_runstate_area(struct vcpu *v)
+static void update_runstate_by_gvaddr(struct vcpu *v)
 {
     void __user *guest_handle = NULL;
 
-    if ( guest_handle_is_null(runstate_guest(v)) )
-        return;
+    ASSERT(!guest_handle_is_null(runstate_guest_virt(v)));
 
     if ( VM_ASSIST(v->domain, runstate_update_flag) )
     {
-        guest_handle = &v->runstate_guest.p->state_entry_time + 1;
+        guest_handle = &v->runstate_guest.virt.p->state_entry_time + 1;
         guest_handle--;
         v->runstate.state_entry_time |= XEN_RUNSTATE_UPDATE;
         __raw_copy_to_guest(guest_handle,
@@ -292,7 +290,7 @@ static void update_runstate_area(struct vcpu *v)
         smp_wmb();
     }
 
-    __copy_to_guest(runstate_guest(v), &v->runstate, 1);
+    __copy_to_guest(runstate_guest_virt(v), &v->runstate, 1);
 
     if ( guest_handle )
     {
@@ -303,6 +301,58 @@ static void update_runstate_area(struct vcpu *v)
     }
 }
 
+extern int map_runstate_area(struct vcpu *v, struct vcpu_runstate_info **area);
+extern void unmap_runstate_area(struct vcpu_runstate_info *area);
+
+static void update_runstate_by_gpaddr(struct vcpu *v)
+{
+    struct domain *d = v->domain;
+    paddr_t gpaddr = 0;
+
+
+    if ( VM_ASSIST(v->domain, runstate_update_flag) )
+    {
+        gpaddr = v->runstate_guest.phys + offsetof(struct vcpu_runstate_info, state_entry_time) + sizeof(uint64_t) - 1;
+        v->runstate.state_entry_time |= XEN_RUNSTATE_UPDATE;
+        copy_to_guest_phys_flush_dcache (d, gpaddr,
+                                         (void *)(&v->runstate.state_entry_time + 1) - 1, 1);
+        smp_wmb();
+    }
+
+    copy_to_guest_phys_flush_dcache (d, v->runstate_guest.phys, &v->runstate, sizeof(struct vcpu_runstate_info));
+
+    if ( gpaddr )
+    {
+        v->runstate.state_entry_time &= ~XEN_RUNSTATE_UPDATE;
+        smp_wmb();
+        copy_to_guest_phys_flush_dcache (d, gpaddr,
+                                         (void *)(&v->runstate.state_entry_time + 1) - 1, 1);
+    }
+}
+
+/* Update per-VCPU guest runstate shared memory area (if registered). */
+static void update_runstate_area(struct vcpu *v)
+{
+    if ( xchg(&v->runstate_in_use, 1) )
+        return;
+
+    switch ( v->runstate_guest_type )
+    {
+    case RUNSTATE_NONE:
+       break;
+
+    case RUNSTATE_VADDR:
+       update_runstate_by_gvaddr(v);
+       break;
+
+    case RUNSTATE_PADDR:
+       update_runstate_by_gpaddr(v);
+       break;
+    }
+
+    xchg(&v->runstate_in_use, 0);
+}
+
 static void schedule_tail(struct vcpu *prev)
 {
     ctxt_switch_from(prev);
@@ -998,6 +1048,7 @@ long do_arm_vcpu_op(int cmd, unsigned int vcpuid, XEN_GUEST_HANDLE_PARAM(void) a
     {
         case VCPUOP_register_vcpu_info:
         case VCPUOP_register_runstate_memory_area:
+        case VCPUOP_register_runstate_phys_memory_area:
             return do_vcpu_op(cmd, vcpuid, arg);
         default:
             return -EINVAL;
diff --git a/xen/common/domain.c b/xen/common/domain.c
index 32bca8d..b58d6dd 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -700,6 +700,18 @@ int rcu_lock_live_remote_domain_by_id(domid_t dom, struct domain **d)
     return 0;
 }
 
+static void discard_runstate_area(struct vcpu *v)
+{
+    v->runstate_guest_type = RUNSTATE_NONE;
+}
+
+static void discard_runstate_area_locked(struct vcpu *v)
+{
+    while ( xchg(&v->runstate_in_use, 1) );
+    discard_runstate_area(v);
+    xchg(&v->runstate_in_use, 0);
+}
+
 int domain_kill(struct domain *d)
 {
     int rc = 0;
@@ -738,7 +750,10 @@ int domain_kill(struct domain *d)
         if ( cpupool_move_domain(d, cpupool0) )
             return -ERESTART;
         for_each_vcpu ( d, v )
+        {
+            discard_runstate_area_locked(v);
             unmap_vcpu_info(v);
+        }
         d->is_dying = DOMDYING_dead;
         /* Mem event cleanup has to go here because the rings 
          * have to be put before we call put_domain. */
@@ -1192,7 +1207,7 @@ int domain_soft_reset(struct domain *d)
 
     for_each_vcpu ( d, v )
     {
-        set_xen_guest_handle(runstate_guest(v), NULL);
+        discard_runstate_area_locked(v);
         unmap_vcpu_info(v);
     }
 
@@ -1520,18 +1535,46 @@ long do_vcpu_op(int cmd, unsigned int vcpuid, XEN_GUEST_HANDLE_PARAM(void) arg)
             break;
 
         rc = 0;
-        runstate_guest(v) = area.addr.h;
+
+        while( xchg(&v->runstate_in_use, 1) == 0);
+
+        discard_runstate_area(v);
+
+        runstate_guest_virt(v) = area.addr.h;
+        v->runstate_guest_type = RUNSTATE_VADDR;
 
         if ( v == current )
         {
-            __copy_to_guest(runstate_guest(v), &v->runstate, 1);
+            __copy_to_guest(runstate_guest_virt(v), &v->runstate, 1);
         }
         else
         {
             vcpu_runstate_get(v, &runstate);
-            __copy_to_guest(runstate_guest(v), &runstate, 1);
+            __copy_to_guest(runstate_guest_virt(v), &runstate, 1);
         }
 
+        xchg(&v->runstate_in_use, 0);
+
+        break;
+    }
+
+    case VCPUOP_register_runstate_phys_memory_area:
+    {
+        struct vcpu_register_runstate_memory_area area;
+
+        rc = -EFAULT;
+        if ( copy_from_guest(&area, arg, 1) )
+             break;
+
+        while( xchg(&v->runstate_in_use, 1) == 0);
+
+        discard_runstate_area(v);
+        v->runstate_guest.phys = area.addr.p;
+        v->runstate_guest_type = RUNSTATE_PADDR;
+
+        xchg(&v->runstate_in_use, 0);
+        rc = 0;
+
         break;
     }
 
diff --git a/xen/include/public/vcpu.h b/xen/include/public/vcpu.h
index 3623af9..d7da4a3 100644
--- a/xen/include/public/vcpu.h
+++ b/xen/include/public/vcpu.h
@@ -235,6 +235,21 @@ struct vcpu_register_time_memory_area {
 typedef struct vcpu_register_time_memory_area vcpu_register_time_memory_area_t;
 DEFINE_XEN_GUEST_HANDLE(vcpu_register_time_memory_area_t);
 
+/*
+ * Register a shared memory area from which the guest may obtain its own
+ * runstate information without needing to execute a hypercall.
+ * Notes:
+ *  1. The registered address must be guest's physical address.
+ *  2. The registered runstate area should not cross page boundary.
+ *  3. Only one shared area may be registered per VCPU. The shared area is
+ *     updated by the hypervisor each time the VCPU is scheduled. Thus
+ *     runstate.state will always be RUNSTATE_running and
+ *     runstate.state_entry_time will indicate the system time at which the
+ *     VCPU was last scheduled to run.
+ * @extra_arg == pointer to vcpu_register_runstate_memory_area structure.
+ */
+#define VCPUOP_register_runstate_phys_memory_area 14
+
 #endif /* __XEN_PUBLIC_VCPU_H__ */
 
 /*
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index edee52d..8ac597b 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -163,17 +163,31 @@ struct vcpu
     void            *sched_priv;    /* scheduler-specific data */
 
     struct vcpu_runstate_info runstate;
+
+    enum {
+        RUNSTATE_NONE = 0,
+        RUNSTATE_PADDR = 1,
+        RUNSTATE_VADDR = 2,
+    } runstate_guest_type;
+
+    unsigned long runstate_in_use;
+
+    union
+    {
 #ifndef CONFIG_COMPAT
-# define runstate_guest(v) ((v)->runstate_guest)
-    XEN_GUEST_HANDLE(vcpu_runstate_info_t) runstate_guest; /* guest address */
+# define runstate_guest_virt(v) ((v)->runstate_guest.virt)
+           XEN_GUEST_HANDLE(vcpu_runstate_info_t) virt; /* guest address */
 #else
-# define runstate_guest(v) ((v)->runstate_guest.native)
-    union {
-        XEN_GUEST_HANDLE(vcpu_runstate_info_t) native;
-        XEN_GUEST_HANDLE(vcpu_runstate_info_compat_t) compat;
-    } runstate_guest; /* guest address */
+# define runstate_guest_virt(v) ((v)->runstate_guest.virt.native)
+           union {
+               XEN_GUEST_HANDLE(vcpu_runstate_info_t) native;
+               XEN_GUEST_HANDLE(vcpu_runstate_info_compat_t) compat;
+           } virt; /* guest address */
 #endif
 
+        paddr_t phys;
+    } runstate_guest;
+
     /* last time when vCPU is scheduled out */
     uint64_t last_run_time;
 
-- 
2.7.4


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH RFC 2] [DO NOT APPLY] introduce VCPUOP_register_runstate_phys_memory_area hypercall
  2019-05-24 18:12 [PATCH RFC 2] [DO NOT APPLY] introduce VCPUOP_register_runstate_phys_memory_area hypercall Andrii Anisov
                   ` (3 preceding siblings ...)
  2019-05-24 18:12 ` [PATCH RFC 1] [DO NOT APPLY] " Andrii Anisov
@ 2019-05-28  8:59 ` " Julien Grall
  2019-05-28  8:59   ` [Xen-devel] " Julien Grall
  2019-05-28  9:17   ` Andrii Anisov
  4 siblings, 2 replies; 51+ messages in thread
From: Julien Grall @ 2019-05-28  8:59 UTC (permalink / raw)
  To: Andrii Anisov
  Cc: Stefano Stabellini, Andrii Anisov, Konrad Rzeszutek Wilk,
	George Dunlap, Andrew Cooper, Ian Jackson, Tim Deegan,
	Jan Beulich, xen-devel, Wei Liu

Hi Andrii,

I am not answering on the content yet, I will do that separately. The threading 
for this series looks quite confusing. The head of the thread is this patch (i.e 
RFC 2) but then you have a cover letter within the thread tagged "V3".

 From what I understand, the 2 e-mails tagged "V3" is the original version where 
as RFC 2 and RFC 3 are variants. Am I correct?

If so, for next time, I would recommend to have the cover letter first and then 
all the patches send "In-Reply-To" the cover letter. This makes easier to track 
series.

Cheers,

-- 
Julien Grall

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [Xen-devel] [PATCH RFC 2] [DO NOT APPLY] introduce VCPUOP_register_runstate_phys_memory_area hypercall
  2019-05-28  8:59 ` [PATCH RFC 2] " Julien Grall
@ 2019-05-28  8:59   ` " Julien Grall
  2019-05-28  9:17   ` Andrii Anisov
  1 sibling, 0 replies; 51+ messages in thread
From: Julien Grall @ 2019-05-28  8:59 UTC (permalink / raw)
  To: Andrii Anisov
  Cc: Stefano Stabellini, Andrii Anisov, Konrad Rzeszutek Wilk,
	George Dunlap, Andrew Cooper, Ian Jackson, Tim Deegan,
	Jan Beulich, xen-devel, Wei Liu

Hi Andrii,

I am not answering on the content yet, I will do that separately. The threading 
for this series looks quite confusing. The head of the thread is this patch (i.e 
RFC 2) but then you have a cover letter within the thread tagged "V3".

 From what I understand, the 2 e-mails tagged "V3" is the original version where 
as RFC 2 and RFC 3 are variants. Am I correct?

If so, for next time, I would recommend to have the cover letter first and then 
all the patches send "In-Reply-To" the cover letter. This makes easier to track 
series.

Cheers,

-- 
Julien Grall

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH RFC 2] [DO NOT APPLY] introduce VCPUOP_register_runstate_phys_memory_area hypercall
  2019-05-28  8:59 ` [PATCH RFC 2] " Julien Grall
  2019-05-28  8:59   ` [Xen-devel] " Julien Grall
@ 2019-05-28  9:17   ` Andrii Anisov
  2019-05-28  9:17     ` [Xen-devel] " Andrii Anisov
  2019-05-28  9:23     ` Julien Grall
  1 sibling, 2 replies; 51+ messages in thread
From: Andrii Anisov @ 2019-05-28  9:17 UTC (permalink / raw)
  To: Julien Grall
  Cc: Stefano Stabellini, Andrii Anisov, Konrad Rzeszutek Wilk,
	George Dunlap, Andrew Cooper, Ian Jackson, Tim Deegan,
	Jan Beulich, xen-devel, Wei Liu

Hello Julien,

On 28.05.19 11:59, Julien Grall wrote:
> I am not answering on the content yet, I will do that separately. The threading for this series looks quite confusing. The head of the thread is this patch (i.e RFC 2) but then you have a cover letter within the thread tagged "V3".

Actually I've noticed the mangled threading. But not sure why it is so. I just passed a folder with those patches to git-send-mail.

>  From what I understand, the 2 e-mails tagged "V3" is the original version where as RFC 2 and RFC 3 are variants. Am I correct?

Yes, sure.

> If so, for next time, I would recommend to have the cover letter first and then all the patches send "In-Reply-To" the cover letter. This makes easier to track series.

It might help. But before it worked well without additional tricks.


-- 
Sincerely,
Andrii Anisov.

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [Xen-devel] [PATCH RFC 2] [DO NOT APPLY] introduce VCPUOP_register_runstate_phys_memory_area hypercall
  2019-05-28  9:17   ` Andrii Anisov
@ 2019-05-28  9:17     ` " Andrii Anisov
  2019-05-28  9:23     ` Julien Grall
  1 sibling, 0 replies; 51+ messages in thread
From: Andrii Anisov @ 2019-05-28  9:17 UTC (permalink / raw)
  To: Julien Grall
  Cc: Stefano Stabellini, Andrii Anisov, Konrad Rzeszutek Wilk,
	George Dunlap, Andrew Cooper, Ian Jackson, Tim Deegan,
	Jan Beulich, xen-devel, Wei Liu

Hello Julien,

On 28.05.19 11:59, Julien Grall wrote:
> I am not answering on the content yet, I will do that separately. The threading for this series looks quite confusing. The head of the thread is this patch (i.e RFC 2) but then you have a cover letter within the thread tagged "V3".

Actually I've noticed the mangled threading. But not sure why it is so. I just passed a folder with those patches to git-send-mail.

>  From what I understand, the 2 e-mails tagged "V3" is the original version where as RFC 2 and RFC 3 are variants. Am I correct?

Yes, sure.

> If so, for next time, I would recommend to have the cover letter first and then all the patches send "In-Reply-To" the cover letter. This makes easier to track series.

It might help. But before it worked well without additional tricks.


-- 
Sincerely,
Andrii Anisov.

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH RFC 2] [DO NOT APPLY] introduce VCPUOP_register_runstate_phys_memory_area hypercall
  2019-05-28  9:17   ` Andrii Anisov
  2019-05-28  9:17     ` [Xen-devel] " Andrii Anisov
@ 2019-05-28  9:23     ` Julien Grall
  2019-05-28  9:23       ` [Xen-devel] " Julien Grall
  2019-05-28  9:36       ` Andrii Anisov
  1 sibling, 2 replies; 51+ messages in thread
From: Julien Grall @ 2019-05-28  9:23 UTC (permalink / raw)
  To: Andrii Anisov
  Cc: Stefano Stabellini, Andrii Anisov, Konrad Rzeszutek Wilk,
	George Dunlap, Andrew Cooper, Ian Jackson, Tim Deegan,
	Jan Beulich, xen-devel, Wei Liu



On 28/05/2019 10:17, Andrii Anisov wrote:
> Hello Julien,
> 
> On 28.05.19 11:59, Julien Grall wrote:
>> I am not answering on the content yet, I will do that separately. The 
>> threading for this series looks quite confusing. The head of the thread is 
>> this patch (i.e RFC 2) but then you have a cover letter within the thread 
>> tagged "V3".
> 
> Actually I've noticed the mangled threading. But not sure why it is so. I just 
> passed a folder with those patches to git-send-mail.

IIRC, git-send-email will use the first e-mail in the alphabetical order as the 
"top e-mail" and all the other will be send "In-Reply-To".

> 
>>  From what I understand, the 2 e-mails tagged "V3" is the original version 
>> where as RFC 2 and RFC 3 are variants. Am I correct?
> 
> Yes, sure.
> 
>> If so, for next time, I would recommend to have the cover letter first and 
>> then all the patches send "In-Reply-To" the cover letter. This makes easier to 
>> track series.
> 
> It might help. But before it worked well without additional tricks.

In general, all the patch sent are starting with a number followed by the title. 
The number ensure the correct ordering. If you don't have number then, you will 
fallback to alphabetical order.

Cheers,

-- 
Julien Grall

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [Xen-devel] [PATCH RFC 2] [DO NOT APPLY] introduce VCPUOP_register_runstate_phys_memory_area hypercall
  2019-05-28  9:23     ` Julien Grall
@ 2019-05-28  9:23       ` " Julien Grall
  2019-05-28  9:36       ` Andrii Anisov
  1 sibling, 0 replies; 51+ messages in thread
From: Julien Grall @ 2019-05-28  9:23 UTC (permalink / raw)
  To: Andrii Anisov
  Cc: Stefano Stabellini, Andrii Anisov, Konrad Rzeszutek Wilk,
	George Dunlap, Andrew Cooper, Ian Jackson, Tim Deegan,
	Jan Beulich, xen-devel, Wei Liu



On 28/05/2019 10:17, Andrii Anisov wrote:
> Hello Julien,
> 
> On 28.05.19 11:59, Julien Grall wrote:
>> I am not answering on the content yet, I will do that separately. The 
>> threading for this series looks quite confusing. The head of the thread is 
>> this patch (i.e RFC 2) but then you have a cover letter within the thread 
>> tagged "V3".
> 
> Actually I've noticed the mangled threading. But not sure why it is so. I just 
> passed a folder with those patches to git-send-mail.

IIRC, git-send-email will use the first e-mail in the alphabetical order as the 
"top e-mail" and all the other will be send "In-Reply-To".

> 
>>  From what I understand, the 2 e-mails tagged "V3" is the original version 
>> where as RFC 2 and RFC 3 are variants. Am I correct?
> 
> Yes, sure.
> 
>> If so, for next time, I would recommend to have the cover letter first and 
>> then all the patches send "In-Reply-To" the cover letter. This makes easier to 
>> track series.
> 
> It might help. But before it worked well without additional tricks.

In general, all the patch sent are starting with a number followed by the title. 
The number ensure the correct ordering. If you don't have number then, you will 
fallback to alphabetical order.

Cheers,

-- 
Julien Grall

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH RFC 2] [DO NOT APPLY] introduce VCPUOP_register_runstate_phys_memory_area hypercall
  2019-05-28  9:23     ` Julien Grall
  2019-05-28  9:23       ` [Xen-devel] " Julien Grall
@ 2019-05-28  9:36       ` Andrii Anisov
  2019-05-28  9:36         ` [Xen-devel] " Andrii Anisov
  1 sibling, 1 reply; 51+ messages in thread
From: Andrii Anisov @ 2019-05-28  9:36 UTC (permalink / raw)
  To: Julien Grall
  Cc: Stefano Stabellini, Andrii Anisov, Konrad Rzeszutek Wilk,
	George Dunlap, Andrew Cooper, Ian Jackson, Tim Deegan,
	Jan Beulich, xen-devel, Wei Liu



On 28.05.19 12:23, Julien Grall wrote:
> In general, all the patch sent are starting with a number followed by the title. The number ensure the correct ordering. If you don't have number then, you will fallback to alphabetical order.
Ah, yes, sure. I did rename manually RFC1 patch file. But somewhy missed renaming RFC2 patch file.

-- 
Sincerely,
Andrii Anisov.

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [Xen-devel] [PATCH RFC 2] [DO NOT APPLY] introduce VCPUOP_register_runstate_phys_memory_area hypercall
  2019-05-28  9:36       ` Andrii Anisov
@ 2019-05-28  9:36         ` " Andrii Anisov
  0 siblings, 0 replies; 51+ messages in thread
From: Andrii Anisov @ 2019-05-28  9:36 UTC (permalink / raw)
  To: Julien Grall
  Cc: Stefano Stabellini, Andrii Anisov, Konrad Rzeszutek Wilk,
	George Dunlap, Andrew Cooper, Ian Jackson, Tim Deegan,
	Jan Beulich, xen-devel, Wei Liu



On 28.05.19 12:23, Julien Grall wrote:
> In general, all the patch sent are starting with a number followed by the title. The number ensure the correct ordering. If you don't have number then, you will fallback to alphabetical order.
Ah, yes, sure. I did rename manually RFC1 patch file. But somewhy missed renaming RFC2 patch file.

-- 
Sincerely,
Andrii Anisov.

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [Xen-devel] [PATCH v3] xen: introduce VCPUOP_register_runstate_phys_memory_area hypercall
  2019-05-24 18:12 ` [PATCH v3] xen: introduce VCPUOP_register_runstate_phys_memory_area hypercall Andrii Anisov
  2019-05-24 18:12   ` [Xen-devel] " Andrii Anisov
@ 2019-06-07 14:23   ` Jan Beulich
  2019-06-10 11:44     ` Julien Grall
                       ` (2 more replies)
  1 sibling, 3 replies; 51+ messages in thread
From: Jan Beulich @ 2019-06-07 14:23 UTC (permalink / raw)
  To: andrii.anisov
  Cc: Stefano Stabellini, Wei Liu, Konrad Rzeszutek Wilk,
	George Dunlap, Andrew Cooper, Ian Jackson, Tim Deegan,
	Julien Grall, xen-devel, andrii_anisov, Roger Pau Monne

>>> On 24.05.19 at 20:12, <andrii.anisov@gmail.com> wrote:
> From: Andrii Anisov <andrii_anisov@epam.com>
> 
> Existing interface to register runstate are with its virtual address
> is prone to issues which became more obvious with KPTI enablement in
> guests. The nature of those issues is the fact that the guest could
> be interrupted by the hypervisor at any time, and there is no guarantee
> to have the registered virtual address translated with the currently
> available guest's page tables. Before the KPTI such a situation was
> possible in case the guest is caught in the middle of PT processing
> (e.g. superpage shattering). With the KPTI this happens also when the
> guest runs userspace, so has a pretty high probability.

Except when there's no need for KPTI in the guest in the first place,
as is the case for x86-64 PV guests. I think this is worthwhile clarifying.

> So it was agreed to register runstate with the guest's physical address
> so that its mapping is permanent from the hypervisor point of view. [1]
> 
> The hypercall employs the same vcpu_register_runstate_memory_area
> structure for the interface, but requires a registered area to not
> cross a page boundary.
> 
> [1] https://lists.xenproject.org/archives/html/xen-devel/2019-02/msg00416.html 
> 
> Signed-off-by: Andrii Anisov <andrii_anisov@epam.com>

I would have really hoped that you would outline the intended interaction
between both interfaces, such that while reviewing one can judge whether
you're actually matching the goal. I think it is actually mandatory to make
explicit in the public header what level of mixing is permitted, what is not
permitted, and what mix of requests is simply undefined.

In particular, while we did work out during prior discussions that some
level of mixing has to be permitted, I'm unconvinced that arbitrary
mixing has to be allowed. For example, switching from one model to
another could be permitted only with just a single active vCPU. This
might allow to do away again with the runstate_in_use field you add.

> --- a/xen/arch/arm/domain.c
> +++ b/xen/arch/arm/domain.c
> @@ -274,17 +274,15 @@ static void ctxt_switch_to(struct vcpu *n)
>      virt_timer_restore(n);
>  }
>  
> -/* Update per-VCPU guest runstate shared memory area (if registered). */
> -static void update_runstate_area(struct vcpu *v)
> +static void update_runstate_by_gvaddr(struct vcpu *v)
>  {
>      void __user *guest_handle = NULL;
>  
> -    if ( guest_handle_is_null(runstate_guest(v)) )
> -        return;
> +    ASSERT(!guest_handle_is_null(runstate_guest_virt(v)));
>  
>      if ( VM_ASSIST(v->domain, runstate_update_flag) )
>      {
> -        guest_handle = &v->runstate_guest.p->state_entry_time + 1;
> +        guest_handle = &v->runstate_guest.virt.p->state_entry_time + 1;
>          guest_handle--;
>          v->runstate.state_entry_time |= XEN_RUNSTATE_UPDATE;
>          __raw_copy_to_guest(guest_handle,
> @@ -292,7 +290,7 @@ static void update_runstate_area(struct vcpu *v)
>          smp_wmb();
>      }
>  
> -    __copy_to_guest(runstate_guest(v), &v->runstate, 1);
> +    __copy_to_guest(runstate_guest_virt(v), &v->runstate, 1);

In a prior version you did the mechanical part of adjusting the VA-based
code in a prereq patch, aiding review. Is there a particular reason you
folded everything into one patch now?

> @@ -303,6 +301,53 @@ static void update_runstate_area(struct vcpu *v)
>      }
>  }
>  
> +static void update_runstate_by_gpaddr(struct vcpu *v)
> +{
> +    struct vcpu_runstate_info *runstate =
> +            (struct vcpu_runstate_info *)v->runstate_guest.phys;

What's the cast for here?

> @@ -1614,6 +1613,92 @@ bool update_runstate_area(struct vcpu *v)
>      return rc;
>  }
>  
> +static bool update_runstate_by_gpaddr_native(struct vcpu *v)
> +{
> +    struct vcpu_runstate_info *runstate =
> +            (struct vcpu_runstate_info *)v->runstate_guest.phys;
> +
> +    ASSERT(runstate != NULL);
> +
> +    if ( VM_ASSIST(v->domain, runstate_update_flag) )
> +    {
> +        runstate->state_entry_time |= XEN_RUNSTATE_UPDATE;
> +        smp_wmb();
> +        v->runstate.state_entry_time |= XEN_RUNSTATE_UPDATE;
> +    }
> +
> +    memcpy(v->runstate_guest.phys, &v->runstate, sizeof(v->runstate));
> +
> +    if ( VM_ASSIST(v->domain, runstate_update_flag) )
> +    {
> +        runstate->state_entry_time &= ~XEN_RUNSTATE_UPDATE;
> +        smp_wmb();
> +        v->runstate.state_entry_time &= ~XEN_RUNSTATE_UPDATE;
> +    }
> +
> +    return true;
> +}

I can't help thinking that this matches the Arm code. Can common things
please be put in common code? I may be asking too much, but if the
pre-existing implementations are similar enough (I didn't check) perhaps
they could be folded in a first step, too?

> +static bool update_runstate_by_gpaddr_compat(struct vcpu *v)
> +{
> +    struct compat_vcpu_runstate_info *runstate =
> +            (struct compat_vcpu_runstate_info *)v->runstate_guest.phys;
> +
> +    ASSERT(runstate != NULL);
> +
> +    if ( VM_ASSIST(v->domain, runstate_update_flag) )
> +    {
> +        runstate->state_entry_time |= XEN_RUNSTATE_UPDATE;
> +        smp_wmb();
> +        v->runstate.state_entry_time |= XEN_RUNSTATE_UPDATE;
> +    }
> +
> +    {
> +        struct compat_vcpu_runstate_info info;
> +        XLAT_vcpu_runstate_info(&info, &v->runstate);
> +        memcpy(v->runstate_guest.phys, &info, sizeof(info));
> +    }
> +    else
> +        memcpy(v->runstate_guest.phys, &v->runstate, sizeof(v->runstate));

This "else" does not seem to be paired with an if(). Does this build
at all?

> --- a/xen/arch/x86/x86_64/domain.c
> +++ b/xen/arch/x86/x86_64/domain.c
> @@ -12,6 +12,8 @@
>  CHECK_vcpu_get_physid;
>  #undef xen_vcpu_get_physid
>  
> +extern void discard_runstate_area(struct vcpu *v);

No, this is not allowed. The declaration must be visible to both consumer
and producer, so that when either side is changed things won't build until
the other side gets changed too.

> @@ -35,8 +37,16 @@ arch_compat_vcpu_op(
>               !compat_handle_okay(area.addr.h, 1) )
>              break;
>  
> +        while( xchg(&v->runstate_in_use, 1) == 0);

At the very least such loops want a cpu_relax() in their bodies.
But this being on a hypercall path - are there theoretical guarantees
that a guest can't abuse this to lock up a CPU?

Furthermore I don't understand how this is supposed to work in
the first place. The first xchg() will store 1, no matter what. Thus
on the second iteration you'll find the wanted value unless the
other side stored 0. Yet the other side is a simple xchg() too.
Hence its first attempt would fail, but its second attempt (which
we didn't exit the critical region here yet) would succeed.

Also - style.

> --- a/xen/common/domain.c
> +++ b/xen/common/domain.c
> @@ -698,6 +698,74 @@ int rcu_lock_live_remote_domain_by_id(domid_t dom, struct domain **d)
>      return 0;
>  }
>  
> +static void unmap_runstate_area(struct vcpu *v)
> +{
> +    mfn_t mfn;
> +
> +    if ( ! v->runstate_guest.phys )

Stray blank after ! .

> +        return;
> +
> +    mfn = domain_page_map_to_mfn(v->runstate_guest.phys);
> +
> +    unmap_domain_page_global((void *)
> +                             ((unsigned long)v->runstate_guest.phys &
> +                              PAGE_MASK));
> +
> +    v->runstate_guest.phys = NULL;

I think you would better store NULL before unmapping.

> @@ -734,7 +802,10 @@ int domain_kill(struct domain *d)
>          if ( cpupool_move_domain(d, cpupool0) )
>              return -ERESTART;
>          for_each_vcpu ( d, v )
> +        {
> +            discard_runstate_area_locked(v);
>              unmap_vcpu_info(v);
> +        }

What is the "locked" aspect here about? The guest itself is dead (i.e.
fully paused) at this point, isn't it? And it won't ever be unpaused
again.

> --- a/xen/include/xen/sched.h
> +++ b/xen/include/xen/sched.h
> @@ -163,17 +163,31 @@ struct vcpu
>      void            *sched_priv;    /* scheduler-specific data */
>  
>      struct vcpu_runstate_info runstate;
> +
> +    enum {
> +        RUNSTATE_NONE = 0,
> +        RUNSTATE_PADDR = 1,
> +        RUNSTATE_VADDR = 2,
> +    } runstate_guest_type;
> +
> +    unsigned long runstate_in_use;

Why "unsigned long"? Isn't a bool all you need?

Also these would now all want to be grouped in a sub-structure named
"runstate", rather than having "runstate_" prefixes.

> +        void*   phys;

Bad ordering between * and the blanks. There ought to be a blank
ahead of the *, and I don't see why you need any after it.

Jan


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [Xen-devel] [PATCH v3] xen: introduce VCPUOP_register_runstate_phys_memory_area hypercall
  2019-06-07 14:23   ` Jan Beulich
@ 2019-06-10 11:44     ` Julien Grall
  2019-06-11  9:10       ` Jan Beulich
  2019-06-11 16:09     ` Andrii Anisov
  2019-06-11 16:13     ` Andrii Anisov
  2 siblings, 1 reply; 51+ messages in thread
From: Julien Grall @ 2019-06-10 11:44 UTC (permalink / raw)
  To: Jan Beulich, andrii.anisov
  Cc: Stefano Stabellini, Wei Liu, Konrad Rzeszutek Wilk,
	George Dunlap, Andrew Cooper, Ian Jackson, Tim Deegan, xen-devel,
	andrii_anisov, Roger Pau Monne

Hi Jan,

On 07/06/2019 15:23, Jan Beulich wrote:
>>>> On 24.05.19 at 20:12, <andrii.anisov@gmail.com> wrote:
>> From: Andrii Anisov <andrii_anisov@epam.com>
>>
>> Existing interface to register runstate are with its virtual address
>> is prone to issues which became more obvious with KPTI enablement in
>> guests. The nature of those issues is the fact that the guest could
>> be interrupted by the hypervisor at any time, and there is no guarantee
>> to have the registered virtual address translated with the currently
>> available guest's page tables. Before the KPTI such a situation was
>> possible in case the guest is caught in the middle of PT processing
>> (e.g. superpage shattering). With the KPTI this happens also when the
>> guest runs userspace, so has a pretty high probability.
> 
> Except when there's no need for KPTI in the guest in the first place,
> as is the case for x86-64 PV guests. I think this is worthwhile clarifying.

I am not sure what is your point here. At least on Arm, using virtual address is 
not safe at all (whether KPTI is used or not). A guest can genuinely decides to 
shatter the mapping where the virtual address is. On Arm, this require to use 
the break-before-make sequence. It means the translation VA -> PA may fail is 
you happen to do it while the guest is using the sequence.

Some of the intermittent issues I have seen on the Arndale in the past [1] might 
be related to using virtual address. I am not 100% sure because even if the 
debug, the error does not make sense. But this is the most plausible reason for 
the failure.

I want to discuss this in part of the bigger attempt to rework the hypercall ABI 
during Xen Summit in July.

[...]

>> @@ -35,8 +37,16 @@ arch_compat_vcpu_op(
>>                !compat_handle_okay(area.addr.h, 1) )
>>               break;
>>   
>> +        while( xchg(&v->runstate_in_use, 1) == 0);
> 
> At the very least such loops want a cpu_relax() in their bodies.
> But this being on a hypercall path - are there theoretical guarantees
> that a guest can't abuse this to lock up a CPU?
Hmmm, I suggested this but it looks like a guest may call the hypercall multiple 
time from different vCPU. So this could be a way to delay work on the CPU.

I wanted to make the context switch mostly lockless and therefore avoiding to 
introduce a spinlock.

[1] https://lists.xen.org/archives/html/xen-devel/2017-11/msg00942.html

-- 
Julien Grall

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [Xen-devel] [PATCH v3] xen: introduce VCPUOP_register_runstate_phys_memory_area hypercall
  2019-06-10 11:44     ` Julien Grall
@ 2019-06-11  9:10       ` Jan Beulich
  2019-06-11 10:22         ` Andrii Anisov
  2019-06-13 12:32         ` Andrii Anisov
  0 siblings, 2 replies; 51+ messages in thread
From: Jan Beulich @ 2019-06-11  9:10 UTC (permalink / raw)
  To: Julien Grall
  Cc: Tim Deegan, Stefano Stabellini, Wei Liu, Konrad Rzeszutek Wilk,
	George Dunlap, Andrew Cooper, Ian Jackson, andrii.anisov,
	xen-devel, andrii_anisov, Roger Pau Monne

>>> On 10.06.19 at 13:44, <julien.grall@arm.com> wrote:
> Hi Jan,
> 
> On 07/06/2019 15:23, Jan Beulich wrote:
>>>>> On 24.05.19 at 20:12, <andrii.anisov@gmail.com> wrote:
>>> From: Andrii Anisov <andrii_anisov@epam.com>
>>>
>>> Existing interface to register runstate are with its virtual address
>>> is prone to issues which became more obvious with KPTI enablement in
>>> guests. The nature of those issues is the fact that the guest could
>>> be interrupted by the hypervisor at any time, and there is no guarantee
>>> to have the registered virtual address translated with the currently
>>> available guest's page tables. Before the KPTI such a situation was
>>> possible in case the guest is caught in the middle of PT processing
>>> (e.g. superpage shattering). With the KPTI this happens also when the
>>> guest runs userspace, so has a pretty high probability.
>> 
>> Except when there's no need for KPTI in the guest in the first place,
>> as is the case for x86-64 PV guests. I think this is worthwhile clarifying.
> 
> I am not sure what is your point here. At least on Arm, using virtual address is 
> not safe at all (whether KPTI is used or not). A guest can genuinely decides to 
> shatter the mapping where the virtual address is. On Arm, this require to use 
> the break-before-make sequence. It means the translation VA -> PA may fail is 
> you happen to do it while the guest is using the sequence.
> 
> Some of the intermittent issues I have seen on the Arndale in the past [1] might 
> be related to using virtual address. I am not 100% sure because even if the 
> debug, the error does not make sense. But this is the most plausible reason for 
> the failure.

All fine, but Arm-specific. The point of my comment was to ask to call
out that there is one environment (x86-64 PV) where this KPTI
discussion is entirely inapplicable.

>>> @@ -35,8 +37,16 @@ arch_compat_vcpu_op(
>>>                !compat_handle_okay(area.addr.h, 1) )
>>>               break;
>>>   
>>> +        while( xchg(&v->runstate_in_use, 1) == 0);
>> 
>> At the very least such loops want a cpu_relax() in their bodies.
>> But this being on a hypercall path - are there theoretical guarantees
>> that a guest can't abuse this to lock up a CPU?
> Hmmm, I suggested this but it looks like a guest may call the hypercall multiple 
> time from different vCPU. So this could be a way to delay work on the CPU.
> 
> I wanted to make the context switch mostly lockless and therefore avoiding to 
> introduce a spinlock.

Well, constructs like the above are trying to mimic a spinlock
without actually using a spinlock. There are extremely rare
situation in which this may indeed be warranted, but here it
falls in the common "makes things worse overall" bucket, I
think. To not unduly penalize the actual update paths, I think
using a r/w lock would be appropriate here.

Jan



_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [Xen-devel] [PATCH v3] xen: introduce VCPUOP_register_runstate_phys_memory_area hypercall
  2019-06-11  9:10       ` Jan Beulich
@ 2019-06-11 10:22         ` Andrii Anisov
  2019-06-11 12:12           ` Julien Grall
  2019-06-13 12:21           ` Andrii Anisov
  2019-06-13 12:32         ` Andrii Anisov
  1 sibling, 2 replies; 51+ messages in thread
From: Andrii Anisov @ 2019-06-11 10:22 UTC (permalink / raw)
  To: Jan Beulich, Julien Grall
  Cc: Stefano Stabellini, Wei Liu, Konrad Rzeszutek Wilk,
	George Dunlap, Andrew Cooper, Ian Jackson, Tim Deegan, xen-devel,
	andrii_anisov, Roger Pau Monne

Hello Jan,

On 11.06.19 12:10, Jan Beulich wrote:
>>> Except when there's no need for KPTI in the guest in the first place,
>>> as is the case for x86-64 PV guests. I think this is worthwhile clarifying.
>>
>> I am not sure what is your point here. At least on Arm, using virtual address is
>> not safe at all (whether KPTI is used or not). A guest can genuinely decides to
>> shatter the mapping where the virtual address is. On Arm, this require to use
>> the break-before-make sequence. It means the translation VA -> PA may fail is
>> you happen to do it while the guest is using the sequence.
>>
>> Some of the intermittent issues I have seen on the Arndale in the past [1] might
>> be related to using virtual address. I am not 100% sure because even if the
>> debug, the error does not make sense. But this is the most plausible reason for
>> the failure.
> 
> All fine, but Arm-specific. The point of my comment was to ask to call
> out that there is one environment (x86-64 PV) where this KPTI
> discussion is entirely inapplicable.

I admit that x86 specifics are quite unclear to me so clarifications and corrections in that domain are desirable.

> 
>>>> @@ -35,8 +37,16 @@ arch_compat_vcpu_op(
>>>>                 !compat_handle_okay(area.addr.h, 1) )
>>>>                break;
>>>>    
>>>> +        while( xchg(&v->runstate_in_use, 1) == 0);
>>>
>>> At the very least such loops want a cpu_relax() in their bodies.
>>> But this being on a hypercall path - are there theoretical guarantees
>>> that a guest can't abuse this to lock up a CPU?
>> Hmmm, I suggested this but it looks like a guest may call the hypercall multiple
>> time from different vCPU. So this could be a way to delay work on the CPU.

Julien, I'm not sure I understand how work on (p?)CPU could be delayed. We are here with interrupts enabled, so here guest would just spend his own vcpu time budget. On the runstate update there is a kind-of trylock, so we would not hang there either. So what the problem?

>> I wanted to make the context switch mostly lockless and therefore avoiding to
>> introduce a spinlock.>
> Well, constructs like the above are trying to mimic a spinlock
> without actually using a spinlock. There are extremely rare
> situation in which this may indeed be warranted, but here it
> falls in the common "makes things worse overall" bucket, I
> think. To not unduly penalize the actual update paths, I think
> using a r/w lock would be appropriate here.

Firstly I did not think r/w lock specifics are needed here. We have only one reader path - runstate update on vcpu scheduling. And this path can have only one instance at the time.
But it seems to be more efficient than the spinlock for the case we are not locking.

-- 
Sincerely,
Andrii Anisov.

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [Xen-devel] [PATCH v3] xen: introduce VCPUOP_register_runstate_phys_memory_area hypercall
  2019-06-11 10:22         ` Andrii Anisov
@ 2019-06-11 12:12           ` Julien Grall
  2019-06-11 12:26             ` Andrii Anisov
  2019-06-13 12:21           ` Andrii Anisov
  1 sibling, 1 reply; 51+ messages in thread
From: Julien Grall @ 2019-06-11 12:12 UTC (permalink / raw)
  To: Andrii Anisov, Jan Beulich
  Cc: Stefano Stabellini, Wei Liu, Konrad Rzeszutek Wilk,
	George Dunlap, Andrew Cooper, Ian Jackson, Tim Deegan, xen-devel,
	andrii_anisov, Roger Pau Monne

Hi,

On 11/06/2019 11:22, Andrii Anisov wrote:
> On 11.06.19 12:10, Jan Beulich wrote:
>>>>> @@ -35,8 +37,16 @@ arch_compat_vcpu_op(
>>>>>                 !compat_handle_okay(area.addr.h, 1) )
>>>>>                break;
>>>>> +        while( xchg(&v->runstate_in_use, 1) == 0);
>>>>
>>>> At the very least such loops want a cpu_relax() in their bodies.
>>>> But this being on a hypercall path - are there theoretical guarantees
>>>> that a guest can't abuse this to lock up a CPU?
>>> Hmmm, I suggested this but it looks like a guest may call the hypercall multiple
>>> time from different vCPU. So this could be a way to delay work on the CPU.
> 
> Julien, I'm not sure I understand how work on (p?)CPU could be delayed. We are 
> here with interrupts enabled, so here guest would just spend his own vcpu time 
> budget. 

Xen only supports only voluntary preemption. This means that if the hypercall 
run for a long time, you have no way to interrupt it and schedule another vCPU. 
In other words, the rest of the work on that specific pCPU would be delayed.

In this context, I think Jan is seeking guarantees that the lock can be taken in 
timely manner. If not, then we want to use a try-lock style.

Cheers,

-- 
Julien Grall

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [Xen-devel] [PATCH v3] xen: introduce VCPUOP_register_runstate_phys_memory_area hypercall
  2019-06-11 12:12           ` Julien Grall
@ 2019-06-11 12:26             ` Andrii Anisov
  2019-06-11 12:32               ` Julien Grall
  0 siblings, 1 reply; 51+ messages in thread
From: Andrii Anisov @ 2019-06-11 12:26 UTC (permalink / raw)
  To: Julien Grall, Jan Beulich
  Cc: Stefano Stabellini, Wei Liu, Konrad Rzeszutek Wilk,
	George Dunlap, Andrew Cooper, Ian Jackson, Tim Deegan, xen-devel,
	andrii_anisov, Roger Pau Monne



On 11.06.19 15:12, Julien Grall wrote:
>> Julien, I'm not sure I understand how work on (p?)CPU could be delayed. We are here with interrupts enabled, so here guest would just spend his own vcpu time budget. 
> 
> Xen only supports only voluntary preemption.

Oh, really? Let me look into it a bit closer.

-- 
Sincerely,
Andrii Anisov.

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [Xen-devel] [PATCH v3] xen: introduce VCPUOP_register_runstate_phys_memory_area hypercall
  2019-06-11 12:26             ` Andrii Anisov
@ 2019-06-11 12:32               ` Julien Grall
  2019-06-11 12:40                 ` Andrii Anisov
  0 siblings, 1 reply; 51+ messages in thread
From: Julien Grall @ 2019-06-11 12:32 UTC (permalink / raw)
  To: Andrii Anisov, Jan Beulich
  Cc: Stefano Stabellini, Wei Liu, Konrad Rzeszutek Wilk,
	George Dunlap, Andrew Cooper, Ian Jackson, Tim Deegan, xen-devel,
	andrii_anisov, Roger Pau Monne



On 11/06/2019 13:26, Andrii Anisov wrote:
> 
> 
> On 11.06.19 15:12, Julien Grall wrote:
>>> Julien, I'm not sure I understand how work on (p?)CPU could be delayed. We 
>>> are here with interrupts enabled, so here guest would just spend his own vcpu 
>>> time budget. 
>>
>> Xen only supports only voluntary preemption.
> 
> Oh, really? Let me look into it a bit closer.

Did you expect Xen to be fully preemptible?

The function to do the scheduling is schedule(). This is either call from a 
softirq or directly in a few places (e.g wait()).

The only place in this path where do_softirq() will be called is on return to 
the guest (see leave_hypervisor_tail).

Cheers,

-- 
Julien Grall

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [Xen-devel] [PATCH v3] xen: introduce VCPUOP_register_runstate_phys_memory_area hypercall
  2019-06-11 12:32               ` Julien Grall
@ 2019-06-11 12:40                 ` Andrii Anisov
  0 siblings, 0 replies; 51+ messages in thread
From: Andrii Anisov @ 2019-06-11 12:40 UTC (permalink / raw)
  To: Julien Grall, Jan Beulich
  Cc: Stefano Stabellini, Wei Liu, Konrad Rzeszutek Wilk,
	George Dunlap, Andrew Cooper, Ian Jackson, Tim Deegan, xen-devel,
	andrii_anisov, Roger Pau Monne



On 11.06.19 15:32, Julien Grall wrote:
> Did you expect Xen to be fully preemptible?
> 
> The function to do the scheduling is schedule(). This is either call from a softirq or directly in a few places (e.g wait()).
> 
> The only place in this path where do_softirq() will be called is on return to the guest (see leave_hypervisor_tail).

Right you are! I forgot that bit. We will not pass through it on serving timer irq interrupted us in hyp mode.

-- 
Sincerely,
Andrii Anisov.

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [Xen-devel] [PATCH v3] xen: introduce VCPUOP_register_runstate_phys_memory_area hypercall
  2019-06-07 14:23   ` Jan Beulich
  2019-06-10 11:44     ` Julien Grall
@ 2019-06-11 16:09     ` Andrii Anisov
  2019-06-12  7:27       ` Jan Beulich
  2019-06-11 16:13     ` Andrii Anisov
  2 siblings, 1 reply; 51+ messages in thread
From: Andrii Anisov @ 2019-06-11 16:09 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Stefano Stabellini, Wei Liu, Konrad Rzeszutek Wilk,
	George Dunlap, Andrew Cooper, Ian Jackson, Tim Deegan,
	Julien Grall, xen-devel, andrii_anisov, Roger Pau Monne

Hello Jan,

On 07.06.19 17:23, Jan Beulich wrote:
>>>> On 24.05.19 at 20:12, <andrii.anisov@gmail.com> wrote:
>> From: Andrii Anisov <andrii_anisov@epam.com>
>>
>> Existing interface to register runstate are with its virtual address
>> is prone to issues which became more obvious with KPTI enablement in
>> guests. The nature of those issues is the fact that the guest could
>> be interrupted by the hypervisor at any time, and there is no guarantee
>> to have the registered virtual address translated with the currently
>> available guest's page tables. Before the KPTI such a situation was
>> possible in case the guest is caught in the middle of PT processing
>> (e.g. superpage shattering). With the KPTI this happens also when the
>> guest runs userspace, so has a pretty high probability.
> 
> Except when there's no need for KPTI in the guest in the first place,
> as is the case for x86-64 PV guests. I think this is worthwhile clarifying.
> 
>> So it was agreed to register runstate with the guest's physical address
>> so that its mapping is permanent from the hypervisor point of view. [1]
>>
>> The hypercall employs the same vcpu_register_runstate_memory_area
>> structure for the interface, but requires a registered area to not
>> cross a page boundary.
>>
>> [1] https://lists.xenproject.org/archives/html/xen-devel/2019-02/msg00416.html
>>
>> Signed-off-by: Andrii Anisov <andrii_anisov@epam.com>
> 
> I would have really hoped that you would outline the intended interaction
> between both interfaces,

I'd suppose no specific interaction between interfaces. I hardly imagine realistic use-cases where this might be needed.

> such that while reviewing one can judge whether
> you're actually matching the goal. I think it is actually mandatory to make
> explicit in the public header what level of mixing is permitted, what is not
> permitted, and what mix of requests is simply undefined.> 
> In particular, while we did work out during prior discussions that some
> level of mixing has to be permitted, I'm unconvinced that arbitrary
> mixing has to be allowed. For example, switching from one model to
> another could be permitted only with just a single active vCPU. This
> might allow to do away again with the runstate_in_use field you add.

Well, my point here is to left it as is, maybe add more documentation. If one likes shooting his leg, we should only care about avoiding ricochet harms hypervisor or other guests.
If you disagree, please suggest your interaction model, I'll be happy to implement it.

> 
>> --- a/xen/arch/arm/domain.c
>> +++ b/xen/arch/arm/domain.c
>> @@ -274,17 +274,15 @@ static void ctxt_switch_to(struct vcpu *n)
>>       virt_timer_restore(n);
>>   }
>>   
>> -/* Update per-VCPU guest runstate shared memory area (if registered). */
>> -static void update_runstate_area(struct vcpu *v)
>> +static void update_runstate_by_gvaddr(struct vcpu *v)
>>   {
>>       void __user *guest_handle = NULL;
>>   
>> -    if ( guest_handle_is_null(runstate_guest(v)) )
>> -        return;
>> +    ASSERT(!guest_handle_is_null(runstate_guest_virt(v)));
>>   
>>       if ( VM_ASSIST(v->domain, runstate_update_flag) )
>>       {
>> -        guest_handle = &v->runstate_guest.p->state_entry_time + 1;
>> +        guest_handle = &v->runstate_guest.virt.p->state_entry_time + 1;
>>           guest_handle--;
>>           v->runstate.state_entry_time |= XEN_RUNSTATE_UPDATE;
>>           __raw_copy_to_guest(guest_handle,
>> @@ -292,7 +290,7 @@ static void update_runstate_area(struct vcpu *v)
>>           smp_wmb();
>>       }
>>   
>> -    __copy_to_guest(runstate_guest(v), &v->runstate, 1);
>> +    __copy_to_guest(runstate_guest_virt(v), &v->runstate, 1);
> 
> In a prior version you did the mechanical part of adjusting the VA-based
> code in a prereq patch, aiding review. Is there a particular reason you
> folded everything into one patch now?

I silently followed suggestion from George [1]. Any objections?

> 
>> @@ -303,6 +301,53 @@ static void update_runstate_area(struct vcpu *v)
>>       }
>>   }
>>   
>> +static void update_runstate_by_gpaddr(struct vcpu *v)
>> +{
>> +    struct vcpu_runstate_info *runstate =
>> +            (struct vcpu_runstate_info *)v->runstate_guest.phys;
> 
> What's the cast for here?
> 
>> @@ -1614,6 +1613,92 @@ bool update_runstate_area(struct vcpu *v)
>>       return rc;
>>   }
>>   
>> +static bool update_runstate_by_gpaddr_native(struct vcpu *v)
>> +{
>> +    struct vcpu_runstate_info *runstate =
>> +            (struct vcpu_runstate_info *)v->runstate_guest.phys;
>> +
>> +    ASSERT(runstate != NULL);
>> +
>> +    if ( VM_ASSIST(v->domain, runstate_update_flag) )
>> +    {
>> +        runstate->state_entry_time |= XEN_RUNSTATE_UPDATE;
>> +        smp_wmb();
>> +        v->runstate.state_entry_time |= XEN_RUNSTATE_UPDATE;
>> +    }
>> +
>> +    memcpy(v->runstate_guest.phys, &v->runstate, sizeof(v->runstate));
>> +
>> +    if ( VM_ASSIST(v->domain, runstate_update_flag) )
>> +    {
>> +        runstate->state_entry_time &= ~XEN_RUNSTATE_UPDATE;
>> +        smp_wmb();
>> +        v->runstate.state_entry_time &= ~XEN_RUNSTATE_UPDATE;
>> +    }
>> +
>> +    return true;
>> +}
> 
> I can't help thinking that this matches the Arm code. Can common things
> please be put in common code? I may be asking too much, but if the
> pre-existing implementations are similar enough (I didn't check) perhaps
> they could be folded in a first step, too?

The problem thought to be the interface: on x86 update_runstate_area() returns bool, but on ARM it is void.
But for the common function update_runstate_by_gpaddr_~native~() it would not be a problem, because we will return proper bool from the refactored update_runstate_area().
Thank you for the point.

> 
>> +static bool update_runstate_by_gpaddr_compat(struct vcpu *v)
>> +{
>> +    struct compat_vcpu_runstate_info *runstate =
>> +            (struct compat_vcpu_runstate_info *)v->runstate_guest.phys;
>> +
>> +    ASSERT(runstate != NULL);
>> +
>> +    if ( VM_ASSIST(v->domain, runstate_update_flag) )
>> +    {
>> +        runstate->state_entry_time |= XEN_RUNSTATE_UPDATE;
>> +        smp_wmb();
>> +        v->runstate.state_entry_time |= XEN_RUNSTATE_UPDATE;
>> +    }
>> +
>> +    {
>> +        struct compat_vcpu_runstate_info info;
>> +        XLAT_vcpu_runstate_info(&info, &v->runstate);
>> +        memcpy(v->runstate_guest.phys, &info, sizeof(info));
>> +    }
>> +    else
>> +        memcpy(v->runstate_guest.phys, &v->runstate, sizeof(v->runstate));
> 
> This "else" does not seem to be paired with an if(). Does this build
> at all?

This particular - not!
And it is really strange, I remember I checked patch compilation for x86. Looking through git reflog to realize at what amend it became broken.
But also I do not completely understand the meaning of "_compat" and if it should be supported here?

> 
>> --- a/xen/arch/x86/x86_64/domain.c
>> +++ b/xen/arch/x86/x86_64/domain.c
>> @@ -12,6 +12,8 @@
>>   CHECK_vcpu_get_physid;
>>   #undef xen_vcpu_get_physid
>>   
>> +extern void discard_runstate_area(struct vcpu *v);
> 
> No, this is not allowed. The declaration must be visible to both consumer
> and producer, so that when either side is changed things won't build until
> the other side gets changed too.
> 
>> @@ -35,8 +37,16 @@ arch_compat_vcpu_op(
>>                !compat_handle_okay(area.addr.h, 1) )
>>               break;
>>   
>> +        while( xchg(&v->runstate_in_use, 1) == 0);
> 
> At the very least such loops want a cpu_relax() in their bodies.
> But this being on a hypercall path - are there theoretical guarantees
> that a guest can't abuse this to lock up a CPU?
> 
> Furthermore I don't understand how this is supposed to work in
> the first place. The first xchg() will store 1, no matter what. Thus
> on the second iteration you'll find the wanted value unless the
> other side stored 0. Yet the other side is a simple xchg() too.
> Hence its first attempt would fail, but its second attempt (which
> we didn't exit the critical region here yet) would succeed.
> 
> Also - style.

Will leave this part for locking issue discussion.

> 
>> --- a/xen/common/domain.c
>> +++ b/xen/common/domain.c
>> @@ -698,6 +698,74 @@ int rcu_lock_live_remote_domain_by_id(domid_t dom, struct domain **d)
>>       return 0;
>>   }
>>   
>> +static void unmap_runstate_area(struct vcpu *v)
>> +{
>> +    mfn_t mfn;
>> +
>> +    if ( ! v->runstate_guest.phys )
> 
> Stray blank after ! .

OK.

> 
>> +        return;
>> +
>> +    mfn = domain_page_map_to_mfn(v->runstate_guest.phys);
>> +
>> +    unmap_domain_page_global((void *)
>> +                             ((unsigned long)v->runstate_guest.phys &
>> +                              PAGE_MASK));
>> +
>> +    v->runstate_guest.phys = NULL;
> 
> I think you would better store NULL before unmapping.

Do you mean using local variable to pass address to unmap_domain_page_global(), and setting v->runstate_guest.phys to NULL prior to unmap?

> 
>> @@ -734,7 +802,10 @@ int domain_kill(struct domain *d)
>>           if ( cpupool_move_domain(d, cpupool0) )
>>               return -ERESTART;
>>           for_each_vcpu ( d, v )
>> +        {
>> +            discard_runstate_area_locked(v);
>>               unmap_vcpu_info(v);
>> +        }
> 
> What is the "locked" aspect here about? The guest itself is dead (i.e.
> fully paused) at this point, isn't it? And it won't ever be unpaused
> again.

You are right. All vcpus here are stopped. We can discard runstate area without locking.

> 
>> --- a/xen/include/xen/sched.h
>> +++ b/xen/include/xen/sched.h
>> @@ -163,17 +163,31 @@ struct vcpu
>>       void            *sched_priv;    /* scheduler-specific data */
>>   
>>       struct vcpu_runstate_info runstate;
>> +
>> +    enum {
>> +        RUNSTATE_NONE = 0,
>> +        RUNSTATE_PADDR = 1,
>> +        RUNSTATE_VADDR = 2,
>> +    } runstate_guest_type;
>> +
>> +    unsigned long runstate_in_use;
> 
> Why "unsigned long"? Isn't a bool all you need?

Bool should be enough. But it seems we will have a lock here.

> Also these would now all want to be grouped in a sub-structure named
> "runstate", rather than having "runstate_" prefixes.

Member `runstate` has already a type of `struct vcpu_runstate_info` which is an interface type.
`runstate_guest` is a union. I'd not like moving `runstate_guest` union into another substructure. Because we would have long lines like `v->struct.runstate_guest.virt.p->state_entry_time`.

> 
>> +        void*   phys;
> 
> Bad ordering between * and the blanks. There ought to be a blank
> ahead of the *, and I don't see why you need any after it.

OK.


[1] https://lists.xenproject.org/archives/html/xen-devel/2019-05/msg00599.html

-- 
Sincerely,
Andrii Anisov.

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [Xen-devel] [PATCH v3] xen: introduce VCPUOP_register_runstate_phys_memory_area hypercall
  2019-06-07 14:23   ` Jan Beulich
  2019-06-10 11:44     ` Julien Grall
  2019-06-11 16:09     ` Andrii Anisov
@ 2019-06-11 16:13     ` Andrii Anisov
  2 siblings, 0 replies; 51+ messages in thread
From: Andrii Anisov @ 2019-06-11 16:13 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Stefano Stabellini, Wei Liu, Konrad Rzeszutek Wilk,
	George Dunlap, Andrew Cooper, Ian Jackson, Tim Deegan,
	Julien Grall, xen-devel, andrii_anisov, Roger Pau Monne

Sorry,

Missed one comment

On 07.06.19 17:23, Jan Beulich wrote:
>> +static void update_runstate_by_gpaddr(struct vcpu *v)
>> +{
>> +    struct vcpu_runstate_info *runstate =
>> +            (struct vcpu_runstate_info *)v->runstate_guest.phys;
> 
> What's the cast for here?

Seems to be not needed.

-- 
Sincerely,
Andrii Anisov.

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [Xen-devel] [PATCH v3] xen: introduce VCPUOP_register_runstate_phys_memory_area hypercall
  2019-06-11 16:09     ` Andrii Anisov
@ 2019-06-12  7:27       ` Jan Beulich
  2019-06-13 12:17         ` Andrii Anisov
  0 siblings, 1 reply; 51+ messages in thread
From: Jan Beulich @ 2019-06-12  7:27 UTC (permalink / raw)
  To: andrii.anisov
  Cc: Stefano Stabellini, Wei Liu, Konrad Rzeszutek Wilk,
	George Dunlap, Andrew Cooper, Ian Jackson, Tim Deegan,
	Julien Grall, xen-devel, andrii_anisov, Roger Pau Monne

>>> On 11.06.19 at 18:09, <andrii.anisov@gmail.com> wrote:
> On 07.06.19 17:23, Jan Beulich wrote:
>>>>> On 24.05.19 at 20:12, <andrii.anisov@gmail.com> wrote:
>>> From: Andrii Anisov <andrii_anisov@epam.com>
>>>
>>> Existing interface to register runstate are with its virtual address
>>> is prone to issues which became more obvious with KPTI enablement in
>>> guests. The nature of those issues is the fact that the guest could
>>> be interrupted by the hypervisor at any time, and there is no guarantee
>>> to have the registered virtual address translated with the currently
>>> available guest's page tables. Before the KPTI such a situation was
>>> possible in case the guest is caught in the middle of PT processing
>>> (e.g. superpage shattering). With the KPTI this happens also when the
>>> guest runs userspace, so has a pretty high probability.
>> 
>> Except when there's no need for KPTI in the guest in the first place,
>> as is the case for x86-64 PV guests. I think this is worthwhile clarifying.
>> 
>>> So it was agreed to register runstate with the guest's physical address
>>> so that its mapping is permanent from the hypervisor point of view. [1]
>>>
>>> The hypercall employs the same vcpu_register_runstate_memory_area
>>> structure for the interface, but requires a registered area to not
>>> cross a page boundary.
>>>
>>> [1] https://lists.xenproject.org/archives/html/xen-devel/2019-02/msg00416.html 
>>>
>>> Signed-off-by: Andrii Anisov <andrii_anisov@epam.com>
>> 
>> I would have really hoped that you would outline the intended interaction
>> between both interfaces,
> 
> I'd suppose no specific interaction between interfaces. I hardly imagine 
> realistic use-cases where this might be needed.
> 
>> such that while reviewing one can judge whether
>> you're actually matching the goal. I think it is actually mandatory to make
>> explicit in the public header what level of mixing is permitted, what is not
>> permitted, and what mix of requests is simply undefined.> 
>> In particular, while we did work out during prior discussions that some
>> level of mixing has to be permitted, I'm unconvinced that arbitrary
>> mixing has to be allowed. For example, switching from one model to
>> another could be permitted only with just a single active vCPU. This
>> might allow to do away again with the runstate_in_use field you add.
> 
> Well, my point here is to left it as is, maybe add more documentation. If 
> one likes shooting his leg, we should only care about avoiding ricochet harms 
> hypervisor or other guests.
> If you disagree, please suggest your interaction model, I'll be happy to 
> implement it.

Well, if "mix as you like" is fine for guests to follow, then okay. But
we need to be _really_ certain there's no issue with this. Relaxing
the interface is always possible, while tightening an interface is
almost always at least a problem, if possible at all.

>>> --- a/xen/arch/arm/domain.c
>>> +++ b/xen/arch/arm/domain.c
>>> @@ -274,17 +274,15 @@ static void ctxt_switch_to(struct vcpu *n)
>>>       virt_timer_restore(n);
>>>   }
>>>   
>>> -/* Update per-VCPU guest runstate shared memory area (if registered). */
>>> -static void update_runstate_area(struct vcpu *v)
>>> +static void update_runstate_by_gvaddr(struct vcpu *v)
>>>   {
>>>       void __user *guest_handle = NULL;
>>>   
>>> -    if ( guest_handle_is_null(runstate_guest(v)) )
>>> -        return;
>>> +    ASSERT(!guest_handle_is_null(runstate_guest_virt(v)));
>>>   
>>>       if ( VM_ASSIST(v->domain, runstate_update_flag) )
>>>       {
>>> -        guest_handle = &v->runstate_guest.p->state_entry_time + 1;
>>> +        guest_handle = &v->runstate_guest.virt.p->state_entry_time + 1;
>>>           guest_handle--;
>>>           v->runstate.state_entry_time |= XEN_RUNSTATE_UPDATE;
>>>           __raw_copy_to_guest(guest_handle,
>>> @@ -292,7 +290,7 @@ static void update_runstate_area(struct vcpu *v)
>>>           smp_wmb();
>>>       }
>>>   
>>> -    __copy_to_guest(runstate_guest(v), &v->runstate, 1);
>>> +    __copy_to_guest(runstate_guest_virt(v), &v->runstate, 1);
>> 
>> In a prior version you did the mechanical part of adjusting the VA-based
>> code in a prereq patch, aiding review. Is there a particular reason you
>> folded everything into one patch now?
> 
> I silently followed suggestion from George [1]. Any objections?

Hmm, I can't read this into George's suggestion. Aiui he did suggest
not to split the definition of the new interface from its implementation.
But that doesn't necessarily mean to squash _everything_ in one
patch.

>>> +static bool update_runstate_by_gpaddr_compat(struct vcpu *v)
>>> +{
>>> +    struct compat_vcpu_runstate_info *runstate =
>>> +            (struct compat_vcpu_runstate_info *)v->runstate_guest.phys;
>>> +
>>> +    ASSERT(runstate != NULL);
>>> +
>>> +    if ( VM_ASSIST(v->domain, runstate_update_flag) )
>>> +    {
>>> +        runstate->state_entry_time |= XEN_RUNSTATE_UPDATE;
>>> +        smp_wmb();
>>> +        v->runstate.state_entry_time |= XEN_RUNSTATE_UPDATE;
>>> +    }
>>> +
>>> +    {
>>> +        struct compat_vcpu_runstate_info info;
>>> +        XLAT_vcpu_runstate_info(&info, &v->runstate);
>>> +        memcpy(v->runstate_guest.phys, &info, sizeof(info));
>>> +    }
>>> +    else
>>> +        memcpy(v->runstate_guest.phys, &v->runstate, sizeof(v->runstate));
>> 
>> This "else" does not seem to be paired with an if(). Does this build
>> at all?
> 
> This particular - not!
> And it is really strange, I remember I checked patch compilation for x86. 
> Looking through git reflog to realize at what amend it became broken.
> But also I do not completely understand the meaning of "_compat" and if it 
> should be supported here?

Well, I'm afraid I don't understand what you're after. Of course
compat mode guests need to continue to be supported, and the
new interface would also better be available to them. And it is
a fact that their runstate area layout differs from that of 64-bit
guests.

>>> +    mfn = domain_page_map_to_mfn(v->runstate_guest.phys);
>>> +
>>> +    unmap_domain_page_global((void *)
>>> +                             ((unsigned long)v->runstate_guest.phys &
>>> +                              PAGE_MASK));
>>> +
>>> +    v->runstate_guest.phys = NULL;
>> 
>> I think you would better store NULL before unmapping.
> 
> Do you mean using local variable to pass address to 
> unmap_domain_page_global(), and setting v->runstate_guest.phys to NULL prior 
> to unmap?

Yes.

>>> --- a/xen/include/xen/sched.h
>>> +++ b/xen/include/xen/sched.h
>>> @@ -163,17 +163,31 @@ struct vcpu
>>>       void            *sched_priv;    /* scheduler-specific data */
>>>   
>>>       struct vcpu_runstate_info runstate;
>>> +
>>> +    enum {
>>> +        RUNSTATE_NONE = 0,
>>> +        RUNSTATE_PADDR = 1,
>>> +        RUNSTATE_VADDR = 2,
>>> +    } runstate_guest_type;
>>> +
>>> +    unsigned long runstate_in_use;
>> 
>> Why "unsigned long"? Isn't a bool all you need?
> 
> Bool should be enough. But it seems we will have a lock here.
> 
>> Also these would now all want to be grouped in a sub-structure named
>> "runstate", rather than having "runstate_" prefixes.
> 
> Member `runstate` has already a type of `struct vcpu_runstate_info` which is 
> an interface type.
> `runstate_guest` is a union. I'd not like moving `runstate_guest` union into 
> another substructure. Because we would have long lines like 
> `v->struct.runstate_guest.virt.p->state_entry_time`.

You didn't get my point then: What I'm after is

    struct {
        struct vcpu_runstate_info info;
        enum {
            RUNSTATE_NONE,
            RUNSTATE_PADDR,
            RUNSTATE_VADDR,
        } guest_type;
        bool in_use;
    } runstate;

(and of course the transformation to runstate.info broken out
into a separate prerreq patch).

Jan


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [Xen-devel] [PATCH v3] xen: introduce VCPUOP_register_runstate_phys_memory_area hypercall
  2019-06-12  7:27       ` Jan Beulich
@ 2019-06-13 12:17         ` Andrii Anisov
  2019-06-13 12:36           ` Jan Beulich
  0 siblings, 1 reply; 51+ messages in thread
From: Andrii Anisov @ 2019-06-13 12:17 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Stefano Stabellini, Wei Liu, Konrad Rzeszutek Wilk,
	George Dunlap, Andrew Cooper, Ian Jackson, Tim Deegan,
	Julien Grall, xen-devel, andrii_anisov, Roger Pau Monne

Jan,

On 12.06.19 10:27, Jan Beulich wrote:
>> Well, my point here is to left it as is, maybe add more documentation. If
>> one likes shooting his leg, we should only care about avoiding ricochet harms
>> hypervisor or other guests.
>> If you disagree, please suggest your interaction model, I'll be happy to
>> implement it.
> 
> Well, if "mix as you like" is fine for guests to follow, then okay. But
> we need to be _really_ certain there's no issue with this.

I'm not aware about potential problems from the guest side. Do you have any ideas about it?

> Relaxing
> the interface is always possible, while tightening an interface is
> almost always at least a problem, if possible at all.

True.


>>> In a prior version you did the mechanical part of adjusting the VA-based
>>> code in a prereq patch, aiding review. Is there a particular reason you
>>> folded everything into one patch now?
>>
>> I silently followed suggestion from George [1]. Any objections?
> 
> Hmm, I can't read this into George's suggestion. Aiui he did suggest
> not to split the definition of the new interface from its implementation.
> But that doesn't necessarily mean to squash _everything_ in one
> patch.

OK.
It looks that what you said firstly is closer to V1 of this stuff. Will keep this in mind for the next version.


> Well, I'm afraid I don't understand what you're after. Of course
> compat mode guests need to continue to be supported, and the
> new interface would also better be available to them. And it is
> a fact that their runstate area layout differs from that of 64-bit
> guests.

OK.

>>>> --- a/xen/include/xen/sched.h
>>>> +++ b/xen/include/xen/sched.h
>>>> @@ -163,17 +163,31 @@ struct vcpu
>>>>        void            *sched_priv;    /* scheduler-specific data */
>>>>    
>>>>        struct vcpu_runstate_info runstate;
>>>> +
>>>> +    enum {
>>>> +        RUNSTATE_NONE = 0,
>>>> +        RUNSTATE_PADDR = 1,
>>>> +        RUNSTATE_VADDR = 2,
>>>> +    } runstate_guest_type;
>>>> +
>>>> +    unsigned long runstate_in_use;
>>>
>>> Why "unsigned long"? Isn't a bool all you need?
>>
>> Bool should be enough. But it seems we will have a lock here.
>>
>>> Also these would now all want to be grouped in a sub-structure named
>>> "runstate", rather than having "runstate_" prefixes.
>>
>> Member `runstate` has already a type of `struct vcpu_runstate_info` which is
>> an interface type.
>> `runstate_guest` is a union. I'd not like moving `runstate_guest` union into
>> another substructure. Because we would have long lines like
>> `v->struct.runstate_guest.virt.p->state_entry_time`.
> 
> You didn't get my point then: What I'm after is
> 
>      struct {
>          struct vcpu_runstate_info info;
>          enum {
>              RUNSTATE_NONE,
>              RUNSTATE_PADDR,
>              RUNSTATE_VADDR,
>          } guest_type;
>          bool in_use;
>      } runstate;

Did you miss runstate_guest as a member of that struct?

-- 
Sincerely,
Andrii Anisov.

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [Xen-devel] [PATCH v3] xen: introduce VCPUOP_register_runstate_phys_memory_area hypercall
  2019-06-11 10:22         ` Andrii Anisov
  2019-06-11 12:12           ` Julien Grall
@ 2019-06-13 12:21           ` Andrii Anisov
  2019-06-13 12:39             ` Jan Beulich
  1 sibling, 1 reply; 51+ messages in thread
From: Andrii Anisov @ 2019-06-13 12:21 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Stefano Stabellini, Wei Liu, Konrad Rzeszutek Wilk,
	George Dunlap, Andrew Cooper, Ian Jackson, Tim Deegan,
	Julien Grall, xen-devel, andrii_anisov, Roger Pau Monne

Jan,

On 11.06.19 13:22, Andrii Anisov wrote:
> Hello Jan,
> 
> On 11.06.19 12:10, Jan Beulich wrote:
>>>> Except when there's no need for KPTI in the guest in the first place,
>>>> as is the case for x86-64 PV guests. I think this is worthwhile clarifying.
>>>
>>> I am not sure what is your point here. At least on Arm, using virtual address is
>>> not safe at all (whether KPTI is used or not). A guest can genuinely decides to
>>> shatter the mapping where the virtual address is. On Arm, this require to use
>>> the break-before-make sequence. It means the translation VA -> PA may fail is
>>> you happen to do it while the guest is using the sequence.
>>>
>>> Some of the intermittent issues I have seen on the Arndale in the past [1] might
>>> be related to using virtual address. I am not 100% sure because even if the
>>> debug, the error does not make sense. But this is the most plausible reason for
>>> the failure.
>>
>> All fine, but Arm-specific. The point of my comment was to ask to call
>> out that there is one environment (x86-64 PV) where this KPTI
>> discussion is entirely inapplicable.
> 
> I admit that x86 specifics are quite unclear to me so clarifications and corrections in that domain are desirable.

Could you please elaborate more about this?
Do you mean that more words should be added to the commit message about x86?
If so, please provide what is proper from your point of view.

-- 
Sincerely,
Andrii Anisov.

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [Xen-devel] [PATCH v3] xen: introduce VCPUOP_register_runstate_phys_memory_area hypercall
  2019-06-11  9:10       ` Jan Beulich
  2019-06-11 10:22         ` Andrii Anisov
@ 2019-06-13 12:32         ` Andrii Anisov
  2019-06-13 12:41           ` Jan Beulich
  1 sibling, 1 reply; 51+ messages in thread
From: Andrii Anisov @ 2019-06-13 12:32 UTC (permalink / raw)
  To: Jan Beulich, Julien Grall
  Cc: Stefano Stabellini, Wei Liu, Konrad Rzeszutek Wilk,
	George Dunlap, Andrew Cooper, Ian Jackson, Tim Deegan, xen-devel,
	andrii_anisov, Roger Pau Monne

Jan, Julien,

On 11.06.19 12:10, Jan Beulich wrote:
>>> At the very least such loops want a cpu_relax() in their bodies.
>>> But this being on a hypercall path - are there theoretical guarantees
>>> that a guest can't abuse this to lock up a CPU?
>> Hmmm, I suggested this but it looks like a guest may call the hypercall multiple
>> time from different vCPU. So this could be a way to delay work on the CPU.
>>
>> I wanted to make the context switch mostly lockless and therefore avoiding to
>> introduce a spinlock.
> 
> Well, constructs like the above are trying to mimic a spinlock
> without actually using a spinlock. There are extremely rare
> situation in which this may indeed be warranted, but here it
> falls in the common "makes things worse overall" bucket, I
> think. To not unduly penalize the actual update paths, I think
> using a r/w lock would be appropriate here.

So what is the conclusion here? Should we go with trylock and hypercall_create_continuation() in order to avoid locking but still not fail to the guest?

-- 
Sincerely,
Andrii Anisov.

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [Xen-devel] [PATCH v3] xen: introduce VCPUOP_register_runstate_phys_memory_area hypercall
  2019-06-13 12:17         ` Andrii Anisov
@ 2019-06-13 12:36           ` Jan Beulich
  0 siblings, 0 replies; 51+ messages in thread
From: Jan Beulich @ 2019-06-13 12:36 UTC (permalink / raw)
  To: andrii.anisov
  Cc: Stefano Stabellini, Wei Liu, Konrad Rzeszutek Wilk,
	George Dunlap, Andrew Cooper, Ian Jackson, Tim Deegan,
	Julien Grall, xen-devel, andrii_anisov, Roger Pau Monne

>>> On 13.06.19 at 14:17, <andrii.anisov@gmail.com> wrote:
> On 12.06.19 10:27, Jan Beulich wrote:
>>> Well, my point here is to left it as is, maybe add more documentation. If
>>> one likes shooting his leg, we should only care about avoiding ricochet 
> harms
>>> hypervisor or other guests.
>>> If you disagree, please suggest your interaction model, I'll be happy to
>>> implement it.
>> 
>> Well, if "mix as you like" is fine for guests to follow, then okay. But
>> we need to be _really_ certain there's no issue with this.
> 
> I'm not aware about potential problems from the guest side. Do you have any 
> ideas about it?

I didn't spend time trying to figure something out, but ...

>> Relaxing
>> the interface is always possible, while tightening an interface is
>> almost always at least a problem, if possible at all.
> 
> True.

... you agreeing here suggests someone should, and this would
better not (only) be the reviewer(s).

>>>>> --- a/xen/include/xen/sched.h
>>>>> +++ b/xen/include/xen/sched.h
>>>>> @@ -163,17 +163,31 @@ struct vcpu
>>>>>        void            *sched_priv;    /* scheduler-specific data */
>>>>>    
>>>>>        struct vcpu_runstate_info runstate;
>>>>> +
>>>>> +    enum {
>>>>> +        RUNSTATE_NONE = 0,
>>>>> +        RUNSTATE_PADDR = 1,
>>>>> +        RUNSTATE_VADDR = 2,
>>>>> +    } runstate_guest_type;
>>>>> +
>>>>> +    unsigned long runstate_in_use;
>>>>
>>>> Why "unsigned long"? Isn't a bool all you need?
>>>
>>> Bool should be enough. But it seems we will have a lock here.
>>>
>>>> Also these would now all want to be grouped in a sub-structure named
>>>> "runstate", rather than having "runstate_" prefixes.
>>>
>>> Member `runstate` has already a type of `struct vcpu_runstate_info` which is
>>> an interface type.
>>> `runstate_guest` is a union. I'd not like moving `runstate_guest` union into
>>> another substructure. Because we would have long lines like
>>> `v->struct.runstate_guest.virt.p->state_entry_time`.
>> 
>> You didn't get my point then: What I'm after is
>> 
>>      struct {
>>          struct vcpu_runstate_info info;
>>          enum {
>>              RUNSTATE_NONE,
>>              RUNSTATE_PADDR,
>>              RUNSTATE_VADDR,
>>          } guest_type;
>>          bool in_use;
>>      } runstate;
> 
> Did you miss runstate_guest as a member of that struct?

Quite possible. I've outlined it only anyway, for you to get the idea.

Jan



_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [Xen-devel] [PATCH v3] xen: introduce VCPUOP_register_runstate_phys_memory_area hypercall
  2019-06-13 12:21           ` Andrii Anisov
@ 2019-06-13 12:39             ` Jan Beulich
  0 siblings, 0 replies; 51+ messages in thread
From: Jan Beulich @ 2019-06-13 12:39 UTC (permalink / raw)
  To: andrii.anisov
  Cc: Stefano Stabellini, Wei Liu, Konrad Rzeszutek Wilk,
	George Dunlap, Andrew Cooper, Ian Jackson, Tim Deegan,
	Julien Grall, xen-devel, andrii_anisov, Roger Pau Monne

>>> On 13.06.19 at 14:21, <andrii.anisov@gmail.com> wrote:
> On 11.06.19 13:22, Andrii Anisov wrote:
>> On 11.06.19 12:10, Jan Beulich wrote:
>>>>> Except when there's no need for KPTI in the guest in the first place,
>>>>> as is the case for x86-64 PV guests. I think this is worthwhile clarifying.
>>>>
>>>> I am not sure what is your point here. At least on Arm, using virtual address is
>>>> not safe at all (whether KPTI is used or not). A guest can genuinely decides to
>>>> shatter the mapping where the virtual address is. On Arm, this require to use
>>>> the break-before-make sequence. It means the translation VA -> PA may fail is
>>>> you happen to do it while the guest is using the sequence.
>>>>
>>>> Some of the intermittent issues I have seen on the Arndale in the past [1] might
>>>> be related to using virtual address. I am not 100% sure because even if the
>>>> debug, the error does not make sense. But this is the most plausible reason for
>>>> the failure.
>>>
>>> All fine, but Arm-specific. The point of my comment was to ask to call
>>> out that there is one environment (x86-64 PV) where this KPTI
>>> discussion is entirely inapplicable.
>> 
>> I admit that x86 specifics are quite unclear to me so clarifications and 
> corrections in that domain are desirable.
> 
> Could you please elaborate more about this?
> Do you mean that more words should be added to the commit message about x86?
> If so, please provide what is proper from your point of view.

I still think my initial response (still visible in context) was
sufficient. All I'm after is that you slightly soften your bold
statement in the description (no longer visible in context).

Jan



_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [Xen-devel] [PATCH v3] xen: introduce VCPUOP_register_runstate_phys_memory_area hypercall
  2019-06-13 12:32         ` Andrii Anisov
@ 2019-06-13 12:41           ` Jan Beulich
  2019-06-13 12:48             ` Julien Grall
  0 siblings, 1 reply; 51+ messages in thread
From: Jan Beulich @ 2019-06-13 12:41 UTC (permalink / raw)
  To: andrii.anisov
  Cc: Stefano Stabellini, Wei Liu, Konrad Rzeszutek Wilk,
	George Dunlap, Andrew Cooper, Ian Jackson, Tim Deegan,
	Julien Grall, xen-devel, andrii_anisov, Roger Pau Monne

>>> On 13.06.19 at 14:32, <andrii.anisov@gmail.com> wrote:
> Jan, Julien,
> 
> On 11.06.19 12:10, Jan Beulich wrote:
>>>> At the very least such loops want a cpu_relax() in their bodies.
>>>> But this being on a hypercall path - are there theoretical guarantees
>>>> that a guest can't abuse this to lock up a CPU?
>>> Hmmm, I suggested this but it looks like a guest may call the hypercall 
> multiple
>>> time from different vCPU. So this could be a way to delay work on the CPU.
>>>
>>> I wanted to make the context switch mostly lockless and therefore avoiding 
> to
>>> introduce a spinlock.
>> 
>> Well, constructs like the above are trying to mimic a spinlock
>> without actually using a spinlock. There are extremely rare
>> situation in which this may indeed be warranted, but here it
>> falls in the common "makes things worse overall" bucket, I
>> think. To not unduly penalize the actual update paths, I think
>> using a r/w lock would be appropriate here.
> 
> So what is the conclusion here? Should we go with trylock and 
> hypercall_create_continuation() in order to avoid locking but still not fail 
> to the guest?

I'm not convinced a "trylock" approach is needed - that's
something Julien suggested. I'm pretty sure we're acquiring other
locks in hypercall context without going the trylock route. I am
convinced though that the pseudo-lock you've used needs to be
replaced by a real (and perhaps r/w) one, _if_ there is any need
for locking in the first place.

Jan



_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [Xen-devel] [PATCH v3] xen: introduce VCPUOP_register_runstate_phys_memory_area hypercall
  2019-06-13 12:41           ` Jan Beulich
@ 2019-06-13 12:48             ` Julien Grall
  2019-06-13 12:58               ` Jan Beulich
  0 siblings, 1 reply; 51+ messages in thread
From: Julien Grall @ 2019-06-13 12:48 UTC (permalink / raw)
  To: Jan Beulich, andrii.anisov
  Cc: Stefano Stabellini, Wei Liu, Konrad Rzeszutek Wilk,
	George Dunlap, Andrew Cooper, Ian Jackson, Tim Deegan, xen-devel,
	andrii_anisov, Roger Pau Monne

Hi Jan,

On 13/06/2019 13:41, Jan Beulich wrote:
>>>> On 13.06.19 at 14:32, <andrii.anisov@gmail.com> wrote:
>> Jan, Julien,
>>
>> On 11.06.19 12:10, Jan Beulich wrote:
>>>>> At the very least such loops want a cpu_relax() in their bodies.
>>>>> But this being on a hypercall path - are there theoretical guarantees
>>>>> that a guest can't abuse this to lock up a CPU?
>>>> Hmmm, I suggested this but it looks like a guest may call the hypercall
>> multiple
>>>> time from different vCPU. So this could be a way to delay work on the CPU.
>>>>
>>>> I wanted to make the context switch mostly lockless and therefore avoiding
>> to
>>>> introduce a spinlock.
>>>
>>> Well, constructs like the above are trying to mimic a spinlock
>>> without actually using a spinlock. There are extremely rare
>>> situation in which this may indeed be warranted, but here it
>>> falls in the common "makes things worse overall" bucket, I
>>> think. To not unduly penalize the actual update paths, I think
>>> using a r/w lock would be appropriate here.
>>
>> So what is the conclusion here? Should we go with trylock and
>> hypercall_create_continuation() in order to avoid locking but still not fail
>> to the guest?
> 
> I'm not convinced a "trylock" approach is needed - that's
> something Julien suggested.

I think the trylock in the context switch is a must. Otherwise you would delay 
context switch if the information get updated.

> I'm pretty sure we're acquiring other
> locks in hypercall context without going the trylock route. I am
> convinced though that the pseudo-lock you've used needs to be
> replaced by a real (and perhaps r/w) one, _if_ there is any need
> for locking in the first place.

You were the one asking for theoretical guarantees that a guest can't abuse this 
to lock up a CPU. There are no way to guarantee that as multiple vCPUs could 
call the hypercall and take the same lock potentially delaying significantly the 
work.

Regarding the need of the lock, I still can't see how you can make it safe 
without it as you may have concurrent call.

Feel free to suggest a way.

Cheers,

-- 
Julien Grall

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [Xen-devel] [PATCH v3] xen: introduce VCPUOP_register_runstate_phys_memory_area hypercall
  2019-06-13 12:48             ` Julien Grall
@ 2019-06-13 12:58               ` Jan Beulich
  2019-06-13 13:14                 ` Julien Grall
  0 siblings, 1 reply; 51+ messages in thread
From: Jan Beulich @ 2019-06-13 12:58 UTC (permalink / raw)
  To: Julien Grall
  Cc: Tim Deegan, Stefano Stabellini, Wei Liu, Konrad Rzeszutek Wilk,
	George Dunlap, Andrew Cooper, Ian Jackson, andrii.anisov,
	xen-devel, andrii_anisov, Roger Pau Monne

>>> On 13.06.19 at 14:48, <julien.grall@arm.com> wrote:
> Hi Jan,
> 
> On 13/06/2019 13:41, Jan Beulich wrote:
>>>>> On 13.06.19 at 14:32, <andrii.anisov@gmail.com> wrote:
>>> Jan, Julien,
>>>
>>> On 11.06.19 12:10, Jan Beulich wrote:
>>>>>> At the very least such loops want a cpu_relax() in their bodies.
>>>>>> But this being on a hypercall path - are there theoretical guarantees
>>>>>> that a guest can't abuse this to lock up a CPU?
>>>>> Hmmm, I suggested this but it looks like a guest may call the hypercall
>>> multiple
>>>>> time from different vCPU. So this could be a way to delay work on the CPU.
>>>>>
>>>>> I wanted to make the context switch mostly lockless and therefore avoiding
>>> to
>>>>> introduce a spinlock.
>>>>
>>>> Well, constructs like the above are trying to mimic a spinlock
>>>> without actually using a spinlock. There are extremely rare
>>>> situation in which this may indeed be warranted, but here it
>>>> falls in the common "makes things worse overall" bucket, I
>>>> think. To not unduly penalize the actual update paths, I think
>>>> using a r/w lock would be appropriate here.
>>>
>>> So what is the conclusion here? Should we go with trylock and
>>> hypercall_create_continuation() in order to avoid locking but still not fail
>>> to the guest?
>> 
>> I'm not convinced a "trylock" approach is needed - that's
>> something Julien suggested.
> 
> I think the trylock in the context switch is a must. Otherwise you would delay 
> context switch if the information get updated.

Delay in what way? I.e. how would this be an issue other than for
the guest itself (which shouldn't be constantly updating the
address for the region)?

>> I'm pretty sure we're acquiring other
>> locks in hypercall context without going the trylock route. I am
>> convinced though that the pseudo-lock you've used needs to be
>> replaced by a real (and perhaps r/w) one, _if_ there is any need
>> for locking in the first place.
> 
> You were the one asking for theoretical guarantees that a guest can't abuse this 
> to lock up a CPU. There are no way to guarantee that as multiple vCPUs could 
> call the hypercall and take the same lock potentially delaying significantly the 
> work.

Well, I may have gone a little too far with my original response. It
just was so odd to see this pseudo lock used.

> Regarding the need of the lock, I still can't see how you can make it safe 
> without it as you may have concurrent call.
> 
> Feel free to suggest a way.

Well, if none can be found, then fine. I don't have the time or interest
here to try and think about a lockless approach; it just doesn't _feel_
like this ought to strictly require use of a lock. This gut feeling of mine
may well be wrong.

Jan



_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [Xen-devel] [PATCH v3] xen: introduce VCPUOP_register_runstate_phys_memory_area hypercall
  2019-06-13 12:58               ` Jan Beulich
@ 2019-06-13 13:14                 ` Julien Grall
  2019-06-13 13:40                   ` Jan Beulich
  0 siblings, 1 reply; 51+ messages in thread
From: Julien Grall @ 2019-06-13 13:14 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Tim Deegan, Stefano Stabellini, Wei Liu, Konrad Rzeszutek Wilk,
	George Dunlap, Andrew Cooper, Ian Jackson, andrii.anisov,
	xen-devel, andrii_anisov, Roger Pau Monne

Hi Jan,

On 13/06/2019 13:58, Jan Beulich wrote:
>>>> On 13.06.19 at 14:48, <julien.grall@arm.com> wrote:
>> Hi Jan,
>>
>> On 13/06/2019 13:41, Jan Beulich wrote:
>>>>>> On 13.06.19 at 14:32, <andrii.anisov@gmail.com> wrote:
>>>> Jan, Julien,
>>>>
>>>> On 11.06.19 12:10, Jan Beulich wrote:
>>>>>>> At the very least such loops want a cpu_relax() in their bodies.
>>>>>>> But this being on a hypercall path - are there theoretical guarantees
>>>>>>> that a guest can't abuse this to lock up a CPU?
>>>>>> Hmmm, I suggested this but it looks like a guest may call the hypercall
>>>> multiple
>>>>>> time from different vCPU. So this could be a way to delay work on the CPU.
>>>>>>
>>>>>> I wanted to make the context switch mostly lockless and therefore avoiding
>>>> to
>>>>>> introduce a spinlock.
>>>>>
>>>>> Well, constructs like the above are trying to mimic a spinlock
>>>>> without actually using a spinlock. There are extremely rare
>>>>> situation in which this may indeed be warranted, but here it
>>>>> falls in the common "makes things worse overall" bucket, I
>>>>> think. To not unduly penalize the actual update paths, I think
>>>>> using a r/w lock would be appropriate here.
>>>>
>>>> So what is the conclusion here? Should we go with trylock and
>>>> hypercall_create_continuation() in order to avoid locking but still not fail
>>>> to the guest?
>>>
>>> I'm not convinced a "trylock" approach is needed - that's
>>> something Julien suggested.
>>
>> I think the trylock in the context switch is a must. Otherwise you would delay
>> context switch if the information get updated.
> 
> Delay in what way? I.e. how would this be an issue other than for
> the guest itself (which shouldn't be constantly updating the
> address for the region)?

Why would it only be an issue with the guest itself? Any wait on lock in Xen 
implies that you can't schedule another vCPU as we are not preemptible.

As the lock is taken in the context switch, I am worry that a guest continuously 
trying to call the hypercall and therefore use the lock may actually delay the 
end of the context switch. And therefore delay the rest of the work.

I suggested the trylock here, so the context switch could avoid updating the 
runstate if we are in the hypercall.

> 
>>> I'm pretty sure we're acquiring other
>>> locks in hypercall context without going the trylock route. I am
>>> convinced though that the pseudo-lock you've used needs to be
>>> replaced by a real (and perhaps r/w) one, _if_ there is any need
>>> for locking in the first place.
>>
>> You were the one asking for theoretical guarantees that a guest can't abuse this
>> to lock up a CPU. There are no way to guarantee that as multiple vCPUs could
>> call the hypercall and take the same lock potentially delaying significantly the
>> work.
> 
> Well, I may have gone a little too far with my original response. It
> just was so odd to see this pseudo lock used.
> 
>> Regarding the need of the lock, I still can't see how you can make it safe
>> without it as you may have concurrent call.
>>
>> Feel free to suggest a way.
> 
> Well, if none can be found, then fine. I don't have the time or interest
> here to try and think about a lockless approach; it just doesn't _feel_
> like this ought to strictly require use of a lock. This gut feeling of mine
> may well be wrong.

I am not asking you to spend a lot of time on it. But if you have a gut feeling 
this can be done, then a little help would be extremely useful...

Otherwise, I will consider that the lock is the best way to go.

Cheers,

-- 
Julien Grall

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [Xen-devel] [PATCH v3] xen: introduce VCPUOP_register_runstate_phys_memory_area hypercall
  2019-06-13 13:14                 ` Julien Grall
@ 2019-06-13 13:40                   ` Jan Beulich
  2019-06-13 14:41                     ` Julien Grall
  0 siblings, 1 reply; 51+ messages in thread
From: Jan Beulich @ 2019-06-13 13:40 UTC (permalink / raw)
  To: Julien Grall
  Cc: Tim Deegan, Stefano Stabellini, Wei Liu, Konrad Rzeszutek Wilk,
	George Dunlap, Andrew Cooper, Ian Jackson, andrii.anisov,
	xen-devel, andrii_anisov, Roger Pau Monne

>>> On 13.06.19 at 15:14, <julien.grall@arm.com> wrote:
> On 13/06/2019 13:58, Jan Beulich wrote:
>>>>> On 13.06.19 at 14:48, <julien.grall@arm.com> wrote:
>>> On 13/06/2019 13:41, Jan Beulich wrote:
>>>>>>> On 13.06.19 at 14:32, <andrii.anisov@gmail.com> wrote:
>>>>> On 11.06.19 12:10, Jan Beulich wrote:
>>>>>>>> At the very least such loops want a cpu_relax() in their bodies.
>>>>>>>> But this being on a hypercall path - are there theoretical guarantees
>>>>>>>> that a guest can't abuse this to lock up a CPU?
>>>>>>> Hmmm, I suggested this but it looks like a guest may call the hypercall
>>>>> multiple
>>>>>>> time from different vCPU. So this could be a way to delay work on the CPU.
>>>>>>>
>>>>>>> I wanted to make the context switch mostly lockless and therefore avoiding
>>>>> to
>>>>>>> introduce a spinlock.
>>>>>>
>>>>>> Well, constructs like the above are trying to mimic a spinlock
>>>>>> without actually using a spinlock. There are extremely rare
>>>>>> situation in which this may indeed be warranted, but here it
>>>>>> falls in the common "makes things worse overall" bucket, I
>>>>>> think. To not unduly penalize the actual update paths, I think
>>>>>> using a r/w lock would be appropriate here.
>>>>>
>>>>> So what is the conclusion here? Should we go with trylock and
>>>>> hypercall_create_continuation() in order to avoid locking but still not fail
>>>>> to the guest?
>>>>
>>>> I'm not convinced a "trylock" approach is needed - that's
>>>> something Julien suggested.
>>>
>>> I think the trylock in the context switch is a must. Otherwise you would delay
>>> context switch if the information get updated.
>> 
>> Delay in what way? I.e. how would this be an issue other than for
>> the guest itself (which shouldn't be constantly updating the
>> address for the region)?
> 
> Why would it only be an issue with the guest itself? Any wait on lock in Xen 
> implies that you can't schedule another vCPU as we are not preemptible.

For one I initially (wrongly) understood you want the trylock in the
hypercall handler. And then, for context switch, wasting the target
(i.e. being switched in) vCPU's time slice is not an issue here. Of
course if there's a chance that acquiring the lock could require more
than a full time slice, then yes, some try-lock-ery may be needed.

However, ...

>>> Regarding the need of the lock, I still can't see how you can make it safe
>>> without it as you may have concurrent call.
>>>
>>> Feel free to suggest a way.
>> 
>> Well, if none can be found, then fine. I don't have the time or interest
>> here to try and think about a lockless approach; it just doesn't _feel_
>> like this ought to strictly require use of a lock. This gut feeling of mine
>> may well be wrong.
> 
> I am not asking you to spend a lot of time on it. But if you have a gut feeling 
> this can be done, then a little help would be extremely useful...

... I thought I had already outlined a model: Allow cross-vCPU updates
only while the target vCPU is still offline. Once online, a vCPU can only
itself update its runstate area address. I think you can get away
without any locks in this case; there may be a corner case with a vCPU
being onlined right at that point in time, so there may need to be a more
strict condition (like "only one online vCPU" instead of "the target vCPU
is offline").

Jan



_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [Xen-devel] [PATCH v3] xen: introduce VCPUOP_register_runstate_phys_memory_area hypercall
  2019-06-13 13:40                   ` Jan Beulich
@ 2019-06-13 14:41                     ` Julien Grall
  2019-06-14 14:36                       ` Andrii Anisov
  0 siblings, 1 reply; 51+ messages in thread
From: Julien Grall @ 2019-06-13 14:41 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Tim Deegan, Stefano Stabellini, Wei Liu, Konrad Rzeszutek Wilk,
	George Dunlap, Andrew Cooper, Ian Jackson, andrii.anisov,
	xen-devel, andrii_anisov, Roger Pau Monne

Hi Jan,

On 13/06/2019 14:40, Jan Beulich wrote:
>>>> On 13.06.19 at 15:14, <julien.grall@arm.com> wrote:
>> I am not asking you to spend a lot of time on it. But if you have a gut feeling
>> this can be done, then a little help would be extremely useful...
> 
> ... I thought I had already outlined a model: Allow cross-vCPU updates
> only while the target vCPU is still offline. Once online, a vCPU can only
> itself update its runstate area address. I think you can get away
> without any locks in this case; there may be a corner case with a vCPU
> being onlined right at that point in time, so there may need to be a more
> strict condition (like "only one online vCPU" instead of "the target vCPU
> is offline").

Sorry I may have missed it. We can't really restrict the usage of the current 
hypercall (it is pretty lax). So I think any lockless solution would require to 
allow the hypercall
to be used together (which I want to avoid).

If we agree to allow the two hypercalls to be used together, then if we protect 
the update with domain_lock() then you should be able to avoid any race with the 
update path as onlining a vCPU requires to take the domain_lock() (see 
do_vcpu_op for x86 and do_common_cpu_on for Arm).

Cheers,

-- 
Julien Grall

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [Xen-devel] [PATCH v3] xen: introduce VCPUOP_register_runstate_phys_memory_area hypercall
  2019-06-13 14:41                     ` Julien Grall
@ 2019-06-14 14:36                       ` Andrii Anisov
  2019-06-14 14:39                         ` Julien Grall
  0 siblings, 1 reply; 51+ messages in thread
From: Andrii Anisov @ 2019-06-14 14:36 UTC (permalink / raw)
  To: Julien Grall, Jan Beulich
  Cc: Stefano Stabellini, Wei Liu, Konrad Rzeszutek Wilk,
	George Dunlap, Andrew Cooper, Ian Jackson, Tim Deegan, xen-devel,
	andrii_anisov, Roger Pau Monne

Hello Julien,

On 13.06.19 17:41, Julien Grall wrote:
> Sorry I may have missed it. We can't really restrict the usage of the current hypercall (it is pretty lax). So I think any lockless solution would require to allow the hypercall
> to be used together (which I want to avoid).

I'd better say here allowing using phys and virt registered runstates together (and independently).
And me personally for this approach, for sure not encouraging users (guests) to do so.

> If we agree to allow the two hypercalls to be used together, then if we protect the update with domain_lock() then you should be able to avoid any race with the update path as onlining a vCPU requires to take the domain_lock() (see do_vcpu_op for x86 and do_common_cpu_on for Arm).

Could you please clarify are you saying about protection runstate mapping update or runstate values update?
I'd not use the common lock for all vcpus runstate value update, unless it is an rw lock or doing trylock on it.

BTW, I'm a bit confused, are you OK with lock (not trylock) existing in hypercall?

-- 
Sincerely,
Andrii Anisov.

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [Xen-devel] [PATCH v3] xen: introduce VCPUOP_register_runstate_phys_memory_area hypercall
  2019-06-14 14:36                       ` Andrii Anisov
@ 2019-06-14 14:39                         ` Julien Grall
  2019-06-14 15:11                           ` Andrii Anisov
  0 siblings, 1 reply; 51+ messages in thread
From: Julien Grall @ 2019-06-14 14:39 UTC (permalink / raw)
  To: Andrii Anisov, Jan Beulich
  Cc: Stefano Stabellini, Wei Liu, Konrad Rzeszutek Wilk,
	George Dunlap, Andrew Cooper, Ian Jackson, Tim Deegan, xen-devel,
	andrii_anisov, Roger Pau Monne

On 14/06/2019 15:36, Andrii Anisov wrote:
> Hello Julien,

Hi,

> On 13.06.19 17:41, Julien Grall wrote:
>> Sorry I may have missed it. We can't really restrict the usage of the current 
>> hypercall (it is pretty lax). So I think any lockless solution would require 
>> to allow the hypercall
>> to be used together (which I want to avoid).
> 
> I'd better say here allowing using phys and virt registered runstates together 
> (and independently).
> And me personally for this approach, for sure not encouraging users (guests) to 
> do so.

Why? What are the benefits for a guest to use the two interface together? After 
all they have exactly the same data...

> 
>> If we agree to allow the two hypercalls to be used together, then if we 
>> protect the update with domain_lock() then you should be able to avoid any 
>> race with the update path as onlining a vCPU requires to take the 
>> domain_lock() (see do_vcpu_op for x86 and do_common_cpu_on for Arm).
> 
> Could you please clarify are you saying about protection runstate mapping update 
> or runstate values update?

runstate mapping.
> BTW, I'm a bit confused, are you OK with lock (not trylock) existing in hypercall?

This is still in discussion.

Cheers,

-- 
Julien Grall

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [Xen-devel] [PATCH v3] xen: introduce VCPUOP_register_runstate_phys_memory_area hypercall
  2019-06-14 14:39                         ` Julien Grall
@ 2019-06-14 15:11                           ` Andrii Anisov
  2019-06-14 15:24                             ` Julien Grall
  2019-06-14 15:42                             ` Jan Beulich
  0 siblings, 2 replies; 51+ messages in thread
From: Andrii Anisov @ 2019-06-14 15:11 UTC (permalink / raw)
  To: Julien Grall, Jan Beulich
  Cc: Stefano Stabellini, Wei Liu, Konrad Rzeszutek Wilk,
	George Dunlap, Andrew Cooper, Ian Jackson, Tim Deegan, xen-devel,
	andrii_anisov, Roger Pau Monne



On 14.06.19 17:39, Julien Grall wrote:
> Why? What are the benefits for a guest to use the two interface together?

I do not say the guest has to use both interfaces simultaneously. It is logically odd, doing so will only reflect in increasing of hypervisor overhead.
But such an implementation will have a simpler code, which expected to be (a bit) faster.
So the code simplicity would be a benefit for us. Lower hypervisor overhead is a benefit for sane guests, which use only one interface.

BTW, dropping the old interface implementation will be much easier in future if it will not clash with the new one.

> After all they have exactly the same data...

Yes, but normal guests should use only one interface.


>> BTW, I'm a bit confused, are you OK with lock (not trylock) existing in hypercall?
> 
> This is still in discussion.

I see. So I'll think about the continuation implementation in meanwhile.

-- 
Sincerely,
Andrii Anisov.

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [Xen-devel] [PATCH v3] xen: introduce VCPUOP_register_runstate_phys_memory_area hypercall
  2019-06-14 15:11                           ` Andrii Anisov
@ 2019-06-14 15:24                             ` Julien Grall
  2019-06-14 16:11                               ` Andrii Anisov
  2019-06-14 15:42                             ` Jan Beulich
  1 sibling, 1 reply; 51+ messages in thread
From: Julien Grall @ 2019-06-14 15:24 UTC (permalink / raw)
  To: Andrii Anisov, Jan Beulich
  Cc: Stefano Stabellini, Wei Liu, Konrad Rzeszutek Wilk,
	George Dunlap, Andrew Cooper, Ian Jackson, Tim Deegan, xen-devel,
	andrii_anisov, Roger Pau Monne

Hi Andrii,

On 14/06/2019 16:11, Andrii Anisov wrote:
> 
> 
> On 14.06.19 17:39, Julien Grall wrote:
>> Why? What are the benefits for a guest to use the two interface together?
> 
> I do not say the guest has to use both interfaces simultaneously. It is 
> logically odd, doing so will only reflect in increasing of hypervisor overhead.
> But such an implementation will have a simpler code, which expected to be (a 
> bit) faster. > So the code simplicity would be a benefit for us. Lower hypervisor overhead is a
> benefit for sane guests, which use only one interface.
I hope you are aware that speaking about speed here is quite irrelevant. The 
difference would be clear lost in the noise of the rest of the context switch.

But, if you allow something, then most likely someone will use it. However, you 
have to differentiate implementation vs documentation.

In this case, I don't think the implementation should dictate what is going to 
be exposed.

If you document that it can't happen, then you have room to forbid the mix in 
the future (assuming this can't be done now).

In other word, the more lax is the interface, the more difficult it is tighten 
in the future.

I am not going to push for an implementation that forbid the mix. But I am 
strongly going to push for any documentation of the expected interaction. So we 
don't make our life miserable later on.

> 
> BTW, dropping the old interface implementation will be much easier in future if 
> it will not clash with the new one.
I am afraid we will never be able to remove the old interface.

> 
>> After all they have exactly the same data...
> 
> Yes, but normal guests should use only one interface.

See above.

Cheers,

-- 
Julien Grall

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [Xen-devel] [PATCH v3] xen: introduce VCPUOP_register_runstate_phys_memory_area hypercall
  2019-06-14 15:11                           ` Andrii Anisov
  2019-06-14 15:24                             ` Julien Grall
@ 2019-06-14 15:42                             ` Jan Beulich
  2019-06-14 16:23                               ` Andrii Anisov
  1 sibling, 1 reply; 51+ messages in thread
From: Jan Beulich @ 2019-06-14 15:42 UTC (permalink / raw)
  To: andrii.anisov
  Cc: Stefano Stabellini, Wei Liu, Konrad Rzeszutek Wilk,
	George Dunlap, Andrew Cooper, Ian Jackson, Tim Deegan,
	Julien Grall, xen-devel, andrii_anisov, Roger Pau Monne

>>> On 14.06.19 at 17:11, <andrii.anisov@gmail.com> wrote:
> On 14.06.19 17:39, Julien Grall wrote:
>> After all they have exactly the same data...
> 
> Yes, but normal guests should use only one interface.

I thought it had been clarified already that normal guests can very
well use both interfaces, just not both at the same time: Boot loader
and OS could disagree in this regard, as the prime example.

Jan



_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [Xen-devel] [PATCH v3] xen: introduce VCPUOP_register_runstate_phys_memory_area hypercall
  2019-06-14 15:24                             ` Julien Grall
@ 2019-06-14 16:11                               ` Andrii Anisov
  2019-06-14 16:20                                 ` Julien Grall
  0 siblings, 1 reply; 51+ messages in thread
From: Andrii Anisov @ 2019-06-14 16:11 UTC (permalink / raw)
  To: Julien Grall, Jan Beulich
  Cc: Stefano Stabellini, Wei Liu, Konrad Rzeszutek Wilk,
	George Dunlap, Andrew Cooper, Ian Jackson, Tim Deegan, xen-devel,
	andrii_anisov, Roger Pau Monne



On 14.06.19 18:24, Julien Grall wrote:
> I hope you are aware that speaking about speed here is quite irrelevant. The difference would be clear lost in the noise of the rest of the context switch.

Mmm... I have that understanding. Yet I'd rather try to not increase the noise, if not reduce.

BTW, I'll remember that to you on your next conditional branch removal ;)

> But, if you allow something, then most likely someone will use it. However, you have to differentiate implementation vs documentation.
> 
> In this case, I don't think the implementation should dictate what is going to be exposed.
>
> If you document that it can't happen, then you have room to forbid the mix in the future (assuming this can't be done now).
> 
> In other word, the more lax is the interface, the more difficult it is tighten in the future.
> 
> I am not going to push for an implementation that forbid the mix. But I am strongly going to push for any documentation of the expected interaction. So we don't make our life miserable later on.

I do not encourage using both interfaces simultaneously, it is pointless.
If you are saying that this matter could be solved with the appropriate documentation, it's OK with me.

>> BTW, dropping the old interface implementation will be much easier in future if it will not clash with the new one.
> I am afraid we will never be able to remove the old interface.

Maybe.

-- 
Sincerely,
Andrii Anisov.

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [Xen-devel] [PATCH v3] xen: introduce VCPUOP_register_runstate_phys_memory_area hypercall
  2019-06-14 16:11                               ` Andrii Anisov
@ 2019-06-14 16:20                                 ` Julien Grall
  2019-06-14 16:25                                   ` Andrii Anisov
  0 siblings, 1 reply; 51+ messages in thread
From: Julien Grall @ 2019-06-14 16:20 UTC (permalink / raw)
  To: Andrii Anisov, Jan Beulich
  Cc: Stefano Stabellini, Wei Liu, Konrad Rzeszutek Wilk,
	George Dunlap, Andrew Cooper, Ian Jackson, Tim Deegan, xen-devel,
	andrii_anisov, Roger Pau Monne



On 14/06/2019 17:11, Andrii Anisov wrote:
> On 14.06.19 18:24, Julien Grall wrote:
>> But, if you allow something, then most likely someone will use it. However, 
>> you have to differentiate implementation vs documentation.
>>
>> In this case, I don't think the implementation should dictate what is going to 
>> be exposed.
>>
>> If you document that it can't happen, then you have room to forbid the mix in 
>> the future (assuming this can't be done now).
>>
>> In other word, the more lax is the interface, the more difficult it is tighten 
>> in the future.
>>
>> I am not going to push for an implementation that forbid the mix. But I am 
>> strongly going to push for any documentation of the expected interaction. So 
>> we don't make our life miserable later on.
> 
> I do not encourage using both interfaces simultaneously, it is pointless.
> If you are saying that this matter could be solved with the appropriate 
> documentation, it's OK with me.
> 
>>> BTW, dropping the old interface implementation will be much easier in future 
>>> if it will not clash with the new one.
>> I am afraid we will never be able to remove the old interface.
> 
> Maybe.

Well, that a stable ABI... Even if I would love to remove it, you can't get rid 
of old guests that easily...

Cheers,


> 

-- 
Julien Grall

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [Xen-devel] [PATCH v3] xen: introduce VCPUOP_register_runstate_phys_memory_area hypercall
  2019-06-14 15:42                             ` Jan Beulich
@ 2019-06-14 16:23                               ` Andrii Anisov
  2019-06-17  6:28                                 ` Jan Beulich
  0 siblings, 1 reply; 51+ messages in thread
From: Andrii Anisov @ 2019-06-14 16:23 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Stefano Stabellini, Wei Liu, Konrad Rzeszutek Wilk,
	George Dunlap, Andrew Cooper, Ian Jackson, Tim Deegan,
	Julien Grall, xen-devel, andrii_anisov, Roger Pau Monne

Hello Jan,

On 14.06.19 18:42, Jan Beulich wrote:
>>>> On 14.06.19 at 17:11, <andrii.anisov@gmail.com> wrote:
>> On 14.06.19 17:39, Julien Grall wrote:
>>> After all they have exactly the same data...
>>
>> Yes, but normal guests should use only one interface.
> 
> I thought it had been clarified already that normal guests can very
> well use both interfaces, just not both at the same time: Boot loader
> and OS could disagree in this regard, as the prime example.

I missed "at the same time".

We may require existing runstate area unregistering if the system is aware of it. But it is for the new interface.
The old one has no documentation about the unregistering. The implicit way is known to us, but not known to users.
How to solve the clash?

-- 
Sincerely,
Andrii Anisov.

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [Xen-devel] [PATCH v3] xen: introduce VCPUOP_register_runstate_phys_memory_area hypercall
  2019-06-14 16:20                                 ` Julien Grall
@ 2019-06-14 16:25                                   ` Andrii Anisov
  2019-06-17  6:27                                     ` Jan Beulich
  0 siblings, 1 reply; 51+ messages in thread
From: Andrii Anisov @ 2019-06-14 16:25 UTC (permalink / raw)
  To: Julien Grall, Jan Beulich
  Cc: Stefano Stabellini, Wei Liu, Konrad Rzeszutek Wilk,
	George Dunlap, Andrew Cooper, Ian Jackson, Tim Deegan, xen-devel,
	andrii_anisov, Roger Pau Monne



On 14.06.19 19:20, Julien Grall wrote:
> Well, that a stable ABI... Even if I would love to remove it, you can't get rid of old guests that easily...

In 5 years, as XEN did for LK 3.18?

-- 
Sincerely,
Andrii Anisov.

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [Xen-devel] [PATCH v3] xen: introduce VCPUOP_register_runstate_phys_memory_area hypercall
  2019-06-14 16:25                                   ` Andrii Anisov
@ 2019-06-17  6:27                                     ` Jan Beulich
  0 siblings, 0 replies; 51+ messages in thread
From: Jan Beulich @ 2019-06-17  6:27 UTC (permalink / raw)
  To: andrii.anisov
  Cc: Stefano Stabellini, Wei Liu, Konrad Rzeszutek Wilk,
	George Dunlap, Andrew Cooper, Ian Jackson, Tim Deegan,
	Julien Grall, xen-devel, andrii_anisov, Roger Pau Monne

>>> On 14.06.19 at 18:25, <andrii.anisov@gmail.com> wrote:
> On 14.06.19 19:20, Julien Grall wrote:
>> Well, that a stable ABI... Even if I would love to remove it, you can't get 
> rid of old guests that easily...
> 
> In 5 years, as XEN did for LK 3.18?

I'm afraid I don't even know how to best word a reply to this. What
has the Linux kernel version got to do with the interfaces provided
by Xen? Is your reply meant to say that something was removed
from the Linux kernel in that version? That's fine - consumers can
stop consuming interfaces they don't like to use. But Xen (the
producer of interfaces like the one discussed here) can't lightly stop
providing interfaces, irrespective of their use in upstream Linux.

Jan



_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [Xen-devel] [PATCH v3] xen: introduce VCPUOP_register_runstate_phys_memory_area hypercall
  2019-06-14 16:23                               ` Andrii Anisov
@ 2019-06-17  6:28                                 ` Jan Beulich
  2019-06-18 15:32                                   ` Andrii Anisov
  0 siblings, 1 reply; 51+ messages in thread
From: Jan Beulich @ 2019-06-17  6:28 UTC (permalink / raw)
  To: andrii.anisov
  Cc: Stefano Stabellini, Wei Liu, Konrad Rzeszutek Wilk,
	George Dunlap, Andrew Cooper, Ian Jackson, Tim Deegan,
	Julien Grall, xen-devel, andrii_anisov, Roger Pau Monne

>>> On 14.06.19 at 18:23, <andrii.anisov@gmail.com> wrote:
> On 14.06.19 18:42, Jan Beulich wrote:
>>>>> On 14.06.19 at 17:11, <andrii.anisov@gmail.com> wrote:
>>> On 14.06.19 17:39, Julien Grall wrote:
>>>> After all they have exactly the same data...
>>>
>>> Yes, but normal guests should use only one interface.
>> 
>> I thought it had been clarified already that normal guests can very
>> well use both interfaces, just not both at the same time: Boot loader
>> and OS could disagree in this regard, as the prime example.
> 
> I missed "at the same time".
> 
> We may require existing runstate area unregistering if the system is aware 
> of it. But it is for the new interface.
> The old one has no documentation about the unregistering. The implicit way 
> is known to us, but not known to users.
> How to solve the clash?

And once again I'm not sure what to answer, considering that I've
already outlined a possible model (without any explicit unregistration).

Jan



_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [Xen-devel] [PATCH v3] xen: introduce VCPUOP_register_runstate_phys_memory_area hypercall
  2019-06-17  6:28                                 ` Jan Beulich
@ 2019-06-18 15:32                                   ` Andrii Anisov
  2019-06-18 15:44                                     ` Jan Beulich
  0 siblings, 1 reply; 51+ messages in thread
From: Andrii Anisov @ 2019-06-18 15:32 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Stefano Stabellini, Wei Liu, Konrad Rzeszutek Wilk,
	George Dunlap, Andrew Cooper, Ian Jackson, Tim Deegan,
	Julien Grall, xen-devel, andrii_anisov, Roger Pau Monne

Hello Jan,

On 17.06.19 09:28, Jan Beulich wrote:
>> We may require existing runstate area unregistering if the system is aware
>> of it. But it is for the new interface.
>> The old one has no documentation about the unregistering. The implicit way
>> is known to us, but not known to users.
>> How to solve the clash?
> 
> And once again I'm not sure what to answer, considering that I've
> already outlined a possible model (without any explicit unregistration).

Just to be sure, "the model" you are talking about is following:

> I thought it had been clarified already that normal guests can very
> well use both interfaces, just not both at the same time: Boot loader
> and OS could disagree in this regard, as the prime example.

Is it correct?

But with the current interface (VA) that model is already broken without unregistration. On change between entities with different VA spaces the hypervisor definitely has a chance to spoil the new VA space at the old address.
IMHO it should be fixed (at least documented) for the old interface.

-- 
Sincerely,
Andrii Anisov.

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [Xen-devel] [PATCH v3] xen: introduce VCPUOP_register_runstate_phys_memory_area hypercall
  2019-06-18 15:32                                   ` Andrii Anisov
@ 2019-06-18 15:44                                     ` Jan Beulich
  0 siblings, 0 replies; 51+ messages in thread
From: Jan Beulich @ 2019-06-18 15:44 UTC (permalink / raw)
  To: andrii.anisov
  Cc: Stefano Stabellini, Wei Liu, Konrad Rzeszutek Wilk,
	George Dunlap, Andrew Cooper, Ian Jackson, Tim Deegan,
	Julien Grall, xen-devel, andrii_anisov, Roger Pau Monne

>>> On 18.06.19 at 17:32, <andrii.anisov@gmail.com> wrote:
> On 17.06.19 09:28, Jan Beulich wrote:
>>> We may require existing runstate area unregistering if the system is aware
>>> of it. But it is for the new interface.
>>> The old one has no documentation about the unregistering. The implicit way
>>> is known to us, but not known to users.
>>> How to solve the clash?
>> 
>> And once again I'm not sure what to answer, considering that I've
>> already outlined a possible model (without any explicit unregistration).
> 
> Just to be sure, "the model" you are talking about is following:
> 
>> I thought it had been clarified already that normal guests can very
>> well use both interfaces, just not both at the same time: Boot loader
>> and OS could disagree in this regard, as the prime example.
> 
> Is it correct?

Not really - what you quote is a statement, not the outline of a
model. The basic idea for enforcement of a restriction was to
allow switching modes only when just one vCPU is online in a
guest.

> But with the current interface (VA) that model is already broken without 
> unregistration. On change between entities with different VA spaces the 
> hypervisor definitely has a chance to spoil the new VA space at the old address.
> IMHO it should be fixed (at least documented) for the old interface.

Document - sure, feel free. Fix - I don't see how you would do
this. Every component handing control onto another one would
be in charge on its own. That's not under our control.

Jan



_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

end of thread, back to index

Thread overview: 51+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-05-24 18:12 [PATCH RFC 2] [DO NOT APPLY] introduce VCPUOP_register_runstate_phys_memory_area hypercall Andrii Anisov
2019-05-24 18:12 ` [Xen-devel] " Andrii Anisov
2019-05-24 18:12 ` [PATCH v3] Introduce runstate area registration with phys address Andrii Anisov
2019-05-24 18:12   ` [Xen-devel] " Andrii Anisov
2019-05-24 18:12 ` [PATCH v3] xen: introduce VCPUOP_register_runstate_phys_memory_area hypercall Andrii Anisov
2019-05-24 18:12   ` [Xen-devel] " Andrii Anisov
2019-06-07 14:23   ` Jan Beulich
2019-06-10 11:44     ` Julien Grall
2019-06-11  9:10       ` Jan Beulich
2019-06-11 10:22         ` Andrii Anisov
2019-06-11 12:12           ` Julien Grall
2019-06-11 12:26             ` Andrii Anisov
2019-06-11 12:32               ` Julien Grall
2019-06-11 12:40                 ` Andrii Anisov
2019-06-13 12:21           ` Andrii Anisov
2019-06-13 12:39             ` Jan Beulich
2019-06-13 12:32         ` Andrii Anisov
2019-06-13 12:41           ` Jan Beulich
2019-06-13 12:48             ` Julien Grall
2019-06-13 12:58               ` Jan Beulich
2019-06-13 13:14                 ` Julien Grall
2019-06-13 13:40                   ` Jan Beulich
2019-06-13 14:41                     ` Julien Grall
2019-06-14 14:36                       ` Andrii Anisov
2019-06-14 14:39                         ` Julien Grall
2019-06-14 15:11                           ` Andrii Anisov
2019-06-14 15:24                             ` Julien Grall
2019-06-14 16:11                               ` Andrii Anisov
2019-06-14 16:20                                 ` Julien Grall
2019-06-14 16:25                                   ` Andrii Anisov
2019-06-17  6:27                                     ` Jan Beulich
2019-06-14 15:42                             ` Jan Beulich
2019-06-14 16:23                               ` Andrii Anisov
2019-06-17  6:28                                 ` Jan Beulich
2019-06-18 15:32                                   ` Andrii Anisov
2019-06-18 15:44                                     ` Jan Beulich
2019-06-11 16:09     ` Andrii Anisov
2019-06-12  7:27       ` Jan Beulich
2019-06-13 12:17         ` Andrii Anisov
2019-06-13 12:36           ` Jan Beulich
2019-06-11 16:13     ` Andrii Anisov
2019-05-24 18:12 ` [PATCH RFC 1] [DO NOT APPLY] " Andrii Anisov
2019-05-24 18:12   ` [Xen-devel] " Andrii Anisov
2019-05-28  8:59 ` [PATCH RFC 2] " Julien Grall
2019-05-28  8:59   ` [Xen-devel] " Julien Grall
2019-05-28  9:17   ` Andrii Anisov
2019-05-28  9:17     ` [Xen-devel] " Andrii Anisov
2019-05-28  9:23     ` Julien Grall
2019-05-28  9:23       ` [Xen-devel] " Julien Grall
2019-05-28  9:36       ` Andrii Anisov
2019-05-28  9:36         ` [Xen-devel] " Andrii Anisov

Xen-Devel Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/xen-devel/0 xen-devel/git/0.git
	git clone --mirror https://lore.kernel.org/xen-devel/1 xen-devel/git/1.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 xen-devel xen-devel/ https://lore.kernel.org/xen-devel \
		xen-devel@lists.xenproject.org xen-devel@archiver.kernel.org
	public-inbox-index xen-devel


Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.xenproject.lists.xen-devel


AGPL code for this site: git clone https://public-inbox.org/ public-inbox