All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH RFC] x86/hvm: unify HVM and PVH hypercall tables.
@ 2014-05-08 15:31 Tim Deegan
  2014-05-08 16:53 ` George Dunlap
                   ` (4 more replies)
  0 siblings, 5 replies; 15+ messages in thread
From: Tim Deegan @ 2014-05-08 15:31 UTC (permalink / raw)
  To: xen-devel; +Cc: Keir Fraser, Jan Beulich

Stage one of many in merging PVH and HVM code in the hypervisor.

This exposes a few new hypercalls to HVM guests, all of which were
already available to PVH ones:

 - XENMEM_memory_map / XENMEM_machine_memory_map / XENMEM_machphys_mapping:
   These are basically harmless, if a bit useless to plain HVM.

 - VCPUOP_send_nmi / VCPUOP_initialise / VCPUOP[_is]_up / VCPUOP_down
   This will eventually let HVM guests bring up APs the way PVH ones do.
   For now, the VCPUOP_initialise paths are still gated on is_pvh.
 - VCPUOP_get_physid
   Harmless.

 - __HYPERVISOR_platform_op (XSM_PRIV callers only).

 - __HYPERVISOR_mmuext_op.
   The pagetable manipulation MMUEXT ops are already denied to
   paging_mode_refcounts() domains; the baseptr ones are already
   denied to paging_mode_translate() domains.
   I have restricted MMUEXT[UN]MARK_SUPER to !paging_mode_refcounts()
   domains as well, as I can see no need for them in PVH.
   That leaves TLB and cache flush operations and MMUEXT_CLEAR_PAGE /
   MMUEXT_COPY_PAGE, all of which are OK.

 - PHYSDEVOP_* (only for hardware control domains).
   Also make ops that touch PV vcpu state (PHYSDEVOP_set_iopl and
   PHYSDEVOP_set_iobitmap) gate on is_pv rather than !is_pvh.

PVH guests can also see a few hypercalls that they couldn't before,
but which were already available to HVM guests:

 - __HYPERVISOR_set_timer_op

 - __HYPERVISOR_tmem_op

Signed-off-by: Tim Deegan <tim@xen.org>
Cc: Keir Fraser <keir@xen.org>
Cc: Jan Beulich <jbeulich@suse.com>
Cc: Mukesh Rathor <mukesh.rathor@oracle.com>
---
 xen/arch/x86/hvm/hvm.c          | 113 +++++++---------------------------------
 xen/arch/x86/mm.c               |  12 +++++
 xen/arch/x86/physdev.c          |   4 +-
 xen/arch/x86/x86_64/compat/mm.c |   2 -
 xen/include/asm-x86/hypercall.h |  10 ++++
 5 files changed, 42 insertions(+), 99 deletions(-)

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index da220bf..4274151 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -3433,16 +3433,13 @@ static long hvm_memory_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
 
     switch ( cmd & MEMOP_CMD_MASK )
     {
-    case XENMEM_memory_map:
-    case XENMEM_machine_memory_map:
-    case XENMEM_machphys_mapping:
-        return -ENOSYS;
     case XENMEM_decrease_reservation:
         rc = do_memory_op(cmd, arg);
         current->domain->arch.hvm_domain.qemu_mapcache_invalidate = 1;
         return rc;
+    default:
+        return do_memory_op(cmd, arg);
     }
-    return do_memory_op(cmd, arg);
 }
 
 static long hvm_physdev_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
@@ -3450,7 +3447,7 @@ static long hvm_physdev_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
     switch ( cmd )
     {
     default:
-        if ( !is_pvh_vcpu(current) || !is_hardware_domain(current->domain) )
+        if ( !is_hardware_domain(current->domain) )
             return -ENOSYS;
         /* fall through */
     case PHYSDEVOP_map_pirq:
@@ -3462,31 +3459,6 @@ static long hvm_physdev_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
     }
 }
 
-static long hvm_vcpu_op(
-    int cmd, int vcpuid, XEN_GUEST_HANDLE_PARAM(void) arg)
-{
-    long rc;
-
-    switch ( cmd )
-    {
-    case VCPUOP_register_runstate_memory_area:
-    case VCPUOP_get_runstate_info:
-    case VCPUOP_set_periodic_timer:
-    case VCPUOP_stop_periodic_timer:
-    case VCPUOP_set_singleshot_timer:
-    case VCPUOP_stop_singleshot_timer:
-    case VCPUOP_register_vcpu_info:
-    case VCPUOP_register_vcpu_time_memory_area:
-        rc = do_vcpu_op(cmd, vcpuid, arg);
-        break;
-    default:
-        rc = -ENOSYS;
-        break;
-    }
-
-    return rc;
-}
-
 typedef unsigned long hvm_hypercall_t(
     unsigned long, unsigned long, unsigned long, unsigned long, unsigned long,
     unsigned long);
@@ -3509,41 +3481,13 @@ static long hvm_memory_op_compat32(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
 
     switch ( cmd & MEMOP_CMD_MASK )
     {
-    case XENMEM_memory_map:
-    case XENMEM_machine_memory_map:
-    case XENMEM_machphys_mapping:
-        return -ENOSYS;
     case XENMEM_decrease_reservation:
         rc = compat_memory_op(cmd, arg);
         current->domain->arch.hvm_domain.qemu_mapcache_invalidate = 1;
         return rc;
-    }
-    return compat_memory_op(cmd, arg);
-}
-
-static long hvm_vcpu_op_compat32(
-    int cmd, int vcpuid, XEN_GUEST_HANDLE_PARAM(void) arg)
-{
-    long rc;
-
-    switch ( cmd )
-    {
-    case VCPUOP_register_runstate_memory_area:
-    case VCPUOP_get_runstate_info:
-    case VCPUOP_set_periodic_timer:
-    case VCPUOP_stop_periodic_timer:
-    case VCPUOP_set_singleshot_timer:
-    case VCPUOP_stop_singleshot_timer:
-    case VCPUOP_register_vcpu_info:
-    case VCPUOP_register_vcpu_time_memory_area:
-        rc = compat_vcpu_op(cmd, vcpuid, arg);
-        break;
     default:
-        rc = -ENOSYS;
-        break;
+        return compat_memory_op(cmd, arg);
     }
-
-    return rc;
 }
 
 static long hvm_physdev_op_compat32(
@@ -3551,27 +3495,29 @@ static long hvm_physdev_op_compat32(
 {
     switch ( cmd )
     {
+    default:
+        if ( !is_hardware_domain(current->domain) )
+            return -ENOSYS;
+        /* fall through */
         case PHYSDEVOP_map_pirq:
         case PHYSDEVOP_unmap_pirq:
         case PHYSDEVOP_eoi:
         case PHYSDEVOP_irq_status_query:
         case PHYSDEVOP_get_free_pirq:
             return compat_physdev_op(cmd, arg);
-        break;
-    default:
-            return -ENOSYS;
-        break;
     }
 }
 
 static hvm_hypercall_t *const hvm_hypercall64_table[NR_hypercalls] = {
     [ __HYPERVISOR_memory_op ] = (hvm_hypercall_t *)hvm_memory_op,
     [ __HYPERVISOR_grant_table_op ] = (hvm_hypercall_t *)hvm_grant_table_op,
-    [ __HYPERVISOR_vcpu_op ] = (hvm_hypercall_t *)hvm_vcpu_op,
     [ __HYPERVISOR_physdev_op ] = (hvm_hypercall_t *)hvm_physdev_op,
+    HYPERCALL(platform_op),
     HYPERCALL(xen_version),
     HYPERCALL(console_io),
     HYPERCALL(event_channel_op),
+    HYPERCALL(vcpu_op),
+    HYPERCALL(mmuext_op),
     HYPERCALL(sched_op),
     HYPERCALL(set_timer_op),
     HYPERCALL(xsm_op),
@@ -3587,11 +3533,13 @@ static hvm_hypercall_t *const hvm_hypercall64_table[NR_hypercalls] = {
 static hvm_hypercall_t *const hvm_hypercall32_table[NR_hypercalls] = {
     [ __HYPERVISOR_memory_op ] = (hvm_hypercall_t *)hvm_memory_op_compat32,
     [ __HYPERVISOR_grant_table_op ] = (hvm_hypercall_t *)hvm_grant_table_op_compat32,
-    [ __HYPERVISOR_vcpu_op ] = (hvm_hypercall_t *)hvm_vcpu_op_compat32,
     [ __HYPERVISOR_physdev_op ] = (hvm_hypercall_t *)hvm_physdev_op_compat32,
+    HYPERCALL(platform_op),
     COMPAT_CALL(xen_version),
     HYPERCALL(console_io),
     HYPERCALL(event_channel_op),
+    COMPAT_CALL(vcpu_op),
+    COMPAT_CALL(mmuext_op),
     COMPAT_CALL(sched_op),
     COMPAT_CALL(set_timer_op),
     HYPERCALL(xsm_op),
@@ -3601,24 +3549,6 @@ static hvm_hypercall_t *const hvm_hypercall32_table[NR_hypercalls] = {
     HYPERCALL(tmem_op)
 };
 
-/* PVH 32bitfixme. */
-static hvm_hypercall_t *const pvh_hypercall64_table[NR_hypercalls] = {
-    HYPERCALL(platform_op),
-    HYPERCALL(memory_op),
-    HYPERCALL(xen_version),
-    HYPERCALL(console_io),
-    [ __HYPERVISOR_grant_table_op ]  = (hvm_hypercall_t *)hvm_grant_table_op,
-    HYPERCALL(vcpu_op),
-    HYPERCALL(mmuext_op),
-    HYPERCALL(xsm_op),
-    HYPERCALL(sched_op),
-    HYPERCALL(event_channel_op),
-    [ __HYPERVISOR_physdev_op ]      = (hvm_hypercall_t *)hvm_physdev_op,
-    HYPERCALL(hvm_op),
-    HYPERCALL(sysctl),
-    HYPERCALL(domctl)
-};
-
 int hvm_do_hypercall(struct cpu_user_regs *regs)
 {
     struct vcpu *curr = current;
@@ -3645,9 +3575,7 @@ int hvm_do_hypercall(struct cpu_user_regs *regs)
     if ( (eax & 0x80000000) && is_viridian_domain(curr->domain) )
         return viridian_hypercall(regs);
 
-    if ( (eax >= NR_hypercalls) ||
-         (is_pvh_vcpu(curr) ? !pvh_hypercall64_table[eax]
-                            : !hvm_hypercall32_table[eax]) )
+    if ( (eax >= NR_hypercalls) || !hvm_hypercall32_table[eax] )
     {
         regs->eax = -ENOSYS;
         return HVM_HCALL_completed;
@@ -3662,14 +3590,9 @@ int hvm_do_hypercall(struct cpu_user_regs *regs)
                     regs->r10, regs->r8, regs->r9);
 
         curr->arch.hvm_vcpu.hcall_64bit = 1;
-        if ( is_pvh_vcpu(curr) )
-            regs->rax = pvh_hypercall64_table[eax](regs->rdi, regs->rsi,
-                                                   regs->rdx, regs->r10,
-                                                   regs->r8, regs->r9);
-        else
-            regs->rax = hvm_hypercall64_table[eax](regs->rdi, regs->rsi,
-                                                   regs->rdx, regs->r10,
-                                                   regs->r8, regs->r9);
+        regs->rax = hvm_hypercall64_table[eax](regs->rdi, regs->rsi,
+                                               regs->rdx, regs->r10,
+                                               regs->r8, regs->r9);
         curr->arch.hvm_vcpu.hcall_64bit = 0;
     }
     else
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 1a8a5e0..3d5c3c8 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -3310,6 +3310,12 @@ long do_mmuext_op(
             unsigned long mfn;
             struct spage_info *spage;
 
+            if ( paging_mode_refcounts(pg_owner) )
+            {
+                okay = 0;
+                break;
+            }
+
             mfn = op.arg1.mfn;
             if ( mfn & (L1_PAGETABLE_ENTRIES-1) )
             {
@@ -3336,6 +3342,12 @@ long do_mmuext_op(
             unsigned long mfn;
             struct spage_info *spage;
 
+            if ( paging_mode_refcounts(pg_owner) )
+            {
+                okay = 0;
+                break;
+            }
+
             mfn = op.arg1.mfn;
             if ( mfn & (L1_PAGETABLE_ENTRIES-1) )
             {
diff --git a/xen/arch/x86/physdev.c b/xen/arch/x86/physdev.c
index f178315..a2d2b96 100644
--- a/xen/arch/x86/physdev.c
+++ b/xen/arch/x86/physdev.c
@@ -520,7 +520,7 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
         struct physdev_set_iopl set_iopl;
 
         ret = -ENOSYS;
-        if ( is_pvh_vcpu(current) )
+        if ( !is_pv_vcpu(current) )
             break;
 
         ret = -EFAULT;
@@ -538,7 +538,7 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
         struct physdev_set_iobitmap set_iobitmap;
 
         ret = -ENOSYS;
-        if ( is_pvh_vcpu(current) )
+        if ( !is_pv_vcpu(current) )
             break;
 
         ret = -EFAULT;
diff --git a/xen/arch/x86/x86_64/compat/mm.c b/xen/arch/x86/x86_64/compat/mm.c
index 69c6195..3fa90aa 100644
--- a/xen/arch/x86/x86_64/compat/mm.c
+++ b/xen/arch/x86/x86_64/compat/mm.c
@@ -236,8 +236,6 @@ int compat_update_va_mapping_otherdomain(unsigned long va, u32 lo, u32 hi,
     return do_update_va_mapping_otherdomain(va, lo | ((u64)hi << 32), flags, domid);
 }
 
-DEFINE_XEN_GUEST_HANDLE(mmuext_op_compat_t);
-
 int compat_mmuext_op(XEN_GUEST_HANDLE_PARAM(mmuext_op_compat_t) cmp_uops,
                      unsigned int count,
                      XEN_GUEST_HANDLE_PARAM(uint) pdone,
diff --git a/xen/include/asm-x86/hypercall.h b/xen/include/asm-x86/hypercall.h
index afa8ba9..cee8817 100644
--- a/xen/include/asm-x86/hypercall.h
+++ b/xen/include/asm-x86/hypercall.h
@@ -8,6 +8,7 @@
 #include <public/physdev.h>
 #include <public/arch-x86/xen-mca.h> /* for do_mca */
 #include <xen/types.h>
+#include <compat/memory.h>
 
 /*
  * Both do_mmuext_op() and do_mmu_update():
@@ -110,4 +111,13 @@ extern int
 arch_compat_vcpu_op(
     int cmd, struct vcpu *v, XEN_GUEST_HANDLE_PARAM(void) arg);
 
+DEFINE_XEN_GUEST_HANDLE(mmuext_op_compat_t);
+
+extern int
+compat_mmuext_op(
+    XEN_GUEST_HANDLE_PARAM(mmuext_op_compat_t) cmp_uops,
+    unsigned int count,
+    XEN_GUEST_HANDLE_PARAM(uint) pdone,
+    unsigned int foreigndom);
+
 #endif /* __ASM_X86_HYPERCALL_H__ */
-- 
1.8.5.2

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH RFC] x86/hvm: unify HVM and PVH hypercall tables.
  2014-05-08 15:31 [PATCH RFC] x86/hvm: unify HVM and PVH hypercall tables Tim Deegan
@ 2014-05-08 16:53 ` George Dunlap
  2014-05-15 10:25   ` Tim Deegan
  2014-05-08 18:39 ` Konrad Rzeszutek Wilk
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 15+ messages in thread
From: George Dunlap @ 2014-05-08 16:53 UTC (permalink / raw)
  To: Tim Deegan; +Cc: Keir Fraser, Jan Beulich, xen-devel

On Thu, May 8, 2014 at 4:31 PM, Tim Deegan <tim@xen.org> wrote:
> Stage one of many in merging PVH and HVM code in the hypervisor.
>
> This exposes a few new hypercalls to HVM guests, all of which were
> already available to PVH ones:
>
>  - XENMEM_memory_map / XENMEM_machine_memory_map / XENMEM_machphys_mapping:
>    These are basically harmless, if a bit useless to plain HVM.
>
>  - VCPUOP_send_nmi / VCPUOP_initialise / VCPUOP[_is]_up / VCPUOP_down
>    This will eventually let HVM guests bring up APs the way PVH ones do.
>    For now, the VCPUOP_initialise paths are still gated on is_pvh.
>  - VCPUOP_get_physid
>    Harmless.
>
>  - __HYPERVISOR_platform_op (XSM_PRIV callers only).
>
>  - __HYPERVISOR_mmuext_op.
>    The pagetable manipulation MMUEXT ops are already denied to
>    paging_mode_refcounts() domains; the baseptr ones are already
>    denied to paging_mode_translate() domains.
>    I have restricted MMUEXT[UN]MARK_SUPER to !paging_mode_refcounts()
>    domains as well, as I can see no need for them in PVH.
>    That leaves TLB and cache flush operations and MMUEXT_CLEAR_PAGE /
>    MMUEXT_COPY_PAGE, all of which are OK.
>
>  - PHYSDEVOP_* (only for hardware control domains).
>    Also make ops that touch PV vcpu state (PHYSDEVOP_set_iopl and
>    PHYSDEVOP_set_iobitmap) gate on is_pv rather than !is_pvh.
>
> PVH guests can also see a few hypercalls that they couldn't before,
> but which were already available to HVM guests:
>
>  - __HYPERVISOR_set_timer_op
>
>  - __HYPERVISOR_tmem_op
>
> Signed-off-by: Tim Deegan <tim@xen.org>
> Cc: Keir Fraser <keir@xen.org>
> Cc: Jan Beulich <jbeulich@suse.com>
> Cc: Mukesh Rathor <mukesh.rathor@oracle.com>

In principle looks good.  I've gone through the code and it all looks
correct.  I haven't done an audit of the various hypercalls yet.

One thing to consider is that regardless of whether a hypercall is
safe for HVM guests *if implemented correctly*, every additional
hypercall exposed increases the risk that an attacker will be able to
find one which is *not* implemented correctly and be able to take
advantage of it.

Obviously the *best* solution to that would be Flask, but AFAICT it's
not very widely used.

On the whole, simplifying the code by unification is still probably
better though.

 -George

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH RFC] x86/hvm: unify HVM and PVH hypercall tables.
  2014-05-08 15:31 [PATCH RFC] x86/hvm: unify HVM and PVH hypercall tables Tim Deegan
  2014-05-08 16:53 ` George Dunlap
@ 2014-05-08 18:39 ` Konrad Rzeszutek Wilk
  2014-05-15 10:30   ` Tim Deegan
  2014-05-09  8:08 ` Jan Beulich
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 15+ messages in thread
From: Konrad Rzeszutek Wilk @ 2014-05-08 18:39 UTC (permalink / raw)
  To: Tim Deegan; +Cc: Keir Fraser, Jan Beulich, xen-devel

On Thu, May 08, 2014 at 04:31:30PM +0100, Tim Deegan wrote:
> Stage one of many in merging PVH and HVM code in the hypervisor.
> 
> This exposes a few new hypercalls to HVM guests, all of which were
> already available to PVH ones:
> 
>  - XENMEM_memory_map / XENMEM_machine_memory_map / XENMEM_machphys_mapping:
>    These are basically harmless, if a bit useless to plain HVM.
> 
>  - VCPUOP_send_nmi / VCPUOP_initialise / VCPUOP[_is]_up / VCPUOP_down
>    This will eventually let HVM guests bring up APs the way PVH ones do.
>    For now, the VCPUOP_initialise paths are still gated on is_pvh.

I had a similar patch to enable this under HVM and found out that
if the guest issues VCPUOP_send_nmi we get in Linux:

[    3.611742] Corrupted low memory at c000fffc (fffc phys) = 00029b00
[    2.386785] Corrupted low memory at ffff88000000fff8 (fff8 phys) = 2990000000000

http://mid.gmane.org/20140422183443.GA6817@phenom.dumpdata.com


>  - VCPUOP_get_physid
>    Harmless.

The other VCPUOP_ ones are OK and you can stack a Reviewed-by-and-Tested-by:
Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>

on that.
> 
>  - __HYPERVISOR_platform_op (XSM_PRIV callers only).
> 
>  - __HYPERVISOR_mmuext_op.
>    The pagetable manipulation MMUEXT ops are already denied to
>    paging_mode_refcounts() domains; the baseptr ones are already
>    denied to paging_mode_translate() domains.
>    I have restricted MMUEXT[UN]MARK_SUPER to !paging_mode_refcounts()
>    domains as well, as I can see no need for them in PVH.
>    That leaves TLB and cache flush operations and MMUEXT_CLEAR_PAGE /
>    MMUEXT_COPY_PAGE, all of which are OK.
> 
>  - PHYSDEVOP_* (only for hardware control domains).
>    Also make ops that touch PV vcpu state (PHYSDEVOP_set_iopl and
>    PHYSDEVOP_set_iobitmap) gate on is_pv rather than !is_pvh.
> 
> PVH guests can also see a few hypercalls that they couldn't before,
> but which were already available to HVM guests:
> 
>  - __HYPERVISOR_set_timer_op
> 
>  - __HYPERVISOR_tmem_op
> 
> Signed-off-by: Tim Deegan <tim@xen.org>
> Cc: Keir Fraser <keir@xen.org>
> Cc: Jan Beulich <jbeulich@suse.com>
> Cc: Mukesh Rathor <mukesh.rathor@oracle.com>
> ---
>  xen/arch/x86/hvm/hvm.c          | 113 +++++++---------------------------------
>  xen/arch/x86/mm.c               |  12 +++++
>  xen/arch/x86/physdev.c          |   4 +-
>  xen/arch/x86/x86_64/compat/mm.c |   2 -
>  xen/include/asm-x86/hypercall.h |  10 ++++
>  5 files changed, 42 insertions(+), 99 deletions(-)
> 
> diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
> index da220bf..4274151 100644
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -3433,16 +3433,13 @@ static long hvm_memory_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
>  
>      switch ( cmd & MEMOP_CMD_MASK )
>      {
> -    case XENMEM_memory_map:
> -    case XENMEM_machine_memory_map:
> -    case XENMEM_machphys_mapping:
> -        return -ENOSYS;
>      case XENMEM_decrease_reservation:
>          rc = do_memory_op(cmd, arg);
>          current->domain->arch.hvm_domain.qemu_mapcache_invalidate = 1;
>          return rc;
> +    default:
> +        return do_memory_op(cmd, arg);
>      }
> -    return do_memory_op(cmd, arg);
>  }
>  
>  static long hvm_physdev_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
> @@ -3450,7 +3447,7 @@ static long hvm_physdev_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
>      switch ( cmd )
>      {
>      default:
> -        if ( !is_pvh_vcpu(current) || !is_hardware_domain(current->domain) )
> +        if ( !is_hardware_domain(current->domain) )
>              return -ENOSYS;
>          /* fall through */
>      case PHYSDEVOP_map_pirq:
> @@ -3462,31 +3459,6 @@ static long hvm_physdev_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
>      }
>  }
>  
> -static long hvm_vcpu_op(
> -    int cmd, int vcpuid, XEN_GUEST_HANDLE_PARAM(void) arg)
> -{
> -    long rc;
> -
> -    switch ( cmd )
> -    {
> -    case VCPUOP_register_runstate_memory_area:
> -    case VCPUOP_get_runstate_info:
> -    case VCPUOP_set_periodic_timer:
> -    case VCPUOP_stop_periodic_timer:
> -    case VCPUOP_set_singleshot_timer:
> -    case VCPUOP_stop_singleshot_timer:
> -    case VCPUOP_register_vcpu_info:
> -    case VCPUOP_register_vcpu_time_memory_area:
> -        rc = do_vcpu_op(cmd, vcpuid, arg);
> -        break;
> -    default:
> -        rc = -ENOSYS;
> -        break;
> -    }
> -
> -    return rc;
> -}
> -
>  typedef unsigned long hvm_hypercall_t(
>      unsigned long, unsigned long, unsigned long, unsigned long, unsigned long,
>      unsigned long);
> @@ -3509,41 +3481,13 @@ static long hvm_memory_op_compat32(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
>  
>      switch ( cmd & MEMOP_CMD_MASK )
>      {
> -    case XENMEM_memory_map:
> -    case XENMEM_machine_memory_map:
> -    case XENMEM_machphys_mapping:
> -        return -ENOSYS;
>      case XENMEM_decrease_reservation:
>          rc = compat_memory_op(cmd, arg);
>          current->domain->arch.hvm_domain.qemu_mapcache_invalidate = 1;
>          return rc;
> -    }
> -    return compat_memory_op(cmd, arg);
> -}
> -
> -static long hvm_vcpu_op_compat32(
> -    int cmd, int vcpuid, XEN_GUEST_HANDLE_PARAM(void) arg)
> -{
> -    long rc;
> -
> -    switch ( cmd )
> -    {
> -    case VCPUOP_register_runstate_memory_area:
> -    case VCPUOP_get_runstate_info:
> -    case VCPUOP_set_periodic_timer:
> -    case VCPUOP_stop_periodic_timer:
> -    case VCPUOP_set_singleshot_timer:
> -    case VCPUOP_stop_singleshot_timer:
> -    case VCPUOP_register_vcpu_info:
> -    case VCPUOP_register_vcpu_time_memory_area:
> -        rc = compat_vcpu_op(cmd, vcpuid, arg);
> -        break;
>      default:
> -        rc = -ENOSYS;
> -        break;
> +        return compat_memory_op(cmd, arg);
>      }
> -
> -    return rc;
>  }
>  
>  static long hvm_physdev_op_compat32(
> @@ -3551,27 +3495,29 @@ static long hvm_physdev_op_compat32(
>  {
>      switch ( cmd )
>      {
> +    default:
> +        if ( !is_hardware_domain(current->domain) )
> +            return -ENOSYS;
> +        /* fall through */
>          case PHYSDEVOP_map_pirq:
>          case PHYSDEVOP_unmap_pirq:
>          case PHYSDEVOP_eoi:
>          case PHYSDEVOP_irq_status_query:
>          case PHYSDEVOP_get_free_pirq:
>              return compat_physdev_op(cmd, arg);
> -        break;
> -    default:
> -            return -ENOSYS;
> -        break;
>      }
>  }
>  
>  static hvm_hypercall_t *const hvm_hypercall64_table[NR_hypercalls] = {
>      [ __HYPERVISOR_memory_op ] = (hvm_hypercall_t *)hvm_memory_op,
>      [ __HYPERVISOR_grant_table_op ] = (hvm_hypercall_t *)hvm_grant_table_op,
> -    [ __HYPERVISOR_vcpu_op ] = (hvm_hypercall_t *)hvm_vcpu_op,
>      [ __HYPERVISOR_physdev_op ] = (hvm_hypercall_t *)hvm_physdev_op,
> +    HYPERCALL(platform_op),
>      HYPERCALL(xen_version),
>      HYPERCALL(console_io),
>      HYPERCALL(event_channel_op),
> +    HYPERCALL(vcpu_op),
> +    HYPERCALL(mmuext_op),
>      HYPERCALL(sched_op),
>      HYPERCALL(set_timer_op),
>      HYPERCALL(xsm_op),
> @@ -3587,11 +3533,13 @@ static hvm_hypercall_t *const hvm_hypercall64_table[NR_hypercalls] = {
>  static hvm_hypercall_t *const hvm_hypercall32_table[NR_hypercalls] = {
>      [ __HYPERVISOR_memory_op ] = (hvm_hypercall_t *)hvm_memory_op_compat32,
>      [ __HYPERVISOR_grant_table_op ] = (hvm_hypercall_t *)hvm_grant_table_op_compat32,
> -    [ __HYPERVISOR_vcpu_op ] = (hvm_hypercall_t *)hvm_vcpu_op_compat32,
>      [ __HYPERVISOR_physdev_op ] = (hvm_hypercall_t *)hvm_physdev_op_compat32,
> +    HYPERCALL(platform_op),
>      COMPAT_CALL(xen_version),
>      HYPERCALL(console_io),
>      HYPERCALL(event_channel_op),
> +    COMPAT_CALL(vcpu_op),
> +    COMPAT_CALL(mmuext_op),
>      COMPAT_CALL(sched_op),
>      COMPAT_CALL(set_timer_op),
>      HYPERCALL(xsm_op),
> @@ -3601,24 +3549,6 @@ static hvm_hypercall_t *const hvm_hypercall32_table[NR_hypercalls] = {
>      HYPERCALL(tmem_op)
>  };
>  
> -/* PVH 32bitfixme. */
> -static hvm_hypercall_t *const pvh_hypercall64_table[NR_hypercalls] = {
> -    HYPERCALL(platform_op),
> -    HYPERCALL(memory_op),
> -    HYPERCALL(xen_version),
> -    HYPERCALL(console_io),
> -    [ __HYPERVISOR_grant_table_op ]  = (hvm_hypercall_t *)hvm_grant_table_op,
> -    HYPERCALL(vcpu_op),
> -    HYPERCALL(mmuext_op),
> -    HYPERCALL(xsm_op),
> -    HYPERCALL(sched_op),
> -    HYPERCALL(event_channel_op),
> -    [ __HYPERVISOR_physdev_op ]      = (hvm_hypercall_t *)hvm_physdev_op,
> -    HYPERCALL(hvm_op),
> -    HYPERCALL(sysctl),
> -    HYPERCALL(domctl)
> -};
> -
>  int hvm_do_hypercall(struct cpu_user_regs *regs)
>  {
>      struct vcpu *curr = current;
> @@ -3645,9 +3575,7 @@ int hvm_do_hypercall(struct cpu_user_regs *regs)
>      if ( (eax & 0x80000000) && is_viridian_domain(curr->domain) )
>          return viridian_hypercall(regs);
>  
> -    if ( (eax >= NR_hypercalls) ||
> -         (is_pvh_vcpu(curr) ? !pvh_hypercall64_table[eax]
> -                            : !hvm_hypercall32_table[eax]) )
> +    if ( (eax >= NR_hypercalls) || !hvm_hypercall32_table[eax] )
>      {
>          regs->eax = -ENOSYS;
>          return HVM_HCALL_completed;
> @@ -3662,14 +3590,9 @@ int hvm_do_hypercall(struct cpu_user_regs *regs)
>                      regs->r10, regs->r8, regs->r9);
>  
>          curr->arch.hvm_vcpu.hcall_64bit = 1;
> -        if ( is_pvh_vcpu(curr) )
> -            regs->rax = pvh_hypercall64_table[eax](regs->rdi, regs->rsi,
> -                                                   regs->rdx, regs->r10,
> -                                                   regs->r8, regs->r9);
> -        else
> -            regs->rax = hvm_hypercall64_table[eax](regs->rdi, regs->rsi,
> -                                                   regs->rdx, regs->r10,
> -                                                   regs->r8, regs->r9);
> +        regs->rax = hvm_hypercall64_table[eax](regs->rdi, regs->rsi,
> +                                               regs->rdx, regs->r10,
> +                                               regs->r8, regs->r9);
>          curr->arch.hvm_vcpu.hcall_64bit = 0;
>      }
>      else
> diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
> index 1a8a5e0..3d5c3c8 100644
> --- a/xen/arch/x86/mm.c
> +++ b/xen/arch/x86/mm.c
> @@ -3310,6 +3310,12 @@ long do_mmuext_op(
>              unsigned long mfn;
>              struct spage_info *spage;
>  
> +            if ( paging_mode_refcounts(pg_owner) )
> +            {
> +                okay = 0;
> +                break;
> +            }
> +
>              mfn = op.arg1.mfn;
>              if ( mfn & (L1_PAGETABLE_ENTRIES-1) )
>              {
> @@ -3336,6 +3342,12 @@ long do_mmuext_op(
>              unsigned long mfn;
>              struct spage_info *spage;
>  
> +            if ( paging_mode_refcounts(pg_owner) )
> +            {
> +                okay = 0;
> +                break;
> +            }
> +
>              mfn = op.arg1.mfn;
>              if ( mfn & (L1_PAGETABLE_ENTRIES-1) )
>              {
> diff --git a/xen/arch/x86/physdev.c b/xen/arch/x86/physdev.c
> index f178315..a2d2b96 100644
> --- a/xen/arch/x86/physdev.c
> +++ b/xen/arch/x86/physdev.c
> @@ -520,7 +520,7 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
>          struct physdev_set_iopl set_iopl;
>  
>          ret = -ENOSYS;
> -        if ( is_pvh_vcpu(current) )
> +        if ( !is_pv_vcpu(current) )
>              break;
>  
>          ret = -EFAULT;
> @@ -538,7 +538,7 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
>          struct physdev_set_iobitmap set_iobitmap;
>  
>          ret = -ENOSYS;
> -        if ( is_pvh_vcpu(current) )
> +        if ( !is_pv_vcpu(current) )
>              break;
>  
>          ret = -EFAULT;
> diff --git a/xen/arch/x86/x86_64/compat/mm.c b/xen/arch/x86/x86_64/compat/mm.c
> index 69c6195..3fa90aa 100644
> --- a/xen/arch/x86/x86_64/compat/mm.c
> +++ b/xen/arch/x86/x86_64/compat/mm.c
> @@ -236,8 +236,6 @@ int compat_update_va_mapping_otherdomain(unsigned long va, u32 lo, u32 hi,
>      return do_update_va_mapping_otherdomain(va, lo | ((u64)hi << 32), flags, domid);
>  }
>  
> -DEFINE_XEN_GUEST_HANDLE(mmuext_op_compat_t);
> -
>  int compat_mmuext_op(XEN_GUEST_HANDLE_PARAM(mmuext_op_compat_t) cmp_uops,
>                       unsigned int count,
>                       XEN_GUEST_HANDLE_PARAM(uint) pdone,
> diff --git a/xen/include/asm-x86/hypercall.h b/xen/include/asm-x86/hypercall.h
> index afa8ba9..cee8817 100644
> --- a/xen/include/asm-x86/hypercall.h
> +++ b/xen/include/asm-x86/hypercall.h
> @@ -8,6 +8,7 @@
>  #include <public/physdev.h>
>  #include <public/arch-x86/xen-mca.h> /* for do_mca */
>  #include <xen/types.h>
> +#include <compat/memory.h>
>  
>  /*
>   * Both do_mmuext_op() and do_mmu_update():
> @@ -110,4 +111,13 @@ extern int
>  arch_compat_vcpu_op(
>      int cmd, struct vcpu *v, XEN_GUEST_HANDLE_PARAM(void) arg);
>  
> +DEFINE_XEN_GUEST_HANDLE(mmuext_op_compat_t);
> +
> +extern int
> +compat_mmuext_op(
> +    XEN_GUEST_HANDLE_PARAM(mmuext_op_compat_t) cmp_uops,
> +    unsigned int count,
> +    XEN_GUEST_HANDLE_PARAM(uint) pdone,
> +    unsigned int foreigndom);
> +
>  #endif /* __ASM_X86_HYPERCALL_H__ */
> -- 
> 1.8.5.2
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH RFC] x86/hvm: unify HVM and PVH hypercall tables.
  2014-05-08 15:31 [PATCH RFC] x86/hvm: unify HVM and PVH hypercall tables Tim Deegan
  2014-05-08 16:53 ` George Dunlap
  2014-05-08 18:39 ` Konrad Rzeszutek Wilk
@ 2014-05-09  8:08 ` Jan Beulich
  2014-05-15 10:34   ` Tim Deegan
  2014-05-15 10:53 ` [PATCH v2] " Tim Deegan
  2014-05-15 13:35 ` [PATCH v3] x86/hvm: unify HVM and PVH hypercall tables Tim Deegan
  4 siblings, 1 reply; 15+ messages in thread
From: Jan Beulich @ 2014-05-09  8:08 UTC (permalink / raw)
  To: Tim Deegan; +Cc: Keir Fraser, xen-devel

>>> On 08.05.14 at 17:31, <tim@xen.org> wrote:
> Stage one of many in merging PVH and HVM code in the hypervisor.

But a pretty desirable one (not the least because it means I'll be able
to remove the respective item from my todo list).

> This exposes a few new hypercalls to HVM guests, all of which were
> already available to PVH ones:
> 
>  - XENMEM_memory_map / XENMEM_machine_memory_map / XENMEM_machphys_mapping:
>    These are basically harmless, if a bit useless to plain HVM.
> 
>  - VCPUOP_send_nmi / VCPUOP_initialise / VCPUOP[_is]_up / VCPUOP_down
>    This will eventually let HVM guests bring up APs the way PVH ones do.
>    For now, the VCPUOP_initialise paths are still gated on is_pvh.
>  - VCPUOP_get_physid
>    Harmless.
> 
>  - __HYPERVISOR_platform_op (XSM_PRIV callers only).

I think this needs a little more thought than just relying on the
XSM_PRIV check: There are several operations here dealing with
machine memory addresses, which aren't directly meaningful to PVH
(and HVM, but for now we're not planning on having HVM Dom0). Do
you think it is useful to expose them the way they are nevertheless?

>  - __HYPERVISOR_mmuext_op.
>    The pagetable manipulation MMUEXT ops are already denied to
>    paging_mode_refcounts() domains;

Denied? From what I can see MMUEXT_PIN_L?_TABLE as well as
MMUEXT_UNPIN_TABLE succeed (in the sense of are being ignored) for
such pg_owner domains (I consider it similarly bogus to ignore rather
than fail pin requests for page table levels higher than supported now
that I look at this again - part of that code might anyway be cleaned
up now that CONFIG_PAGING_LEVELS can only ever be 4, unless we
expect a 5th level to appear at some point).

> the baseptr ones are already
>    denied to paging_mode_translate() domains.
>    I have restricted MMUEXT[UN]MARK_SUPER to !paging_mode_refcounts()
>    domains as well, as I can see no need for them in PVH.
>    That leaves TLB and cache flush operations and MMUEXT_CLEAR_PAGE /
>    MMUEXT_COPY_PAGE, all of which are OK.

Would permitting these two not undermine at least mem-access?
(If so, I suppose we already have this problem for
PHYSDEVOP_pirq_eoi_gmfn_v* and other operations where guest
memory gets written by the hypervisor, but as being discussed on a
separate thread, that appears to be an accepted limitation. But for
the two operations above, this would give guests a way to explicitly
circumvent the access checking/enforcement, as these are really
only library functions intended to help (32-bit) guests avoid needing
to do multiple hypercalls to access memory not mapped through a
permanent mapping.)

> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -3433,16 +3433,13 @@ static long hvm_memory_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
>  
>      switch ( cmd & MEMOP_CMD_MASK )
>      {
> -    case XENMEM_memory_map:
> -    case XENMEM_machine_memory_map:
> -    case XENMEM_machphys_mapping:
> -        return -ENOSYS;
>      case XENMEM_decrease_reservation:
>          rc = do_memory_op(cmd, arg);
>          current->domain->arch.hvm_domain.qemu_mapcache_invalidate = 1;
>          return rc;
> +    default:
> +        return do_memory_op(cmd, arg);

I think this would be done more efficiently/cleanly with a single call
to do_memory_op(), and a successive check for
XENMEM_decrease_reservation (similarly below for the 32-bit case).

Jan

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH RFC] x86/hvm: unify HVM and PVH hypercall tables.
  2014-05-08 16:53 ` George Dunlap
@ 2014-05-15 10:25   ` Tim Deegan
  0 siblings, 0 replies; 15+ messages in thread
From: Tim Deegan @ 2014-05-15 10:25 UTC (permalink / raw)
  To: George Dunlap; +Cc: Keir Fraser, Jan Beulich, xen-devel

At 17:53 +0100 on 08 May (1399568036), George Dunlap wrote:
> One thing to consider is that regardless of whether a hypercall is
> safe for HVM guests *if implemented correctly*, every additional
> hypercall exposed increases the risk that an attacker will be able to
> find one which is *not* implemented correctly and be able to take
> advantage of it.

This is true.  My hope is that, in the long run, the code
simplification by merging PVH will be worthwhile.  For some things I
think we'll still want to have flags to turn them off (e.g. qemu, ACPI
emulation, various timers); I'm not sure that there's anything here
that's worth making that kind of exception for. 

> Obviously the *best* solution to that would be Flask, but AFAICT it's
> not very widely used.

Yep.

Cheers,

Tim.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH RFC] x86/hvm: unify HVM and PVH hypercall tables.
  2014-05-08 18:39 ` Konrad Rzeszutek Wilk
@ 2014-05-15 10:30   ` Tim Deegan
  2014-05-15 11:58     ` Jan Beulich
  0 siblings, 1 reply; 15+ messages in thread
From: Tim Deegan @ 2014-05-15 10:30 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk; +Cc: George Dunlap, Keir Fraser, Jan Beulich, xen-devel

At 14:39 -0400 on 08 May (1399556383), Konrad Rzeszutek Wilk wrote:
> On Thu, May 08, 2014 at 04:31:30PM +0100, Tim Deegan wrote:
> > Stage one of many in merging PVH and HVM code in the hypervisor.
> > 
> > This exposes a few new hypercalls to HVM guests, all of which were
> > already available to PVH ones:
> > 
> >  - XENMEM_memory_map / XENMEM_machine_memory_map / XENMEM_machphys_mapping:
> >    These are basically harmless, if a bit useless to plain HVM.
> > 
> >  - VCPUOP_send_nmi / VCPUOP_initialise / VCPUOP[_is]_up / VCPUOP_down
> >    This will eventually let HVM guests bring up APs the way PVH ones do.
> >    For now, the VCPUOP_initialise paths are still gated on is_pvh.
> 
> I had a similar patch to enable this under HVM and found out that
> if the guest issues VCPUOP_send_nmi we get in Linux:
> 
> [    3.611742] Corrupted low memory at c000fffc (fffc phys) = 00029b00
> [    2.386785] Corrupted low memory at ffff88000000fff8 (fff8 phys) = 2990000000000
> 
> http://mid.gmane.org/20140422183443.GA6817@phenom.dumpdata.com

Right, thanks.  Do you think that's likely to be a hypervisor bug, or
just a "don't do that then"?

AFAICT PVH domains need this as they have no other way of sending
NMIs.

Tim.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH RFC] x86/hvm: unify HVM and PVH hypercall tables.
  2014-05-09  8:08 ` Jan Beulich
@ 2014-05-15 10:34   ` Tim Deegan
  2014-05-16  0:19     ` Mukesh Rathor
  0 siblings, 1 reply; 15+ messages in thread
From: Tim Deegan @ 2014-05-15 10:34 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Keir Fraser, xen-devel

At 09:08 +0100 on 09 May (1399622918), Jan Beulich wrote:
> >>> On 08.05.14 at 17:31, <tim@xen.org> wrote:
> >  - __HYPERVISOR_platform_op (XSM_PRIV callers only).
> 
> I think this needs a little more thought than just relying on the
> XSM_PRIV check: There are several operations here dealing with
> machine memory addresses, which aren't directly meaningful to PVH
> (and HVM, but for now we're not planning on having HVM Dom0). Do
> you think it is useful to expose them the way they are nevertheless?

I'll punt that to Mukesh: are there operations in here that a PVH
dom0 couldn't/shouldn't use or that need adjustment?

For this patch, I don't think it makes any difference; I'm just giving
HVM the same interface as PVH here.

> >  - __HYPERVISOR_mmuext_op.
> >    The pagetable manipulation MMUEXT ops are already denied to
> >    paging_mode_refcounts() domains;
> 
> Denied? From what I can see MMUEXT_PIN_L?_TABLE as well as
> MMUEXT_UNPIN_TABLE succeed (in the sense of are being ignored) for
> such pg_owner domains

Hmm, so they do.  How odd.

> (I consider it similarly bogus to ignore rather
> than fail pin requests for page table levels higher than supported now
> that I look at this again - part of that code might anyway be cleaned
> up now that CONFIG_PAGING_LEVELS can only ever be 4, unless we
> expect a 5th level to appear at some point).
> 
> > the baseptr ones are already
> >    denied to paging_mode_translate() domains.
> >    I have restricted MMUEXT[UN]MARK_SUPER to !paging_mode_refcounts()
> >    domains as well, as I can see no need for them in PVH.
> >    That leaves TLB and cache flush operations and MMUEXT_CLEAR_PAGE /
> >    MMUEXT_COPY_PAGE, all of which are OK.
> 
> Would permitting these two not undermine at least mem-access?

Good point; I'll restrict them to PV only, since they're not needed
for PVH. 

> I think this would be done more efficiently/cleanly with a single call
> to do_memory_op(), and a successive check for
> XENMEM_decrease_reservation (similarly below for the 32-bit case).

Ack, will do. 

v2 coming shortly. 

Tim.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH v2] x86/hvm: unify HVM and PVH hypercall tables.
  2014-05-08 15:31 [PATCH RFC] x86/hvm: unify HVM and PVH hypercall tables Tim Deegan
                   ` (2 preceding siblings ...)
  2014-05-09  8:08 ` Jan Beulich
@ 2014-05-15 10:53 ` Tim Deegan
  2014-05-15 12:39   ` Jan Beulich
  2014-05-15 13:35 ` [PATCH v3] x86/hvm: unify HVM and PVH hypercall tables Tim Deegan
  4 siblings, 1 reply; 15+ messages in thread
From: Tim Deegan @ 2014-05-15 10:53 UTC (permalink / raw)
  To: xen-devel; +Cc: George Dunlap, Keir Fraser, Jan Beulich

Stage one of many in merging PVH and HVM code in the hypervisor.

This exposes a few new hypercalls to HVM guests, all of which were
already available to PVH ones:

 - XENMEM_memory_map / XENMEM_machine_memory_map / XENMEM_machphys_mapping:
   These are basically harmless, if a bit useless to plain HVM.

 - VCPUOP_send_nmi / VCPUOP_initialise / VCPUOP[_is]_up / VCPUOP_down
   This will eventually let HVM guests bring up APs the way PVH ones do.
   For now, the VCPUOP_initialise paths are still gated on is_pvh.
 - VCPUOP_get_physid
   Harmless.

 - __HYPERVISOR_platform_op (XSM_PRIV callers only).

 - __HYPERVISOR_mmuext_op.
   The pagetable manipulation MMUEXT ops are already denied
   (or no-ops) to paging_mode_refcounts() domains; the baseptr ones
   are already denied to paging_mode_translate() domains.
   I have restricted MMUEXT[UN]MARK_SUPER to !paging_mode_refcounts()
   domains as well, as I can see no need for them in PVH.
   I have also restricted MMUEXT_CLEAR_PAGE / MMUEXT_COPY_PAGE
   to PV domains only, as there is no need for them in HVM/PVH ones
   and they would lead to complication with sharing/paging operations.
   That leaves TLB and cache flush operations, which are OK.

 - PHYSDEVOP_* (only for hardware control domains).
   Also make ops that touch PV vcpu state (PHYSDEVOP_set_iopl and
   PHYSDEVOP_set_iobitmap) gate on is_pv rather than !is_pvh.

PVH guests can also see a few hypercalls that they couldn't before,
but which were already available to HVM guests:

 - __HYPERVISOR_set_timer_op

 - __HYPERVISOR_tmem_op

Signed-off-by: Tim Deegan <tim@xen.org>
Cc: Keir Fraser <keir@xen.org>
Cc: Jan Beulich <jbeulich@suse.com>
Cc: Mukesh Rathor <mukesh.rathor@oracle.com>
Cc: George Dunlap <george.dunlap@eu.citrix.com>
---
v2: restrict MMUEXT_CLEAR_PAGE / MMUEXT_COPY_PAGE;
    reword to correctly state that some MMUEXT ops are noops; and
    use if() rather than one-clause switch() in a few places.
---
 xen/arch/x86/hvm/hvm.c          | 129 +++++++---------------------------------
 xen/arch/x86/mm.c               |  24 ++++++++
 xen/arch/x86/physdev.c          |   4 +-
 xen/arch/x86/x86_64/compat/mm.c |   2 -
 xen/include/asm-x86/hypercall.h |  10 ++++
 5 files changed, 57 insertions(+), 112 deletions(-)

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index b69f164..57552b9 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -3844,20 +3844,12 @@ static long hvm_grant_table_op(
 
 static long hvm_memory_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
 {
-    long rc;
+    long rc = do_memory_op(cmd, arg);
 
-    switch ( cmd & MEMOP_CMD_MASK )
-    {
-    case XENMEM_memory_map:
-    case XENMEM_machine_memory_map:
-    case XENMEM_machphys_mapping:
-        return -ENOSYS;
-    case XENMEM_decrease_reservation:
-        rc = do_memory_op(cmd, arg);
+    if ( (cmd & MEMOP_CMD_MASK) == XENMEM_decrease_reservation )
         current->domain->arch.hvm_domain.qemu_mapcache_invalidate = 1;
-        return rc;
-    }
-    return do_memory_op(cmd, arg);
+
+    return rc;
 }
 
 static long hvm_physdev_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
@@ -3865,7 +3857,7 @@ static long hvm_physdev_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
     switch ( cmd )
     {
     default:
-        if ( !is_pvh_vcpu(current) || !is_hardware_domain(current->domain) )
+        if ( !is_hardware_domain(current->domain) )
             return -ENOSYS;
         /* fall through */
     case PHYSDEVOP_map_pirq:
@@ -3877,31 +3869,6 @@ static long hvm_physdev_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
     }
 }
 
-static long hvm_vcpu_op(
-    int cmd, int vcpuid, XEN_GUEST_HANDLE_PARAM(void) arg)
-{
-    long rc;
-
-    switch ( cmd )
-    {
-    case VCPUOP_register_runstate_memory_area:
-    case VCPUOP_get_runstate_info:
-    case VCPUOP_set_periodic_timer:
-    case VCPUOP_stop_periodic_timer:
-    case VCPUOP_set_singleshot_timer:
-    case VCPUOP_stop_singleshot_timer:
-    case VCPUOP_register_vcpu_info:
-    case VCPUOP_register_vcpu_time_memory_area:
-        rc = do_vcpu_op(cmd, vcpuid, arg);
-        break;
-    default:
-        rc = -ENOSYS;
-        break;
-    }
-
-    return rc;
-}
-
 typedef unsigned long hvm_hypercall_t(
     unsigned long, unsigned long, unsigned long, unsigned long, unsigned long,
     unsigned long);
@@ -3920,43 +3887,10 @@ static long hvm_grant_table_op_compat32(unsigned int cmd,
 
 static long hvm_memory_op_compat32(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
 {
-    int rc;
+    int rc = compat_memory_op(cmd, arg);
 
-    switch ( cmd & MEMOP_CMD_MASK )
-    {
-    case XENMEM_memory_map:
-    case XENMEM_machine_memory_map:
-    case XENMEM_machphys_mapping:
-        return -ENOSYS;
-    case XENMEM_decrease_reservation:
-        rc = compat_memory_op(cmd, arg);
+    if ( (cmd & MEMOP_CMD_MASK) == XENMEM_decrease_reservation )
         current->domain->arch.hvm_domain.qemu_mapcache_invalidate = 1;
-        return rc;
-    }
-    return compat_memory_op(cmd, arg);
-}
-
-static long hvm_vcpu_op_compat32(
-    int cmd, int vcpuid, XEN_GUEST_HANDLE_PARAM(void) arg)
-{
-    long rc;
-
-    switch ( cmd )
-    {
-    case VCPUOP_register_runstate_memory_area:
-    case VCPUOP_get_runstate_info:
-    case VCPUOP_set_periodic_timer:
-    case VCPUOP_stop_periodic_timer:
-    case VCPUOP_set_singleshot_timer:
-    case VCPUOP_stop_singleshot_timer:
-    case VCPUOP_register_vcpu_info:
-    case VCPUOP_register_vcpu_time_memory_area:
-        rc = compat_vcpu_op(cmd, vcpuid, arg);
-        break;
-    default:
-        rc = -ENOSYS;
-        break;
-    }
 
     return rc;
 }
@@ -3966,27 +3900,29 @@ static long hvm_physdev_op_compat32(
 {
     switch ( cmd )
     {
+    default:
+        if ( !is_hardware_domain(current->domain) )
+            return -ENOSYS;
+        /* fall through */
         case PHYSDEVOP_map_pirq:
         case PHYSDEVOP_unmap_pirq:
         case PHYSDEVOP_eoi:
         case PHYSDEVOP_irq_status_query:
         case PHYSDEVOP_get_free_pirq:
             return compat_physdev_op(cmd, arg);
-        break;
-    default:
-            return -ENOSYS;
-        break;
     }
 }
 
 static hvm_hypercall_t *const hvm_hypercall64_table[NR_hypercalls] = {
     [ __HYPERVISOR_memory_op ] = (hvm_hypercall_t *)hvm_memory_op,
     [ __HYPERVISOR_grant_table_op ] = (hvm_hypercall_t *)hvm_grant_table_op,
-    [ __HYPERVISOR_vcpu_op ] = (hvm_hypercall_t *)hvm_vcpu_op,
     [ __HYPERVISOR_physdev_op ] = (hvm_hypercall_t *)hvm_physdev_op,
+    HYPERCALL(platform_op),
     HYPERCALL(xen_version),
     HYPERCALL(console_io),
     HYPERCALL(event_channel_op),
+    HYPERCALL(vcpu_op),
+    HYPERCALL(mmuext_op),
     HYPERCALL(sched_op),
     HYPERCALL(set_timer_op),
     HYPERCALL(xsm_op),
@@ -4002,11 +3938,13 @@ static hvm_hypercall_t *const hvm_hypercall64_table[NR_hypercalls] = {
 static hvm_hypercall_t *const hvm_hypercall32_table[NR_hypercalls] = {
     [ __HYPERVISOR_memory_op ] = (hvm_hypercall_t *)hvm_memory_op_compat32,
     [ __HYPERVISOR_grant_table_op ] = (hvm_hypercall_t *)hvm_grant_table_op_compat32,
-    [ __HYPERVISOR_vcpu_op ] = (hvm_hypercall_t *)hvm_vcpu_op_compat32,
     [ __HYPERVISOR_physdev_op ] = (hvm_hypercall_t *)hvm_physdev_op_compat32,
+    HYPERCALL(platform_op),
     COMPAT_CALL(xen_version),
     HYPERCALL(console_io),
     HYPERCALL(event_channel_op),
+    COMPAT_CALL(vcpu_op),
+    COMPAT_CALL(mmuext_op),
     COMPAT_CALL(sched_op),
     COMPAT_CALL(set_timer_op),
     HYPERCALL(xsm_op),
@@ -4016,24 +3954,6 @@ static hvm_hypercall_t *const hvm_hypercall32_table[NR_hypercalls] = {
     HYPERCALL(tmem_op)
 };
 
-/* PVH 32bitfixme. */
-static hvm_hypercall_t *const pvh_hypercall64_table[NR_hypercalls] = {
-    HYPERCALL(platform_op),
-    HYPERCALL(memory_op),
-    HYPERCALL(xen_version),
-    HYPERCALL(console_io),
-    [ __HYPERVISOR_grant_table_op ]  = (hvm_hypercall_t *)hvm_grant_table_op,
-    HYPERCALL(vcpu_op),
-    HYPERCALL(mmuext_op),
-    HYPERCALL(xsm_op),
-    HYPERCALL(sched_op),
-    HYPERCALL(event_channel_op),
-    [ __HYPERVISOR_physdev_op ]      = (hvm_hypercall_t *)hvm_physdev_op,
-    HYPERCALL(hvm_op),
-    HYPERCALL(sysctl),
-    HYPERCALL(domctl)
-};
-
 int hvm_do_hypercall(struct cpu_user_regs *regs)
 {
     struct vcpu *curr = current;
@@ -4060,9 +3980,7 @@ int hvm_do_hypercall(struct cpu_user_regs *regs)
     if ( (eax & 0x80000000) && is_viridian_domain(curr->domain) )
         return viridian_hypercall(regs);
 
-    if ( (eax >= NR_hypercalls) ||
-         (is_pvh_vcpu(curr) ? !pvh_hypercall64_table[eax]
-                            : !hvm_hypercall32_table[eax]) )
+    if ( (eax >= NR_hypercalls) || !hvm_hypercall32_table[eax] )
     {
         regs->eax = -ENOSYS;
         return HVM_HCALL_completed;
@@ -4077,14 +3995,9 @@ int hvm_do_hypercall(struct cpu_user_regs *regs)
                     regs->r10, regs->r8, regs->r9);
 
         curr->arch.hvm_vcpu.hcall_64bit = 1;
-        if ( is_pvh_vcpu(curr) )
-            regs->rax = pvh_hypercall64_table[eax](regs->rdi, regs->rsi,
-                                                   regs->rdx, regs->r10,
-                                                   regs->r8, regs->r9);
-        else
-            regs->rax = hvm_hypercall64_table[eax](regs->rdi, regs->rsi,
-                                                   regs->rdx, regs->r10,
-                                                   regs->r8, regs->r9);
+        regs->rax = hvm_hypercall64_table[eax](regs->rdi, regs->rsi,
+                                               regs->rdx, regs->r10,
+                                               regs->r8, regs->r9);
         curr->arch.hvm_vcpu.hcall_64bit = 0;
     }
     else
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 1a8a5e0..a263d26 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -3253,6 +3253,12 @@ long do_mmuext_op(
         case MMUEXT_CLEAR_PAGE: {
             struct page_info *page;
 
+            if ( !is_pv_vcpu(current) )
+            {
+                okay = 0;
+                break;
+            }
+
             page = get_page_from_gfn(d, op.arg1.mfn, NULL, P2M_ALLOC);
             if ( !page || !get_page_type(page, PGT_writable_page) )
             {
@@ -3276,6 +3282,12 @@ long do_mmuext_op(
         {
             struct page_info *src_page, *dst_page;
 
+            if ( !is_pv_vcpu(current) )
+            {
+                okay = 0;
+                break;
+            }
+
             src_page = get_page_from_gfn(d, op.arg2.src_mfn, NULL, P2M_ALLOC);
             if ( unlikely(!src_page) )
             {
@@ -3310,6 +3322,12 @@ long do_mmuext_op(
             unsigned long mfn;
             struct spage_info *spage;
 
+            if ( paging_mode_refcounts(pg_owner) )
+            {
+                okay = 0;
+                break;
+            }
+
             mfn = op.arg1.mfn;
             if ( mfn & (L1_PAGETABLE_ENTRIES-1) )
             {
@@ -3336,6 +3354,12 @@ long do_mmuext_op(
             unsigned long mfn;
             struct spage_info *spage;
 
+            if ( paging_mode_refcounts(pg_owner) )
+            {
+                okay = 0;
+                break;
+            }
+
             mfn = op.arg1.mfn;
             if ( mfn & (L1_PAGETABLE_ENTRIES-1) )
             {
diff --git a/xen/arch/x86/physdev.c b/xen/arch/x86/physdev.c
index f178315..a2d2b96 100644
--- a/xen/arch/x86/physdev.c
+++ b/xen/arch/x86/physdev.c
@@ -520,7 +520,7 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
         struct physdev_set_iopl set_iopl;
 
         ret = -ENOSYS;
-        if ( is_pvh_vcpu(current) )
+        if ( !is_pv_vcpu(current) )
             break;
 
         ret = -EFAULT;
@@ -538,7 +538,7 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
         struct physdev_set_iobitmap set_iobitmap;
 
         ret = -ENOSYS;
-        if ( is_pvh_vcpu(current) )
+        if ( !is_pv_vcpu(current) )
             break;
 
         ret = -EFAULT;
diff --git a/xen/arch/x86/x86_64/compat/mm.c b/xen/arch/x86/x86_64/compat/mm.c
index 69c6195..3fa90aa 100644
--- a/xen/arch/x86/x86_64/compat/mm.c
+++ b/xen/arch/x86/x86_64/compat/mm.c
@@ -236,8 +236,6 @@ int compat_update_va_mapping_otherdomain(unsigned long va, u32 lo, u32 hi,
     return do_update_va_mapping_otherdomain(va, lo | ((u64)hi << 32), flags, domid);
 }
 
-DEFINE_XEN_GUEST_HANDLE(mmuext_op_compat_t);
-
 int compat_mmuext_op(XEN_GUEST_HANDLE_PARAM(mmuext_op_compat_t) cmp_uops,
                      unsigned int count,
                      XEN_GUEST_HANDLE_PARAM(uint) pdone,
diff --git a/xen/include/asm-x86/hypercall.h b/xen/include/asm-x86/hypercall.h
index afa8ba9..cee8817 100644
--- a/xen/include/asm-x86/hypercall.h
+++ b/xen/include/asm-x86/hypercall.h
@@ -8,6 +8,7 @@
 #include <public/physdev.h>
 #include <public/arch-x86/xen-mca.h> /* for do_mca */
 #include <xen/types.h>
+#include <compat/memory.h>
 
 /*
  * Both do_mmuext_op() and do_mmu_update():
@@ -110,4 +111,13 @@ extern int
 arch_compat_vcpu_op(
     int cmd, struct vcpu *v, XEN_GUEST_HANDLE_PARAM(void) arg);
 
+DEFINE_XEN_GUEST_HANDLE(mmuext_op_compat_t);
+
+extern int
+compat_mmuext_op(
+    XEN_GUEST_HANDLE_PARAM(mmuext_op_compat_t) cmp_uops,
+    unsigned int count,
+    XEN_GUEST_HANDLE_PARAM(uint) pdone,
+    unsigned int foreigndom);
+
 #endif /* __ASM_X86_HYPERCALL_H__ */
-- 
1.8.5.2

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH RFC] x86/hvm: unify HVM and PVH hypercall tables.
  2014-05-15 10:30   ` Tim Deegan
@ 2014-05-15 11:58     ` Jan Beulich
  0 siblings, 0 replies; 15+ messages in thread
From: Jan Beulich @ 2014-05-15 11:58 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk, Tim Deegan; +Cc: George Dunlap, Keir Fraser, xen-devel

>>> On 15.05.14 at 12:30, <tim@xen.org> wrote:
> At 14:39 -0400 on 08 May (1399556383), Konrad Rzeszutek Wilk wrote:
>> On Thu, May 08, 2014 at 04:31:30PM +0100, Tim Deegan wrote:
>> > Stage one of many in merging PVH and HVM code in the hypervisor.
>> > 
>> > This exposes a few new hypercalls to HVM guests, all of which were
>> > already available to PVH ones:
>> > 
>> >  - XENMEM_memory_map / XENMEM_machine_memory_map / XENMEM_machphys_mapping:
>> >    These are basically harmless, if a bit useless to plain HVM.
>> > 
>> >  - VCPUOP_send_nmi / VCPUOP_initialise / VCPUOP[_is]_up / VCPUOP_down
>> >    This will eventually let HVM guests bring up APs the way PVH ones do.
>> >    For now, the VCPUOP_initialise paths are still gated on is_pvh.
>> 
>> I had a similar patch to enable this under HVM and found out that
>> if the guest issues VCPUOP_send_nmi we get in Linux:
>> 
>> [    3.611742] Corrupted low memory at c000fffc (fffc phys) = 00029b00
>> [    2.386785] Corrupted low memory at ffff88000000fff8 (fff8 phys) = 
> 2990000000000
>> 
>> http://mid.gmane.org/20140422183443.GA6817@phenom.dumpdata.com 
> 
> Right, thanks.  Do you think that's likely to be a hypervisor bug, or
> just a "don't do that then"?

Whether that's a hypervisor or Dom0 kernel bug I can't tell (yet),
but it certainly shouldn't be a "don't do that then".

Jan

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2] x86/hvm: unify HVM and PVH hypercall tables.
  2014-05-15 10:53 ` [PATCH v2] " Tim Deegan
@ 2014-05-15 12:39   ` Jan Beulich
  2014-05-15 13:35     ` [PATCH v2] x86/hvm: unify HVM and PVH hypercall tables Tim Deegan
  0 siblings, 1 reply; 15+ messages in thread
From: Jan Beulich @ 2014-05-15 12:39 UTC (permalink / raw)
  To: Tim Deegan; +Cc: George Dunlap, Keir Fraser, xen-devel

>>> On 15.05.14 at 12:53, <tim@xen.org> wrote:
> Stage one of many in merging PVH and HVM code in the hypervisor.
> 
> This exposes a few new hypercalls to HVM guests, all of which were
> already available to PVH ones:
> 
>  - XENMEM_memory_map / XENMEM_machine_memory_map / XENMEM_machphys_mapping:
>    These are basically harmless, if a bit useless to plain HVM.
> 
>  - VCPUOP_send_nmi / VCPUOP_initialise / VCPUOP[_is]_up / VCPUOP_down
>    This will eventually let HVM guests bring up APs the way PVH ones do.
>    For now, the VCPUOP_initialise paths are still gated on is_pvh.
>  - VCPUOP_get_physid
>    Harmless.
> 
>  - __HYPERVISOR_platform_op (XSM_PRIV callers only).
> 
>  - __HYPERVISOR_mmuext_op.
>    The pagetable manipulation MMUEXT ops are already denied
>    (or no-ops) to paging_mode_refcounts() domains; the baseptr ones
>    are already denied to paging_mode_translate() domains.
>    I have restricted MMUEXT[UN]MARK_SUPER to !paging_mode_refcounts()
>    domains as well, as I can see no need for them in PVH.
>    I have also restricted MMUEXT_CLEAR_PAGE / MMUEXT_COPY_PAGE
>    to PV domains only, as there is no need for them in HVM/PVH ones
>    and they would lead to complication with sharing/paging operations.
>    That leaves TLB and cache flush operations, which are OK.
> 
>  - PHYSDEVOP_* (only for hardware control domains).
>    Also make ops that touch PV vcpu state (PHYSDEVOP_set_iopl and
>    PHYSDEVOP_set_iobitmap) gate on is_pv rather than !is_pvh.
> 
> PVH guests can also see a few hypercalls that they couldn't before,
> but which were already available to HVM guests:
> 
>  - __HYPERVISOR_set_timer_op
> 
>  - __HYPERVISOR_tmem_op
> 
> Signed-off-by: Tim Deegan <tim@xen.org>

Reviewed-by: Jan Beulich <jbeulich@suse.com>

pending clarification on whether ...

> @@ -4002,11 +3938,13 @@ static hvm_hypercall_t *const hvm_hypercall64_table[NR_hypercalls] = {
>  static hvm_hypercall_t *const hvm_hypercall32_table[NR_hypercalls] = {
>      [ __HYPERVISOR_memory_op ] = (hvm_hypercall_t *)hvm_memory_op_compat32,
>      [ __HYPERVISOR_grant_table_op ] = (hvm_hypercall_t *)hvm_grant_table_op_compat32,
> -    [ __HYPERVISOR_vcpu_op ] = (hvm_hypercall_t *)hvm_vcpu_op_compat32,
>      [ __HYPERVISOR_physdev_op ] = (hvm_hypercall_t *)hvm_physdev_op_compat32,
> +    HYPERCALL(platform_op),

... this wouldn't also need to use COMPAT_CALL().

Jan

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2] x86/hvm: unify HVM and PVH hypercall tables
  2014-05-15 12:39   ` Jan Beulich
@ 2014-05-15 13:35     ` Tim Deegan
  0 siblings, 0 replies; 15+ messages in thread
From: Tim Deegan @ 2014-05-15 13:35 UTC (permalink / raw)
  To: Jan Beulich; +Cc: George Dunlap, Keir Fraser, xen-devel

At 13:39 +0100 on 15 May (1400157562), Jan Beulich wrote:
> > +    HYPERCALL(platform_op),
> 
> ... this wouldn't also need to use COMPAT_CALL().

Urgh, yes, thanks.

Tim.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH v3] x86/hvm: unify HVM and PVH hypercall tables.
  2014-05-08 15:31 [PATCH RFC] x86/hvm: unify HVM and PVH hypercall tables Tim Deegan
                   ` (3 preceding siblings ...)
  2014-05-15 10:53 ` [PATCH v2] " Tim Deegan
@ 2014-05-15 13:35 ` Tim Deegan
  2014-05-19 14:08   ` Jan Beulich
  4 siblings, 1 reply; 15+ messages in thread
From: Tim Deegan @ 2014-05-15 13:35 UTC (permalink / raw)
  To: xen-devel; +Cc: George Dunlap, Keir Fraser

Stage one of many in merging PVH and HVM code in the hypervisor.

This exposes a few new hypercalls to HVM guests, all of which were
already available to PVH ones:

 - XENMEM_memory_map / XENMEM_machine_memory_map / XENMEM_machphys_mapping:
   These are basically harmless, if a bit useless to plain HVM.

 - VCPUOP_send_nmi / VCPUOP_initialise / VCPUOP[_is]_up / VCPUOP_down
   This will eventually let HVM guests bring up APs the way PVH ones do.
   For now, the VCPUOP_initialise paths are still gated on is_pvh.
 - VCPUOP_get_physid
   Harmless.

 - __HYPERVISOR_platform_op (XSM_PRIV callers only).

 - __HYPERVISOR_mmuext_op.
   The pagetable manipulation MMUEXT ops are already denied
   (or no-ops) to paging_mode_refcounts() domains; the baseptr ones
   are already denied to paging_mode_translate() domains.
   I have restricted MMUEXT[UN]MARK_SUPER to !paging_mode_refcounts()
   domains as well, as I can see no need for them in PVH.
   I have also restricted MMUEXT_CLEAR_PAGE / MMUEXT_COPY_PAGE
   to PV domains only, as there is no need for them in HVM/PVH ones
   and they would lead to complication with sharing/paging operations.
   That leaves TLB and cache flush operations, which are OK.

 - PHYSDEVOP_* (only for hardware control domains).
   Also make ops that touch PV vcpu state (PHYSDEVOP_set_iopl and
   PHYSDEVOP_set_iobitmap) gate on is_pv rather than !is_pvh.

PVH guests can also see a few hypercalls that they couldn't before,
but which were already available to HVM guests:

 - __HYPERVISOR_set_timer_op

 - __HYPERVISOR_tmem_op

Signed-off-by: Tim Deegan <tim@xen.org>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
Cc: Keir Fraser <keir@xen.org>
Cc: Mukesh Rathor <mukesh.rathor@oracle.com>
Cc: George Dunlap <george.dunlap@eu.citrix.com>
---
v2: restrict MMUEXT_CLEAR_PAGE / MMUEXT_COPY_PAGE;
    reword to correctly state that some MMUEXT ops are noops; and
    use if() rather than one-clause switch() in a few places.
v3: use COMPAT_CALL() for 32bit platform op.
---
 xen/arch/x86/hvm/hvm.c          | 129 +++++++---------------------------------
 xen/arch/x86/mm.c               |  24 ++++++++
 xen/arch/x86/physdev.c          |   4 +-
 xen/arch/x86/x86_64/compat/mm.c |   2 -
 xen/include/asm-x86/hypercall.h |  10 ++++
 5 files changed, 57 insertions(+), 112 deletions(-)

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index b69f164..1579cb1 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -3844,20 +3844,12 @@ static long hvm_grant_table_op(
 
 static long hvm_memory_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
 {
-    long rc;
+    long rc = do_memory_op(cmd, arg);
 
-    switch ( cmd & MEMOP_CMD_MASK )
-    {
-    case XENMEM_memory_map:
-    case XENMEM_machine_memory_map:
-    case XENMEM_machphys_mapping:
-        return -ENOSYS;
-    case XENMEM_decrease_reservation:
-        rc = do_memory_op(cmd, arg);
+    if ( (cmd & MEMOP_CMD_MASK) == XENMEM_decrease_reservation )
         current->domain->arch.hvm_domain.qemu_mapcache_invalidate = 1;
-        return rc;
-    }
-    return do_memory_op(cmd, arg);
+
+    return rc;
 }
 
 static long hvm_physdev_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
@@ -3865,7 +3857,7 @@ static long hvm_physdev_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
     switch ( cmd )
     {
     default:
-        if ( !is_pvh_vcpu(current) || !is_hardware_domain(current->domain) )
+        if ( !is_hardware_domain(current->domain) )
             return -ENOSYS;
         /* fall through */
     case PHYSDEVOP_map_pirq:
@@ -3877,31 +3869,6 @@ static long hvm_physdev_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
     }
 }
 
-static long hvm_vcpu_op(
-    int cmd, int vcpuid, XEN_GUEST_HANDLE_PARAM(void) arg)
-{
-    long rc;
-
-    switch ( cmd )
-    {
-    case VCPUOP_register_runstate_memory_area:
-    case VCPUOP_get_runstate_info:
-    case VCPUOP_set_periodic_timer:
-    case VCPUOP_stop_periodic_timer:
-    case VCPUOP_set_singleshot_timer:
-    case VCPUOP_stop_singleshot_timer:
-    case VCPUOP_register_vcpu_info:
-    case VCPUOP_register_vcpu_time_memory_area:
-        rc = do_vcpu_op(cmd, vcpuid, arg);
-        break;
-    default:
-        rc = -ENOSYS;
-        break;
-    }
-
-    return rc;
-}
-
 typedef unsigned long hvm_hypercall_t(
     unsigned long, unsigned long, unsigned long, unsigned long, unsigned long,
     unsigned long);
@@ -3920,43 +3887,10 @@ static long hvm_grant_table_op_compat32(unsigned int cmd,
 
 static long hvm_memory_op_compat32(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
 {
-    int rc;
+    int rc = compat_memory_op(cmd, arg);
 
-    switch ( cmd & MEMOP_CMD_MASK )
-    {
-    case XENMEM_memory_map:
-    case XENMEM_machine_memory_map:
-    case XENMEM_machphys_mapping:
-        return -ENOSYS;
-    case XENMEM_decrease_reservation:
-        rc = compat_memory_op(cmd, arg);
+    if ( (cmd & MEMOP_CMD_MASK) == XENMEM_decrease_reservation )
         current->domain->arch.hvm_domain.qemu_mapcache_invalidate = 1;
-        return rc;
-    }
-    return compat_memory_op(cmd, arg);
-}
-
-static long hvm_vcpu_op_compat32(
-    int cmd, int vcpuid, XEN_GUEST_HANDLE_PARAM(void) arg)
-{
-    long rc;
-
-    switch ( cmd )
-    {
-    case VCPUOP_register_runstate_memory_area:
-    case VCPUOP_get_runstate_info:
-    case VCPUOP_set_periodic_timer:
-    case VCPUOP_stop_periodic_timer:
-    case VCPUOP_set_singleshot_timer:
-    case VCPUOP_stop_singleshot_timer:
-    case VCPUOP_register_vcpu_info:
-    case VCPUOP_register_vcpu_time_memory_area:
-        rc = compat_vcpu_op(cmd, vcpuid, arg);
-        break;
-    default:
-        rc = -ENOSYS;
-        break;
-    }
 
     return rc;
 }
@@ -3966,27 +3900,29 @@ static long hvm_physdev_op_compat32(
 {
     switch ( cmd )
     {
+    default:
+        if ( !is_hardware_domain(current->domain) )
+            return -ENOSYS;
+        /* fall through */
         case PHYSDEVOP_map_pirq:
         case PHYSDEVOP_unmap_pirq:
         case PHYSDEVOP_eoi:
         case PHYSDEVOP_irq_status_query:
         case PHYSDEVOP_get_free_pirq:
             return compat_physdev_op(cmd, arg);
-        break;
-    default:
-            return -ENOSYS;
-        break;
     }
 }
 
 static hvm_hypercall_t *const hvm_hypercall64_table[NR_hypercalls] = {
     [ __HYPERVISOR_memory_op ] = (hvm_hypercall_t *)hvm_memory_op,
     [ __HYPERVISOR_grant_table_op ] = (hvm_hypercall_t *)hvm_grant_table_op,
-    [ __HYPERVISOR_vcpu_op ] = (hvm_hypercall_t *)hvm_vcpu_op,
     [ __HYPERVISOR_physdev_op ] = (hvm_hypercall_t *)hvm_physdev_op,
+    HYPERCALL(platform_op),
     HYPERCALL(xen_version),
     HYPERCALL(console_io),
     HYPERCALL(event_channel_op),
+    HYPERCALL(vcpu_op),
+    HYPERCALL(mmuext_op),
     HYPERCALL(sched_op),
     HYPERCALL(set_timer_op),
     HYPERCALL(xsm_op),
@@ -4002,11 +3938,13 @@ static hvm_hypercall_t *const hvm_hypercall64_table[NR_hypercalls] = {
 static hvm_hypercall_t *const hvm_hypercall32_table[NR_hypercalls] = {
     [ __HYPERVISOR_memory_op ] = (hvm_hypercall_t *)hvm_memory_op_compat32,
     [ __HYPERVISOR_grant_table_op ] = (hvm_hypercall_t *)hvm_grant_table_op_compat32,
-    [ __HYPERVISOR_vcpu_op ] = (hvm_hypercall_t *)hvm_vcpu_op_compat32,
     [ __HYPERVISOR_physdev_op ] = (hvm_hypercall_t *)hvm_physdev_op_compat32,
+    COMPAT_CALL(platform_op),
     COMPAT_CALL(xen_version),
     HYPERCALL(console_io),
     HYPERCALL(event_channel_op),
+    COMPAT_CALL(vcpu_op),
+    COMPAT_CALL(mmuext_op),
     COMPAT_CALL(sched_op),
     COMPAT_CALL(set_timer_op),
     HYPERCALL(xsm_op),
@@ -4016,24 +3954,6 @@ static hvm_hypercall_t *const hvm_hypercall32_table[NR_hypercalls] = {
     HYPERCALL(tmem_op)
 };
 
-/* PVH 32bitfixme. */
-static hvm_hypercall_t *const pvh_hypercall64_table[NR_hypercalls] = {
-    HYPERCALL(platform_op),
-    HYPERCALL(memory_op),
-    HYPERCALL(xen_version),
-    HYPERCALL(console_io),
-    [ __HYPERVISOR_grant_table_op ]  = (hvm_hypercall_t *)hvm_grant_table_op,
-    HYPERCALL(vcpu_op),
-    HYPERCALL(mmuext_op),
-    HYPERCALL(xsm_op),
-    HYPERCALL(sched_op),
-    HYPERCALL(event_channel_op),
-    [ __HYPERVISOR_physdev_op ]      = (hvm_hypercall_t *)hvm_physdev_op,
-    HYPERCALL(hvm_op),
-    HYPERCALL(sysctl),
-    HYPERCALL(domctl)
-};
-
 int hvm_do_hypercall(struct cpu_user_regs *regs)
 {
     struct vcpu *curr = current;
@@ -4060,9 +3980,7 @@ int hvm_do_hypercall(struct cpu_user_regs *regs)
     if ( (eax & 0x80000000) && is_viridian_domain(curr->domain) )
         return viridian_hypercall(regs);
 
-    if ( (eax >= NR_hypercalls) ||
-         (is_pvh_vcpu(curr) ? !pvh_hypercall64_table[eax]
-                            : !hvm_hypercall32_table[eax]) )
+    if ( (eax >= NR_hypercalls) || !hvm_hypercall32_table[eax] )
     {
         regs->eax = -ENOSYS;
         return HVM_HCALL_completed;
@@ -4077,14 +3995,9 @@ int hvm_do_hypercall(struct cpu_user_regs *regs)
                     regs->r10, regs->r8, regs->r9);
 
         curr->arch.hvm_vcpu.hcall_64bit = 1;
-        if ( is_pvh_vcpu(curr) )
-            regs->rax = pvh_hypercall64_table[eax](regs->rdi, regs->rsi,
-                                                   regs->rdx, regs->r10,
-                                                   regs->r8, regs->r9);
-        else
-            regs->rax = hvm_hypercall64_table[eax](regs->rdi, regs->rsi,
-                                                   regs->rdx, regs->r10,
-                                                   regs->r8, regs->r9);
+        regs->rax = hvm_hypercall64_table[eax](regs->rdi, regs->rsi,
+                                               regs->rdx, regs->r10,
+                                               regs->r8, regs->r9);
         curr->arch.hvm_vcpu.hcall_64bit = 0;
     }
     else
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 1a8a5e0..a263d26 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -3253,6 +3253,12 @@ long do_mmuext_op(
         case MMUEXT_CLEAR_PAGE: {
             struct page_info *page;
 
+            if ( !is_pv_vcpu(current) )
+            {
+                okay = 0;
+                break;
+            }
+
             page = get_page_from_gfn(d, op.arg1.mfn, NULL, P2M_ALLOC);
             if ( !page || !get_page_type(page, PGT_writable_page) )
             {
@@ -3276,6 +3282,12 @@ long do_mmuext_op(
         {
             struct page_info *src_page, *dst_page;
 
+            if ( !is_pv_vcpu(current) )
+            {
+                okay = 0;
+                break;
+            }
+
             src_page = get_page_from_gfn(d, op.arg2.src_mfn, NULL, P2M_ALLOC);
             if ( unlikely(!src_page) )
             {
@@ -3310,6 +3322,12 @@ long do_mmuext_op(
             unsigned long mfn;
             struct spage_info *spage;
 
+            if ( paging_mode_refcounts(pg_owner) )
+            {
+                okay = 0;
+                break;
+            }
+
             mfn = op.arg1.mfn;
             if ( mfn & (L1_PAGETABLE_ENTRIES-1) )
             {
@@ -3336,6 +3354,12 @@ long do_mmuext_op(
             unsigned long mfn;
             struct spage_info *spage;
 
+            if ( paging_mode_refcounts(pg_owner) )
+            {
+                okay = 0;
+                break;
+            }
+
             mfn = op.arg1.mfn;
             if ( mfn & (L1_PAGETABLE_ENTRIES-1) )
             {
diff --git a/xen/arch/x86/physdev.c b/xen/arch/x86/physdev.c
index f178315..a2d2b96 100644
--- a/xen/arch/x86/physdev.c
+++ b/xen/arch/x86/physdev.c
@@ -520,7 +520,7 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
         struct physdev_set_iopl set_iopl;
 
         ret = -ENOSYS;
-        if ( is_pvh_vcpu(current) )
+        if ( !is_pv_vcpu(current) )
             break;
 
         ret = -EFAULT;
@@ -538,7 +538,7 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
         struct physdev_set_iobitmap set_iobitmap;
 
         ret = -ENOSYS;
-        if ( is_pvh_vcpu(current) )
+        if ( !is_pv_vcpu(current) )
             break;
 
         ret = -EFAULT;
diff --git a/xen/arch/x86/x86_64/compat/mm.c b/xen/arch/x86/x86_64/compat/mm.c
index 69c6195..3fa90aa 100644
--- a/xen/arch/x86/x86_64/compat/mm.c
+++ b/xen/arch/x86/x86_64/compat/mm.c
@@ -236,8 +236,6 @@ int compat_update_va_mapping_otherdomain(unsigned long va, u32 lo, u32 hi,
     return do_update_va_mapping_otherdomain(va, lo | ((u64)hi << 32), flags, domid);
 }
 
-DEFINE_XEN_GUEST_HANDLE(mmuext_op_compat_t);
-
 int compat_mmuext_op(XEN_GUEST_HANDLE_PARAM(mmuext_op_compat_t) cmp_uops,
                      unsigned int count,
                      XEN_GUEST_HANDLE_PARAM(uint) pdone,
diff --git a/xen/include/asm-x86/hypercall.h b/xen/include/asm-x86/hypercall.h
index afa8ba9..cee8817 100644
--- a/xen/include/asm-x86/hypercall.h
+++ b/xen/include/asm-x86/hypercall.h
@@ -8,6 +8,7 @@
 #include <public/physdev.h>
 #include <public/arch-x86/xen-mca.h> /* for do_mca */
 #include <xen/types.h>
+#include <compat/memory.h>
 
 /*
  * Both do_mmuext_op() and do_mmu_update():
@@ -110,4 +111,13 @@ extern int
 arch_compat_vcpu_op(
     int cmd, struct vcpu *v, XEN_GUEST_HANDLE_PARAM(void) arg);
 
+DEFINE_XEN_GUEST_HANDLE(mmuext_op_compat_t);
+
+extern int
+compat_mmuext_op(
+    XEN_GUEST_HANDLE_PARAM(mmuext_op_compat_t) cmp_uops,
+    unsigned int count,
+    XEN_GUEST_HANDLE_PARAM(uint) pdone,
+    unsigned int foreigndom);
+
 #endif /* __ASM_X86_HYPERCALL_H__ */
-- 
1.8.5.2

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH RFC] x86/hvm: unify HVM and PVH hypercall tables.
  2014-05-15 10:34   ` Tim Deegan
@ 2014-05-16  0:19     ` Mukesh Rathor
  0 siblings, 0 replies; 15+ messages in thread
From: Mukesh Rathor @ 2014-05-16  0:19 UTC (permalink / raw)
  To: Tim Deegan; +Cc: Keir Fraser, Jan Beulich, xen-devel

On Thu, 15 May 2014 12:34:27 +0200
Tim Deegan <tim@xen.org> wrote:

> At 09:08 +0100 on 09 May (1399622918), Jan Beulich wrote:
> > >>> On 08.05.14 at 17:31, <tim@xen.org> wrote:
> > >  - __HYPERVISOR_platform_op (XSM_PRIV callers only).
> > 
> > I think this needs a little more thought that just relying on the
> > XSM_PRIV check: There are several operations here dealing with
> > machine memory addresses, which aren't directly meaningful to PVH
> > (and HVM, but for now we're not planning on having HVM Dom0). Do
> > you think it is useful to expose them the way they are nevertheless?
> 
> I'll punt that to Mukesh: are there operations in here that a PVH
> dom0 couldn't/shouldn't use or that need adjustment?

I only looked at what was needed in the immediate:

XENPF_settime        17
XENPF_firmware_info  50
XENPF_get_cpuinfo    55

The is_pvh_check restriction for the rest got dropped very quickly in the 
face of opposition. I think we should add them here until, one by one, 
each one is studied and confirmed to be OK.

thanks
mukesh

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v3] x86/hvm: unify HVM and PVH hypercall tables.
  2014-05-15 13:35 ` [PATCH v3] x86/hvm: unify HVM and PVH hypercall tables Tim Deegan
@ 2014-05-19 14:08   ` Jan Beulich
  2014-05-19 15:22     ` Tim Deegan
  0 siblings, 1 reply; 15+ messages in thread
From: Jan Beulich @ 2014-05-19 14:08 UTC (permalink / raw)
  To: Tim Deegan; +Cc: George Dunlap, Keir Fraser, xen-devel

>>> On 15.05.14 at 15:35, <tim@xen.org> wrote:
> v3: use COMPAT_CALL() for 32bit platform op.

I had this already committed, but the pre-push build test fails due to
this change - apparently there's a declaration of compat_platform_op
missing.

Jan

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v3] x86/hvm: unify HVM and PVH hypercall tables.
  2014-05-19 14:08   ` Jan Beulich
@ 2014-05-19 15:22     ` Tim Deegan
  0 siblings, 0 replies; 15+ messages in thread
From: Tim Deegan @ 2014-05-19 15:22 UTC (permalink / raw)
  To: Jan Beulich; +Cc: George Dunlap, Keir Fraser, xen-devel

At 15:08 +0100 on 19 May (1400508532), Jan Beulich wrote:
> >>> On 15.05.14 at 15:35, <tim@xen.org> wrote:
> > v3: use COMPAT_CALL() for 32bit platform op.
> 
> I had this already committed, but the pre-push build test fails due to
> this change - apparently there's a declaration of compat_platform_op
> missing.

Urgh.  /me goes off to check why his test build seemed to work. :(
Will send v4 probably later today.  Sorry about that.

Tim.

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2014-05-19 15:22 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-05-08 15:31 [PATCH RFC] x86/hvm: unify HVM and PVH hypercall tables Tim Deegan
2014-05-08 16:53 ` George Dunlap
2014-05-15 10:25   ` Tim Deegan
2014-05-08 18:39 ` Konrad Rzeszutek Wilk
2014-05-15 10:30   ` Tim Deegan
2014-05-15 11:58     ` Jan Beulich
2014-05-09  8:08 ` Jan Beulich
2014-05-15 10:34   ` Tim Deegan
2014-05-16  0:19     ` Mukesh Rathor
2014-05-15 10:53 ` [PATCH v2] " Tim Deegan
2014-05-15 12:39   ` Jan Beulich
2014-05-15 13:35     ` [PATCH v2] x86/hvm: unify HVM and PVH hypercall tables.g Tim Deegan
2014-05-15 13:35 ` [PATCH v3] x86/hvm: unify HVM and PVH hypercall tables Tim Deegan
2014-05-19 14:08   ` Jan Beulich
2014-05-19 15:22     ` Tim Deegan

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.