* [PATCH for-4.7 v2 1/2] xen/x86: Remove the use of vm86_mode()
@ 2016-04-07 21:39 Andrew Cooper
  2016-04-07 21:39 ` [PATCH for-4.7 v2 2/2] xen/x86: Introduce a new VMASSIST for architectural behaviour of iopl Andrew Cooper
From: Andrew Cooper @ 2016-04-07 21:39 UTC
  To: Xen-devel; +Cc: Andrew Cooper, Jan Beulich

Xen, being 64bit only, cannot run PV guests in vm86 mode.  HVM guests,
however, can run in vm86 mode, and common codepaths need to be able to cope.

The definition of vm86_mode() in x86_64/regs.h (hardwired to 0, as long mode
has no VM86 support) is incorrect, as the predicate is also used by non-PV
codepaths.

One buggy use is in hvm/emulate.c.  An HVM guest can be in vm86 mode, and
vm86_mode() silently omits the check.  Luckily, due to the VEX prefix decoding
logic in x86_emulate(), there is no path to the erroneous use with EFLAGS_VM
set.
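
To make the silent failure concrete, here is a minimal sketch (not part of
the patch; surrounding conditions elided) of the stub being removed and the
open-coded test that replaces it:

    /* x86_64/regs.h -- the stub: always false, regardless of the frame. */
    #define vm86_mode(r) (0) /* No VM86 support in long mode. */

    /* hvm/emulate.c before: the compiler folds this clause away, so an
     * HVM vcpu with EFLAGS.VM set would not be rejected here. */
    if ( ... || vm86_mode(ctxt->regs) || ... )

    /* hvm/emulate.c after: the guest's eflags are actually consulted. */
    if ( ... || (ctxt->regs->eflags & X86_EFLAGS_VM) || ... )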

Another potentially problematic use is in show_guest_stack().  In principle,
show_guest_stack() is common code called for both PV and HVM vcpus.  HVM vcpus
exit early (with no reasonable way of making the code generic), making this
part a PV-only codepath.

Open-code its use in emulate.c, matching the surrounding code.  This leaves
all other uses in PV-only codepaths, making the predicate logically dead.
Drop it completely, to avoid future misuse.

Part of the resulting cleanup removes the vm86attr parameter from
read_descriptor(), retaining the one relevant piece of information: whether
we are reading a selector for an instruction fetch or a data fetch.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>

v2:
 * New
---
 xen/arch/x86/hvm/emulate.c        |  2 +-
 xen/arch/x86/traps.c              | 55 ++++++++++++---------------------------
 xen/include/asm-x86/regs.h        |  2 +-
 xen/include/asm-x86/x86_64/regs.h |  1 -
 4 files changed, 19 insertions(+), 41 deletions(-)

diff --git a/xen/arch/x86/hvm/emulate.c b/xen/arch/x86/hvm/emulate.c
index f5ab5bc..cc0b841 100644
--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -1559,7 +1559,7 @@ static int hvmemul_get_fpu(
         break;
     case X86EMUL_FPU_ymm:
         if ( !(curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE) ||
-             vm86_mode(ctxt->regs) ||
+             (ctxt->regs->eflags & X86_EFLAGS_VM) ||
              !(curr->arch.hvm_vcpu.guest_cr[4] & X86_CR4_OSXSAVE) ||
              !(curr->arch.xcr0 & XSTATE_SSE) ||
              !(curr->arch.xcr0 & XSTATE_YMM) )
diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index 6fbb1cf..8125c53 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -198,17 +198,8 @@ static void show_guest_stack(struct vcpu *v, const struct cpu_user_regs *regs)
         return;
     }
 
-    if ( vm86_mode(regs) )
-    {
-        stack = (unsigned long *)((regs->ss << 4) + (regs->esp & 0xffff));
-        printk("Guest stack trace from ss:sp = %04x:%04x (VM86)\n  ",
-               regs->ss, (uint16_t)(regs->esp & 0xffff));
-    }
-    else
-    {
-        stack = (unsigned long *)regs->esp;
-        printk("Guest stack trace from "__OP"sp=%p:\n  ", stack);
-    }
+    stack = (unsigned long *)regs->esp;
+    printk("Guest stack trace from "__OP"sp=%p:\n  ", stack);
 
     if ( !access_ok(stack, sizeof(*stack)) )
     {
@@ -1683,28 +1674,20 @@ static int read_descriptor(unsigned int sel,
                            unsigned long *base,
                            unsigned long *limit,
                            unsigned int *ar,
-                           unsigned int vm86attr)
+                           bool_t insn_fetch)
 {
     struct desc_struct desc;
 
-    if ( !vm86_mode(regs) )
-    {
-        if ( sel < 4)
-            desc.b = desc.a = 0;
-        else if ( __get_user(desc,
-                        (const struct desc_struct *)(!(sel & 4)
-                                                     ? GDT_VIRT_START(v)
-                                                     : LDT_VIRT_START(v))
-                        + (sel >> 3)) )
-            return 0;
-        if ( !(vm86attr & _SEGMENT_CODE) )
-            desc.b &= ~_SEGMENT_L;
-    }
-    else
-    {
-        desc.a = (sel << 20) | 0xffff;
-        desc.b = vm86attr | (sel >> 12);
-    }
+    if ( sel < 4)
+        desc.b = desc.a = 0;
+    else if ( __get_user(desc,
+                         (const struct desc_struct *)(!(sel & 4)
+                                                      ? GDT_VIRT_START(v)
+                                                      : LDT_VIRT_START(v))
+                         + (sel >> 3)) )
+        return 0;
+    if ( !insn_fetch )
+        desc.b &= ~_SEGMENT_L;
 
     *ar = desc.b & 0x00f0ff00;
     if ( !(desc.b & _SEGMENT_L) )
@@ -1715,7 +1698,7 @@ static int read_descriptor(unsigned int sel,
         if ( desc.b & _SEGMENT_G )
             *limit = ((*limit + 1) << 12) - 1;
 #ifndef NDEBUG
-        if ( !vm86_mode(regs) && (sel > 3) )
+        if ( sel > 3 )
         {
             unsigned int a, l;
             unsigned char valid;
@@ -1805,8 +1788,7 @@ static int guest_io_okay(
     int user_mode = !(v->arch.flags & TF_kernel_mode);
 #define TOGGLE_MODE() if ( user_mode ) toggle_guest_mode(v)
 
-    if ( !vm86_mode(regs) &&
-         (v->arch.pv_vcpu.iopl >= (guest_kernel_mode(v, regs) ? 1 : 3)) )
+    if ( v->arch.pv_vcpu.iopl >= (guest_kernel_mode(v, regs) ? 1 : 3) )
         return 1;
 
     if ( v->arch.pv_vcpu.iobmp_limit > (port + bytes) )
@@ -2094,8 +2076,7 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
     bool_t vpmu_msr;
 
     if ( !read_descriptor(regs->cs, v, regs,
-                          &code_base, &code_limit, &ar,
-                          _SEGMENT_CODE|_SEGMENT_S|_SEGMENT_DPL|_SEGMENT_P) )
+                          &code_base, &code_limit, &ar, 1) )
         goto fail;
     op_default = op_bytes = (ar & (_SEGMENT_L|_SEGMENT_DB)) ? 4 : 2;
     ad_default = ad_bytes = (ar & _SEGMENT_L) ? 8 : op_default;
@@ -2187,9 +2168,7 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
         if ( !(ar & _SEGMENT_L) )
         {
             if ( !read_descriptor(data_sel, v, regs,
-                                  &data_base, &data_limit, &ar,
-                                  _SEGMENT_WR|_SEGMENT_S|_SEGMENT_DPL|
-                                  _SEGMENT_P) )
+                                  &data_base, &data_limit, &ar, 0) )
                 goto fail;
             if ( !(ar & _SEGMENT_S) ||
                  !(ar & _SEGMENT_P) ||
diff --git a/xen/include/asm-x86/regs.h b/xen/include/asm-x86/regs.h
index a6338f0..22efc42 100644
--- a/xen/include/asm-x86/regs.h
+++ b/xen/include/asm-x86/regs.h
@@ -10,7 +10,7 @@
     /* Frame pointer must point into current CPU stack. */                    \
     ASSERT(diff < STACK_SIZE);                                                \
     /* If not a guest frame, it must be a hypervisor frame. */                \
-    ASSERT((diff == 0) || (!vm86_mode(r) && (r->cs == __HYPERVISOR_CS)));     \
+    ASSERT((diff == 0) || (r->cs == __HYPERVISOR_CS));                        \
     /* Return TRUE if it's a guest frame. */                                  \
     (diff == 0);                                                              \
 })
diff --git a/xen/include/asm-x86/x86_64/regs.h b/xen/include/asm-x86/x86_64/regs.h
index 3cdc702..171cf9a 100644
--- a/xen/include/asm-x86/x86_64/regs.h
+++ b/xen/include/asm-x86/x86_64/regs.h
@@ -4,7 +4,6 @@
 #include <xen/types.h>
 #include <public/xen.h>
 
-#define vm86_mode(r) (0) /* No VM86 support in long mode. */
 #define ring_0(r)    (((r)->cs & 3) == 0)
 #define ring_1(r)    (((r)->cs & 3) == 1)
 #define ring_2(r)    (((r)->cs & 3) == 2)
-- 
2.1.4



* [PATCH for-4.7 v2 2/2] xen/x86: Introduce a new VMASSIST for architectural behaviour of iopl
  2016-04-07 21:39 [PATCH for-4.7 v2 1/2] xen/x86: Remove the use of vm86_mode() Andrew Cooper
@ 2016-04-07 21:39 ` Andrew Cooper
  2016-04-07 21:55   ` Jan Beulich
From: Andrew Cooper @ 2016-04-07 21:39 UTC
  To: Xen-devel; +Cc: Andrew Cooper, Jan Beulich

The existing vIOPL interface is hard to use, and need not be.

Introduce a VMASSIST with which a guest can opt in to vIOPL behaviour
consistent with native hardware.

Specifically:
 - virtual iopl updated from do_iret() hypercalls.
 - virtual iopl reported in bounce frames.
 - guest kernels assumed to be level 0 for the purpose of iopl checks.

v->arch.pv_vcpu.iopl is altered to store the IOPL shifted as it would appear
in eflags, for the benefit of the assembly code.
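
As a reference for the encoding (a sketch, paraphrasing the MASK_INSR()
helper rather than quoting it): X86_EFLAGS_IOPL is the two-bit field at
bits 13:12, i.e. mask 0x3000, so

    /* MASK_INSR(val, mask) shifts val into the field selected by mask:
     * MASK_INSR(3, X86_EFLAGS_IOPL) == 3 << 12 == 0x3000. */
    curr->arch.pv_vcpu.iopl = MASK_INSR(set_iopl.iopl, X86_EFLAGS_IOPL);

    /* Folding the virtual IOPL into a bounce frame's eflags is then a
     * plain OR, with no shifting needed in the assembly code: */
    eflags |= n->arch.pv_vcpu.iopl;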

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>

v2:
 * Shift vcpu.iopl to match eflags.
 * Factor out iopl_ok().
 * Use CMOVcc in assembly code.

Along with this, I have functional tests for both vIOPL interfaces in PV
guests.
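
As a worked example of the factored-out check (see iopl_ok() in the diff
below): with the VMASSIST enabled, a guest kernel is treated as CPL 0, and
IOPL(0) == 0 compares <= any stored iopl, matching native hardware where
ring 0 is never subject to the IOPL check.  Illustrative values:

    cpl = guest_kernel_mode(v, regs)
          ? (VM_ASSIST(v->domain, architectural_iopl) ? 0 : 1) : 3;
    /* kernel, assist on : IOPL(0) = 0x0000 <= iopl -> always ok
     * kernel, assist off: IOPL(1) = 0x1000 <= iopl -> needs viopl >= 1
     * user              : IOPL(3) = 0x3000 <= iopl -> needs viopl == 3 */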
---
 xen/arch/x86/domain.c              | 15 ++++++++++-----
 xen/arch/x86/physdev.c             |  2 +-
 xen/arch/x86/traps.c               | 15 +++++++++++++--
 xen/arch/x86/x86_64/asm-offsets.c  |  3 +++
 xen/arch/x86/x86_64/compat/entry.S |  7 ++++++-
 xen/arch/x86/x86_64/compat/traps.c |  4 ++++
 xen/arch/x86/x86_64/entry.S        |  7 ++++++-
 xen/arch/x86/x86_64/traps.c        |  3 +++
 xen/include/asm-x86/config.h       |  1 +
 xen/include/asm-x86/domain.h       |  4 +++-
 xen/include/public/xen.h           |  8 ++++++++
 11 files changed, 58 insertions(+), 11 deletions(-)

diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index a4f6db2..8c590f3 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -994,7 +994,7 @@ int arch_set_info_guest(
     init_int80_direct_trap(v);
 
     /* IOPL privileges are virtualised. */
-    v->arch.pv_vcpu.iopl = (v->arch.user_regs.eflags >> 12) & 3;
+    v->arch.pv_vcpu.iopl = v->arch.user_regs.eflags & X86_EFLAGS_IOPL;
     v->arch.user_regs.eflags &= ~X86_EFLAGS_IOPL;
 
     /* Ensure real hardware interrupts are enabled. */
@@ -1735,8 +1735,10 @@ static void load_segments(struct vcpu *n)
             cs_and_mask = (unsigned short)regs->cs |
                 ((unsigned int)vcpu_info(n, evtchn_upcall_mask) << 16);
             /* Fold upcall mask into RFLAGS.IF. */
-            eflags  = regs->_eflags & ~X86_EFLAGS_IF;
+            eflags  = regs->_eflags & ~(X86_EFLAGS_IF|X86_EFLAGS_IOPL);
             eflags |= !vcpu_info(n, evtchn_upcall_mask) << 9;
+            if ( VM_ASSIST(n->domain, architectural_iopl) )
+                eflags |= n->arch.pv_vcpu.iopl;
 
             if ( !ring_1(regs) )
             {
@@ -1763,7 +1765,8 @@ static void load_segments(struct vcpu *n)
                 vcpu_info(n, evtchn_upcall_mask) = 1;
 
             regs->entry_vector |= TRAP_syscall;
-            regs->_eflags      &= 0xFFFCBEFFUL;
+            regs->_eflags      &= ~(X86_EFLAGS_AC|X86_EFLAGS_VM|X86_EFLAGS_RF|
+                                    X86_EFLAGS_NT|X86_EFLAGS_IOPL|X86_EFLAGS_TF);
             regs->ss            = FLAT_COMPAT_KERNEL_SS;
             regs->_esp          = (unsigned long)(esp-7);
             regs->cs            = FLAT_COMPAT_KERNEL_CS;
@@ -1781,8 +1784,10 @@ static void load_segments(struct vcpu *n)
             ((unsigned long)vcpu_info(n, evtchn_upcall_mask) << 32);
 
         /* Fold upcall mask into RFLAGS.IF. */
-        rflags  = regs->rflags & ~X86_EFLAGS_IF;
+        rflags  = regs->rflags & ~(X86_EFLAGS_IF|X86_EFLAGS_IOPL);
         rflags |= !vcpu_info(n, evtchn_upcall_mask) << 9;
+        if ( VM_ASSIST(n->domain, architectural_iopl) )
+            rflags |= n->arch.pv_vcpu.iopl;
 
         if ( put_user(regs->ss,            rsp- 1) |
              put_user(regs->rsp,           rsp- 2) |
@@ -1806,7 +1811,7 @@ static void load_segments(struct vcpu *n)
 
         regs->entry_vector |= TRAP_syscall;
         regs->rflags       &= ~(X86_EFLAGS_AC|X86_EFLAGS_VM|X86_EFLAGS_RF|
-                                X86_EFLAGS_NT|X86_EFLAGS_TF);
+                                X86_EFLAGS_NT|X86_EFLAGS_IOPL|X86_EFLAGS_TF);
         regs->ss            = FLAT_KERNEL_SS;
         regs->rsp           = (unsigned long)(rsp-11);
         regs->cs            = FLAT_KERNEL_CS;
diff --git a/xen/arch/x86/physdev.c b/xen/arch/x86/physdev.c
index 1cb9b58..5a49796 100644
--- a/xen/arch/x86/physdev.c
+++ b/xen/arch/x86/physdev.c
@@ -529,7 +529,7 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
         if ( set_iopl.iopl > 3 )
             break;
         ret = 0;
-        curr->arch.pv_vcpu.iopl = set_iopl.iopl;
+        curr->arch.pv_vcpu.iopl = MASK_INSR(set_iopl.iopl, X86_EFLAGS_IOPL);
         break;
     }
 
diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index 8125c53..7861a3c 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -1779,6 +1779,17 @@ static int read_gate_descriptor(unsigned int gate_sel,
     return 1;
 }
 
+/* Perform IOPL check between the vcpu's shadowed IOPL, and the assumed cpl. */
+static bool_t iopl_ok(const struct vcpu *v, const struct cpu_user_regs *regs)
+{
+    unsigned int cpl = guest_kernel_mode(v, regs) ?
+        (VM_ASSIST(v->domain, architectural_iopl) ? 0 : 1) : 3;
+
+    ASSERT((v->arch.pv_vcpu.iopl & ~X86_EFLAGS_IOPL) == 0);
+
+    return IOPL(cpl) <= v->arch.pv_vcpu.iopl;
+}
+
 /* Has the guest requested sufficient permission for this I/O access? */
 static int guest_io_okay(
     unsigned int port, unsigned int bytes,
@@ -1788,7 +1799,7 @@ static int guest_io_okay(
     int user_mode = !(v->arch.flags & TF_kernel_mode);
 #define TOGGLE_MODE() if ( user_mode ) toggle_guest_mode(v)
 
-    if ( v->arch.pv_vcpu.iopl >= (guest_kernel_mode(v, regs) ? 1 : 3) )
+    if ( iopl_ok(v, regs) )
         return 1;
 
     if ( v->arch.pv_vcpu.iobmp_limit > (port + bytes) )
@@ -2346,7 +2357,7 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
 
     case 0xfa: /* CLI */
     case 0xfb: /* STI */
-        if ( v->arch.pv_vcpu.iopl < (guest_kernel_mode(v, regs) ? 1 : 3) )
+        if ( !iopl_ok(v, regs) )
             goto fail;
         /*
          * This is just too dangerous to allow, in my opinion. Consider if the
diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c
index 447c650..fa37ee0 100644
--- a/xen/arch/x86/x86_64/asm-offsets.c
+++ b/xen/arch/x86/x86_64/asm-offsets.c
@@ -86,6 +86,7 @@ void __dummy__(void)
     OFFSET(VCPU_trap_ctxt, struct vcpu, arch.pv_vcpu.trap_ctxt);
     OFFSET(VCPU_kernel_sp, struct vcpu, arch.pv_vcpu.kernel_sp);
     OFFSET(VCPU_kernel_ss, struct vcpu, arch.pv_vcpu.kernel_ss);
+    OFFSET(VCPU_iopl, struct vcpu, arch.pv_vcpu.iopl);
     OFFSET(VCPU_guest_context_flags, struct vcpu, arch.vgc_flags);
     OFFSET(VCPU_nmi_pending, struct vcpu, nmi_pending);
     OFFSET(VCPU_mce_pending, struct vcpu, mce_pending);
@@ -166,4 +167,6 @@ void __dummy__(void)
     OFFSET(MB_flags, multiboot_info_t, flags);
     OFFSET(MB_cmdline, multiboot_info_t, cmdline);
     OFFSET(MB_mem_lower, multiboot_info_t, mem_lower);
+
+    OFFSET(DOMAIN_vm_assist, struct domain, vm_assist);
 }
diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S
index fd25e84..0ff6818 100644
--- a/xen/arch/x86/x86_64/compat/entry.S
+++ b/xen/arch/x86/x86_64/compat/entry.S
@@ -263,6 +263,7 @@ compat_create_bounce_frame:
         movl  UREGS_rsp+8(%rsp),%esi
 .Lft4:  mov   UREGS_ss+8(%rsp),%fs
 2:
+        movq  VCPU_domain(%rbx),%r8
         subl  $3*4,%esi
         movq  VCPU_vcpu_info(%rbx),%rax
         pushq COMPAT_VCPUINFO_upcall_mask(%rax)
@@ -277,9 +278,13 @@ compat_create_bounce_frame:
         testb %al,%al                   # Bits 0-7: saved_upcall_mask
         setz  %ch                       # %ch == !saved_upcall_mask
         movl  UREGS_eflags+8(%rsp),%eax
-        andl  $~X86_EFLAGS_IF,%eax
+        andl  $~(X86_EFLAGS_IF|X86_EFLAGS_IOPL),%eax
         addb  %ch,%ch                   # Bit 9 (EFLAGS.IF)
         orb   %ch,%ah                   # Fold EFLAGS.IF into %eax
+        xorl  %ecx,%ecx                 # if ( VM_ASSIST(v->domain, architectural_iopl) )
+        testb $1 << VMASST_TYPE_architectural_iopl,DOMAIN_vm_assist(%r8)
+        cmovnzl VCPU_iopl(%rbx),%ecx    # Bits 13:12 (EFLAGS.IOPL)
+        orl   %ecx,%eax                 # Fold EFLAGS.IOPL into %eax
 .Lft6:  movl  %eax,%fs:2*4(%rsi)        # EFLAGS
         movl  UREGS_rip+8(%rsp),%eax
 .Lft7:  movl  %eax,%fs:(%rsi)           # EIP
diff --git a/xen/arch/x86/x86_64/compat/traps.c b/xen/arch/x86/x86_64/compat/traps.c
index bbf18e4..a6afb26 100644
--- a/xen/arch/x86/x86_64/compat/traps.c
+++ b/xen/arch/x86/x86_64/compat/traps.c
@@ -99,6 +99,10 @@ unsigned int compat_iret(void)
         domain_crash(v->domain);
         return 0;
     }
+
+    if ( VM_ASSIST(v->domain, architectural_iopl) )
+        v->arch.pv_vcpu.iopl = eflags & X86_EFLAGS_IOPL;
+
     regs->_eflags = (eflags & ~X86_EFLAGS_IOPL) | X86_EFLAGS_IF;
 
     if ( unlikely(eflags & X86_EFLAGS_VM) )
diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
index b0e7257..6866e8f 100644
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -346,6 +346,7 @@ __UNLIKELY_END(create_bounce_frame_bad_sp)
         subq  $40,%rsi
         movq  UREGS_ss+8(%rsp),%rax
         ASM_STAC
+        movq  VCPU_domain(%rbx),%rdi
 .Lft2:  movq  %rax,32(%rsi)             # SS
         movq  UREGS_rsp+8(%rsp),%rax
 .Lft3:  movq  %rax,24(%rsi)             # RSP
@@ -362,9 +363,13 @@ __UNLIKELY_END(create_bounce_frame_bad_sp)
         testb $0xFF,%al                 # Bits 0-7: saved_upcall_mask
         setz  %ch                       # %ch == !saved_upcall_mask
         movl  UREGS_eflags+8(%rsp),%eax
-        andl  $~X86_EFLAGS_IF,%eax
+        andl  $~(X86_EFLAGS_IF|X86_EFLAGS_IOPL),%eax
         addb  %ch,%ch                   # Bit 9 (EFLAGS.IF)
         orb   %ch,%ah                   # Fold EFLAGS.IF into %eax
+        xorl  %ecx,%ecx                 # if ( VM_ASSIST(v->domain, architectural_iopl) )
+        testb $1 << VMASST_TYPE_architectural_iopl,DOMAIN_vm_assist(%rdi)
+        cmovnzl VCPU_iopl(%rbx),%ecx    # Bits 13:12 (EFLAGS.IOPL)
+        orl   %ecx,%eax                 # Fold EFLAGS.IOPL into %eax
 .Lft5:  movq  %rax,16(%rsi)             # RFLAGS
         movq  UREGS_rip+8(%rsp),%rax
 .Lft6:  movq  %rax,(%rsi)               # RIP
diff --git a/xen/arch/x86/x86_64/traps.c b/xen/arch/x86/x86_64/traps.c
index 26e2dd7..19f58a1 100644
--- a/xen/arch/x86/x86_64/traps.c
+++ b/xen/arch/x86/x86_64/traps.c
@@ -310,6 +310,9 @@ unsigned long do_iret(void)
         toggle_guest_mode(v);
     }
 
+    if ( VM_ASSIST(v->domain, architectural_iopl) )
+        v->arch.pv_vcpu.iopl = iret_saved.rflags & X86_EFLAGS_IOPL;
+
     regs->rip    = iret_saved.rip;
     regs->cs     = iret_saved.cs | 3; /* force guest privilege */
     regs->rflags = ((iret_saved.rflags & ~(X86_EFLAGS_IOPL|X86_EFLAGS_VM))
diff --git a/xen/include/asm-x86/config.h b/xen/include/asm-x86/config.h
index f6f5a10..c10129d 100644
--- a/xen/include/asm-x86/config.h
+++ b/xen/include/asm-x86/config.h
@@ -331,6 +331,7 @@ extern unsigned long xen_phys_start;
                                   (1UL << VMASST_TYPE_4gb_segments_notify) | \
                                   (1UL << VMASST_TYPE_writable_pagetables) | \
                                   (1UL << VMASST_TYPE_pae_extended_cr3)    | \
+                                  (1UL << VMASST_TYPE_architectural_iopl)  | \
                                   (1UL << VMASST_TYPE_m2p_strict))
 #define VM_ASSIST_VALID          NATIVE_VM_ASSIST_VALID
 #define COMPAT_VM_ASSIST_VALID   (NATIVE_VM_ASSIST_VALID & \
diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
index de60def..5f2f27f 100644
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -470,7 +470,9 @@ struct pv_vcpu
     /* I/O-port access bitmap. */
     XEN_GUEST_HANDLE(uint8) iobmp; /* Guest kernel vaddr of the bitmap. */
     unsigned int iobmp_limit; /* Number of ports represented in the bitmap. */
-    unsigned int iopl;        /* Current IOPL for this VCPU. */
+#define IOPL(val) MASK_INSR(val, X86_EFLAGS_IOPL)
+    unsigned int iopl;        /* Current IOPL for this VCPU, shifted left by
+                               * 12 to match the eflags register. */
 
     /* Current LDT details. */
     unsigned long shadow_ldt_mapcnt;
diff --git a/xen/include/public/xen.h b/xen/include/public/xen.h
index 435d789..97a5ae5 100644
--- a/xen/include/public/xen.h
+++ b/xen/include/public/xen.h
@@ -503,6 +503,14 @@ DEFINE_XEN_GUEST_HANDLE(mmuext_op_t);
 #define VMASST_TYPE_pae_extended_cr3     3
 
 /*
+ * x86 guests: Sane behaviour for virtual iopl
+ *  - virtual iopl updated from do_iret() hypercalls.
+ *  - virtual iopl reported in bounce frames.
+ *  - guest kernels assumed to be level 0 for the purpose of iopl checks.
+ */
+#define VMASST_TYPE_architectural_iopl   4
+
+/*
  * x86/64 guests: strictly hide M2P from user mode.
  * This allows the guest to control respective hypervisor behavior:
  * - when not set, L4 tables get created with the respective slot blank,
-- 
2.1.4



* Re: [PATCH for-4.7 v2 2/2] xen/x86: Introduce a new VMASSIST for architectural behaviour of iopl
  2016-04-07 21:39 ` [PATCH for-4.7 v2 2/2] xen/x86: Introduce a new VMASSIST for architectural behaviour of iopl Andrew Cooper
@ 2016-04-07 21:55   ` Jan Beulich
  2016-04-07 22:30     ` Andrew Cooper
  2016-04-08  9:30     ` [PATCH for-4.7 v3 " Andrew Cooper
From: Jan Beulich @ 2016-04-07 21:55 UTC
  To: Andrew Cooper; +Cc: Xen-devel

>>> On 07.04.16 at 23:39, <andrew.cooper3@citrix.com> wrote:
> @@ -1763,7 +1765,8 @@ static void load_segments(struct vcpu *n)
>                  vcpu_info(n, evtchn_upcall_mask) = 1;
>  
>              regs->entry_vector |= TRAP_syscall;
> -            regs->_eflags      &= 0xFFFCBEFFUL;
> +            regs->_eflags      &= ~(X86_EFLAGS_AC|X86_EFLAGS_VM|X86_EFLAGS_RF|
> +                                    X86_EFLAGS_NT|X86_EFLAGS_IOPL|X86_EFLAGS_TF);

Why AC, which didn't get cleared before? Did you just copy
the 64-bit variant from below? Assuming so, with it removed
Reviewed-by: Jan Beulich <jbeulich@suse.com>

Jan
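
[For reference, the magic constant being replaced decodes, in 32-bit terms,
as:

    0xFFFCBEFF == ~0x00034100
               == ~(X86_EFLAGS_VM|X86_EFLAGS_RF|X86_EFLAGS_NT|X86_EFLAGS_TF)

i.e. the old mask cleared neither IOPL (0x3000), which this patch
deliberately adds, nor AC (0x40000), which is the subject of the question
above.]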



* Re: [PATCH for-4.7 v2 2/2] xen/x86: Introduce a new VMASSIST for architectural behaviour of iopl
  2016-04-07 21:55   ` Jan Beulich
@ 2016-04-07 22:30     ` Andrew Cooper
  2016-04-07 22:53       ` Jan Beulich
  2016-04-08  9:30     ` [PATCH for-4.7 v3 " Andrew Cooper
From: Andrew Cooper @ 2016-04-07 22:30 UTC
  To: Jan Beulich; +Cc: Xen-devel

On 07/04/2016 22:55, Jan Beulich wrote:
>>>> On 07.04.16 at 23:39, <andrew.cooper3@citrix.com> wrote:
>> @@ -1763,7 +1765,8 @@ static void load_segments(struct vcpu *n)
>>                  vcpu_info(n, evtchn_upcall_mask) = 1;
>>  
>>              regs->entry_vector |= TRAP_syscall;
>> -            regs->_eflags      &= 0xFFFCBEFFUL;
>> +            regs->_eflags      &= ~(X86_EFLAGS_AC|X86_EFLAGS_VM|X86_EFLAGS_RF|
>> +                                    X86_EFLAGS_NT|X86_EFLAGS_IOPL|X86_EFLAGS_TF);
> Why AC, which didn't get cleared before? Did you just copy
> the 64-bit variant from below?

Yes,

> Assuming so, with it removed Reviewed-by: Jan Beulich <jbeulich@suse.com>

Why keep the disparity?

Looking this up again, architecturally speaking, it's wrong.  AC does not
get cleared on a 32 or 64bit task switch; it only gets cleared on a real
mode task switch.

I presume you are referring to c/s eb97b7dc2b "[XEN] Fix x86/64 bug where
a guest application can crash the guest OS by setting AC flag in
RFLAGS.", from 2006?  Such a PV VM is already vulnerable through other
means.  I suppose this explains why 32bit PV kernels end up leaking AC
back into userspace.

Yuck - yet more non-architectural and non-documented PV ABI caused by
Xen trying to bugfix its way around broken PV kernels.

~Andrew


* Re: [PATCH for-4.7 v2 2/2] xen/x86: Introduce a new VMASSIST for architectural behaviour of iopl
  2016-04-07 22:30     ` Andrew Cooper
@ 2016-04-07 22:53       ` Jan Beulich
  2016-04-07 23:05         ` Andrew Cooper
From: Jan Beulich @ 2016-04-07 22:53 UTC
  To: Andrew Cooper; +Cc: Xen-devel

>>> On 08.04.16 at 00:30, <andrew.cooper3@citrix.com> wrote:
> On 07/04/2016 22:55, Jan Beulich wrote:
>>>>> On 07.04.16 at 23:39, <andrew.cooper3@citrix.com> wrote:
>>> @@ -1763,7 +1765,8 @@ static void load_segments(struct vcpu *n)
>>>                  vcpu_info(n, evtchn_upcall_mask) = 1;
>>>  
>>>              regs->entry_vector |= TRAP_syscall;
>>> -            regs->_eflags      &= 0xFFFCBEFFUL;
>>> +            regs->_eflags      &= ~(X86_EFLAGS_AC|X86_EFLAGS_VM|X86_EFLAGS_RF|
>>> +                                    X86_EFLAGS_NT|X86_EFLAGS_IOPL|X86_EFLAGS_TF);
>> Why AC, which didn't get cleared before? Did you just copy
>> the 64-bit variant from below?
> 
> Yes,
> 
>> Assuming so, with it removed Reviewed-by: Jan Beulich <jbeulich@suse.com>
> 
> Why keep the disparity?

Because there's no reason to clear AC for 32-bit guests.

> Looking this up again, architecturally speaking, its wrong.  AC does not
> get cleared on a 32 or 64bit task switch; It only gets cleared on a real
> mode task switch.

Not sure what meaning of "task switch" you're implying here -
we're talking about code dealing with certain failures in the
PV context switch path, which has nothing to do with hardware
task switching.

> I presume you are refering to c/s eb97b7dc2b "[XEN] Fix x86/64 bug where
> a guest application can crash the guest OS by setting AC flag in
> RFLAGS.", from 2006?  Such a PV VM is already vulnerable from other
> means.  I suppose this explains why 32bit PV kernels end up leaking AC
> back into userspace.

Nor do I understand your reference to leaking whatever state
into user space: We're injecting a failsafe callback here, i.e.
guest execution is guaranteed to resume in kernel space.

The difference here mirrors the difference between
compat_create_bounce_frame and create_bounce_frame in
regard to what parts of EFLAGS they clear.

Jan



* Re: [PATCH for-4.7 v2 2/2] xen/x86: Introduce a new VMASSIST for architectural behaviour of iopl
  2016-04-07 22:53       ` Jan Beulich
@ 2016-04-07 23:05         ` Andrew Cooper
From: Andrew Cooper @ 2016-04-07 23:05 UTC
  To: Jan Beulich; +Cc: Xen-devel

On 07/04/2016 23:53, Jan Beulich wrote:
>>>> On 08.04.16 at 00:30, <andrew.cooper3@citrix.com> wrote:
>> On 07/04/2016 22:55, Jan Beulich wrote:
>>>>>> On 07.04.16 at 23:39, <andrew.cooper3@citrix.com> wrote:
>>>> @@ -1763,7 +1765,8 @@ static void load_segments(struct vcpu *n)
>>>>                  vcpu_info(n, evtchn_upcall_mask) = 1;
>>>>  
>>>>              regs->entry_vector |= TRAP_syscall;
>>>> -            regs->_eflags      &= 0xFFFCBEFFUL;
>>>> +            regs->_eflags      &= ~(X86_EFLAGS_AC|X86_EFLAGS_VM|X86_EFLAGS_RF|
>>>> +                                    X86_EFLAGS_NT|X86_EFLAGS_IOPL|X86_EFLAGS_TF);
>>> Why AC, which didn't get cleared before? Did you just copy
>>> the 64-bit variant from below?
>> Yes,
>>
>>> Assuming so, with it removed Reviewed-by: Jan Beulich <jbeulich@suse.com>
>> Why keep the disparity?
> Because there's no reason to clear AC for 32-bit guests.

Nor 64bit guests.  A 64bit PV kernel should be acutely aware it is
running in ring3, and suitably audit flags at each of its entry points,
as all entry points have to do.

But as with all these hidden gems in the PV ABI, that ship has long
since sailed.

I will return it to how it was, although keeping the named constants.

~Andrew


* [PATCH for-4.7 v3 2/2] xen/x86: Introduce a new VMASSIST for architectural behaviour of iopl
  2016-04-07 21:55   ` Jan Beulich
  2016-04-07 22:30     ` Andrew Cooper
@ 2016-04-08  9:30     ` Andrew Cooper
From: Andrew Cooper @ 2016-04-08  9:30 UTC
  To: Xen-devel; +Cc: Andrew Cooper

The existing vIOPL interface is hard to use, and need not be.

Introduce a VMASSIST with which a guest can opt in to vIOPL behaviour
consistent with native hardware.

Specifically:
 - virtual iopl updated from do_iret() hypercalls.
 - virtual iopl reported in bounce frames.
 - guest kernels assumed to be level 0 for the purpose of iopl checks.

v->arch.pv_vcpu.iopl is altered to store the IOPL shifted as it would appear
in eflags, for the benefit of the assembly code.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jan Beulich <JBeulich@suse.com>
---
v2:
 * Shift vcpu.iopl to match eflags.
 * Factor out iopl_ok().
 * Use CMOVcc in assembly code.
v3:
 * Don't clear AC for 32bit failsafe callback.

Along with this, I have functional tests for both vIOPL interfaces in PV
guests.
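
For completeness, a sketch of the guest side (not part of this series): a
kernel opts in via the existing vm_assist hypercall, e.g.

    /* Enable architectural IOPL handling; fall back to the legacy
     * PHYSDEVOP_set_iopl interface on hypervisors without the assist. */
    if ( HYPERVISOR_vm_assist(VMASST_CMD_enable,
                              VMASST_TYPE_architectural_iopl) != 0 )
        /* old hypervisor -- keep using PHYSDEVOP_set_iopl */;

after which IOPL is reported in bounce frames and updated from do_iret(), as
described above.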
---
 xen/arch/x86/domain.c              | 15 ++++++++++-----
 xen/arch/x86/physdev.c             |  2 +-
 xen/arch/x86/traps.c               | 15 +++++++++++++--
 xen/arch/x86/x86_64/asm-offsets.c  |  3 +++
 xen/arch/x86/x86_64/compat/entry.S |  7 ++++++-
 xen/arch/x86/x86_64/compat/traps.c |  4 ++++
 xen/arch/x86/x86_64/entry.S        |  7 ++++++-
 xen/arch/x86/x86_64/traps.c        |  3 +++
 xen/include/asm-x86/config.h       |  1 +
 xen/include/asm-x86/domain.h       |  4 +++-
 xen/include/public/xen.h           |  8 ++++++++
 11 files changed, 58 insertions(+), 11 deletions(-)

diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index a4f6db2..9e436ef 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -994,7 +994,7 @@ int arch_set_info_guest(
     init_int80_direct_trap(v);
 
     /* IOPL privileges are virtualised. */
-    v->arch.pv_vcpu.iopl = (v->arch.user_regs.eflags >> 12) & 3;
+    v->arch.pv_vcpu.iopl = v->arch.user_regs.eflags & X86_EFLAGS_IOPL;
     v->arch.user_regs.eflags &= ~X86_EFLAGS_IOPL;
 
     /* Ensure real hardware interrupts are enabled. */
@@ -1735,8 +1735,10 @@ static void load_segments(struct vcpu *n)
             cs_and_mask = (unsigned short)regs->cs |
                 ((unsigned int)vcpu_info(n, evtchn_upcall_mask) << 16);
             /* Fold upcall mask into RFLAGS.IF. */
-            eflags  = regs->_eflags & ~X86_EFLAGS_IF;
+            eflags  = regs->_eflags & ~(X86_EFLAGS_IF|X86_EFLAGS_IOPL);
             eflags |= !vcpu_info(n, evtchn_upcall_mask) << 9;
+            if ( VM_ASSIST(n->domain, architectural_iopl) )
+                eflags |= n->arch.pv_vcpu.iopl;
 
             if ( !ring_1(regs) )
             {
@@ -1763,7 +1765,8 @@ static void load_segments(struct vcpu *n)
                 vcpu_info(n, evtchn_upcall_mask) = 1;
 
             regs->entry_vector |= TRAP_syscall;
-            regs->_eflags      &= 0xFFFCBEFFUL;
+            regs->_eflags      &= ~(X86_EFLAGS_VM|X86_EFLAGS_RF|X86_EFLAGS_NT|
+                                    X86_EFLAGS_IOPL|X86_EFLAGS_TF);
             regs->ss            = FLAT_COMPAT_KERNEL_SS;
             regs->_esp          = (unsigned long)(esp-7);
             regs->cs            = FLAT_COMPAT_KERNEL_CS;
@@ -1781,8 +1784,10 @@ static void load_segments(struct vcpu *n)
             ((unsigned long)vcpu_info(n, evtchn_upcall_mask) << 32);
 
         /* Fold upcall mask into RFLAGS.IF. */
-        rflags  = regs->rflags & ~X86_EFLAGS_IF;
+        rflags  = regs->rflags & ~(X86_EFLAGS_IF|X86_EFLAGS_IOPL);
         rflags |= !vcpu_info(n, evtchn_upcall_mask) << 9;
+        if ( VM_ASSIST(n->domain, architectural_iopl) )
+            rflags |= n->arch.pv_vcpu.iopl;
 
         if ( put_user(regs->ss,            rsp- 1) |
              put_user(regs->rsp,           rsp- 2) |
@@ -1806,7 +1811,7 @@ static void load_segments(struct vcpu *n)
 
         regs->entry_vector |= TRAP_syscall;
         regs->rflags       &= ~(X86_EFLAGS_AC|X86_EFLAGS_VM|X86_EFLAGS_RF|
-                                X86_EFLAGS_NT|X86_EFLAGS_TF);
+                                X86_EFLAGS_NT|X86_EFLAGS_IOPL|X86_EFLAGS_TF);
         regs->ss            = FLAT_KERNEL_SS;
         regs->rsp           = (unsigned long)(rsp-11);
         regs->cs            = FLAT_KERNEL_CS;
diff --git a/xen/arch/x86/physdev.c b/xen/arch/x86/physdev.c
index 1cb9b58..5a49796 100644
--- a/xen/arch/x86/physdev.c
+++ b/xen/arch/x86/physdev.c
@@ -529,7 +529,7 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
         if ( set_iopl.iopl > 3 )
             break;
         ret = 0;
-        curr->arch.pv_vcpu.iopl = set_iopl.iopl;
+        curr->arch.pv_vcpu.iopl = MASK_INSR(set_iopl.iopl, X86_EFLAGS_IOPL);
         break;
     }
 
diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index 8125c53..7861a3c 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -1779,6 +1779,17 @@ static int read_gate_descriptor(unsigned int gate_sel,
     return 1;
 }
 
+/* Perform IOPL check between the vcpu's shadowed IOPL, and the assumed cpl. */
+static bool_t iopl_ok(const struct vcpu *v, const struct cpu_user_regs *regs)
+{
+    unsigned int cpl = guest_kernel_mode(v, regs) ?
+        (VM_ASSIST(v->domain, architectural_iopl) ? 0 : 1) : 3;
+
+    ASSERT((v->arch.pv_vcpu.iopl & ~X86_EFLAGS_IOPL) == 0);
+
+    return IOPL(cpl) <= v->arch.pv_vcpu.iopl;
+}
+
 /* Has the guest requested sufficient permission for this I/O access? */
 static int guest_io_okay(
     unsigned int port, unsigned int bytes,
@@ -1788,7 +1799,7 @@ static int guest_io_okay(
     int user_mode = !(v->arch.flags & TF_kernel_mode);
 #define TOGGLE_MODE() if ( user_mode ) toggle_guest_mode(v)
 
-    if ( v->arch.pv_vcpu.iopl >= (guest_kernel_mode(v, regs) ? 1 : 3) )
+    if ( iopl_ok(v, regs) )
         return 1;
 
     if ( v->arch.pv_vcpu.iobmp_limit > (port + bytes) )
@@ -2346,7 +2357,7 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
 
     case 0xfa: /* CLI */
     case 0xfb: /* STI */
-        if ( v->arch.pv_vcpu.iopl < (guest_kernel_mode(v, regs) ? 1 : 3) )
+        if ( !iopl_ok(v, regs) )
             goto fail;
         /*
          * This is just too dangerous to allow, in my opinion. Consider if the
diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c
index 447c650..fa37ee0 100644
--- a/xen/arch/x86/x86_64/asm-offsets.c
+++ b/xen/arch/x86/x86_64/asm-offsets.c
@@ -86,6 +86,7 @@ void __dummy__(void)
     OFFSET(VCPU_trap_ctxt, struct vcpu, arch.pv_vcpu.trap_ctxt);
     OFFSET(VCPU_kernel_sp, struct vcpu, arch.pv_vcpu.kernel_sp);
     OFFSET(VCPU_kernel_ss, struct vcpu, arch.pv_vcpu.kernel_ss);
+    OFFSET(VCPU_iopl, struct vcpu, arch.pv_vcpu.iopl);
     OFFSET(VCPU_guest_context_flags, struct vcpu, arch.vgc_flags);
     OFFSET(VCPU_nmi_pending, struct vcpu, nmi_pending);
     OFFSET(VCPU_mce_pending, struct vcpu, mce_pending);
@@ -166,4 +167,6 @@ void __dummy__(void)
     OFFSET(MB_flags, multiboot_info_t, flags);
     OFFSET(MB_cmdline, multiboot_info_t, cmdline);
     OFFSET(MB_mem_lower, multiboot_info_t, mem_lower);
+
+    OFFSET(DOMAIN_vm_assist, struct domain, vm_assist);
 }
diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S
index fd25e84..0ff6818 100644
--- a/xen/arch/x86/x86_64/compat/entry.S
+++ b/xen/arch/x86/x86_64/compat/entry.S
@@ -263,6 +263,7 @@ compat_create_bounce_frame:
         movl  UREGS_rsp+8(%rsp),%esi
 .Lft4:  mov   UREGS_ss+8(%rsp),%fs
 2:
+        movq  VCPU_domain(%rbx),%r8
         subl  $3*4,%esi
         movq  VCPU_vcpu_info(%rbx),%rax
         pushq COMPAT_VCPUINFO_upcall_mask(%rax)
@@ -277,9 +278,13 @@ compat_create_bounce_frame:
         testb %al,%al                   # Bits 0-7: saved_upcall_mask
         setz  %ch                       # %ch == !saved_upcall_mask
         movl  UREGS_eflags+8(%rsp),%eax
-        andl  $~X86_EFLAGS_IF,%eax
+        andl  $~(X86_EFLAGS_IF|X86_EFLAGS_IOPL),%eax
         addb  %ch,%ch                   # Bit 9 (EFLAGS.IF)
         orb   %ch,%ah                   # Fold EFLAGS.IF into %eax
+        xorl  %ecx,%ecx                 # if ( VM_ASSIST(v->domain, architectural_iopl) )
+        testb $1 << VMASST_TYPE_architectural_iopl,DOMAIN_vm_assist(%r8)
+        cmovnzl VCPU_iopl(%rbx),%ecx    # Bits 13:12 (EFLAGS.IOPL)
+        orl   %ecx,%eax                 # Fold EFLAGS.IOPL into %eax
 .Lft6:  movl  %eax,%fs:2*4(%rsi)        # EFLAGS
         movl  UREGS_rip+8(%rsp),%eax
 .Lft7:  movl  %eax,%fs:(%rsi)           # EIP
diff --git a/xen/arch/x86/x86_64/compat/traps.c b/xen/arch/x86/x86_64/compat/traps.c
index bbf18e4..a6afb26 100644
--- a/xen/arch/x86/x86_64/compat/traps.c
+++ b/xen/arch/x86/x86_64/compat/traps.c
@@ -99,6 +99,10 @@ unsigned int compat_iret(void)
         domain_crash(v->domain);
         return 0;
     }
+
+    if ( VM_ASSIST(v->domain, architectural_iopl) )
+        v->arch.pv_vcpu.iopl = eflags & X86_EFLAGS_IOPL;
+
     regs->_eflags = (eflags & ~X86_EFLAGS_IOPL) | X86_EFLAGS_IF;
 
     if ( unlikely(eflags & X86_EFLAGS_VM) )
diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
index b0e7257..6866e8f 100644
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -346,6 +346,7 @@ __UNLIKELY_END(create_bounce_frame_bad_sp)
         subq  $40,%rsi
         movq  UREGS_ss+8(%rsp),%rax
         ASM_STAC
+        movq  VCPU_domain(%rbx),%rdi
 .Lft2:  movq  %rax,32(%rsi)             # SS
         movq  UREGS_rsp+8(%rsp),%rax
 .Lft3:  movq  %rax,24(%rsi)             # RSP
@@ -362,9 +363,13 @@ __UNLIKELY_END(create_bounce_frame_bad_sp)
         testb $0xFF,%al                 # Bits 0-7: saved_upcall_mask
         setz  %ch                       # %ch == !saved_upcall_mask
         movl  UREGS_eflags+8(%rsp),%eax
-        andl  $~X86_EFLAGS_IF,%eax
+        andl  $~(X86_EFLAGS_IF|X86_EFLAGS_IOPL),%eax
         addb  %ch,%ch                   # Bit 9 (EFLAGS.IF)
         orb   %ch,%ah                   # Fold EFLAGS.IF into %eax
+        xorl  %ecx,%ecx                 # if ( VM_ASSIST(v->domain, architectural_iopl) )
+        testb $1 << VMASST_TYPE_architectural_iopl,DOMAIN_vm_assist(%rdi)
+        cmovnzl VCPU_iopl(%rbx),%ecx    # Bits 13:12 (EFLAGS.IOPL)
+        orl   %ecx,%eax                 # Fold EFLAGS.IOPL into %eax
 .Lft5:  movq  %rax,16(%rsi)             # RFLAGS
         movq  UREGS_rip+8(%rsp),%rax
 .Lft6:  movq  %rax,(%rsi)               # RIP
diff --git a/xen/arch/x86/x86_64/traps.c b/xen/arch/x86/x86_64/traps.c
index 26e2dd7..19f58a1 100644
--- a/xen/arch/x86/x86_64/traps.c
+++ b/xen/arch/x86/x86_64/traps.c
@@ -310,6 +310,9 @@ unsigned long do_iret(void)
         toggle_guest_mode(v);
     }
 
+    if ( VM_ASSIST(v->domain, architectural_iopl) )
+        v->arch.pv_vcpu.iopl = iret_saved.rflags & X86_EFLAGS_IOPL;
+
     regs->rip    = iret_saved.rip;
     regs->cs     = iret_saved.cs | 3; /* force guest privilege */
     regs->rflags = ((iret_saved.rflags & ~(X86_EFLAGS_IOPL|X86_EFLAGS_VM))
diff --git a/xen/include/asm-x86/config.h b/xen/include/asm-x86/config.h
index f6f5a10..c10129d 100644
--- a/xen/include/asm-x86/config.h
+++ b/xen/include/asm-x86/config.h
@@ -331,6 +331,7 @@ extern unsigned long xen_phys_start;
                                   (1UL << VMASST_TYPE_4gb_segments_notify) | \
                                   (1UL << VMASST_TYPE_writable_pagetables) | \
                                   (1UL << VMASST_TYPE_pae_extended_cr3)    | \
+                                  (1UL << VMASST_TYPE_architectural_iopl)  | \
                                   (1UL << VMASST_TYPE_m2p_strict))
 #define VM_ASSIST_VALID          NATIVE_VM_ASSIST_VALID
 #define COMPAT_VM_ASSIST_VALID   (NATIVE_VM_ASSIST_VALID & \
diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
index de60def..5f2f27f 100644
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -470,7 +470,9 @@ struct pv_vcpu
     /* I/O-port access bitmap. */
     XEN_GUEST_HANDLE(uint8) iobmp; /* Guest kernel vaddr of the bitmap. */
     unsigned int iobmp_limit; /* Number of ports represented in the bitmap. */
-    unsigned int iopl;        /* Current IOPL for this VCPU. */
+#define IOPL(val) MASK_INSR(val, X86_EFLAGS_IOPL)
+    unsigned int iopl;        /* Current IOPL for this VCPU, shifted left by
+                               * 12 to match the eflags register. */
 
     /* Current LDT details. */
     unsigned long shadow_ldt_mapcnt;
diff --git a/xen/include/public/xen.h b/xen/include/public/xen.h
index 435d789..97a5ae5 100644
--- a/xen/include/public/xen.h
+++ b/xen/include/public/xen.h
@@ -503,6 +503,14 @@ DEFINE_XEN_GUEST_HANDLE(mmuext_op_t);
 #define VMASST_TYPE_pae_extended_cr3     3
 
 /*
+ * x86 guests: Sane behaviour for virtual iopl
+ *  - virtual iopl updated from do_iret() hypercalls.
+ *  - virtual iopl reported in bounce frames.
+ *  - guest kernels assumed to be level 0 for the purpose of iopl checks.
+ */
+#define VMASST_TYPE_architectural_iopl   4
+
+/*
  * x86/64 guests: strictly hide M2P from user mode.
  * This allows the guest to control respective hypervisor behavior:
  * - when not set, L4 tables get created with the respective slot blank,
-- 
2.1.4

