All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] enable port accesses with (almost) full register context
@ 2006-09-11 16:12 Jan Beulich
  2006-09-11 16:19 ` Keir Fraser
  0 siblings, 1 reply; 15+ messages in thread
From: Jan Beulich @ 2006-09-11 16:12 UTC (permalink / raw)
  To: xen-devel

[-- Attachment #1: Type: text/plain, Size: 243 bytes --]

This helped HP get certain system management software going (in
dom0) that triggers SMIs and depends upon register values other than
the port number and data being visible to the SMI handler.

Signed-off-by: Jan Beulich <jbeulich@novell.com>


[-- Attachment #2: xen-x86-io-register-context.patch --]
[-- Type: text/plain, Size: 26518 bytes --]

From:  Jan Beulich
Bugzilla #192150

Index: 2006-09-11/xen/arch/x86/domain.c
===================================================================
--- 2006-09-11.orig/xen/arch/x86/domain.c	2006-09-11 13:39:58.000000000 +0200
+++ 2006-09-11/xen/arch/x86/domain.c	2006-09-11 13:42:13.000000000 +0200
@@ -210,10 +210,17 @@ int arch_domain_create(struct domain *d)
     if ( !is_idle_domain(d) )
     {
         d->arch.ioport_caps = 
-            rangeset_new(d, "I/O Ports", RANGESETF_prettyprint_hex);
+            rangeset_new(d, "I/O Ports access control", RANGESETF_prettyprint_hex);
         if ( d->arch.ioport_caps == NULL )
             goto fail_nomem;
 
+        d->arch.ioport_normal = 
+            rangeset_new(d, "I/O Ports access mechanism", RANGESETF_prettyprint_hex);
+        if ( d->arch.ioport_normal == NULL )
+            goto fail_nomem;
+        if (ioports_set_normal(d, 0, 0xFFFF))
+            goto fail_nomem;
+
         if ( (d->shared_info = alloc_xenheap_page()) == NULL )
             goto fail_nomem;
 
Index: 2006-09-11/xen/arch/x86/domain_build.c
===================================================================
--- 2006-09-11.orig/xen/arch/x86/domain_build.c	2006-09-11 13:39:58.000000000 +0200
+++ 2006-09-11/xen/arch/x86/domain_build.c	2006-09-11 13:42:13.000000000 +0200
@@ -33,6 +33,8 @@
 extern unsigned long initial_images_nrpages(void);
 extern void discard_initial_images(void);
 
+struct rangeset *global_ioport_caps = NULL;
+
 static long dom0_nrpages;
 
 /*
@@ -64,9 +66,15 @@ integer_param("dom0_max_vcpus", opt_dom0
 static unsigned int opt_dom0_shadow;
 boolean_param("dom0_shadow", opt_dom0_shadow);
 
+static char opt_ioports_disable[200] = "";
+string_param("ioports_disable", opt_ioports_disable);
+
 static char opt_dom0_ioports_disable[200] = "";
 string_param("dom0_ioports_disable", opt_dom0_ioports_disable);
 
+static char opt_dom0_ioports_special[200] = "";
+string_param("dom0_ioports_special", opt_dom0_ioports_special);
+
 #if defined(__i386__)
 /* No ring-3 access in initial leaf page tables. */
 #define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
@@ -102,10 +110,10 @@ static struct page_info *alloc_chunk(str
     return page;
 }
 
-static void process_dom0_ioports_disable(void)
+static void process_ioports(char *opt)
 {
     unsigned long io_from, io_to;
-    char *t, *u, *s = opt_dom0_ioports_disable;
+    char *t, *u, *s = opt;
 
     if ( *s == '\0' )
         return;
@@ -117,7 +125,9 @@ static void process_dom0_ioports_disable
         {
         parse_error:
             printk("Invalid ioport range <%s> "
-                   "in dom0_ioports_disable, skipping\n", t);
+                   "in %sioports_%s, skipping\n", t,
+                   opt != opt_ioports_disable ? "dom0_" : "",
+                   opt != opt_dom0_ioports_special ? "disable" : "special");
             continue;
         }
 
@@ -131,11 +141,26 @@ static void process_dom0_ioports_disable
         if ( (*u != '\0') || (io_to < io_from) || (io_to >= 65536) )
             goto parse_error;
 
-        printk("Disabling dom0 access to ioport range %04lx-%04lx\n",
-            io_from, io_to);
+        if ( opt != opt_dom0_ioports_special )
+        {
+            printk("Disabling %saccess to ioport range %04lx-%04lx\n",
+                opt != opt_ioports_disable ? "dom0 " : "",
+                io_from, io_to);
 
-        if ( ioports_deny_access(dom0, io_from, io_to) != 0 )
-            BUG();
+            if ( opt == opt_ioports_disable
+                 && ioports_deny_access_all(io_from, io_to) != 0 )
+                BUG();
+            if ( ioports_deny_access(dom0, io_from, io_to) != 0 )
+                BUG();
+        }
+        else
+        {
+            printk("Setting special dom0 access for ioport range %04lx-%04lx\n",
+                io_from, io_to);
+
+            if ( ioports_set_special(dom0, io_from, io_to) != 0 )
+                BUG();
+        }
     }
 }
 
@@ -815,6 +840,13 @@ int construct_dom0(struct domain *d,
 
     rc = 0;
 
+    /* Command-line I/O ranges. */
+    global_ioport_caps = rangeset_new(NULL,
+                                      "global I/O Port access control",
+                                      RANGESETF_prettyprint_hex);
+    BUG_ON(!global_ioport_caps);
+    rc |= ioports_permit_access_all(0, 0xFFFF);
+
     /* DOM0 is permitted full I/O capabilities. */
     rc |= ioports_permit_access(dom0, 0, 0xFFFF);
     rc |= iomem_permit_access(dom0, 0UL, ~0UL);
@@ -824,15 +856,20 @@ int construct_dom0(struct domain *d,
      * Modify I/O port access permissions.
      */
     /* Master Interrupt Controller (PIC). */
+    rc |= ioports_deny_access_all(0x20, 0x21);
     rc |= ioports_deny_access(dom0, 0x20, 0x21);
     /* Slave Interrupt Controller (PIC). */
+    rc |= ioports_deny_access_all(0xA0, 0xA1);
     rc |= ioports_deny_access(dom0, 0xA0, 0xA1);
     /* Interval Timer (PIT). */
+    rc |= ioports_deny_access_all(0x40, 0x43);
     rc |= ioports_deny_access(dom0, 0x40, 0x43);
     /* PIT Channel 2 / PC Speaker Control. */
+    rc |= ioports_deny_access_all(0x61, 0x61);
     rc |= ioports_deny_access(dom0, 0x61, 0x61);
     /* Command-line I/O ranges. */
-    process_dom0_ioports_disable();
+    process_ioports(opt_ioports_disable);
+    process_ioports(opt_dom0_ioports_disable);
 
     /*
      * Modify I/O memory access permissions.
@@ -853,6 +890,9 @@ int construct_dom0(struct domain *d,
 
     BUG_ON(rc != 0);
 
+    /* Command-line I/O ranges that require special (full-context) access. */
+    process_ioports(opt_dom0_ioports_special);
+
     return 0;
 }
 
Index: 2006-09-11/xen/arch/x86/domctl.c
===================================================================
--- 2006-09-11.orig/xen/arch/x86/domctl.c	2006-08-29 10:46:45.000000000 +0200
+++ 2006-09-11/xen/arch/x86/domctl.c	2006-09-11 13:47:21.000000000 +0200
@@ -62,10 +62,27 @@ long arch_do_domctl(
 
         if ( np == 0 )
             ret = 0;
-        else if ( domctl->u.ioport_permission.allow_access )
-            ret = ioports_permit_access(d, fp, fp + np - 1);
-        else
+        else switch ( domctl->u.ioport_permission.allow_access )
+        {
+        case IOPORT_ALLOW_ACCESS:
+            if ( ioports_any_access_permitted(fp, fp + np - 1) )
+                ret = ioports_permit_access(d, fp, fp + np - 1);
+            else
+                ret = -EPERM;
+            break;
+        case IOPORT_DENY_ACCESS:
             ret = ioports_deny_access(d, fp, fp + np - 1);
+            break;
+        case IOPORT_SET_NORMAL:
+            ret = ioports_set_normal(d, fp, fp + np - 1);
+            break;
+        case IOPORT_SET_SPECIAL:
+            ret = ioports_set_special(d, fp, fp + np - 1);
+            break;
+        default:
+            ret = -EINVAL;
+            break;
+        }
 
         put_domain(d);
     }
Index: 2006-09-11/xen/arch/x86/traps.c
===================================================================
--- 2006-09-11.orig/xen/arch/x86/traps.c	2006-09-11 13:42:08.000000000 +0200
+++ 2006-09-11/xen/arch/x86/traps.c	2006-09-11 13:44:18.000000000 +0200
@@ -1002,16 +1002,76 @@ static inline int admin_io_okay(
     return ioports_access_permitted(v->domain, port, port + bytes - 1);
 }
 
+/* Can the I/O access be carried out without full register context? */
+static inline int normal_io_okay(
+    unsigned int port, unsigned int bytes, struct vcpu *v)
+{
+    return ioports_normal_access(v->domain, port, port + bytes - 1);
+}
+
 /* Check admin limits. Silently fail the access if it is disallowed. */
-#define inb_user(_p, _d, _r) (admin_io_okay(_p, 1, _d, _r) ? inb(_p) : ~0)
-#define inw_user(_p, _d, _r) (admin_io_okay(_p, 2, _d, _r) ? inw(_p) : ~0)
-#define inl_user(_p, _d, _r) (admin_io_okay(_p, 4, _d, _r) ? inl(_p) : ~0)
-#define outb_user(_v, _p, _d, _r) \
-    (admin_io_okay(_p, 1, _d, _r) ? outb(_v, _p) : ((void)0))
-#define outw_user(_v, _p, _d, _r) \
-    (admin_io_okay(_p, 2, _d, _r) ? outw(_v, _p) : ((void)0))
-#define outl_user(_v, _p, _d, _r) \
-    (admin_io_okay(_p, 4, _d, _r) ? outl(_v, _p) : ((void)0))
+static inline unsigned char inb_user(
+    unsigned int port, int string,
+    struct vcpu *v, struct cpu_user_regs *regs)
+{
+    if ( admin_io_okay(port, 1, v, regs) )
+        return string || normal_io_okay(port, 1, v)
+               ? inb(port)
+               : inb_special(port, regs);
+    return ~0;
+}
+
+static inline unsigned short inw_user(
+    unsigned int port, int string,
+    struct vcpu *v, struct cpu_user_regs *regs)
+{
+    if ( admin_io_okay(port, 2, v, regs) )
+        return string || normal_io_okay(port, 2, v)
+               ? inw(port)
+               : inw_special(port, regs);
+    return ~0;
+}
+
+static inline unsigned int inl_user(
+    unsigned int port, int string,
+    struct vcpu *v, struct cpu_user_regs *regs)
+{
+    if ( admin_io_okay(port, 4, v, regs) )
+        return string || normal_io_okay(port, 4, v)
+               ? inl(port)
+               : inl_special(port, regs);
+    return ~0;
+}
+
+static inline void outb_user(
+    unsigned char value, unsigned int port, int string,
+    struct vcpu *v, struct cpu_user_regs *regs)
+{
+    if ( admin_io_okay(port, 1, v, regs) )
+        string || normal_io_okay(port, 1, v)
+        ? outb(value, port)
+        : outb_special(value, port, regs);
+}
+
+static inline void outw_user(
+    unsigned short value, unsigned int port, int string,
+    struct vcpu *v, struct cpu_user_regs *regs)
+{
+    if ( admin_io_okay(port, 2, v, regs) )
+        string || normal_io_okay(port, 2, v)
+        ? outw(value, port)
+        : outw_special(value, port, regs);
+}
+
+static inline void outl_user(
+    unsigned int value, unsigned int port, int string,
+    struct vcpu *v, struct cpu_user_regs *regs)
+{
+    if ( admin_io_okay(port, 4, v, regs) )
+        string || normal_io_okay(port, 4, v)
+        ? outl(value, port)
+        : outl_special(value, port, regs);
+}
 
 /* Instruction fetch with error handling. */
 #define insn_fetch(_type, _size, _ptr)                                      \
@@ -1087,13 +1140,13 @@ static int emulate_privileged_op(struct 
             switch ( op_bytes )
             {
             case 1:
-                data = (u8)inb_user((u16)regs->edx, v, regs);
+                data = (u8)inb_user((u16)regs->edx, 1, v, regs);
                 break;
             case 2:
-                data = (u16)inw_user((u16)regs->edx, v, regs);
+                data = (u16)inw_user((u16)regs->edx, 1, v, regs);
                 break;
             case 4:
-                data = (u32)inl_user((u16)regs->edx, v, regs);
+                data = (u32)inl_user((u16)regs->edx, 1, v, regs);
                 break;
             }
             if ( (rc = copy_to_user((void *)regs->edi, &data, op_bytes)) != 0 )
@@ -1119,13 +1172,13 @@ static int emulate_privileged_op(struct 
             switch ( op_bytes )
             {
             case 1:
-                outb_user((u8)data, (u16)regs->edx, v, regs);
+                outb_user((u8)data, (u16)regs->edx, 1, v, regs);
                 break;
             case 2:
-                outw_user((u16)data, (u16)regs->edx, v, regs);
+                outw_user((u16)data, (u16)regs->edx, 1, v, regs);
                 break;
             case 4:
-                outl_user((u32)data, (u16)regs->edx, v, regs);
+                outl_user((u32)data, (u16)regs->edx, 1, v, regs);
                 break;
             }
             regs->esi += (int)((regs->eflags & EF_DF) ? -op_bytes : op_bytes);
@@ -1156,14 +1209,14 @@ static int emulate_privileged_op(struct 
         {
         case 1:
             regs->eax &= ~0xffUL;
-            regs->eax |= (u8)inb_user(port, v, regs);
+            regs->eax |= (u8)inb_user(port, 0, v, regs);
             break;
         case 2:
             regs->eax &= ~0xffffUL;
-            regs->eax |= (u16)inw_user(port, v, regs);
+            regs->eax |= (u16)inw_user(port, 0, v, regs);
             break;
         case 4:
-            regs->eax = (u32)inl_user(port, v, regs);
+            regs->eax = (u32)inl_user(port, 0, v, regs);
             break;
         }
         goto done;
@@ -1184,13 +1237,13 @@ static int emulate_privileged_op(struct 
         switch ( op_bytes )
         {
         case 1:
-            outb_user((u8)regs->eax, port, v, regs);
+            outb_user((u8)regs->eax, port, 0, v, regs);
             break;
         case 2:
-            outw_user((u16)regs->eax, port, v, regs);
+            outw_user((u16)regs->eax, port, 0, v, regs);
             break;
         case 4:
-            outl_user((u32)regs->eax, port, v, regs);
+            outl_user((u32)regs->eax, port, 0, v, regs);
             break;
         }
         goto done;
Index: 2006-09-11/xen/arch/x86/x86_32/Makefile
===================================================================
--- 2006-09-11.orig/xen/arch/x86/x86_32/Makefile	2006-09-11 13:39:58.000000000 +0200
+++ 2006-09-11/xen/arch/x86/x86_32/Makefile	2006-09-11 13:42:13.000000000 +0200
@@ -1,5 +1,6 @@
 obj-y += domain_page.o
 obj-y += entry.o
+obj-y += io.o
 obj-y += mm.o
 obj-y += seg_fixup.o
 obj-y += traps.o
Index: 2006-09-11/xen/arch/x86/x86_32/io.S
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ 2006-09-11/xen/arch/x86/x86_32/io.S	2006-09-11 13:42:13.000000000 +0200
@@ -0,0 +1,163 @@
+/*
+ * Special (full-context) I/O handling routines.
+ *
+ * Copyright (c) 2006, Novell, Inc.
+ */
+
+#include <xen/config.h>
+#include <asm/asm_defns.h>
+
+#define PROTECTED_PORT 0x20 // must be some port no domain will ever be granted access
+
+ .macro in_special w, a
+	movl	2*4(%esp), %ecx
+	pushl	%ebx
+	movl	UREGS_ebx(%ecx), %ebx
+	movl	UREGS_edx(%ecx), %edx
+	pushl	%ebp
+	movl	UREGS_ebp(%ecx), %ebp
+	movl	UREGS_eax(%ecx), %eax
+	pushl	%esi
+	movl	UREGS_esi(%ecx), %esi
+	movw	4*4(%esp), %dx
+	pushl	%edi
+	testb	%dh, %dh
+	movl	UREGS_edi(%ecx), %edi
+	jz	.Ltry_lock_in\w
+	movl	UREGS_ecx(%ecx), %ecx
+	in\w	%dx, %\a
+ .ifndef .Lin_restore
+.Lin_restore:
+	movl	%ecx, 5*4(%esp)
+	movl	6*4(%esp), %ecx
+	movl	%edx, UREGS_edx(%ecx)
+	movl	5*4(%esp), %edx
+	movl	%edi, UREGS_edi(%ecx)
+	popl	%edi
+	movl	%esi, UREGS_esi(%ecx)
+	popl	%esi
+	movl	%ebp, UREGS_ebp(%ecx)
+	popl	%ebp
+	movl	%ebx, UREGS_ebx(%ecx)
+	popl	%ebx
+	movl	%eax, UREGS_eax(%ecx)
+	movl	%edx, UREGS_ecx(%ecx)
+	ret
+ .else
+ 	jmp	.Lin_restore
+ .endif
+.Ltry_lock_in\w:
+#ifdef CONFIG_SMP
+	movb	$PROTECTED_PORT, %al
+	lock cmpxchgb %dl, .Lport_in\w
+	jne	.Lretry_in\w
+	movl	UREGS_eax(%ecx), %eax
+#else
+	movb	%dl, .Lport_in\w
+#endif
+	movl	UREGS_edx(%ecx), %edx
+	movl	UREGS_ecx(%ecx), %ecx
+	jmp	.Limm_in\w
+ .data # .section .wtext, "axw"
+.Limm_in\w:
+	in\w	$PROTECTED_PORT, %\a
+ .equiv .Lport_in\w, . - 1
+#ifdef CONFIG_SMP
+	movb	$PROTECTED_PORT, .Lport_in\w
+#endif
+ 	jmp	.Lin_restore
+ .previous
+#ifdef CONFIG_SMP
+.Lretry_in\w:
+	pause
+	jmp	.Ltry_lock_in\w
+#endif
+ .endm
+
+ENTRY(inb_special)
+	in_special b, al
+
+ENTRY(inw_special)
+	in_special w, ax
+
+ENTRY(inl_special)
+	in_special l, eax
+
+ .macro out_special w, a
+	movl	3*4(%esp), %ecx
+	pushl	%ebx
+	movl	UREGS_ebx(%ecx), %ebx
+	movl	UREGS_edx(%ecx), %edx
+	pushl	%ebp
+	movl	UREGS_ebp(%ecx), %ebp
+ .ifnes "\w", "l"
+	movl	UREGS_eax(%ecx), %eax
+ .endif
+	pushl	%esi
+	movl	UREGS_esi(%ecx), %esi
+	movw	5*4(%esp), %dx
+	pushl	%edi
+	testb	%dh, %dh
+	movl	UREGS_edi(%ecx), %edi
+	mov\w	5*4(%esp), %\a
+	jz	.Llock_out\w
+	movl	UREGS_ecx(%ecx), %ecx
+	out\w	%\a, %dx
+ .ifndef .Lout_restore
+.Lout_restore:
+	movl	%ecx, 5*4(%esp)
+	movl	7*4(%esp), %ecx
+	movl	%edx, UREGS_edx(%ecx)
+	movl	5*4(%esp), %edx
+	movl	%edi, UREGS_edi(%ecx)
+	popl	%edi
+	movl	%esi, UREGS_esi(%ecx)
+	popl	%esi
+	movl	%ebp, UREGS_ebp(%ecx)
+	popl	%ebp
+	movl	%ebx, UREGS_ebx(%ecx)
+	popl	%ebx
+	movl	%eax, UREGS_eax(%ecx)
+	movl	%edx, UREGS_ecx(%ecx)
+	ret
+ .else
+	jmp	.Lout_restore
+ .endif
+.Llock_out\w:
+#ifdef CONFIG_SMP
+	pushl	%eax
+.Ltry_lock_out\w:
+	movb	$PROTECTED_PORT, %al
+	lock cmpxchgb %dl, .Lport_out\w
+	jne	.Lretry_out\w
+	popl	%eax
+#else
+	movb	%dl, .Lport_out\w
+#endif
+	movl	UREGS_edx(%ecx), %edx
+	movl	UREGS_ecx(%ecx), %ecx
+	jmp	.Limm_out\w
+ .data # .section .wtext, "axw"
+.Limm_out\w:
+	out\w	%\a, $PROTECTED_PORT
+ .equiv .Lport_out\w, . - 1
+#ifdef CONFIG_SMP
+	movb	$PROTECTED_PORT, .Lport_out\w
+#endif
+	jmp	.Lout_restore
+ .previous
+#ifdef CONFIG_SMP
+.Lretry_out\w:
+	pause
+	jmp	.Ltry_lock_out\w
+#endif
+ .endm
+
+ENTRY(outb_special)
+	out_special b, al
+
+ENTRY(outw_special)
+	out_special w, ax
+
+ENTRY(outl_special)
+	out_special l, eax
Index: 2006-09-11/xen/arch/x86/x86_64/Makefile
===================================================================
--- 2006-09-11.orig/xen/arch/x86/x86_64/Makefile	2006-09-11 13:39:58.000000000 +0200
+++ 2006-09-11/xen/arch/x86/x86_64/Makefile	2006-09-11 13:42:13.000000000 +0200
@@ -1,3 +1,4 @@
 obj-y += entry.o
+obj-y += io.o
 obj-y += mm.o
 obj-y += traps.o
Index: 2006-09-11/xen/arch/x86/x86_64/io.S
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ 2006-09-11/xen/arch/x86/x86_64/io.S	2006-09-11 13:42:13.000000000 +0200
@@ -0,0 +1,210 @@
+/*
+ * Special (full-context) I/O handling routines.
+ *
+ * Copyright (c) 2006, Novell, Inc.
+ */
+
+#include <xen/config.h>
+#include <asm/asm_defns.h>
+
+#define PROTECTED_PORT 0x20 // must be some port no domain will ever be granted access
+
+ .macro in_special w, a
+	pushq	%rbx
+	movq	UREGS_rbx(%rsi), %rbx
+	movq	UREGS_rdx(%rsi), %rdx
+	pushq	%rbp
+	movq	UREGS_rbp(%rsi), %rbp
+	movq	UREGS_rax(%rsi), %rax
+	pushq	%r12
+	movq	UREGS_r12(%rsi), %r12
+	movq	UREGS_r8(%rsi), %r8
+	pushq	%r13
+	movq	UREGS_r13(%rsi), %r13
+	movq	UREGS_r9(%rsi), %r9
+	pushq	%r14
+	movq	UREGS_r14(%rsi), %r14
+	movq	UREGS_r10(%rsi), %r10
+	movw	%di, %dx
+	pushq	%r15
+	movq	UREGS_r15(%rsi), %r15
+	movq	UREGS_r11(%rsi), %r11
+	testb	%dh, %dh
+	movq	UREGS_rcx(%rsi), %rcx
+	movq	UREGS_rdi(%rsi), %rdi
+	pushq	%rsi
+	jz	.Ltry_lock_in\w
+	movq	UREGS_rsi(%rsi), %rsi
+	in\w	%dx, %\a
+ .ifndef .Lin_restore
+.Lin_restore:
+	pushq	%rsi
+	movq	8(%rsp), %rsi
+	movq	%rdx, UREGS_rdx(%rsi)
+	popq	UREGS_rsi(%rsi)
+	movq	%rdi, UREGS_rdi(%rsi)
+	movq	%rcx, UREGS_rcx(%rsi)
+	popq	%rdx
+	movq	%r11, UREGS_r11(%rsi)
+	movq	%r15, UREGS_r15(%rsi)
+	popq	%r15
+	movq	%r10, UREGS_r10(%rsi)
+	movq	%r14, UREGS_r14(%rsi)
+	popq	%r14
+	movq	%r9, UREGS_r9(%rsi)
+	movq	%r13, UREGS_r13(%rsi)
+	popq	%r13
+	movq	%r8, UREGS_r8(%rsi)
+	movq	%r12, UREGS_r12(%rsi)
+	popq	%r12
+	movq	%rbp, UREGS_rbp(%rsi)
+	popq	%rbp
+	movq	%rbx, UREGS_rbx(%rsi)
+	popq	%rbx
+	movq	%rax, UREGS_rax(%rsi)
+	ret
+ .else
+ 	jmp	.Lin_restore
+ .endif
+.Ltry_lock_in\w:
+#ifdef CONFIG_SMP
+	movb	$PROTECTED_PORT, %al
+	lock cmpxchgb %dl, .Lport_in\w(%rip)
+	jne	.Lretry_in\w
+	movb	UREGS_rax(%rsi), %al
+#else
+	movb	%dl, .Lport_in\w(%rip)
+#endif
+	movq	UREGS_rdx(%rsi), %rdx
+	movq	UREGS_rsi(%rsi), %rsi
+	jmp	.Limm_in\w
+ .data # .section .wtext, "axw"
+.Limm_in\w:
+	in\w	$PROTECTED_PORT, %\a
+ .equiv .Lport_in\w, . - 1
+#ifdef CONFIG_SMP
+	movb	$PROTECTED_PORT, .Lport_in\w(%rip)
+#endif
+ 	jmp	.Lin_restore
+ .previous
+#ifdef CONFIG_SMP
+.Lretry_in\w:
+	pause
+	jmp	.Ltry_lock_in\w
+#endif
+ .endm
+
+ENTRY(inb_special)
+	in_special b, al
+
+ENTRY(inw_special)
+	in_special w, ax
+
+ENTRY(inl_special)
+	in_special l, eax
+
+ .macro out_special w, a, v
+	movq	%rdx, %rcx
+	pushq	%rbx
+	movq	UREGS_rbx(%rdx), %rbx
+	movq	UREGS_rdx(%rdx), %rdx
+	pushq	%rbp
+	movq	UREGS_rbp(%rcx), %rbp
+	movq	UREGS_rax(%rcx), %rax
+	pushq	%r12
+	movq	UREGS_r12(%rcx), %r12
+	movq	UREGS_r8(%rcx), %r8
+	pushq	%r13
+	movq	UREGS_r13(%rcx), %r13
+	movq	UREGS_r9(%rcx), %r9
+	pushq	%r14
+	movq	UREGS_r14(%rcx), %r14
+	movq	UREGS_r10(%rcx), %r10
+	movw	%si, %dx
+	pushq	%r15
+	movq	UREGS_r15(%rcx), %r15
+	movq	UREGS_r11(%rcx), %r11
+ .ifnes "\w", "l"
+	mov\w	%\v, %\a
+ .else
+ .ifnes "\v", "edi"
+ .err
+ .endif
+	shrdq	$32, %rdi, %rax
+	rolq	$32, %rax
+ .endif
+	testb	%dh, %dh
+	movq	UREGS_rsi(%rcx), %rsi
+	movq	UREGS_rdi(%rcx), %rdi
+	pushq	%rcx
+	jz	.Llock_out\w
+	movq	UREGS_rcx(%rcx), %rcx
+	out\w	%\a, %dx
+ .ifndef .Lout_restore
+.Lout_restore:
+	pushq	%rcx
+	movq	8(%rsp), %rcx
+	movq	%rdx, UREGS_rdx(%rcx)
+	popq	UREGS_rcx(%rcx)
+	movq	%rdi, UREGS_rdi(%rcx)
+	movq	%rsi, UREGS_rsi(%rcx)
+	popq	%rdx
+	movq	%r11, UREGS_r11(%rcx)
+	movq	%r15, UREGS_r15(%rcx)
+	popq	%r15
+	movq	%r10, UREGS_r10(%rcx)
+	movq	%r14, UREGS_r14(%rcx)
+	popq	%r14
+	movq	%r9, UREGS_r9(%rcx)
+	movq	%r13, UREGS_r13(%rcx)
+	popq	%r13
+	movq	%r8, UREGS_r8(%rcx)
+	movq	%r12, UREGS_r12(%rcx)
+	popq	%r12
+	movq	%rbp, UREGS_rbp(%rcx)
+	popq	%rbp
+	movq	%rbx, UREGS_rbx(%rcx)
+	popq	%rbx
+	movq	%rax, UREGS_rax(%rcx)
+	ret
+ .else
+	jmp	.Lout_restore
+ .endif
+.Llock_out\w:
+#ifdef CONFIG_SMP
+	pushq	%rax
+.Ltry_lock_out\w:
+	movb	$PROTECTED_PORT, %al
+	lock cmpxchgb %dl, .Lport_out\w(%rip)
+	jne	.Lretry_out\w
+	popq	%rax
+#else
+	movb	%dl, .Lport_out\w(%rip)
+#endif
+	movq	UREGS_rdx(%rcx), %rdx
+	movq	UREGS_rcx(%rcx), %rcx
+	jmp	.Limm_out\w
+ .data # .section .wtext, "axw"
+.Limm_out\w:
+	out\w	%\a, $PROTECTED_PORT
+ .equiv .Lport_out\w, . - 1
+#ifdef CONFIG_SMP
+	movb	$PROTECTED_PORT, .Lport_out\w(%rip)
+#endif
+	jmp	.Lout_restore
+ .previous
+#ifdef CONFIG_SMP
+.Lretry_out\w:
+	pause
+	jmp	.Ltry_lock_out\w
+#endif
+ .endm
+
+ENTRY(outb_special)
+	out_special b, al, dil
+
+ENTRY(outw_special)
+	out_special w, ax, di
+
+ENTRY(outl_special)
+	out_special l, eax, edi
Index: 2006-09-11/xen/include/asm-x86/domain.h
===================================================================
--- 2006-09-11.orig/xen/include/asm-x86/domain.h	2006-09-11 13:39:58.000000000 +0200
+++ 2006-09-11/xen/include/asm-x86/domain.h	2006-09-11 13:50:27.000000000 +0200
@@ -101,6 +101,9 @@ struct arch_domain
     /* I/O-port admin-specified access capabilities. */
     struct rangeset *ioport_caps;
 
+    /* I/O-port admin-specified non-special access requirements. */
+    struct rangeset *ioport_normal;
+
     /* HVM stuff */
     struct hvm_domain   hvm_domain;
 
Index: 2006-09-11/xen/include/asm-x86/io.h
===================================================================
--- 2006-09-11.orig/xen/include/asm-x86/io.h	2006-09-11 13:39:58.000000000 +0200
+++ 2006-09-11/xen/include/asm-x86/io.h	2006-09-11 13:42:13.000000000 +0200
@@ -5,6 +5,8 @@
 #include <xen/types.h>
 #include <asm/page.h>
 
+struct cpu_user_regs;
+
 /* We don't need real ioremap() on Xen/x86. */
 #define ioremap(x,l) (__va(x))
 #define iounmap(p)   ((void)0)
@@ -19,32 +21,34 @@
 #define __OUT1(s,x) \
 static inline void out##s(unsigned x value, unsigned short port) {
 
-#define __OUT2(s,s1,s2) \
-__asm__ __volatile__ ("out" #s " %" s1 "0,%" s2 "1"
+#define __OUT2(s,s1) \
+    __asm__ __volatile__ ("out" #s " %" s1 "0,%w1" : : "a" (value), "Nd" (port)); \
+}
 
 #define __OUT(s,s1,x) \
-__OUT1(s,x) __OUT2(s,s1,"w") : : "a" (value), "Nd" (port)); } \
-__OUT1(s##_p,x) __OUT2(s,s1,"w") : : "a" (value), "Nd" (port));} 
-
-#define __IN1(s) \
-static inline RETURN_TYPE in##s(unsigned short port) { RETURN_TYPE _v;
-
-#define __IN2(s,s1,s2) \
-__asm__ __volatile__ ("in" #s " %" s2 "1,%" s1 "0"
-
-#define __IN(s,s1,i...) \
-__IN1(s) __IN2(s,s1,"w") : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \
-__IN1(s##_p) __IN2(s,s1,"w") : "=a" (_v) : "Nd" (port) ,##i ); return _v; } 
-
-#define RETURN_TYPE unsigned char
-__IN(b,"")
-#undef RETURN_TYPE
-#define RETURN_TYPE unsigned short
-__IN(w,"")
-#undef RETURN_TYPE
-#define RETURN_TYPE unsigned int
-__IN(l,"")
-#undef RETURN_TYPE
+extern void out##s##_special(unsigned x value, unsigned short port, \
+                             struct cpu_user_regs *); \
+__OUT1(s,x) __OUT2(s,s1) \
+__OUT1(s##_p,x) __OUT2(s,s1)
+
+#define __IN1(s,x) \
+static inline unsigned x in##s(unsigned short port) { \
+    unsigned x value;
+
+#define __IN2(s,s1) \
+    __asm__ __volatile__ ("in" #s " %w1,%" s1 "0" : "=a" (value) : "Nd" (port)); \
+    return value; \
+}
+
+#define __IN(s,s1,x) \
+extern unsigned x in##s##_special(unsigned short port, \
+                                  struct cpu_user_regs *); \
+__IN1(s,x) __IN2(s,s1) \
+__IN1(s##_p,x) __IN2(s,s1)
+
+__IN(b,"b",char)
+__IN(w,"w",short)
+__IN(l,,int)
 
 __OUT(b,"b",char)
 __OUT(w,"w",short)
Index: 2006-09-11/xen/include/asm-x86/iocap.h
===================================================================
--- 2006-09-11.orig/xen/include/asm-x86/iocap.h	2006-09-11 13:39:58.000000000 +0200
+++ 2006-09-11/xen/include/asm-x86/iocap.h	2006-09-11 13:42:13.000000000 +0200
@@ -7,6 +7,15 @@
 #ifndef __X86_IOCAP_H__
 #define __X86_IOCAP_H__
 
+extern struct rangeset *global_ioport_caps;
+
+#define ioports_permit_access_all(s, e)                 \
+    rangeset_add_range(global_ioport_caps, s, e)
+#define ioports_deny_access_all(s, e)                   \
+    rangeset_remove_range(global_ioport_caps, s, e)
+#define ioports_any_access_permitted(s, e)              \
+    rangeset_contains_range(global_ioport_caps, s, e)
+
 #define ioports_permit_access(d, s, e)                  \
     rangeset_add_range((d)->arch.ioport_caps, s, e)
 #define ioports_deny_access(d, s, e)                    \
@@ -14,6 +23,13 @@
 #define ioports_access_permitted(d, s, e)               \
     rangeset_contains_range((d)->arch.ioport_caps, s, e)
 
+#define ioports_set_normal(d, s, e)                     \
+    rangeset_add_range((d)->arch.ioport_normal, s, e)
+#define ioports_set_special(d, s, e)                    \
+    rangeset_remove_range((d)->arch.ioport_normal, s, e)
+#define ioports_normal_access(d, s, e)                  \
+    rangeset_contains_range((d)->arch.ioport_normal, s, e)
+
 #define cache_flush_permitted(d)                       \
     (!rangeset_is_empty((d)->iomem_caps))
 
Index: 2006-09-11/xen/include/public/domctl.h
===================================================================
--- 2006-09-11.orig/xen/include/public/domctl.h	2006-09-11 09:06:11.000000000 +0200
+++ 2006-09-11/xen/include/public/domctl.h	2006-09-11 13:49:41.000000000 +0200
@@ -310,6 +310,10 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_iomem
 
 
 #define XEN_DOMCTL_ioport_permission 21
+#define IOPORT_DENY_ACCESS 0
+#define IOPORT_ALLOW_ACCESS 1
+#define IOPORT_SET_NORMAL 2
+#define IOPORT_SET_SPECIAL 3
 struct xen_domctl_ioport_permission {
     uint32_t first_port;              /* first port int range */
     uint32_t nr_ports;                /* size of port range */

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] enable port accesses with (almost) full register context
  2006-09-11 16:12 [PATCH] enable port accesses with (almost) full register context Jan Beulich
@ 2006-09-11 16:19 ` Keir Fraser
  2006-09-12  7:15   ` Jan Beulich
  0 siblings, 1 reply; 15+ messages in thread
From: Keir Fraser @ 2006-09-11 16:19 UTC (permalink / raw)
  To: Jan Beulich, xen-devel

On 11/9/06 5:12 pm, "Jan Beulich" <jbeulich@novell.com> wrote:

> This helped HP getting certain system management software going (in
> dom0) that triggers SMIs and depends upon other than port number
> and data register values being visible to the SMI handler.

That's quite rough. The 'special' handlers do more than just register
restore/save: what's all the locking and other assorted bits and pieces
doing in there? The 'special/normal' distinction at the interface is (I
suppose to some extent unavoidably) ugly and non-obvious.

Would it be cleaner to allow dom0 to have really direct access to some I/O
ports by allowing it to set a real I/O bitmap? I implemented I/O bitmaps via
emulation mainly because it makes context switching faster and it is less of
a pain to keep admin and guest bitmasks in sync if they are checked
synchronously. But a direct dom0-only bitmap would be a bit easier: quick to
turn on/off and no need to sync with admin bitmaps. Main downside is that
it'll slow down context-switch paths a little bit.

 -- Keir

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] enable port accesses with (almost) full register context
  2006-09-11 16:19 ` Keir Fraser
@ 2006-09-12  7:15   ` Jan Beulich
  2006-09-12  7:53     ` Keir Fraser
  0 siblings, 1 reply; 15+ messages in thread
From: Jan Beulich @ 2006-09-12  7:15 UTC (permalink / raw)
  To: Keir Fraser; +Cc: xen-devel

>>> Keir Fraser <Keir.Fraser@cl.cam.ac.uk> 11.09.06 18:19 >>>
>On 11/9/06 5:12 pm, "Jan Beulich" <jbeulich@novell.com> wrote:
>
>> This helped HP getting certain system management software going (in
>> dom0) that triggers SMIs and depends upon other than port number
>> and data register values being visible to the SMI handler.
>
>That's quite rough. The 'special' handlers do more than just register
>restore/save: what's all the locking and other assorted bits and pieces
>doing in there? The 'special/normal' distinction at the interface is (I
>suppose to some extent unavoidably) ugly and non-obvious.

That is because of the self modifying code that needs proper MP
synchronization. I know it's looking ugly, but I considered this the most
reasonable approach.
I'm not sure I understand what ugliness you find in the special/normal
distinction logic; one thing I'm thinking of is the additional meaning
added to the hypercall interface - I simply didn't want to introduce a
new sub-function there, especially since the existing one provided
ample room for the needed addition. But certainly, if you want that
changed, it should be easily doable (even without significantly affecting
HP's code already utilizing the interface as we added it to our 3.0.2).

>Would it be cleaner to allow dom0 to have really direct access to some I/O
>ports by allowing it to set a real I/O bitmap? I implemented I/O bitmaps via
>emulation mainly because it makes context switching faster and it is less of
>a pain to keep admin and guest bitmasks in sync if they are checked
>synchronously. But a direct dom0-only bitmap would be a bit easier: quick to
>turn on/off and no need to sync with admin bitmaps. Main downside is that
>it'll slow down context-switch paths a little bit.

I considered that too, but rejected it because of opening these ports to
vm86 mode then, too (as I/O instructions are *not* susceptible to iopl there,
they only depend on the bitmap).

Jan

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] enable port accesses with (almost) full register context
  2006-09-12  7:15   ` Jan Beulich
@ 2006-09-12  7:53     ` Keir Fraser
  2006-09-12  9:03       ` Jan Beulich
  0 siblings, 1 reply; 15+ messages in thread
From: Keir Fraser @ 2006-09-12  7:53 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel

On 12/9/06 8:15 am, "Jan Beulich" <jbeulich@novell.com> wrote:

> That is because of the self modifying code that needs proper MP
> synchronization. I know it's looking ugly, but I considered this the most
> reasonable approach.

Why is more synchronisation needed for emulation of these SMI port accesses
than you'd have for direct execution? I.e., if the accesses were executed
natively on an SMP system there'd be none of the extra synchronisation you
added happening. The instructions would be directly executed.

> I considered that too, but rejected it because of opening these ports to
> vm86 mode then, too (as I/O instructions are *not* susceptible to iopl there,
> they only depend on the bitmap).

I/O bitmap always overrides IOPL, in every execution mode. Why is vm86 mode
a particular concern? I was thinking that dom0 would switch on the direct
bitmap access only for the process(es) that requested it. We wouldn't want
direct access to be available to every process in dom0.

Not that I'm certain direct access is better than 'special emulation'. But
I'm not applying the existing patch unless I understand exactly why it needs
to do everything that it does. I'm in no rush -- supporting some piece of HP
closed-source management software isn't top priority for us, I'd say.

 -- Keir

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] enable port accesses with (almost) full register context
  2006-09-12  7:53     ` Keir Fraser
@ 2006-09-12  9:03       ` Jan Beulich
  2006-09-12  9:50         ` Keir Fraser
  0 siblings, 1 reply; 15+ messages in thread
From: Jan Beulich @ 2006-09-12  9:03 UTC (permalink / raw)
  To: Keir Fraser; +Cc: xen-devel

>>> Keir Fraser <Keir.Fraser@cl.cam.ac.uk> 12.09.06 09:53 >>>
>On 12/9/06 8:15 am, "Jan Beulich" <jbeulich@novell.com> wrote:
>
>> That is because of the self modifying code that needs proper MP
>> synchronization. I know it's looking ugly, but I considered this the most
>> reasonable approach.
>
>Why is more synchronisation needed for emulation of these SMI port accesses
>than you'd have for direct execution? I.e., if the accesses were executed
>natively on an SMP system there'd be none of the extra synchronisation you
>added happening. The instructions would be directly executed.

Again, I'm using self-modifying code there (to store the port number, as I
can't reliably use %dx for it if the original instruction happened to be one
with immediate operand, and %edx/%rdx happens to carry relevant data
for the SMI handler), which is what needs synchronization.

>> I considered that too, but rejected it because of opening these ports to
>> vm86 mode then, too (as I/O instructions are *not* susceptible to iopl there,
>> they only depend on the bitmap).
>
>I/O bitmap always overrides IOPL, in every execution mode. Why is vm86 mode
>a particular concern? I was thinking that dom0 would switch on the direct

You're right, of course - all modes are relevant here.

>bitmap access only for the process(es) that requested it. We wouldn't want
>direct access to be available to every process in dom0.

True. With that I agree installing the bitmap in the TSS would allow solving
the problem, too. Still I think the necessary overhead (you'd need to copy
the bitmap and keep it sync-ed, or make it read-only, for the direct access
to not be abusable) would be larger than using the special access method.

>Not that I'm certain direct access is better than 'special emulation'. But
>I'm not applying the existing patch unless I understand exactly why it needs
>to do everything that it does. I'm in no rush -- supporting some piece of HP
>closed-source management software isn't top priority for us, I'd say.

Which I can easily understand; nevertheless I seem to recall that we had
talked about the issue when it was first brought up (at least 3 months back),
and you seemed in agreement that the nature of the problem warrants a fix.

Jan

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] enable port accesses with (almost) full register context
  2006-09-12  9:03       ` Jan Beulich
@ 2006-09-12  9:50         ` Keir Fraser
  2006-09-12 10:32           ` Jan Beulich
  0 siblings, 1 reply; 15+ messages in thread
From: Keir Fraser @ 2006-09-12  9:50 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel

On 12/9/06 10:03, "Jan Beulich" <jbeulich@novell.com> wrote:

> Again, I'm using self-modifying code there (to store the port number, as I
> can't reliably use %dx for it if the original instruction happened to be one
> with immediate operand, and %edx/%rdx happens to carry relevant data
> for the SMI handler), which is what needs synchronization.

Ok, I see. I think it would be neater to build the code on the stack, or
some other per-cpu area, and avoid the synchronisation. We have no plans to
use the PAGE_NX flag in Xen itself, and x86/64 already has stack
trampolines. Perhaps the register save/restore code could be tidied too,
since it's not performance critical. It's not at all uniform like I'd
expect, with those interleaved push/pop/mov instructions. How about
something more like:
 pushad; call restore_guest_regs; <I/o port access>; popad
Where restore_guest_regs takes a regparm, and (obviously) restores the
regparm register last. I'd only do it as a call because it'd be ugly to
dynamically build that amount of code.

I'm not sure about the full extent of the interface changes either. How
about we add a new sysctl for specifying ports which need 'direct
execution'. It makes sense to make it a sysctl because this is a property of
the I/O port (or assumptions about it encoded in the platform firmware)
rather than a per-domain issue, or something that I think should be visible
at the physdev_op interface.

We'd test the per-port direct-execution flag for any port access by any
domain. After all, the only reason we don't use the new code for *all* port
accesses is concern about performance. I think calling this 'direct
execution' versus 'emulation' at the interface is fair -- even though we
emulate in all cases, in the former case it will be Xen's responsibility to
do all that is necessary to make it appear to the BIOS that the instruction
was executed directly, as when running natively.

 -- Keir

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] enable port accesses with (almost) full register context
  2006-09-12  9:50         ` Keir Fraser
@ 2006-09-12 10:32           ` Jan Beulich
  2006-09-12 11:28             ` Keir Fraser
  0 siblings, 1 reply; 15+ messages in thread
From: Jan Beulich @ 2006-09-12 10:32 UTC (permalink / raw)
  To: Keir Fraser; +Cc: xen-devel

>>> Keir Fraser <Keir.Fraser@cl.cam.ac.uk> 12.09.06 11:50 >>>
>On 12/9/06 10:03, "Jan Beulich" <jbeulich@novell.com> wrote:
>
>> Again, I'm using self-modifying code there (to store the port number, as I
>> can't reliably use %dx for it if the original instruction happened to be one
>> with immediate operand, and %edx/%rdx happens to carry relevant data
>> for the SMI handler), which is what needs synchronization.
>
>Ok, I see. I think it would be neater to build the code on the stack, or
>some other per-cpu area, and avoid the synchronisation. We have no plans to
>use the PAGE_NX flag in Xen itself, and x86/64 already has stack
>trampolines. Perhaps the register save/restore code could be tidied too,
>since it's not performance critical. It's not at all uniform like I'd
>expect, with those interleaved push/pop/mov instructions. How about
>something more like:
> pushad; call restore_guest_regs; <I/o port access>; popad
>Where restore_guest_regs takes a regparm, and (obviously) restores the
>regparm register last. I'd only do it as a call because it'd be ugly to
>dynamically build that amount of code.

Hm, I don't like this on-the-fly building of code very much, and I also don't
like writing assembly code that can obviously be written to perform better. Also,
on 64-bits the code wouldn't look so much nicer since there's no {push,pop}ad.
But certainly, if you refuse to take the patch without changing that...

>I'm not sure about the full extent of the interface changes either. How
>about we add a new sysctl for specifying ports which need 'direct
>execution'. It makes sense to make it a sysctl because this is a property of
>the I/O port (or assumptions about it encoded in the platform firmware)
>rather than a per-domain issue, or something that I think should be visible
>at the physdev_op interface.
>
>We'd test the per-port direct-execution flag for any port access by any
>domain. After all, the only reason we don't use the new code for *all* port
>accesses is concern about performance. I think calling this 'direct
>execution' versus 'emulation' at the interface is fair -- even though we
>emulate in all cases, in the former case it will be Xen's responsibility to
>do all that is necessary to make it appear to the BIOS that the instruction
>was executed directly, as when running natively.

That sounds right (and better than the current way). I'll do that change,
though I guess I'd still not call it direct execution.

Jan

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] enable port accesses with (almost) full register context
  2006-09-12 10:32           ` Jan Beulich
@ 2006-09-12 11:28             ` Keir Fraser
  2006-09-13  9:46               ` Jan Beulich
  2006-09-18 14:10               ` Jan Beulich
  0 siblings, 2 replies; 15+ messages in thread
From: Keir Fraser @ 2006-09-12 11:28 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel

On 12/9/06 11:32, "Jan Beulich" <jbeulich@novell.com> wrote:

> Hm, I don't like this on-the-fly building of code very much, and I also don't
> like writing assembly code that can obviously be written to perform better. Also,
> on 64-bits the code wouldn't look so much nicer since there's no {push,pop}ad.
> But certainly, if you refuse to take the patch without changing that...

IMO you're doing code building anyway, but just of one instruction. You get
rid of the locking by doing it to a per-CPU buffer, and the stack is the
obvious place, calling out to register save/restore code. I don't really
care about the performance of the save/restore code -- it's obviously going
to be trivial compared with the unavoidable trap-and-emulate cost. Also, do
you need separate save/restore code for IN vs. OUT instructions?

Something like:
    call save_host_restore_guest
    <IN or OUT>
    call save_guest_restore_host
    ret

Would that be reasonable?

> That sounds right (and better than the current way). I'll do that change,
> though I guess I'd still not call it direct execution.

'Special' is a crappy description because it's so non-specific. How about
'BIOS' ports? I can't think of any reason that emulating these accesses
could be a problem, except that BIOS/firmware is trapping them and expecting
more context than the hardware instruction defines as being required.

Alternatively, perhaps we could get rid of the distinction and emulate all
port accesses in this way? I suspect that the cost of state save/restore and
building the trampoline is dwarfed by the cost of the GPF and even the cost
of the I/O port access itself (they don't tend to be super fast). Could you
do a few quick measurements to determine this? If the extra cost is less
than, say, 10%, I'd be inclined to take the hit to avoid interface changes.

 -- Keir

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] enable port accesses with (almost) full register context
  2006-09-12 11:28             ` Keir Fraser
@ 2006-09-13  9:46               ` Jan Beulich
  2006-09-13 12:10                 ` Keir Fraser
  2006-09-18 14:10               ` Jan Beulich
  1 sibling, 1 reply; 15+ messages in thread
From: Jan Beulich @ 2006-09-13  9:46 UTC (permalink / raw)
  To: Keir Fraser; +Cc: xen-devel

[-- Attachment #1: Type: text/plain, Size: 2612 bytes --]

>IMO you're doing code building anyway, but just of one instruction. You get
>rid of the locking by doing it to a per-CPU buffer, and the stack is the
>obvious place, calling out to register save/restore code. I don't really
>care about the performance of the save/restore code -- it's obviously going
>to be trivial compared with the unavoidable trap-and-emulate cost. Also, do
>you need separate save/restore code for IN vs. OUT instructions?

Actually, in the code I currently have I do. This is because for out-s I need
to merge the value output with the user-specified rAX, under the
assumption that output value and register contents are not always identical
(i.e. if particular bits within a port would need to be specially treated by Xen,
which I can easily imagine to be required at some point).

>Something like:
>    call save_host_restore_guest
>    <IN or OUT>
>    call save_guest_restore_host
>    ret
>
>Would that be reasonable?

It would, provided the above assumption about the need to modify the
output value would never become true. Additionally, for 64-bits, I'm
concerned about the potential need for using indirect calls here (as well
as in the syscall trampolines): there's nothing keeping a user from making
the Xen heap 2Gb or more in size. These would further slow things down,
but depending on the nature of allocations made from the Xen heap it
may also be possible to simply place an upper limit on the heap size, as
it currently is assumed adjacent to the Xen image (but taking memory
holes at rather low addresses into account a user may even be required
to bump the heap size significantly - what if only a few Mb of memory
below 4Gb existed? - since, after all, the heap size is the size of address
space consumed, not the amount of memory used).

>Alternatively, perhaps we could get rid of the distinction and emulate all
>port accesses in this way? I suspect that the cost of state save/restore and
>building the trampoline is dwarfed by the cost of the GPF and even the cost
>of the I/O port access itself (they don't tend to be super fast). Could you
>do a few quick measurements to determine this? If the extra cost is less
>than, say, 10%, I'd be inclined to take the hit to avoid interface changes.

Percentages of full-context relative to simply emulated i/o, without having
changed the assembly file approach to the stub building one, yet (as per
above issues):

PentiumIII (32-bit) with locking	67%
PentiumIII (32-bit) without locking	84%
Pentium4 (64-bit) with locking		86%
Pentium4 (64-bit) without locking	89%

Revised patch (domctl->sysctl, naming) attached.

Jan

[-- Attachment #2: xen-x86-io-register-context-2.patch --]
[-- Type: text/plain, Size: 25202 bytes --]

From:  Jan Beulich
Bugzilla #192150

Index: 2006-09-11/xen/arch/x86/domain_build.c
===================================================================
--- 2006-09-11.orig/xen/arch/x86/domain_build.c	2006-09-11 13:50:54.000000000 +0200
+++ 2006-09-11/xen/arch/x86/domain_build.c	2006-09-13 11:05:06.000000000 +0200
@@ -33,6 +33,11 @@
 extern unsigned long initial_images_nrpages(void);
 extern void discard_initial_images(void);
 
+/* I/O-port Xen-enforced or admin-specified access control. */
+struct rangeset *ioport_caps = NULL;
+/* I/O-port admin-specified non-special access requirements. */
+struct rangeset *ioport_emul = NULL;
+
 static long dom0_nrpages;
 
 /*
@@ -64,6 +69,12 @@ integer_param("dom0_max_vcpus", opt_dom0
 static unsigned int opt_dom0_shadow;
 boolean_param("dom0_shadow", opt_dom0_shadow);
 
+static char opt_ioports_noemul[200] = "";
+string_param("ioports_noemul", opt_ioports_noemul);
+
+static char opt_ioports_disable[200] = "";
+string_param("ioports_disable", opt_ioports_disable);
+
 static char opt_dom0_ioports_disable[200] = "";
 string_param("dom0_ioports_disable", opt_dom0_ioports_disable);
 
@@ -102,10 +113,10 @@ static struct page_info *alloc_chunk(str
     return page;
 }
 
-static void process_dom0_ioports_disable(void)
+static void process_ioports(char *opt)
 {
     unsigned long io_from, io_to;
-    char *t, *u, *s = opt_dom0_ioports_disable;
+    char *t, *u, *s = opt;
 
     if ( *s == '\0' )
         return;
@@ -117,7 +128,9 @@ static void process_dom0_ioports_disable
         {
         parse_error:
             printk("Invalid ioport range <%s> "
-                   "in dom0_ioports_disable, skipping\n", t);
+                   "in %sioports_%s, skipping\n", t,
+                   opt == opt_dom0_ioports_disable ? "dom0_" : "",
+                   opt != opt_ioports_noemul ? "disable" : "noemul");
             continue;
         }
 
@@ -131,11 +144,26 @@ static void process_dom0_ioports_disable
         if ( (*u != '\0') || (io_to < io_from) || (io_to >= 65536) )
             goto parse_error;
 
-        printk("Disabling dom0 access to ioport range %04lx-%04lx\n",
-            io_from, io_to);
+        if ( opt != opt_ioports_noemul )
+        {
+            printk("Disabling %saccess to ioport range %04lx-%04lx\n",
+                opt != opt_ioports_disable ? "dom0 " : "",
+                io_from, io_to);
 
-        if ( ioports_deny_access(dom0, io_from, io_to) != 0 )
-            BUG();
+            if ( opt == opt_ioports_disable
+                 && ioports_deny_access_all(io_from, io_to) != 0 )
+                BUG();
+            if ( ioports_deny_access(dom0, io_from, io_to) != 0 )
+                BUG();
+        }
+        else
+        {
+            printk("Setting non-emulated access for ioport range %04lx-%04lx\n",
+                io_from, io_to);
+
+            if ( ioports_set_noemul(io_from, io_to) != 0 )
+                BUG();
+        }
     }
 }
 
@@ -815,6 +843,13 @@ int construct_dom0(struct domain *d,
 
     rc = 0;
 
+    /* Command-line I/O ranges. */
+    ioport_caps = rangeset_new(NULL,
+                               "global I/O Port access control",
+                               RANGESETF_prettyprint_hex);
+    BUG_ON(!ioport_caps);
+    rc |= ioports_permit_access_all(0, 0xFFFF);
+
     /* DOM0 is permitted full I/O capabilities. */
     rc |= ioports_permit_access(dom0, 0, 0xFFFF);
     rc |= iomem_permit_access(dom0, 0UL, ~0UL);
@@ -824,15 +859,20 @@ int construct_dom0(struct domain *d,
      * Modify I/O port access permissions.
      */
     /* Master Interrupt Controller (PIC). */
+    rc |= ioports_deny_access_all(0x20, 0x21);
     rc |= ioports_deny_access(dom0, 0x20, 0x21);
     /* Slave Interrupt Controller (PIC). */
+    rc |= ioports_deny_access_all(0xA0, 0xA1);
     rc |= ioports_deny_access(dom0, 0xA0, 0xA1);
     /* Interval Timer (PIT). */
+    rc |= ioports_deny_access_all(0x40, 0x43);
     rc |= ioports_deny_access(dom0, 0x40, 0x43);
     /* PIT Channel 2 / PC Speaker Control. */
+    rc |= ioports_deny_access_all(0x61, 0x61);
     rc |= ioports_deny_access(dom0, 0x61, 0x61);
     /* Command-line I/O ranges. */
-    process_dom0_ioports_disable();
+    process_ioports(opt_ioports_disable);
+    process_ioports(opt_dom0_ioports_disable);
 
     /*
      * Modify I/O memory access permissions.
@@ -851,6 +891,14 @@ int construct_dom0(struct domain *d,
             rc |= iomem_deny_access(dom0, mfn, mfn);
     }
 
+    /* Command-line I/O ranges requiring full register context access. */
+    ioport_emul = rangeset_new(NULL,
+                               "I/O Port emulation control",
+                               RANGESETF_prettyprint_hex);
+    BUG_ON(!ioport_emul);
+    rc |= ioports_set_emul(0, 0xFFFF);
+    process_ioports(opt_ioports_noemul);
+
     BUG_ON(rc != 0);
 
     return 0;
Index: 2006-09-11/xen/arch/x86/sysctl.c
===================================================================
--- 2006-09-11.orig/xen/arch/x86/sysctl.c	2006-08-28 08:32:37.000000000 +0200
+++ 2006-09-11/xen/arch/x86/sysctl.c	2006-09-13 09:47:23.000000000 +0200
@@ -57,6 +57,23 @@ long arch_do_sysctl(
     }
     break;
     
+    case XEN_SYSCTL_ioport_emulation:
+    {
+        unsigned int fp = sysctl->u.ioport_emulation.first_port;
+        unsigned int np = sysctl->u.ioport_emulation.nr_ports;
+
+        ret = -EINVAL;
+        if ( (fp + np) > 65536 )
+            break;
+
+        if ( np == 0 )
+            ret = 0;
+        else if ( sysctl->u.ioport_emulation.emulate )
+            ret = ioports_set_emul(fp, fp + np - 1);
+        else
+            ret = ioports_set_noemul(fp, fp + np - 1);
+    }
+    break;
 
     default:
         ret = -ENOSYS;
Index: 2006-09-11/xen/arch/x86/traps.c
===================================================================
--- 2006-09-11.orig/xen/arch/x86/traps.c	2006-09-11 13:50:54.000000000 +0200
+++ 2006-09-11/xen/arch/x86/traps.c	2006-09-13 11:05:07.000000000 +0200
@@ -1002,16 +1002,76 @@ static inline int admin_io_okay(
     return ioports_access_permitted(v->domain, port, port + bytes - 1);
 }
 
+/* Can the I/O access be carried out without full register context? */
+static inline int normal_io_okay(
+    unsigned int port, unsigned int bytes)
+{
+    return ioports_emul(port, port + bytes - 1);
+}
+
 /* Check admin limits. Silently fail the access if it is disallowed. */
-#define inb_user(_p, _d, _r) (admin_io_okay(_p, 1, _d, _r) ? inb(_p) : ~0)
-#define inw_user(_p, _d, _r) (admin_io_okay(_p, 2, _d, _r) ? inw(_p) : ~0)
-#define inl_user(_p, _d, _r) (admin_io_okay(_p, 4, _d, _r) ? inl(_p) : ~0)
-#define outb_user(_v, _p, _d, _r) \
-    (admin_io_okay(_p, 1, _d, _r) ? outb(_v, _p) : ((void)0))
-#define outw_user(_v, _p, _d, _r) \
-    (admin_io_okay(_p, 2, _d, _r) ? outw(_v, _p) : ((void)0))
-#define outl_user(_v, _p, _d, _r) \
-    (admin_io_okay(_p, 4, _d, _r) ? outl(_v, _p) : ((void)0))
+static inline unsigned char inb_user(
+    unsigned int port, int string,
+    struct vcpu *v, struct cpu_user_regs *regs)
+{
+    if ( admin_io_okay(port, 1, v, regs) )
+        return string || normal_io_okay(port, 1)
+               ? inb(port)
+               : inb_special(port, regs);
+    return ~0;
+}
+
+static inline unsigned short inw_user(
+    unsigned int port, int string,
+    struct vcpu *v, struct cpu_user_regs *regs)
+{
+    if ( admin_io_okay(port, 2, v, regs) )
+        return string || normal_io_okay(port, 2)
+               ? inw(port)
+               : inw_special(port, regs);
+    return ~0;
+}
+
+static inline unsigned int inl_user(
+    unsigned int port, int string,
+    struct vcpu *v, struct cpu_user_regs *regs)
+{
+    if ( admin_io_okay(port, 4, v, regs) )
+        return string || normal_io_okay(port, 4)
+               ? inl(port)
+               : inl_special(port, regs);
+    return ~0;
+}
+
+static inline void outb_user(
+    unsigned char value, unsigned int port, int string,
+    struct vcpu *v, struct cpu_user_regs *regs)
+{
+    if ( admin_io_okay(port, 1, v, regs) )
+        string || normal_io_okay(port, 1)
+        ? outb(value, port)
+        : outb_special(value, port, regs);
+}
+
+static inline void outw_user(
+    unsigned short value, unsigned int port, int string,
+    struct vcpu *v, struct cpu_user_regs *regs)
+{
+    if ( admin_io_okay(port, 2, v, regs) )
+        string || normal_io_okay(port, 2)
+        ? outw(value, port)
+        : outw_special(value, port, regs);
+}
+
+static inline void outl_user(
+    unsigned int value, unsigned int port, int string,
+    struct vcpu *v, struct cpu_user_regs *regs)
+{
+    if ( admin_io_okay(port, 4, v, regs) )
+        string || normal_io_okay(port, 4)
+        ? outl(value, port)
+        : outl_special(value, port, regs);
+}
 
 /* Instruction fetch with error handling. */
 #define insn_fetch(_type, _size, _ptr)                                      \
@@ -1087,13 +1140,13 @@ static int emulate_privileged_op(struct 
             switch ( op_bytes )
             {
             case 1:
-                data = (u8)inb_user((u16)regs->edx, v, regs);
+                data = (u8)inb_user((u16)regs->edx, 1, v, regs);
                 break;
             case 2:
-                data = (u16)inw_user((u16)regs->edx, v, regs);
+                data = (u16)inw_user((u16)regs->edx, 1, v, regs);
                 break;
             case 4:
-                data = (u32)inl_user((u16)regs->edx, v, regs);
+                data = (u32)inl_user((u16)regs->edx, 1, v, regs);
                 break;
             }
             if ( (rc = copy_to_user((void *)regs->edi, &data, op_bytes)) != 0 )
@@ -1119,13 +1172,13 @@ static int emulate_privileged_op(struct 
             switch ( op_bytes )
             {
             case 1:
-                outb_user((u8)data, (u16)regs->edx, v, regs);
+                outb_user((u8)data, (u16)regs->edx, 1, v, regs);
                 break;
             case 2:
-                outw_user((u16)data, (u16)regs->edx, v, regs);
+                outw_user((u16)data, (u16)regs->edx, 1, v, regs);
                 break;
             case 4:
-                outl_user((u32)data, (u16)regs->edx, v, regs);
+                outl_user((u32)data, (u16)regs->edx, 1, v, regs);
                 break;
             }
             regs->esi += (int)((regs->eflags & EF_DF) ? -op_bytes : op_bytes);
@@ -1156,14 +1209,14 @@ static int emulate_privileged_op(struct 
         {
         case 1:
             regs->eax &= ~0xffUL;
-            regs->eax |= (u8)inb_user(port, v, regs);
+            regs->eax |= (u8)inb_user(port, 0, v, regs);
             break;
         case 2:
             regs->eax &= ~0xffffUL;
-            regs->eax |= (u16)inw_user(port, v, regs);
+            regs->eax |= (u16)inw_user(port, 0, v, regs);
             break;
         case 4:
-            regs->eax = (u32)inl_user(port, v, regs);
+            regs->eax = (u32)inl_user(port, 0, v, regs);
             break;
         }
         goto done;
@@ -1184,13 +1237,13 @@ static int emulate_privileged_op(struct 
         switch ( op_bytes )
         {
         case 1:
-            outb_user((u8)regs->eax, port, v, regs);
+            outb_user((u8)regs->eax, port, 0, v, regs);
             break;
         case 2:
-            outw_user((u16)regs->eax, port, v, regs);
+            outw_user((u16)regs->eax, port, 0, v, regs);
             break;
         case 4:
-            outl_user((u32)regs->eax, port, v, regs);
+            outl_user((u32)regs->eax, port, 0, v, regs);
             break;
         }
         goto done;
Index: 2006-09-11/xen/arch/x86/x86_32/Makefile
===================================================================
--- 2006-09-11.orig/xen/arch/x86/x86_32/Makefile	2006-09-11 13:50:54.000000000 +0200
+++ 2006-09-11/xen/arch/x86/x86_32/Makefile	2006-09-11 13:52:31.000000000 +0200
@@ -1,5 +1,6 @@
 obj-y += domain_page.o
 obj-y += entry.o
+obj-y += io.o
 obj-y += mm.o
 obj-y += seg_fixup.o
 obj-y += traps.o
Index: 2006-09-11/xen/arch/x86/x86_32/io.S
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ 2006-09-11/xen/arch/x86/x86_32/io.S	2006-09-13 11:04:05.000000000 +0200
@@ -0,0 +1,155 @@
+/*
+ * Special (full-context) I/O handling routines.
+ *
+ * Copyright (c) 2006, Novell, Inc.
+ */
+
+#include <xen/config.h>
+#include <asm/asm_defns.h>
+
+#define PROTECTED_PORT 0x20 // must be some port no domain will ever be granted access
+
+ .macro in_special w, a
+ENTRY(in\w\(_special))
+	movl	2*4(%esp), %ecx
+	pushl	%ebx
+	movl	UREGS_ebx(%ecx), %ebx
+	movl	UREGS_edx(%ecx), %edx
+	pushl	%ebp
+	movl	UREGS_ebp(%ecx), %ebp
+	movl	UREGS_eax(%ecx), %eax
+	pushl	%esi
+	movl	UREGS_esi(%ecx), %esi
+	movw	4*4(%esp), %dx
+	pushl	%edi
+	testb	%dh, %dh
+	movl	UREGS_edi(%ecx), %edi
+	jz	.Ltry_lock_in\w
+	movl	UREGS_ecx(%ecx), %ecx
+	in\w	%dx, %\a
+ .ifndef .Lin_restore
+.Lin_restore:
+	movl	%ecx, 5*4(%esp)
+	movl	6*4(%esp), %ecx
+	movl	%edx, UREGS_edx(%ecx)
+	movl	5*4(%esp), %edx
+	movl	%edi, UREGS_edi(%ecx)
+	popl	%edi
+	movl	%esi, UREGS_esi(%ecx)
+	popl	%esi
+	movl	%ebp, UREGS_ebp(%ecx)
+	popl	%ebp
+	movl	%ebx, UREGS_ebx(%ecx)
+	popl	%ebx
+	movl	%eax, UREGS_eax(%ecx)
+	movl	%edx, UREGS_ecx(%ecx)
+	ret
+ .else
+ 	jmp	.Lin_restore
+ .endif
+.Ltry_lock_in\w:
+#ifdef CONFIG_SMP
+	movb	$PROTECTED_PORT, %al
+	lock cmpxchgb %dl, .Lport_in\w
+	jne	.Lretry_in\w
+	movl	UREGS_eax(%ecx), %eax
+#else
+	movb	%dl, .Lport_in\w
+#endif
+	movl	UREGS_edx(%ecx), %edx
+	movl	UREGS_ecx(%ecx), %ecx
+	jmp	.Limm_in\w
+ .data # .section .wtext, "axw"
+.Limm_in\w:
+	in\w	$PROTECTED_PORT, %\a
+ .equiv .Lport_in\w, . - 1
+#ifdef CONFIG_SMP
+	movb	$PROTECTED_PORT, .Lport_in\w
+#endif
+ 	jmp	.Lin_restore
+ .previous
+#ifdef CONFIG_SMP
+.Lretry_in\w:
+	pause
+	jmp	.Ltry_lock_in\w
+#endif
+ .endm
+
+	in_special b, al
+	in_special w, ax
+	in_special l, eax
+
+ .macro out_special w, a
+ENTRY(out\w\(_special))
+	movl	3*4(%esp), %ecx
+	pushl	%ebx
+	movl	UREGS_ebx(%ecx), %ebx
+	movl	UREGS_edx(%ecx), %edx
+	pushl	%ebp
+	movl	UREGS_ebp(%ecx), %ebp
+ .ifnes "\w", "l"
+	movl	UREGS_eax(%ecx), %eax
+ .endif
+	pushl	%esi
+	movl	UREGS_esi(%ecx), %esi
+	movw	5*4(%esp), %dx
+	pushl	%edi
+	testb	%dh, %dh
+	movl	UREGS_edi(%ecx), %edi
+	mov\w	5*4(%esp), %\a
+	jz	.Llock_out\w
+	movl	UREGS_ecx(%ecx), %ecx
+	out\w	%\a, %dx
+ .ifndef .Lout_restore
+.Lout_restore:
+	movl	%ecx, 5*4(%esp)
+	movl	7*4(%esp), %ecx
+	movl	%edx, UREGS_edx(%ecx)
+	movl	5*4(%esp), %edx
+	movl	%edi, UREGS_edi(%ecx)
+	popl	%edi
+	movl	%esi, UREGS_esi(%ecx)
+	popl	%esi
+	movl	%ebp, UREGS_ebp(%ecx)
+	popl	%ebp
+	movl	%ebx, UREGS_ebx(%ecx)
+	popl	%ebx
+	movl	%eax, UREGS_eax(%ecx)
+	movl	%edx, UREGS_ecx(%ecx)
+	ret
+ .else
+	jmp	.Lout_restore
+ .endif
+.Llock_out\w:
+#ifdef CONFIG_SMP
+	pushl	%eax
+.Ltry_lock_out\w:
+	movb	$PROTECTED_PORT, %al
+	lock cmpxchgb %dl, .Lport_out\w
+	jne	.Lretry_out\w
+	popl	%eax
+#else
+	movb	%dl, .Lport_out\w
+#endif
+	movl	UREGS_edx(%ecx), %edx
+	movl	UREGS_ecx(%ecx), %ecx
+	jmp	.Limm_out\w
+ .data # .section .wtext, "axw"
+.Limm_out\w:
+	out\w	%\a, $PROTECTED_PORT
+ .equiv .Lport_out\w, . - 1
+#ifdef CONFIG_SMP
+	movb	$PROTECTED_PORT, .Lport_out\w
+#endif
+	jmp	.Lout_restore
+ .previous
+#ifdef CONFIG_SMP
+.Lretry_out\w:
+	pause
+	jmp	.Ltry_lock_out\w
+#endif
+ .endm
+
+	out_special b, al
+	out_special w, ax
+	out_special l, eax
Index: 2006-09-11/xen/arch/x86/x86_64/Makefile
===================================================================
--- 2006-09-11.orig/xen/arch/x86/x86_64/Makefile	2006-09-11 13:50:54.000000000 +0200
+++ 2006-09-11/xen/arch/x86/x86_64/Makefile	2006-09-13 11:05:05.000000000 +0200
@@ -1,3 +1,4 @@
 obj-y += entry.o
+obj-y += io.o
 obj-y += mm.o
 obj-y += traps.o
Index: 2006-09-11/xen/arch/x86/x86_64/io.S
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ 2006-09-11/xen/arch/x86/x86_64/io.S	2006-09-13 11:04:38.000000000 +0200
@@ -0,0 +1,202 @@
+/*
+ * Special (full-context) I/O handling routines.
+ *
+ * Copyright (c) 2006, Novell, Inc.
+ */
+
+#include <xen/config.h>
+#include <asm/asm_defns.h>
+
+#define PROTECTED_PORT 0x20 // must be some port no domain will ever be granted access to
+
+ .macro in_special w, a
+ENTRY(in\w\(_special))
+	pushq	%rbx
+	movq	UREGS_rbx(%rsi), %rbx
+	movq	UREGS_rdx(%rsi), %rdx
+	pushq	%rbp
+	movq	UREGS_rbp(%rsi), %rbp
+	movq	UREGS_rax(%rsi), %rax
+	pushq	%r12
+	movq	UREGS_r12(%rsi), %r12
+	movq	UREGS_r8(%rsi), %r8
+	pushq	%r13
+	movq	UREGS_r13(%rsi), %r13
+	movq	UREGS_r9(%rsi), %r9
+	pushq	%r14
+	movq	UREGS_r14(%rsi), %r14
+	movq	UREGS_r10(%rsi), %r10
+	movw	%di, %dx
+	pushq	%r15
+	movq	UREGS_r15(%rsi), %r15
+	movq	UREGS_r11(%rsi), %r11
+	testb	%dh, %dh
+	movq	UREGS_rcx(%rsi), %rcx
+	movq	UREGS_rdi(%rsi), %rdi
+	pushq	%rsi
+	jz	.Ltry_lock_in\w
+	movq	UREGS_rsi(%rsi), %rsi
+	in\w	%dx, %\a
+ .ifndef .Lin_restore
+.Lin_restore:
+	pushq	%rsi
+	movq	8(%rsp), %rsi
+	movq	%rdx, UREGS_rdx(%rsi)
+	popq	UREGS_rsi(%rsi)
+	movq	%rdi, UREGS_rdi(%rsi)
+	movq	%rcx, UREGS_rcx(%rsi)
+	popq	%rdx
+	movq	%r11, UREGS_r11(%rsi)
+	movq	%r15, UREGS_r15(%rsi)
+	popq	%r15
+	movq	%r10, UREGS_r10(%rsi)
+	movq	%r14, UREGS_r14(%rsi)
+	popq	%r14
+	movq	%r9, UREGS_r9(%rsi)
+	movq	%r13, UREGS_r13(%rsi)
+	popq	%r13
+	movq	%r8, UREGS_r8(%rsi)
+	movq	%r12, UREGS_r12(%rsi)
+	popq	%r12
+	movq	%rbp, UREGS_rbp(%rsi)
+	popq	%rbp
+	movq	%rbx, UREGS_rbx(%rsi)
+	popq	%rbx
+	movq	%rax, UREGS_rax(%rsi)
+	ret
+ .else
+ 	jmp	.Lin_restore
+ .endif
+.Ltry_lock_in\w:
+#ifdef CONFIG_SMP
+	movb	$PROTECTED_PORT, %al
+	lock cmpxchgb %dl, .Lport_in\w(%rip)
+	jne	.Lretry_in\w
+	movb	UREGS_rax(%rsi), %al
+#else
+	movb	%dl, .Lport_in\w(%rip)
+#endif
+	movq	UREGS_rdx(%rsi), %rdx
+	movq	UREGS_rsi(%rsi), %rsi
+	jmp	.Limm_in\w
+ .data # .section .wtext, "axw"
+.Limm_in\w:
+	in\w	$PROTECTED_PORT, %\a
+ .equiv .Lport_in\w, . - 1
+#ifdef CONFIG_SMP
+	movb	$PROTECTED_PORT, .Lport_in\w(%rip)
+#endif
+ 	jmp	.Lin_restore
+ .previous
+#ifdef CONFIG_SMP
+.Lretry_in\w:
+	pause
+	jmp	.Ltry_lock_in\w
+#endif
+ .endm
+
+	in_special b, al
+	in_special w, ax
+	in_special l, eax
+
+ .macro out_special w, a, v
+ENTRY(out\w\(_special))
+	movq	%rdx, %rcx
+	pushq	%rbx
+	movq	UREGS_rbx(%rdx), %rbx
+	movq	UREGS_rdx(%rdx), %rdx
+	pushq	%rbp
+	movq	UREGS_rbp(%rcx), %rbp
+	movq	UREGS_rax(%rcx), %rax
+	pushq	%r12
+	movq	UREGS_r12(%rcx), %r12
+	movq	UREGS_r8(%rcx), %r8
+	pushq	%r13
+	movq	UREGS_r13(%rcx), %r13
+	movq	UREGS_r9(%rcx), %r9
+	pushq	%r14
+	movq	UREGS_r14(%rcx), %r14
+	movq	UREGS_r10(%rcx), %r10
+	movw	%si, %dx
+	pushq	%r15
+	movq	UREGS_r15(%rcx), %r15
+	movq	UREGS_r11(%rcx), %r11
+ .ifnes "\w", "l"
+	mov\w	%\v, %\a
+ .else
+ .ifnes "\v", "edi"
+ .err
+ .endif
+	shrdq	$32, %rdi, %rax
+	rolq	$32, %rax
+ .endif
+	testb	%dh, %dh
+	movq	UREGS_rsi(%rcx), %rsi
+	movq	UREGS_rdi(%rcx), %rdi
+	pushq	%rcx
+	jz	.Llock_out\w
+	movq	UREGS_rcx(%rcx), %rcx
+	out\w	%\a, %dx
+ .ifndef .Lout_restore
+.Lout_restore:
+	pushq	%rcx
+	movq	8(%rsp), %rcx
+	movq	%rdx, UREGS_rdx(%rcx)
+	popq	UREGS_rcx(%rcx)
+	movq	%rdi, UREGS_rdi(%rcx)
+	movq	%rsi, UREGS_rsi(%rcx)
+	popq	%rdx
+	movq	%r11, UREGS_r11(%rcx)
+	movq	%r15, UREGS_r15(%rcx)
+	popq	%r15
+	movq	%r10, UREGS_r10(%rcx)
+	movq	%r14, UREGS_r14(%rcx)
+	popq	%r14
+	movq	%r9, UREGS_r9(%rcx)
+	movq	%r13, UREGS_r13(%rcx)
+	popq	%r13
+	movq	%r8, UREGS_r8(%rcx)
+	movq	%r12, UREGS_r12(%rcx)
+	popq	%r12
+	movq	%rbp, UREGS_rbp(%rcx)
+	popq	%rbp
+	movq	%rbx, UREGS_rbx(%rcx)
+	popq	%rbx
+	movq	%rax, UREGS_rax(%rcx)
+	ret
+ .else
+	jmp	.Lout_restore
+ .endif
+.Llock_out\w:
+#ifdef CONFIG_SMP
+	pushq	%rax
+.Ltry_lock_out\w:
+	movb	$PROTECTED_PORT, %al
+	lock cmpxchgb %dl, .Lport_out\w(%rip)
+	jne	.Lretry_out\w
+	popq	%rax
+#else
+	movb	%dl, .Lport_out\w(%rip)
+#endif
+	movq	UREGS_rdx(%rcx), %rdx
+	movq	UREGS_rcx(%rcx), %rcx
+	jmp	.Limm_out\w
+ .data # .section .wtext, "axw"
+.Limm_out\w:
+	out\w	%\a, $PROTECTED_PORT
+ .equiv .Lport_out\w, . - 1
+#ifdef CONFIG_SMP
+	movb	$PROTECTED_PORT, .Lport_out\w(%rip)
+#endif
+	jmp	.Lout_restore
+ .previous
+#ifdef CONFIG_SMP
+.Lretry_out\w:
+	pause
+	jmp	.Ltry_lock_out\w
+#endif
+ .endm
+
+	out_special b, al, dil
+	out_special w, ax, di
+	out_special l, eax, edi
Index: 2006-09-11/xen/include/asm-x86/io.h
===================================================================
--- 2006-09-11.orig/xen/include/asm-x86/io.h	2006-09-11 13:50:54.000000000 +0200
+++ 2006-09-11/xen/include/asm-x86/io.h	2006-09-11 13:52:31.000000000 +0200
@@ -5,6 +5,8 @@
 #include <xen/types.h>
 #include <asm/page.h>
 
+struct cpu_user_regs;
+
 /* We don't need real ioremap() on Xen/x86. */
 #define ioremap(x,l) (__va(x))
 #define iounmap(p)   ((void)0)
@@ -19,32 +21,34 @@
 #define __OUT1(s,x) \
 static inline void out##s(unsigned x value, unsigned short port) {
 
-#define __OUT2(s,s1,s2) \
-__asm__ __volatile__ ("out" #s " %" s1 "0,%" s2 "1"
+#define __OUT2(s,s1) \
+    __asm__ __volatile__ ("out" #s " %" s1 "0,%w1" : : "a" (value), "Nd" (port)); \
+}
 
 #define __OUT(s,s1,x) \
-__OUT1(s,x) __OUT2(s,s1,"w") : : "a" (value), "Nd" (port)); } \
-__OUT1(s##_p,x) __OUT2(s,s1,"w") : : "a" (value), "Nd" (port));} 
-
-#define __IN1(s) \
-static inline RETURN_TYPE in##s(unsigned short port) { RETURN_TYPE _v;
-
-#define __IN2(s,s1,s2) \
-__asm__ __volatile__ ("in" #s " %" s2 "1,%" s1 "0"
-
-#define __IN(s,s1,i...) \
-__IN1(s) __IN2(s,s1,"w") : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \
-__IN1(s##_p) __IN2(s,s1,"w") : "=a" (_v) : "Nd" (port) ,##i ); return _v; } 
-
-#define RETURN_TYPE unsigned char
-__IN(b,"")
-#undef RETURN_TYPE
-#define RETURN_TYPE unsigned short
-__IN(w,"")
-#undef RETURN_TYPE
-#define RETURN_TYPE unsigned int
-__IN(l,"")
-#undef RETURN_TYPE
+extern void out##s##_special(unsigned x value, unsigned short port, \
+                             struct cpu_user_regs *); \
+__OUT1(s,x) __OUT2(s,s1) \
+__OUT1(s##_p,x) __OUT2(s,s1)
+
+#define __IN1(s,x) \
+static inline unsigned x in##s(unsigned short port) { \
+    unsigned x value;
+
+#define __IN2(s,s1) \
+    __asm__ __volatile__ ("in" #s " %w1,%" s1 "0" : "=a" (value) : "Nd" (port)); \
+    return value; \
+}
+
+#define __IN(s,s1,x) \
+extern unsigned x in##s##_special(unsigned short port, \
+                                  struct cpu_user_regs *); \
+__IN1(s,x) __IN2(s,s1) \
+__IN1(s##_p,x) __IN2(s,s1)
+
+__IN(b,"b",char)
+__IN(w,"w",short)
+__IN(l,,int)
 
 __OUT(b,"b",char)
 __OUT(w,"w",short)
Index: 2006-09-11/xen/include/asm-x86/iocap.h
===================================================================
--- 2006-09-11.orig/xen/include/asm-x86/iocap.h	2006-09-11 13:50:54.000000000 +0200
+++ 2006-09-11/xen/include/asm-x86/iocap.h	2006-09-13 09:47:46.000000000 +0200
@@ -7,6 +7,15 @@
 #ifndef __X86_IOCAP_H__
 #define __X86_IOCAP_H__
 
+extern struct rangeset *ioport_caps, *ioport_emul;
+
+#define ioports_permit_access_all(s, e)                 \
+    rangeset_add_range(ioport_caps, s, e)
+#define ioports_deny_access_all(s, e)                   \
+    rangeset_remove_range(ioport_caps, s, e)
+#define ioports_any_access_permitted(s, e)              \
+    rangeset_contains_range(ioport_caps, s, e)
+
 #define ioports_permit_access(d, s, e)                  \
     rangeset_add_range((d)->arch.ioport_caps, s, e)
 #define ioports_deny_access(d, s, e)                    \
@@ -14,6 +23,13 @@
 #define ioports_access_permitted(d, s, e)               \
     rangeset_contains_range((d)->arch.ioport_caps, s, e)
 
+#define ioports_set_emul(s, e)                          \
+    rangeset_add_range(ioport_emul, s, e)
+#define ioports_set_noemul(s, e)                        \
+    rangeset_remove_range(ioport_emul, s, e)
+#define ioports_emul(s, e)                              \
+    rangeset_contains_range(ioport_emul, s, e)
+
 #define cache_flush_permitted(d)                       \
     (!rangeset_is_empty((d)->iomem_caps))
 
Index: 2006-09-11/xen/include/public/sysctl.h
===================================================================
--- 2006-09-11.orig/xen/include/public/sysctl.h	2006-09-11 09:06:11.000000000 +0200
+++ 2006-09-11/xen/include/public/sysctl.h	2006-09-13 09:45:47.000000000 +0200
@@ -122,6 +122,15 @@ struct xen_sysctl_getdomaininfolist {
 typedef struct xen_sysctl_getdomaininfolist xen_sysctl_getdomaininfolist_t;
 DEFINE_XEN_GUEST_HANDLE(xen_sysctl_getdomaininfolist_t);
 
+#define XEN_SYSCTL_ioport_emulation 7
+struct xen_sysctl_ioport_emulation {
+    uint32_t first_port;              /* first port in range */
+    uint32_t nr_ports;                /* size of port range */
+    uint8_t  emulate;                 /* emulate access to range? */
+};
+typedef struct xen_sysctl_ioport_emulation xen_sysctl_ioport_emulation_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_ioport_emulation_t);
+
 struct xen_sysctl {
     uint32_t cmd;
     uint32_t interface_version; /* XEN_SYSCTL_INTERFACE_VERSION */
@@ -132,6 +141,7 @@ struct xen_sysctl {
         struct xen_sysctl_sched_id          sched_id;
         struct xen_sysctl_perfc_op          perfc_op;
         struct xen_sysctl_getdomaininfolist getdomaininfolist;
+        struct xen_sysctl_ioport_emulation  ioport_emulation;
         uint8_t                             pad[128];
     } u;
 };

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] enable port accesses with (almost) full register context
  2006-09-13  9:46               ` Jan Beulich
@ 2006-09-13 12:10                 ` Keir Fraser
  2006-09-18 10:40                   ` Jan Beulich
  0 siblings, 1 reply; 15+ messages in thread
From: Keir Fraser @ 2006-09-13 12:10 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel

On 13/9/06 10:46, "Jan Beulich" <jbeulich@novell.com> wrote:

> It would, provided the above assumption about the need to modify the
> output value would never become true.

I hope it doesn't. :-) We'll cross this bridge if we come to it.

> Additionally, for 64-bits, I'm
> concerned about the potential need for using indirect calls here (as well
> as in the syscall trampolines): there's nothing keeping a user from making
> the Xen heap 2Gb or more in size.

Not much of a concern. Perhaps we should clamp the heap_size parameter to
2GB as a short-term fix for this issue. As you say, it can also affect the
syscall trampolines so users would soon notice if this was broken!

When we merge Xen and domain heaps on x86/64, we'll probably require Xen
allocations to come from a zone <= 2GB. Xen doesn't allocate much memory, so
that's not going to be a particularly serious constraint.

> Percentages of full-context relative to simply emulated i/o, without having
> changed the assembly file approach to the stub building one, yet (as per
> above issues):
> 
> PentiumIII (32-bit) with locking 67%
> PentiumIII (32-bit) without locking 84%
> Pentium4 (64-bit) with locking  86%
> Pentium4 (64-bit) without locking 89%

A little bit higher overhead than I'd hoped, but not terrible. Let's see how
it looks with the stub-building method, and then decide whether to bother
with the sysctl interface. Perhaps highly-optimised assembly code
save/restore routines will be required after all. :-)

 Cheers,
 Keir

> Revised patch (domctl->sysctl, naming) attached.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] enable port accesses with (almost) full register context
  2006-09-13 12:10                 ` Keir Fraser
@ 2006-09-18 10:40                   ` Jan Beulich
  2006-09-18 11:05                     ` Keir Fraser
  0 siblings, 1 reply; 15+ messages in thread
From: Jan Beulich @ 2006-09-18 10:40 UTC (permalink / raw)
  To: Keir Fraser; +Cc: xen-devel

>>> Keir Fraser <Keir.Fraser@cl.cam.ac.uk> 13.09.06 14:10 >>>
>On 13/9/06 10:46, "Jan Beulich" <jbeulich@novell.com> wrote:
>
>> It would, provided the above assumption about the need to modify the
>> output value would never become true.
>
>I hope it doesn't. :-) We'll cross this bridge if we come to it.

It'll be immediately needed if string I/O instructions are to also go that
path, unless you'd want them to access the original user buffer (and
trap the eventual page fault).

Also, I might need a little more clarification on the stack (ab)use for
creating stubs: As I understand it, the double-fault and NMI stacks on
x86-64 are currently simply overlaid on top of the normal stack,
basically assuming you'd never use this much space (the one-page
non-present separator is inserted only in debug builds). (Side note:
While for normal operations this is fine, I question the value of a
double fault backtrace that might be created due to a stack overflow
on a non-debug build. The obvious question is why the separator hole
isn't always being created - after all this is a one time operation that
happens as CPUs get brought up, so there shouldn't be any
performance overhead.)

Anyway, the relationship to the stubs is that I would favor moving the
stubs onto the double fault stack itself (rather than adjacent to the NMI
stack, which in turn is adjacent to the double fault one), because
(a) the stubs won't be needed anymore once the double fault stack is
needed and
(b) the stubs are this way farther away from the normal stack, making
it less likely for difficult-to-debug problems to creep in. I would then
similarly put the 32-bit I/O stubs onto the (top of the) (would-be
double fault) stack (which should be per CPU as much as on 64-bits,
but I realize that would imply per-CPU double fault TSSes and hence
per-CPU GDTs).

Jan

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] enable port accesses with (almost) full register context
  2006-09-18 10:40                   ` Jan Beulich
@ 2006-09-18 11:05                     ` Keir Fraser
  2006-09-18 11:36                       ` Jan Beulich
  0 siblings, 1 reply; 15+ messages in thread
From: Keir Fraser @ 2006-09-18 11:05 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel

On 18/9/06 11:40, "Jan Beulich" <jbeulich@novell.com> wrote:

> It'll be immediately needed if string I/O instructions are to also go that
> path, unless you'd want them to access the original user buffer (and
> trap the eventual page fault).

We emulate INS/OUTS as a sequence of IN/OUT plus copy_to/from_guest. Unless
the SMM code depends on us not having 'clobbered' %eax (which we would need
to do to emulate OUTS with OUT) then we should be okay there. I guess how
complicated the save/restore code needs to be depends on just how accurately
we need to set up the register state for this HP SMM code -- for example, I
guess we get away with SS:ESP being incorrect; can we get away with EAX as
well? Hmm... I guess you have made your point that there are devils in the
detail of doing this emulation. ;-)

> Also, I might need a little more clarification on the stack (ab)use for
> creating stubs.

Just declare a char-array automatic variable, fill it with machine code, and
call it.

 -- Keir

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] enable port accesses with (almost) full register context
  2006-09-18 11:05                     ` Keir Fraser
@ 2006-09-18 11:36                       ` Jan Beulich
  2006-09-18 12:22                         ` Keir Fraser
  0 siblings, 1 reply; 15+ messages in thread
From: Jan Beulich @ 2006-09-18 11:36 UTC (permalink / raw)
  To: Keir Fraser; +Cc: xen-devel

>> Also, I might need a little more clarification on the stack (ab)use for
>> creating stubs.
>
>Just declare a char-array automatic variable, fill it with machine code, and
>call it.

Actually, I rather wanted to do static setup as much as possible and hence
leave only the filling of the actual opcode to be done dynamically (at the
price of inserting one or two nops).

Jan

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] enable port accesses with (almost) full register context
  2006-09-18 11:36                       ` Jan Beulich
@ 2006-09-18 12:22                         ` Keir Fraser
  0 siblings, 0 replies; 15+ messages in thread
From: Keir Fraser @ 2006-09-18 12:22 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel

On 18/9/06 12:36, "Jan Beulich" <jbeulich@novell.com> wrote:

>> Just declare a char-array automatic variable, fill it with machine code, and
>> call it.
> 
> Actually, I rather wanted to do static setup as much as possible and hence
> leave only the filling of the actual opcode to be done dynamically (at the
> price of inserting one or two nops).

I think putting the static code in assembly functions and calling out to
them from a dynamically-generated stub of machine code would be neatest.

It doesn't take much C code to generate:
 call prologue; in/out; jmp epilogue

That's only about 12 bytes of generated code (assuming call/jmp rel32).
Static calls/jumps are very cheap.

You can define the prologue/epilogue functions within the same .c file
inside globally-defined asm() blocks.

 -- Keir

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] enable port accesses with (almost) full register context
  2006-09-12 11:28             ` Keir Fraser
  2006-09-13  9:46               ` Jan Beulich
@ 2006-09-18 14:10               ` Jan Beulich
  1 sibling, 0 replies; 15+ messages in thread
From: Jan Beulich @ 2006-09-18 14:10 UTC (permalink / raw)
  To: Keir Fraser; +Cc: xen-devel

[-- Attachment #1: Type: text/plain, Size: 2087 bytes --]

>>> Keir Fraser <Keir.Fraser@cl.cam.ac.uk> 12.09.06 13:28 >>>
>On 12/9/06 11:32, "Jan Beulich" <jbeulich@novell.com> wrote:
>
>> Hm, I don't like this on-the-fly building of code very much, and I also don't
>> like writing assembly code that can obviously be written to perform better. Also,
>> on 64-bits the code wouldn't look so much nicer since there's no {push,pop}ad.
>> But certainly, if you refuse to take the patch without changing that...
>
>IMO you're doing code building anyway, but just of one instruction. You get
>rid of the locking by doing it to a per-CPU buffer, and the stack is the
>obvious place, calling out to register save/restore code. I don't really
>care about the performance of the save/restore code -- it's obviously going
>to be trivial compared with the unavoidable trap-and-emulate cost. Also, do
>you need separate save/restore code for IN vs. OUT instructions?
>
>Something like:
>    call save_host_restore_guest
>    <IN or OUT>
>    call save_guest_restore_host
>    ret
>
>Would that be reasonable?

Attaching the revised patch.

>> That sounds right (and better than the current way). I'll do that change,
>> though I guess I'd still not call it direct execution.
>
>'Special' is a crappy description because it's so non-specific. How about
>'BIOS' ports? I can't think of any reason that emulating these accesses
>could be a problem, except that BIOS/firmware is trapping them and expecting
>more context than the hardware instruction defines as being required.
>
>Alternatively, perhaps we could get rid of the distinction and emulate all
>port accesses in this way? I suspect that the cost of state save/restore and
>building the trampoline is dwarfed by the cost of the GPF and even the cost
>of the I/O port access itself (they don't tend to be super fast). Could you
>do a few quick measurements to determine this? If the extra cost is less
>than, say, 10%, I'd be inclined to take the hit to avoid interface changes.

The new measurement results (full context compared to normal emulation):

PentiumIII (32-bit)	88%
Pentium4 (64-bit)	90%

Jan

[-- Attachment #2: xen-x86-io-register-context-3.patch --]
[-- Type: text/plain, Size: 23654 bytes --]

From:  Jan Beulich
Bugzilla #192150

Index: 2006-09-11/xen/arch/x86/domain_build.c
===================================================================
--- 2006-09-11.orig/xen/arch/x86/domain_build.c	2006-09-18 13:58:26.000000000 +0200
+++ 2006-09-11/xen/arch/x86/domain_build.c	2006-09-18 15:15:40.000000000 +0200
@@ -33,6 +33,11 @@
 extern unsigned long initial_images_nrpages(void);
 extern void discard_initial_images(void);
 
+/* I/O-port Xen-enforced or admin-specified access control. */
+struct rangeset *ioport_caps = NULL;
+/* I/O-port admin-specified non-special access requirements. */
+struct rangeset *ioport_emul = NULL;
+
 static long dom0_nrpages;
 
 /*
@@ -64,6 +69,12 @@ integer_param("dom0_max_vcpus", opt_dom0
 static unsigned int opt_dom0_shadow;
 boolean_param("dom0_shadow", opt_dom0_shadow);
 
+static char opt_ioports_noemul[200] = "";
+string_param("ioports_noemul", opt_ioports_noemul);
+
+static char opt_ioports_disable[200] = "";
+string_param("ioports_disable", opt_ioports_disable);
+
 static char opt_dom0_ioports_disable[200] = "";
 string_param("dom0_ioports_disable", opt_dom0_ioports_disable);
 
@@ -102,10 +113,10 @@ static struct page_info *alloc_chunk(str
     return page;
 }
 
-static void process_dom0_ioports_disable(void)
+static void process_ioports(char *opt)
 {
     unsigned long io_from, io_to;
-    char *t, *u, *s = opt_dom0_ioports_disable;
+    char *t, *u, *s = opt;
 
     if ( *s == '\0' )
         return;
@@ -117,7 +128,9 @@ static void process_dom0_ioports_disable
         {
         parse_error:
             printk("Invalid ioport range <%s> "
-                   "in dom0_ioports_disable, skipping\n", t);
+                   "in %sioports_%s, skipping\n", t,
+                   opt == opt_dom0_ioports_disable ? "dom0_" : "",
+                   opt != opt_ioports_noemul ? "disable" : "noemul");
             continue;
         }
 
@@ -131,11 +144,26 @@ static void process_dom0_ioports_disable
         if ( (*u != '\0') || (io_to < io_from) || (io_to >= 65536) )
             goto parse_error;
 
-        printk("Disabling dom0 access to ioport range %04lx-%04lx\n",
-            io_from, io_to);
+        if ( opt != opt_ioports_noemul )
+        {
+            printk("Disabling %saccess to ioport range %04lx-%04lx\n",
+                opt != opt_ioports_disable ? "dom0 " : "",
+                io_from, io_to);
 
-        if ( ioports_deny_access(dom0, io_from, io_to) != 0 )
-            BUG();
+            if ( opt == opt_ioports_disable
+                 && ioports_deny_access_all(io_from, io_to) != 0 )
+                BUG();
+            if ( ioports_deny_access(dom0, io_from, io_to) != 0 )
+                BUG();
+        }
+        else
+        {
+            printk("Setting non-emulated access for ioport range %04lx-%04lx\n",
+                io_from, io_to);
+
+            if ( ioports_set_noemul(io_from, io_to) != 0 )
+                BUG();
+        }
     }
 }
 
@@ -815,6 +843,13 @@ int construct_dom0(struct domain *d,
 
     rc = 0;
 
+    /* Global I/O port access control (narrowed below and via ioports_disable). */
+    ioport_caps = rangeset_new(NULL,
+                               "global I/O Port access control",
+                               RANGESETF_prettyprint_hex);
+    BUG_ON(!ioport_caps);
+    rc |= ioports_permit_access_all(0, 0xFFFF);
+
     /* DOM0 is permitted full I/O capabilities. */
     rc |= ioports_permit_access(dom0, 0, 0xFFFF);
     rc |= iomem_permit_access(dom0, 0UL, ~0UL);
@@ -824,15 +859,20 @@ int construct_dom0(struct domain *d,
      * Modify I/O port access permissions.
      */
     /* Master Interrupt Controller (PIC). */
+    rc |= ioports_deny_access_all(0x20, 0x21);
     rc |= ioports_deny_access(dom0, 0x20, 0x21);
     /* Slave Interrupt Controller (PIC). */
+    rc |= ioports_deny_access_all(0xA0, 0xA1);
     rc |= ioports_deny_access(dom0, 0xA0, 0xA1);
     /* Interval Timer (PIT). */
+    rc |= ioports_deny_access_all(0x40, 0x43);
     rc |= ioports_deny_access(dom0, 0x40, 0x43);
     /* PIT Channel 2 / PC Speaker Control. */
+    rc |= ioports_deny_access_all(0x61, 0x61);
     rc |= ioports_deny_access(dom0, 0x61, 0x61);
     /* Command-line I/O ranges. */
-    process_dom0_ioports_disable();
+    process_ioports(opt_ioports_disable);
+    process_ioports(opt_dom0_ioports_disable);
 
     /*
      * Modify I/O memory access permissions.
@@ -851,6 +891,14 @@ int construct_dom0(struct domain *d,
             rc |= iomem_deny_access(dom0, mfn, mfn);
     }
 
+    /* Command-line I/O ranges requiring full register context access. */
+    ioport_emul = rangeset_new(NULL,
+                               "I/O Port emulation control",
+                               RANGESETF_prettyprint_hex);
+    BUG_ON(!ioport_emul);
+    rc |= ioports_set_emul(0, 0xFFFF);
+    process_ioports(opt_ioports_noemul);
+
     BUG_ON(rc != 0);
 
     return 0;
Index: 2006-09-11/xen/arch/x86/sysctl.c
===================================================================
--- 2006-09-11.orig/xen/arch/x86/sysctl.c	2006-09-18 13:58:26.000000000 +0200
+++ 2006-09-11/xen/arch/x86/sysctl.c	2006-09-18 13:58:56.000000000 +0200
@@ -57,6 +57,23 @@ long arch_do_sysctl(
     }
     break;
     
+    case XEN_SYSCTL_ioport_emulation:
+    {
+        unsigned int fp = sysctl->u.ioport_emulation.first_port;
+        unsigned int np = sysctl->u.ioport_emulation.nr_ports;
+
+        ret = -EINVAL;
+        if ( (fp + np) > 65536 )
+            break;
+
+        if ( np == 0 )
+            ret = 0;
+        else if ( sysctl->u.ioport_emulation.emulate )
+            ret = ioports_set_emul(fp, fp + np - 1);
+        else
+            ret = ioports_set_noemul(fp, fp + np - 1);
+    }
+    break;
 
     default:
         ret = -ENOSYS;
Index: 2006-09-11/xen/arch/x86/traps.c
===================================================================
--- 2006-09-11.orig/xen/arch/x86/traps.c	2006-09-18 13:58:26.000000000 +0200
+++ 2006-09-11/xen/arch/x86/traps.c	2006-09-18 15:15:58.000000000 +0200
@@ -1002,9 +1002,20 @@ static inline int admin_io_okay(
     return ioports_access_permitted(v->domain, port, port + bytes - 1);
 }
 
+typedef unsigned long io_emul_stub_t(struct cpu_user_regs *) __attribute__((__regparm__(1)));
+long io_emul_stub_offset = 0, io_emul_insn_offset = 0;
+
+/* Can the I/O access be carried out without full register context? */
+static inline int normal_io_okay(
+    unsigned int port, unsigned int bytes)
+{
+    return ioports_emul(port, port + bytes - 1);
+}
+
 /* Check admin limits. Silently fail the access if it is disallowed. */
 static inline unsigned char inb_user(
-    unsigned int port, struct vcpu *v, struct cpu_user_regs *regs)
+    unsigned int port, io_emul_stub_t *stub,
+    struct vcpu *v, struct cpu_user_regs *regs)
 {
     /*
      * Allow read access to port 0x61. Bit 4 oscillates with period 30us, and
@@ -1014,18 +1025,54 @@ static inline int admin_io_okay(
      * but there's not really a good reason to do so.
      */
     if ( admin_io_okay(port, 1, v, regs) || (port == 0x61) )
-        return inb(port);
+        return !stub || normal_io_okay(port, 1) ? inb(port) : stub(regs);
     return ~0;
 }
-//#define inb_user(_p, _d, _r) (admin_io_okay(_p, 1, _d, _r) ? inb(_p) : ~0)
-#define inw_user(_p, _d, _r) (admin_io_okay(_p, 2, _d, _r) ? inw(_p) : ~0)
-#define inl_user(_p, _d, _r) (admin_io_okay(_p, 4, _d, _r) ? inl(_p) : ~0)
-#define outb_user(_v, _p, _d, _r) \
-    (admin_io_okay(_p, 1, _d, _r) ? outb(_v, _p) : ((void)0))
-#define outw_user(_v, _p, _d, _r) \
-    (admin_io_okay(_p, 2, _d, _r) ? outw(_v, _p) : ((void)0))
-#define outl_user(_v, _p, _d, _r) \
-    (admin_io_okay(_p, 4, _d, _r) ? outl(_v, _p) : ((void)0))
+
+static inline unsigned short inw_user(
+    unsigned int port, io_emul_stub_t *stub,
+    struct vcpu *v, struct cpu_user_regs *regs)
+{
+    if ( admin_io_okay(port, 2, v, regs) )
+        return !stub || normal_io_okay(port, 2) ? inw(port) : stub(regs);
+    return ~0;
+}
+
+static inline unsigned int inl_user(
+    unsigned int port, io_emul_stub_t *stub,
+    struct vcpu *v, struct cpu_user_regs *regs)
+{
+    if ( admin_io_okay(port, 4, v, regs) )
+        return !stub || normal_io_okay(port, 4) ? inl(port) : stub(regs);
+    return ~0;
+}
+
+static inline void outb_user(
+    unsigned char value, unsigned int port,
+    io_emul_stub_t *stub,
+    struct vcpu *v, struct cpu_user_regs *regs)
+{
+    if ( admin_io_okay(port, 1, v, regs) )
+        !stub || normal_io_okay(port, 1) ? outb(value, port) : (void)stub(regs);
+}
+
+static inline void outw_user(
+    unsigned short value, unsigned int port,
+    io_emul_stub_t *stub,
+    struct vcpu *v, struct cpu_user_regs *regs)
+{
+    if ( admin_io_okay(port, 2, v, regs) )
+        !stub || normal_io_okay(port, 2) ? outw(value, port) : (void)stub(regs);
+}
+
+static inline void outl_user(
+    unsigned int value, unsigned int port,
+    io_emul_stub_t *stub,
+    struct vcpu *v, struct cpu_user_regs *regs)
+{
+    if ( admin_io_okay(port, 4, v, regs) )
+        !stub || normal_io_okay(port, 4) ? outl(value, port) : (void)stub(regs);
+}
 
 /* Instruction fetch with error handling. */
 #define insn_fetch(_type, _size, _ptr)                                      \
@@ -1044,6 +1091,8 @@ static int emulate_privileged_op(struct 
     u8 opcode, modrm_reg = 0, modrm_rm = 0, rep_prefix = 0;
     unsigned int port, i, op_bytes = 4, data, rc;
     u32 l, h;
+    io_emul_stub_t *stub;
+    char *insn;
 
     /* Legacy prefixes. */
     for ( i = 0; i < 8; i++ )
@@ -1101,13 +1150,13 @@ static int emulate_privileged_op(struct 
             switch ( op_bytes )
             {
             case 1:
-                data = (u8)inb_user((u16)regs->edx, v, regs);
+                data = (u8)inb_user((u16)regs->edx, NULL, v, regs);
                 break;
             case 2:
-                data = (u16)inw_user((u16)regs->edx, v, regs);
+                data = (u16)inw_user((u16)regs->edx, NULL, v, regs);
                 break;
             case 4:
-                data = (u32)inl_user((u16)regs->edx, v, regs);
+                data = (u32)inl_user((u16)regs->edx, NULL, v, regs);
                 break;
             }
             if ( (rc = copy_to_user((void *)regs->edi, &data, op_bytes)) != 0 )
@@ -1133,13 +1182,13 @@ static int emulate_privileged_op(struct 
             switch ( op_bytes )
             {
             case 1:
-                outb_user((u8)data, (u16)regs->edx, v, regs);
+                outb_user((u8)data, (u16)regs->edx, NULL, v, regs);
                 break;
             case 2:
-                outw_user((u16)data, (u16)regs->edx, v, regs);
+                outw_user((u16)data, (u16)regs->edx, NULL, v, regs);
                 break;
             case 4:
-                outl_user((u32)data, (u16)regs->edx, v, regs);
+                outl_user((u32)data, (u16)regs->edx, NULL, v, regs);
                 break;
             }
             regs->esi += (int)((regs->eflags & EF_DF) ? -op_bytes : op_bytes);
@@ -1157,27 +1206,33 @@ static int emulate_privileged_op(struct 
     }
 
     /* I/O Port and Interrupt Flag instructions. */
+    insn = (char *)get_stack_bottom();
+    stub = (io_emul_stub_t *)(insn + io_emul_stub_offset);
+    insn += io_emul_insn_offset;
+    *insn++ = op_bytes != 2 ? 0x90 : 0x66;
+    *insn++ = opcode;
     switch ( opcode )
     {
     case 0xe4: /* IN imm8,%al */
         op_bytes = 1;
     case 0xe5: /* IN imm8,%eax */
         port = insn_fetch(u8, 1, eip);
+        *insn = port;
     exec_in:
         if ( !guest_io_okay(port, op_bytes, v, regs) )
             goto fail;
         switch ( op_bytes )
         {
         case 1:
-            regs->eax &= ~0xffUL;
-            regs->eax |= (u8)inb_user(port, v, regs);
+            res = regs->eax & ~0xffUL;
+            regs->eax = res | (u8)inb_user(port, stub, v, regs);
             break;
         case 2:
-            regs->eax &= ~0xffffUL;
-            regs->eax |= (u16)inw_user(port, v, regs);
+            res = regs->eax & ~0xffffUL;
+            regs->eax = res | (u16)inw_user(port, stub, v, regs);
             break;
         case 4:
-            regs->eax = (u32)inl_user(port, v, regs);
+            regs->eax = (u32)inl_user(port, stub, v, regs);
             break;
         }
         goto done;
@@ -1186,25 +1241,27 @@ static int emulate_privileged_op(struct 
         op_bytes = 1;
     case 0xed: /* IN %dx,%eax */
         port = (u16)regs->edx;
+        *insn = 0x90;
         goto exec_in;
 
     case 0xe6: /* OUT %al,imm8 */
         op_bytes = 1;
     case 0xe7: /* OUT %eax,imm8 */
         port = insn_fetch(u8, 1, eip);
+        *insn = port;
     exec_out:
         if ( !guest_io_okay(port, op_bytes, v, regs) )
             goto fail;
         switch ( op_bytes )
         {
         case 1:
-            outb_user((u8)regs->eax, port, v, regs);
+            outb_user((u8)regs->eax, port, stub, v, regs);
             break;
         case 2:
-            outw_user((u16)regs->eax, port, v, regs);
+            outw_user((u16)regs->eax, port, stub, v, regs);
             break;
         case 4:
-            outl_user((u32)regs->eax, port, v, regs);
+            outl_user((u32)regs->eax, port, stub, v, regs);
             break;
         }
         goto done;
@@ -1213,6 +1270,7 @@ static int emulate_privileged_op(struct 
         op_bytes = 1;
     case 0xef: /* OUT %eax,%dx */
         port = (u16)regs->edx;
+        *insn = 0x90;
         goto exec_out;
 
     case 0xfa: /* CLI */
Index: 2006-09-11/xen/arch/x86/x86_32/Makefile
===================================================================
--- 2006-09-11.orig/xen/arch/x86/x86_32/Makefile	2006-09-18 13:58:26.000000000 +0200
+++ 2006-09-11/xen/arch/x86/x86_32/Makefile	2006-09-18 13:58:56.000000000 +0200
@@ -1,5 +1,6 @@
 obj-y += domain_page.o
 obj-y += entry.o
+obj-y += io.o
 obj-y += mm.o
 obj-y += seg_fixup.o
 obj-y += traps.o
Index: 2006-09-11/xen/arch/x86/x86_32/io.S
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ 2006-09-11/xen/arch/x86/x86_32/io.S	2006-09-18 14:32:37.000000000 +0200
@@ -0,0 +1,42 @@
+/*
+ * Special (full-context) I/O handling routines.
+ *
+ * Copyright (c) 2006, Novell, Inc.
+ */
+
+#include <xen/config.h>
+#include <asm/asm_defns.h>
+
+ENTRY(save_host_restore_guest)	/* in: %eax = struct cpu_user_regs * (declared regparm(1)) */
+	movl	(%esp), %ecx	/* pick up our return address ... */
+	movl	%eax, (%esp)	/* ... and park the regs pointer in its stack slot */
+	movl	UREGS_edx(%eax), %edx	/* guest %edx */
+	pushl	%ebx	/* save host %ebx */
+	movl	UREGS_ebx(%eax), %ebx	/* guest %ebx */
+	pushl	%ebp	/* save host %ebp */
+	movl	UREGS_ebp(%eax), %ebp	/* guest %ebp */
+	pushl	%esi	/* save host %esi */
+	movl	UREGS_esi(%eax), %esi	/* guest %esi */
+	pushl	%edi	/* save host %edi */
+	movl	UREGS_edi(%eax), %edi	/* guest %edi */
+	pushl	%ecx	/* re-push the return address for the ret below */
+	movl	UREGS_ecx(%eax), %ecx	/* guest %ecx */
+	movl	UREGS_eax(%eax), %eax	/* guest %eax; done last as it overwrites the regs pointer */
+	ret	/* stack now holds (top first): edi, esi, ebp, ebx, regs pointer */
+
+ENTRY(save_guest_restore_host)	/* expects the stack layout left by save_host_restore_guest */
+	pushl	%edx	/* park guest %edx; %edx is needed as base register */
+	movl	5*4(%esp), %edx	/* regs pointer, below: edx, edi, esi, ebp, ebx */
+	movl	%eax, UREGS_eax(%edx)	/* store guest %eax */
+	popl	UREGS_edx(%edx)	/* store the parked guest %edx */
+	movl	%edi, UREGS_edi(%edx)	/* store guest %edi */
+	popl	%edi	/* restore host %edi */
+	movl	%esi, UREGS_esi(%edx)	/* store guest %esi */
+	popl	%esi	/* restore host %esi */
+	movl	%ebp, UREGS_ebp(%edx)	/* store guest %ebp */
+	popl	%ebp	/* restore host %ebp */
+	movl	%ebx, UREGS_ebx(%edx)	/* store guest %ebx */
+	popl	%ebx	/* restore host %ebx */
+	movl	%ecx, UREGS_ecx(%edx)	/* store guest %ecx */
+	popl	%ecx	/* drop the regs pointer slot */
+	ret	/* %eax is unchanged since it was stored; presumably returns to the stub's caller - confirm */
Index: 2006-09-11/xen/arch/x86/x86_32/traps.c
===================================================================
--- 2006-09-11.orig/xen/arch/x86/x86_32/traps.c	2006-09-18 13:58:26.000000000 +0200
+++ 2006-09-11/xen/arch/x86/x86_32/traps.c	2006-09-18 15:15:40.000000000 +0200
@@ -13,6 +13,7 @@
 #include <xen/nmi.h>
 #include <asm/current.h>
 #include <asm/flushtlb.h>
+#include <asm/io.h>
 #include <asm/hvm/hvm.h>
 #include <asm/hvm/support.h>
 
@@ -250,6 +251,7 @@ fastcall void smp_deferred_nmi(struct cp
 void __init percpu_traps_init(void)
 {
     struct tss_struct *tss = &doublefault_tss;
+    char *stack_bottom, *stack;
     asmlinkage int hypercall(void);
 
     if ( smp_processor_id() != 0 )
@@ -283,6 +285,28 @@ void __init percpu_traps_init(void)
         (unsigned long)tss, 235, 9);
 
     set_task_gate(TRAP_double_fault, __DOUBLEFAULT_TSS_ENTRY<<3);
+
+    /*
+     * Stub for full-context I/O emulation.
+     */
+    stack_bottom = (char *)get_stack_bottom();
+    stack        = (char *)((unsigned long)stack_bottom & ~(STACK_SIZE - 1));
+    if (!io_emul_stub_offset)
+        io_emul_stub_offset = stack - stack_bottom;
+    else
+        BUG_ON(io_emul_stub_offset != stack - stack_bottom);
+    /* call save_host_restore_guest */
+    stack[0] = 0xe8;
+    *(s32*)&stack[1] = (char *)save_host_restore_guest - &stack[5];
+    stack += 5;
+    if (!io_emul_insn_offset)
+        io_emul_insn_offset = stack - stack_bottom;
+    else
+        BUG_ON(io_emul_insn_offset != stack - stack_bottom);
+    stack += 3; /* operand size prefix, opcode, immediate */
+    /* jmp save_guest_restore_host */
+    stack[0] = 0xe9;
+    *(s32*)&stack[1] = (char *)save_guest_restore_host - &stack[5];
 }
 
 void init_int80_direct_trap(struct vcpu *v)
Index: 2006-09-11/xen/arch/x86/x86_64/Makefile
===================================================================
--- 2006-09-11.orig/xen/arch/x86/x86_64/Makefile	2006-09-18 13:58:26.000000000 +0200
+++ 2006-09-11/xen/arch/x86/x86_64/Makefile	2006-09-18 15:15:40.000000000 +0200
@@ -1,3 +1,4 @@
 obj-y += entry.o
+obj-y += io.o
 obj-y += mm.o
 obj-y += traps.o
Index: 2006-09-11/xen/arch/x86/x86_64/io.S
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ 2006-09-11/xen/arch/x86/x86_64/io.S	2006-09-18 14:41:26.000000000 +0200
@@ -0,0 +1,62 @@
+/*
+ * Special (full-context) I/O handling routines.
+ *
+ * Copyright (c) 2006, Novell, Inc.
+ */
+
+#include <xen/config.h>
+#include <asm/asm_defns.h>
+
+ENTRY(save_host_restore_guest)	/* in: %rdi = struct cpu_user_regs * (base for all UREGS_* loads) */
+	movq	(%rsp), %rcx	/* pick up our return address ... */
+	movq	%rdi, (%rsp)	/* ... and park the regs pointer in its stack slot */
+	movq	UREGS_rdx(%rdi), %rdx	/* guest %rdx */
+	pushq	%rbx	/* save host %rbx */
+	movq	UREGS_rax(%rdi), %rax	/* guest %rax */
+	movq	UREGS_rbx(%rdi), %rbx	/* guest %rbx */
+	pushq	%rbp	/* save host %rbp */
+	movq	UREGS_rsi(%rdi), %rsi	/* guest %rsi (host value is not saved) */
+	movq	UREGS_rbp(%rdi), %rbp	/* guest %rbp */
+	pushq	%r12	/* save host %r12 */
+	movq	UREGS_r8(%rdi), %r8	/* guest %r8 (host value is not saved) */
+	movq	UREGS_r12(%rdi), %r12	/* guest %r12 */
+	pushq	%r13	/* save host %r13 */
+	movq	UREGS_r9(%rdi), %r9	/* guest %r9 (host value is not saved) */
+	movq	UREGS_r13(%rdi), %r13	/* guest %r13 */
+	pushq	%r14	/* save host %r14 */
+	movq	UREGS_r10(%rdi), %r10	/* guest %r10 (host value is not saved) */
+	movq	UREGS_r14(%rdi), %r14	/* guest %r14 */
+	pushq	%r15	/* save host %r15 */
+	movq	UREGS_r11(%rdi), %r11	/* guest %r11 (host value is not saved) */
+	movq	UREGS_r15(%rdi), %r15	/* guest %r15 */
+	pushq	%rcx	/* re-push the return address for the ret below */
+	movq	UREGS_rcx(%rdi), %rcx	/* guest %rcx */
+	movq	UREGS_rdi(%rdi), %rdi	/* guest %rdi; done last as it overwrites the regs pointer */
+	ret	/* stack now holds (top first): r15, r14, r13, r12, rbp, rbx, regs pointer */
+
+ENTRY(save_guest_restore_host)	/* expects the stack layout left by save_host_restore_guest */
+	pushq	%rdi	/* park guest %rdi; %rdi is needed as base register */
+	movq	7*8(%rsp), %rdi	/* regs pointer, below: rdi, r15, r14, r13, r12, rbp, rbx */
+	movq	%rax, UREGS_rax(%rdi)	/* store guest %rax */
+	popq	UREGS_rdi(%rdi)	/* store the parked guest %rdi */
+	movq	%r15, UREGS_r15(%rdi)	/* store guest %r15 */
+	movq	%r11, UREGS_r11(%rdi)	/* store guest %r11 */
+	popq	%r15	/* restore host %r15 */
+	movq	%r14, UREGS_r14(%rdi)	/* store guest %r14 */
+	movq	%r10, UREGS_r10(%rdi)	/* store guest %r10 */
+	popq	%r14	/* restore host %r14 */
+	movq	%r13, UREGS_r13(%rdi)	/* store guest %r13 */
+	movq	%r9, UREGS_r9(%rdi)	/* store guest %r9 */
+	popq	%r13	/* restore host %r13 */
+	movq	%r12, UREGS_r12(%rdi)	/* store guest %r12 */
+	movq	%r8, UREGS_r8(%rdi)	/* store guest %r8 */
+	popq	%r12	/* restore host %r12 */
+	movq	%rbp, UREGS_rbp(%rdi)	/* store guest %rbp */
+	movq	%rsi, UREGS_rsi(%rdi)	/* store guest %rsi */
+	popq	%rbp	/* restore host %rbp */
+	movq	%rbx, UREGS_rbx(%rdi)	/* store guest %rbx */
+	movq	%rdx, UREGS_rdx(%rdi)	/* store guest %rdx */
+	popq	%rbx	/* restore host %rbx */
+	movq	%rcx, UREGS_rcx(%rdi)	/* store guest %rcx */
+	popq	%rcx	/* drop the regs pointer slot */
+	ret	/* %rax is unchanged since it was stored; presumably returns to the stub's caller - confirm */
Index: 2006-09-11/xen/arch/x86/x86_64/traps.c
===================================================================
--- 2006-09-11.orig/xen/arch/x86/x86_64/traps.c	2006-09-18 13:58:26.000000000 +0200
+++ 2006-09-11/xen/arch/x86/x86_64/traps.c	2006-09-18 15:15:41.000000000 +0200
@@ -14,6 +14,7 @@
 #include <xen/nmi.h>
 #include <asm/current.h>
 #include <asm/flushtlb.h>
+#include <asm/io.h>
 #include <asm/msr.h>
 #include <asm/shadow.h>
 #include <asm/hvm/hvm.h>
@@ -331,6 +332,29 @@ void __init percpu_traps_init(void)
 
     wrmsr(MSR_STAR, 0, (FLAT_RING3_CS32<<16) | __HYPERVISOR_CS);
     wrmsr(MSR_SYSCALL_MASK, EF_VM|EF_RF|EF_NT|EF_DF|EF_IE|EF_TF, 0U);
+
+    /*
+     * Stub for full-context I/O emulation.
+     */
+
+    /* Skip the compatibility-mode entry trampoline. */
+    stack += 26;
+    if (!io_emul_stub_offset)
+        io_emul_stub_offset = stack - stack_bottom;
+    else
+        BUG_ON(io_emul_stub_offset != stack - stack_bottom);
+    /* call save_host_restore_guest */
+    stack[0] = 0xe8;
+    *(s32*)&stack[1] = (char *)save_host_restore_guest - &stack[5];
+    stack += 5;
+    if (!io_emul_insn_offset)
+        io_emul_insn_offset = stack - stack_bottom;
+    else
+        BUG_ON(io_emul_insn_offset != stack - stack_bottom);
+    stack += 3; /* operand size prefix, opcode, immediate */
+    /* jmp save_guest_restore_host */
+    stack[0] = 0xe9;
+    *(s32*)&stack[1] = (char *)save_guest_restore_host - &stack[5];
 }
 
 static long register_guest_callback(struct callback_register *reg)
Index: 2006-09-11/xen/include/asm-x86/io.h
===================================================================
--- 2006-09-11.orig/xen/include/asm-x86/io.h	2006-09-18 13:58:26.000000000 +0200
+++ 2006-09-11/xen/include/asm-x86/io.h	2006-09-18 15:11:38.000000000 +0200
@@ -50,4 +50,10 @@ __OUT(b,"b",char)
 __OUT(w,"w",short)
 __OUT(l,,int)
 
+struct cpu_user_regs;
+void save_host_restore_guest(struct cpu_user_regs *) __attribute__((__regparm__(1)));
+unsigned long save_guest_restore_host(unsigned long) __attribute__((__regparm__(1)));
+
+extern long io_emul_stub_offset, io_emul_insn_offset;
+
 #endif
Index: 2006-09-11/xen/include/asm-x86/iocap.h
===================================================================
--- 2006-09-11.orig/xen/include/asm-x86/iocap.h	2006-09-18 13:58:26.000000000 +0200
+++ 2006-09-11/xen/include/asm-x86/iocap.h	2006-09-18 13:58:56.000000000 +0200
@@ -7,6 +7,15 @@
 #ifndef __X86_IOCAP_H__
 #define __X86_IOCAP_H__
 
+extern struct rangeset *ioport_caps, *ioport_emul;
+
+#define ioports_permit_access_all(s, e)                 \
+    rangeset_add_range(ioport_caps, s, e)
+#define ioports_deny_access_all(s, e)                   \
+    rangeset_remove_range(ioport_caps, s, e)
+#define ioports_any_access_permitted(s, e)              \
+    rangeset_contains_range(ioport_caps, s, e)
+
 #define ioports_permit_access(d, s, e)                  \
     rangeset_add_range((d)->arch.ioport_caps, s, e)
 #define ioports_deny_access(d, s, e)                    \
@@ -14,6 +23,13 @@
 #define ioports_access_permitted(d, s, e)               \
     rangeset_contains_range((d)->arch.ioport_caps, s, e)
 
+#define ioports_set_emul(s, e)                          \
+    rangeset_add_range(ioport_emul, s, e)
+#define ioports_set_noemul(s, e)                        \
+    rangeset_remove_range(ioport_emul, s, e)
+#define ioports_emul(s, e)                              \
+    rangeset_contains_range(ioport_emul, s, e)
+
 #define cache_flush_permitted(d)                       \
     (!rangeset_is_empty((d)->iomem_caps))
 
Index: 2006-09-11/xen/include/public/sysctl.h
===================================================================
--- 2006-09-11.orig/xen/include/public/sysctl.h	2006-09-18 13:58:26.000000000 +0200
+++ 2006-09-11/xen/include/public/sysctl.h	2006-09-18 13:58:56.000000000 +0200
@@ -122,6 +122,15 @@ struct xen_sysctl_getdomaininfolist {
 typedef struct xen_sysctl_getdomaininfolist xen_sysctl_getdomaininfolist_t;
 DEFINE_XEN_GUEST_HANDLE(xen_sysctl_getdomaininfolist_t);
 
+#define XEN_SYSCTL_ioport_emulation 7
+struct xen_sysctl_ioport_emulation {
+    uint32_t first_port;              /* first port in range */
+    uint32_t nr_ports;                /* size of port range */
+    uint8_t  emulate;                 /* emulate access to range? */
+};
+typedef struct xen_sysctl_ioport_emulation xen_sysctl_ioport_emulation_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_ioport_emulation_t);
+
 struct xen_sysctl {
     uint32_t cmd;
     uint32_t interface_version; /* XEN_SYSCTL_INTERFACE_VERSION */
@@ -132,6 +141,7 @@ struct xen_sysctl {
         struct xen_sysctl_sched_id          sched_id;
         struct xen_sysctl_perfc_op          perfc_op;
         struct xen_sysctl_getdomaininfolist getdomaininfolist;
+        struct xen_sysctl_ioport_emulation  ioport_emulation;
         uint8_t                             pad[128];
     } u;
 };

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2006-09-18 14:10 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-09-11 16:12 [PATCH] enable port accesses with (almost) full register context Jan Beulich
2006-09-11 16:19 ` Keir Fraser
2006-09-12  7:15   ` Jan Beulich
2006-09-12  7:53     ` Keir Fraser
2006-09-12  9:03       ` Jan Beulich
2006-09-12  9:50         ` Keir Fraser
2006-09-12 10:32           ` Jan Beulich
2006-09-12 11:28             ` Keir Fraser
2006-09-13  9:46               ` Jan Beulich
2006-09-13 12:10                 ` Keir Fraser
2006-09-18 10:40                   ` Jan Beulich
2006-09-18 11:05                     ` Keir Fraser
2006-09-18 11:36                       ` Jan Beulich
2006-09-18 12:22                         ` Keir Fraser
2006-09-18 14:10               ` Jan Beulich

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.