All of lore.kernel.org
 help / color / mirror / Atom feed
* Xen: Hybrid extension patchset for hypervisor
@ 2009-09-16  8:44 Yang, Sheng
  2009-09-16  9:08 ` Keir Fraser
  0 siblings, 1 reply; 18+ messages in thread
From: Yang, Sheng @ 2009-09-16  8:44 UTC (permalink / raw)
  To: Keir Fraser; +Cc: Jeremy Fitzhardinge, xen-devel, Eddie Dong, Jun Nakajima

[-- Attachment #1: Type: text/plain, Size: 124 bytes --]

Hi Keir & Jeremy

Here is the hypervisor part of hybrid extension support.

Please review, thanks!

-- 
regards
Yang, Sheng

[-- Attachment #2: 1.patch --]
[-- Type: text/x-patch, Size: 1674 bytes --]

# HG changeset patch
# User Sheng Yang <sheng@linux.intel.com>
# Date 1253081544 -28800
# Node ID f92ed10b9522a4dd83a5e31f7f1a8c83bd33d70d
# Parent  e5d904a6c9e6e585ec02124dc8b6592f525ef6ba
Add user controlled cpuid 0x40000002.edx

So that the user can turn the hybrid feature on/off through the configuration file.

diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -659,6 +659,7 @@
     struct domain *d = current->domain;
     /* Optionally shift out of the way of Viridian architectural leaves. */
     uint32_t base = is_viridian_domain(d) ? 0x40000100 : 0x40000000;
+    unsigned int tmp_eax, tmp_ebx, tmp_ecx, tmp_edx;
 
     idx -= base;
     if ( idx > 2 ) 
@@ -689,6 +690,14 @@
         *edx = 0;          /* Features 2 */
         if ( !is_hvm_vcpu(current) )
             *ecx |= XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD;
+
+        /* Check if additional feature specified, e.g. Hybrid */
+        if ( !is_viridian_domain(d) ) {
+            domain_cpuid(d, 0x40000002, 0,
+                         &tmp_eax, &tmp_ebx, &tmp_ecx, &tmp_edx);
+            if (tmp_edx != 0)
+                *edx = tmp_edx & XEN_CPUID_FEAT2_MASK;
+        }
         break;
 
     default:
diff --git a/xen/include/public/arch-x86/cpuid.h b/xen/include/public/arch-x86/cpuid.h
--- a/xen/include/public/arch-x86/cpuid.h
+++ b/xen/include/public/arch-x86/cpuid.h
@@ -65,4 +65,7 @@
 #define _XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD 0
 #define XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD  (1u<<0)
 
+/* Mask unsupported CPUID specified by user */
+#define XEN_CPUID_FEAT2_MASK 0x0ul
+
 #endif /* __XEN_PUBLIC_ARCH_X86_CPUID_H__ */

[-- Attachment #3: 2.patch --]
[-- Type: text/x-patch, Size: 5339 bytes --]

# HG changeset patch
# User Sheng Yang <sheng@linux.intel.com>
# Date 1253085983 -28800
# Node ID 2d68b9d29b425f138345c7b9e0e8bc66b84c7ba1
# Parent  f92ed10b9522a4dd83a5e31f7f1a8c83bd33d70d
Add HVMOP_enable_hybrid hypercall

This serves as the entry point for hybrid mode.

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -2028,6 +2028,17 @@
     HYPERCALL(hvm_op)
 };
 
+static hvm_hypercall_t *hvm_hypercall_hybrid64_table[NR_hypercalls] = {
+    [ __HYPERVISOR_memory_op ] = (hvm_hypercall_t *)hvm_memory_op,
+    [ __HYPERVISOR_grant_table_op ] = (hvm_hypercall_t *)hvm_grant_table_op,
+    HYPERCALL(xen_version),
+    HYPERCALL(console_io),
+    HYPERCALL(vcpu_op),
+    HYPERCALL(sched_op),
+    HYPERCALL(event_channel_op),
+    HYPERCALL(hvm_op),
+};
+
 #endif /* defined(__x86_64__) */
 
 int hvm_do_hypercall(struct cpu_user_regs *regs)
@@ -2058,7 +2069,8 @@
     if ( (eax & 0x80000000) && is_viridian_domain(curr->domain) )
         return viridian_hypercall(regs);
 
-    if ( (eax >= NR_hypercalls) || !hvm_hypercall32_table[eax] )
+    if ( (eax >= NR_hypercalls) ||
+         (!hvm_hypercall32_table[eax] && !is_hybrid_vcpu(curr)) )
     {
         regs->eax = -ENOSYS;
         return HVM_HCALL_completed;
@@ -2073,11 +2085,18 @@
                     regs->rdi, regs->rsi, regs->rdx, regs->r10, regs->r8);
 
         this_cpu(hvm_64bit_hcall) = 1;
-        regs->rax = hvm_hypercall64_table[eax](regs->rdi,
-                                               regs->rsi,
-                                               regs->rdx,
-                                               regs->r10,
-                                               regs->r8); 
+        if (is_hybrid_vcpu(curr))
+            regs->rax = hvm_hypercall_hybrid64_table[eax](regs->rdi,
+                                                          regs->rsi,
+                                                          regs->rdx,
+                                                          regs->r10,
+                                                          regs->r8);
+        else
+            regs->rax = hvm_hypercall64_table[eax](regs->rdi,
+                                                   regs->rsi,
+                                                   regs->rdx,
+                                                   regs->r10,
+                                                   regs->r8);
         this_cpu(hvm_64bit_hcall) = 0;
     }
     else
@@ -2752,6 +2771,32 @@
         break;
     }
 
+    case HVMOP_enable_hybrid: {
+        struct xen_hvm_hybrid_type a;
+        struct domain *d;
+
+        if ( copy_from_guest(&a, arg, 1) )
+            return -EFAULT;
+
+        rc = rcu_lock_target_domain_by_id(a.domid, &d);
+        if ( rc != 0 )
+            return rc;
+
+        rc = -EINVAL;
+        if ( !is_hvm_domain(d) )
+            goto param_fail5;
+
+        rc = xsm_hvm_param(d, op);
+        if ( rc )
+            goto param_fail5;
+
+        d->hybrid_enabled = XEN_HYBRID_ENABLED;
+        printk("HVM: Hybrid domain enabled\n");
+    param_fail5:
+        rcu_unlock_domain(d);
+        break;
+    }
+
     default:
     {
         gdprintk(XENLOG_WARNING, "Bad HVM op %ld.\n", op);
diff --git a/xen/include/public/arch-x86/cpuid.h b/xen/include/public/arch-x86/cpuid.h
--- a/xen/include/public/arch-x86/cpuid.h
+++ b/xen/include/public/arch-x86/cpuid.h
@@ -66,6 +66,8 @@
 #define XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD  (1u<<0)
 
 /* Mask unsupported CPUID specified by user */
-#define XEN_CPUID_FEAT2_MASK 0x0ul
+#define XEN_CPUID_FEAT2_MASK 0x1ul
+#define _XEN_CPUID_FEAT2_HYBRID 0
+#define XEN_CPUID_FEAT2_HYBRID (1u<<0)
 
 #endif /* __XEN_PUBLIC_ARCH_X86_CPUID_H__ */
diff --git a/xen/include/public/hvm/hvm_op.h b/xen/include/public/hvm/hvm_op.h
--- a/xen/include/public/hvm/hvm_op.h
+++ b/xen/include/public/hvm/hvm_op.h
@@ -125,6 +125,13 @@
 typedef struct xen_hvm_set_mem_type xen_hvm_set_mem_type_t;
 DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_mem_type_t);
 
+#define HVMOP_enable_hybrid    9
+struct xen_hvm_hybrid_type {
+    domid_t domid;
+    uint64_t flags;
+#define HVM_HYBRID_TIMER (1ull<<1)
+#define HVM_HYBRID_EVTCHN (1ull<<2)
+};
 
 #endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
 
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -269,6 +269,11 @@
 
     /* VRAM dirty support. */
     struct sh_dirty_vram *dirty_vram;
+
+#define XEN_HYBRID_ENABLED          (1u << 0)
+#define XEN_HYBRID_TIMER_ENABLED    (1u << 1)
+#define XEN_HYBRID_EVTCHN_ENABLED   (1u << 2)
+    uint64_t hybrid_enabled;
 };
 
 struct domain_setup_info
@@ -551,6 +556,12 @@
 #define is_hvm_domain(d) ((d)->is_hvm)
 #define is_hvm_vcpu(v)   (is_hvm_domain(v->domain))
 #define need_iommu(d)    ((d)->need_iommu && !(d)->is_hvm)
+#define is_hybrid_domain(d) ((d)->hybrid_enabled & XEN_HYBRID_ENABLED)
+#define is_hybrid_vcpu(v)   (is_hybrid_domain(v->domain))
+#define is_hybrid_timer_enabled_domain(d) (is_hybrid_domain(d) && \
+		(d)->hybrid_enabled & XEN_HYBRID_TIMER_ENABLED)
+#define is_hybrid_evtchn_enabled_domain(d) (is_hybrid_domain(d) && \
+		(d)->hybrid_enabled & XEN_HYBRID_EVTCHN_ENABLED)
 
 void set_vcpu_migration_delay(unsigned int delay);
 unsigned int get_vcpu_migration_delay(void);

[-- Attachment #4: 3.patch --]
[-- Type: text/x-patch, Size: 2173 bytes --]

# HG changeset patch
# User Sheng Yang <sheng@linux.intel.com>
# Date 1253088144 -28800
# Node ID 91c520c2cefccc8c654839d9b38d04bd8801a391
# Parent  2d68b9d29b425f138345c7b9e0e8bc66b84c7ba1
Add pvtimer support for HVM

We need to sync the TSC with the hypervisor, and update the guest wallclock time,
if we want to enable pvtimer. The timer interrupt is delivered through IRQ0.

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -2792,6 +2792,11 @@
 
         d->hybrid_enabled = XEN_HYBRID_ENABLED;
         printk("HVM: Hybrid domain enabled\n");
+        if (a.flags & HVM_HYBRID_TIMER) {
+            hvm_funcs.set_tsc_offset(d->vcpu[0], 0);
+            update_domain_wallclock_time(d);
+            d->hybrid_enabled |= XEN_HYBRID_TIMER_ENABLED;
+        }
     param_fail5:
         rcu_unlock_domain(d);
         break;
diff --git a/xen/arch/x86/time.c b/xen/arch/x86/time.c
--- a/xen/arch/x86/time.c
+++ b/xen/arch/x86/time.c
@@ -30,6 +30,8 @@
 #include <asm/div64.h>
 #include <asm/hpet.h>
 #include <io_ports.h>
+
+#include <asm/hvm/irq.h>
 
 /* opt_clocksource: Force clocksource to one of: pit, hpet, cyclone, acpi. */
 static char opt_clocksource[10];
@@ -1323,6 +1325,11 @@
 void send_timer_event(struct vcpu *v)
 {
     send_guest_vcpu_virq(v, VIRQ_TIMER);
+    if (is_hybrid_timer_enabled_domain(v->domain) &&
+	!is_hybrid_evtchn_enabled_domain(v->domain)) {
+        hvm_isa_irq_deassert(v->domain, 0);
+        hvm_isa_irq_assert(v->domain, 0);
+    }
 }
 
 /* Return secs after 00:00:00 localtime, 1 January, 1970. */
diff --git a/xen/include/public/arch-x86/cpuid.h b/xen/include/public/arch-x86/cpuid.h
--- a/xen/include/public/arch-x86/cpuid.h
+++ b/xen/include/public/arch-x86/cpuid.h
@@ -66,8 +66,10 @@
 #define XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD  (1u<<0)
 
 /* Mask unsupported CPUID specified by user */
-#define XEN_CPUID_FEAT2_MASK 0x1ul
+#define XEN_CPUID_FEAT2_MASK 0x3ul
 #define _XEN_CPUID_FEAT2_HYBRID 0
 #define XEN_CPUID_FEAT2_HYBRID (1u<<0)
+#define _XEN_CPUID_FEAT2_HYBRID_TIMER 1
+#define XEN_CPUID_FEAT2_HYBRID_TIMER (1u<<1)
 
 #endif /* __XEN_PUBLIC_ARCH_X86_CPUID_H__ */

[-- Attachment #5: 4.patch --]
[-- Type: text/x-patch, Size: 4153 bytes --]

# HG changeset patch
# User Sheng Yang <sheng@linux.intel.com>
# Date 1253088147 -28800
# Node ID 78e51528eba7c5fd328c90acd4edc58ba46b58e8
# Parent  91c520c2cefccc8c654839d9b38d04bd8801a391
Add a new type of HVM callback

HVMIRQ_callback_vector specifies one vector used to trigger the IRQ handler in the
guest. It doesn't need an EOI action because it uses the IDT directly.

diff --git a/xen/arch/x86/hvm/irq.c b/xen/arch/x86/hvm/irq.c
--- a/xen/arch/x86/hvm/irq.c
+++ b/xen/arch/x86/hvm/irq.c
@@ -165,6 +165,8 @@
             __hvm_pci_intx_assert(d, pdev, pintx);
         else
             __hvm_pci_intx_deassert(d, pdev, pintx);
+    case HVMIRQ_callback_vector:
+        vcpu_kick(v);
     default:
         break;
     }
@@ -251,7 +253,7 @@
 
     via_type = (uint8_t)(via >> 56) + 1;
     if ( ((via_type == HVMIRQ_callback_gsi) && (via == 0)) ||
-         (via_type > HVMIRQ_callback_pci_intx) )
+         (via_type > HVMIRQ_callback_vector) )
         via_type = HVMIRQ_callback_none;
 
     spin_lock(&d->arch.hvm_domain.irq_lock);
@@ -297,6 +299,9 @@
         if ( hvm_irq->callback_via_asserted )
              __hvm_pci_intx_assert(d, pdev, pintx);
         break;
+    case HVMIRQ_callback_vector:
+        hvm_irq->callback_via.vector = (uint8_t)via;
+        break;
     default:
         break;
     }
@@ -312,6 +317,10 @@
     case HVMIRQ_callback_pci_intx:
         printk("PCI INTx Dev 0x%02x Int%c\n", pdev, 'A' + pintx);
         break;
+    case HVMIRQ_callback_vector:
+        printk("Set HVMIRQ_callback_vector to %u\n",
+               hvm_irq->callback_via.vector);
+        break;
     default:
         printk("None\n");
         break;
@@ -322,6 +331,10 @@
 {
     struct hvm_domain *plat = &v->domain->arch.hvm_domain;
     int vector;
+
+    if (plat->irq.callback_via_type == HVMIRQ_callback_vector &&
+            vcpu_info(v, evtchn_upcall_pending))
+        return hvm_intack_vector(plat->irq.callback_via.vector);
 
     if ( unlikely(v->nmi_pending) )
         return hvm_intack_nmi;
@@ -357,6 +370,8 @@
     case hvm_intsrc_lapic:
         if ( !vlapic_ack_pending_irq(v, intack.vector) )
             intack = hvm_intack_none;
+        break;
+    case hvm_intsrc_vector:
         break;
     default:
         intack = hvm_intack_none;
diff --git a/xen/arch/x86/hvm/vmx/intr.c b/xen/arch/x86/hvm/vmx/intr.c
--- a/xen/arch/x86/hvm/vmx/intr.c
+++ b/xen/arch/x86/hvm/vmx/intr.c
@@ -161,7 +161,8 @@
     {
         HVMTRACE_2D(INJ_VIRQ, intack.vector, /*fake=*/ 0);
         vmx_inject_extint(intack.vector);
-        pt_intr_post(v, intack);
+        if (intack.source != hvm_intsrc_vector)
+             pt_intr_post(v, intack);
     }
 
     /* Is there another IRQ to queue up behind this one? */
diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -31,7 +31,8 @@
     hvm_intsrc_none,
     hvm_intsrc_pic,
     hvm_intsrc_lapic,
-    hvm_intsrc_nmi
+    hvm_intsrc_nmi,
+    hvm_intsrc_vector,
 };
 struct hvm_intack {
     uint8_t source; /* enum hvm_intsrc */
@@ -41,6 +42,7 @@
 #define hvm_intack_pic(vec)   ( (struct hvm_intack) { hvm_intsrc_pic,   vec } )
 #define hvm_intack_lapic(vec) ( (struct hvm_intack) { hvm_intsrc_lapic, vec } )
 #define hvm_intack_nmi        ( (struct hvm_intack) { hvm_intsrc_nmi,   2 } )
+#define hvm_intack_vector(vec)( (struct hvm_intack) { hvm_intsrc_vector, vec } )
 enum hvm_intblk {
     hvm_intblk_none,      /* not blocked (deliverable) */
     hvm_intblk_shadow,    /* MOV-SS or STI shadow */
diff --git a/xen/include/asm-x86/hvm/irq.h b/xen/include/asm-x86/hvm/irq.h
--- a/xen/include/asm-x86/hvm/irq.h
+++ b/xen/include/asm-x86/hvm/irq.h
@@ -54,12 +54,14 @@
         enum {
             HVMIRQ_callback_none,
             HVMIRQ_callback_gsi,
-            HVMIRQ_callback_pci_intx
+            HVMIRQ_callback_pci_intx,
+            HVMIRQ_callback_vector,
         } callback_via_type;
     };
     union {
         uint32_t gsi;
         struct { uint8_t dev, intx; } pci;
+        uint32_t vector;
     } callback_via;
 
     /* Number of INTx wires asserting each PCI-ISA link. */

[-- Attachment #6: 5.patch --]
[-- Type: text/x-patch, Size: 3721 bytes --]

# HG changeset patch
# User Sheng Yang <sheng@linux.intel.com>
# Date 1253088148 -28800
# Node ID 183fa85d47d9411e6e21c4534d8e231feeac96a7
# Parent  78e51528eba7c5fd328c90acd4edc58ba46b58e8
Enable event channel and QEmu device support for hybrid guest

Each VIRQ from 24 to 40 is bound to a QEMU-emulated pin, so that if a device
asserts the pin, a VIRQ is delivered to the guest instead.

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -2797,6 +2797,8 @@
             update_domain_wallclock_time(d);
             d->hybrid_enabled |= XEN_HYBRID_TIMER_ENABLED;
         }
+        if (a.flags & HVM_HYBRID_EVTCHN)
+            d->hybrid_enabled |= XEN_HYBRID_EVTCHN_ENABLED;
     param_fail5:
         rcu_unlock_domain(d);
         break;
diff --git a/xen/arch/x86/hvm/irq.c b/xen/arch/x86/hvm/irq.c
--- a/xen/arch/x86/hvm/irq.c
+++ b/xen/arch/x86/hvm/irq.c
@@ -46,8 +46,18 @@
     if ( (hvm_irq->pci_link_assert_count[link]++ == 0) && isa_irq &&
          (hvm_irq->gsi_assert_count[isa_irq]++ == 0) )
     {
-        vioapic_irq_positive_edge(d, isa_irq);
-        vpic_irq_positive_edge(d, isa_irq);
+        if ( !is_hybrid_evtchn_enabled_domain(d) )
+        {
+            vioapic_irq_positive_edge(d, isa_irq);
+            vpic_irq_positive_edge(d, isa_irq);
+        }
+        else
+        {
+            /* TODO fix the critical region here */
+            spin_unlock(&d->arch.hvm_domain.irq_lock);
+            send_guest_global_virq(d, VIRQ_EMUL_PIN(isa_irq));
+            spin_lock(&d->arch.hvm_domain.irq_lock);
+	}
     }
 }
 
@@ -76,8 +86,10 @@
     link    = hvm_pci_intx_link(device, intx);
     isa_irq = hvm_irq->pci_link.route[link];
     if ( (--hvm_irq->pci_link_assert_count[link] == 0) && isa_irq &&
-         (--hvm_irq->gsi_assert_count[isa_irq] == 0) )
-        vpic_irq_negative_edge(d, isa_irq);
+         (--hvm_irq->gsi_assert_count[isa_irq] == 0) ) {
+        if ( !is_hybrid_evtchn_enabled_domain(d) )
+            vpic_irq_negative_edge(d, isa_irq);
+    }
 }
 
 void hvm_pci_intx_deassert(
@@ -93,6 +105,7 @@
 {
     struct hvm_irq *hvm_irq = &d->arch.hvm_domain.irq;
     unsigned int gsi = hvm_isa_irq_to_gsi(isa_irq);
+    int send_virq = 0;
 
     ASSERT(isa_irq <= 15);
 
@@ -101,11 +114,21 @@
     if ( !__test_and_set_bit(isa_irq, &hvm_irq->isa_irq.i) &&
          (hvm_irq->gsi_assert_count[gsi]++ == 0) )
     {
-        vioapic_irq_positive_edge(d, gsi);
-        vpic_irq_positive_edge(d, isa_irq);
+        if ( !is_hybrid_evtchn_enabled_domain(d) )
+        {
+            vioapic_irq_positive_edge(d, gsi);
+            vpic_irq_positive_edge(d, isa_irq);
+        }
+        else
+        {
+            send_virq = 1;
+        }
     }
 
     spin_unlock(&d->arch.hvm_domain.irq_lock);
+
+    if (send_virq)
+	    send_guest_global_virq(d, VIRQ_EMUL_PIN(isa_irq));
 }
 
 void hvm_isa_irq_deassert(
@@ -120,7 +143,10 @@
 
     if ( __test_and_clear_bit(isa_irq, &hvm_irq->isa_irq.i) &&
          (--hvm_irq->gsi_assert_count[gsi] == 0) )
-        vpic_irq_negative_edge(d, isa_irq);
+    {
+        if ( !is_hybrid_evtchn_enabled_domain(d) )
+            vpic_irq_negative_edge(d, isa_irq);
+    }
 
     spin_unlock(&d->arch.hvm_domain.irq_lock);
 }
diff --git a/xen/include/public/xen.h b/xen/include/public/xen.h
--- a/xen/include/public/xen.h
+++ b/xen/include/public/xen.h
@@ -159,7 +159,12 @@
 #define VIRQ_ARCH_6    22
 #define VIRQ_ARCH_7    23
 
-#define NR_VIRQS       24
+#define VIRQ_EMUL_PIN_START 24
+#define VIRQ_EMUL_PIN_END 39
+#define VIRQ_EMUL_PIN_NUM 16
+#define VIRQ_EMUL_PIN(x) (VIRQ_EMUL_PIN_START + x)
+
+#define NR_VIRQS       40
 
 /*
  * MMU-UPDATE REQUESTS

[-- Attachment #7: 6.patch --]
[-- Type: text/x-patch, Size: 1402 bytes --]

# HG changeset patch
# User Sheng Yang <sheng@linux.intel.com>
# Date 1253088149 -28800
# Node ID 86c908c45ac908767b6b6bdbd9e8d863b34a84a8
# Parent  183fa85d47d9411e6e21c4534d8e231feeac96a7
Reserve an E820 region for hybrid gnttab support

diff --git a/tools/firmware/hvmloader/config.h b/tools/firmware/hvmloader/config.h
--- a/tools/firmware/hvmloader/config.h
+++ b/tools/firmware/hvmloader/config.h
@@ -16,8 +16,12 @@
 
 /* MMIO hole: Hardcoded defaults, which can be dynamically expanded. */
 #define PCI_MEM_START       0xf0000000
-#define PCI_MEM_END         0xfc000000
+#define PCI_MEM_END         0xfbfe0000
 extern unsigned long pci_mem_start, pci_mem_end;
+
+/* Reserve 128KB for grant table */
+#define GNTTAB_MEMBASE	    0xfbfe0000
+#define GNTTAB_MEMSIZE	    0x20000
 
 /* We reserve 16MB for special BIOS mappings, etc. */
 #define RESERVED_MEMBASE    0xfc000000
diff --git a/tools/firmware/hvmloader/hvmloader.c b/tools/firmware/hvmloader/hvmloader.c
--- a/tools/firmware/hvmloader/hvmloader.c
+++ b/tools/firmware/hvmloader/hvmloader.c
@@ -602,6 +602,12 @@
     e820[nr].type = E820_RAM;
     nr++;
 
+    /* Reserved for grant table */
+    e820[nr].addr = GNTTAB_MEMBASE;
+    e820[nr].size = GNTTAB_MEMSIZE;
+    e820[nr].type = E820_RESERVED;
+    nr++;
+
     /*
      * Explicitly reserve space for special pages.
      * This space starts at RESERVED_MEMBASE an extends to cover various

[-- Attachment #8: 7.patch --]
[-- Type: text/x-patch, Size: 924 bytes --]

# HG changeset patch
# User Sheng Yang <sheng@linux.intel.com>
# Date 1253088151 -28800
# Node ID 607b7ad0488a9db6202e0a6178027684ac0eb027
# Parent  86c908c45ac908767b6b6bdbd9e8d863b34a84a8
Enable event channel feature in CPUID for configuration file

diff --git a/xen/include/public/arch-x86/cpuid.h b/xen/include/public/arch-x86/cpuid.h
--- a/xen/include/public/arch-x86/cpuid.h
+++ b/xen/include/public/arch-x86/cpuid.h
@@ -66,10 +66,12 @@
 #define XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD  (1u<<0)
 
 /* Mask unsupported CPUID specified by user */
-#define XEN_CPUID_FEAT2_MASK 0x3ul
+#define XEN_CPUID_FEAT2_MASK 0x7ul
 #define _XEN_CPUID_FEAT2_HYBRID 0
 #define XEN_CPUID_FEAT2_HYBRID (1u<<0)
 #define _XEN_CPUID_FEAT2_HYBRID_TIMER 1
 #define XEN_CPUID_FEAT2_HYBRID_TIMER (1u<<1)
+#define _XEN_CPUID_FEAT2_HYBRID_EVTCHN 2
+#define XEN_CPUID_FEAT2_HYBRID_EVTCHN (1u<<2)
 
 #endif /* __XEN_PUBLIC_ARCH_X86_CPUID_H__ */

[-- Attachment #9: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: Xen: Hybrid extension patchset for hypervisor
  2009-09-16  8:44 Xen: Hybrid extension patchset for hypervisor Yang, Sheng
@ 2009-09-16  9:08 ` Keir Fraser
  2009-09-16 14:04   ` Keir Fraser
                     ` (2 more replies)
  0 siblings, 3 replies; 18+ messages in thread
From: Keir Fraser @ 2009-09-16  9:08 UTC (permalink / raw)
  To: Yang, Sheng; +Cc: Jeremy Fitzhardinge, xen-devel, Eddie Dong, Jun Nakajima

On 16/09/2009 09:44, "Yang, Sheng" <sheng.yang@intel.com> wrote:

> Hi Keir & Jeremy
> 
> Here is the hypervisor part of hybrid extension support.
> 
> Please review, thanks!

The principle is okay I guess. These changes would have to be trickled in
with a really good explanation and justification for each one. For example,
I'm not clear why the enable-hybrid hypercall is needed. Why not just
provide access to evtchn and timer hypercalls always, and the guest uses them if
it is capable of it? I'm also not sure why PV timer events get routed to
irq0 -- why not via an event channel as usual, now that you are enabling HVM
guests to use the evtchn subsystem? What's a hybrid gnttab, and why does it
need an explicit reserved e820 region? And so on.

The general principle of these patches seems to be to create a set of
individual, and perhaps largely independent, accelerations/enlightenments to
the HVM interface. I can at least agree with and support that aim.

 -- Keir

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: Xen: Hybrid extension patchset for hypervisor
  2009-09-16  9:08 ` Keir Fraser
@ 2009-09-16 14:04   ` Keir Fraser
  2009-09-16 16:28     ` Nakajima, Jun
  2009-09-16 19:11   ` Frank van der Linden
  2009-09-17  6:47   ` Yang, Sheng
  2 siblings, 1 reply; 18+ messages in thread
From: Keir Fraser @ 2009-09-16 14:04 UTC (permalink / raw)
  To: Yang, Sheng; +Cc: Jeremy Fitzhardinge, xen-devel, Eddie Dong, Jun Nakajima

On 16/09/2009 10:08, "Keir Fraser" <keir.fraser@eu.citrix.com> wrote:

> The principle is okay I guess. These changes would have to be trickled in
> with a  really good explanation and justification for each one. For example,
> I'm not clear why the enable-hybrid hypercall is needed. Why not just
> provide access to evtchn and timer hypercalls always, and guest sues them if
> it is capable of it? I'm also not sure why PV timer events get routed to
> irq0 -- why not via an event channel as usual, now that you are enabling HVM
> guests to use the evtchn subsystem? What's a hybrid gnttab, and why does it
> need an explciit reserved e820 region? And so on.
> 
> The general principle of these patches seems to be to create a set of
> individual, and perhaps largely independent, accelerations/enlightenments to
> the HVM interface. I can at least agree with and support that aim.

By the way, if your intention is to speed up 64-bit guest performance, then
I think you should compare with running a full PV guest in a VMCS container.
That is, it runs in VMX non-root mode but still retains the usual full-PV
interfaces. I think that would be no more code than you are proposing here,
and would avoid scattering a bunch more code around the guest OS, to which
there is bound to be resistance.

 -- Keir

^ permalink raw reply	[flat|nested] 18+ messages in thread

* RE: Xen: Hybrid extension patchset for hypervisor
  2009-09-16 14:04   ` Keir Fraser
@ 2009-09-16 16:28     ` Nakajima, Jun
  2009-09-16 18:19       ` Jeremy Fitzhardinge
  0 siblings, 1 reply; 18+ messages in thread
From: Nakajima, Jun @ 2009-09-16 16:28 UTC (permalink / raw)
  To: Keir Fraser, Yang, Sheng; +Cc: Dong, Eddie, Fitzhardinge, xen-devel, Jeremy

Keir Fraser wrote on Wed, 16 Sep 2009 at 07:04:10:

> On 16/09/2009 10:08, "Keir Fraser" <keir.fraser@eu.citrix.com> wrote:
> 
>> The principle is okay I guess. These changes would have to be trickled
>> in with a  really good explanation and justification for each one. For
>> example, I'm not clear why the enable-hybrid hypercall is needed. Why
>> not just provide access to evtchn and timer hypercalls always, and
>> guest sues them if it is capable of it? I'm also not sure why PV timer
>> events get routed to irq0 -- why not via an event channel as usual, now
>> that you are enabling HVM guests to use the evtchn subsystem? What's a
>> hybrid gnttab, and why does it need an explciit reserved e820 region?
>> And so on.
>> 
>> The general principle of these patches seems to be to create a set of
>> individual, and perhaps largely independent,
>> accelerations/enlightenments to the HVM interface. I can at least agree
>> with and support that aim.
>  By the way, if your intention is to speed up 64-bit guest performance,
> then I think you should compare with running a full PV guest in a VMCS
> container. That is runs in VMX non-root mode but still retains the usual
> full-PV interfaces. I think that would be no more code than you are
> proposing here, and would avoid scattering a bunch more code around the
> guest OS, to which there is bound to be resistance.

Do you mean running the existing 64-bit PV kernel binaries in a VMCS container?

Based on our data, what we would want in PV 64-bit guests are, fundamentally:
- have the kernel run in ring 0 (so that it can regain the performance enhancements)
- use hardware-based MMU virtualization (e.g. EPT-based) if present

> 
>  -- Keir
>

Jun
___
Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: Xen: Hybrid extension patchset for hypervisor
  2009-09-16 16:28     ` Nakajima, Jun
@ 2009-09-16 18:19       ` Jeremy Fitzhardinge
  2009-09-16 21:12         ` Ian Campbell
  0 siblings, 1 reply; 18+ messages in thread
From: Jeremy Fitzhardinge @ 2009-09-16 18:19 UTC (permalink / raw)
  To: Nakajima, Jun; +Cc: Yang, Sheng, xen-devel, Dong, Eddie, Keir Fraser

On 09/16/09 09:28, Nakajima, Jun wrote:
> Keir Fraser wrote on Wed, 16 Sep 2009 at 07:04:10:
>   
>>  By the way, if your intention is to speed up 64-bit guest performance,
>> then I think you should compare with running a full PV guest in a VMCS
>> container. That is runs in VMX non-root mode but still retains the usual
>> full-PV interfaces. I think that would be no more code than you are
>> proposing here, and would avoid scattering a bunch more code around the
>> guest OS, to which there is bound to be resistance.
>>     
> Do you mean running the existing 64-bit PV kernel binaries in a VMCS container?
>   

Yes.  I don't think there's any deep problem in doing that.

> Based on our data, what we would want in PV 64-bit guests are, fundamentally:
> - have the kernel run in ring 0 (so that it can regain the performance enhancements)
>   

That's no problem.  PV kernels don't currently assume they're running in
any particular ring, so they'd be happy to run in ring 0 if that's how
they're started (if there are problems, I'd consider that a bug).  We
could then check for ring 0 and enable syscall/sysenter.

> - use hardware-based MMU virtualization (e.g. EPT-based) if present
>   

We could do that with minimal API/ABI changes by:

    * Providing an identity p2m table
    * Changing the hypercall page to make pte writes simple memory
      writes (no hypercalls); xen would still keep track of pinned pages
      and trap'n'emulate on them for back-compatibility (but fast-path
      with no validation).  We could expose the presence of HAP via
      xen_features so that guests know they can avoid marking pagetables
      RO, etc.
    * Similarly, cr3 changes can be fast-pathed within the hypercall page.
    * Whatever else I've overlooked.

This would be very similar to how xenner gets PV guests running under kvm.

The tricky part might be in getting IO working, since it relies on
getting real MFNs for DMA.

At any rate, the changes need only be very localized within the
Xen-specific code.  I would not want to introduce a new or significantly
different kernel<->hypervisor ABI for any new modes (we have enough
already).

    J

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: Re: Xen: Hybrid extension patchset for hypervisor
  2009-09-16  9:08 ` Keir Fraser
  2009-09-16 14:04   ` Keir Fraser
@ 2009-09-16 19:11   ` Frank van der Linden
  2009-09-17  6:13     ` Yang, Sheng
  2009-09-17  6:25     ` Keir Fraser
  2009-09-17  6:47   ` Yang, Sheng
  2 siblings, 2 replies; 18+ messages in thread
From: Frank van der Linden @ 2009-09-16 19:11 UTC (permalink / raw)
  To: Keir Fraser
  Cc: Yang, Sheng, Jeremy Fitzhardinge, xen-devel, Eddie Dong, Jun Nakajima

Keir Fraser wrote:
> On 16/09/2009 09:44, "Yang, Sheng" <sheng.yang@intel.com> wrote:
>
>   
>> Hi Keir & Jeremy
>>
>> Here is the hypervisor part of hybrid extension support.
>>
>> Please review, thanks!
>>     
>
> The principle is okay I guess. These changes would have to be trickled in
> with a  really good explanation and justification for each one. For example,
> I'm not clear why the enable-hybrid hypercall is needed. Why not just
> provide access to evtchn and timer hypercalls always, and guest sues them if
> it is capable of it? I'm also not sure why PV timer events get routed to
> irq0 -- why not via an event channel as usual, now that you are enabling HVM
> guests to use the evtchn subsystem? What's a hybrid gnttab, and why does it
> need an explciit reserved e820 region? And so on.
>
> The general principle of these patches seems to be to create a set of
> individual, and perhaps largely independent, accelerations/enlightenments to
> the HVM interface. I can at least agree with and support that aim.
>
>  -- Keir
>   

I did not see the hypervisor part of these patches appear in my 
xen-devel inbox. Is this a problem on my end, or were they not sent to 
the list? If so, I'm interested in them, so it'd be great if they could 
be sent to the list.

Thanks,

- Frank

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: Re: Xen: Hybrid extension patchset for hypervisor
  2009-09-16 18:19       ` Jeremy Fitzhardinge
@ 2009-09-16 21:12         ` Ian Campbell
  2009-09-16 21:22           ` Jeremy Fitzhardinge
  0 siblings, 1 reply; 18+ messages in thread
From: Ian Campbell @ 2009-09-16 21:12 UTC (permalink / raw)
  To: Jeremy Fitzhardinge
  Cc: xen-devel, Yang, Sheng, Dong, Eddie, Fraser, Nakajima, Jun

On Wed, 2009-09-16 at 19:19 +0100, Jeremy Fitzhardinge wrote: 
> On 09/16/09 09:28, Nakajima, Jun wrote:
> > Keir Fraser wrote on Wed, 16 Sep 2009 at 07:04:10:
> >   
> >>  By the way, if your intention is to speed up 64-bit guest performance,
> >> then I think you should compare with running a full PV guest in a VMCS
> >> container. That is runs in VMX non-root mode but still retains the usual
> >> full-PV interfaces. I think that would be no more code than you are
> >> proposing here, and would avoid scattering a bunch more code around the
> >> guest OS, to which there is bound to be resistance.
> >>     
> > Do you mean running the existing 64-bit PV kernel binaries in a VMCS container?
> >   
> 
> Yes.  I don't think there's any deep problem in doing that.
> 
> > Based on our data, what we would want in PV 64-bit guests are, fundamentally:
> > - have the kernel run in ring 0 (so that it can regain the performance enhancements)
> >   
> 
> That's no problem.  PV kernels don't currently assume they're running in
> any particular ring, so they'd be happy to run in ring 0 if that's how
> they're started (if there are problems, I'd consider that a bug).  We
> could then check for ring 0 and enable syscall/sysenter.

XENFEAT_supervisor_mode_kernel is supposed to enable this behaviour,
although it hasn't been actively used for several years and never in the
pvops kernel so you can bet it has bit-rotted...

> 
> > - use hardware-based MMU virtualization (e.g. EPT-based) if present
> >   
> 
> We could do that with minimal API/ABI changes by:
> 
>     * Providing an identity p2m table
>     * Changing the hypercall page to make pte writes simple memory
>       writes (no hypercalls); xen would still keep track of pinned pages
>       and trap'n'emulate on them for back-compatibility (but fast-path
>       with no validation).  We could expose the presence of HAP via
>       xen_features so that guests know they can avoid marking pagetables
>       RO, etc.
>     * Similarly, cr3 changes can be fast-pathed within the hypercall page.
>     * Whatever else I've overlooked.

Some combination of XENFEAT_writable_page_tables
XENFEAT_writable_descriptor_tables and XENFEAT_auto_translated_physmap
might be of interest for this bit.

Ian.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: Re: Xen: Hybrid extension patchset for hypervisor
  2009-09-16 21:12         ` Ian Campbell
@ 2009-09-16 21:22           ` Jeremy Fitzhardinge
  2009-09-17  9:16             ` Ian Campbell
  0 siblings, 1 reply; 18+ messages in thread
From: Jeremy Fitzhardinge @ 2009-09-16 21:22 UTC (permalink / raw)
  To: Ian Campbell
  Cc: Yang, Sheng, xen-devel, Dong, Eddie, Keir Fraser, Nakajima, Jun

On 09/16/09 14:12, Ian Campbell wrote:
>>> Based on our data, what we would want in PV 64-bit guests are, fundamentally:
>>> - have the kernel run in ring 0 (so that it can regain the performance enhancements)
>>>   
>>>       
>> That's no problem.  PV kernels don't currently assume they're running in
>> any particular ring, so they'd be happy to run in ring 0 if that's how
>> they're started (if there are problems, I'd consider that a bug).  We
>> could then check for ring 0 and enable syscall/sysenter.
>>     
> XENFEAT_supervisor_mode_kernel is supposed to enable this behaviour,
> although it hasn't been actively used for several years and never in the
> pvops kernel so you can bet it has bit-rotted...
>   

That tends to have a slightly different meaning, viz "dom0 really *is*
privileged and can do anything it feels like".  It isn't necessary to
have a specific feature/mechanism for "kernel happens to be in ring 0";
it can look at its own cs ring number.

>> We could do that with minimal API/ABI changes by:
>>
>>     * Providing an identity p2m table
>>     * Changing the hypercall page to make pte writes simple memory
>>       writes (no hypercalls); xen would still keep track of pinned pages
>>       and trap'n'emulate on them for back-compatibility (but fast-path
>>       with no validation).  We could expose the presence of HAP via
>>       xen_features so that guests know they can avoid marking pagetables
>>       RO, etc.
>>     * Similarly, cr3 changes can be fast-pathed within the hypercall page.
>>     * Whatever else I've overlooked.
>>     
> Some combination of XENFEAT_writable_page_tables
> XENFEAT_writable_descriptor_tables and XENFEAT_auto_translated_physmap
> might be of interest for this bit.

Making use of XENFEAT_auto_translated_physmap would avoid the need for
identity p2m/m2p tables, but I'm not sure whether it still works.  I got
close to completely removing all references to it at one point, but I
think ia64 uses it?

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: Re: Xen: Hybrid extension patchset for hypervisor
  2009-09-16 19:11   ` Frank van der Linden
@ 2009-09-17  6:13     ` Yang, Sheng
  2009-09-17  6:25     ` Keir Fraser
  1 sibling, 0 replies; 18+ messages in thread
From: Yang, Sheng @ 2009-09-17  6:13 UTC (permalink / raw)
  To: Frank van der Linden
  Cc: Jeremy Fitzhardinge, xen-devel, Dong, Eddie, Keir Fraser, Nakajima, Jun

[-- Attachment #1: Type: text/plain, Size: 1497 bytes --]

On Thursday 17 September 2009 03:11:49 Frank van der Linden wrote:
> Keir Fraser wrote:
> > On 16/09/2009 09:44, "Yang, Sheng" <sheng.yang@intel.com> wrote:
> >> Hi Keir & Jeremy
> >>
> >> Here is the hypervisor part of hybrid extension support.
> >>
> >> Please review, thanks!
> >
> > The principle is okay I guess. These changes would have to be trickled in
> > with a  really good explanation and justification for each one. For
> > example, I'm not clear why the enable-hybrid hypercall is needed. Why not
> > just provide access to evtchn and timer hypercalls always, and guest uses
> > them if it is capable of it? I'm also not sure why PV timer events get
> > routed to irq0 -- why not via an event channel as usual, now that you are
> > enabling HVM guests to use the evtchn subsystem? What's a hybrid gnttab,
> > and why does it need an explicit reserved e820 region? And so on.
> >
> > The general principle of these patches seems to be to create a set of
> > individual, and perhaps largely independent, accelerations/enlightenments
> > to the HVM interface. I can at least agree with and support that aim.
> >
> >  -- Keir
>
> I did not see the hypervisor part of these patches appear in my
> xen-devel inbox. Is this a problem on my end, or were they not sent to
> the list? If so, I'm interested in them, so it'd be great if they could
> be sent to the list.
>
> Thanks,
>
> - Frank

It's quite strange...

Here are the patches (attached). Sending them again...

-- 
regards
Yang, Sheng

[-- Attachment #2: 1.patch --]
[-- Type: text/x-patch, Size: 1674 bytes --]

# HG changeset patch
# User Sheng Yang <sheng@linux.intel.com>
# Date 1253081544 -28800
# Node ID f92ed10b9522a4dd83a5e31f7f1a8c83bd33d70d
# Parent  e5d904a6c9e6e585ec02124dc8b6592f525ef6ba
Add user controlled cpuid 0x40000002.edx

So that user can turn on/off hybrid feature through configuration file.

diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -659,6 +659,7 @@
     struct domain *d = current->domain;
     /* Optionally shift out of the way of Viridian architectural leaves. */
     uint32_t base = is_viridian_domain(d) ? 0x40000100 : 0x40000000;
+    unsigned int tmp_eax, tmp_ebx, tmp_ecx, tmp_edx;
 
     idx -= base;
     if ( idx > 2 ) 
@@ -689,6 +690,14 @@
         *edx = 0;          /* Features 2 */
         if ( !is_hvm_vcpu(current) )
             *ecx |= XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD;
+
+        /* Check if additional feature specified, e.g. Hybrid */
+        if ( !is_viridian_domain(d) ) {
+            domain_cpuid(d, 0x40000002, 0,
+                         &tmp_eax, &tmp_ebx, &tmp_ecx, &tmp_edx);
+            if (tmp_edx != 0)
+                *edx = tmp_edx & XEN_CPUID_FEAT2_MASK;
+        }
         break;
 
     default:
diff --git a/xen/include/public/arch-x86/cpuid.h b/xen/include/public/arch-x86/cpuid.h
--- a/xen/include/public/arch-x86/cpuid.h
+++ b/xen/include/public/arch-x86/cpuid.h
@@ -65,4 +65,7 @@
 #define _XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD 0
 #define XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD  (1u<<0)
 
+/* Mask unsupported CPUID specified by user */
+#define XEN_CPUID_FEAT2_MASK 0x0ul
+
 #endif /* __XEN_PUBLIC_ARCH_X86_CPUID_H__ */

[-- Attachment #3: 2.patch --]
[-- Type: text/x-patch, Size: 5339 bytes --]

# HG changeset patch
# User Sheng Yang <sheng@linux.intel.com>
# Date 1253085983 -28800
# Node ID 2d68b9d29b425f138345c7b9e0e8bc66b84c7ba1
# Parent  f92ed10b9522a4dd83a5e31f7f1a8c83bd33d70d
Add HVMOP_enable_hybrid hypercall

As the entry of hybrid.

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -2028,6 +2028,17 @@
     HYPERCALL(hvm_op)
 };
 
+static hvm_hypercall_t *hvm_hypercall_hybrid64_table[NR_hypercalls] = {
+    [ __HYPERVISOR_memory_op ] = (hvm_hypercall_t *)hvm_memory_op,
+    [ __HYPERVISOR_grant_table_op ] = (hvm_hypercall_t *)hvm_grant_table_op,
+    HYPERCALL(xen_version),
+    HYPERCALL(console_io),
+    HYPERCALL(vcpu_op),
+    HYPERCALL(sched_op),
+    HYPERCALL(event_channel_op),
+    HYPERCALL(hvm_op),
+};
+
 #endif /* defined(__x86_64__) */
 
 int hvm_do_hypercall(struct cpu_user_regs *regs)
@@ -2058,7 +2069,8 @@
     if ( (eax & 0x80000000) && is_viridian_domain(curr->domain) )
         return viridian_hypercall(regs);
 
-    if ( (eax >= NR_hypercalls) || !hvm_hypercall32_table[eax] )
+    if ( (eax >= NR_hypercalls) ||
+         (!hvm_hypercall32_table[eax] && !is_hybrid_vcpu(curr)) )
     {
         regs->eax = -ENOSYS;
         return HVM_HCALL_completed;
@@ -2073,11 +2085,18 @@
                     regs->rdi, regs->rsi, regs->rdx, regs->r10, regs->r8);
 
         this_cpu(hvm_64bit_hcall) = 1;
-        regs->rax = hvm_hypercall64_table[eax](regs->rdi,
-                                               regs->rsi,
-                                               regs->rdx,
-                                               regs->r10,
-                                               regs->r8); 
+        if (is_hybrid_vcpu(curr))
+            regs->rax = hvm_hypercall_hybrid64_table[eax](regs->rdi,
+                                                          regs->rsi,
+                                                          regs->rdx,
+                                                          regs->r10,
+                                                          regs->r8);
+        else
+            regs->rax = hvm_hypercall64_table[eax](regs->rdi,
+                                                   regs->rsi,
+                                                   regs->rdx,
+                                                   regs->r10,
+                                                   regs->r8);
         this_cpu(hvm_64bit_hcall) = 0;
     }
     else
@@ -2752,6 +2771,32 @@
         break;
     }
 
+    case HVMOP_enable_hybrid: {
+        struct xen_hvm_hybrid_type a;
+        struct domain *d;
+
+        if ( copy_from_guest(&a, arg, 1) )
+            return -EFAULT;
+
+        rc = rcu_lock_target_domain_by_id(a.domid, &d);
+        if ( rc != 0 )
+            return rc;
+
+        rc = -EINVAL;
+        if ( !is_hvm_domain(d) )
+            goto param_fail5;
+
+        rc = xsm_hvm_param(d, op);
+        if ( rc )
+            goto param_fail5;
+
+        d->hybrid_enabled = XEN_HYBRID_ENABLED;
+        printk("HVM: Hybrid domain enabled\n");
+    param_fail5:
+        rcu_unlock_domain(d);
+        break;
+    }
+
     default:
     {
         gdprintk(XENLOG_WARNING, "Bad HVM op %ld.\n", op);
diff --git a/xen/include/public/arch-x86/cpuid.h b/xen/include/public/arch-x86/cpuid.h
--- a/xen/include/public/arch-x86/cpuid.h
+++ b/xen/include/public/arch-x86/cpuid.h
@@ -66,6 +66,8 @@
 #define XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD  (1u<<0)
 
 /* Mask unsupported CPUID specified by user */
-#define XEN_CPUID_FEAT2_MASK 0x0ul
+#define XEN_CPUID_FEAT2_MASK 0x1ul
+#define _XEN_CPUID_FEAT2_HYBRID 0
+#define XEN_CPUID_FEAT2_HYBRID (1u<<0)
 
 #endif /* __XEN_PUBLIC_ARCH_X86_CPUID_H__ */
diff --git a/xen/include/public/hvm/hvm_op.h b/xen/include/public/hvm/hvm_op.h
--- a/xen/include/public/hvm/hvm_op.h
+++ b/xen/include/public/hvm/hvm_op.h
@@ -125,6 +125,13 @@
 typedef struct xen_hvm_set_mem_type xen_hvm_set_mem_type_t;
 DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_mem_type_t);
 
+#define HVMOP_enable_hybrid    9
+struct xen_hvm_hybrid_type {
+    domid_t domid;
+    uint64_t flags;
+#define HVM_HYBRID_TIMER (1ull<<1)
+#define HVM_HYBRID_EVTCHN (1ull<<2)
+};
 
 #endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
 
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -269,6 +269,11 @@
 
     /* VRAM dirty support. */
     struct sh_dirty_vram *dirty_vram;
+
+#define XEN_HYBRID_ENABLED          (1u << 0)
+#define XEN_HYBRID_TIMER_ENABLED    (1u << 1)
+#define XEN_HYBRID_EVTCHN_ENABLED   (1u << 2)
+    uint64_t hybrid_enabled;
 };
 
 struct domain_setup_info
@@ -551,6 +556,12 @@
 #define is_hvm_domain(d) ((d)->is_hvm)
 #define is_hvm_vcpu(v)   (is_hvm_domain(v->domain))
 #define need_iommu(d)    ((d)->need_iommu && !(d)->is_hvm)
+#define is_hybrid_domain(d) ((d)->hybrid_enabled & XEN_HYBRID_ENABLED)
+#define is_hybrid_vcpu(v)   (is_hybrid_domain(v->domain))
+#define is_hybrid_timer_enabled_domain(d) (is_hybrid_domain(d) && \
+		(d)->hybrid_enabled & XEN_HYBRID_TIMER_ENABLED)
+#define is_hybrid_evtchn_enabled_domain(d) (is_hybrid_domain(d) && \
+		(d)->hybrid_enabled & XEN_HYBRID_EVTCHN_ENABLED)
 
 void set_vcpu_migration_delay(unsigned int delay);
 unsigned int get_vcpu_migration_delay(void);

[-- Attachment #4: 3.patch --]
[-- Type: text/x-patch, Size: 2173 bytes --]

# HG changeset patch
# User Sheng Yang <sheng@linux.intel.com>
# Date 1253088144 -28800
# Node ID 91c520c2cefccc8c654839d9b38d04bd8801a391
# Parent  2d68b9d29b425f138345c7b9e0e8bc66b84c7ba1
Add pvtimer support for HVM

We need to sync the TSC with the hypervisor and update the guest wallclock time
if we want to enable pvtimer. The timer interrupt is delivered through IRQ0.

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -2792,6 +2792,11 @@
 
         d->hybrid_enabled = XEN_HYBRID_ENABLED;
         printk("HVM: Hybrid domain enabled\n");
+        if (a.flags & HVM_HYBRID_TIMER) {
+            hvm_funcs.set_tsc_offset(d->vcpu[0], 0);
+            update_domain_wallclock_time(d);
+            d->hybrid_enabled |= XEN_HYBRID_TIMER_ENABLED;
+        }
     param_fail5:
         rcu_unlock_domain(d);
         break;
diff --git a/xen/arch/x86/time.c b/xen/arch/x86/time.c
--- a/xen/arch/x86/time.c
+++ b/xen/arch/x86/time.c
@@ -30,6 +30,8 @@
 #include <asm/div64.h>
 #include <asm/hpet.h>
 #include <io_ports.h>
+
+#include <asm/hvm/irq.h>
 
 /* opt_clocksource: Force clocksource to one of: pit, hpet, cyclone, acpi. */
 static char opt_clocksource[10];
@@ -1323,6 +1325,11 @@
 void send_timer_event(struct vcpu *v)
 {
     send_guest_vcpu_virq(v, VIRQ_TIMER);
+    if (is_hybrid_timer_enabled_domain(v->domain) &&
+	!is_hybrid_evtchn_enabled_domain(v->domain)) {
+        hvm_isa_irq_deassert(v->domain, 0);
+        hvm_isa_irq_assert(v->domain, 0);
+    }
 }
 
 /* Return secs after 00:00:00 localtime, 1 January, 1970. */
diff --git a/xen/include/public/arch-x86/cpuid.h b/xen/include/public/arch-x86/cpuid.h
--- a/xen/include/public/arch-x86/cpuid.h
+++ b/xen/include/public/arch-x86/cpuid.h
@@ -66,8 +66,10 @@
 #define XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD  (1u<<0)
 
 /* Mask unsupported CPUID specified by user */
-#define XEN_CPUID_FEAT2_MASK 0x1ul
+#define XEN_CPUID_FEAT2_MASK 0x3ul
 #define _XEN_CPUID_FEAT2_HYBRID 0
 #define XEN_CPUID_FEAT2_HYBRID (1u<<0)
+#define _XEN_CPUID_FEAT2_HYBRID_TIMER 1
+#define XEN_CPUID_FEAT2_HYBRID_TIMER (1u<<1)
 
 #endif /* __XEN_PUBLIC_ARCH_X86_CPUID_H__ */

[-- Attachment #5: 4.patch --]
[-- Type: text/x-patch, Size: 4153 bytes --]

# HG changeset patch
# User Sheng Yang <sheng@linux.intel.com>
# Date 1253088147 -28800
# Node ID 78e51528eba7c5fd328c90acd4edc58ba46b58e8
# Parent  91c520c2cefccc8c654839d9b38d04bd8801a391
Add a new type of HVM callback

HVMIRQ_callback_vector specifies one vector to trigger the irq handler in the guest.
It doesn't need an EOI action, since it uses the IDT directly.

diff --git a/xen/arch/x86/hvm/irq.c b/xen/arch/x86/hvm/irq.c
--- a/xen/arch/x86/hvm/irq.c
+++ b/xen/arch/x86/hvm/irq.c
@@ -165,6 +165,8 @@
             __hvm_pci_intx_assert(d, pdev, pintx);
         else
             __hvm_pci_intx_deassert(d, pdev, pintx);
+    case HVMIRQ_callback_vector:
+        vcpu_kick(v);
     default:
         break;
     }
@@ -251,7 +253,7 @@
 
     via_type = (uint8_t)(via >> 56) + 1;
     if ( ((via_type == HVMIRQ_callback_gsi) && (via == 0)) ||
-         (via_type > HVMIRQ_callback_pci_intx) )
+         (via_type > HVMIRQ_callback_vector) )
         via_type = HVMIRQ_callback_none;
 
     spin_lock(&d->arch.hvm_domain.irq_lock);
@@ -297,6 +299,9 @@
         if ( hvm_irq->callback_via_asserted )
              __hvm_pci_intx_assert(d, pdev, pintx);
         break;
+    case HVMIRQ_callback_vector:
+        hvm_irq->callback_via.vector = (uint8_t)via;
+        break;
     default:
         break;
     }
@@ -312,6 +317,10 @@
     case HVMIRQ_callback_pci_intx:
         printk("PCI INTx Dev 0x%02x Int%c\n", pdev, 'A' + pintx);
         break;
+    case HVMIRQ_callback_vector:
+        printk("Set HVMIRQ_callback_vector to %u\n",
+               hvm_irq->callback_via.vector);
+        break;
     default:
         printk("None\n");
         break;
@@ -322,6 +331,10 @@
 {
     struct hvm_domain *plat = &v->domain->arch.hvm_domain;
     int vector;
+
+    if (plat->irq.callback_via_type == HVMIRQ_callback_vector &&
+            vcpu_info(v, evtchn_upcall_pending))
+        return hvm_intack_vector(plat->irq.callback_via.vector);
 
     if ( unlikely(v->nmi_pending) )
         return hvm_intack_nmi;
@@ -357,6 +370,8 @@
     case hvm_intsrc_lapic:
         if ( !vlapic_ack_pending_irq(v, intack.vector) )
             intack = hvm_intack_none;
+        break;
+    case hvm_intsrc_vector:
         break;
     default:
         intack = hvm_intack_none;
diff --git a/xen/arch/x86/hvm/vmx/intr.c b/xen/arch/x86/hvm/vmx/intr.c
--- a/xen/arch/x86/hvm/vmx/intr.c
+++ b/xen/arch/x86/hvm/vmx/intr.c
@@ -161,7 +161,8 @@
     {
         HVMTRACE_2D(INJ_VIRQ, intack.vector, /*fake=*/ 0);
         vmx_inject_extint(intack.vector);
-        pt_intr_post(v, intack);
+        if (intack.source != hvm_intsrc_vector)
+             pt_intr_post(v, intack);
     }
 
     /* Is there another IRQ to queue up behind this one? */
diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -31,7 +31,8 @@
     hvm_intsrc_none,
     hvm_intsrc_pic,
     hvm_intsrc_lapic,
-    hvm_intsrc_nmi
+    hvm_intsrc_nmi,
+    hvm_intsrc_vector,
 };
 struct hvm_intack {
     uint8_t source; /* enum hvm_intsrc */
@@ -41,6 +42,7 @@
 #define hvm_intack_pic(vec)   ( (struct hvm_intack) { hvm_intsrc_pic,   vec } )
 #define hvm_intack_lapic(vec) ( (struct hvm_intack) { hvm_intsrc_lapic, vec } )
 #define hvm_intack_nmi        ( (struct hvm_intack) { hvm_intsrc_nmi,   2 } )
+#define hvm_intack_vector(vec)( (struct hvm_intack) { hvm_intsrc_vector, vec } )
 enum hvm_intblk {
     hvm_intblk_none,      /* not blocked (deliverable) */
     hvm_intblk_shadow,    /* MOV-SS or STI shadow */
diff --git a/xen/include/asm-x86/hvm/irq.h b/xen/include/asm-x86/hvm/irq.h
--- a/xen/include/asm-x86/hvm/irq.h
+++ b/xen/include/asm-x86/hvm/irq.h
@@ -54,12 +54,14 @@
         enum {
             HVMIRQ_callback_none,
             HVMIRQ_callback_gsi,
-            HVMIRQ_callback_pci_intx
+            HVMIRQ_callback_pci_intx,
+            HVMIRQ_callback_vector,
         } callback_via_type;
     };
     union {
         uint32_t gsi;
         struct { uint8_t dev, intx; } pci;
+        uint32_t vector;
     } callback_via;
 
     /* Number of INTx wires asserting each PCI-ISA link. */

[-- Attachment #6: 5.patch --]
[-- Type: text/x-patch, Size: 3721 bytes --]

# HG changeset patch
# User Sheng Yang <sheng@linux.intel.com>
# Date 1253088148 -28800
# Node ID 183fa85d47d9411e6e21c4534d8e231feeac96a7
# Parent  78e51528eba7c5fd328c90acd4edc58ba46b58e8
Enable event channel and QEmu device support for hybrid guest

Each VIRQ from 24 to 40 is bound to a QEmu-emulated pin, so that if a device
asserts the pin, a VIRQ is delivered to the guest instead.

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -2797,6 +2797,8 @@
             update_domain_wallclock_time(d);
             d->hybrid_enabled |= XEN_HYBRID_TIMER_ENABLED;
         }
+        if (a.flags & HVM_HYBRID_EVTCHN)
+            d->hybrid_enabled |= XEN_HYBRID_EVTCHN_ENABLED;
     param_fail5:
         rcu_unlock_domain(d);
         break;
diff --git a/xen/arch/x86/hvm/irq.c b/xen/arch/x86/hvm/irq.c
--- a/xen/arch/x86/hvm/irq.c
+++ b/xen/arch/x86/hvm/irq.c
@@ -46,8 +46,18 @@
     if ( (hvm_irq->pci_link_assert_count[link]++ == 0) && isa_irq &&
          (hvm_irq->gsi_assert_count[isa_irq]++ == 0) )
     {
-        vioapic_irq_positive_edge(d, isa_irq);
-        vpic_irq_positive_edge(d, isa_irq);
+        if ( !is_hybrid_evtchn_enabled_domain(d) )
+        {
+            vioapic_irq_positive_edge(d, isa_irq);
+            vpic_irq_positive_edge(d, isa_irq);
+        }
+        else
+        {
+            /* TODO fix the critical region here */
+            spin_unlock(&d->arch.hvm_domain.irq_lock);
+            send_guest_global_virq(d, VIRQ_EMUL_PIN(isa_irq));
+            spin_lock(&d->arch.hvm_domain.irq_lock);
+	}
     }
 }
 
@@ -76,8 +86,10 @@
     link    = hvm_pci_intx_link(device, intx);
     isa_irq = hvm_irq->pci_link.route[link];
     if ( (--hvm_irq->pci_link_assert_count[link] == 0) && isa_irq &&
-         (--hvm_irq->gsi_assert_count[isa_irq] == 0) )
-        vpic_irq_negative_edge(d, isa_irq);
+         (--hvm_irq->gsi_assert_count[isa_irq] == 0) ) {
+        if ( !is_hybrid_evtchn_enabled_domain(d) )
+            vpic_irq_negative_edge(d, isa_irq);
+    }
 }
 
 void hvm_pci_intx_deassert(
@@ -93,6 +105,7 @@
 {
     struct hvm_irq *hvm_irq = &d->arch.hvm_domain.irq;
     unsigned int gsi = hvm_isa_irq_to_gsi(isa_irq);
+    int send_virq = 0;
 
     ASSERT(isa_irq <= 15);
 
@@ -101,11 +114,21 @@
     if ( !__test_and_set_bit(isa_irq, &hvm_irq->isa_irq.i) &&
          (hvm_irq->gsi_assert_count[gsi]++ == 0) )
     {
-        vioapic_irq_positive_edge(d, gsi);
-        vpic_irq_positive_edge(d, isa_irq);
+        if ( !is_hybrid_evtchn_enabled_domain(d) )
+        {
+            vioapic_irq_positive_edge(d, gsi);
+            vpic_irq_positive_edge(d, isa_irq);
+        }
+        else
+        {
+            send_virq = 1;
+        }
     }
 
     spin_unlock(&d->arch.hvm_domain.irq_lock);
+
+    if (send_virq)
+	    send_guest_global_virq(d, VIRQ_EMUL_PIN(isa_irq));
 }
 
 void hvm_isa_irq_deassert(
@@ -120,7 +143,10 @@
 
     if ( __test_and_clear_bit(isa_irq, &hvm_irq->isa_irq.i) &&
          (--hvm_irq->gsi_assert_count[gsi] == 0) )
-        vpic_irq_negative_edge(d, isa_irq);
+    {
+        if ( !is_hybrid_evtchn_enabled_domain(d) )
+            vpic_irq_negative_edge(d, isa_irq);
+    }
 
     spin_unlock(&d->arch.hvm_domain.irq_lock);
 }
diff --git a/xen/include/public/xen.h b/xen/include/public/xen.h
--- a/xen/include/public/xen.h
+++ b/xen/include/public/xen.h
@@ -159,7 +159,12 @@
 #define VIRQ_ARCH_6    22
 #define VIRQ_ARCH_7    23
 
-#define NR_VIRQS       24
+#define VIRQ_EMUL_PIN_START 24
+#define VIRQ_EMUL_PIN_END 39
+#define VIRQ_EMUL_PIN_NUM 16
+#define VIRQ_EMUL_PIN(x) (VIRQ_EMUL_PIN_START + x)
+
+#define NR_VIRQS       40
 
 /*
  * MMU-UPDATE REQUESTS

[-- Attachment #7: 6.patch --]
[-- Type: text/x-patch, Size: 1402 bytes --]

# HG changeset patch
# User Sheng Yang <sheng@linux.intel.com>
# Date 1253088149 -28800
# Node ID 86c908c45ac908767b6b6bdbd9e8d863b34a84a8
# Parent  183fa85d47d9411e6e21c4534d8e231feeac96a7
Reserved E820 for hybrid gnttab support

diff --git a/tools/firmware/hvmloader/config.h b/tools/firmware/hvmloader/config.h
--- a/tools/firmware/hvmloader/config.h
+++ b/tools/firmware/hvmloader/config.h
@@ -16,8 +16,12 @@
 
 /* MMIO hole: Hardcoded defaults, which can be dynamically expanded. */
 #define PCI_MEM_START       0xf0000000
-#define PCI_MEM_END         0xfc000000
+#define PCI_MEM_END         0xfbfe0000
 extern unsigned long pci_mem_start, pci_mem_end;
+
+/* Reserve 128KB for grant table */
+#define GNTTAB_MEMBASE	    0xfbfe0000
+#define GNTTAB_MEMSIZE	    0x20000
 
 /* We reserve 16MB for special BIOS mappings, etc. */
 #define RESERVED_MEMBASE    0xfc000000
diff --git a/tools/firmware/hvmloader/hvmloader.c b/tools/firmware/hvmloader/hvmloader.c
--- a/tools/firmware/hvmloader/hvmloader.c
+++ b/tools/firmware/hvmloader/hvmloader.c
@@ -602,6 +602,12 @@
     e820[nr].type = E820_RAM;
     nr++;
 
+    /* Reserved for grant table */
+    e820[nr].addr = GNTTAB_MEMBASE;
+    e820[nr].size = GNTTAB_MEMSIZE;
+    e820[nr].type = E820_RESERVED;
+    nr++;
+
     /*
      * Explicitly reserve space for special pages.
      * This space starts at RESERVED_MEMBASE an extends to cover various

[-- Attachment #8: 7.patch --]
[-- Type: text/x-patch, Size: 924 bytes --]

# HG changeset patch
# User Sheng Yang <sheng@linux.intel.com>
# Date 1253088151 -28800
# Node ID 607b7ad0488a9db6202e0a6178027684ac0eb027
# Parent  86c908c45ac908767b6b6bdbd9e8d863b34a84a8
Enable event channel feature in CPUID for configuration file

diff --git a/xen/include/public/arch-x86/cpuid.h b/xen/include/public/arch-x86/cpuid.h
--- a/xen/include/public/arch-x86/cpuid.h
+++ b/xen/include/public/arch-x86/cpuid.h
@@ -66,10 +66,12 @@
 #define XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD  (1u<<0)
 
 /* Mask unsupported CPUID specified by user */
-#define XEN_CPUID_FEAT2_MASK 0x3ul
+#define XEN_CPUID_FEAT2_MASK 0x7ul
 #define _XEN_CPUID_FEAT2_HYBRID 0
 #define XEN_CPUID_FEAT2_HYBRID (1u<<0)
 #define _XEN_CPUID_FEAT2_HYBRID_TIMER 1
 #define XEN_CPUID_FEAT2_HYBRID_TIMER (1u<<1)
+#define _XEN_CPUID_FEAT2_HYBRID_EVTCHN 2
+#define XEN_CPUID_FEAT2_HYBRID_EVTCHN (1u<<2)
 
 #endif /* __XEN_PUBLIC_ARCH_X86_CPUID_H__ */

[-- Attachment #9: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: Re: Xen: Hybrid extension patchset for hypervisor
  2009-09-16 19:11   ` Frank van der Linden
  2009-09-17  6:13     ` Yang, Sheng
@ 2009-09-17  6:25     ` Keir Fraser
  2009-09-17  6:30       ` Sheng Yang
  1 sibling, 1 reply; 18+ messages in thread
From: Keir Fraser @ 2009-09-17  6:25 UTC (permalink / raw)
  To: Frank van der Linden
  Cc: Yang, Sheng, Jeremy Fitzhardinge, xen-devel, Eddie Dong, Jun Nakajima

On 16/09/2009 20:11, "Frank van der Linden" <Frank.Vanderlinden@Sun.COM>
wrote:

> I did not see the hypervisor part of these patches appear in my
> xen-devel inbox. Is this a problem on my end, or were they not sent to
> the list? If so, I'm interested in them, so it'd be great if they could
> be sent to the list.

They were sent to the list with the subject heading "Xen: Hybrid extension
patchset for hypervisor".

 -- Keir

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: Re: Xen: Hybrid extension patchset for hypervisor
  2009-09-17  6:25     ` Keir Fraser
@ 2009-09-17  6:30       ` Sheng Yang
  0 siblings, 0 replies; 18+ messages in thread
From: Sheng Yang @ 2009-09-17  6:30 UTC (permalink / raw)
  To: Keir Fraser
  Cc: Frank van der Linden, Jeremy Fitzhardinge, xen-devel, Dong,
	Eddie, Nakajima, Jun

[-- Attachment #1: Type: text/plain, Size: 744 bytes --]

On Thursday 17 September 2009 14:25:52 Keir Fraser wrote:
> On 16/09/2009 20:11, "Frank van der Linden" <Frank.Vanderlinden@Sun.COM>
>
> wrote:
> > I did not see the hypervisor part of these patches appear in my
> > xen-devel inbox. Is this a problem on my end, or were they not sent to
> > the list? If so, I'm interested in them, so it'd be great if they could
> > be sent to the list.
>
> They were sent to the list with the subject heading "Xen: Hybrid extension
> patchset for hypervisor".
>
>  -- Keir

I found I can't find them(also the last one I sent, with attached patches) in 
the http://news.gmane.org/gmane.comp.emulators.xen.devel

Seems something bad happened in the mailserver? Try another mailbox... 

-- 
regards
Yang, Sheng


[-- Attachment #2: 1.patch --]
[-- Type: text/x-patch, Size: 1674 bytes --]

# HG changeset patch
# User Sheng Yang <sheng@linux.intel.com>
# Date 1253081544 -28800
# Node ID f92ed10b9522a4dd83a5e31f7f1a8c83bd33d70d
# Parent  e5d904a6c9e6e585ec02124dc8b6592f525ef6ba
Add user controlled cpuid 0x40000002.edx

So that user can turn on/off hybrid feature through configuration file.

diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -659,6 +659,7 @@
     struct domain *d = current->domain;
     /* Optionally shift out of the way of Viridian architectural leaves. */
     uint32_t base = is_viridian_domain(d) ? 0x40000100 : 0x40000000;
+    unsigned int tmp_eax, tmp_ebx, tmp_ecx, tmp_edx;
 
     idx -= base;
     if ( idx > 2 ) 
@@ -689,6 +690,14 @@
         *edx = 0;          /* Features 2 */
         if ( !is_hvm_vcpu(current) )
             *ecx |= XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD;
+
+        /* Check if additional feature specified, e.g. Hybrid */
+        if ( !is_viridian_domain(d) ) {
+            domain_cpuid(d, 0x40000002, 0,
+                         &tmp_eax, &tmp_ebx, &tmp_ecx, &tmp_edx);
+            if (tmp_edx != 0)
+                *edx = tmp_edx & XEN_CPUID_FEAT2_MASK;
+        }
         break;
 
     default:
diff --git a/xen/include/public/arch-x86/cpuid.h b/xen/include/public/arch-x86/cpuid.h
--- a/xen/include/public/arch-x86/cpuid.h
+++ b/xen/include/public/arch-x86/cpuid.h
@@ -65,4 +65,7 @@
 #define _XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD 0
 #define XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD  (1u<<0)
 
+/* Mask unsupported CPUID specified by user */
+#define XEN_CPUID_FEAT2_MASK 0x0ul
+
 #endif /* __XEN_PUBLIC_ARCH_X86_CPUID_H__ */

[-- Attachment #3: 2.patch --]
[-- Type: text/x-patch, Size: 5339 bytes --]

# HG changeset patch
# User Sheng Yang <sheng@linux.intel.com>
# Date 1253085983 -28800
# Node ID 2d68b9d29b425f138345c7b9e0e8bc66b84c7ba1
# Parent  f92ed10b9522a4dd83a5e31f7f1a8c83bd33d70d
Add HVMOP_enable_hybrid hypercall

As the entry of hybrid.

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -2028,6 +2028,17 @@
     HYPERCALL(hvm_op)
 };
 
+static hvm_hypercall_t *hvm_hypercall_hybrid64_table[NR_hypercalls] = {
+    [ __HYPERVISOR_memory_op ] = (hvm_hypercall_t *)hvm_memory_op,
+    [ __HYPERVISOR_grant_table_op ] = (hvm_hypercall_t *)hvm_grant_table_op,
+    HYPERCALL(xen_version),
+    HYPERCALL(console_io),
+    HYPERCALL(vcpu_op),
+    HYPERCALL(sched_op),
+    HYPERCALL(event_channel_op),
+    HYPERCALL(hvm_op),
+};
+
 #endif /* defined(__x86_64__) */
 
 int hvm_do_hypercall(struct cpu_user_regs *regs)
@@ -2058,7 +2069,8 @@
     if ( (eax & 0x80000000) && is_viridian_domain(curr->domain) )
         return viridian_hypercall(regs);
 
-    if ( (eax >= NR_hypercalls) || !hvm_hypercall32_table[eax] )
+    if ( (eax >= NR_hypercalls) ||
+         (!hvm_hypercall32_table[eax] && !is_hybrid_vcpu(curr)) )
     {
         regs->eax = -ENOSYS;
         return HVM_HCALL_completed;
@@ -2073,11 +2085,18 @@
                     regs->rdi, regs->rsi, regs->rdx, regs->r10, regs->r8);
 
         this_cpu(hvm_64bit_hcall) = 1;
-        regs->rax = hvm_hypercall64_table[eax](regs->rdi,
-                                               regs->rsi,
-                                               regs->rdx,
-                                               regs->r10,
-                                               regs->r8); 
+        if (is_hybrid_vcpu(curr))
+            regs->rax = hvm_hypercall_hybrid64_table[eax](regs->rdi,
+                                                          regs->rsi,
+                                                          regs->rdx,
+                                                          regs->r10,
+                                                          regs->r8);
+        else
+            regs->rax = hvm_hypercall64_table[eax](regs->rdi,
+                                                   regs->rsi,
+                                                   regs->rdx,
+                                                   regs->r10,
+                                                   regs->r8);
         this_cpu(hvm_64bit_hcall) = 0;
     }
     else
@@ -2752,6 +2771,32 @@
         break;
     }
 
+    case HVMOP_enable_hybrid: {
+        struct xen_hvm_hybrid_type a;
+        struct domain *d;
+
+        if ( copy_from_guest(&a, arg, 1) )
+            return -EFAULT;
+
+        rc = rcu_lock_target_domain_by_id(a.domid, &d);
+        if ( rc != 0 )
+            return rc;
+
+        rc = -EINVAL;
+        if ( !is_hvm_domain(d) )
+            goto param_fail5;
+
+        rc = xsm_hvm_param(d, op);
+        if ( rc )
+            goto param_fail5;
+
+        d->hybrid_enabled = XEN_HYBRID_ENABLED;
+        printk("HVM: Hybrid domain enabled\n");
+    param_fail5:
+        rcu_unlock_domain(d);
+        break;
+    }
+
     default:
     {
         gdprintk(XENLOG_WARNING, "Bad HVM op %ld.\n", op);
diff --git a/xen/include/public/arch-x86/cpuid.h b/xen/include/public/arch-x86/cpuid.h
--- a/xen/include/public/arch-x86/cpuid.h
+++ b/xen/include/public/arch-x86/cpuid.h
@@ -66,6 +66,8 @@
 #define XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD  (1u<<0)
 
 /* Mask unsupported CPUID specified by user */
-#define XEN_CPUID_FEAT2_MASK 0x0ul
+#define XEN_CPUID_FEAT2_MASK 0x1ul
+#define _XEN_CPUID_FEAT2_HYBRID 0
+#define XEN_CPUID_FEAT2_HYBRID (1u<<0)
 
 #endif /* __XEN_PUBLIC_ARCH_X86_CPUID_H__ */
diff --git a/xen/include/public/hvm/hvm_op.h b/xen/include/public/hvm/hvm_op.h
--- a/xen/include/public/hvm/hvm_op.h
+++ b/xen/include/public/hvm/hvm_op.h
@@ -125,6 +125,13 @@
 typedef struct xen_hvm_set_mem_type xen_hvm_set_mem_type_t;
 DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_mem_type_t);
 
+#define HVMOP_enable_hybrid    9
+struct xen_hvm_hybrid_type {
+    domid_t domid;
+    uint64_t flags;
+#define HVM_HYBRID_TIMER (1ull<<1)
+#define HVM_HYBRID_EVTCHN (1ull<<2)
+};
 
 #endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
 
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -269,6 +269,11 @@
 
     /* VRAM dirty support. */
     struct sh_dirty_vram *dirty_vram;
+
+#define XEN_HYBRID_ENABLED          (1u << 0)
+#define XEN_HYBRID_TIMER_ENABLED    (1u << 1)
+#define XEN_HYBRID_EVTCHN_ENABLED   (1u << 2)
+    uint64_t hybrid_enabled;
 };
 
 struct domain_setup_info
@@ -551,6 +556,12 @@
 #define is_hvm_domain(d) ((d)->is_hvm)
 #define is_hvm_vcpu(v)   (is_hvm_domain(v->domain))
 #define need_iommu(d)    ((d)->need_iommu && !(d)->is_hvm)
+#define is_hybrid_domain(d) ((d)->hybrid_enabled & XEN_HYBRID_ENABLED)
+#define is_hybrid_vcpu(v)   (is_hybrid_domain(v->domain))
+#define is_hybrid_timer_enabled_domain(d) (is_hybrid_domain(d) && \
+		(d)->hybrid_enabled & XEN_HYBRID_TIMER_ENABLED)
+#define is_hybrid_evtchn_enabled_domain(d) (is_hybrid_domain(d) && \
+		(d)->hybrid_enabled & XEN_HYBRID_EVTCHN_ENABLED)
 
 void set_vcpu_migration_delay(unsigned int delay);
 unsigned int get_vcpu_migration_delay(void);

[-- Attachment #4: 3.patch --]
[-- Type: text/x-patch, Size: 2173 bytes --]

# HG changeset patch
# User Sheng Yang <sheng@linux.intel.com>
# Date 1253088144 -28800
# Node ID 91c520c2cefccc8c654839d9b38d04bd8801a391
# Parent  2d68b9d29b425f138345c7b9e0e8bc66b84c7ba1
Add pvtimer support for HVM

We need to sync the TSC with the hypervisor and update the guest wallclock time if we want to
enable pvtimer. The timer interrupt is delivered through IRQ0.

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -2792,6 +2792,11 @@
 
         d->hybrid_enabled = XEN_HYBRID_ENABLED;
         printk("HVM: Hybrid domain enabled\n");
+        if (a.flags & HVM_HYBRID_TIMER) {
+            hvm_funcs.set_tsc_offset(d->vcpu[0], 0);
+            update_domain_wallclock_time(d);
+            d->hybrid_enabled |= XEN_HYBRID_TIMER_ENABLED;
+        }
     param_fail5:
         rcu_unlock_domain(d);
         break;
diff --git a/xen/arch/x86/time.c b/xen/arch/x86/time.c
--- a/xen/arch/x86/time.c
+++ b/xen/arch/x86/time.c
@@ -30,6 +30,8 @@
 #include <asm/div64.h>
 #include <asm/hpet.h>
 #include <io_ports.h>
+
+#include <asm/hvm/irq.h>
 
 /* opt_clocksource: Force clocksource to one of: pit, hpet, cyclone, acpi. */
 static char opt_clocksource[10];
@@ -1323,6 +1325,11 @@
 void send_timer_event(struct vcpu *v)
 {
     send_guest_vcpu_virq(v, VIRQ_TIMER);
+    if (is_hybrid_timer_enabled_domain(v->domain) &&
+	!is_hybrid_evtchn_enabled_domain(v->domain)) {
+        hvm_isa_irq_deassert(v->domain, 0);
+        hvm_isa_irq_assert(v->domain, 0);
+    }
 }
 
 /* Return secs after 00:00:00 localtime, 1 January, 1970. */
diff --git a/xen/include/public/arch-x86/cpuid.h b/xen/include/public/arch-x86/cpuid.h
--- a/xen/include/public/arch-x86/cpuid.h
+++ b/xen/include/public/arch-x86/cpuid.h
@@ -66,8 +66,10 @@
 #define XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD  (1u<<0)
 
 /* Mask unsupported CPUID specified by user */
-#define XEN_CPUID_FEAT2_MASK 0x1ul
+#define XEN_CPUID_FEAT2_MASK 0x3ul
 #define _XEN_CPUID_FEAT2_HYBRID 0
 #define XEN_CPUID_FEAT2_HYBRID (1u<<0)
+#define _XEN_CPUID_FEAT2_HYBRID_TIMER 1
+#define XEN_CPUID_FEAT2_HYBRID_TIMER (1u<<1)
 
 #endif /* __XEN_PUBLIC_ARCH_X86_CPUID_H__ */

[-- Attachment #5: 4.patch --]
[-- Type: text/x-patch, Size: 4153 bytes --]

# HG changeset patch
# User Sheng Yang <sheng@linux.intel.com>
# Date 1253088147 -28800
# Node ID 78e51528eba7c5fd328c90acd4edc58ba46b58e8
# Parent  91c520c2cefccc8c654839d9b38d04bd8801a391
Add a new type of HVM callback

HVMIRQ_callback_vector specifies one vector to trigger the irq handler in the guest.
It doesn't need an EOI action because it uses the IDT directly.

diff --git a/xen/arch/x86/hvm/irq.c b/xen/arch/x86/hvm/irq.c
--- a/xen/arch/x86/hvm/irq.c
+++ b/xen/arch/x86/hvm/irq.c
@@ -165,6 +165,8 @@
             __hvm_pci_intx_assert(d, pdev, pintx);
         else
             __hvm_pci_intx_deassert(d, pdev, pintx);
+    case HVMIRQ_callback_vector:
+        vcpu_kick(v);
     default:
         break;
     }
@@ -251,7 +253,7 @@
 
     via_type = (uint8_t)(via >> 56) + 1;
     if ( ((via_type == HVMIRQ_callback_gsi) && (via == 0)) ||
-         (via_type > HVMIRQ_callback_pci_intx) )
+         (via_type > HVMIRQ_callback_vector) )
         via_type = HVMIRQ_callback_none;
 
     spin_lock(&d->arch.hvm_domain.irq_lock);
@@ -297,6 +299,9 @@
         if ( hvm_irq->callback_via_asserted )
              __hvm_pci_intx_assert(d, pdev, pintx);
         break;
+    case HVMIRQ_callback_vector:
+        hvm_irq->callback_via.vector = (uint8_t)via;
+        break;
     default:
         break;
     }
@@ -312,6 +317,10 @@
     case HVMIRQ_callback_pci_intx:
         printk("PCI INTx Dev 0x%02x Int%c\n", pdev, 'A' + pintx);
         break;
+    case HVMIRQ_callback_vector:
+        printk("Set HVMIRQ_callback_vector to %u\n",
+               hvm_irq->callback_via.vector);
+        break;
     default:
         printk("None\n");
         break;
@@ -322,6 +331,10 @@
 {
     struct hvm_domain *plat = &v->domain->arch.hvm_domain;
     int vector;
+
+    if (plat->irq.callback_via_type == HVMIRQ_callback_vector &&
+            vcpu_info(v, evtchn_upcall_pending))
+        return hvm_intack_vector(plat->irq.callback_via.vector);
 
     if ( unlikely(v->nmi_pending) )
         return hvm_intack_nmi;
@@ -357,6 +370,8 @@
     case hvm_intsrc_lapic:
         if ( !vlapic_ack_pending_irq(v, intack.vector) )
             intack = hvm_intack_none;
+        break;
+    case hvm_intsrc_vector:
         break;
     default:
         intack = hvm_intack_none;
diff --git a/xen/arch/x86/hvm/vmx/intr.c b/xen/arch/x86/hvm/vmx/intr.c
--- a/xen/arch/x86/hvm/vmx/intr.c
+++ b/xen/arch/x86/hvm/vmx/intr.c
@@ -161,7 +161,8 @@
     {
         HVMTRACE_2D(INJ_VIRQ, intack.vector, /*fake=*/ 0);
         vmx_inject_extint(intack.vector);
-        pt_intr_post(v, intack);
+        if (intack.source != hvm_intsrc_vector)
+             pt_intr_post(v, intack);
     }
 
     /* Is there another IRQ to queue up behind this one? */
diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -31,7 +31,8 @@
     hvm_intsrc_none,
     hvm_intsrc_pic,
     hvm_intsrc_lapic,
-    hvm_intsrc_nmi
+    hvm_intsrc_nmi,
+    hvm_intsrc_vector,
 };
 struct hvm_intack {
     uint8_t source; /* enum hvm_intsrc */
@@ -41,6 +42,7 @@
 #define hvm_intack_pic(vec)   ( (struct hvm_intack) { hvm_intsrc_pic,   vec } )
 #define hvm_intack_lapic(vec) ( (struct hvm_intack) { hvm_intsrc_lapic, vec } )
 #define hvm_intack_nmi        ( (struct hvm_intack) { hvm_intsrc_nmi,   2 } )
+#define hvm_intack_vector(vec)( (struct hvm_intack) { hvm_intsrc_vector, vec } )
 enum hvm_intblk {
     hvm_intblk_none,      /* not blocked (deliverable) */
     hvm_intblk_shadow,    /* MOV-SS or STI shadow */
diff --git a/xen/include/asm-x86/hvm/irq.h b/xen/include/asm-x86/hvm/irq.h
--- a/xen/include/asm-x86/hvm/irq.h
+++ b/xen/include/asm-x86/hvm/irq.h
@@ -54,12 +54,14 @@
         enum {
             HVMIRQ_callback_none,
             HVMIRQ_callback_gsi,
-            HVMIRQ_callback_pci_intx
+            HVMIRQ_callback_pci_intx,
+            HVMIRQ_callback_vector,
         } callback_via_type;
     };
     union {
         uint32_t gsi;
         struct { uint8_t dev, intx; } pci;
+        uint32_t vector;
     } callback_via;
 
     /* Number of INTx wires asserting each PCI-ISA link. */

[-- Attachment #6: 5.patch --]
[-- Type: text/x-patch, Size: 3721 bytes --]

# HG changeset patch
# User Sheng Yang <sheng@linux.intel.com>
# Date 1253088148 -28800
# Node ID 183fa85d47d9411e6e21c4534d8e231feeac96a7
# Parent  78e51528eba7c5fd328c90acd4edc58ba46b58e8
Enable event channel and QEmu device support for hybrid guest

Each VIRQ from 24 to 40 is bound to a QEmu emulated pin, so that if a device
asserts the pin, a VIRQ is delivered to the guest instead.

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -2797,6 +2797,8 @@
             update_domain_wallclock_time(d);
             d->hybrid_enabled |= XEN_HYBRID_TIMER_ENABLED;
         }
+        if (a.flags & HVM_HYBRID_EVTCHN)
+            d->hybrid_enabled |= XEN_HYBRID_EVTCHN_ENABLED;
     param_fail5:
         rcu_unlock_domain(d);
         break;
diff --git a/xen/arch/x86/hvm/irq.c b/xen/arch/x86/hvm/irq.c
--- a/xen/arch/x86/hvm/irq.c
+++ b/xen/arch/x86/hvm/irq.c
@@ -46,8 +46,18 @@
     if ( (hvm_irq->pci_link_assert_count[link]++ == 0) && isa_irq &&
          (hvm_irq->gsi_assert_count[isa_irq]++ == 0) )
     {
-        vioapic_irq_positive_edge(d, isa_irq);
-        vpic_irq_positive_edge(d, isa_irq);
+        if ( !is_hybrid_evtchn_enabled_domain(d) )
+        {
+            vioapic_irq_positive_edge(d, isa_irq);
+            vpic_irq_positive_edge(d, isa_irq);
+        }
+        else
+        {
+            /* TODO fix the critical region here */
+            spin_unlock(&d->arch.hvm_domain.irq_lock);
+            send_guest_global_virq(d, VIRQ_EMUL_PIN(isa_irq));
+            spin_lock(&d->arch.hvm_domain.irq_lock);
+	}
     }
 }
 
@@ -76,8 +86,10 @@
     link    = hvm_pci_intx_link(device, intx);
     isa_irq = hvm_irq->pci_link.route[link];
     if ( (--hvm_irq->pci_link_assert_count[link] == 0) && isa_irq &&
-         (--hvm_irq->gsi_assert_count[isa_irq] == 0) )
-        vpic_irq_negative_edge(d, isa_irq);
+         (--hvm_irq->gsi_assert_count[isa_irq] == 0) ) {
+        if ( !is_hybrid_evtchn_enabled_domain(d) )
+            vpic_irq_negative_edge(d, isa_irq);
+    }
 }
 
 void hvm_pci_intx_deassert(
@@ -93,6 +105,7 @@
 {
     struct hvm_irq *hvm_irq = &d->arch.hvm_domain.irq;
     unsigned int gsi = hvm_isa_irq_to_gsi(isa_irq);
+    int send_virq = 0;
 
     ASSERT(isa_irq <= 15);
 
@@ -101,11 +114,21 @@
     if ( !__test_and_set_bit(isa_irq, &hvm_irq->isa_irq.i) &&
          (hvm_irq->gsi_assert_count[gsi]++ == 0) )
     {
-        vioapic_irq_positive_edge(d, gsi);
-        vpic_irq_positive_edge(d, isa_irq);
+        if ( !is_hybrid_evtchn_enabled_domain(d) )
+        {
+            vioapic_irq_positive_edge(d, gsi);
+            vpic_irq_positive_edge(d, isa_irq);
+        }
+        else
+        {
+            send_virq = 1;
+        }
     }
 
     spin_unlock(&d->arch.hvm_domain.irq_lock);
+
+    if (send_virq)
+	    send_guest_global_virq(d, VIRQ_EMUL_PIN(isa_irq));
 }
 
 void hvm_isa_irq_deassert(
@@ -120,7 +143,10 @@
 
     if ( __test_and_clear_bit(isa_irq, &hvm_irq->isa_irq.i) &&
          (--hvm_irq->gsi_assert_count[gsi] == 0) )
-        vpic_irq_negative_edge(d, isa_irq);
+    {
+        if ( !is_hybrid_evtchn_enabled_domain(d) )
+            vpic_irq_negative_edge(d, isa_irq);
+    }
 
     spin_unlock(&d->arch.hvm_domain.irq_lock);
 }
diff --git a/xen/include/public/xen.h b/xen/include/public/xen.h
--- a/xen/include/public/xen.h
+++ b/xen/include/public/xen.h
@@ -159,7 +159,12 @@
 #define VIRQ_ARCH_6    22
 #define VIRQ_ARCH_7    23
 
-#define NR_VIRQS       24
+#define VIRQ_EMUL_PIN_START 24
+#define VIRQ_EMUL_PIN_END 39
+#define VIRQ_EMUL_PIN_NUM 16
+#define VIRQ_EMUL_PIN(x) (VIRQ_EMUL_PIN_START + x)
+
+#define NR_VIRQS       40
 
 /*
  * MMU-UPDATE REQUESTS

[-- Attachment #7: 6.patch --]
[-- Type: text/x-patch, Size: 1402 bytes --]

# HG changeset patch
# User Sheng Yang <sheng@linux.intel.com>
# Date 1253088149 -28800
# Node ID 86c908c45ac908767b6b6bdbd9e8d863b34a84a8
# Parent  183fa85d47d9411e6e21c4534d8e231feeac96a7
Reserved E820 for hybrid gnttab support

diff --git a/tools/firmware/hvmloader/config.h b/tools/firmware/hvmloader/config.h
--- a/tools/firmware/hvmloader/config.h
+++ b/tools/firmware/hvmloader/config.h
@@ -16,8 +16,12 @@
 
 /* MMIO hole: Hardcoded defaults, which can be dynamically expanded. */
 #define PCI_MEM_START       0xf0000000
-#define PCI_MEM_END         0xfc000000
+#define PCI_MEM_END         0xfbfe0000
 extern unsigned long pci_mem_start, pci_mem_end;
+
+/* Reserve 128KB for grant table */
+#define GNTTAB_MEMBASE	    0xfbfe0000
+#define GNTTAB_MEMSIZE	    0x20000
 
 /* We reserve 16MB for special BIOS mappings, etc. */
 #define RESERVED_MEMBASE    0xfc000000
diff --git a/tools/firmware/hvmloader/hvmloader.c b/tools/firmware/hvmloader/hvmloader.c
--- a/tools/firmware/hvmloader/hvmloader.c
+++ b/tools/firmware/hvmloader/hvmloader.c
@@ -602,6 +602,12 @@
     e820[nr].type = E820_RAM;
     nr++;
 
+    /* Reserved for grant table */
+    e820[nr].addr = GNTTAB_MEMBASE;
+    e820[nr].size = GNTTAB_MEMSIZE;
+    e820[nr].type = E820_RESERVED;
+    nr++;
+
     /*
      * Explicitly reserve space for special pages.
      * This space starts at RESERVED_MEMBASE an extends to cover various

[-- Attachment #8: 7.patch --]
[-- Type: text/x-patch, Size: 924 bytes --]

# HG changeset patch
# User Sheng Yang <sheng@linux.intel.com>
# Date 1253088151 -28800
# Node ID 607b7ad0488a9db6202e0a6178027684ac0eb027
# Parent  86c908c45ac908767b6b6bdbd9e8d863b34a84a8
Enable event channel feature in CPUID for configuration file

diff --git a/xen/include/public/arch-x86/cpuid.h b/xen/include/public/arch-x86/cpuid.h
--- a/xen/include/public/arch-x86/cpuid.h
+++ b/xen/include/public/arch-x86/cpuid.h
@@ -66,10 +66,12 @@
 #define XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD  (1u<<0)
 
 /* Mask unsupported CPUID specified by user */
-#define XEN_CPUID_FEAT2_MASK 0x3ul
+#define XEN_CPUID_FEAT2_MASK 0x7ul
 #define _XEN_CPUID_FEAT2_HYBRID 0
 #define XEN_CPUID_FEAT2_HYBRID (1u<<0)
 #define _XEN_CPUID_FEAT2_HYBRID_TIMER 1
 #define XEN_CPUID_FEAT2_HYBRID_TIMER (1u<<1)
+#define _XEN_CPUID_FEAT2_HYBRID_EVTCHN 2
+#define XEN_CPUID_FEAT2_HYBRID_EVTCHN (1u<<2)
 
 #endif /* __XEN_PUBLIC_ARCH_X86_CPUID_H__ */

[-- Attachment #9: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: Xen: Hybrid extension patchset for hypervisor
  2009-09-16  9:08 ` Keir Fraser
  2009-09-16 14:04   ` Keir Fraser
  2009-09-16 19:11   ` Frank van der Linden
@ 2009-09-17  6:47   ` Yang, Sheng
  2 siblings, 0 replies; 18+ messages in thread
From: Yang, Sheng @ 2009-09-17  6:47 UTC (permalink / raw)
  To: Keir Fraser; +Cc: Jeremy Fitzhardinge, xen-devel, Dong, Eddie, Nakajima, Jun

On Wednesday 16 September 2009 17:08:20 Keir Fraser wrote:
> On 16/09/2009 09:44, "Yang, Sheng" <sheng.yang@intel.com> wrote:
> > Hi Keir & Jeremy
> >
> > Here is the hypervisor part of hybrid extension support.
> >
> > Please review, thanks!
>
> The principle is okay I guess. These changes would have to be trickled in
> with a  really good explanation and justification for each one. 
>
> For
> example, I'm not clear why the enable-hybrid hypercall is needed. Why not
> just provide access to evtchn and timer hypercalls always, and guest sues
> them if it is capable of it? 

We have proposed a component independence approach, which means users can enable 
the PV timer or evtchn separately. Currently we have some limits with the event channel 
implementation, e.g. no passthrough device support, and SMP is also not ready 
at this time(but in progress). (And I think there would be some version issue 
later, if we support more features).

The enable-hybrid hypercall is there because we can adjust some hypervisor 
behaviour if we know the guest would be hybrid rather than hvm or pv. For example, 
HVM assumes the TSC starts from 0, but the pv timer assumes the TSC is no different 
from native. So we need to modify the tsc offset to 0 to make the pv timer work. And 
we may also do some optimization in the hypervisor if we know that the guest is hybrid 
rather than hvm/pv.

> I'm also not sure why PV timer events get
> routed to irq0 -- why not via an event channel as usual, now that you are
> enabling HVM guests to use the evtchn subsystem? 

As stated above, we support a mode that uses the PV timer without the event channel. 
But I am thinking maybe we can let evtchn co-exist with IOAPIC/LAPIC, then pv 
timer use evtchn, others goes to normal hardware way. And another feature can 
replace IOAPIC/LAPIC with evtchn.

> What's a hybrid gnttab,
> and why does it need an explciit reserved e820 region? And so on.

We need some memory to map gnttab. It was provided by a QEmu emulated device 
in HVM, but we think it's not elegant that a basic feature depends on a 
device, so we got this e820 region...

> The general principle of these patches seems to be to create a set of
> individual, and perhaps largely independent, accelerations/enlightenments
> to the HVM interface. I can at least agree with and support that aim.

Thanks. :)

-- 
regards
Yang, Sheng

>
>  -- Keir

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: Re: Xen: Hybrid extension patchset for hypervisor
  2009-09-16 21:22           ` Jeremy Fitzhardinge
@ 2009-09-17  9:16             ` Ian Campbell
  2009-09-17 15:56               ` Nakajima, Jun
  2009-09-17 17:19               ` Jeremy Fitzhardinge
  0 siblings, 2 replies; 18+ messages in thread
From: Ian Campbell @ 2009-09-17  9:16 UTC (permalink / raw)
  To: Jeremy Fitzhardinge
  Cc: xen-devel, Yang, Sheng, Dong, Eddie, Fraser, Nakajima, Jun

On Wed, 2009-09-16 at 22:22 +0100, Jeremy Fitzhardinge wrote:
> On 09/16/09 14:12, Ian Campbell wrote:
> >>> Based on our data, what we would want in PV 64-bit guests are, fundamentally:
> >>> - have the kernel run in ring 0 (so that it can regain the performance enhancements)
> >>>   
> >>>       
> >> That's no problem.  PV kernels don't currently assume they're running in
> >> any particular ring, so they'd be happy to run in ring 0 if that's how
> >> they're started (if there are problems, I'd consider that a bug).  We
> >> could then check for ring 0 and enable syscall/sysenter.
> >>     
> > XENFEAT_supervisor_mode_kernel is supposed to enable this behaviour,
> > although it hasn't been actively used for several years and never in the
> > pvops kernel so you can bet it has bit-rotted...
> >   
> 
> That tends to have a slightly different meaning, viz "dom0 really *is*
> privileged and can do anything it feels like".  It isn't necessary to
> have a specific feature/mechanism for "kernel happens to be in ring 0";
> it can look at its own cs ring number.

In practise, at least for the 2.6.18-xen tree (which is the only one
where I expect it was ever completely implemented), it is only used to
set the kernel CS and DS and to gate sysenter setup (for which I think
we have a better mechanism today) but you are right that in principle it
could be more far reaching than that.

> >> We could do that with minimal API/ABI changes by:
> >>
> >>     * Providing an identity p2m table
> >>     * Changing the hypercall page to make pte writes simple memory
> >>       writes (no hypercalls); xen would still keep track of pinned pages
> >>       and trap'n'emulate on them for back-compatibility (but fast-path
> >>       with no validation).  We could expose the presence of HAP via
> >>       xen_features so that guests know they can avoid marking pagetables
> >>       RO, etc.
> >>     * Similarly, cr3 changes can be fast-pathed within the hypercall page.
> >>     * Whatever else I've overlooked.
> >>     
> > Some combination of XENFEAT_writable_page_tables
> > XENFEAT_writable_descriptor_tables and XENFEAT_auto_translated_physmap
> > might be of interest for this bit.
> 
> Making use of XENFEAT_auto_translated_physmap would avoid the need for
> identity p2m/m2p tables, but I'm not sure whether it still works.  I got
> close to completely removing all references to it at one point, but I
> think ia64 uses it?

I very much expect that it'll need fixing/(re)implementing on both the
kernel and hypervisor side...

Ian.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* RE: Re: Xen: Hybrid extension patchset for hypervisor
  2009-09-17  9:16             ` Ian Campbell
@ 2009-09-17 15:56               ` Nakajima, Jun
  2009-09-17 17:34                 ` Jeremy Fitzhardinge
  2009-09-17 17:19               ` Jeremy Fitzhardinge
  1 sibling, 1 reply; 18+ messages in thread
From: Nakajima, Jun @ 2009-09-17 15:56 UTC (permalink / raw)
  To: Ian Campbell, Jeremy Fitzhardinge
  Cc: Yang, Sheng, xen-devel, Dong, Eddie, Fraser

Ian Campbell wrote on Thu, 17 Sep 2009 at 02:16:25:

> On Wed, 2009-09-16 at 22:22 +0100, Jeremy Fitzhardinge wrote:
>>>> We could do that with minimal API/ABI changes by:
>>>> 
>>>>     * Providing an identity p2m table
>>>>     * Changing the hypercall page to make pte writes simple memory
>>>>       writes (no hypercalls); xen would still keep track of pinned
>>>>       pages and trap'n'emulate on them for back-compatibility (but
>>>>       fast- path with no validation).  We could expose the presence
>>>>       of HAP via xen_features so that guests know they can avoid
>>>>       marking pagetables RO, etc.
>>>>     * Similarly, cr3 changes can be fast-pathed within the hypercall
>>>>     page. * Whatever else I've overlooked.
>>>> 
>>> Some combination of XENFEAT_writable_page_tables
>>> XENFEAT_writable_descriptor_tables and XENFEAT_auto_translated_physmap
>>> might be of interest for this bit.
>>  Making use of XENFEAT_auto_translated_physmap would avoid the need for
>> identity p2m/m2p tables, but I'm not sure whether it still works.  I
>> got close to completely removing all references to it at one point, but
>> I think ia64 uses it?
> 
> I very much expect that it'll need fixing/(re)implementing on both the
> kernel and hypervisor side...

To me, leveraging the native MMU code, rather than using existing API/ABI, would simplify both the guest and hypervisor side if hardware MMU virtualization is present. For example:
- today a 64-bit PV guest builds/switches page tables depending on the kernel/user mode. It's not required anymore.
- we can automatically get large page support (2MB, 1GB)

I thought pv_xxx_ps (such as pv_time, pv_cpu_ops, pv_mmu_ops, etc.) was designed to choose the right pv_ops accordingly depending on the features available. 

> 
> Ian.

Jun
___
Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: Re: Xen: Hybrid extension patchset for hypervisor
  2009-09-17  9:16             ` Ian Campbell
  2009-09-17 15:56               ` Nakajima, Jun
@ 2009-09-17 17:19               ` Jeremy Fitzhardinge
  1 sibling, 0 replies; 18+ messages in thread
From: Jeremy Fitzhardinge @ 2009-09-17 17:19 UTC (permalink / raw)
  To: Ian Campbell
  Cc: Yang, Sheng, xen-devel, Dong, Eddie, Keir Fraser, Nakajima, Jun

On 09/17/09 02:16, Ian Campbell wrote:
> In practise, at least for the 2.6.18-xen tree (which is the only one
> where I expect it was ever completely implemented), it is only used to
> set the kernel CS and DS and to gate sysenter setup (for which I think
> we have a better mechanism today) but you are right that in principle it
> could be more far reaching than that.
>   

Yeah.  Looks like the only other thing it allows is that the guest can
set arbitrary gdt entries.

>> Making use of XENFEAT_auto_translated_physmap would avoid the need for
>> identity p2m/m2p tables, but I'm not sure whether it still works.  I got
>> close to completely removing all references to it at one point, but I
>> think ia64 uses it?
>>     
> I very much expect that it'll need fixing/(re)implementing on both the
> kernel and hypervisor side...
>   

Yeah, I think the Xen side is missing altogether on x86 now.  I don't
think its worth recovering it unless its the ABI for making HAP
available to guests.  If IA64 is using it, then I think big parts of the
kernel are probably in OK shape (drivers, etc).

    J

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: Re: Xen: Hybrid extension patchset for hypervisor
  2009-09-17 15:56               ` Nakajima, Jun
@ 2009-09-17 17:34                 ` Jeremy Fitzhardinge
  2009-09-19  0:17                   ` Nakajima, Jun
  0 siblings, 1 reply; 18+ messages in thread
From: Jeremy Fitzhardinge @ 2009-09-17 17:34 UTC (permalink / raw)
  To: Nakajima, Jun
  Cc: Ian Campbell, Yang, Sheng, xen-devel, Dong, Eddie, Keir Fraser

On 09/17/09 08:56, Nakajima, Jun wrote:
>> I very much expect that it'll need fixing/(re)implementing on both the
>> kernel and hypervisor side...
>>     
> To me, leveraging the native MMU code, rather than using existing API/ABI, would simplify both the guest and hypervisor side if hardware MMU virtualization is present. For example:
> - today a 64-bit PV guest builds/switches page tables depending on the kernel/user mode. It's not required anymore.
>   
The two pagetables are largely shared, so it really comes down to
maintaining an additional L4 page.  If the domain is running in a HAP
container, then then the "kernel" pagetable would have proper U/S bit
its pagetable entries (ie, Xen wouldn't strip them off, or set global on
user mappings) and then loading a new pagetable would just mean
reloading cr3 with the kernel pagetable.  In other words, we can still
do an efficient pagetable swap without needing to change the guest or
the ABI at all; the user pagetable would be unused and ignored, but that
isn't a huge burden.

> - we can automatically get large page support (2MB, 1GB)
>   
Once the requirement to mark pagetable pages RO goes away, then it would
be easy to add large-page support.

> I thought pv_xxx_ps (such as pv_time, pv_cpu_ops, pv_mmu_ops, etc.) was designed to choose the right pv_ops accordingly depending on the features available. 
>   

Sure.  It would be easy to either use new special-purpose just plain
native versions of those ops if that's the right thing to do; but it
would be nice if a current unmodified PV guest worked within a HVM
container and got at least some benefit from doing so.  Also, pagetable
issues have repercussions beyond just the raw pagetable update functions.

Of course you can get both these features just by booting the kernel as
an hvm guest.  But if we're talking about giving PV kernels some
benefits from hvm/hap hardware features, I think we should looking at it
from the perspective of starting with a PV kernel then adding
incremental changes.

    J

^ permalink raw reply	[flat|nested] 18+ messages in thread

* RE: Re: Xen: Hybrid extension patchset for hypervisor
  2009-09-17 17:34                 ` Jeremy Fitzhardinge
@ 2009-09-19  0:17                   ` Nakajima, Jun
  2009-09-19  0:28                     ` Jeremy Fitzhardinge
  0 siblings, 1 reply; 18+ messages in thread
From: Nakajima, Jun @ 2009-09-19  0:17 UTC (permalink / raw)
  To: Jeremy Fitzhardinge
  Cc: xen-devel, Yang, Sheng, Dong, Eddie, Ian Campbell, Fraser

Jeremy Fitzhardinge wrote on Thu, 17 Sep 2009 at 10:34:51:

> On 09/17/09 08:56, Nakajima, Jun wrote:
>> I thought pv_xxx_ps (such as pv_time, pv_cpu_ops, pv_mmu_ops, etc.)
> was designed to choose the right pv_ops accordingly depending on the
> features available.
>> 
>  Sure.  It would be easy to either use new special-purpose just plain
> native versions of those ops if that's the right thing to do; but it
> would be nice if a current unmodified PV guest worked within a HVM
> container and got at least some benefit from doing so.  Also, pagetable
> issues have repercussions beyond just the raw pagetable update functions.
> 
> Of course you can get both these features just by booting the kernel as
> an hvm guest.  But if we're talking about giving PV kernels some
> benefits from hvm/hap hardware features, I think we should looking at it
> from the perspective of starting with a PV kernel then adding
> incremental changes.
> 

Even if we start from PV kernels, I think what we should do is to implement ability (as "incremental changes") for PV guests to stop using PV MMU (and PV CPU) at boot time depending on the H/W features available rather than to keep using the same ABI, because we may not need them in the near future. Then, such PV kernels would be at par or faster/more efficient than pure HVM guests on machines with HAP enabled because of the other PV features. 

Jun
___
Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: Re: Xen: Hybrid extension patchset for hypervisor
  2009-09-19  0:17                   ` Nakajima, Jun
@ 2009-09-19  0:28                     ` Jeremy Fitzhardinge
  0 siblings, 0 replies; 18+ messages in thread
From: Jeremy Fitzhardinge @ 2009-09-19  0:28 UTC (permalink / raw)
  To: Nakajima, Jun
  Cc: Ian Campbell, Yang, Sheng, xen-devel, Dong, Eddie, Keir Fraser

On 09/18/09 17:17, Nakajima, Jun wrote:
> Even if we start from PV kernels, I think what we should do is to implement ability (as "incremental changes") for PV guests to stop using PV MMU (and PV CPU) at boot time depending on the H/W features available rather than to keep using the same ABI, because we may not need them in the near future. Then, such PV kernels would be at par or faster/more efficient than pure HVM guests on machines with HAP enabled because of the other PV features. 
>   

Sure, that's no problem.  So long as the current ABI keeps working, we
can easily switch to something else more appropriate where possible.

    J

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2009-09-19  0:28 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-09-16  8:44 Xen: Hybrid extension patchset for hypervisor Yang, Sheng
2009-09-16  9:08 ` Keir Fraser
2009-09-16 14:04   ` Keir Fraser
2009-09-16 16:28     ` Nakajima, Jun
2009-09-16 18:19       ` Jeremy Fitzhardinge
2009-09-16 21:12         ` Ian Campbell
2009-09-16 21:22           ` Jeremy Fitzhardinge
2009-09-17  9:16             ` Ian Campbell
2009-09-17 15:56               ` Nakajima, Jun
2009-09-17 17:34                 ` Jeremy Fitzhardinge
2009-09-19  0:17                   ` Nakajima, Jun
2009-09-19  0:28                     ` Jeremy Fitzhardinge
2009-09-17 17:19               ` Jeremy Fitzhardinge
2009-09-16 19:11   ` Frank van der Linden
2009-09-17  6:13     ` Yang, Sheng
2009-09-17  6:25     ` Keir Fraser
2009-09-17  6:30       ` Sheng Yang
2009-09-17  6:47   ` Yang, Sheng

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.