All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] Make the hypercall PHYSDEVOP_alloc_irq_vector hypercall dummy.
@ 2009-09-01  2:03 Zhang, Xiantao
  2009-09-02 21:46 ` Jeremy Fitzhardinge
  0 siblings, 1 reply; 4+ messages in thread
From: Zhang, Xiantao @ 2009-09-01  2:03 UTC (permalink / raw)
  To: xen-devel, Keir Fraser


[-- Attachment #1.1: Type: text/plain, Size: 399 bytes --]

Hi, Keir
     This patch tends to make the hypercall PHYSDEVOP_alloc_irq_vector dummy, and defer vector allocation to programe ioapic entries by dom0. Basically, dom0 shouldn't touch vector namespace which is only used by hypervisor for servicing real device's interrupts. And this patch also makes broken NetBSD dom0 work again.

Signed-off-by: Xiantao Zhang <xiantao.zhang.intel.com>
Xiantao

[-- Attachment #1.2: Type: text/html, Size: 1139 bytes --]

[-- Attachment #2: make_irq_vector_allocation_hc_dummy.patch --]
[-- Type: application/octet-stream, Size: 14269 bytes --]

# HG changeset patch
# User xiantao.zhang@intel.com
# Date 1251742441 14400
# Node ID 3c91a9ef5010ef6188728948d6145346b650414a
# Parent  dd71c509f3ff13326ddbb8fa39f638022aa814ff
Dummy PHYSDEVOP_alloc_irq_vector hypercall.

Make the hypercall PHYSDEVOP_alloc_irq_vector dummy, and
defer vector allocation to programe ioapic entries by dom0.
Basically, dom0 shouldn't touch vector namespace which is
only used by hypervisor for servicing real device's interrupts.
And this patch also make broken NetBSD dom0 work again.

Signed-off-by: Xiantao Zhang <xiantao.zhang.intel.com>

diff -r 3b7cbf32fee9 xen/arch/x86/io_apic.c
--- a/xen/arch/x86/io_apic.c	Mon Aug 31 10:54:32 2009 +0100
+++ b/xen/arch/x86/io_apic.c	Tue Sep 01 05:58:48 2009 -0400
@@ -85,35 +85,7 @@ static struct irq_pin_list {
     unsigned int next;
 } *irq_2_pin;
 
-static int *pin_irq_map;
-
 static unsigned int irq_2_pin_free_entry;
-
-/* Use an arry to record pin_2_irq_mapping */
-static int get_irq_from_apic_pin(int apic, int pin)
-{
-    int i, pin_base = 0;
-
-    ASSERT(apic < nr_ioapics);
-    
-    for (i = 0; i < apic; i++)
-        pin_base += nr_ioapic_registers[i];
-
-    return pin_irq_map[pin_base + pin];
-}
-
-static void set_irq_to_apic_pin(int apic, int pin, int irq)
-{
-    
-    int i, pin_base = 0;
-
-    ASSERT(apic < nr_ioapics);
-    
-    for (i = 0; i < apic; i++)
-        pin_base += nr_ioapic_registers[i];
-
-    pin_irq_map[pin_base + pin] = irq;
-}
 
 /*
  * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
@@ -141,42 +113,6 @@ static void add_pin_to_irq(unsigned int 
     }
     entry->apic = apic;
     entry->pin = pin;
-
-    set_irq_to_apic_pin(apic, pin, irq);
-}
-
-static void remove_pin_at_irq(unsigned int irq, int apic, int pin)
-{
-    struct irq_pin_list *entry, *prev;
-
-    for (entry = &irq_2_pin[irq]; ; entry = &irq_2_pin[entry->next]) {
-        if ((entry->apic == apic) && (entry->pin == pin))
-            break;
-        if (!entry->next)
-            BUG();
-    }
-
-    entry->pin = entry->apic = -1;
-    
-    if (entry != &irq_2_pin[irq]) {
-        /* Removed entry is not at head of list. */
-        prev = &irq_2_pin[irq];
-        while (&irq_2_pin[prev->next] != entry)
-            prev = &irq_2_pin[prev->next];
-        prev->next = entry->next;
-        entry->next = irq_2_pin_free_entry;
-        irq_2_pin_free_entry = entry - irq_2_pin;
-    } else if (entry->next != 0) {
-        /* Removed entry is at head of multi-item list. */
-        prev  = entry;
-        entry = &irq_2_pin[entry->next];
-        *prev = *entry;
-        entry->pin = entry->apic = -1;
-        entry->next = irq_2_pin_free_entry;
-        irq_2_pin_free_entry = entry - irq_2_pin;
-    }
-
-    set_irq_to_apic_pin(apic, pin, -1);
 }
 
 /*
@@ -1138,15 +1074,11 @@ static void __init enable_IO_APIC(void)
     /* Initialise dynamic irq_2_pin free list. */
     irq_2_pin = xmalloc_array(struct irq_pin_list, PIN_MAP_SIZE);
     memset(irq_2_pin, 0, PIN_MAP_SIZE * sizeof(*irq_2_pin));
-    pin_irq_map = xmalloc_array(int, nr_irqs_gsi);
-    memset(pin_irq_map, 0, nr_irqs_gsi * sizeof(int));
         
     for (i = 0; i < PIN_MAP_SIZE; i++)
         irq_2_pin[i].pin = -1;
     for (i = irq_2_pin_free_entry = nr_irqs_gsi; i < PIN_MAP_SIZE; i++)
         irq_2_pin[i].next = i + 1;
-    for (i = 0; i < nr_irqs_gsi; i++)
-        pin_irq_map[i] = -1;
 
     for(apic = 0; apic < nr_ioapics; apic++) {
         int pin;
@@ -2165,6 +2097,24 @@ static int ioapic_physbase_to_id(unsigne
     return -EINVAL;
 }
 
+unsigned apic_gsi_base(int apic);
+
+int apic_pin_2_gsi_irq(int apic, int pin)
+{
+    int idx, irq;
+
+    if (apic < 0)
+       return -EINVAL;
+
+    irq = apic_gsi_base(apic) + pin;
+    if (apic == 0) {
+        idx = find_irq_entry(apic, pin, mp_INT);
+        if (idx >= 0)
+            irq = pin_2_irq(idx, apic, pin);
+    }
+    return irq;
+}
+
 int ioapic_guest_read(unsigned long physbase, unsigned int reg, u32 *pval)
 {
     int apic;
@@ -2182,17 +2132,19 @@ int ioapic_guest_read(unsigned long phys
 
 #define WARN_BOGUS_WRITE(f, a...)                                       \
     dprintk(XENLOG_INFO, "\n%s: "                                        \
-            "apic=%d, pin=%d, old_irq=%d, new_irq=%d\n"                 \
-            "%s: old_entry=%08x, new_entry=%08x\n"                      \
-            "%s: " f, __FUNCTION__, apic, pin, old_irq, new_irq,        \
-            __FUNCTION__, *(u32 *)&old_rte, *(u32 *)&new_rte,           \
+            "apic=%d, pin=%d, irq=%d\n"                 \
+            "%s: new_entry=%08x\n"                      \
+            "%s: " f, __FUNCTION__, apic, pin, irq,        \
+            __FUNCTION__, *(u32 *)&rte,           \
             __FUNCTION__ , ##a )
 
 int ioapic_guest_write(unsigned long physbase, unsigned int reg, u32 val)
 {
-    int apic, pin, old_irq = -1, new_irq = -1;
-    struct IO_APIC_route_entry old_rte = { 0 }, new_rte = { 0 };
+    int apic, pin, irq, ret, vector, pirq;
+    struct IO_APIC_route_entry rte = { 0 };
     unsigned long flags;
+    struct irq_cfg *cfg;
+    struct irq_desc *desc;
 
     if ( (apic = ioapic_physbase_to_id(physbase)) < 0 )
         return apic;
@@ -2204,7 +2156,7 @@ int ioapic_guest_write(unsigned long phy
     pin = (reg - 0x10) >> 1;
 
     /* Write first half from guest; second half is target info. */
-    *(u32 *)&new_rte = val;
+    *(u32 *)&rte = val;
 
     /*
      * What about weird destination types?
@@ -2214,7 +2166,7 @@ int ioapic_guest_write(unsigned long phy
      *  ExtINT: Ignore? Linux only asserts this at start of day.
      * For now, print a message and return an error. We can fix up on demand.
      */
-    if ( new_rte.delivery_mode > dest_LowestPrio )
+    if ( rte.delivery_mode > dest_LowestPrio )
     {
         printk("ERROR: Attempt to write weird IOAPIC destination mode!\n");
         printk("       APIC=%d/%d, lo-reg=%x\n", apic, pin, val);
@@ -2225,83 +2177,65 @@ int ioapic_guest_write(unsigned long phy
      * The guest does not know physical APIC arrangement (flat vs. cluster).
      * Apply genapic conventions for this platform.
      */
-    new_rte.delivery_mode = INT_DELIVERY_MODE;
-    new_rte.dest_mode     = INT_DEST_MODE;
+    rte.delivery_mode = INT_DELIVERY_MODE;
+    rte.dest_mode     = INT_DEST_MODE;
+
+    irq = apic_pin_2_gsi_irq(apic, pin);
+    if ( irq < 0 )
+        return irq;
+
+    desc = irq_to_desc(irq);
+    cfg = desc->chip_data;
+
+    /* Since PHYSDEVOP_alloc_irq_vector is dummy, rte.vector is the pirq
+       which corresponds to this ioapic pin, retrieve it for building
+       pirq and irq mapping. 
+     */
+    pirq = rte.vector;
+    if(pirq < 0 || pirq >= dom0->nr_pirqs)
+        return -EINVAL;
+    
+    if ( desc->action )
+    {
+        WARN_BOGUS_WRITE("Attempt to modify IO-APIC pin for in-use IRQ!\n");
+        return 0;
+    }
+
+    if ( cfg->vector <= 0 || cfg->vector > LAST_DYNAMIC_VECTOR ) {
+
+        printk("allocated vector for irq:%d\n", irq);
+        
+        vector = assign_irq_vector(irq);
+        if ( vector < 0 )
+            return vector;
+
+        add_pin_to_irq(irq, apic, pin);
+    }
+    spin_lock(&pcidevs_lock);
+    spin_lock(&dom0->event_lock);
+    ret = map_domain_pirq(dom0, pirq, irq,
+            MAP_PIRQ_TYPE_GSI, NULL);
+    spin_unlock(&dom0->event_lock);
+    spin_unlock(&pcidevs_lock);
+    if ( ret < 0 )
+        return ret;
 
     spin_lock_irqsave(&ioapic_lock, flags);
-
-    /* Read first (interesting) half of current routing entry. */
-    *(u32 *)&old_rte = io_apic_read(apic, 0x10 + 2 * pin);
-
-    /* No change to the first half of the routing entry? Bail quietly. */
-    if ( *(u32 *)&old_rte == *(u32 *)&new_rte )
-    {
-        spin_unlock_irqrestore(&ioapic_lock, flags);
-        return 0;
-    }
-
-    /* Special delivery modes (SMI,NMI,INIT,ExtInt) should have no vector.  */
-    if ( (old_rte.delivery_mode > dest_LowestPrio) && (old_rte.vector != 0) )
-    {
-        WARN_BOGUS_WRITE("Special delivery mode %d with non-zero vector "
-                         "%02x\n", old_rte.delivery_mode, old_rte.vector);
-        /* Nobble the vector here as it does not relate to a valid irq. */
-        old_rte.vector = 0;
-    }
-
-    if ( old_rte.vector >= FIRST_DYNAMIC_VECTOR )
-        old_irq = get_irq_from_apic_pin(apic, pin);
-
-    /* FIXME: dirty hack to support per-cpu vector. */
-    new_irq = new_rte.vector;
-
-    if ( (old_irq != new_irq) && (old_irq >= 0) && IO_APIC_IRQ(old_irq) )
-    {
-        if ( irq_desc[old_irq].action )
-        {
-            WARN_BOGUS_WRITE("Attempt to remove IO-APIC pin of in-use IRQ!\n");
-            spin_unlock_irqrestore(&ioapic_lock, flags);
-            return 0;
-        }
-
-        remove_pin_at_irq(old_irq, apic, pin);
-    }
-
-    if ( (new_irq >= 0) && IO_APIC_IRQ(new_irq) )
-    {
-        if ( irq_desc[new_irq].action )
-        {
-            WARN_BOGUS_WRITE("Attempt to %s IO-APIC pin for in-use IRQ!\n",
-                             (old_irq != new_irq) ? "add" : "modify");
-            spin_unlock_irqrestore(&ioapic_lock, flags);
-            return 0;
-        }
-        
-        /* Set the correct irq-handling type. */
-        irq_desc[new_irq].handler = new_rte.trigger ? 
-            &ioapic_level_type: &ioapic_edge_type;
-        
-        if ( old_irq != new_irq )
-            add_pin_to_irq(new_irq, apic, pin);
-
-        /* Mask iff level triggered. */
-        new_rte.mask = new_rte.trigger;
-        /* Set the vector field to the real vector! */
-        new_rte.vector = irq_cfg[new_irq].vector;
-    }
-    else if ( !new_rte.mask )
-    {
-        /* This pin leads nowhere but the guest has not masked it. */
-        WARN_BOGUS_WRITE("Installing bogus unmasked IO-APIC entry!\n");
-        new_rte.mask = 1;
-    }
-
-    new_rte.dest.logical.logical_dest = 
-    cpu_mask_to_apicid(irq_cfg[new_irq].domain);
-
-    io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&new_rte) + 0));
-    io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&new_rte) + 1));
-
+    /* Set the correct irq-handling type. */
+    desc->handler = rte.trigger ? 
+        &ioapic_level_type: &ioapic_edge_type;
+
+    /* Mask iff level triggered. */
+    rte.mask = rte.trigger;
+    /* Set the vector field to the real vector! */
+    rte.vector = cfg->vector;
+
+    rte.dest.logical.logical_dest = 
+    cpu_mask_to_apicid(cfg->domain);
+
+    io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&rte) + 0));
+    io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&rte) + 1));
+    
     spin_unlock_irqrestore(&ioapic_lock, flags);
 
     return 0;
diff -r 3b7cbf32fee9 xen/arch/x86/irq.c
--- a/xen/arch/x86/irq.c	Mon Aug 31 10:54:32 2009 +0100
+++ b/xen/arch/x86/irq.c	Tue Sep 01 05:58:48 2009 -0400
@@ -1293,9 +1293,9 @@ int map_domain_pirq(
     if ( (old_irq && (old_irq != irq) ) ||
          (old_pirq && (old_pirq != pirq)) )
     {
-        dprintk(XENLOG_G_ERR, "dom%d: pirq %d or irq %d already mapped\n",
+        dprintk(XENLOG_G_WARNING, "dom%d: pirq %d or irq %d already mapped\n",
                 d->domain_id, pirq, irq);
-        return -EINVAL;
+        return 0;
     }
 
     ret = irq_permit_access(d, pirq);
diff -r 3b7cbf32fee9 xen/arch/x86/mpparse.c
--- a/xen/arch/x86/mpparse.c	Mon Aug 31 10:54:32 2009 +0100
+++ b/xen/arch/x86/mpparse.c	Tue Sep 01 05:58:48 2009 -0400
@@ -958,6 +958,10 @@ unsigned highest_gsi(void)
     return res;
 }
 
+unsigned apic_gsi_base(int apic)
+{
+    return mp_ioapic_routing[apic].gsi_base;
+}
 
 void __init mp_override_legacy_irq (
 	u8			bus_irq,
diff -r 3b7cbf32fee9 xen/arch/x86/physdev.c
--- a/xen/arch/x86/physdev.c	Mon Aug 31 10:54:32 2009 +0100
+++ b/xen/arch/x86/physdev.c	Tue Sep 01 06:00:15 2009 -0400
@@ -329,7 +329,6 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
 
     case PHYSDEVOP_alloc_irq_vector: {
         struct physdev_irq irq_op;
-        int vector;
 
         ret = -EFAULT;
         if ( copy_from_guest(&irq_op, arg, 1) != 0 )
@@ -343,26 +342,13 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
         if ( ret )
             break;
 
-        irq = irq_op.irq;
-        ret = -EINVAL;
+        /* Vector is only used by hypervisor, and dom0 shouldn't
+         touch it in its world, return irq_op.irq as the vecotr,
+         and make this hypercall dummy, and also defer the vector 
+         allocation when dom0 tries to programe ioapic entry. */
+        irq_op.vector = irq_op.irq;
+        ret = 0;
         
-        /* FIXME: Once dom0 breaks GSI IRQ limit, it is
-            a must to eliminate the limit here */
-        BUG_ON(irq >= 256);
-        
-        vector = assign_irq_vector(irq);
-        if (vector >= FIRST_DYNAMIC_VECTOR)
-            irq_op.vector = irq;
-        else
-            irq_op.vector = -ENOSPC;
-
-        spin_lock(&pcidevs_lock);
-        spin_lock(&dom0->event_lock);
-        ret = map_domain_pirq(dom0, irq_op.irq, irq,
-                              MAP_PIRQ_TYPE_GSI, NULL);
-        spin_unlock(&dom0->event_lock);
-        spin_unlock(&pcidevs_lock);
-
         if ( copy_to_guest(arg, &irq_op, 1) != 0 )
             ret = -EFAULT;
         break;
diff -r 3b7cbf32fee9 xen/common/domain.c
--- a/xen/common/domain.c	Mon Aug 31 10:54:32 2009 +0100
+++ b/xen/common/domain.c	Tue Sep 01 05:58:48 2009 -0400
@@ -197,7 +197,7 @@ struct vcpu *alloc_idle_vcpu(unsigned in
     return alloc_vcpu(d, vcpu_id, cpu_id);
 }
 
-static unsigned int extra_dom0_irqs, extra_domU_irqs = 8;
+static unsigned int extra_dom0_irqs = 256, extra_domU_irqs = 32;
 static void __init parse_extra_guest_irqs(const char *s)
 {
     if ( isdigit(*s) )
@@ -253,9 +253,11 @@ struct domain *domain_create(
         d->is_paused_by_controller = 1;
         atomic_inc(&d->pause_count);
 
-        d->nr_pirqs = (nr_irqs_gsi +
-                       (domid ? extra_domU_irqs :
-                        extra_dom0_irqs ?: nr_irqs_gsi));
+        if ( domid )
+            d->nr_pirqs = nr_irqs_gsi + extra_domU_irqs;
+        else
+            d->nr_pirqs = nr_irqs_gsi + extra_dom0_irqs;
+
         d->pirq_to_evtchn = xmalloc_array(u16, d->nr_pirqs);
         d->pirq_mask = xmalloc_array(
             unsigned long, BITS_TO_LONGS(d->nr_pirqs));

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] Make the hypercall PHYSDEVOP_alloc_irq_vector hypercall dummy.
  2009-09-01  2:03 [PATCH] Make the hypercall PHYSDEVOP_alloc_irq_vector hypercall dummy Zhang, Xiantao
@ 2009-09-02 21:46 ` Jeremy Fitzhardinge
  2009-09-03 13:14   ` Zhang, Xiantao
  0 siblings, 1 reply; 4+ messages in thread
From: Jeremy Fitzhardinge @ 2009-09-02 21:46 UTC (permalink / raw)
  To: Zhang, Xiantao; +Cc: xen-devel, Keir Fraser

On 08/31/09 19:03, Zhang, Xiantao wrote:
> Hi, Keir
>      This patch tends to make the hypercall PHYSDEVOP_alloc_irq_vector
> dummy, and defer vector allocation to programe ioapic entries by dom0.
> Basically, dom0 shouldn't touch vector namespace which is only used by
> hypervisor for servicing real device's interrupts. And this patch also
> makes broken NetBSD dom0 work again.

This is an interesting approach, and it seems like a less intrusive way
to achieve what my functionally similar patch does.  All it needs is a
corresponding change to replace the paravirtualized IO APIC writes with
a hypercall to directly install the vector for a given GSI (ie, to make
use of Xen's GSI -> chip+pin tables rather than relying on dom0 to do
the discovery for itself).

> # HG changeset patch # User xiantao.zhang@intel.com # Date 1251742441
> 14400 # Node ID 3c91a9ef5010ef6188728948d6145346b650414a # Parent
> dd71c509f3ff13326ddbb8fa39f638022aa814ff Dummy
> PHYSDEVOP_alloc_irq_vector hypercall. Make the hypercall
> PHYSDEVOP_alloc_irq_vector dummy, and defer vector allocation to
> programe ioapic entries by dom0. Basically, dom0 shouldn't touch
> vector namespace which is only used by hypervisor for servicing real
> device's interrupts. And this patch also make broken NetBSD dom0 work
> again. Signed-off-by: Xiantao Zhang <xiantao.zhang.intel.com> diff -r
> 3b7cbf32fee9 xen/arch/x86/io_apic.c --- a/xen/arch/x86/io_apic.c Mon
> Aug 31 10:54:32 2009 +0100 +++ b/xen/arch/x86/io_apic.c Tue Sep 01
> 05:58:48 2009 -0400 @@ -85,35 +85,7 @@ static struct irq_pin_list {
> unsigned int next; } *irq_2_pin; -static int *pin_irq_map; - static
> unsigned int irq_2_pin_free_entry; - -/* Use an arry to record
> pin_2_irq_mapping */ -static int get_irq_from_apic_pin(int apic, int
> pin) -{ - int i, pin_base = 0; - - ASSERT(apic < nr_ioapics); - - for
> (i = 0; i < apic; i++) - pin_base += nr_ioapic_registers[i]; - -
> return pin_irq_map[pin_base + pin]; -} - -static void
> set_irq_to_apic_pin(int apic, int pin, int irq) -{ - - int i, pin_base
> = 0; - - ASSERT(apic < nr_ioapics); - - for (i = 0; i < apic; i++) -
> pin_base += nr_ioapic_registers[i]; - - pin_irq_map[pin_base + pin] =
> irq; -} /* * The common case is 1:1 IRQ<->pin mappings. Sometimes
> there are @@ -141,42 +113,6 @@ static void add_pin_to_irq(unsigned int
> } entry->apic = apic; entry->pin = pin; - - set_irq_to_apic_pin(apic,
> pin, irq); -} - -static void remove_pin_at_irq(unsigned int irq, int
> apic, int pin) -{ - struct irq_pin_list *entry, *prev; - - for (entry
> = &irq_2_pin[irq]; ; entry = &irq_2_pin[entry->next]) { - if
> ((entry->apic == apic) && (entry->pin == pin)) - break; - if
> (!entry->next) - BUG(); - } - - entry->pin = entry->apic = -1; - - if
> (entry != &irq_2_pin[irq]) { - /* Removed entry is not at head of
> list. */ - prev = &irq_2_pin[irq]; - while (&irq_2_pin[prev->next] !=
> entry) - prev = &irq_2_pin[prev->next]; - prev->next = entry->next; -
> entry->next = irq_2_pin_free_entry; - irq_2_pin_free_entry = entry -
> irq_2_pin; - } else if (entry->next != 0) { - /* Removed entry is at
> head of multi-item list. */ - prev = entry; - entry =
> &irq_2_pin[entry->next]; - *prev = *entry; - entry->pin = entry->apic
> = -1; - entry->next = irq_2_pin_free_entry; - irq_2_pin_free_entry =
> entry - irq_2_pin; - } - - set_irq_to_apic_pin(apic, pin, -1); } /* @@
> -1138,15 +1074,11 @@ static void __init enable_IO_APIC(void) /*
> Initialise dynamic irq_2_pin free list. */ irq_2_pin =
> xmalloc_array(struct irq_pin_list, PIN_MAP_SIZE); memset(irq_2_pin, 0,
> PIN_MAP_SIZE * sizeof(*irq_2_pin)); - pin_irq_map = xmalloc_array(int,
> nr_irqs_gsi); - memset(pin_irq_map, 0, nr_irqs_gsi * sizeof(int)); for
> (i = 0; i < PIN_MAP_SIZE; i++) irq_2_pin[i].pin = -1; for (i =
> irq_2_pin_free_entry = nr_irqs_gsi; i < PIN_MAP_SIZE; i++)
> irq_2_pin[i].next = i + 1; - for (i = 0; i < nr_irqs_gsi; i++) -
> pin_irq_map[i] = -1; for(apic = 0; apic < nr_ioapics; apic++) { int
> pin; @@ -2165,6 +2097,24 @@ static int ioapic_physbase_to_id(unsigne
> return -EINVAL; } +unsigned apic_gsi_base(int apic); + +int
> apic_pin_2_gsi_irq(int apic, int pin) +{ + int idx, irq; + + if (apic
> < 0) + return -EINVAL; + + irq = apic_gsi_base(apic) + pin; + if (apic
> == 0) { + idx = find_irq_entry(apic, pin, mp_INT); + if (idx >= 0) +
> irq = pin_2_irq(idx, apic, pin); + } + return irq; +} + int
> ioapic_guest_read(unsigned long physbase, unsigned int reg, u32 *pval)
> { int apic; @@ -2182,17 +2132,19 @@ int ioapic_guest_read(unsigned
> long phys #define WARN_BOGUS_WRITE(f, a...) \ dprintk(XENLOG_INFO,
> "\n%s: " \ - "apic=%d, pin=%d, old_irq=%d, new_irq=%d\n" \ - "%s:
> old_entry=%08x, new_entry=%08x\n" \ - "%s: " f, __FUNCTION__, apic,
> pin, old_irq, new_irq, \ - __FUNCTION__, *(u32 *)&old_rte, *(u32
> *)&new_rte, \ + "apic=%d, pin=%d, irq=%d\n" \ + "%s: new_entry=%08x\n"
> \ + "%s: " f, __FUNCTION__, apic, pin, irq, \ + __FUNCTION__, *(u32
> *)&rte, \ __FUNCTION__ , ##a ) int ioapic_guest_write(unsigned long
> physbase, unsigned int reg, u32 val) { - int apic, pin, old_irq = -1,
> new_irq = -1; - struct IO_APIC_route_entry old_rte = { 0 }, new_rte =
> { 0 }; + int apic, pin, irq, ret, vector, pirq; + struct
> IO_APIC_route_entry rte = { 0 }; 

Is this just a gratuitious renaming, or does it serve some other purpose?

> unsigned long flags; + struct irq_cfg *cfg; + struct irq_desc *desc;
> if ( (apic = ioapic_physbase_to_id(physbase)) < 0 ) return apic; @@
> -2204,7 +2156,7 @@ int ioapic_guest_write(unsigned long phy pin = (reg
> - 0x10) >> 1; /* Write first half from guest; second half is target
> info. */ - *(u32 *)&new_rte = val; + *(u32 *)&rte = val; /* * What
> about weird destination types? @@ -2214,7 +2166,7 @@ int
> ioapic_guest_write(unsigned long phy * ExtINT: Ignore? Linux only
> asserts this at start of day. * For now, print a message and return an
> error. We can fix up on demand. */ - if ( new_rte.delivery_mode >
> dest_LowestPrio ) + if ( rte.delivery_mode > dest_LowestPrio ) {
> printk("ERROR: Attempt to write weird IOAPIC destination mode!\n");
> printk(" APIC=%d/%d, lo-reg=%x\n", apic, pin, val); @@ -2225,83
> +2177,65 @@ int ioapic_guest_write(unsigned long phy * The guest does
> not know physical APIC arrangement (flat vs. cluster). * Apply genapic
> conventions for this platform. */ - new_rte.delivery_mode =
> INT_DELIVERY_MODE; - new_rte.dest_mode = INT_DEST_MODE; +
> rte.delivery_mode = INT_DELIVERY_MODE; + rte.dest_mode =
> INT_DEST_MODE; + + irq = apic_pin_2_gsi_irq(apic, pin); + if ( irq < 0
> ) + return irq; + + desc = irq_to_desc(irq); + cfg = desc->chip_data;
> + + /* Since PHYSDEVOP_alloc_irq_vector is dummy, rte.vector is the
> pirq + which corresponds to this ioapic pin, retrieve it for building
> + pirq and irq mapping. + */ + pirq = rte.vector; + if(pirq < 0 ||
> pirq >= dom0->nr_pirqs) + return -EINVAL; + + if ( desc->action ) + {
> + WARN_BOGUS_WRITE("Attempt to modify IO-APIC pin for in-use IRQ!\n");
> + return 0; + } + + if ( cfg->vector <= 0 || cfg->vector >
> LAST_DYNAMIC_VECTOR ) { + + printk("allocated vector for irq:%d\n",
> irq); + + vector = assign_irq_vector(irq); + if ( vector < 0 ) +
> return vector; + + add_pin_to_irq(irq, apic, pin); 


If this is replacing or disabling a previously set "vector" will it
properly clean up the old state?  (I don't fully understand how all the
datastructures relate pirq/vector/apic/pin to each other.)

> + } + spin_lock(&pcidevs_lock); + spin_lock(&dom0->event_lock); + ret
> = map_domain_pirq(dom0, pirq, irq, + MAP_PIRQ_TYPE_GSI, NULL); +
> spin_unlock(&dom0->event_lock); + spin_unlock(&pcidevs_lock); + if (
> ret < 0 ) + return ret; spin_lock_irqsave(&ioapic_lock, flags); - - /*
> Read first (interesting) half of current routing entry. */ - *(u32
> *)&old_rte = io_apic_read(apic, 0x10 + 2 * pin); - - /* No change to
> the first half of the routing entry? Bail quietly. */ - if ( *(u32
> *)&old_rte == *(u32 *)&new_rte ) - { -
> spin_unlock_irqrestore(&ioapic_lock, flags); - return 0; - } - - /*
> Special delivery modes (SMI,NMI,INIT,ExtInt) should have no vector. */
> - if ( (old_rte.delivery_mode > dest_LowestPrio) && (old_rte.vector !=
> 0) ) - { - WARN_BOGUS_WRITE("Special delivery mode %d with non-zero
> vector " - "%02x\n", old_rte.delivery_mode, old_rte.vector); - /*
> Nobble the vector here as it does not relate to a valid irq. */ -
> old_rte.vector = 0; - } - - if ( old_rte.vector >=
> FIRST_DYNAMIC_VECTOR ) - old_irq = get_irq_from_apic_pin(apic, pin); -
> - /* FIXME: dirty hack to support per-cpu vector. */ - new_irq =
> new_rte.vector; - - if ( (old_irq != new_irq) && (old_irq >= 0) &&
> IO_APIC_IRQ(old_irq) ) - { - if ( irq_desc[old_irq].action ) - { -
> WARN_BOGUS_WRITE("Attempt to remove IO-APIC pin of in-use IRQ!\n"); -
> spin_unlock_irqrestore(&ioapic_lock, flags); - return 0; - } - -
> remove_pin_at_irq(old_irq, apic, pin); - } - - if ( (new_irq >= 0) &&
> IO_APIC_IRQ(new_irq) ) - { - if ( irq_desc[new_irq].action ) - { -
> WARN_BOGUS_WRITE("Attempt to %s IO-APIC pin for in-use IRQ!\n", -
> (old_irq != new_irq) ? "add" : "modify"); -
> spin_unlock_irqrestore(&ioapic_lock, flags); - return 0; - } - - /*
> Set the correct irq-handling type. */ - irq_desc[new_irq].handler =
> new_rte.trigger ? - &ioapic_level_type: &ioapic_edge_type; - - if (
> old_irq != new_irq ) - add_pin_to_irq(new_irq, apic, pin); - - /* Mask
> iff level triggered. */ - new_rte.mask = new_rte.trigger; - /* Set the
> vector field to the real vector! */ - new_rte.vector =
> irq_cfg[new_irq].vector; - } - else if ( !new_rte.mask ) - { - /* This
> pin leads nowhere but the guest has not masked it. */ -
> WARN_BOGUS_WRITE("Installing bogus unmasked IO-APIC entry!\n"); -
> new_rte.mask = 1; - } - - new_rte.dest.logical.logical_dest = -
> cpu_mask_to_apicid(irq_cfg[new_irq].domain); - - io_apic_write(apic,
> 0x10 + 2 * pin, *(((int *)&new_rte) + 0)); - io_apic_write(apic, 0x11
> + 2 * pin, *(((int *)&new_rte) + 1)); - + /* Set the correct
> irq-handling type. */ + desc->handler = rte.trigger ? +
> &ioapic_level_type: &ioapic_edge_type; + + /* Mask iff level
> triggered. */ + rte.mask = rte.trigger; + /* Set the vector field to
> the real vector! */ + rte.vector = cfg->vector; + +
> rte.dest.logical.logical_dest = + cpu_mask_to_apicid(cfg->domain); + +
> io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&rte) + 0)); +
> io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&rte) + 1)); +
> spin_unlock_irqrestore(&ioapic_lock, flags); return 0; diff -r
> 3b7cbf32fee9 xen/arch/x86/irq.c --- a/xen/arch/x86/irq.c Mon Aug 31
> 10:54:32 2009 +0100 +++ b/xen/arch/x86/irq.c Tue Sep 01 05:58:48 2009
> -0400 @@ -1293,9 +1293,9 @@ int map_domain_pirq( if ( (old_irq &&
> (old_irq != irq) ) || (old_pirq && (old_pirq != pirq)) ) { -
> dprintk(XENLOG_G_ERR, "dom%d: pirq %d or irq %d already mapped\n", +
> dprintk(XENLOG_G_WARNING, "dom%d: pirq %d or irq %d already mapped\n",
> d->domain_id, pirq, irq); - return -EINVAL; + return 0; } ret =
> irq_permit_access(d, pirq); diff -r 3b7cbf32fee9
> xen/arch/x86/mpparse.c --- a/xen/arch/x86/mpparse.c Mon Aug 31
> 10:54:32 2009 +0100 +++ b/xen/arch/x86/mpparse.c Tue Sep 01 05:58:48
> 2009 -0400 @@ -958,6 +958,10 @@ unsigned highest_gsi(void) return res;
> } +unsigned apic_gsi_base(int apic) +{ + return
> mp_ioapic_routing[apic].gsi_base; +} void __init
> mp_override_legacy_irq ( u8 bus_irq, diff -r 3b7cbf32fee9
> xen/arch/x86/physdev.c --- a/xen/arch/x86/physdev.c Mon Aug 31
> 10:54:32 2009 +0100 +++ b/xen/arch/x86/physdev.c Tue Sep 01 06:00:15
> 2009 -0400 @@ -329,7 +329,6 @@ ret_t do_physdev_op(int cmd,
> XEN_GUEST_H case PHYSDEVOP_alloc_irq_vector: { struct physdev_irq
> irq_op; - int vector; ret = -EFAULT; if ( copy_from_guest(&irq_op,
> arg, 1) != 0 ) @@ -343,26 +342,13 @@ ret_t do_physdev_op(int cmd,
> XEN_GUEST_H if ( ret ) break; - irq = irq_op.irq; - ret = -EINVAL; +
> /* Vector is only used by hypervisor, and dom0 shouldn't + touch it in
> its world, return irq_op.irq as the vecotr, + and make this hypercall
> dummy, and also defer the vector + allocation when dom0 tries to
> programe ioapic entry. */ + irq_op.vector = irq_op.irq; + ret = 0; -
> /* FIXME: Once dom0 breaks GSI IRQ limit, it is - a must to eliminate
> the limit here */ - BUG_ON(irq >= 256); - - vector =
> assign_irq_vector(irq); - if (vector >= FIRST_DYNAMIC_VECTOR) -
> irq_op.vector = irq; - else - irq_op.vector = -ENOSPC; - -
> spin_lock(&pcidevs_lock); - spin_lock(&dom0->event_lock); - ret =
> map_domain_pirq(dom0, irq_op.irq, irq, - MAP_PIRQ_TYPE_GSI, NULL); -
> spin_unlock(&dom0->event_lock); - spin_unlock(&pcidevs_lock); - if (
> copy_to_guest(arg, &irq_op, 1) != 0 ) ret = -EFAULT; break; diff -r
> 3b7cbf32fee9 xen/common/domain.c --- a/xen/common/domain.c Mon Aug 31
> 10:54:32 2009 +0100 +++ b/xen/common/domain.c Tue Sep 01 05:58:48 2009
> -0400 @@ -197,7 +197,7 @@ struct vcpu *alloc_idle_vcpu(unsigned in
> return alloc_vcpu(d, vcpu_id, cpu_id); } -static unsigned int
> extra_dom0_irqs, extra_domU_irqs = 8; +static unsigned int
> extra_dom0_irqs = 256, extra_domU_irqs = 32; static void __init
> parse_extra_guest_irqs(const char *s) { if ( isdigit(*s) ) @@ -253,9
> +253,11 @@ struct domain *domain_create( d->is_paused_by_controller =
> 1; atomic_inc(&d->pause_count); - d->nr_pirqs = (nr_irqs_gsi + -
> (domid ? extra_domU_irqs : - extra_dom0_irqs ?: nr_irqs_gsi)); + if (
> domid ) + d->nr_pirqs = nr_irqs_gsi + extra_domU_irqs; + else +
> d->nr_pirqs = nr_irqs_gsi + extra_dom0_irqs; 


Surely there's a better test for this?  We don't want to be hard-coding
"privileged domain == dom0" into Xen.  What version is this patch
against anyway?  I don't see this "extra_dom0_irqs" stuff in current
Xen-unstable.

> + d->pirq_to_evtchn = xmalloc_array(u16, d->nr_pirqs); d->pirq_mask =
> xmalloc_array( unsigned long, BITS_TO_LONGS(d->nr_pirqs)); 

Thanks,
	J

^ permalink raw reply	[flat|nested] 4+ messages in thread

* RE: [PATCH] Make the hypercall PHYSDEVOP_alloc_irq_vector hypercall dummy.
  2009-09-02 21:46 ` Jeremy Fitzhardinge
@ 2009-09-03 13:14   ` Zhang, Xiantao
  2009-09-03 20:59     ` Jeremy Fitzhardinge
  0 siblings, 1 reply; 4+ messages in thread
From: Zhang, Xiantao @ 2009-09-03 13:14 UTC (permalink / raw)
  To: Jeremy Fitzhardinge; +Cc: Keir, xen-devel, Fraser

Jeremy Fitzhardinge wrote:
> On 08/31/09 19:03, Zhang, Xiantao wrote:

>> pin=%d, irq=%d\n" \ + "%s: new_entry=%08x\n" \ + "%s: " f,
>> __FUNCTION__, apic, pin, irq, \ + __FUNCTION__, *(u32 *)&rte, \
>> __FUNCTION__ , ##a ) int ioapic_guest_write(unsigned long physbase,
>> unsigned int reg, u32 val) { - int apic, pin, old_irq = -1, new_irq
>> = -1; - struct IO_APIC_route_entry old_rte = { 0 }, new_rte = { 0 };
>> + int apic, pin, irq, ret, vector, pirq; + struct
>> IO_APIC_route_entry rte = { 0 };   
> 
> Is this just a gratuitious renaming, or does it serve some other
> purpose? 

Just renaming, since I don't think it needs to care about old_entry, so remove it and rename new_rte to rte. 

>> unsigned long flags; + struct irq_cfg *cfg; + struct irq_desc *desc;
>> if ( (apic = ioapic_physbase_to_id(physbase)) < 0 ) return apic; @@
>> -2204,7 +2156,7 @@ int ioapic_guest_write(unsigned long phy pin =
>> (reg 
>> - 0x10) >> 1; /* Write first half from guest; second half is target
>> info. */ - *(u32 *)&new_rte = val; + *(u32 *)&rte = val; /* * What
>> about weird destination types? @@ -2214,7 +2166,7 @@ int
>> ioapic_guest_write(unsigned long phy * ExtINT: Ignore? Linux only
>> asserts this at start of day. * For now, print a message and return
>> an error. We can fix up on demand. */ - if ( new_rte.delivery_mode >
>> dest_LowestPrio ) + if ( rte.delivery_mode > dest_LowestPrio ) {
>> printk("ERROR: Attempt to write weird IOAPIC destination mode!\n");
>> printk(" APIC=%d/%d, lo-reg=%x\n", apic, pin, val); @@ -2225,83
>> +2177,65 @@ int ioapic_guest_write(unsigned long phy * The guest does
>> not know physical APIC arrangement (flat vs. cluster). * Apply
>> genapic conventions for this platform. */ - new_rte.delivery_mode =
>> INT_DELIVERY_MODE; - new_rte.dest_mode = INT_DEST_MODE; +
>> rte.delivery_mode = INT_DELIVERY_MODE; + rte.dest_mode =
>> INT_DEST_MODE; + + irq = apic_pin_2_gsi_irq(apic, pin); + if ( irq <
>> 0 ) + return irq; + + desc = irq_to_desc(irq); + cfg =
>> desc->chip_data; + + /* Since PHYSDEVOP_alloc_irq_vector is dummy,
>> rte.vector is the pirq + which corresponds to this ioapic pin,
>> retrieve it for building + pirq and irq mapping. + */ + pirq =
>> rte.vector; + if(pirq < 0 || pirq >= dom0->nr_pirqs) + return
>> -EINVAL; + + if ( desc->action ) + { + WARN_BOGUS_WRITE("Attempt to
>> modify IO-APIC pin for in-use IRQ!\n"); + return 0; + } + + if (
>> cfg->vector <= 0 || cfg->vector > LAST_DYNAMIC_VECTOR ) { + +
>> printk("allocated vector for irq:%d\n", irq); + + vector =
>> assign_irq_vector(irq); + if ( vector < 0 ) + return vector; + +
>> add_pin_to_irq(irq, apic, pin); 
> 
> 
> If this is replacing or disabling a previously set "vector" will it
> properly clean up the old state?  (I don't fully understand how all
> the datastructures relate pirq/vector/apic/pin to each other.)

IMO, dom0 should only cares about pirq(==GSI) and doesn't need to know the ifno about vector and specific apic&pin at all. Since Xen also knows GSI info, so dom0 and Xen can build irq mapping(xen's irq and dom0's pirq) through it.  Maybe we can get rid of ioapic in dom0 eventually. 

>> extra_dom0_irqs, extra_domU_irqs = 8; +static unsigned int
>> extra_dom0_irqs = 256, extra_domU_irqs = 32; static void __init
>> parse_extra_guest_irqs(const char *s) { if ( isdigit(*s) ) @@ -253,9
>> +253,11 @@ struct domain *domain_create( d->is_paused_by_controller =
>> 1; atomic_inc(&d->pause_count); - d->nr_pirqs = (nr_irqs_gsi + -
>> (domid ? extra_domU_irqs : - extra_dom0_irqs ?: nr_irqs_gsi)); + if (
>> domid ) + d->nr_pirqs = nr_irqs_gsi + extra_domU_irqs; + else +
>> d->nr_pirqs = nr_irqs_gsi + extra_dom0_irqs;
> 
> 
> Surely there's a better test for this?  We don't want to be
> hard-coding "privileged domain == dom0" into Xen.  What version is
> this patch against anyway?  I don't see this "extra_dom0_irqs" stuff
> in current Xen-unstable.

I'm using xen-unsable.hg's tip. 

>> + d->pirq_to_evtchn = xmalloc_array(u16, d->nr_pirqs); d->pirq_mask =
>> xmalloc_array( unsigned long, BITS_TO_LONGS(d->nr_pirqs));
> 
> Thanks,
> 	J

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] Make the hypercall PHYSDEVOP_alloc_irq_vector hypercall dummy.
  2009-09-03 13:14   ` Zhang, Xiantao
@ 2009-09-03 20:59     ` Jeremy Fitzhardinge
  0 siblings, 0 replies; 4+ messages in thread
From: Jeremy Fitzhardinge @ 2009-09-03 20:59 UTC (permalink / raw)
  To: Zhang, Xiantao; +Cc: xen-devel, Keir Fraser

On 09/03/09 06:14, Zhang, Xiantao wrote:
> IMO, dom0 should only cares about pirq(==GSI) and doesn't need to know the ifno about vector and specific apic&pin at all. Since Xen also knows GSI info, so dom0 and Xen can build irq mapping(xen's irq and dom0's pirq) through it.

Yes, but if dom0 rebinds the pirq<->gsi association, will Xen maintain
its internal structures correctly?

>   Maybe we can get rid of ioapic in dom0 eventually. 
>   

Yes, I think so (see other mail).

    J

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2009-09-03 20:59 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-09-01  2:03 [PATCH] Make the hypercall PHYSDEVOP_alloc_irq_vector hypercall dummy Zhang, Xiantao
2009-09-02 21:46 ` Jeremy Fitzhardinge
2009-09-03 13:14   ` Zhang, Xiantao
2009-09-03 20:59     ` Jeremy Fitzhardinge

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.