All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] Xen/PCI: support multi-segment systems
@ 2011-09-22  8:17 Jan Beulich
  2011-09-22  9:06 ` Konrad Rzeszutek Wilk
  0 siblings, 1 reply; 2+ messages in thread
From: Jan Beulich @ 2011-09-22  8:17 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk; +Cc: xen-devel

Now that the hypercall interface changes are in -unstable, make the
kernel side code stop ignoring the segment (aka domain) number
(ignoring it results in pretty odd behavior on multi-segment systems).
If only the old interfaces are available, don't call them at all for
devices on non-zero segments.

I was not able to find any existing use of PHYSDEVOP_restore_msi
(which would also need to be changed). If a patch in some other tree
introduces such a use, it ought to be adjusted to try
PHYSDEVOP_restore_msi_ext first.

Signed-off-by: Jan Beulich <jbeulich@suse.com>

---
 arch/x86/pci/xen.c              |   22 ++++++++-
 drivers/xen/pci.c               |   94 +++++++++++++++++++++++++++++++++++-----
 include/xen/interface/physdev.h |   34 ++++++++++++++
 3 files changed, 136 insertions(+), 14 deletions(-)

--- 3.1-rc7/arch/x86/pci/xen.c
+++ 3.1-rc7-xen-pci-multi-seg/arch/x86/pci/xen.c
@@ -248,6 +248,8 @@ error:
 }
 
 #ifdef CONFIG_XEN_DOM0
+static bool __read_mostly pci_seg_supported = true;
+
 static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 {
 	int ret = 0;
@@ -265,10 +267,11 @@ static int xen_initdom_setup_msi_irqs(st
 
 		memset(&map_irq, 0, sizeof(map_irq));
 		map_irq.domid = domid;
-		map_irq.type = MAP_PIRQ_TYPE_MSI;
+		map_irq.type = MAP_PIRQ_TYPE_MSI_SEG;
 		map_irq.index = -1;
 		map_irq.pirq = -1;
-		map_irq.bus = dev->bus->number;
+		map_irq.bus = dev->bus->number |
+			      (pci_domain_nr(dev->bus) << 16);
 		map_irq.devfn = dev->devfn;
 
 		if (type == PCI_CAP_ID_MSIX) {
@@ -285,7 +288,20 @@ static int xen_initdom_setup_msi_irqs(st
 			map_irq.entry_nr = msidesc->msi_attrib.entry_nr;
 		}
 
-		ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
+		ret = -EINVAL;
+		if (pci_seg_supported)
+			ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq,
+						    &map_irq);
+		if (ret == -EINVAL && !pci_domain_nr(dev->bus)) {
+			map_irq.type = MAP_PIRQ_TYPE_MSI;
+			map_irq.index = -1;
+			map_irq.pirq = -1;
+			map_irq.bus = dev->bus->number;
+			ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq,
+						    &map_irq);
+			if (ret != -EINVAL)
+				pci_seg_supported = false;
+		}
 		if (ret) {
 			dev_warn(&dev->dev, "xen map irq failed %d for %d domain\n",
 				 ret, domid);
--- 3.1-rc7/drivers/xen/pci.c
+++ 3.1-rc7-xen-pci-multi-seg/drivers/xen/pci.c
@@ -18,6 +18,7 @@
  */
 
 #include <linux/pci.h>
+#include <linux/acpi.h>
 #include <xen/xen.h>
 #include <xen/interface/physdev.h>
 #include <xen/interface/xen.h>
@@ -26,26 +27,85 @@
 #include <asm/xen/hypercall.h>
 #include "../pci/pci.h"
 
+static bool __read_mostly pci_seg_supported = true;
+
 static int xen_add_device(struct device *dev)
 {
 	int r;
 	struct pci_dev *pci_dev = to_pci_dev(dev);
+#ifdef CONFIG_PCI_IOV
+	struct pci_dev *physfn = pci_dev->physfn;
+#endif
+
+	if (pci_seg_supported) {
+		struct physdev_pci_device_add add = {
+			.seg = pci_domain_nr(pci_dev->bus),
+			.bus = pci_dev->bus->number,
+			.devfn = pci_dev->devfn
+		};
+#ifdef CONFIG_ACPI
+		acpi_handle handle;
+#endif
 
 #ifdef CONFIG_PCI_IOV
-	if (pci_dev->is_virtfn) {
+		if (pci_dev->is_virtfn) {
+			add.flags = XEN_PCI_DEV_VIRTFN;
+			add.physfn.bus = physfn->bus->number;
+			add.physfn.devfn = physfn->devfn;
+		} else
+#endif
+		if (pci_ari_enabled(pci_dev->bus) && PCI_SLOT(pci_dev->devfn))
+			add.flags = XEN_PCI_DEV_EXTFN;
+
+#ifdef CONFIG_ACPI
+		handle = DEVICE_ACPI_HANDLE(&pci_dev->dev);
+		if (!handle)
+			handle = DEVICE_ACPI_HANDLE(pci_dev->bus->bridge);
+#ifdef CONFIG_PCI_IOV
+		if (!handle && pci_dev->is_virtfn)
+			handle = DEVICE_ACPI_HANDLE(physfn->bus->bridge);
+#endif
+		if (handle) {
+			acpi_status status;
+
+			do {
+				unsigned long long pxm;
+
+				status = acpi_evaluate_integer(handle, "_PXM",
+							       NULL, &pxm);
+				if (ACPI_SUCCESS(status)) {
+					add.optarr[0] = pxm;
+					add.flags |= XEN_PCI_DEV_PXM;
+					break;
+				}
+				status = acpi_get_parent(handle, &handle);
+			} while (ACPI_SUCCESS(status));
+		}
+#endif /* CONFIG_ACPI */
+
+		r = HYPERVISOR_physdev_op(PHYSDEVOP_pci_device_add, &add);
+		if (r != -ENOSYS)
+			return r;
+		pci_seg_supported = false;
+	}
+
+	if (pci_domain_nr(pci_dev->bus))
+		r = -ENOSYS;
+#ifdef CONFIG_PCI_IOV
+	else if (pci_dev->is_virtfn) {
 		struct physdev_manage_pci_ext manage_pci_ext = {
 			.bus		= pci_dev->bus->number,
 			.devfn		= pci_dev->devfn,
 			.is_virtfn 	= 1,
-			.physfn.bus	= pci_dev->physfn->bus->number,
-			.physfn.devfn	= pci_dev->physfn->devfn,
+			.physfn.bus	= physfn->bus->number,
+			.physfn.devfn	= physfn->devfn,
 		};
 
 		r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add_ext,
 			&manage_pci_ext);
-	} else
+	}
 #endif
-	if (pci_ari_enabled(pci_dev->bus) && PCI_SLOT(pci_dev->devfn)) {
+	else if (pci_ari_enabled(pci_dev->bus) && PCI_SLOT(pci_dev->devfn)) {
 		struct physdev_manage_pci_ext manage_pci_ext = {
 			.bus		= pci_dev->bus->number,
 			.devfn		= pci_dev->devfn,
@@ -71,13 +131,27 @@ static int xen_remove_device(struct devi
 {
 	int r;
 	struct pci_dev *pci_dev = to_pci_dev(dev);
-	struct physdev_manage_pci manage_pci;
 
-	manage_pci.bus = pci_dev->bus->number;
-	manage_pci.devfn = pci_dev->devfn;
+	if (pci_seg_supported) {
+		struct physdev_pci_device device = {
+			.seg = pci_domain_nr(pci_dev->bus),
+			.bus = pci_dev->bus->number,
+			.devfn = pci_dev->devfn
+		};
 
-	r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_remove,
-		&manage_pci);
+		r = HYPERVISOR_physdev_op(PHYSDEVOP_pci_device_remove,
+					  &device);
+	} else if (pci_domain_nr(pci_dev->bus))
+		r = -ENOSYS;
+	else {
+		struct physdev_manage_pci manage_pci = {
+			.bus = pci_dev->bus->number,
+			.devfn = pci_dev->devfn
+		};
+
+		r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_remove,
+					  &manage_pci);
+	}
 
 	return r;
 }
--- 3.1-rc7/include/xen/interface/physdev.h
+++ 3.1-rc7-xen-pci-multi-seg/include/xen/interface/physdev.h
@@ -109,6 +109,7 @@ struct physdev_irq {
 #define MAP_PIRQ_TYPE_MSI		0x0
 #define MAP_PIRQ_TYPE_GSI		0x1
 #define MAP_PIRQ_TYPE_UNKNOWN		0x2
+#define MAP_PIRQ_TYPE_MSI_SEG		0x3
 
 #define PHYSDEVOP_map_pirq		13
 struct physdev_map_pirq {
@@ -119,7 +120,7 @@ struct physdev_map_pirq {
     int index;
     /* IN or OUT */
     int pirq;
-    /* IN */
+    /* IN - high 16 bits hold segment for MAP_PIRQ_TYPE_MSI_SEG */
     int bus;
     /* IN */
     int devfn;
@@ -198,6 +199,37 @@ struct physdev_get_free_pirq {
     uint32_t pirq;
 };
 
+#define XEN_PCI_DEV_EXTFN              0x1
+#define XEN_PCI_DEV_VIRTFN             0x2
+#define XEN_PCI_DEV_PXM                0x4
+
+#define PHYSDEVOP_pci_device_add        25
+struct physdev_pci_device_add {
+    /* IN */
+    uint16_t seg;
+    uint8_t bus;
+    uint8_t devfn;
+    uint32_t flags;
+    struct {
+        uint8_t bus;
+        uint8_t devfn;
+    } physfn;
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+    uint32_t optarr[];
+#elif defined(__GNUC__)
+    uint32_t optarr[0];
+#endif
+};
+
+#define PHYSDEVOP_pci_device_remove     26
+#define PHYSDEVOP_restore_msi_ext       27
+struct physdev_pci_device {
+    /* IN */
+    uint16_t seg;
+    uint8_t bus;
+    uint8_t devfn;
+};
+
 /*
  * Notify that some PIRQ-bound event channels have been unmasked.
  * ** This command is obsolete since interface version 0x00030202 and is **

^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH] Xen/PCI: support multi-segment systems
  2011-09-22  8:17 [PATCH] Xen/PCI: support multi-segment systems Jan Beulich
@ 2011-09-22  9:06 ` Konrad Rzeszutek Wilk
  0 siblings, 0 replies; 2+ messages in thread
From: Konrad Rzeszutek Wilk @ 2011-09-22  9:06 UTC (permalink / raw)
  To: Jan Beulich, liang tang; +Cc: xen-devel

On Thu, Sep 22, 2011 at 09:17:57AM +0100, Jan Beulich wrote:
> Now that the hypercall interface changes are in -unstable, make the
> kernel side code stop ignoring the segment (aka domain) number
> (ignoring it results in pretty odd behavior on multi-segment systems).
> If only the old interfaces are available, don't call them at all for
> devices on non-zero segments.
> 
> I was not able to find any existing use of PHYSDEVOP_restore_msi
> (which would also need to be changed). If a patch in some other tree
> introduces such a use, it ought to be adjusted to try
> PHYSDEVOP_restore_msi_ext first.

Liang,

Can you please apply the same logic that Jan used here to the ACPI S3 patches?

> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> 
> ---
>  arch/x86/pci/xen.c              |   22 ++++++++-
>  drivers/xen/pci.c               |   94 +++++++++++++++++++++++++++++++++++-----
>  include/xen/interface/physdev.h |   34 ++++++++++++++
>  3 files changed, 136 insertions(+), 14 deletions(-)
> 
> --- 3.1-rc7/arch/x86/pci/xen.c
> +++ 3.1-rc7-xen-pci-multi-seg/arch/x86/pci/xen.c
> @@ -248,6 +248,8 @@ error:
>  }
>  
>  #ifdef CONFIG_XEN_DOM0
> +static bool __read_mostly pci_seg_supported = true;
> +
>  static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
>  {
>  	int ret = 0;
> @@ -265,10 +267,11 @@ static int xen_initdom_setup_msi_irqs(st
>  
>  		memset(&map_irq, 0, sizeof(map_irq));
>  		map_irq.domid = domid;
> -		map_irq.type = MAP_PIRQ_TYPE_MSI;
> +		map_irq.type = MAP_PIRQ_TYPE_MSI_SEG;
>  		map_irq.index = -1;
>  		map_irq.pirq = -1;
> -		map_irq.bus = dev->bus->number;
> +		map_irq.bus = dev->bus->number |
> +			      (pci_domain_nr(dev->bus) << 16);
>  		map_irq.devfn = dev->devfn;
>  
>  		if (type == PCI_CAP_ID_MSIX) {
> @@ -285,7 +288,20 @@ static int xen_initdom_setup_msi_irqs(st
>  			map_irq.entry_nr = msidesc->msi_attrib.entry_nr;
>  		}
>  
> -		ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
> +		ret = -EINVAL;
> +		if (pci_seg_supported)
> +			ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq,
> +						    &map_irq);
> +		if (ret == -EINVAL && !pci_domain_nr(dev->bus)) {
> +			map_irq.type = MAP_PIRQ_TYPE_MSI;
> +			map_irq.index = -1;
> +			map_irq.pirq = -1;
> +			map_irq.bus = dev->bus->number;
> +			ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq,
> +						    &map_irq);
> +			if (ret != -EINVAL)
> +				pci_seg_supported = false;
> +		}
>  		if (ret) {
>  			dev_warn(&dev->dev, "xen map irq failed %d for %d domain\n",
>  				 ret, domid);
> --- 3.1-rc7/drivers/xen/pci.c
> +++ 3.1-rc7-xen-pci-multi-seg/drivers/xen/pci.c
> @@ -18,6 +18,7 @@
>   */
>  
>  #include <linux/pci.h>
> +#include <linux/acpi.h>
>  #include <xen/xen.h>
>  #include <xen/interface/physdev.h>
>  #include <xen/interface/xen.h>
> @@ -26,26 +27,85 @@
>  #include <asm/xen/hypercall.h>
>  #include "../pci/pci.h"
>  
> +static bool __read_mostly pci_seg_supported = true;
> +
>  static int xen_add_device(struct device *dev)
>  {
>  	int r;
>  	struct pci_dev *pci_dev = to_pci_dev(dev);
> +#ifdef CONFIG_PCI_IOV
> +	struct pci_dev *physfn = pci_dev->physfn;
> +#endif
> +
> +	if (pci_seg_supported) {
> +		struct physdev_pci_device_add add = {
> +			.seg = pci_domain_nr(pci_dev->bus),
> +			.bus = pci_dev->bus->number,
> +			.devfn = pci_dev->devfn
> +		};
> +#ifdef CONFIG_ACPI
> +		acpi_handle handle;
> +#endif
>  
>  #ifdef CONFIG_PCI_IOV
> -	if (pci_dev->is_virtfn) {
> +		if (pci_dev->is_virtfn) {
> +			add.flags = XEN_PCI_DEV_VIRTFN;
> +			add.physfn.bus = physfn->bus->number;
> +			add.physfn.devfn = physfn->devfn;
> +		} else
> +#endif
> +		if (pci_ari_enabled(pci_dev->bus) && PCI_SLOT(pci_dev->devfn))
> +			add.flags = XEN_PCI_DEV_EXTFN;
> +
> +#ifdef CONFIG_ACPI
> +		handle = DEVICE_ACPI_HANDLE(&pci_dev->dev);
> +		if (!handle)
> +			handle = DEVICE_ACPI_HANDLE(pci_dev->bus->bridge);
> +#ifdef CONFIG_PCI_IOV
> +		if (!handle && pci_dev->is_virtfn)
> +			handle = DEVICE_ACPI_HANDLE(physfn->bus->bridge);
> +#endif
> +		if (handle) {
> +			acpi_status status;
> +
> +			do {
> +				unsigned long long pxm;
> +
> +				status = acpi_evaluate_integer(handle, "_PXM",
> +							       NULL, &pxm);
> +				if (ACPI_SUCCESS(status)) {
> +					add.optarr[0] = pxm;
> +					add.flags |= XEN_PCI_DEV_PXM;
> +					break;
> +				}
> +				status = acpi_get_parent(handle, &handle);
> +			} while (ACPI_SUCCESS(status));
> +		}
> +#endif /* CONFIG_ACPI */
> +
> +		r = HYPERVISOR_physdev_op(PHYSDEVOP_pci_device_add, &add);
> +		if (r != -ENOSYS)
> +			return r;
> +		pci_seg_supported = false;
> +	}
> +
> +	if (pci_domain_nr(pci_dev->bus))
> +		r = -ENOSYS;
> +#ifdef CONFIG_PCI_IOV
> +	else if (pci_dev->is_virtfn) {
>  		struct physdev_manage_pci_ext manage_pci_ext = {
>  			.bus		= pci_dev->bus->number,
>  			.devfn		= pci_dev->devfn,
>  			.is_virtfn 	= 1,
> -			.physfn.bus	= pci_dev->physfn->bus->number,
> -			.physfn.devfn	= pci_dev->physfn->devfn,
> +			.physfn.bus	= physfn->bus->number,
> +			.physfn.devfn	= physfn->devfn,
>  		};
>  
>  		r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add_ext,
>  			&manage_pci_ext);
> -	} else
> +	}
>  #endif
> -	if (pci_ari_enabled(pci_dev->bus) && PCI_SLOT(pci_dev->devfn)) {
> +	else if (pci_ari_enabled(pci_dev->bus) && PCI_SLOT(pci_dev->devfn)) {
>  		struct physdev_manage_pci_ext manage_pci_ext = {
>  			.bus		= pci_dev->bus->number,
>  			.devfn		= pci_dev->devfn,
> @@ -71,13 +131,27 @@ static int xen_remove_device(struct devi
>  {
>  	int r;
>  	struct pci_dev *pci_dev = to_pci_dev(dev);
> -	struct physdev_manage_pci manage_pci;
>  
> -	manage_pci.bus = pci_dev->bus->number;
> -	manage_pci.devfn = pci_dev->devfn;
> +	if (pci_seg_supported) {
> +		struct physdev_pci_device device = {
> +			.seg = pci_domain_nr(pci_dev->bus),
> +			.bus = pci_dev->bus->number,
> +			.devfn = pci_dev->devfn
> +		};
>  
> -	r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_remove,
> -		&manage_pci);
> +		r = HYPERVISOR_physdev_op(PHYSDEVOP_pci_device_remove,
> +					  &device);
> +	} else if (pci_domain_nr(pci_dev->bus))
> +		r = -ENOSYS;
> +	else {
> +		struct physdev_manage_pci manage_pci = {
> +			.bus = pci_dev->bus->number,
> +			.devfn = pci_dev->devfn
> +		};
> +
> +		r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_remove,
> +					  &manage_pci);
> +	}
>  
>  	return r;
>  }
> --- 3.1-rc7/include/xen/interface/physdev.h
> +++ 3.1-rc7-xen-pci-multi-seg/include/xen/interface/physdev.h
> @@ -109,6 +109,7 @@ struct physdev_irq {
>  #define MAP_PIRQ_TYPE_MSI		0x0
>  #define MAP_PIRQ_TYPE_GSI		0x1
>  #define MAP_PIRQ_TYPE_UNKNOWN		0x2
> +#define MAP_PIRQ_TYPE_MSI_SEG		0x3
>  
>  #define PHYSDEVOP_map_pirq		13
>  struct physdev_map_pirq {
> @@ -119,7 +120,7 @@ struct physdev_map_pirq {
>      int index;
>      /* IN or OUT */
>      int pirq;
> -    /* IN */
> +    /* IN - high 16 bits hold segment for MAP_PIRQ_TYPE_MSI_SEG */
>      int bus;
>      /* IN */
>      int devfn;
> @@ -198,6 +199,37 @@ struct physdev_get_free_pirq {
>      uint32_t pirq;
>  };
>  
> +#define XEN_PCI_DEV_EXTFN              0x1
> +#define XEN_PCI_DEV_VIRTFN             0x2
> +#define XEN_PCI_DEV_PXM                0x4
> +
> +#define PHYSDEVOP_pci_device_add        25
> +struct physdev_pci_device_add {
> +    /* IN */
> +    uint16_t seg;
> +    uint8_t bus;
> +    uint8_t devfn;
> +    uint32_t flags;
> +    struct {
> +        uint8_t bus;
> +        uint8_t devfn;
> +    } physfn;
> +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
> +    uint32_t optarr[];
> +#elif defined(__GNUC__)
> +    uint32_t optarr[0];
> +#endif
> +};
> +
> +#define PHYSDEVOP_pci_device_remove     26
> +#define PHYSDEVOP_restore_msi_ext       27
> +struct physdev_pci_device {
> +    /* IN */
> +    uint16_t seg;
> +    uint8_t bus;
> +    uint8_t devfn;
> +};
> +
>  /*
>   * Notify that some PIRQ-bound event channels have been unmasked.
>   * ** This command is obsolete since interface version 0x00030202 and is **
> 

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2011-09-22  9:06 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-09-22  8:17 [PATCH] Xen/PCI: support multi-segment systems Jan Beulich
2011-09-22  9:06 ` Konrad Rzeszutek Wilk

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.