All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v7] PCI: Try best to allocate pref mmio 64bit above 4g
       [not found] <CAE9FiQXzRQ9S0mwwb58R2icBTtHHJ6NMVFMaxEgtVNP9mZVjZw@mail.gmail.com>
@ 2014-03-07 20:08 ` Yinghai Lu
  2014-04-04 22:43   ` Bjorn Helgaas
  2014-04-16 22:11   ` Bjorn Helgaas
  2014-04-17  4:23 ` [PATCH v8] " Yinghai Lu
  2014-04-17 16:40 ` [PATCH v9] " Yinghai Lu
  2 siblings, 2 replies; 35+ messages in thread
From: Yinghai Lu @ 2014-03-07 20:08 UTC (permalink / raw)
  To: Bjorn Helgaas
  Cc: Benjamin Herrenschmidt, Wei Yang, Gavin Shan, Jack Morgenstein,
	amirv, Or Gerlitz, eugenia, Guo Chao, talal, linux-pci,
	Yinghai Lu

When one of children resources does not support MEM_64, MEM_64 for
bridge get reset, so pull down whole pref resource on the bridge under 4G.

If the bridge support pref mem 64, will only allocate that with pref mem64 to
children that support it.
For children resources if they only support pref mem 32, will allocate them
from non pref mem instead.

If the bridge only support 32bit pref mmio, will still have all children pref
mmio under that.

-v2: Add release bridge res support with bridge mem res for pref_mem children res.
-v3: refresh and make it can be applied early before for_each_dev_res patchset.
-v4: fix non-pref mmio 64bit support found by Guo Chao.
-v7: repost as ibm's powerpc system work again when
    1. petiboot boot kernel have this patch.
    2. or mellanox driver added new .shutdown member.
    discussion could be found at:
	 http://marc.info/?t=138968064500001&r=1&w=2

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Tested-by: Guo Chao <yan@linux.vnet.ibm.com>
Tested-by: Wei Yang <weiyang@linux.vnet.ibm.com>

---
 drivers/pci/setup-bus.c |  138 ++++++++++++++++++++++++++++++++----------------
 drivers/pci/setup-res.c |   20 ++++++
 2 files changed, 111 insertions(+), 47 deletions(-)

Index: linux-2.6/drivers/pci/setup-bus.c
===================================================================
--- linux-2.6.orig/drivers/pci/setup-bus.c
+++ linux-2.6/drivers/pci/setup-bus.c
@@ -713,12 +713,11 @@ static void pci_bridge_check_ranges(stru
    bus resource of a given type. Note: we intentionally skip
    the bus resources which have already been assigned (that is,
    have non-NULL parent resource). */
-static struct resource *find_free_bus_resource(struct pci_bus *bus, unsigned long type)
+static struct resource *find_free_bus_resource(struct pci_bus *bus,
+			 unsigned long type_mask, unsigned long type)
 {
 	int i;
 	struct resource *r;
-	unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM |
-				  IORESOURCE_PREFETCH;
 
 	pci_bus_for_each_resource(bus, r, i) {
 		if (r == &ioport_resource || r == &iomem_resource)
@@ -815,7 +814,8 @@ static void pbus_size_io(struct pci_bus
 		resource_size_t add_size, struct list_head *realloc_head)
 {
 	struct pci_dev *dev;
-	struct resource *b_res = find_free_bus_resource(bus, IORESOURCE_IO);
+	struct resource *b_res = find_free_bus_resource(bus, IORESOURCE_IO,
+							IORESOURCE_IO);
 	resource_size_t size = 0, size0 = 0, size1 = 0;
 	resource_size_t children_add_size = 0;
 	resource_size_t min_align, align;
@@ -915,15 +915,17 @@ static inline resource_size_t calculate_
  * guarantees that all child resources fit in this size.
  */
 static int pbus_size_mem(struct pci_bus *bus, unsigned long mask,
-			 unsigned long type, resource_size_t min_size,
-			resource_size_t add_size,
-			struct list_head *realloc_head)
+			 unsigned long type, unsigned long type2,
+			 unsigned long type3,
+			 resource_size_t min_size, resource_size_t add_size,
+			 struct list_head *realloc_head)
 {
 	struct pci_dev *dev;
 	resource_size_t min_align, align, size, size0, size1;
 	resource_size_t aligns[12];	/* Alignments from 1Mb to 2Gb */
 	int order, max_order;
-	struct resource *b_res = find_free_bus_resource(bus, type);
+	struct resource *b_res = find_free_bus_resource(bus,
+					 mask | IORESOURCE_PREFETCH, type);
 	unsigned int mem64_mask = 0;
 	resource_size_t children_add_size = 0;
 
@@ -944,7 +946,9 @@ static int pbus_size_mem(struct pci_bus
 			struct resource *r = &dev->resource[i];
 			resource_size_t r_size;
 
-			if (r->parent || (r->flags & mask) != type)
+			if (r->parent || ((r->flags & mask) != type &&
+					  (r->flags & mask) != type2 &&
+					  (r->flags & mask) != type3))
 				continue;
 			r_size = resource_size(r);
 #ifdef CONFIG_PCI_IOV
@@ -1117,8 +1121,9 @@ void __ref __pci_bus_size_bridges(struct
 			struct list_head *realloc_head)
 {
 	struct pci_dev *dev;
-	unsigned long mask, prefmask;
+	unsigned long mask, prefmask, type2 = 0, type3 = 0;
 	resource_size_t additional_mem_size = 0, additional_io_size = 0;
+	struct resource *b_res;
 
 	list_for_each_entry(dev, &bus->devices, bus_list) {
 		struct pci_bus *b = dev->subordinate;
@@ -1163,15 +1168,34 @@ void __ref __pci_bus_size_bridges(struct
 		   has already been allocated by arch code, try
 		   non-prefetchable range for both types of PCI memory
 		   resources. */
+		b_res = &bus->self->resource[PCI_BRIDGE_RESOURCES];
 		mask = IORESOURCE_MEM;
 		prefmask = IORESOURCE_MEM | IORESOURCE_PREFETCH;
-		if (pbus_size_mem(bus, prefmask, prefmask,
+		if (b_res[2].flags & IORESOURCE_MEM_64) {
+			prefmask |= IORESOURCE_MEM_64;
+			if (pbus_size_mem(bus, prefmask, prefmask,
+				  prefmask, prefmask,
 				  realloc_head ? 0 : additional_mem_size,
-				  additional_mem_size, realloc_head))
-			mask = prefmask; /* Success, size non-prefetch only. */
-		else
-			additional_mem_size += additional_mem_size;
-		pbus_size_mem(bus, mask, IORESOURCE_MEM,
+				  additional_mem_size, realloc_head)) {
+					/* Success, size non-pref64 only. */
+					mask = prefmask;
+					type2 = prefmask & ~IORESOURCE_MEM_64;
+					type3 = prefmask & ~IORESOURCE_PREFETCH;
+			}
+		}
+		if (!type2) {
+			prefmask &= ~IORESOURCE_MEM_64;
+			if (pbus_size_mem(bus, prefmask, prefmask,
+					 prefmask, prefmask,
+					 realloc_head ? 0 : additional_mem_size,
+					 additional_mem_size, realloc_head)) {
+				/* Success, size non-prefetch only. */
+				mask = prefmask;
+			} else
+				additional_mem_size += additional_mem_size;
+			type2 = type3 = IORESOURCE_MEM;
+		}
+		pbus_size_mem(bus, mask, IORESOURCE_MEM, type2, type3,
 				realloc_head ? 0 : additional_mem_size,
 				additional_mem_size, realloc_head);
 		break;
@@ -1257,42 +1281,66 @@ static void __ref __pci_bridge_assign_re
 static void pci_bridge_release_resources(struct pci_bus *bus,
 					  unsigned long type)
 {
-	int idx;
-	bool changed = false;
-	struct pci_dev *dev;
+	struct pci_dev *dev = bus->self;
 	struct resource *r;
 	unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM |
-				  IORESOURCE_PREFETCH;
+				  IORESOURCE_PREFETCH | IORESOURCE_MEM_64;
+	unsigned old_flags = 0;
+	struct resource *b_res;
+	int idx = 1;
 
-	dev = bus->self;
-	for (idx = PCI_BRIDGE_RESOURCES; idx <= PCI_BRIDGE_RESOURCE_END;
-	     idx++) {
-		r = &dev->resource[idx];
-		if ((r->flags & type_mask) != type)
-			continue;
-		if (!r->parent)
-			continue;
-		/*
-		 * if there are children under that, we should release them
-		 *  all
-		 */
-		release_child_resources(r);
-		if (!release_resource(r)) {
-			dev_printk(KERN_DEBUG, &dev->dev,
-				 "resource %d %pR released\n", idx, r);
-			/* keep the old size */
-			r->end = resource_size(r) - 1;
-			r->start = 0;
-			r->flags = 0;
-			changed = true;
-		}
-	}
+	b_res = &dev->resource[PCI_BRIDGE_RESOURCES];
+
+	/*
+	 *     1. if there is io port assign fail, will release bridge
+	 *	  io port.
+	 *     2. if there is non pref mmio assign fail, release bridge
+	 *	  nonpref mmio.
+	 *     3. if there is 64bit pref mmio assign fail, and bridge pref
+	 *	  is 64bit, release bridge pref mmio.
+	 *     4. if there is pref mmio assign fail, and bridge pref is
+	 *	  32bit mmio, release bridge pref mmio
+	 *     5. if there is pref mmio assign fail, and bridge pref is not
+	 *	  assigned, release bridge nonpref mmio.
+	 */
+	if (type & IORESOURCE_IO)
+		idx = 0;
+	else if (!(type & IORESOURCE_PREFETCH))
+		idx = 1;
+	else if ((type & IORESOURCE_MEM_64) &&
+		 (b_res[2].flags & IORESOURCE_MEM_64))
+		idx = 2;
+	else if (!(b_res[2].flags & IORESOURCE_MEM_64) &&
+		 (b_res[2].flags & IORESOURCE_PREFETCH))
+		idx = 2;
+	else
+		idx = 1;
+
+	r = &b_res[idx];
+
+	if (!r->parent)
+		return;
+
+	/*
+	 * if there are children under that, we should release them
+	 *  all
+	 */
+	release_child_resources(r);
+	if (!release_resource(r)) {
+		type = old_flags = r->flags & type_mask;
+		dev_printk(KERN_DEBUG, &dev->dev, "resource %d %pR released\n",
+					PCI_BRIDGE_RESOURCES + idx, r);
+		/* keep the old size */
+		r->end = resource_size(r) - 1;
+		r->start = 0;
+		r->flags = 0;
 
-	if (changed) {
 		/* avoiding touch the one without PREF */
 		if (type & IORESOURCE_PREFETCH)
 			type = IORESOURCE_PREFETCH;
 		__pci_setup_bridge(bus, type);
+		/* for next child res under same bridge */
+		r->flags = old_flags;
 	}
 }
 
@@ -1471,7 +1519,7 @@ void pci_assign_unassigned_root_bus_reso
 	LIST_HEAD(fail_head);
 	struct pci_dev_resource *fail_res;
 	unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM |
-				  IORESOURCE_PREFETCH;
+				  IORESOURCE_PREFETCH | IORESOURCE_MEM_64;
 	int pci_try_num = 1;
 	enum enable_type enable_local;
 
Index: linux-2.6/drivers/pci/setup-res.c
===================================================================
--- linux-2.6.orig/drivers/pci/setup-res.c
+++ linux-2.6/drivers/pci/setup-res.c
@@ -208,15 +208,31 @@ static int __pci_assign_resource(struct
 
 	/* First, try exact prefetching match.. */
 	ret = pci_bus_alloc_resource(bus, res, size, align, min,
-				     IORESOURCE_PREFETCH,
+				     IORESOURCE_PREFETCH | IORESOURCE_MEM_64,
 				     pcibios_align_resource, dev);
 
-	if (ret < 0 && (res->flags & IORESOURCE_PREFETCH)) {
+	if (ret < 0 &&
+	    (res->flags & (IORESOURCE_PREFETCH | IORESOURCE_MEM_64)) ==
+	     (IORESOURCE_PREFETCH | IORESOURCE_MEM_64)) {
+		/*
+		 * That failed.
+		 *
+		 * Try below 4g pref
+		 */
+		ret = pci_bus_alloc_resource(bus, res, size, align, min,
+					     IORESOURCE_PREFETCH,
+					     pcibios_align_resource, dev);
+	}
+
+	if (ret < 0 &&
+	    (res->flags & (IORESOURCE_PREFETCH | IORESOURCE_MEM_64))) {
 		/*
 		 * That failed.
 		 *
 		 * But a prefetching area can handle a non-prefetching
 		 * window (it will just not perform as well).
+		 *
+		 * Also can put 64bit under 32bit range. (below 4g).
 		 */
 		ret = pci_bus_alloc_resource(bus, res, size, align, min, 0,
 					     pcibios_align_resource, dev);

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH v7] PCI: Try best to allocate pref mmio 64bit above 4g
  2014-03-07 20:08 ` [PATCH v7] PCI: Try best to allocate pref mmio 64bit above 4g Yinghai Lu
@ 2014-04-04 22:43   ` Bjorn Helgaas
  2014-04-08  2:57     ` Guo Chao
  2014-04-16 22:11   ` Bjorn Helgaas
  1 sibling, 1 reply; 35+ messages in thread
From: Bjorn Helgaas @ 2014-04-04 22:43 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Benjamin Herrenschmidt, Wei Yang, Gavin Shan, Jack Morgenstein,
	Amir Vadai, Or Gerlitz, Eugenia Emantayev, Guo Chao, talal,
	linux-pci

On Fri, Mar 7, 2014 at 1:08 PM, Yinghai Lu <yinghai@kernel.org> wrote:
> When one of children resources does not support MEM_64, MEM_64 for
> bridge get reset, so pull down whole pref resource on the bridge under 4G.
>
> If the bridge support pref mem 64, will only allocate that with pref mem64 to
> children that support it.
> For children resources if they only support pref mem 32, will allocate them
> from non pref mem instead.
>
> If the bridge only support 32bit pref mmio, will still have all children pref
> mmio under that.
>
> -v2: Add release bridge res support with bridge mem res for pref_mem children res.
> -v3: refresh and make it can be applied early before for_each_dev_res patchset.
> -v4: fix non-pref mmio 64bit support found by Guo Chao.
> -v7: repost as ibm's powerpc system work again when
>     1. petiboot boot kernel have this patch.
>     2. or mellanox driver added new .shutdown member.
>     discussion could be found at:
>          http://marc.info/?t=138968064500001&r=1&w=2

I think this fixes some sort of bug, and I suppose if I spent a few
hours decoding the discussion you mention (the 44 message-long
"mlx4_core probe error after applying Yinghai's patch" discussion), I
might be able to figure out what it is.

But maybe somebody who already knows what the bug is can summarize it
for me, and maybe even open a kernel.org bugzilla with a dmesg log as
an example.

I do intend to use this message [1] as a source to help construct a
changelog, but I'd like to also describe a specific problem that is
solved by this patch.  Ideally this would already be in the changelog,
but it's not, so any help in figuring it out would be appreciated.

[1] http://lkml.kernel.org/r/CAErSpo5VWREGptW0MU0wLJUa3h23DXMZPGkdJFMTx5WAGL8J6Q@mail.gmail.com

> Signed-off-by: Yinghai Lu <yinghai@kernel.org>
> Tested-by: Guo Chao <yan@linux.vnet.ibm.com>
> Tested-by: Wei Yang <weiyang@linux.vnet.ibm.com>
>
> ---
>  drivers/pci/setup-bus.c |  138 ++++++++++++++++++++++++++++++++----------------
>  drivers/pci/setup-res.c |   20 ++++++
>  2 files changed, 111 insertions(+), 47 deletions(-)
>
> Index: linux-2.6/drivers/pci/setup-bus.c
> ===================================================================
> --- linux-2.6.orig/drivers/pci/setup-bus.c
> +++ linux-2.6/drivers/pci/setup-bus.c
> @@ -713,12 +713,11 @@ static void pci_bridge_check_ranges(stru
>     bus resource of a given type. Note: we intentionally skip
>     the bus resources which have already been assigned (that is,
>     have non-NULL parent resource). */
> -static struct resource *find_free_bus_resource(struct pci_bus *bus, unsigned long type)
> +static struct resource *find_free_bus_resource(struct pci_bus *bus,
> +                        unsigned long type_mask, unsigned long type)
>  {
>         int i;
>         struct resource *r;
> -       unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM |
> -                                 IORESOURCE_PREFETCH;
>
>         pci_bus_for_each_resource(bus, r, i) {
>                 if (r == &ioport_resource || r == &iomem_resource)
> @@ -815,7 +814,8 @@ static void pbus_size_io(struct pci_bus
>                 resource_size_t add_size, struct list_head *realloc_head)
>  {
>         struct pci_dev *dev;
> -       struct resource *b_res = find_free_bus_resource(bus, IORESOURCE_IO);
> +       struct resource *b_res = find_free_bus_resource(bus, IORESOURCE_IO,
> +                                                       IORESOURCE_IO);
>         resource_size_t size = 0, size0 = 0, size1 = 0;
>         resource_size_t children_add_size = 0;
>         resource_size_t min_align, align;
> @@ -915,15 +915,17 @@ static inline resource_size_t calculate_
>   * guarantees that all child resources fit in this size.
>   */
>  static int pbus_size_mem(struct pci_bus *bus, unsigned long mask,
> -                        unsigned long type, resource_size_t min_size,
> -                       resource_size_t add_size,
> -                       struct list_head *realloc_head)
> +                        unsigned long type, unsigned long type2,
> +                        unsigned long type3,
> +                        resource_size_t min_size, resource_size_t add_size,
> +                        struct list_head *realloc_head)
>  {
>         struct pci_dev *dev;
>         resource_size_t min_align, align, size, size0, size1;
>         resource_size_t aligns[12];     /* Alignments from 1Mb to 2Gb */
>         int order, max_order;
> -       struct resource *b_res = find_free_bus_resource(bus, type);
> +       struct resource *b_res = find_free_bus_resource(bus,
> +                                        mask | IORESOURCE_PREFETCH, type);
>         unsigned int mem64_mask = 0;
>         resource_size_t children_add_size = 0;
>
> @@ -944,7 +946,9 @@ static int pbus_size_mem(struct pci_bus
>                         struct resource *r = &dev->resource[i];
>                         resource_size_t r_size;
>
> -                       if (r->parent || (r->flags & mask) != type)
> +                       if (r->parent || ((r->flags & mask) != type &&
> +                                         (r->flags & mask) != type2 &&
> +                                         (r->flags & mask) != type3))
>                                 continue;
>                         r_size = resource_size(r);
>  #ifdef CONFIG_PCI_IOV
> @@ -1117,8 +1121,9 @@ void __ref __pci_bus_size_bridges(struct
>                         struct list_head *realloc_head)
>  {
>         struct pci_dev *dev;
> -       unsigned long mask, prefmask;
> +       unsigned long mask, prefmask, type2 = 0, type3 = 0;
>         resource_size_t additional_mem_size = 0, additional_io_size = 0;
> +       struct resource *b_res;
>
>         list_for_each_entry(dev, &bus->devices, bus_list) {
>                 struct pci_bus *b = dev->subordinate;
> @@ -1163,15 +1168,34 @@ void __ref __pci_bus_size_bridges(struct
>                    has already been allocated by arch code, try
>                    non-prefetchable range for both types of PCI memory
>                    resources. */
> +               b_res = &bus->self->resource[PCI_BRIDGE_RESOURCES];
>                 mask = IORESOURCE_MEM;
>                 prefmask = IORESOURCE_MEM | IORESOURCE_PREFETCH;
> -               if (pbus_size_mem(bus, prefmask, prefmask,
> +               if (b_res[2].flags & IORESOURCE_MEM_64) {
> +                       prefmask |= IORESOURCE_MEM_64;
> +                       if (pbus_size_mem(bus, prefmask, prefmask,
> +                                 prefmask, prefmask,
>                                   realloc_head ? 0 : additional_mem_size,
> -                                 additional_mem_size, realloc_head))
> -                       mask = prefmask; /* Success, size non-prefetch only. */
> -               else
> -                       additional_mem_size += additional_mem_size;
> -               pbus_size_mem(bus, mask, IORESOURCE_MEM,
> +                                 additional_mem_size, realloc_head)) {
> +                                       /* Success, size non-pref64 only. */
> +                                       mask = prefmask;
> +                                       type2 = prefmask & ~IORESOURCE_MEM_64;
> +                                       type3 = prefmask & ~IORESOURCE_PREFETCH;
> +                       }
> +               }
> +               if (!type2) {
> +                       prefmask &= ~IORESOURCE_MEM_64;
> +                       if (pbus_size_mem(bus, prefmask, prefmask,
> +                                        prefmask, prefmask,
> +                                        realloc_head ? 0 : additional_mem_size,
> +                                        additional_mem_size, realloc_head)) {
> +                               /* Success, size non-prefetch only. */
> +                               mask = prefmask;
> +                       } else
> +                               additional_mem_size += additional_mem_size;
> +                       type2 = type3 = IORESOURCE_MEM;
> +               }
> +               pbus_size_mem(bus, mask, IORESOURCE_MEM, type2, type3,
>                                 realloc_head ? 0 : additional_mem_size,
>                                 additional_mem_size, realloc_head);
>                 break;
> @@ -1257,42 +1281,66 @@ static void __ref __pci_bridge_assign_re
>  static void pci_bridge_release_resources(struct pci_bus *bus,
>                                           unsigned long type)
>  {
> -       int idx;
> -       bool changed = false;
> -       struct pci_dev *dev;
> +       struct pci_dev *dev = bus->self;
>         struct resource *r;
>         unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM |
> -                                 IORESOURCE_PREFETCH;
> +                                 IORESOURCE_PREFETCH | IORESOURCE_MEM_64;
> +       unsigned old_flags = 0;
> +       struct resource *b_res;
> +       int idx = 1;
>
> -       dev = bus->self;
> -       for (idx = PCI_BRIDGE_RESOURCES; idx <= PCI_BRIDGE_RESOURCE_END;
> -            idx++) {
> -               r = &dev->resource[idx];
> -               if ((r->flags & type_mask) != type)
> -                       continue;
> -               if (!r->parent)
> -                       continue;
> -               /*
> -                * if there are children under that, we should release them
> -                *  all
> -                */
> -               release_child_resources(r);
> -               if (!release_resource(r)) {
> -                       dev_printk(KERN_DEBUG, &dev->dev,
> -                                "resource %d %pR released\n", idx, r);
> -                       /* keep the old size */
> -                       r->end = resource_size(r) - 1;
> -                       r->start = 0;
> -                       r->flags = 0;
> -                       changed = true;
> -               }
> -       }
> +       b_res = &dev->resource[PCI_BRIDGE_RESOURCES];
> +
> +       /*
> +        *     1. if there is io port assign fail, will release bridge
> +        *        io port.
> +        *     2. if there is non pref mmio assign fail, release bridge
> +        *        nonpref mmio.
> +        *     3. if there is 64bit pref mmio assign fail, and bridge pref
> +        *        is 64bit, release bridge pref mmio.
> +        *     4. if there is pref mmio assign fail, and bridge pref is
> +        *        32bit mmio, release bridge pref mmio
> +        *     5. if there is pref mmio assign fail, and bridge pref is not
> +        *        assigned, release bridge nonpref mmio.
> +        */
> +       if (type & IORESOURCE_IO)
> +               idx = 0;
> +       else if (!(type & IORESOURCE_PREFETCH))
> +               idx = 1;
> +       else if ((type & IORESOURCE_MEM_64) &&
> +                (b_res[2].flags & IORESOURCE_MEM_64))
> +               idx = 2;
> +       else if (!(b_res[2].flags & IORESOURCE_MEM_64) &&
> +                (b_res[2].flags & IORESOURCE_PREFETCH))
> +               idx = 2;
> +       else
> +               idx = 1;
> +
> +       r = &b_res[idx];
> +
> +       if (!r->parent)
> +               return;
> +
> +       /*
> +        * if there are children under that, we should release them
> +        *  all
> +        */
> +       release_child_resources(r);
> +       if (!release_resource(r)) {
> +               type = old_flags = r->flags & type_mask;
> +               dev_printk(KERN_DEBUG, &dev->dev, "resource %d %pR released\n",
> +                                       PCI_BRIDGE_RESOURCES + idx, r);
> +               /* keep the old size */
> +               r->end = resource_size(r) - 1;
> +               r->start = 0;
> +               r->flags = 0;
>
> -       if (changed) {
>                 /* avoiding touch the one without PREF */
>                 if (type & IORESOURCE_PREFETCH)
>                         type = IORESOURCE_PREFETCH;
>                 __pci_setup_bridge(bus, type);
> +               /* for next child res under same bridge */
> +               r->flags = old_flags;
>         }
>  }
>
> @@ -1471,7 +1519,7 @@ void pci_assign_unassigned_root_bus_reso
>         LIST_HEAD(fail_head);
>         struct pci_dev_resource *fail_res;
>         unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM |
> -                                 IORESOURCE_PREFETCH;
> +                                 IORESOURCE_PREFETCH | IORESOURCE_MEM_64;
>         int pci_try_num = 1;
>         enum enable_type enable_local;
>
> Index: linux-2.6/drivers/pci/setup-res.c
> ===================================================================
> --- linux-2.6.orig/drivers/pci/setup-res.c
> +++ linux-2.6/drivers/pci/setup-res.c
> @@ -208,15 +208,31 @@ static int __pci_assign_resource(struct
>
>         /* First, try exact prefetching match.. */
>         ret = pci_bus_alloc_resource(bus, res, size, align, min,
> -                                    IORESOURCE_PREFETCH,
> +                                    IORESOURCE_PREFETCH | IORESOURCE_MEM_64,
>                                      pcibios_align_resource, dev);
>
> -       if (ret < 0 && (res->flags & IORESOURCE_PREFETCH)) {
> +       if (ret < 0 &&
> +           (res->flags & (IORESOURCE_PREFETCH | IORESOURCE_MEM_64)) ==
> +            (IORESOURCE_PREFETCH | IORESOURCE_MEM_64)) {
> +               /*
> +                * That failed.
> +                *
> +                * Try below 4g pref
> +                */
> +               ret = pci_bus_alloc_resource(bus, res, size, align, min,
> +                                            IORESOURCE_PREFETCH,
> +                                            pcibios_align_resource, dev);
> +       }
> +
> +       if (ret < 0 &&
> +           (res->flags & (IORESOURCE_PREFETCH | IORESOURCE_MEM_64))) {
>                 /*
>                  * That failed.
>                  *
>                  * But a prefetching area can handle a non-prefetching
>                  * window (it will just not perform as well).
> +                *
> +                * Also can put 64bit under 32bit range. (below 4g).
>                  */
>                 ret = pci_bus_alloc_resource(bus, res, size, align, min, 0,
>                                              pcibios_align_resource, dev);

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH v7] PCI: Try best to allocate pref mmio 64bit above 4g
  2014-04-04 22:43   ` Bjorn Helgaas
@ 2014-04-08  2:57     ` Guo Chao
  2014-04-08  7:18       ` Or Gerlitz
  2014-04-08 15:02       ` Bjorn Helgaas
  0 siblings, 2 replies; 35+ messages in thread
From: Guo Chao @ 2014-04-08  2:57 UTC (permalink / raw)
  To: Bjorn Helgaas
  Cc: Yinghai Lu, Benjamin Herrenschmidt, Wei Yang, Gavin Shan,
	Jack Morgenstein, Amir Vadai, Or Gerlitz, Eugenia Emantayev,
	talal, linux-pci

On Fri, Apr 04, 2014 at 04:43:27PM -0600, Bjorn Helgaas wrote:
> On Fri, Mar 7, 2014 at 1:08 PM, Yinghai Lu <yinghai@kernel.org> wrote:
> > When one of children resources does not support MEM_64, MEM_64 for
> > bridge get reset, so pull down whole pref resource on the bridge under 4G.
> >
> > If the bridge support pref mem 64, will only allocate that with pref mem64 to
> > children that support it.
> > For children resources if they only support pref mem 32, will allocate them
> > from non pref mem instead.
> >
> > If the bridge only support 32bit pref mmio, will still have all children pref
> > mmio under that.
> >
> > -v2: Add release bridge res support with bridge mem res for pref_mem children res.
> > -v3: refresh and make it can be applied early before for_each_dev_res patchset.
> > -v4: fix non-pref mmio 64bit support found by Guo Chao.
> > -v7: repost as ibm's powerpc system work again when
> >     1. petiboot boot kernel have this patch.
> >     2. or mellanox driver added new .shutdown member.
> >     discussion could be found at:
> >          http://marc.info/?t=138968064500001&r=1&w=2
> 
> I think this fixes some sort of bug, and I suppose if I spent a few
> hours decoding the discussion you mention (the 44 message-long
> "mlx4_core probe error after applying Yinghai's patch" discussion), I
> might be able to figure out what it is.
> 

The result of 44 message-long discussion is: the Mellanox card's failure is
due to a bug in its driver instead of this patch.

The patch refines the logic of using prefetchable window, so that 64-bit
prefetchable BARs have a chance to be really prefetchable. It does not fix
any bugs, instead, it exposes one. 

Thanks,
Guo Chao

> But maybe somebody who already knows what the bug is can summarize it
> for me, and maybe even open a kernel.org bugzilla with a dmesg log as
> an example.
> 
> I do intend to use this message [1] as a source to help construct a
> changelog, but I'd like to also describe a specific problem that is
> solved by this patch.  Ideally this would already be in the changelog,
> but it's not, so any help in figuring it out would be appreciated.
> 
> [1] http://lkml.kernel.org/r/CAErSpo5VWREGptW0MU0wLJUa3h23DXMZPGkdJFMTx5WAGL8J6Q@mail.gmail.com
> 
> > Signed-off-by: Yinghai Lu <yinghai@kernel.org>
> > Tested-by: Guo Chao <yan@linux.vnet.ibm.com>
> > Tested-by: Wei Yang <weiyang@linux.vnet.ibm.com>
> >
> > ---
> >  drivers/pci/setup-bus.c |  138 ++++++++++++++++++++++++++++++++----------------
> >  drivers/pci/setup-res.c |   20 ++++++
> >  2 files changed, 111 insertions(+), 47 deletions(-)
> >
> > Index: linux-2.6/drivers/pci/setup-bus.c
> > ===================================================================
> > --- linux-2.6.orig/drivers/pci/setup-bus.c
> > +++ linux-2.6/drivers/pci/setup-bus.c
> > @@ -713,12 +713,11 @@ static void pci_bridge_check_ranges(stru
> >     bus resource of a given type. Note: we intentionally skip
> >     the bus resources which have already been assigned (that is,
> >     have non-NULL parent resource). */
> > -static struct resource *find_free_bus_resource(struct pci_bus *bus, unsigned long type)
> > +static struct resource *find_free_bus_resource(struct pci_bus *bus,
> > +                        unsigned long type_mask, unsigned long type)
> >  {
> >         int i;
> >         struct resource *r;
> > -       unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM |
> > -                                 IORESOURCE_PREFETCH;
> >
> >         pci_bus_for_each_resource(bus, r, i) {
> >                 if (r == &ioport_resource || r == &iomem_resource)
> > @@ -815,7 +814,8 @@ static void pbus_size_io(struct pci_bus
> >                 resource_size_t add_size, struct list_head *realloc_head)
> >  {
> >         struct pci_dev *dev;
> > -       struct resource *b_res = find_free_bus_resource(bus, IORESOURCE_IO);
> > +       struct resource *b_res = find_free_bus_resource(bus, IORESOURCE_IO,
> > +                                                       IORESOURCE_IO);
> >         resource_size_t size = 0, size0 = 0, size1 = 0;
> >         resource_size_t children_add_size = 0;
> >         resource_size_t min_align, align;
> > @@ -915,15 +915,17 @@ static inline resource_size_t calculate_
> >   * guarantees that all child resources fit in this size.
> >   */
> >  static int pbus_size_mem(struct pci_bus *bus, unsigned long mask,
> > -                        unsigned long type, resource_size_t min_size,
> > -                       resource_size_t add_size,
> > -                       struct list_head *realloc_head)
> > +                        unsigned long type, unsigned long type2,
> > +                        unsigned long type3,
> > +                        resource_size_t min_size, resource_size_t add_size,
> > +                        struct list_head *realloc_head)
> >  {
> >         struct pci_dev *dev;
> >         resource_size_t min_align, align, size, size0, size1;
> >         resource_size_t aligns[12];     /* Alignments from 1Mb to 2Gb */
> >         int order, max_order;
> > -       struct resource *b_res = find_free_bus_resource(bus, type);
> > +       struct resource *b_res = find_free_bus_resource(bus,
> > +                                        mask | IORESOURCE_PREFETCH, type);
> >         unsigned int mem64_mask = 0;
> >         resource_size_t children_add_size = 0;
> >
> > @@ -944,7 +946,9 @@ static int pbus_size_mem(struct pci_bus
> >                         struct resource *r = &dev->resource[i];
> >                         resource_size_t r_size;
> >
> > -                       if (r->parent || (r->flags & mask) != type)
> > +                       if (r->parent || ((r->flags & mask) != type &&
> > +                                         (r->flags & mask) != type2 &&
> > +                                         (r->flags & mask) != type3))
> >                                 continue;
> >                         r_size = resource_size(r);
> >  #ifdef CONFIG_PCI_IOV
> > @@ -1117,8 +1121,9 @@ void __ref __pci_bus_size_bridges(struct
> >                         struct list_head *realloc_head)
> >  {
> >         struct pci_dev *dev;
> > -       unsigned long mask, prefmask;
> > +       unsigned long mask, prefmask, type2 = 0, type3 = 0;
> >         resource_size_t additional_mem_size = 0, additional_io_size = 0;
> > +       struct resource *b_res;
> >
> >         list_for_each_entry(dev, &bus->devices, bus_list) {
> >                 struct pci_bus *b = dev->subordinate;
> > @@ -1163,15 +1168,34 @@ void __ref __pci_bus_size_bridges(struct
> >                    has already been allocated by arch code, try
> >                    non-prefetchable range for both types of PCI memory
> >                    resources. */
> > +               b_res = &bus->self->resource[PCI_BRIDGE_RESOURCES];
> >                 mask = IORESOURCE_MEM;
> >                 prefmask = IORESOURCE_MEM | IORESOURCE_PREFETCH;
> > -               if (pbus_size_mem(bus, prefmask, prefmask,
> > +               if (b_res[2].flags & IORESOURCE_MEM_64) {
> > +                       prefmask |= IORESOURCE_MEM_64;
> > +                       if (pbus_size_mem(bus, prefmask, prefmask,
> > +                                 prefmask, prefmask,
> >                                   realloc_head ? 0 : additional_mem_size,
> > -                                 additional_mem_size, realloc_head))
> > -                       mask = prefmask; /* Success, size non-prefetch only. */
> > -               else
> > -                       additional_mem_size += additional_mem_size;
> > -               pbus_size_mem(bus, mask, IORESOURCE_MEM,
> > +                                 additional_mem_size, realloc_head)) {
> > +                                       /* Success, size non-pref64 only. */
> > +                                       mask = prefmask;
> > +                                       type2 = prefmask & ~IORESOURCE_MEM_64;
> > +                                       type3 = prefmask & ~IORESOURCE_PREFETCH;
> > +                       }
> > +               }
> > +               if (!type2) {
> > +                       prefmask &= ~IORESOURCE_MEM_64;
> > +                       if (pbus_size_mem(bus, prefmask, prefmask,
> > +                                        prefmask, prefmask,
> > +                                        realloc_head ? 0 : additional_mem_size,
> > +                                        additional_mem_size, realloc_head)) {
> > +                               /* Success, size non-prefetch only. */
> > +                               mask = prefmask;
> > +                       } else
> > +                               additional_mem_size += additional_mem_size;
> > +                       type2 = type3 = IORESOURCE_MEM;
> > +               }
> > +               pbus_size_mem(bus, mask, IORESOURCE_MEM, type2, type3,
> >                                 realloc_head ? 0 : additional_mem_size,
> >                                 additional_mem_size, realloc_head);
> >                 break;
> > @@ -1257,42 +1281,66 @@ static void __ref __pci_bridge_assign_re
> >  static void pci_bridge_release_resources(struct pci_bus *bus,
> >                                           unsigned long type)
> >  {
> > -       int idx;
> > -       bool changed = false;
> > -       struct pci_dev *dev;
> > +       struct pci_dev *dev = bus->self;
> >         struct resource *r;
> >         unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM |
> > -                                 IORESOURCE_PREFETCH;
> > +                                 IORESOURCE_PREFETCH | IORESOURCE_MEM_64;
> > +       unsigned old_flags = 0;
> > +       struct resource *b_res;
> > +       int idx = 1;
> >
> > -       dev = bus->self;
> > -       for (idx = PCI_BRIDGE_RESOURCES; idx <= PCI_BRIDGE_RESOURCE_END;
> > -            idx++) {
> > -               r = &dev->resource[idx];
> > -               if ((r->flags & type_mask) != type)
> > -                       continue;
> > -               if (!r->parent)
> > -                       continue;
> > -               /*
> > -                * if there are children under that, we should release them
> > -                *  all
> > -                */
> > -               release_child_resources(r);
> > -               if (!release_resource(r)) {
> > -                       dev_printk(KERN_DEBUG, &dev->dev,
> > -                                "resource %d %pR released\n", idx, r);
> > -                       /* keep the old size */
> > -                       r->end = resource_size(r) - 1;
> > -                       r->start = 0;
> > -                       r->flags = 0;
> > -                       changed = true;
> > -               }
> > -       }
> > +       b_res = &dev->resource[PCI_BRIDGE_RESOURCES];
> > +
> > +       /*
> > +        *     1. if there is io port assign fail, will release bridge
> > +        *        io port.
> > +        *     2. if there is non pref mmio assign fail, release bridge
> > +        *        nonpref mmio.
> > +        *     3. if there is 64bit pref mmio assign fail, and bridge pref
> > +        *        is 64bit, release bridge pref mmio.
> > +        *     4. if there is pref mmio assign fail, and bridge pref is
> > +        *        32bit mmio, release bridge pref mmio
> > +        *     5. if there is pref mmio assign fail, and bridge pref is not
> > +        *        assigned, release bridge nonpref mmio.
> > +        */
> > +       if (type & IORESOURCE_IO)
> > +               idx = 0;
> > +       else if (!(type & IORESOURCE_PREFETCH))
> > +               idx = 1;
> > +       else if ((type & IORESOURCE_MEM_64) &&
> > +                (b_res[2].flags & IORESOURCE_MEM_64))
> > +               idx = 2;
> > +       else if (!(b_res[2].flags & IORESOURCE_MEM_64) &&
> > +                (b_res[2].flags & IORESOURCE_PREFETCH))
> > +               idx = 2;
> > +       else
> > +               idx = 1;
> > +
> > +       r = &b_res[idx];
> > +
> > +       if (!r->parent)
> > +               return;
> > +
> > +       /*
> > +        * if there are children under that, we should release them
> > +        *  all
> > +        */
> > +       release_child_resources(r);
> > +       if (!release_resource(r)) {
> > +               type = old_flags = r->flags & type_mask;
> > +               dev_printk(KERN_DEBUG, &dev->dev, "resource %d %pR released\n",
> > +                                       PCI_BRIDGE_RESOURCES + idx, r);
> > +               /* keep the old size */
> > +               r->end = resource_size(r) - 1;
> > +               r->start = 0;
> > +               r->flags = 0;
> >
> > -       if (changed) {
> >                 /* avoiding touch the one without PREF */
> >                 if (type & IORESOURCE_PREFETCH)
> >                         type = IORESOURCE_PREFETCH;
> >                 __pci_setup_bridge(bus, type);
> > +               /* for next child res under same bridge */
> > +               r->flags = old_flags;
> >         }
> >  }
> >
> > @@ -1471,7 +1519,7 @@ void pci_assign_unassigned_root_bus_reso
> >         LIST_HEAD(fail_head);
> >         struct pci_dev_resource *fail_res;
> >         unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM |
> > -                                 IORESOURCE_PREFETCH;
> > +                                 IORESOURCE_PREFETCH | IORESOURCE_MEM_64;
> >         int pci_try_num = 1;
> >         enum enable_type enable_local;
> >
> > Index: linux-2.6/drivers/pci/setup-res.c
> > ===================================================================
> > --- linux-2.6.orig/drivers/pci/setup-res.c
> > +++ linux-2.6/drivers/pci/setup-res.c
> > @@ -208,15 +208,31 @@ static int __pci_assign_resource(struct
> >
> >         /* First, try exact prefetching match.. */
> >         ret = pci_bus_alloc_resource(bus, res, size, align, min,
> > -                                    IORESOURCE_PREFETCH,
> > +                                    IORESOURCE_PREFETCH | IORESOURCE_MEM_64,
> >                                      pcibios_align_resource, dev);
> >
> > -       if (ret < 0 && (res->flags & IORESOURCE_PREFETCH)) {
> > +       if (ret < 0 &&
> > +           (res->flags & (IORESOURCE_PREFETCH | IORESOURCE_MEM_64)) ==
> > +            (IORESOURCE_PREFETCH | IORESOURCE_MEM_64)) {
> > +               /*
> > +                * That failed.
> > +                *
> > +                * Try below 4g pref
> > +                */
> > +               ret = pci_bus_alloc_resource(bus, res, size, align, min,
> > +                                            IORESOURCE_PREFETCH,
> > +                                            pcibios_align_resource, dev);
> > +       }
> > +
> > +       if (ret < 0 &&
> > +           (res->flags & (IORESOURCE_PREFETCH | IORESOURCE_MEM_64))) {
> >                 /*
> >                  * That failed.
> >                  *
> >                  * But a prefetching area can handle a non-prefetching
> >                  * window (it will just not perform as well).
> > +                *
> > +                * Also can put 64bit under 32bit range. (below 4g).
> >                  */
> >                 ret = pci_bus_alloc_resource(bus, res, size, align, min, 0,
> >                                              pcibios_align_resource, dev);
> 


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH v7] PCI: Try best to allocate pref mmio 64bit above 4g
  2014-04-08  2:57     ` Guo Chao
@ 2014-04-08  7:18       ` Or Gerlitz
  2014-04-08  7:41         ` Wei Yang
  2014-04-08 15:02       ` Bjorn Helgaas
  1 sibling, 1 reply; 35+ messages in thread
From: Or Gerlitz @ 2014-04-08  7:18 UTC (permalink / raw)
  To: Guo Chao, Bjorn Helgaas
  Cc: Yinghai Lu, Benjamin Herrenschmidt, Wei Yang, Gavin Shan,
	Jack Morgenstein, Amir Vadai, Eugenia Emantayev, talal,
	linux-pci

On 08/04/2014 05:57, Guo Chao wrote:
> On Fri, Apr 04, 2014 at 04:43:27PM -0600, Bjorn Helgaas wrote:
>> On Fri, Mar 7, 2014 at 1:08 PM, Yinghai Lu <yinghai@kernel.org> wrote:
>>> When one of children resources does not support MEM_64, MEM_64 for
>>> bridge get reset, so pull down whole pref resource on the bridge under 4G.
>>>
>>> If the bridge support pref mem 64, will only allocate that with pref mem64 to
>>> children that support it.
>>> For children resources if they only support pref mem 32, will allocate them
>>> from non pref mem instead.
>>>
>>> If the bridge only support 32bit pref mmio, will still have all children pref
>>> mmio under that.
>>>
>>> -v2: Add release bridge res support with bridge mem res for pref_mem children res.
>>> -v3: refresh and make it can be applied early before for_each_dev_res patchset.
>>> -v4: fix non-pref mmio 64bit support found by Guo Chao.
>>> -v7: repost as ibm's powerpc system work again when
>>>      1. petiboot boot kernel have this patch.
>>>      2. or mellanox driver added new .shutdown member.
>>>      discussion could be found at:
>>>           http://marc.info/?t=138968064500001&r=1&w=2
>> I think this fixes some sort of bug, and I suppose if I spent a few
>> hours decoding the discussion you mention (the 44 message-long
>> "mlx4_core probe error after applying Yinghai's patch" discussion), I
>> might be able to figure out what it is.
>>
> The result of 44 message-long discussion is: the Mellanox card's failure is
> due to a bug in its driver instead of this patch.


So just to make sure we're on the same page -- fixing the bug is carried 
out through netdev, e.g this 3.14-rc8
upstream commit 97a5221 "net/mlx4_core: pass pci_device_id.driver_data 
to __mlx4_init_one during reset"
and now this one more fix which is under discussion 
http://marc.info/?l=linux-pci&m=139675010015646&w=2, right?



>
> The patch refines the logic of using prefetchable window, so that 64-bit
> prefetchable BARs have a chance to be really prefetchable. It does not fix
> any bugs, instead, it exposes one.


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH v7] PCI: Try best to allocate pref mmio 64bit above 4g
  2014-04-08  7:18       ` Or Gerlitz
@ 2014-04-08  7:41         ` Wei Yang
  2014-04-08  7:55           ` Or Gerlitz
  0 siblings, 1 reply; 35+ messages in thread
From: Wei Yang @ 2014-04-08  7:41 UTC (permalink / raw)
  To: Or Gerlitz
  Cc: Guo Chao, Bjorn Helgaas, Yinghai Lu, Benjamin Herrenschmidt,
	Wei Yang, Gavin Shan, Jack Morgenstein, Amir Vadai,
	Eugenia Emantayev, talal, linux-pci

On Tue, Apr 08, 2014 at 10:18:48AM +0300, Or Gerlitz wrote:
>On 08/04/2014 05:57, Guo Chao wrote:
>>On Fri, Apr 04, 2014 at 04:43:27PM -0600, Bjorn Helgaas wrote:
>>>On Fri, Mar 7, 2014 at 1:08 PM, Yinghai Lu <yinghai@kernel.org> wrote:
>>>>When one of children resources does not support MEM_64, MEM_64 for
>>>>bridge get reset, so pull down whole pref resource on the bridge under 4G.
>>>>
>>>>If the bridge support pref mem 64, will only allocate that with pref mem64 to
>>>>children that support it.
>>>>For children resources if they only support pref mem 32, will allocate them
>>>>from non pref mem instead.
>>>>
>>>>If the bridge only support 32bit pref mmio, will still have all children pref
>>>>mmio under that.
>>>>
>>>>-v2: Add release bridge res support with bridge mem res for pref_mem children res.
>>>>-v3: refresh and make it can be applied early before for_each_dev_res patchset.
>>>>-v4: fix non-pref mmio 64bit support found by Guo Chao.
>>>>-v7: repost as ibm's powerpc system work again when
>>>>     1. petiboot boot kernel have this patch.
>>>>     2. or mellanox driver added new .shutdown member.
>>>>     discussion could be found at:
>>>>          http://marc.info/?t=138968064500001&r=1&w=2
>>>I think this fixes some sort of bug, and I suppose if I spent a few
>>>hours decoding the discussion you mention (the 44 message-long
>>>"mlx4_core probe error after applying Yinghai's patch" discussion), I
>>>might be able to figure out what it is.
>>>
>>The result of 44 message-long discussion is: the Mellanox card's failure is
>>due to a bug in its driver instead of this patch.
>
>
>So just to make sure we're on the same page -- fixing the bug is
>carried out through netdev, e.g this 3.14-rc8
>upstream commit 97a5221 "net/mlx4_core: pass
>pci_device_id.driver_data to __mlx4_init_one during reset"
>and now this one more fix which is under discussion
>http://marc.info/?l=linux-pci&m=139675010015646&w=2, right?

No, this one http://marc.info/?l=linux-pci&m=139675010015646&w=2 is another
bug in mlx4 driver, missing a proper pci_dev_data in mlx4_init_one.

The bug discussing in this thread is the driver "forget" to release the
"owner" during kexec. So this device couldn't be usded after kexec.

>
>
>
>>
>>The patch refines the logic of using prefetchable window, so that 64-bit
>>prefetchable BARs have a chance to be really prefetchable. It does not fix
>>any bugs, instead, it exposes one.

-- 
Richard Yang
Help you, Help me


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH v7] PCI: Try best to allocate pref mmio 64bit above 4g
  2014-04-08  7:41         ` Wei Yang
@ 2014-04-08  7:55           ` Or Gerlitz
  2014-04-08  8:22             ` Guo Chao
  0 siblings, 1 reply; 35+ messages in thread
From: Or Gerlitz @ 2014-04-08  7:55 UTC (permalink / raw)
  To: Wei Yang, Guo Chao
  Cc: Bjorn Helgaas, Yinghai Lu, Benjamin Herrenschmidt, Gavin Shan,
	Jack Morgenstein, Amir Vadai, Eugenia Emantayev, talal,
	linux-pci

On 08/04/2014 10:41, Wei Yang wrote:
>> >So just to make sure we're on the same page -- fixing the bug is
>> >carried out through netdev, e.g this 3.14-rc8
>> >upstream commit 97a5221 "net/mlx4_core: pass
>> >pci_device_id.driver_data to __mlx4_init_one during reset"
>> >and now this one more fix which is under discussion
>> >http://marc.info/?l=linux-pci&m=139675010015646&w=2, right?
> No, this onehttp://marc.info/?l=linux-pci&m=139675010015646&w=2  is another
> bug in mlx4 driver, missing a proper pci_dev_data in mlx4_init_one.
>
> The bug discussing in this thread is the driver "forget" to release the
> "owner" during kexec. So this device couldn't be usded after kexec.
>
Can you guys please open a kernel.org bugzilla ticket, or better... work 
on a patch?

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH v7] PCI: Try best to allocate pref mmio 64bit above 4g
  2014-04-08  7:55           ` Or Gerlitz
@ 2014-04-08  8:22             ` Guo Chao
  0 siblings, 0 replies; 35+ messages in thread
From: Guo Chao @ 2014-04-08  8:22 UTC (permalink / raw)
  To: Or Gerlitz
  Cc: Wei Yang, Bjorn Helgaas, Yinghai Lu, Benjamin Herrenschmidt,
	Gavin Shan, Jack Morgenstein, Amir Vadai, Eugenia Emantayev,
	talal, linux-pci

On Tue, Apr 08, 2014 at 10:55:55AM +0300, Or Gerlitz wrote:
> On 08/04/2014 10:41, Wei Yang wrote:
> >>>So just to make sure we're on the same page -- fixing the bug is
> >>>carried out through netdev, e.g this 3.14-rc8
> >>>upstream commit 97a5221 "net/mlx4_core: pass
> >>>pci_device_id.driver_data to __mlx4_init_one during reset"
> >>>and now this one more fix which is under discussion
> >>>http://marc.info/?l=linux-pci&m=139675010015646&w=2, right?
> >No, this onehttp://marc.info/?l=linux-pci&m=139675010015646&w=2  is another
> >bug in mlx4 driver, missing a proper pci_dev_data in mlx4_init_one.
> >
> >The bug discussing in this thread is the driver "forget" to release the
> >"owner" during kexec. So this device couldn't be usded after kexec.
> >
> Can you guys please open a kernel.org bugzilla ticket, or better...
> work on a patch?
> 

It's already fixed in upstream.

commit 367d56f7b4d5ce61e883c64f81786c7a3ae88eea
Author: Gavin Shan <shangw@linux.vnet.ibm.com>
Date:   Tue Mar 4 15:35:20 2014 +0800

    net/mlx4: Support shutdown() interface


Thanks,
Guo Chao


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH v7] PCI: Try best to allocate pref mmio 64bit above 4g
  2014-04-08  2:57     ` Guo Chao
  2014-04-08  7:18       ` Or Gerlitz
@ 2014-04-08 15:02       ` Bjorn Helgaas
  2014-04-09  7:52         ` Guo Chao
  1 sibling, 1 reply; 35+ messages in thread
From: Bjorn Helgaas @ 2014-04-08 15:02 UTC (permalink / raw)
  To: Guo Chao
  Cc: Yinghai Lu, Benjamin Herrenschmidt, Wei Yang, Gavin Shan,
	Jack Morgenstein, Amir Vadai, Or Gerlitz, Eugenia Emantayev,
	talal, linux-pci

On Mon, Apr 7, 2014 at 8:57 PM, Guo Chao <yan@linux.vnet.ibm.com> wrote:
> On Fri, Apr 04, 2014 at 04:43:27PM -0600, Bjorn Helgaas wrote:
>> On Fri, Mar 7, 2014 at 1:08 PM, Yinghai Lu <yinghai@kernel.org> wrote:
>> > When one of children resources does not support MEM_64, MEM_64 for
>> > bridge get reset, so pull down whole pref resource on the bridge under 4G.
>> >
>> > If the bridge support pref mem 64, will only allocate that with pref mem64 to
>> > children that support it.
>> > For children resources if they only support pref mem 32, will allocate them
>> > from non pref mem instead.
>> >
>> > If the bridge only support 32bit pref mmio, will still have all children pref
>> > mmio under that.
>> >
>> > -v2: Add release bridge res support with bridge mem res for pref_mem children res.
>> > -v3: refresh and make it can be applied early before for_each_dev_res patchset.
>> > -v4: fix non-pref mmio 64bit support found by Guo Chao.
>> > -v7: repost as ibm's powerpc system work again when
>> >     1. petiboot boot kernel have this patch.
>> >     2. or mellanox driver added new .shutdown member.
>> >     discussion could be found at:
>> >          http://marc.info/?t=138968064500001&r=1&w=2
>>
>> I think this fixes some sort of bug, and I suppose if I spent a few
>> hours decoding the discussion you mention (the 44 message-long
>> "mlx4_core probe error after applying Yinghai's patch" discussion), I
>> might be able to figure out what it is.
>>
>
> The result of 44 message-long discussion is: the Mellanox card's failure is
> due to a bug in its driver instead of this patch.
>
> The patch refines the logic of using prefetchable window, so that 64-bit
> prefetchable BARs have a chance to be really prefetchable. It does not fix
> any bugs, instead, it exposes one.

OK, then I'm back to square one, and I need an explanation of why we
want to merge this patch.

I think the patch conserves 32-bit address space by putting more
things above 4G than we used to.  But this comes at the cost of using
non-prefetchable windows in some cases where we used to use
prefetchable ones.

Somebody has to explain why we want to do this and why the benefit of
conserving the 32-bit space is more important losing the
prefetchability.

Bjorn

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH v7] PCI: Try best to allocate pref mmio 64bit above 4g
  2014-04-08 15:02       ` Bjorn Helgaas
@ 2014-04-09  7:52         ` Guo Chao
  2014-04-10 17:26           ` Bjorn Helgaas
  0 siblings, 1 reply; 35+ messages in thread
From: Guo Chao @ 2014-04-09  7:52 UTC (permalink / raw)
  To: Bjorn Helgaas
  Cc: Yinghai Lu, Benjamin Herrenschmidt, Wei Yang, Gavin Shan,
	Jack Morgenstein, Amir Vadai, Or Gerlitz, Eugenia Emantayev,
	talal, linux-pci

Hi, Bjorn:

On Tue, Apr 08, 2014 at 09:02:10AM -0600, Bjorn Helgaas wrote:
> On Mon, Apr 7, 2014 at 8:57 PM, Guo Chao <yan@linux.vnet.ibm.com> wrote:
> > On Fri, Apr 04, 2014 at 04:43:27PM -0600, Bjorn Helgaas wrote:
> >> On Fri, Mar 7, 2014 at 1:08 PM, Yinghai Lu <yinghai@kernel.org> wrote:
> >> > When one of children resources does not support MEM_64, MEM_64 for
> >> > bridge get reset, so pull down whole pref resource on the bridge under 4G.
> >> >
> >> > If the bridge support pref mem 64, will only allocate that with pref mem64 to
> >> > children that support it.
> >> > For children resources if they only support pref mem 32, will allocate them
> >> > from non pref mem instead.
> >> >
> >> > If the bridge only support 32bit pref mmio, will still have all children pref
> >> > mmio under that.
> >> >
> >> > -v2: Add release bridge res support with bridge mem res for pref_mem children res.
> >> > -v3: refresh and make it can be applied early before for_each_dev_res patchset.
> >> > -v4: fix non-pref mmio 64bit support found by Guo Chao.
> >> > -v7: repost as ibm's powerpc system work again when
> >> >     1. petiboot boot kernel have this patch.
> >> >     2. or mellanox driver added new .shutdown member.
> >> >     discussion could be found at:
> >> >          http://marc.info/?t=138968064500001&r=1&w=2
> >>
> >> I think this fixes some sort of bug, and I suppose if I spent a few
> >> hours decoding the discussion you mention (the 44 message-long
> >> "mlx4_core probe error after applying Yinghai's patch" discussion), I
> >> might be able to figure out what it is.
> >>
> >
> > The result of 44 message-long discussion is: the Mellanox card's failure is
> > due to a bug in its driver instead of this patch.
> >
> > The patch refines the logic of using prefetchable window, so that 64-bit
> > prefetchable BARs have a chance to be really prefetchable. It does not fix
> > any bugs, instead, it exposes one.
> 
> OK, then I'm back to square one, and I need an explanation of why we
> want to merge this patch.
> 
> I think the patch conserves 32-bit address space by putting more
> things above 4G than we used to.  But this comes at the cost of using
> non-prefetchable windows in some cases where we used to use
> prefetchable ones.
> 
> Somebody has to explain why we want to do this and why the benefit of
> conserving the 32-bit space is more important losing the
> prefetchability.

I want to make sure we are talking about this patch:
	PCI: Try best to allocate pref mmio 64bit above 4g
	http://patchwork.ozlabs.org/patch/328067/

instead of this one:
	PCI: Try to allocate mem64 above 4G at first
	http://patchwork.ozlabs.org/patch/303197/

This patch does not intend to conserve 32-bit space at all. It makes
better use of prefetchable window.

The problem with the old code is: when both 32-bit and 64-bit prefetchable
BARs present, it's in favor of 32-bit one to use prefetchable window.
This window is then not supposed to get 4G-above address, and this
32-bit-address-only property would propagates upwards until reaching root
bridge. In later assignment phase, 4G-above space is never touched. This
is just caused by a single 32-bit prefetchable BAR (say a ROM BAR).

This patch helps by making better decision:

	* Keep the old behaviour if only 32-bit or 64-bit prefetchable
	  BARs present

	* If both of them present, put 64-bit ones to prefetchable window,
	  32-bit ones to non-prefetchable window

So 4G-above space can be properly used.

Why does enabling 64-bit space matter?

* 32-bit space is just too small. If larger-than-4G BAR sounds
  unrealistic (we do have such devices), there is still a chance that
  total MMIO size under a domain is too large, especially when SR-IOV
  enabled (we met this situation).

* On PowerNV platform, we can do better platform configuration for 64-bit
  prefetchable space, that's important for enabling SR-IOV on this
  platform.


Thanks,
Guo Chao

> 
> Bjorn
> 


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH v7] PCI: Try best to allocate pref mmio 64bit above 4g
  2014-04-09  7:52         ` Guo Chao
@ 2014-04-10 17:26           ` Bjorn Helgaas
  2014-04-10 21:23             ` Benjamin Herrenschmidt
  2014-04-15 11:54             ` Guo Chao
  0 siblings, 2 replies; 35+ messages in thread
From: Bjorn Helgaas @ 2014-04-10 17:26 UTC (permalink / raw)
  To: Guo Chao
  Cc: Yinghai Lu, Benjamin Herrenschmidt, Wei Yang, Gavin Shan,
	Jack Morgenstein, Amir Vadai, Or Gerlitz, Eugenia Emantayev,
	talal, linux-pci

On Wed, Apr 9, 2014 at 1:52 AM, Guo Chao <yan@linux.vnet.ibm.com> wrote:

>> OK, then I'm back to square one, and I need an explanation of why we
>> want to merge this patch.
>>
>> I think the patch conserves 32-bit address space by putting more
>> things above 4G than we used to.  But this comes at the cost of using
>> non-prefetchable windows in some cases where we used to use
>> prefetchable ones.
>>
>> Somebody has to explain why we want to do this and why the benefit of
>> conserving the 32-bit space is more important losing the
>> prefetchability.
>
> I want to make sure we are talking about this patch:
>         PCI: Try best to allocate pref mmio 64bit above 4g
>         http://patchwork.ozlabs.org/patch/328067/
>
> instead of this one:
>         PCI: Try to allocate mem64 above 4G at first
>         http://patchwork.ozlabs.org/patch/303197/

Yes, I'm talking about http://patchwork.ozlabs.org/patch/328067; if
you look at patchwork, the second one (303197) is marked "superseded."

> This patch does not intend to conserve 32-bit space at all. It makes
> better use of prefetchable window.
>
> The problem with the old code is: when both 32-bit and 64-bit prefetchable
> BARs present, it's in favor of 32-bit one to use prefetchable window.
> This window is then not supposed to get 4G-above address, and this
> 32-bit-address-only property would propagates upwards until reaching root
> bridge. In later assignment phase, 4G-above space is never touched. This
> is just caused by a single 32-bit prefetchable BAR (say a ROM BAR).
>
> This patch helps by making better decision:
>
>         * Keep the old behaviour if only 32-bit or 64-bit prefetchable
>           BARs present
>
>         * If both of them present, put 64-bit ones to prefetchable window,
>           32-bit ones to non-prefetchable window

I thought the patch was supposed to change the way we allocate bridge
windows.  But you are talking about the way we assign 32- and 64-bit
device resources, i.e., changing the way we decide whether to put them
in prefetchable or non-prefetchable windows.

> So 4G-above space can be properly used.
>
> Why does enabling 64-bit space matter?
>
> * 32-bit space is just too small. If larger-than-4G BAR sounds
>   unrealistic (we do have such devices), there is still a chance that
>   total MMIO size under a domain is too large, especially when SR-IOV
>   enabled (we met this situation).

Please give more details about the problem you saw.  A complete dmesg
log showing a boot failure or a device that doesn't work, and another
log with this patch applied, showing things working as desired, would
go a long ways toward clarifying this.

This sounds like a specific case that will be fixed by the patch, and
if you give more details, maybe I can figure out what's going on.

Bjorn

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH v7] PCI: Try best to allocate pref mmio 64bit above 4g
  2014-04-10 17:26           ` Bjorn Helgaas
@ 2014-04-10 21:23             ` Benjamin Herrenschmidt
  2014-04-15 11:54             ` Guo Chao
  1 sibling, 0 replies; 35+ messages in thread
From: Benjamin Herrenschmidt @ 2014-04-10 21:23 UTC (permalink / raw)
  To: Bjorn Helgaas
  Cc: Guo Chao, Yinghai Lu, Wei Yang, Gavin Shan, Jack Morgenstein,
	Amir Vadai, Or Gerlitz, Eugenia Emantayev, talal, linux-pci

On Thu, 2014-04-10 at 11:26 -0600, Bjorn Helgaas wrote:
> > The problem with the old code is: when both 32-bit and 64-bit prefetchable
> > BARs present, it's in favor of 32-bit one to use prefetchable window.
> > This window is then not supposed to get 4G-above address, and this
> > 32-bit-address-only property would propagates upwards until reaching root
> > bridge. In later assignment phase, 4G-above space is never touched. This
> > is just caused by a single 32-bit prefetchable BAR (say a ROM BAR).
> >
> > This patch helps by making better decision:
> >
> >         * Keep the old behaviour if only 32-bit or 64-bit prefetchable
> >           BARs present
> >
> >         * If both of them present, put 64-bit ones to prefetchable window,
> >           32-bit ones to non-prefetchable window
> 
> I thought the patch was supposed to change the way we allocate bridge
> windows.  But you are talking about the way we assign 32- and 64-bit
> device resources, i.e., changing the way we decide whether to put them
> in prefetchable or non-prefetchable windows.

Wait...

Are you putting "non-prefetchable" 64-bit BARs in the prefetchable
range ?

That's bad. Don't do that.

Some switches or bridges *will* actively prefetch, and putting a
register BAR marked non-prefetchable in the prefetchable range will thus
cause nasty bugs if a register with side effects becomes the target of a
prefetch operations.

We can do that on powerpc *selectively* when and only when we know for
sure that no bridge will prefetch on the path to the device. We know our
host bridges won't and we *might* (to be verified) know that the PLX
switches we have soldered on the mobo won't, but that's the only cases
where it is legit.

> > So 4G-above space can be properly used.
> >
> > Why does enabling 64-bit space matter?
> >
> > * 32-bit space is just too small. If larger-than-4G BAR sounds
> >   unrealistic (we do have such devices), there is still a chance that
> >   total MMIO size under a domain is too large, especially when SR-IOV
> >   enabled (we met this situation).
> 
> Please give more details about the problem you saw.  A complete dmesg
> log showing a boot failure or a device that doesn't work, and another
> log with this patch applied, showing things working as desired, would
> go a long ways toward clarifying this.
> 
> This sounds like a specific case that will be fixed by the patch, and
> if you give more details, maybe I can figure out what's going on.

Well, in our case it's a very complicated story. We have this segmented
space where we have 256 segments covering our 32-bit space. We need each
"partitionable endpoint" to be in separate sets of segments. PEs are
basically iommu domain but also affect MMIO and are our unit of
assignment to guests when virtualizing and of error handling when doing
EEH (so we freeze access on error by PE).

So with something like SR-IOV where each VF has to be a separate PE,
the 32-bit space is problematic because we want the VF BARs to stride
over segments so each VF lives in a separate PE, but we run out of
segments and the 32-bit space has a fixed segment size that may not
match the VF BAR stride.

So we want our BARs to shoot into 64-bit space where we have additional
windows with each 256 segments that we can use, and additionally, we can
resize these to make the segment size match the VF BAR stride.

Cheers,
Ben.


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH v7] PCI: Try best to allocate pref mmio 64bit above 4g
  2014-04-10 17:26           ` Bjorn Helgaas
  2014-04-10 21:23             ` Benjamin Herrenschmidt
@ 2014-04-15 11:54             ` Guo Chao
  2014-04-16  0:09               ` Bjorn Helgaas
  1 sibling, 1 reply; 35+ messages in thread
From: Guo Chao @ 2014-04-15 11:54 UTC (permalink / raw)
  To: Bjorn Helgaas
  Cc: Yinghai Lu, Benjamin Herrenschmidt, Wei Yang, Gavin Shan,
	Jack Morgenstein, Amir Vadai, Or Gerlitz, Eugenia Emantayev,
	talal, linux-pci

On Thu, Apr 10, 2014 at 11:26:14AM -0600, Bjorn Helgaas wrote:
> On Wed, Apr 9, 2014 at 1:52 AM, Guo Chao <yan@linux.vnet.ibm.com> wrote:
> > So 4G-above space can be properly used.
> >
> > Why does enabling 64-bit space matter?
> >
> > * 32-bit space is just too small. If larger-than-4G BAR sounds
> >   unrealistic (we do have such devices), there is still a chance that
> >   total MMIO size under a domain is too large, especially when SR-IOV
> >   enabled (we met this situation).
> 
> Please give more details about the problem you saw.  A complete dmesg
> log showing a boot failure or a device that doesn't work, and another
> log with this patch applied, showing things working as desired, would
> go a long ways toward clarifying this.
> 
> This sounds like a specific case that will be fixed by the patch, and
> if you give more details, maybe I can figure out what's going on.
> 

Let's see an example.

| pci 0003:05:00.0: reg 0x10: [mem 0x3d05801000000-0x3d058010fffff 64bit]
| pci 0003:05:00.0: reg 0x18: [mem 0x3d05010000000-0x3d05017ffffff 64bit pref]
| pci 0003:05:00.0: reg 0x30: [mem 0x00000000-0x000fffff pref]
| pci 0003:05:00.0: reg 0x134: [mem 0x3d05018000000-0x3d0501fffffff 64bit pref]

This is printed at enumeration phase. This device has a SRIOV BAR with
size of 0x7ffffff (128M). That's the size of a signle VF BAR. The device
supports 63 VFs so we need near 8G space in total. Apparanlty we need
exploit 64-bit space.

| PCI host bridge to bus 0003:00
| pci_bus 0003:00: root bus resource [mem 0x3d05800000000-0x3d0587ffeffff] (bus address [0x80000000-0xfffeffff])
| pci_bus 0003:00: root bus resource [mem 0x3d05008000000-0x3d057ffffffff 64bit pref]

And we do have a huge (32G) 64-bit prefetchable window supply. We expect
everything to work fine, but:

| pci 0003:00:00.0: BAR 15: can't assign mem pref (size 0x206000000)
| pci 0003:00:00.0: BAR 14: assigned [mem 0x3d05800000000-0x3d05802ffffff]
| pci 0003:00:00.0: BAR 13: can't assign io (size 0x4000)

It went wrong at the beginning. Note the error message never considers
64-bit or not, but BAR 15 here has it MEM_64 flag cleared. It first
tried to find a 32-bit prefetchable window, but we only supply a 64-bit one.
So it fall back to (32-bit) non-prefetchable window, but there is no enough
room there. At last it went into complicated steps (not show here) of
allocating requested resource first, then try best for the optional ones, etc..

Why is BAR 15 (prefetchable) 32 bit instead of 64? Because PCI core favours
32-bit prefetchable BARs and we have some. This is one of them:

| pci 0003:05:00.0: reg 0x30: [mem 0x00000000-0x000fffff pref]

PCI core decides to let them enjoy the benefition of prefetch. They can't
bear the risk of getting 4G-above address, so its parent, its parent's parent,
its parent's parent's parent, finally the root bridge (00:00.0) must have their
MEM_64 flag of prefetchable resource (BAR 15) clear. In the end nobody
is eligible to use the 64-bit (prefetchable) space even we have huge
supply !

Note even the resource is small and successfully fall back into 32-bit
non-prefetchable window, that's still not OK for us because we need
SRIOV resource be at 64-bit prefetchable space to do platform
configuration.

With Yinghai's patch, when 64-bit prefetchable BARs found, they're more
favoured than the 32-bit prefetchable ones (if any), so all upstream bridges'
prefetchable windows have their MEM_64 flag reserved and the huge 64-bit
prefetchable space will be exploited:

| pci 0003:00:00.0: BAR 15: assigned [mem 0x3d05008000000-0x3d0521fffffff 64bit pref]
| pci 0003:00:00.0: BAR 14: assigned [mem 0x3d05800000000-0x3d05802ffffff]
| pci 0003:00:00.0: BAR 13: can't assign io (size 0x4000)

(The IO resource error here is due to we do not provide IO window)

Thanks,
Guo Chao


> Bjorn
> 


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH v7] PCI: Try best to allocate pref mmio 64bit above 4g
  2014-04-15 11:54             ` Guo Chao
@ 2014-04-16  0:09               ` Bjorn Helgaas
  2014-04-16  2:29                 ` Yinghai Lu
                                   ` (2 more replies)
  0 siblings, 3 replies; 35+ messages in thread
From: Bjorn Helgaas @ 2014-04-16  0:09 UTC (permalink / raw)
  To: Guo Chao
  Cc: Yinghai Lu, Benjamin Herrenschmidt, Wei Yang, Gavin Shan,
	Jack Morgenstein, Amir Vadai, Or Gerlitz, Eugenia Emantayev,
	talal, linux-pci

On Tue, Apr 15, 2014 at 5:54 AM, Guo Chao <yan@linux.vnet.ibm.com> wrote:
> On Thu, Apr 10, 2014 at 11:26:14AM -0600, Bjorn Helgaas wrote:
>> On Wed, Apr 9, 2014 at 1:52 AM, Guo Chao <yan@linux.vnet.ibm.com> wrote:
>> > So 4G-above space can be properly used.
>> >
>> > Why does enabling 64-bit space matter?
>> >
>> > * 32-bit space is just too small. If larger-than-4G BAR sounds
>> >   unrealistic (we do have such devices), there is still a chance that
>> >   total MMIO size under a domain is too large, especially when SR-IOV
>> >   enabled (we met this situation).
>>
>> Please give more details about the problem you saw.  A complete dmesg
>> log showing a boot failure or a device that doesn't work, and another
>> log with this patch applied, showing things working as desired, would
>> go a long ways toward clarifying this.
>>
>> This sounds like a specific case that will be fixed by the patch, and
>> if you give more details, maybe I can figure out what's going on.
>>
>
> Let's see an example.

Thanks for the example.  Please open a bug report at
http://bugzilla.kernel.org and attach the complete dmesg logs before
and after Yinghai's patch.

Having the complete logs helps me answer questions myself without
having to bother you, and it also helps me figure out whether we can
improve our logging to make it easier to diagnose problems like this.

> | pci 0003:05:00.0: reg 0x10: [mem 0x3d05801000000-0x3d058010fffff 64bit]
> | pci 0003:05:00.0: reg 0x18: [mem 0x3d05010000000-0x3d05017ffffff 64bit pref]
> | pci 0003:05:00.0: reg 0x30: [mem 0x00000000-0x000fffff pref]
> | pci 0003:05:00.0: reg 0x134: [mem 0x3d05018000000-0x3d0501fffffff 64bit pref]
>
> This is printed at enumeration phase. This device has a SRIOV BAR with
> size of 0x7ffffff (128M). That's the size of a signle VF BAR. The device
> supports 63 VFs so we need near 8G space in total. Apparanlty we need
> exploit 64-bit space.

Yes.  Do we print a hint anywhere about how many VFs there are?  In
other words, can you deduce the number "63" from the dmesg, or do you
have to figure that out some other way?  It'd be nice if that
information were somewhere in dmesg.

> | PCI host bridge to bus 0003:00
> | pci_bus 0003:00: root bus resource [mem 0x3d05800000000-0x3d0587ffeffff] (bus address [0x80000000-0xfffeffff])
> | pci_bus 0003:00: root bus resource [mem 0x3d05008000000-0x3d057ffffffff 64bit pref]
>
> And we do have a huge (32G) 64-bit prefetchable window supply. We expect
> everything to work fine, but:
>
> | pci 0003:00:00.0: BAR 15: can't assign mem pref (size 0x206000000)
> | pci 0003:00:00.0: BAR 14: assigned [mem 0x3d05800000000-0x3d05802ffffff]
> | pci 0003:00:00.0: BAR 13: can't assign io (size 0x4000)
>
> It went wrong at the beginning. Note the error message never considers
> 64-bit or not, but BAR 15 here has it MEM_64 flag cleared.

BAR 15 is a bridge window.  I think its resource flags should reflect
the capability of the *window*, even if we disable the window or we
happen to assign addresses that are under 4GB.  So I think it's wrong
that we clear the MEM_64 flag  in pbus_size_mem() and the IO flag in
pbus_size_io().

> It first
> tried to find a 32-bit prefetchable window, but we only supply a 64-bit one.
> So it fall back to (32-bit) non-prefetchable window, but there is no enough
> room there. At last it went into complicated steps (not show here) of
> allocating requested resource first, then try best for the optional ones, etc..
>
> Why is BAR 15 (prefetchable) 32 bit instead of 64? Because PCI core favours
> 32-bit prefetchable BARs and we have some. This is one of them:
>
> | pci 0003:05:00.0: reg 0x30: [mem 0x00000000-0x000fffff pref]
>
> PCI core decides to let them enjoy the benefition of prefetch. They can't
> bear the risk of getting 4G-above address, so its parent, its parent's parent,
> its parent's parent's parent, finally the root bridge (00:00.0) must have their
> MEM_64 flag of prefetchable resource (BAR 15) clear.

It sounds like we're tracking the resource requirements
(prefetchability and BAR width) by using the flags on bridge windows.
If that's the case, I think it's wrong.  We should preserve the bridge
window flags, because those express the bridge hardware capabilities,
and we should explicitly keep track of what's required by devices
below the bridge in some other way.

> In the end nobody
> is eligible to use the 64-bit (prefetchable) space even we have huge
> supply !
>
> Note even the resource is small and successfully fall back into 32-bit
> non-prefetchable window, that's still not OK for us because we need
> SRIOV resource be at 64-bit prefetchable space to do platform
> configuration.
>
> With Yinghai's patch, when 64-bit prefetchable BARs found, they're more
> favoured than the 32-bit prefetchable ones (if any), so all upstream bridges'
> prefetchable windows have their MEM_64 flag reserved and the huge 64-bit
> prefetchable space will be exploited:
>
> | pci 0003:00:00.0: BAR 15: assigned [mem 0x3d05008000000-0x3d0521fffffff 64bit pref]
> | pci 0003:00:00.0: BAR 14: assigned [mem 0x3d05800000000-0x3d05802ffffff]
> | pci 0003:00:00.0: BAR 13: can't assign io (size 0x4000)
>
> (The IO resource error here is due to we do not provide IO window)

Yes.  The lack of I/O space is just a constraint of the platform.
It'd be nice if we printed a more meaningful error message in this
case.  One really has to be a PCI expert to distinguish this from a
real problem that we need to fix.

Bjorn

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH v7] PCI: Try best to allocate pref mmio 64bit above 4g
  2014-04-16  0:09               ` Bjorn Helgaas
@ 2014-04-16  2:29                 ` Yinghai Lu
  2014-04-16 17:06                   ` Bjorn Helgaas
  2014-04-16  4:16                 ` Yinghai Lu
  2014-04-16  6:30                 ` Benjamin Herrenschmidt
  2 siblings, 1 reply; 35+ messages in thread
From: Yinghai Lu @ 2014-04-16  2:29 UTC (permalink / raw)
  To: Bjorn Helgaas
  Cc: Guo Chao, Benjamin Herrenschmidt, Wei Yang, Gavin Shan,
	Jack Morgenstein, Amir Vadai, Or Gerlitz, Eugenia Emantayev,
	talal, linux-pci

[-- Attachment #1: Type: text/plain, Size: 2852 bytes --]

On Tue, Apr 15, 2014 at 5:09 PM, Bjorn Helgaas <bhelgaas@google.com> wrote:
>> It went wrong at the beginning. Note the error message never considers
>> 64-bit or not, but BAR 15 here has it MEM_64 flag cleared.
>
> BAR 15 is a bridge window.  I think its resource flags should reflect
> the capability of the *window*, even if we disable the window or we
> happen to assign addresses that are under 4GB.  So I think it's wrong
> that we clear the MEM_64 flag  in pbus_size_mem() and the IO flag in
> pbus_size_io().

We keep PCI_PREF_RANGE_TYPE_64 in the flag.

We should check that before we touch PCI_PREF_LIMIT_UPPER32 BAR in
pci_setup_bridge_mmio_pref()

like following:

Subject: [PATCH] PCI: only touch upper 32bit for pref64bit bridge bar

If the bridge pref mmio bar does not support 64bit, we should touch
upper 32bit.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>

---
 drivers/pci/setup-bus.c |   14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

Index: linux-2.6/drivers/pci/setup-bus.c
===================================================================
--- linux-2.6.orig/drivers/pci/setup-bus.c
+++ linux-2.6/drivers/pci/setup-bus.c
@@ -592,23 +592,23 @@ static void pci_setup_bridge_mmio(struct
 static void pci_setup_bridge_mmio_pref(struct pci_bus *bus)
 {
     struct pci_dev *bridge = bus->self;
-    struct resource *res;
+    struct resource *res = bus->resource[2];
     struct pci_bus_region region;
     u32 l, bu, lu;

     /* Clear out the upper 32 bits of PREF limit.
        If PCI_PREF_BASE_UPPER32 was non-zero, this temporarily
        disables PREF range, which is ok. */
-    pci_write_config_dword(bridge, PCI_PREF_LIMIT_UPPER32, 0);
+    if (res->flags & PCI_PREF_RANGE_TYPE_64)
+        pci_write_config_dword(bridge, PCI_PREF_LIMIT_UPPER32, 0);

     /* Set up PREF base/limit. */
     bu = lu = 0;
-    res = bus->resource[2];
     pcibios_resource_to_bus(bridge->bus, &region, res);
     if (res->flags & IORESOURCE_PREFETCH) {
         l = (region.start >> 16) & 0xfff0;
         l |= region.end & 0xfff00000;
-        if (res->flags & IORESOURCE_MEM_64) {
+        if (res->flags & PCI_PREF_RANGE_TYPE_64) {
             bu = upper_32_bits(region.start);
             lu = upper_32_bits(region.end);
         }
@@ -619,8 +619,10 @@ static void pci_setup_bridge_mmio_pref(s
     pci_write_config_dword(bridge, PCI_PREF_MEMORY_BASE, l);

     /* Set the upper 32 bits of PREF base & limit. */
-    pci_write_config_dword(bridge, PCI_PREF_BASE_UPPER32, bu);
-    pci_write_config_dword(bridge, PCI_PREF_LIMIT_UPPER32, lu);
+    if (res->flags & PCI_PREF_RANGE_TYPE_64) {
+        pci_write_config_dword(bridge, PCI_PREF_BASE_UPPER32, bu);
+        pci_write_config_dword(bridge, PCI_PREF_LIMIT_UPPER32, lu);
+    }
 }

 static void __pci_setup_bridge(struct pci_bus *bus, unsigned long type)

[-- Attachment #2: only_touch_uppper32_for_64bitpref.patch --]
[-- Type: text/x-patch, Size: 2050 bytes --]

Subject: [PATCH] PCI: only touch upper 32bit for pref64bit bridge bar

If the bridge pref mmio bar does not support 64bit, we should touch
upper 32bit.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>

---
 drivers/pci/setup-bus.c |   14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

Index: linux-2.6/drivers/pci/setup-bus.c
===================================================================
--- linux-2.6.orig/drivers/pci/setup-bus.c
+++ linux-2.6/drivers/pci/setup-bus.c
@@ -592,23 +592,23 @@ static void pci_setup_bridge_mmio(struct
 static void pci_setup_bridge_mmio_pref(struct pci_bus *bus)
 {
 	struct pci_dev *bridge = bus->self;
-	struct resource *res;
+	struct resource *res = bus->resource[2];
 	struct pci_bus_region region;
 	u32 l, bu, lu;
 
 	/* Clear out the upper 32 bits of PREF limit.
 	   If PCI_PREF_BASE_UPPER32 was non-zero, this temporarily
 	   disables PREF range, which is ok. */
-	pci_write_config_dword(bridge, PCI_PREF_LIMIT_UPPER32, 0);
+	if (res->flags & PCI_PREF_RANGE_TYPE_64)
+		pci_write_config_dword(bridge, PCI_PREF_LIMIT_UPPER32, 0);
 
 	/* Set up PREF base/limit. */
 	bu = lu = 0;
-	res = bus->resource[2];
 	pcibios_resource_to_bus(bridge->bus, &region, res);
 	if (res->flags & IORESOURCE_PREFETCH) {
 		l = (region.start >> 16) & 0xfff0;
 		l |= region.end & 0xfff00000;
-		if (res->flags & IORESOURCE_MEM_64) {
+		if (res->flags & PCI_PREF_RANGE_TYPE_64) {
 			bu = upper_32_bits(region.start);
 			lu = upper_32_bits(region.end);
 		}
@@ -619,8 +619,10 @@ static void pci_setup_bridge_mmio_pref(s
 	pci_write_config_dword(bridge, PCI_PREF_MEMORY_BASE, l);
 
 	/* Set the upper 32 bits of PREF base & limit. */
-	pci_write_config_dword(bridge, PCI_PREF_BASE_UPPER32, bu);
-	pci_write_config_dword(bridge, PCI_PREF_LIMIT_UPPER32, lu);
+	if (res->flags & PCI_PREF_RANGE_TYPE_64) {
+		pci_write_config_dword(bridge, PCI_PREF_BASE_UPPER32, bu);
+		pci_write_config_dword(bridge, PCI_PREF_LIMIT_UPPER32, lu);
+	}
 }
 
 static void __pci_setup_bridge(struct pci_bus *bus, unsigned long type)

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH v7] PCI: Try best to allocate pref mmio 64bit above 4g
  2014-04-16  0:09               ` Bjorn Helgaas
  2014-04-16  2:29                 ` Yinghai Lu
@ 2014-04-16  4:16                 ` Yinghai Lu
  2014-04-16 17:10                   ` Bjorn Helgaas
  2014-04-16  6:30                 ` Benjamin Herrenschmidt
  2 siblings, 1 reply; 35+ messages in thread
From: Yinghai Lu @ 2014-04-16  4:16 UTC (permalink / raw)
  To: Bjorn Helgaas
  Cc: Guo Chao, Benjamin Herrenschmidt, Wei Yang, Gavin Shan,
	Jack Morgenstein, Amir Vadai, Or Gerlitz, Eugenia Emantayev,
	talal, linux-pci

On Tue, Apr 15, 2014 at 5:09 PM, Bjorn Helgaas <bhelgaas@google.com> wrote:
>> It first
>> tried to find a 32-bit prefetchable window, but we only supply a 64-bit one.
>> So it fall back to (32-bit) non-prefetchable window, but there is no enough
>> room there. At last it went into complicated steps (not show here) of
>> allocating requested resource first, then try best for the optional ones, etc..
>>
>> Why is BAR 15 (prefetchable) 32 bit instead of 64? Because PCI core favours
>> 32-bit prefetchable BARs and we have some. This is one of them:
>>
>> | pci 0003:05:00.0: reg 0x30: [mem 0x00000000-0x000fffff pref]
>>
>> PCI core decides to let them enjoy the benefition of prefetch. They can't
>> bear the risk of getting 4G-above address, so its parent, its parent's parent,
>> its parent's parent's parent, finally the root bridge (00:00.0) must have their
>> MEM_64 flag of prefetchable resource (BAR 15) clear.
>
> It sounds like we're tracking the resource requirements
> (prefetchability and BAR width) by using the flags on bridge windows.
> If that's the case, I think it's wrong.  We should preserve the bridge
> window flags, because those express the bridge hardware capabilities,
> and we should explicitly keep track of what's required by devices
> below the bridge in some other way.

if the BAR support 64bit, then resource could be 64bit.
if the BAR does not support 64bit, then resource should be 32bit.

That should be best guess to decide resource flag.

Yinghai

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH v7] PCI: Try best to allocate pref mmio 64bit above 4g
  2014-04-16  0:09               ` Bjorn Helgaas
  2014-04-16  2:29                 ` Yinghai Lu
  2014-04-16  4:16                 ` Yinghai Lu
@ 2014-04-16  6:30                 ` Benjamin Herrenschmidt
  2014-04-16  6:33                   ` Benjamin Herrenschmidt
  2 siblings, 1 reply; 35+ messages in thread
From: Benjamin Herrenschmidt @ 2014-04-16  6:30 UTC (permalink / raw)
  To: Bjorn Helgaas
  Cc: Guo Chao, Yinghai Lu, Wei Yang, Gavin Shan, Jack Morgenstein,
	Amir Vadai, Or Gerlitz, Eugenia Emantayev, talal, linux-pci

On Tue, 2014-04-15 at 18:09 -0600, Bjorn Helgaas wrote:
> 
> Thanks for the example.  Please open a bug report at
> http://bugzilla.kernel.org and attach the complete dmesg logs before
> and after Yinghai's patch.
> 
> Having the complete logs helps me answer questions myself without
> having to bother you, and it also helps me figure out whether we can
> improve our logging to make it easier to diagnose problems like this.

Unfortunately, for a *little while* longer (hint !) we can't publish
a complete log from a Power8 machine, but we should be able to include
everything remotely related to PCI.

> > | pci 0003:05:00.0: reg 0x10: [mem 0x3d05801000000-0x3d058010fffff 64bit]
> > | pci 0003:05:00.0: reg 0x18: [mem 0x3d05010000000-0x3d05017ffffff 64bit pref]
> > | pci 0003:05:00.0: reg 0x30: [mem 0x00000000-0x000fffff pref]
> > | pci 0003:05:00.0: reg 0x134: [mem 0x3d05018000000-0x3d0501fffffff 64bit pref]
> >
> > This is printed at enumeration phase. This device has a SRIOV BAR with
> > size of 0x7ffffff (128M). That's the size of a signle VF BAR. The device
> > supports 63 VFs so we need near 8G space in total. Apparanlty we need
> > exploit 64-bit space.
> 
> Yes.  Do we print a hint anywhere about how many VFs there are?  In
> other words, can you deduce the number "63" from the dmesg, or do you
> have to figure that out some other way?  It'd be nice if that
> information were somewhere in dmesg.
>
> > | PCI host bridge to bus 0003:00
> > | pci_bus 0003:00: root bus resource [mem 0x3d05800000000-0x3d0587ffeffff] (bus address [0x80000000-0xfffeffff])
> > | pci_bus 0003:00: root bus resource [mem 0x3d05008000000-0x3d057ffffffff 64bit pref]
> >
> > And we do have a huge (32G) 64-bit prefetchable window supply. We expect
> > everything to work fine, but:
> >
> > | pci 0003:00:00.0: BAR 15: can't assign mem pref (size 0x206000000)
> > | pci 0003:00:00.0: BAR 14: assigned [mem 0x3d05800000000-0x3d05802ffffff]
> > | pci 0003:00:00.0: BAR 13: can't assign io (size 0x4000)
> >
> > It went wrong at the beginning. Note the error message never considers
> > 64-bit or not, but BAR 15 here has it MEM_64 flag cleared.
> 
> BAR 15 is a bridge window.  I think its resource flags should reflect
> the capability of the *window*, even if we disable the window or we
> happen to assign addresses that are under 4GB.  So I think it's wrong
> that we clear the MEM_64 flag  in pbus_size_mem() and the IO flag in
> pbus_size_io().
> 
> > It first
> > tried to find a 32-bit prefetchable window, but we only supply a 64-bit one.
> > So it fall back to (32-bit) non-prefetchable window, but there is no enough
> > room there. At last it went into complicated steps (not show here) of
> > allocating requested resource first, then try best for the optional ones, etc..
> >
> > Why is BAR 15 (prefetchable) 32 bit instead of 64? Because PCI core favours
> > 32-bit prefetchable BARs and we have some. This is one of them:
> >
> > | pci 0003:05:00.0: reg 0x30: [mem 0x00000000-0x000fffff pref]
> >
> > PCI core decides to let them enjoy the benefition of prefetch. They can't
> > bear the risk of getting 4G-above address, so its parent, its parent's parent,
> > its parent's parent's parent, finally the root bridge (00:00.0) must have their
> > MEM_64 flag of prefetchable resource (BAR 15) clear.
> 
> It sounds like we're tracking the resource requirements
> (prefetchability and BAR width) by using the flags on bridge windows.
> If that's the case, I think it's wrong.  We should preserve the bridge
> window flags, because those express the bridge hardware capabilities,
> and we should explicitly keep track of what's required by devices
> below the bridge in some other way.
> 
> > In the end nobody
> > is eligible to use the 64-bit (prefetchable) space even we have huge
> > supply !
> >
> > Note even the resource is small and successfully fall back into 32-bit
> > non-prefetchable window, that's still not OK for us because we need
> > SRIOV resource be at 64-bit prefetchable space to do platform
> > configuration.
> >
> > With Yinghai's patch, when 64-bit prefetchable BARs found, they're more
> > favoured than the 32-bit prefetchable ones (if any), so all upstream bridges'
> > prefetchable windows have their MEM_64 flag reserved and the huge 64-bit
> > prefetchable space will be exploited:
> >
> > | pci 0003:00:00.0: BAR 15: assigned [mem 0x3d05008000000-0x3d0521fffffff 64bit pref]
> > | pci 0003:00:00.0: BAR 14: assigned [mem 0x3d05800000000-0x3d05802ffffff]
> > | pci 0003:00:00.0: BAR 13: can't assign io (size 0x4000)
> >
> > (The IO resource error here is due to we do not provide IO window)
> 
> Yes.  The lack of I/O space is just a constraint of the platform.
> It'd be nice if we printed a more meaningful error message in this
> case.  One really has to be a PCI expert to distinguish this from a
> real problem that we need to fix.
> 
> Bjorn



^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH v7] PCI: Try best to allocate pref mmio 64bit above 4g
  2014-04-16  6:30                 ` Benjamin Herrenschmidt
@ 2014-04-16  6:33                   ` Benjamin Herrenschmidt
  2014-04-16 17:15                     ` Bjorn Helgaas
  0 siblings, 1 reply; 35+ messages in thread
From: Benjamin Herrenschmidt @ 2014-04-16  6:33 UTC (permalink / raw)
  To: Bjorn Helgaas
  Cc: Guo Chao, Yinghai Lu, Wei Yang, Gavin Shan, Jack Morgenstein,
	Amir Vadai, Or Gerlitz, Eugenia Emantayev, talal, linux-pci

On Wed, 2014-04-16 at 16:30 +1000, Benjamin Herrenschmidt wrote:
> On Tue, 2014-04-15 at 18:09 -0600, Bjorn Helgaas wrote:
> > 
> > Thanks for the example.  Please open a bug report at
> > http://bugzilla.kernel.org and attach the complete dmesg logs before
> > and after Yinghai's patch.
> > 
> > Having the complete logs helps me answer questions myself without
> > having to bother you, and it also helps me figure out whether we can
> > improve our logging to make it easier to diagnose problems like this.
> 
> Unfortunately, for a *little while* longer (hint !) we can't publish
> a complete log from a Power8 machine, but we should be able to include
> everything remotely related to PCI.

Hrm, actually, I'm not sure we can disclose the device details just yet,
so let's just hide the vendor/device ID to make the lawyers happy and
you can print all the rest.

Cheers,
Ben.



^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH v7] PCI: Try best to allocate pref mmio 64bit above 4g
  2014-04-16  2:29                 ` Yinghai Lu
@ 2014-04-16 17:06                   ` Bjorn Helgaas
  2014-04-17  3:30                     ` Yinghai Lu
  0 siblings, 1 reply; 35+ messages in thread
From: Bjorn Helgaas @ 2014-04-16 17:06 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Guo Chao, Benjamin Herrenschmidt, Wei Yang, Gavin Shan,
	Jack Morgenstein, Amir Vadai, Or Gerlitz, Eugenia Emantayev,
	talal, linux-pci

On Tue, Apr 15, 2014 at 8:29 PM, Yinghai Lu <yinghai@kernel.org> wrote:
> On Tue, Apr 15, 2014 at 5:09 PM, Bjorn Helgaas <bhelgaas@google.com> wrote:
>>> It went wrong at the beginning. Note the error message never considers
>>> 64-bit or not, but BAR 15 here has it MEM_64 flag cleared.
>>
>> BAR 15 is a bridge window.  I think its resource flags should reflect
>> the capability of the *window*, even if we disable the window or we
>> happen to assign addresses that are under 4GB.  So I think it's wrong
>> that we clear the MEM_64 flag  in pbus_size_mem() and the IO flag in
>> pbus_size_io().
>
> We keep PCI_PREF_RANGE_TYPE_64 in the flag.

I think we should stop putting PCI_PREF_RANGE_TYPE_64 in res->flags.
I don't think we ever test for PCI_PREF_RANGE_TYPE_64 today, except
for the trivial usage in pci_read_bridge_mmio_pref(), which can be
easily removed.

Everywhere else, we use IORESOURCE_MEM, IORESOURCE_MEM_64, and
IORESOURCE_PREFETCH to keep track of the properties of the hardware
underlying the struct resource, i.e., the BAR.  Why shouldn't we do
the same for bridge windows?

The struct resource flags should tell us the *possible* properties of
the window, e.g., what the bridge can support.  These properties are
fixed by the bridge hardware, regardless of what devices happen to be
below the bridge.  We discover those properties at enumeration-time,
and we shouldn't change them afterwards.

> We should check that before we touch PCI_PREF_LIMIT_UPPER32 BAR in
> pci_setup_bridge_mmio_pref()
>
> like following:
>
> Subject: [PATCH] PCI: only touch upper 32bit for pref64bit bridge bar
>
> If the bridge pref mmio bar does not support 64bit, we should touch
> upper 32bit.
>
> Signed-off-by: Yinghai Lu <yinghai@kernel.org>
>
> ---
>  drivers/pci/setup-bus.c |   14 ++++++++------
>  1 file changed, 8 insertions(+), 6 deletions(-)
>
> Index: linux-2.6/drivers/pci/setup-bus.c
> ===================================================================
> --- linux-2.6.orig/drivers/pci/setup-bus.c
> +++ linux-2.6/drivers/pci/setup-bus.c
> @@ -592,23 +592,23 @@ static void pci_setup_bridge_mmio(struct
>  static void pci_setup_bridge_mmio_pref(struct pci_bus *bus)
>  {
>      struct pci_dev *bridge = bus->self;
> -    struct resource *res;
> +    struct resource *res = bus->resource[2];
>      struct pci_bus_region region;
>      u32 l, bu, lu;
>
>      /* Clear out the upper 32 bits of PREF limit.
>         If PCI_PREF_BASE_UPPER32 was non-zero, this temporarily
>         disables PREF range, which is ok. */
> -    pci_write_config_dword(bridge, PCI_PREF_LIMIT_UPPER32, 0);
> +    if (res->flags & PCI_PREF_RANGE_TYPE_64)
> +        pci_write_config_dword(bridge, PCI_PREF_LIMIT_UPPER32, 0);
>
>      /* Set up PREF base/limit. */
>      bu = lu = 0;
> -    res = bus->resource[2];
>      pcibios_resource_to_bus(bridge->bus, &region, res);
>      if (res->flags & IORESOURCE_PREFETCH) {
>          l = (region.start >> 16) & 0xfff0;
>          l |= region.end & 0xfff00000;
> -        if (res->flags & IORESOURCE_MEM_64) {
> +        if (res->flags & PCI_PREF_RANGE_TYPE_64) {
>              bu = upper_32_bits(region.start);
>              lu = upper_32_bits(region.end);
>          }
> @@ -619,8 +619,10 @@ static void pci_setup_bridge_mmio_pref(s
>      pci_write_config_dword(bridge, PCI_PREF_MEMORY_BASE, l);
>
>      /* Set the upper 32 bits of PREF base & limit. */
> -    pci_write_config_dword(bridge, PCI_PREF_BASE_UPPER32, bu);
> -    pci_write_config_dword(bridge, PCI_PREF_LIMIT_UPPER32, lu);
> +    if (res->flags & PCI_PREF_RANGE_TYPE_64) {
> +        pci_write_config_dword(bridge, PCI_PREF_BASE_UPPER32, bu);
> +        pci_write_config_dword(bridge, PCI_PREF_LIMIT_UPPER32, lu);
> +    }
>  }
>
>  static void __pci_setup_bridge(struct pci_bus *bus, unsigned long type)

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH v7] PCI: Try best to allocate pref mmio 64bit above 4g
  2014-04-16  4:16                 ` Yinghai Lu
@ 2014-04-16 17:10                   ` Bjorn Helgaas
  0 siblings, 0 replies; 35+ messages in thread
From: Bjorn Helgaas @ 2014-04-16 17:10 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Guo Chao, Benjamin Herrenschmidt, Wei Yang, Gavin Shan,
	Jack Morgenstein, Amir Vadai, Or Gerlitz, Eugenia Emantayev,
	talal, linux-pci

On Tue, Apr 15, 2014 at 10:16 PM, Yinghai Lu <yinghai@kernel.org> wrote:
> On Tue, Apr 15, 2014 at 5:09 PM, Bjorn Helgaas <bhelgaas@google.com> wrote:
>>> It first
>>> tried to find a 32-bit prefetchable window, but we only supply a 64-bit one.
>>> So it fall back to (32-bit) non-prefetchable window, but there is no enough
>>> room there. At last it went into complicated steps (not show here) of
>>> allocating requested resource first, then try best for the optional ones, etc..
>>>
>>> Why is BAR 15 (prefetchable) 32 bit instead of 64? Because PCI core favours
>>> 32-bit prefetchable BARs and we have some. This is one of them:
>>>
>>> | pci 0003:05:00.0: reg 0x30: [mem 0x00000000-0x000fffff pref]
>>>
>>> PCI core decides to let them enjoy the benefition of prefetch. They can't
>>> bear the risk of getting 4G-above address, so its parent, its parent's parent,
>>> its parent's parent's parent, finally the root bridge (00:00.0) must have their
>>> MEM_64 flag of prefetchable resource (BAR 15) clear.
>>
>> It sounds like we're tracking the resource requirements
>> (prefetchability and BAR width) by using the flags on bridge windows.
>> If that's the case, I think it's wrong.  We should preserve the bridge
>> window flags, because those express the bridge hardware capabilities,
>> and we should explicitly keep track of what's required by devices
>> below the bridge in some other way.
>
> if the BAR support 64bit, then resource could be 64bit.
> if the BAR does not support 64bit, then resource should be 32bit.
>
> That should be best guess to decide resource flag.

We don't need to "decide" the resource flags.  The hardware tells us
what it can support, and *that* should be reflected in the flags.  We
do need to decide what addresses to assign and whether to put a
prefetchable BAR in a prefetchable or non-prefetchable window.

Putting decision results in the resource flags obfuscates the whole process.

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH v7] PCI: Try best to allocate pref mmio 64bit above 4g
  2014-04-16  6:33                   ` Benjamin Herrenschmidt
@ 2014-04-16 17:15                     ` Bjorn Helgaas
  0 siblings, 0 replies; 35+ messages in thread
From: Bjorn Helgaas @ 2014-04-16 17:15 UTC (permalink / raw)
  To: Benjamin Herrenschmidt
  Cc: Guo Chao, Yinghai Lu, Wei Yang, Gavin Shan, Jack Morgenstein,
	Amir Vadai, Or Gerlitz, Eugenia Emantayev, talal, linux-pci

On Wed, Apr 16, 2014 at 12:33 AM, Benjamin Herrenschmidt
<benh@kernel.crashing.org> wrote:
> On Wed, 2014-04-16 at 16:30 +1000, Benjamin Herrenschmidt wrote:
>> On Tue, 2014-04-15 at 18:09 -0600, Bjorn Helgaas wrote:
>> >
>> > Thanks for the example.  Please open a bug report at
>> > http://bugzilla.kernel.org and attach the complete dmesg logs before
>> > and after Yinghai's patch.
>> >
>> > Having the complete logs helps me answer questions myself without
>> > having to bother you, and it also helps me figure out whether we can
>> > improve our logging to make it easier to diagnose problems like this.
>>
>> Unfortunately, for a *little while* longer (hint !) we can't publish
>> a complete log from a Power8 machine, but we should be able to include
>> everything remotely related to PCI.
>
> Hrm, actually, I'm not sure we can disclose the device details just yet,
> so let's just hide the vendor/device ID to make the lawyers happy and
> you can print all the rest.

Sure, no problem (and it looks like Guo has done just that with
https://bugzilla.kernel.org/show_bug.cgi?id=74151).

I'm pretty sure this problem is not specific to your secret hardware;
it should be reproducible on any hardware with similar apertures and
similar BARs.  I am a little concerned about the PE implications, but
I'm ignoring that for now until we sort out the first-order problem of
getting the allocation scheme to work.

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH v7] PCI: Try best to allocate pref mmio 64bit above 4g
  2014-03-07 20:08 ` [PATCH v7] PCI: Try best to allocate pref mmio 64bit above 4g Yinghai Lu
  2014-04-04 22:43   ` Bjorn Helgaas
@ 2014-04-16 22:11   ` Bjorn Helgaas
  2014-04-17  4:20     ` Yinghai Lu
  1 sibling, 1 reply; 35+ messages in thread
From: Bjorn Helgaas @ 2014-04-16 22:11 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Benjamin Herrenschmidt, Wei Yang, Jack Morgenstein, amirv,
	Or Gerlitz, eugenia, Guo Chao, talal, linux-pci

[-cc Gavin (addr is bouncing)]

On Fri, Mar 07, 2014 at 12:08:44PM -0800, Yinghai Lu wrote:
> When one of children resources does not support MEM_64, MEM_64 for
> bridge get reset, so pull down whole pref resource on the bridge under 4G.
> 
> If the bridge support pref mem 64, will only allocate that with pref mem64 to
> children that support it.
> For children resources if they only support pref mem 32, will allocate them
> from non pref mem instead.
> 
> If the bridge only support 32bit pref mmio, will still have all children pref
> mmio under that.
> 
> -v2: Add release bridge res support with bridge mem res for pref_mem children res.
> -v3: refresh and make it can be applied early before for_each_dev_res patchset.
> -v4: fix non-pref mmio 64bit support found by Guo Chao.
> -v7: repost as ibm's powerpc system work again when
>     1. petiboot boot kernel have this patch.
>     2. or mellanox driver added new .shutdown member.
>     discussion could be found at:
> 	 http://marc.info/?t=138968064500001&r=1&w=2
> 
> Signed-off-by: Yinghai Lu <yinghai@kernel.org>
> Tested-by: Guo Chao <yan@linux.vnet.ibm.com>
> Tested-by: Wei Yang <weiyang@linux.vnet.ibm.com>
> 
> ---
>  drivers/pci/setup-bus.c |  138 ++++++++++++++++++++++++++++++++----------------
>  drivers/pci/setup-res.c |   20 ++++++
>  2 files changed, 111 insertions(+), 47 deletions(-)
> 
> Index: linux-2.6/drivers/pci/setup-bus.c
> ===================================================================
> --- linux-2.6.orig/drivers/pci/setup-bus.c
> +++ linux-2.6/drivers/pci/setup-bus.c
> @@ -713,12 +713,11 @@ static void pci_bridge_check_ranges(stru
>     bus resource of a given type. Note: we intentionally skip
>     the bus resources which have already been assigned (that is,
>     have non-NULL parent resource). */
> -static struct resource *find_free_bus_resource(struct pci_bus *bus, unsigned long type)
> +static struct resource *find_free_bus_resource(struct pci_bus *bus,
> +			 unsigned long type_mask, unsigned long type)
>  {
>  	int i;
>  	struct resource *r;
> -	unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM |
> -				  IORESOURCE_PREFETCH;
>  
>  	pci_bus_for_each_resource(bus, r, i) {
>  		if (r == &ioport_resource || r == &iomem_resource)
> @@ -815,7 +814,8 @@ static void pbus_size_io(struct pci_bus
>  		resource_size_t add_size, struct list_head *realloc_head)
>  {
>  	struct pci_dev *dev;
> -	struct resource *b_res = find_free_bus_resource(bus, IORESOURCE_IO);
> +	struct resource *b_res = find_free_bus_resource(bus, IORESOURCE_IO,
> +							IORESOURCE_IO);
>  	resource_size_t size = 0, size0 = 0, size1 = 0;
>  	resource_size_t children_add_size = 0;
>  	resource_size_t min_align, align;
> @@ -915,15 +915,17 @@ static inline resource_size_t calculate_
>   * guarantees that all child resources fit in this size.
>   */
>  static int pbus_size_mem(struct pci_bus *bus, unsigned long mask,
> -			 unsigned long type, resource_size_t min_size,
> -			resource_size_t add_size,
> -			struct list_head *realloc_head)
> +			 unsigned long type, unsigned long type2,
> +			 unsigned long type3,
> +			 resource_size_t min_size, resource_size_t add_size,
> +			 struct list_head *realloc_head)

This no longer matches the function documentation above, but it doesn't
really matter, because this interface, with a mask plus three type
parameters, is just way too complicated.

>  {
>  	struct pci_dev *dev;
>  	resource_size_t min_align, align, size, size0, size1;
>  	resource_size_t aligns[12];	/* Alignments from 1Mb to 2Gb */
>  	int order, max_order;
> -	struct resource *b_res = find_free_bus_resource(bus, type);
> +	struct resource *b_res = find_free_bus_resource(bus,
> +					 mask | IORESOURCE_PREFETCH, type);
>  	unsigned int mem64_mask = 0;
>  	resource_size_t children_add_size = 0;
>  
> @@ -944,7 +946,9 @@ static int pbus_size_mem(struct pci_bus
>  			struct resource *r = &dev->resource[i];
>  			resource_size_t r_size;
>  
> -			if (r->parent || (r->flags & mask) != type)
> +			if (r->parent || ((r->flags & mask) != type &&
> +					  (r->flags & mask) != type2 &&
> +					  (r->flags & mask) != type3))
>  				continue;
>  			r_size = resource_size(r);
>  #ifdef CONFIG_PCI_IOV
> @@ -1117,8 +1121,9 @@ void __ref __pci_bus_size_bridges(struct
>  			struct list_head *realloc_head)
>  {
>  	struct pci_dev *dev;
> -	unsigned long mask, prefmask;
> +	unsigned long mask, prefmask, type2 = 0, type3 = 0;
>  	resource_size_t additional_mem_size = 0, additional_io_size = 0;
> +	struct resource *b_res;
>  
>  	list_for_each_entry(dev, &bus->devices, bus_list) {
>  		struct pci_bus *b = dev->subordinate;
> @@ -1163,15 +1168,34 @@ void __ref __pci_bus_size_bridges(struct
>  		   has already been allocated by arch code, try
>  		   non-prefetchable range for both types of PCI memory
>  		   resources. */
> +		b_res = &bus->self->resource[PCI_BRIDGE_RESOURCES];
>  		mask = IORESOURCE_MEM;
>  		prefmask = IORESOURCE_MEM | IORESOURCE_PREFETCH;
> -		if (pbus_size_mem(bus, prefmask, prefmask,
> +		if (b_res[2].flags & IORESOURCE_MEM_64) {
> +			prefmask |= IORESOURCE_MEM_64;
> +			if (pbus_size_mem(bus, prefmask, prefmask,
> +				  prefmask, prefmask,
>  				  realloc_head ? 0 : additional_mem_size,
> -				  additional_mem_size, realloc_head))
> -			mask = prefmask; /* Success, size non-prefetch only. */
> -		else
> -			additional_mem_size += additional_mem_size;
> -		pbus_size_mem(bus, mask, IORESOURCE_MEM,
> +				  additional_mem_size, realloc_head)) {
> +					/* Success, size non-pref64 only. */
> +					mask = prefmask;
> +					type2 = prefmask & ~IORESOURCE_MEM_64;
> +					type3 = prefmask & ~IORESOURCE_PREFETCH;
> +			}
> +		}
> +		if (!type2) {
> +			prefmask &= ~IORESOURCE_MEM_64;
> +			if (pbus_size_mem(bus, prefmask, prefmask,
> +					 prefmask, prefmask,
> +					 realloc_head ? 0 : additional_mem_size,
> +					 additional_mem_size, realloc_head)) {
> +				/* Success, size non-prefetch only. */
> +				mask = prefmask;
> +			} else
> +				additional_mem_size += additional_mem_size;
> +			type2 = type3 = IORESOURCE_MEM;
> +		}
> +		pbus_size_mem(bus, mask, IORESOURCE_MEM, type2, type3,
>  				realloc_head ? 0 : additional_mem_size,
>  				additional_mem_size, realloc_head);

I spent quite a bit of time trying to understand this code yesterday, but
I failed.  Comments might help, but more importantly, it needs to be
simplified so the code makes sense even without comments.

>  		break;
> @@ -1257,42 +1281,66 @@ static void __ref __pci_bridge_assign_re
>  static void pci_bridge_release_resources(struct pci_bus *bus,
>  					  unsigned long type)
>  {
> -	int idx;
> -	bool changed = false;
> -	struct pci_dev *dev;
> +	struct pci_dev *dev = bus->self;
>  	struct resource *r;
>  	unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM |
> -				  IORESOURCE_PREFETCH;
> +				  IORESOURCE_PREFETCH | IORESOURCE_MEM_64;
> +	unsigned old_flags = 0;
> +	struct resource *b_res;
> +	int idx = 1;
>  
> -	dev = bus->self;
> -	for (idx = PCI_BRIDGE_RESOURCES; idx <= PCI_BRIDGE_RESOURCE_END;
> -	     idx++) {
> -		r = &dev->resource[idx];
> -		if ((r->flags & type_mask) != type)
> -			continue;
> -		if (!r->parent)
> -			continue;
> -		/*
> -		 * if there are children under that, we should release them
> -		 *  all
> -		 */
> -		release_child_resources(r);
> -		if (!release_resource(r)) {
> -			dev_printk(KERN_DEBUG, &dev->dev,
> -				 "resource %d %pR released\n", idx, r);
> -			/* keep the old size */
> -			r->end = resource_size(r) - 1;
> -			r->start = 0;
> -			r->flags = 0;
> -			changed = true;
> -		}
> -	}
> +	b_res = &dev->resource[PCI_BRIDGE_RESOURCES];
> +
> +	/*
> +	 *     1. if there is io port assign fail, will release bridge
> +	 *	  io port.
> +	 *     2. if there is non pref mmio assign fail, release bridge
> +	 *	  nonpref mmio.
> +	 *     3. if there is 64bit pref mmio assign fail, and bridge pref
> +	 *	  is 64bit, release bridge pref mmio.
> +	 *     4. if there is pref mmio assign fail, and bridge pref is
> +	 *	  32bit mmio, release bridge pref mmio
> +	 *     5. if there is pref mmio assign fail, and bridge pref is not
> +	 *	  assigned, release bridge nonpref mmio.
> +	 */
> +	if (type & IORESOURCE_IO)
> +		idx = 0;
> +	else if (!(type & IORESOURCE_PREFETCH))
> +		idx = 1;
> +	else if ((type & IORESOURCE_MEM_64) &&
> +		 (b_res[2].flags & IORESOURCE_MEM_64))
> +		idx = 2;
> +	else if (!(b_res[2].flags & IORESOURCE_MEM_64) &&
> +		 (b_res[2].flags & IORESOURCE_PREFETCH))
> +		idx = 2;
> +	else
> +		idx = 1;

This is pretty hard to figure out.  Is there any way we can just pass in
the bridge window identifier directly, e.g., 0 == I/O window, 1 ==
non-prefetchable MEM window, 2 == prefetchable window?  It seems like way
up in pci_assign_unassigned_root_bus_resources(), we ought to know which
window failed, and we could just pass that in somehow instead of having to
derive it here.

> +
> +	r = &b_res[idx];
> +
> +	if (!r->parent)
> +		return;
> +
> +	/*
> +	 * if there are children under that, we should release them
> +	 *  all
> +	 */
> +	release_child_resources(r);
> +	if (!release_resource(r)) {
> +		type = old_flags = r->flags & type_mask;
> +		dev_printk(KERN_DEBUG, &dev->dev, "resource %d %pR released\n",
> +					PCI_BRIDGE_RESOURCES + idx, r);
> +		/* keep the old size */
> +		r->end = resource_size(r) - 1;
> +		r->start = 0;
> +		r->flags = 0;
>  
> -	if (changed) {
>  		/* avoiding touch the one without PREF */
>  		if (type & IORESOURCE_PREFETCH)
>  			type = IORESOURCE_PREFETCH;
>  		__pci_setup_bridge(bus, type);
> +		/* for next child res under same bridge */
> +		r->flags = old_flags;
>  	}
>  }
>  
> @@ -1471,7 +1519,7 @@ void pci_assign_unassigned_root_bus_reso
>  	LIST_HEAD(fail_head);
>  	struct pci_dev_resource *fail_res;
>  	unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM |
> -				  IORESOURCE_PREFETCH;
> +				  IORESOURCE_PREFETCH | IORESOURCE_MEM_64;
>  	int pci_try_num = 1;
>  	enum enable_type enable_local;
>  
> Index: linux-2.6/drivers/pci/setup-res.c
> ===================================================================
> --- linux-2.6.orig/drivers/pci/setup-res.c
> +++ linux-2.6/drivers/pci/setup-res.c
> @@ -208,15 +208,31 @@ static int __pci_assign_resource(struct
>  
>  	/* First, try exact prefetching match.. */
>  	ret = pci_bus_alloc_resource(bus, res, size, align, min,
> -				     IORESOURCE_PREFETCH,
> +				     IORESOURCE_PREFETCH | IORESOURCE_MEM_64,
>  				     pcibios_align_resource, dev);
>  
> -	if (ret < 0 && (res->flags & IORESOURCE_PREFETCH)) {
> +	if (ret < 0 &&
> +	    (res->flags & (IORESOURCE_PREFETCH | IORESOURCE_MEM_64)) ==
> +	     (IORESOURCE_PREFETCH | IORESOURCE_MEM_64)) {
> +		/*
> +		 * That failed.
> +		 *
> +		 * Try below 4g pref
> +		 */
> +		ret = pci_bus_alloc_resource(bus, res, size, align, min,
> +					     IORESOURCE_PREFETCH,
> +					     pcibios_align_resource, dev);
> +	}

pci_bus_alloc_from_region() already has a mechanism for restricting
allocation to certain bus addresses.  I don't like this second mechanism of
also using IORESOURCE_MEM_64.  There's too much implicit state -- I think
there's a dependency here on how we manage the IORESOURCE_MEM_64 bit in the
bridge window resources.  That makes this code hard to understand.

> +
> +	if (ret < 0 &&
> +	    (res->flags & (IORESOURCE_PREFETCH | IORESOURCE_MEM_64))) {
>  		/*
>  		 * That failed.
>  		 *
>  		 * But a prefetching area can handle a non-prefetching
>  		 * window (it will just not perform as well).
> +		 *
> +		 * Also can put 64bit under 32bit range. (below 4g).
>  		 */
>  		ret = pci_bus_alloc_resource(bus, res, size, align, min, 0,
>  					     pcibios_align_resource, dev);

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH v7] PCI: Try best to allocate pref mmio 64bit above 4g
  2014-04-16 17:06                   ` Bjorn Helgaas
@ 2014-04-17  3:30                     ` Yinghai Lu
  0 siblings, 0 replies; 35+ messages in thread
From: Yinghai Lu @ 2014-04-17  3:30 UTC (permalink / raw)
  To: Bjorn Helgaas
  Cc: Guo Chao, Benjamin Herrenschmidt, Wei Yang, Gavin Shan,
	Jack Morgenstein, Amir Vadai, Or Gerlitz, Eugenia Emantayev,
	talal, linux-pci

[-- Attachment #1: Type: text/plain, Size: 3031 bytes --]

On Wed, Apr 16, 2014 at 10:06 AM, Bjorn Helgaas <bhelgaas@google.com> wrote:
> I think we should stop putting PCI_PREF_RANGE_TYPE_64 in res->flags.
> I don't think we ever test for PCI_PREF_RANGE_TYPE_64 today, except
> for the trivial usage in pci_read_bridge_mmio_pref(), which can be
> easily removed.
>
> Everywhere else, we use IORESOURCE_MEM, IORESOURCE_MEM_64, and
> IORESOURCE_PREFETCH to keep track of the properties of the hardware
> underlying the struct resource, i.e., the BAR.  Why shouldn't we do
> the same for bridge windows?
>
> The struct resource flags should tell us the *possible* properties of
> the window, e.g., what the bridge can support.  These properties are
> fixed by the bridge hardware, regardless of what devices happen to be
> below the bridge.  We discover those properties at enumeration-time,
> and we shouldn't change them afterwards.

After this patch (we check exact type with 64bit pref), actually bridge flags
IORESOURCE_MEM_64 would not get cleared.
so we don't check that in pbus_size_mem anymore.

Following patch could be applied later.

Subject: [PATCH] PCI: don't update bridge resource flags according to children

After  PCI: Try best to allocate pref mmio 64bit above 4g
We don't need to check if children support 64bit, and update
bridge resource flags anymore.
As the code size and assign according exact type already.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>

---
 drivers/pci/setup-bus.c |    7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

Index: linux-2.6/drivers/pci/setup-bus.c
===================================================================
--- linux-2.6.orig/drivers/pci/setup-bus.c
+++ linux-2.6/drivers/pci/setup-bus.c
@@ -926,7 +926,6 @@ static int pbus_size_mem(struct pci_bus
     int order, max_order;
     struct resource *b_res = find_free_bus_resource(bus,
                      mask | IORESOURCE_PREFETCH, type);
-    unsigned int mem64_mask = 0;
     resource_size_t children_add_size = 0;

     if (!b_res)
@@ -936,9 +935,6 @@ static int pbus_size_mem(struct pci_bus
     max_order = 0;
     size = 0;

-    mem64_mask = b_res->flags & IORESOURCE_MEM_64;
-    b_res->flags &= ~IORESOURCE_MEM_64;
-
     list_for_each_entry(dev, &bus->devices, bus_list) {
         int i;

@@ -980,7 +976,6 @@ static int pbus_size_mem(struct pci_bus
                 aligns[order] += align;
             if (order > max_order)
                 max_order = order;
-            mem64_mask &= r->flags & IORESOURCE_MEM_64;

             if (realloc_head)
                 children_add_size += get_res_add_size(realloc_head, r);
@@ -1005,7 +1000,7 @@ static int pbus_size_mem(struct pci_bus
     }
     b_res->start = min_align;
     b_res->end = size0 + min_align - 1;
-    b_res->flags |= IORESOURCE_STARTALIGN | mem64_mask;
+    b_res->flags |= IORESOURCE_STARTALIGN;
     if (size1 > size0 && realloc_head) {
         add_to_list(realloc_head, bus->self, b_res, size1-size0, min_align);
         dev_printk(KERN_DEBUG, &bus->self->dev, "bridge window "

[-- Attachment #2: remove_mmio64_check.patch --]
[-- Type: text/x-patch, Size: 1806 bytes --]

Subject: [PATCH] PCI: don't update bridge resource flags according to children

After  PCI: Try best to allocate pref mmio 64bit above 4g
We don't need to check if children support 64bit, and update
bridge resource flags anymore.
As the code size and assign according exact type already.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>

---
 drivers/pci/setup-bus.c |    7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

Index: linux-2.6/drivers/pci/setup-bus.c
===================================================================
--- linux-2.6.orig/drivers/pci/setup-bus.c
+++ linux-2.6/drivers/pci/setup-bus.c
@@ -926,7 +926,6 @@ static int pbus_size_mem(struct pci_bus
 	int order, max_order;
 	struct resource *b_res = find_free_bus_resource(bus,
 					 mask | IORESOURCE_PREFETCH, type);
-	unsigned int mem64_mask = 0;
 	resource_size_t children_add_size = 0;
 
 	if (!b_res)
@@ -936,9 +935,6 @@ static int pbus_size_mem(struct pci_bus
 	max_order = 0;
 	size = 0;
 
-	mem64_mask = b_res->flags & IORESOURCE_MEM_64;
-	b_res->flags &= ~IORESOURCE_MEM_64;
-
 	list_for_each_entry(dev, &bus->devices, bus_list) {
 		int i;
 
@@ -980,7 +976,6 @@ static int pbus_size_mem(struct pci_bus
 				aligns[order] += align;
 			if (order > max_order)
 				max_order = order;
-			mem64_mask &= r->flags & IORESOURCE_MEM_64;
 
 			if (realloc_head)
 				children_add_size += get_res_add_size(realloc_head, r);
@@ -1005,7 +1000,7 @@ static int pbus_size_mem(struct pci_bus
 	}
 	b_res->start = min_align;
 	b_res->end = size0 + min_align - 1;
-	b_res->flags |= IORESOURCE_STARTALIGN | mem64_mask;
+	b_res->flags |= IORESOURCE_STARTALIGN;
 	if (size1 > size0 && realloc_head) {
 		add_to_list(realloc_head, bus->self, b_res, size1-size0, min_align);
 		dev_printk(KERN_DEBUG, &bus->self->dev, "bridge window "

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH v7] PCI: Try best to allocate pref mmio 64bit above 4g
  2014-04-16 22:11   ` Bjorn Helgaas
@ 2014-04-17  4:20     ` Yinghai Lu
  2014-04-17 16:35       ` Yinghai Lu
  0 siblings, 1 reply; 35+ messages in thread
From: Yinghai Lu @ 2014-04-17  4:20 UTC (permalink / raw)
  To: Bjorn Helgaas
  Cc: Benjamin Herrenschmidt, Wei Yang, Jack Morgenstein, Amir Vadai,
	Or Gerlitz, Eugenia Emantayev, Guo Chao, talal, linux-pci

On Wed, Apr 16, 2014 at 3:11 PM, Bjorn Helgaas <bhelgaas@google.com> wrote:
>
> I spent quite a bit of time trying to understand this code yesterday, but
> I failed.  Comments might help, but more importantly, it needs to be
> simplified so the code makes sense even without comments.

will add some comments.

>> +     b_res = &dev->resource[PCI_BRIDGE_RESOURCES];
>> +
>> +     /*
>> +      *     1. if there is io port assign fail, will release bridge
>> +      *        io port.
>> +      *     2. if there is non pref mmio assign fail, release bridge
>> +      *        nonpref mmio.
>> +      *     3. if there is 64bit pref mmio assign fail, and bridge pref
>> +      *        is 64bit, release bridge pref mmio.
>> +      *     4. if there is pref mmio assign fail, and bridge pref is
>> +      *        32bit mmio, release bridge pref mmio
>> +      *     5. if there is pref mmio assign fail, and bridge pref is not
>> +      *        assigned, release bridge nonpref mmio.
>> +      */
>> +     if (type & IORESOURCE_IO)
>> +             idx = 0;
>> +     else if (!(type & IORESOURCE_PREFETCH))
>> +             idx = 1;
>> +     else if ((type & IORESOURCE_MEM_64) &&
>> +              (b_res[2].flags & IORESOURCE_MEM_64))
>> +             idx = 2;
>> +     else if (!(b_res[2].flags & IORESOURCE_MEM_64) &&
>> +              (b_res[2].flags & IORESOURCE_PREFETCH))
>> +             idx = 2;
>> +     else
>> +             idx = 1;
>
> This is pretty hard to figure out.  Is there any way we can just pass in
> the bridge window identifier directly, e.g., 0 == I/O window, 1 ==
> non-prefetchable MEM window, 2 == prefetchable window?  It seems like way
> up in pci_assign_unassigned_root_bus_resources(), we ought to know which
> window failed, and we could just pass that in somehow instead of having to
> derive it here.

that is for find out which up-level could be scratched...
and do not know exact which one should be fit as twisted mmio-pref under mmio.

>>
>> -     if (ret < 0 && (res->flags & IORESOURCE_PREFETCH)) {
>> +     if (ret < 0 &&
>> +         (res->flags & (IORESOURCE_PREFETCH | IORESOURCE_MEM_64)) ==
>> +          (IORESOURCE_PREFETCH | IORESOURCE_MEM_64)) {
>> +             /*
>> +              * That failed.
>> +              *
>> +              * Try below 4g pref
>> +              */
>> +             ret = pci_bus_alloc_resource(bus, res, size, align, min,
>> +                                          IORESOURCE_PREFETCH,
>> +                                          pcibios_align_resource, dev);
>> +     }
>
> pci_bus_alloc_from_region() already has a mechanism for restricting
> allocation to certain bus addresses.  I don't like this second mechanism of
> also using IORESOURCE_MEM_64.  There's too much implicit state -- I think
> there's a dependency here on how we manage the IORESOURCE_MEM_64 bit in the
> bridge window resources.  That makes this code hard to understand.
>

will drop that.

Yinghai

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH v8] PCI: Try best to allocate pref mmio 64bit above 4g
       [not found] <CAE9FiQXzRQ9S0mwwb58R2icBTtHHJ6NMVFMaxEgtVNP9mZVjZw@mail.gmail.com>
  2014-03-07 20:08 ` [PATCH v7] PCI: Try best to allocate pref mmio 64bit above 4g Yinghai Lu
@ 2014-04-17  4:23 ` Yinghai Lu
  2014-04-17 16:40 ` [PATCH v9] " Yinghai Lu
  2 siblings, 0 replies; 35+ messages in thread
From: Yinghai Lu @ 2014-04-17  4:23 UTC (permalink / raw)
  To: Bjorn Helgaas
  Cc: Benjamin Herrenschmidt, Wei Yang, Gavin Shan, Jack Morgenstein,
	amirv, Or Gerlitz, eugenia, Guo Chao, talal, linux-pci,
	Yinghai Lu

When one of children resources does not support MEM_64, MEM_64 for
bridge get reset, so pull down whole pref resource on the bridge under 4G.

If the bridge support pref mem 64, will only allocate that with pref mem64 to
children that support it.
For children resources if they only support pref mem 32, will allocate them
from non pref mem instead.

If the bridge only support 32bit pref mmio, will still have all children pref
mmio under that.

-v2: Add release bridge res support with bridge mem res for pref_mem children res.
-v3: refresh and make it can be applied early before for_each_dev_res patchset.
-v4: fix non-pref mmio 64bit support found by Guo Chao.
-v7: repost as ibm's powerpc system work again when
    1. petiboot boot kernel have this patch.
    2. or mellanox driver added new .shutdown member.
    discussion could be found at:
	 http://marc.info/?t=138968064500001&r=1&w=2
-v8: update comment a bit.
     drop change in __pci_assign_resource()

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Tested-by: Guo Chao <yan@linux.vnet.ibm.com>
Tested-by: Wei Yang <weiyang@linux.vnet.ibm.com>

---
 drivers/pci/setup-bus.c |  143 ++++++++++++++++++++++++++++++++----------------
 1 file changed, 98 insertions(+), 45 deletions(-)

Index: linux-2.6/drivers/pci/setup-bus.c
===================================================================
--- linux-2.6.orig/drivers/pci/setup-bus.c
+++ linux-2.6/drivers/pci/setup-bus.c
@@ -713,12 +713,11 @@ static void pci_bridge_check_ranges(stru
    bus resource of a given type. Note: we intentionally skip
    the bus resources which have already been assigned (that is,
    have non-NULL parent resource). */
-static struct resource *find_free_bus_resource(struct pci_bus *bus, unsigned long type)
+static struct resource *find_free_bus_resource(struct pci_bus *bus,
+			 unsigned long type_mask, unsigned long type)
 {
 	int i;
 	struct resource *r;
-	unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM |
-				  IORESOURCE_PREFETCH;
 
 	pci_bus_for_each_resource(bus, r, i) {
 		if (r == &ioport_resource || r == &iomem_resource)
@@ -815,7 +814,8 @@ static void pbus_size_io(struct pci_bus
 		resource_size_t add_size, struct list_head *realloc_head)
 {
 	struct pci_dev *dev;
-	struct resource *b_res = find_free_bus_resource(bus, IORESOURCE_IO);
+	struct resource *b_res = find_free_bus_resource(bus, IORESOURCE_IO,
+							IORESOURCE_IO);
 	resource_size_t size = 0, size0 = 0, size1 = 0;
 	resource_size_t children_add_size = 0;
 	resource_size_t min_align, align;
@@ -907,6 +907,8 @@ static inline resource_size_t calculate_
  * @bus : the bus
  * @mask: mask the resource flag, then compare it with type
  * @type: the type of free resource from bridge
+ * @type2: second match type
+ * @type3: third match type
  * @min_size : the minimum memory window that must to be allocated
  * @add_size : additional optional memory window
  * @realloc_head : track the additional memory window on this list
@@ -915,15 +917,17 @@ static inline resource_size_t calculate_
  * guarantees that all child resources fit in this size.
  */
 static int pbus_size_mem(struct pci_bus *bus, unsigned long mask,
-			 unsigned long type, resource_size_t min_size,
-			resource_size_t add_size,
-			struct list_head *realloc_head)
+			 unsigned long type, unsigned long type2,
+			 unsigned long type3,
+			 resource_size_t min_size, resource_size_t add_size,
+			 struct list_head *realloc_head)
 {
 	struct pci_dev *dev;
 	resource_size_t min_align, align, size, size0, size1;
 	resource_size_t aligns[12];	/* Alignments from 1Mb to 2Gb */
 	int order, max_order;
-	struct resource *b_res = find_free_bus_resource(bus, type);
+	struct resource *b_res = find_free_bus_resource(bus,
+					 mask | IORESOURCE_PREFETCH, type);
 	unsigned int mem64_mask = 0;
 	resource_size_t children_add_size = 0;
 
@@ -944,7 +948,9 @@ static int pbus_size_mem(struct pci_bus
 			struct resource *r = &dev->resource[i];
 			resource_size_t r_size;
 
-			if (r->parent || (r->flags & mask) != type)
+			if (r->parent || ((r->flags & mask) != type &&
+					  (r->flags & mask) != type2 &&
+					  (r->flags & mask) != type3))
 				continue;
 			r_size = resource_size(r);
 #ifdef CONFIG_PCI_IOV
@@ -1117,8 +1123,9 @@ void __ref __pci_bus_size_bridges(struct
 			struct list_head *realloc_head)
 {
 	struct pci_dev *dev;
-	unsigned long mask, prefmask;
+	unsigned long mask, prefmask, type2 = 0, type3 = 0;
 	resource_size_t additional_mem_size = 0, additional_io_size = 0;
+	struct resource *b_res;
 
 	list_for_each_entry(dev, &bus->devices, bus_list) {
 		struct pci_bus *b = dev->subordinate;
@@ -1163,15 +1170,37 @@ void __ref __pci_bus_size_bridges(struct
 		   has already been allocated by arch code, try
 		   non-prefetchable range for both types of PCI memory
 		   resources. */
+		b_res = &bus->self->resource[PCI_BRIDGE_RESOURCES];
 		mask = IORESOURCE_MEM;
 		prefmask = IORESOURCE_MEM | IORESOURCE_PREFETCH;
-		if (pbus_size_mem(bus, prefmask, prefmask,
+		if (b_res[2].flags & IORESOURCE_MEM_64) {
+			prefmask |= IORESOURCE_MEM_64;
+			if (pbus_size_mem(bus, prefmask, prefmask,
+				  prefmask, prefmask,
 				  realloc_head ? 0 : additional_mem_size,
-				  additional_mem_size, realloc_head))
-			mask = prefmask; /* Success, size non-prefetch only. */
-		else
-			additional_mem_size += additional_mem_size;
-		pbus_size_mem(bus, mask, IORESOURCE_MEM,
+				  additional_mem_size, realloc_head)) {
+					/*
+					 * Success, with pref mmio64,
+					 * next will size non-pref or
+					 * non-mmio64 */
+					mask = prefmask;
+					type2 = prefmask & ~IORESOURCE_MEM_64;
+					type3 = prefmask & ~IORESOURCE_PREFETCH;
+			}
+		}
+		if (!type2) {
+			prefmask &= ~IORESOURCE_MEM_64;
+			if (pbus_size_mem(bus, prefmask, prefmask,
+					 prefmask, prefmask,
+					 realloc_head ? 0 : additional_mem_size,
+					 additional_mem_size, realloc_head)) {
+				/* Success, next will size non-prefetch. */
+				mask = prefmask;
+			} else
+				additional_mem_size += additional_mem_size;
+			type2 = type3 = IORESOURCE_MEM;
+		}
+		pbus_size_mem(bus, mask, IORESOURCE_MEM, type2, type3,
 				realloc_head ? 0 : additional_mem_size,
 				additional_mem_size, realloc_head);
 		break;
@@ -1257,42 +1286,66 @@ static void __ref __pci_bridge_assign_re
 static void pci_bridge_release_resources(struct pci_bus *bus,
 					  unsigned long type)
 {
-	int idx;
-	bool changed = false;
-	struct pci_dev *dev;
+	struct pci_dev *dev = bus->self;
 	struct resource *r;
 	unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM |
-				  IORESOURCE_PREFETCH;
+				  IORESOURCE_PREFETCH | IORESOURCE_MEM_64;
+	unsigned old_flags = 0;
+	struct resource *b_res;
+	int idx = 1;
 
-	dev = bus->self;
-	for (idx = PCI_BRIDGE_RESOURCES; idx <= PCI_BRIDGE_RESOURCE_END;
-	     idx++) {
-		r = &dev->resource[idx];
-		if ((r->flags & type_mask) != type)
-			continue;
-		if (!r->parent)
-			continue;
-		/*
-		 * if there are children under that, we should release them
-		 *  all
-		 */
-		release_child_resources(r);
-		if (!release_resource(r)) {
-			dev_printk(KERN_DEBUG, &dev->dev,
-				 "resource %d %pR released\n", idx, r);
-			/* keep the old size */
-			r->end = resource_size(r) - 1;
-			r->start = 0;
-			r->flags = 0;
-			changed = true;
-		}
-	}
+	b_res = &dev->resource[PCI_BRIDGE_RESOURCES];
+
+	/*
+	 *     1. if there is io port assign fail, will release bridge
+	 *	  io port.
+	 *     2. if there is non pref mmio assign fail, release bridge
+	 *	  nonpref mmio.
+	 *     3. if there is 64bit pref mmio assign fail, and bridge pref
+	 *	  is 64bit, release bridge pref mmio.
+	 *     4. if there is pref mmio assign fail, and bridge pref is
+	 *	  32bit mmio, release bridge pref mmio
+	 *     5. if there is pref mmio assign fail, and bridge pref is not
+	 *	  assigned, release bridge nonpref mmio.
+	 */
+	if (type & IORESOURCE_IO)
+		idx = 0;
+	else if (!(type & IORESOURCE_PREFETCH))
+		idx = 1;
+	else if ((type & IORESOURCE_MEM_64) &&
+		 (b_res[2].flags & IORESOURCE_MEM_64))
+		idx = 2;
+	else if (!(b_res[2].flags & IORESOURCE_MEM_64) &&
+		 (b_res[2].flags & IORESOURCE_PREFETCH))
+		idx = 2;
+	else
+		idx = 1;
+
+	r = &b_res[idx];
+
+	if (!r->parent)
+		return;
+
+	/*
+	 * if there are children under that, we should release them
+	 *  all
+	 */
+	release_child_resources(r);
+	if (!release_resource(r)) {
+		type = old_flags = r->flags & type_mask;
+		dev_printk(KERN_DEBUG, &dev->dev, "resource %d %pR released\n",
+					PCI_BRIDGE_RESOURCES + idx, r);
+		/* keep the old size */
+		r->end = resource_size(r) - 1;
+		r->start = 0;
+		r->flags = 0;
 
-	if (changed) {
 		/* avoiding touch the one without PREF */
 		if (type & IORESOURCE_PREFETCH)
 			type = IORESOURCE_PREFETCH;
 		__pci_setup_bridge(bus, type);
+		/* for next child res under same bridge */
+		r->flags = old_flags;
 	}
 }
 
@@ -1471,7 +1524,7 @@ void pci_assign_unassigned_root_bus_reso
 	LIST_HEAD(fail_head);
 	struct pci_dev_resource *fail_res;
 	unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM |
-				  IORESOURCE_PREFETCH;
+				  IORESOURCE_PREFETCH | IORESOURCE_MEM_64;
 	int pci_try_num = 1;
 	enum enable_type enable_local;
 

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH v7] PCI: Try best to allocate pref mmio 64bit above 4g
  2014-04-17  4:20     ` Yinghai Lu
@ 2014-04-17 16:35       ` Yinghai Lu
  0 siblings, 0 replies; 35+ messages in thread
From: Yinghai Lu @ 2014-04-17 16:35 UTC (permalink / raw)
  To: Bjorn Helgaas
  Cc: Benjamin Herrenschmidt, Wei Yang, Jack Morgenstein, Amir Vadai,
	Or Gerlitz, Eugenia Emantayev, Guo Chao, talal, linux-pci

On Wed, Apr 16, 2014 at 9:20 PM, Yinghai Lu <yinghai@kernel.org> wrote:
> On Wed, Apr 16, 2014 at 3:11 PM, Bjorn Helgaas <bhelgaas@google.com> wrote:
>
>>>
>>> -     if (ret < 0 && (res->flags & IORESOURCE_PREFETCH)) {
>>> +     if (ret < 0 &&
>>> +         (res->flags & (IORESOURCE_PREFETCH | IORESOURCE_MEM_64)) ==
>>> +          (IORESOURCE_PREFETCH | IORESOURCE_MEM_64)) {
>>> +             /*
>>> +              * That failed.
>>> +              *
>>> +              * Try below 4g pref
>>> +              */
>>> +             ret = pci_bus_alloc_resource(bus, res, size, align, min,
>>> +                                          IORESOURCE_PREFETCH,
>>> +                                          pcibios_align_resource, dev);
>>> +     }
>>
>> pci_bus_alloc_from_region() already has a mechanism for restricting
>> allocation to certain bus addresses.  I don't like this second mechanism of
>> also using IORESOURCE_MEM_64.  There's too much implicit state -- I think
>> there's a dependency here on how we manage the IORESOURCE_MEM_64 bit in the
>> bridge window resources.  That makes this code hard to understand.
>>
>
> will drop that.

After more checking, found out that we still need the change.
Reason:
We size pref,mmio64 at first, and then put pref without 64 under nonpref.

If bridge have pref/mmio64, and mmio32, children have pref/mmio64, and
pref/mmio32.
then during sizing: children pref/mmio64 is under bridge pref/mmio64,
children pref/mmio32
is under bridge mmio32.

If during allocation, bridge pref/mmio64 still could get allocation under 4G.
then without this change we could put children pref/mmio32 under that.
could be ok, we would put children pref/mm64 under bridge mmio32.
But in some case size could be different, then could be fail if bridge
mmio32 is small then bridge pref/mmio64.

Please check latest v9, with comments change.

Thanks

Yinghai

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH v9] PCI: Try best to allocate pref mmio 64bit above 4g
       [not found] <CAE9FiQXzRQ9S0mwwb58R2icBTtHHJ6NMVFMaxEgtVNP9mZVjZw@mail.gmail.com>
  2014-03-07 20:08 ` [PATCH v7] PCI: Try best to allocate pref mmio 64bit above 4g Yinghai Lu
  2014-04-17  4:23 ` [PATCH v8] " Yinghai Lu
@ 2014-04-17 16:40 ` Yinghai Lu
  2014-05-20  3:45   ` [PATCH v10 0/4] PCI: Restrict 64-bit prefetchable bridge windows to 64-bit resources Bjorn Helgaas
  2 siblings, 1 reply; 35+ messages in thread
From: Yinghai Lu @ 2014-04-17 16:40 UTC (permalink / raw)
  To: Bjorn Helgaas
  Cc: Benjamin Herrenschmidt, Wei Yang, Gavin Shan, Jack Morgenstein,
	amirv, Or Gerlitz, eugenia, Guo Chao, talal, linux-pci,
	Yinghai Lu

When one of children resources does not support MEM_64, MEM_64 for
bridge get reset, so pull down whole pref resource on the bridge under 4G.

If the bridge support pref mem 64, will only allocate that with pref mem64 to
children that support it.
For children resources if they only support pref mem 32, will allocate them
from non pref mem instead.

If the bridge only support 32bit pref mmio, will still have all children pref
mmio under that.

-v2: Add release bridge res support with bridge mem res for pref_mem children res.
-v3: refresh and make it can be applied early before for_each_dev_res patchset.
-v4: fix non-pref mmio 64bit support found by Guo Chao.
-v7: repost as ibm's powerpc system work again when
    1. petiboot boot kernel have this patch.
    2. or mellanox driver added new .shutdown member.
    discussion could be found at:
	 http://marc.info/?t=138968064500001&r=1&w=2
-v8: update comment a bit
     drop change in __pci_assign_resource()
-v9: put back change __pci_assign_resource()
     to keep sizing and assign in order.


Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Tested-by: Guo Chao <yan@linux.vnet.ibm.com>
Tested-by: Wei Yang <weiyang@linux.vnet.ibm.com>

---
 drivers/pci/setup-bus.c |  143 ++++++++++++++++++++++++++++++++----------------
 drivers/pci/setup-res.c |   20 ++++++
 2 files changed, 116 insertions(+), 47 deletions(-)

Index: linux-2.6/drivers/pci/setup-bus.c
===================================================================
--- linux-2.6.orig/drivers/pci/setup-bus.c
+++ linux-2.6/drivers/pci/setup-bus.c
@@ -713,12 +713,11 @@ static void pci_bridge_check_ranges(stru
    bus resource of a given type. Note: we intentionally skip
    the bus resources which have already been assigned (that is,
    have non-NULL parent resource). */
-static struct resource *find_free_bus_resource(struct pci_bus *bus, unsigned long type)
+static struct resource *find_free_bus_resource(struct pci_bus *bus,
+			 unsigned long type_mask, unsigned long type)
 {
 	int i;
 	struct resource *r;
-	unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM |
-				  IORESOURCE_PREFETCH;
 
 	pci_bus_for_each_resource(bus, r, i) {
 		if (r == &ioport_resource || r == &iomem_resource)
@@ -815,7 +814,8 @@ static void pbus_size_io(struct pci_bus
 		resource_size_t add_size, struct list_head *realloc_head)
 {
 	struct pci_dev *dev;
-	struct resource *b_res = find_free_bus_resource(bus, IORESOURCE_IO);
+	struct resource *b_res = find_free_bus_resource(bus, IORESOURCE_IO,
+							IORESOURCE_IO);
 	resource_size_t size = 0, size0 = 0, size1 = 0;
 	resource_size_t children_add_size = 0;
 	resource_size_t min_align, align;
@@ -907,6 +907,8 @@ static inline resource_size_t calculate_
  * @bus : the bus
  * @mask: mask the resource flag, then compare it with type
  * @type: the type of free resource from bridge
+ * @type2: second match type
+ * @type3: third match type
  * @min_size : the minimum memory window that must to be allocated
  * @add_size : additional optional memory window
  * @realloc_head : track the additional memory window on this list
@@ -915,15 +917,17 @@ static inline resource_size_t calculate_
  * guarantees that all child resources fit in this size.
  */
 static int pbus_size_mem(struct pci_bus *bus, unsigned long mask,
-			 unsigned long type, resource_size_t min_size,
-			resource_size_t add_size,
-			struct list_head *realloc_head)
+			 unsigned long type, unsigned long type2,
+			 unsigned long type3,
+			 resource_size_t min_size, resource_size_t add_size,
+			 struct list_head *realloc_head)
 {
 	struct pci_dev *dev;
 	resource_size_t min_align, align, size, size0, size1;
 	resource_size_t aligns[12];	/* Alignments from 1Mb to 2Gb */
 	int order, max_order;
-	struct resource *b_res = find_free_bus_resource(bus, type);
+	struct resource *b_res = find_free_bus_resource(bus,
+					 mask | IORESOURCE_PREFETCH, type);
 	unsigned int mem64_mask = 0;
 	resource_size_t children_add_size = 0;
 
@@ -944,7 +948,9 @@ static int pbus_size_mem(struct pci_bus
 			struct resource *r = &dev->resource[i];
 			resource_size_t r_size;
 
-			if (r->parent || (r->flags & mask) != type)
+			if (r->parent || ((r->flags & mask) != type &&
+					  (r->flags & mask) != type2 &&
+					  (r->flags & mask) != type3))
 				continue;
 			r_size = resource_size(r);
 #ifdef CONFIG_PCI_IOV
@@ -1117,8 +1123,9 @@ void __ref __pci_bus_size_bridges(struct
 			struct list_head *realloc_head)
 {
 	struct pci_dev *dev;
-	unsigned long mask, prefmask;
+	unsigned long mask, prefmask, type2 = 0, type3 = 0;
 	resource_size_t additional_mem_size = 0, additional_io_size = 0;
+	struct resource *b_res;
 
 	list_for_each_entry(dev, &bus->devices, bus_list) {
 		struct pci_bus *b = dev->subordinate;
@@ -1163,15 +1170,37 @@ void __ref __pci_bus_size_bridges(struct
 		   has already been allocated by arch code, try
 		   non-prefetchable range for both types of PCI memory
 		   resources. */
+		b_res = &bus->self->resource[PCI_BRIDGE_RESOURCES];
 		mask = IORESOURCE_MEM;
 		prefmask = IORESOURCE_MEM | IORESOURCE_PREFETCH;
-		if (pbus_size_mem(bus, prefmask, prefmask,
+		if (b_res[2].flags & IORESOURCE_MEM_64) {
+			prefmask |= IORESOURCE_MEM_64;
+			if (pbus_size_mem(bus, prefmask, prefmask,
+				  prefmask, prefmask,
 				  realloc_head ? 0 : additional_mem_size,
-				  additional_mem_size, realloc_head))
-			mask = prefmask; /* Success, size non-prefetch only. */
-		else
-			additional_mem_size += additional_mem_size;
-		pbus_size_mem(bus, mask, IORESOURCE_MEM,
+				  additional_mem_size, realloc_head)) {
+					/*
+					 * Success, with pref mmio64,
+					 * next will size non-pref or
+					 * non-mmio64 */
+					mask = prefmask;
+					type2 = prefmask & ~IORESOURCE_MEM_64;
+					type3 = prefmask & ~IORESOURCE_PREFETCH;
+			}
+		}
+		if (!type2) {
+			prefmask &= ~IORESOURCE_MEM_64;
+			if (pbus_size_mem(bus, prefmask, prefmask,
+					 prefmask, prefmask,
+					 realloc_head ? 0 : additional_mem_size,
+					 additional_mem_size, realloc_head)) {
+				/* Success, next will size non-prefetch. */
+				mask = prefmask;
+			} else
+				additional_mem_size += additional_mem_size;
+			type2 = type3 = IORESOURCE_MEM;
+		}
+		pbus_size_mem(bus, mask, IORESOURCE_MEM, type2, type3,
 				realloc_head ? 0 : additional_mem_size,
 				additional_mem_size, realloc_head);
 		break;
@@ -1257,42 +1286,66 @@ static void __ref __pci_bridge_assign_re
 static void pci_bridge_release_resources(struct pci_bus *bus,
 					  unsigned long type)
 {
-	int idx;
-	bool changed = false;
-	struct pci_dev *dev;
+	struct pci_dev *dev = bus->self;
 	struct resource *r;
 	unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM |
-				  IORESOURCE_PREFETCH;
+				  IORESOURCE_PREFETCH | IORESOURCE_MEM_64;
+	unsigned old_flags = 0;
+	struct resource *b_res;
+	int idx = 1;
 
-	dev = bus->self;
-	for (idx = PCI_BRIDGE_RESOURCES; idx <= PCI_BRIDGE_RESOURCE_END;
-	     idx++) {
-		r = &dev->resource[idx];
-		if ((r->flags & type_mask) != type)
-			continue;
-		if (!r->parent)
-			continue;
-		/*
-		 * if there are children under that, we should release them
-		 *  all
-		 */
-		release_child_resources(r);
-		if (!release_resource(r)) {
-			dev_printk(KERN_DEBUG, &dev->dev,
-				 "resource %d %pR released\n", idx, r);
-			/* keep the old size */
-			r->end = resource_size(r) - 1;
-			r->start = 0;
-			r->flags = 0;
-			changed = true;
-		}
-	}
+	b_res = &dev->resource[PCI_BRIDGE_RESOURCES];
+
+	/*
+	 *     1. if there is io port assign fail, will release bridge
+	 *	  io port.
+	 *     2. if there is non pref mmio assign fail, release bridge
+	 *	  nonpref mmio.
+	 *     3. if there is 64bit pref mmio assign fail, and bridge pref
+	 *	  is 64bit, release bridge pref mmio.
+	 *     4. if there is pref mmio assign fail, and bridge pref is
+	 *	  32bit mmio, release bridge pref mmio
+	 *     5. if there is pref mmio assign fail, and bridge pref is not
+	 *	  assigned, release bridge nonpref mmio.
+	 */
+	if (type & IORESOURCE_IO)
+		idx = 0;
+	else if (!(type & IORESOURCE_PREFETCH))
+		idx = 1;
+	else if ((type & IORESOURCE_MEM_64) &&
+		 (b_res[2].flags & IORESOURCE_MEM_64))
+		idx = 2;
+	else if (!(b_res[2].flags & IORESOURCE_MEM_64) &&
+		 (b_res[2].flags & IORESOURCE_PREFETCH))
+		idx = 2;
+	else
+		idx = 1;
+
+	r = &b_res[idx];
+
+	if (!r->parent)
+		return;
+
+	/*
+	 * if there are children under that, we should release them
+	 *  all
+	 */
+	release_child_resources(r);
+	if (!release_resource(r)) {
+		type = old_flags = r->flags & type_mask;
+		dev_printk(KERN_DEBUG, &dev->dev, "resource %d %pR released\n",
+					PCI_BRIDGE_RESOURCES + idx, r);
+		/* keep the old size */
+		r->end = resource_size(r) - 1;
+		r->start = 0;
+		r->flags = 0;
 
-	if (changed) {
 		/* avoiding touch the one without PREF */
 		if (type & IORESOURCE_PREFETCH)
 			type = IORESOURCE_PREFETCH;
 		__pci_setup_bridge(bus, type);
+		/* for next child res under same bridge */
+		r->flags = old_flags;
 	}
 }
 
@@ -1471,7 +1524,7 @@ void pci_assign_unassigned_root_bus_reso
 	LIST_HEAD(fail_head);
 	struct pci_dev_resource *fail_res;
 	unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM |
-				  IORESOURCE_PREFETCH;
+				  IORESOURCE_PREFETCH | IORESOURCE_MEM_64;
 	int pci_try_num = 1;
 	enum enable_type enable_local;
 
Index: linux-2.6/drivers/pci/setup-res.c
===================================================================
--- linux-2.6.orig/drivers/pci/setup-res.c
+++ linux-2.6/drivers/pci/setup-res.c
@@ -211,15 +211,31 @@ static int __pci_assign_resource(struct
 
 	/* First, try exact prefetching match.. */
 	ret = pci_bus_alloc_resource(bus, res, size, align, min,
-				     IORESOURCE_PREFETCH,
+				     IORESOURCE_PREFETCH | IORESOURCE_MEM_64,
 				     pcibios_align_resource, dev);
 
-	if (ret < 0 && (res->flags & IORESOURCE_PREFETCH)) {
+	if (ret < 0 &&
+	    (res->flags & (IORESOURCE_PREFETCH | IORESOURCE_MEM_64)) ==
+	     (IORESOURCE_PREFETCH | IORESOURCE_MEM_64)) {
+		/*
+		 * That failed.
+		 *
+		 * Try 32bit pref
+		 */
+		ret = pci_bus_alloc_resource(bus, res, size, align, min,
+					     IORESOURCE_PREFETCH,
+					     pcibios_align_resource, dev);
+	}
+
+	if (ret < 0 &&
+	    (res->flags & (IORESOURCE_PREFETCH | IORESOURCE_MEM_64))) {
 		/*
 		 * That failed.
 		 *
 		 * But a prefetching area can handle a non-prefetching
 		 * window (it will just not perform as well).
+		 *
+		 * Also can put 64bit under 32bit range. (below 4g).
 		 */
 		ret = pci_bus_alloc_resource(bus, res, size, align, min, 0,
 					     pcibios_align_resource, dev);

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH v10 0/4] PCI: Restrict 64-bit prefetchable bridge windows to 64-bit resources
  2014-04-17 16:40 ` [PATCH v9] " Yinghai Lu
@ 2014-05-20  3:45   ` Bjorn Helgaas
  2014-05-20  3:45     ` [PATCH v10 1/4] " Bjorn Helgaas
                       ` (4 more replies)
  0 siblings, 5 replies; 35+ messages in thread
From: Bjorn Helgaas @ 2014-05-20  3:45 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: talal, Wei Yang, Gavin Shan, Benjamin Herrenschmidt, amirv,
	eugenia, Guo Chao, linux-pci, Or Gerlitz, Jack Morgenstein

I'd like to merge these for v3.16.  The first is Yinghai's v9 patch
unchanged except to resolve minor merge conflicts, write a changelog, and
fold in the mem64_mask removal posted previously as [1].

The rest are cleanups and comment changes to try to make the code more
readable.

This does fix a real bug -- in some cases we can't assign space to large
64-bit BARs even though there is plenty of 64-bit space available.  But I
am concerned that we are giving up performance in some cases (32-bit
prefetchable resources below a bridge with a 64-bit prefetchable window
will no longer be prefetchable).  With some additional work, it should be
possible to get that performance back.

Comments welcome.

[1] http://lkml.kernel.org/r/CAE9FiQUKYMPvXLsk84o-gH-j0-wiJGL0Wt1aTW2o4FPSm0PvbQ@mail.gmail.com

---

Bjorn Helgaas (3):
      PCI: Change pbus_size_mem() return values to be more conventional
      PCI: Simplify __pci_assign_resource() coding style
      PCI: Add resource allocation comments

Yinghai Lu (1):
      PCI: Restrict 64-bit prefetchable bridge windows to 64-bit resources


 drivers/pci/setup-bus.c |  207 +++++++++++++++++++++++++++++++++--------------
 drivers/pci/setup-res.c |   41 +++++++--
 2 files changed, 176 insertions(+), 72 deletions(-)

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH v10 1/4] PCI: Restrict 64-bit prefetchable bridge windows to 64-bit resources
  2014-05-20  3:45   ` [PATCH v10 0/4] PCI: Restrict 64-bit prefetchable bridge windows to 64-bit resources Bjorn Helgaas
@ 2014-05-20  3:45     ` Bjorn Helgaas
  2014-05-20  3:45     ` [PATCH v10 2/4] PCI: Change pbus_size_mem() return values to be more conventional Bjorn Helgaas
                       ` (3 subsequent siblings)
  4 siblings, 0 replies; 35+ messages in thread
From: Bjorn Helgaas @ 2014-05-20  3:45 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: talal, Wei Yang, Gavin Shan, Benjamin Herrenschmidt, amirv,
	eugenia, Guo Chao, linux-pci, Or Gerlitz, Jack Morgenstein

From: Yinghai Lu <yinghai@kernel.org>

This patch changes the way we handle 64-bit prefetchable bridge windows to
make it more likely that we can assign space to all devices.

Previously we put all prefetchable resources in the prefetchable bridge
window.  If any of those resources was 32-bit only, we restricted the
window to be below 4GB.

After this patch, we only put 64-bit prefetchable resources in a 64-bit
prefetchable window.  We put all 32-bit prefetchable resources in the
non-prefetchable window, even if there are no 64-bit prefetchable
resources.

With the previous approach, if there was a 32-bit prefetchable resource
behind a bridge, we forced the bridge's prefetchable window below 4GB,
which meant that even if there was plenty of space above 4GB available, we
couldn't use it, and assignment of large 64-bit resources could fail, as
in the bugzilla below.

The new strategy is:

  1) If there is a 64-bit prefetchable window, we put only 64-bit
     prefetchable resources in it.  Any 32-bit prefetchable resources go in
     the non-prefetchable window.

  2) If there is a 32-bit prefetchable window, we put both 32- and 64-bit
     prefetchable resources in it.

  3) If there is no prefetchable window, all MMIO resources go in the
     non-prefetchable window.

This reduces performance for 32-bit prefetchable resources below a bridge
with a 64-bit prefetchable window.  We previously assigned prefetchable
space, but now we'll assign non-prefetchable space.  This is the case even
if there are no 64-bit prefetchable resources, or if they would all fit
below 4GB.  In those cases, the old strategy would work and would have
better performance.

[bhelgaas: write changelog, add bugzilla link, fold in mem64_mask removal]
Link: https://bugzilla.kernel.org/show_bug.cgi?id=74151
Tested-by: Guo Chao <yan@linux.vnet.ibm.com>
Tested-by: Wei Yang <weiyang@linux.vnet.ibm.com>
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/setup-bus.c |  150 +++++++++++++++++++++++++++++++----------------
 drivers/pci/setup-res.c |   20 ++++++
 2 files changed, 117 insertions(+), 53 deletions(-)

diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index 5516657386a0..589f2261ab0e 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -713,12 +713,11 @@ static void pci_bridge_check_ranges(struct pci_bus *bus)
    bus resource of a given type. Note: we intentionally skip
    the bus resources which have already been assigned (that is,
    have non-NULL parent resource). */
-static struct resource *find_free_bus_resource(struct pci_bus *bus, unsigned long type)
+static struct resource *find_free_bus_resource(struct pci_bus *bus,
+			 unsigned long type_mask, unsigned long type)
 {
 	int i;
 	struct resource *r;
-	unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM |
-				  IORESOURCE_PREFETCH;
 
 	pci_bus_for_each_resource(bus, r, i) {
 		if (r == &ioport_resource || r == &iomem_resource)
@@ -815,7 +814,8 @@ static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size,
 		resource_size_t add_size, struct list_head *realloc_head)
 {
 	struct pci_dev *dev;
-	struct resource *b_res = find_free_bus_resource(bus, IORESOURCE_IO);
+	struct resource *b_res = find_free_bus_resource(bus, IORESOURCE_IO,
+							IORESOURCE_IO);
 	resource_size_t size = 0, size0 = 0, size1 = 0;
 	resource_size_t children_add_size = 0;
 	resource_size_t min_align, align;
@@ -907,6 +907,8 @@ static inline resource_size_t calculate_mem_align(resource_size_t *aligns,
  * @bus : the bus
  * @mask: mask the resource flag, then compare it with type
  * @type: the type of free resource from bridge
+ * @type2: second match type
+ * @type3: third match type
  * @min_size : the minimum memory window that must to be allocated
  * @add_size : additional optional memory window
  * @realloc_head : track the additional memory window on this list
@@ -915,16 +917,17 @@ static inline resource_size_t calculate_mem_align(resource_size_t *aligns,
  * guarantees that all child resources fit in this size.
  */
 static int pbus_size_mem(struct pci_bus *bus, unsigned long mask,
-			 unsigned long type, resource_size_t min_size,
-			resource_size_t add_size,
-			struct list_head *realloc_head)
+			 unsigned long type, unsigned long type2,
+			 unsigned long type3,
+			 resource_size_t min_size, resource_size_t add_size,
+			 struct list_head *realloc_head)
 {
 	struct pci_dev *dev;
 	resource_size_t min_align, align, size, size0, size1;
 	resource_size_t aligns[14];	/* Alignments from 1Mb to 8Gb */
 	int order, max_order;
-	struct resource *b_res = find_free_bus_resource(bus, type);
-	unsigned int mem64_mask = 0;
+	struct resource *b_res = find_free_bus_resource(bus,
+					mask | IORESOURCE_PREFETCH, type);
 	resource_size_t children_add_size = 0;
 
 	if (!b_res)
@@ -934,9 +937,6 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask,
 	max_order = 0;
 	size = 0;
 
-	mem64_mask = b_res->flags & IORESOURCE_MEM_64;
-	b_res->flags &= ~IORESOURCE_MEM_64;
-
 	list_for_each_entry(dev, &bus->devices, bus_list) {
 		int i;
 
@@ -944,7 +944,9 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask,
 			struct resource *r = &dev->resource[i];
 			resource_size_t r_size;
 
-			if (r->parent || (r->flags & mask) != type)
+			if (r->parent || ((r->flags & mask) != type &&
+					  (r->flags & mask) != type2 &&
+					  (r->flags & mask) != type3))
 				continue;
 			r_size = resource_size(r);
 #ifdef CONFIG_PCI_IOV
@@ -976,7 +978,6 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask,
 				aligns[order] += align;
 			if (order > max_order)
 				max_order = order;
-			mem64_mask &= r->flags & IORESOURCE_MEM_64;
 
 			if (realloc_head)
 				children_add_size += get_res_add_size(realloc_head, r);
@@ -1001,7 +1002,7 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask,
 	}
 	b_res->start = min_align;
 	b_res->end = size0 + min_align - 1;
-	b_res->flags |= IORESOURCE_STARTALIGN | mem64_mask;
+	b_res->flags |= IORESOURCE_STARTALIGN;
 	if (size1 > size0 && realloc_head) {
 		add_to_list(realloc_head, bus->self, b_res, size1-size0, min_align);
 		dev_printk(KERN_DEBUG, &bus->self->dev, "bridge window "
@@ -1117,8 +1118,9 @@ void __ref __pci_bus_size_bridges(struct pci_bus *bus,
 			struct list_head *realloc_head)
 {
 	struct pci_dev *dev;
-	unsigned long mask, prefmask;
+	unsigned long mask, prefmask, type2 = 0, type3 = 0;
 	resource_size_t additional_mem_size = 0, additional_io_size = 0;
+	struct resource *b_res;
 
 	list_for_each_entry(dev, &bus->devices, bus_list) {
 		struct pci_bus *b = dev->subordinate;
@@ -1163,15 +1165,37 @@ void __ref __pci_bus_size_bridges(struct pci_bus *bus,
 		   has already been allocated by arch code, try
 		   non-prefetchable range for both types of PCI memory
 		   resources. */
+		b_res = &bus->self->resource[PCI_BRIDGE_RESOURCES];
 		mask = IORESOURCE_MEM;
 		prefmask = IORESOURCE_MEM | IORESOURCE_PREFETCH;
-		if (pbus_size_mem(bus, prefmask, prefmask,
+		if (b_res[2].flags & IORESOURCE_MEM_64) {
+			prefmask |= IORESOURCE_MEM_64;
+			if (pbus_size_mem(bus, prefmask, prefmask,
+				  prefmask, prefmask,
 				  realloc_head ? 0 : additional_mem_size,
-				  additional_mem_size, realloc_head))
-			mask = prefmask; /* Success, size non-prefetch only. */
-		else
-			additional_mem_size += additional_mem_size;
-		pbus_size_mem(bus, mask, IORESOURCE_MEM,
+				  additional_mem_size, realloc_head)) {
+					/*
+					 * Success, with pref mmio64,
+					 * next will size non-pref or
+					 * non-mmio64 */
+					mask = prefmask;
+					type2 = prefmask & ~IORESOURCE_MEM_64;
+					type3 = prefmask & ~IORESOURCE_PREFETCH;
+			}
+		}
+		if (!type2) {
+			prefmask &= ~IORESOURCE_MEM_64;
+			if (pbus_size_mem(bus, prefmask, prefmask,
+					 prefmask, prefmask,
+					 realloc_head ? 0 : additional_mem_size,
+					 additional_mem_size, realloc_head)) {
+				/* Success, next will size non-prefetch. */
+				mask = prefmask;
+			} else
+				additional_mem_size += additional_mem_size;
+			type2 = type3 = IORESOURCE_MEM;
+		}
+		pbus_size_mem(bus, mask, IORESOURCE_MEM, type2, type3,
 				realloc_head ? 0 : additional_mem_size,
 				additional_mem_size, realloc_head);
 		break;
@@ -1257,42 +1281,66 @@ static void __ref __pci_bridge_assign_resources(const struct pci_dev *bridge,
 static void pci_bridge_release_resources(struct pci_bus *bus,
 					  unsigned long type)
 {
-	int idx;
-	bool changed = false;
-	struct pci_dev *dev;
+	struct pci_dev *dev = bus->self;
 	struct resource *r;
 	unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM |
-				  IORESOURCE_PREFETCH;
+				  IORESOURCE_PREFETCH | IORESOURCE_MEM_64;
+	unsigned old_flags = 0;
+	struct resource *b_res;
+	int idx = 1;
 
-	dev = bus->self;
-	for (idx = PCI_BRIDGE_RESOURCES; idx <= PCI_BRIDGE_RESOURCE_END;
-	     idx++) {
-		r = &dev->resource[idx];
-		if ((r->flags & type_mask) != type)
-			continue;
-		if (!r->parent)
-			continue;
-		/*
-		 * if there are children under that, we should release them
-		 *  all
-		 */
-		release_child_resources(r);
-		if (!release_resource(r)) {
-			dev_printk(KERN_DEBUG, &dev->dev,
-				 "resource %d %pR released\n", idx, r);
-			/* keep the old size */
-			r->end = resource_size(r) - 1;
-			r->start = 0;
-			r->flags = 0;
-			changed = true;
-		}
-	}
+	b_res = &dev->resource[PCI_BRIDGE_RESOURCES];
+
+	/*
+	 *     1. if there is io port assign fail, will release bridge
+	 *	  io port.
+	 *     2. if there is non pref mmio assign fail, release bridge
+	 *	  nonpref mmio.
+	 *     3. if there is 64bit pref mmio assign fail, and bridge pref
+	 *	  is 64bit, release bridge pref mmio.
+	 *     4. if there is pref mmio assign fail, and bridge pref is
+	 *	  32bit mmio, release bridge pref mmio
+	 *     5. if there is pref mmio assign fail, and bridge pref is not
+	 *	  assigned, release bridge nonpref mmio.
+	 */
+	if (type & IORESOURCE_IO)
+		idx = 0;
+	else if (!(type & IORESOURCE_PREFETCH))
+		idx = 1;
+	else if ((type & IORESOURCE_MEM_64) &&
+		 (b_res[2].flags & IORESOURCE_MEM_64))
+		idx = 2;
+	else if (!(b_res[2].flags & IORESOURCE_MEM_64) &&
+		 (b_res[2].flags & IORESOURCE_PREFETCH))
+		idx = 2;
+	else
+		idx = 1;
+
+	r = &b_res[idx];
+
+	if (!r->parent)
+		return;
+
+	/*
+	 * if there are children under that, we should release them
+	 *  all
+	 */
+	release_child_resources(r);
+	if (!release_resource(r)) {
+		type = old_flags = r->flags & type_mask;
+		dev_printk(KERN_DEBUG, &dev->dev, "resource %d %pR released\n",
+					PCI_BRIDGE_RESOURCES + idx, r);
+		/* keep the old size */
+		r->end = resource_size(r) - 1;
+		r->start = 0;
+		r->flags = 0;
 
-	if (changed) {
 		/* avoiding touch the one without PREF */
 		if (type & IORESOURCE_PREFETCH)
 			type = IORESOURCE_PREFETCH;
 		__pci_setup_bridge(bus, type);
+		/* for next child res under same bridge */
+		r->flags = old_flags;
 	}
 }
 
@@ -1471,7 +1519,7 @@ void pci_assign_unassigned_root_bus_resources(struct pci_bus *bus)
 	LIST_HEAD(fail_head);
 	struct pci_dev_resource *fail_res;
 	unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM |
-				  IORESOURCE_PREFETCH;
+				  IORESOURCE_PREFETCH | IORESOURCE_MEM_64;
 	int pci_try_num = 1;
 	enum enable_type enable_local;
 
diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c
index 7eed671d5586..2473f091a9cc 100644
--- a/drivers/pci/setup-res.c
+++ b/drivers/pci/setup-res.c
@@ -211,15 +211,31 @@ static int __pci_assign_resource(struct pci_bus *bus, struct pci_dev *dev,
 
 	/* First, try exact prefetching match.. */
 	ret = pci_bus_alloc_resource(bus, res, size, align, min,
-				     IORESOURCE_PREFETCH,
+				     IORESOURCE_PREFETCH | IORESOURCE_MEM_64,
 				     pcibios_align_resource, dev);
 
-	if (ret < 0 && (res->flags & IORESOURCE_PREFETCH)) {
+	if (ret < 0 &&
+	    (res->flags & (IORESOURCE_PREFETCH | IORESOURCE_MEM_64)) ==
+	     (IORESOURCE_PREFETCH | IORESOURCE_MEM_64)) {
+		/*
+		 * That failed.
+		 *
+		 * Try 32bit pref
+		 */
+		ret = pci_bus_alloc_resource(bus, res, size, align, min,
+					     IORESOURCE_PREFETCH,
+					     pcibios_align_resource, dev);
+	}
+
+	if (ret < 0 &&
+	    (res->flags & (IORESOURCE_PREFETCH | IORESOURCE_MEM_64))) {
 		/*
 		 * That failed.
 		 *
 		 * But a prefetching area can handle a non-prefetching
 		 * window (it will just not perform as well).
+		 *
+		 * Also can put 64bit under 32bit range. (below 4g).
 		 */
 		ret = pci_bus_alloc_resource(bus, res, size, align, min, 0,
 					     pcibios_align_resource, dev);


^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [PATCH v10 2/4] PCI: Change pbus_size_mem() return values to be more conventional
  2014-05-20  3:45   ` [PATCH v10 0/4] PCI: Restrict 64-bit prefetchable bridge windows to 64-bit resources Bjorn Helgaas
  2014-05-20  3:45     ` [PATCH v10 1/4] " Bjorn Helgaas
@ 2014-05-20  3:45     ` Bjorn Helgaas
  2014-05-22  8:20       ` Wei Yang
  2014-05-20  3:46     ` [PATCH v10 3/4] PCI: Simplify __pci_assign_resource() coding style Bjorn Helgaas
                       ` (2 subsequent siblings)
  4 siblings, 1 reply; 35+ messages in thread
From: Bjorn Helgaas @ 2014-05-20  3:45 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: talal, Wei Yang, Gavin Shan, Benjamin Herrenschmidt, amirv,
	eugenia, Guo Chao, linux-pci, Or Gerlitz, Jack Morgenstein

pbus_size_mem() previously returned 0 for failure and 1 for success.
Change it to return -ENOSPC for failure and 0 for success.

No functional change.

Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/setup-bus.c |   35 +++++++++++++++++++++--------------
 1 file changed, 21 insertions(+), 14 deletions(-)

diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index 589f2261ab0e..caa07fe6a23a 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -915,6 +915,10 @@ static inline resource_size_t calculate_mem_align(resource_size_t *aligns,
  *
  * Calculate the size of the bus and minimal alignment which
  * guarantees that all child resources fit in this size.
+ *
+ * Returns -ENOSPC 0 if there's no available bus resource of the desired type.
+ * Otherwise, sets the bus resource start/end to indicate the required
+ * size, adds things to realloc_head (if supplied), and returns 0.
  */
 static int pbus_size_mem(struct pci_bus *bus, unsigned long mask,
 			 unsigned long type, unsigned long type2,
@@ -931,7 +935,7 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask,
 	resource_size_t children_add_size = 0;
 
 	if (!b_res)
-		return 0;
+		return -ENOSPC;
 
 	memset(aligns, 0, sizeof(aligns));
 	max_order = 0;
@@ -998,7 +1002,7 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask,
 				 "%pR to %pR (unused)\n", b_res,
 				 &bus->busn_res);
 		b_res->flags = 0;
-		return 1;
+		return 0;
 	}
 	b_res->start = min_align;
 	b_res->end = size0 + min_align - 1;
@@ -1009,7 +1013,7 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask,
 				 "%pR to %pR add_size %llx\n", b_res,
 				 &bus->busn_res, (unsigned long long)size1-size0);
 	}
-	return 1;
+	return 0;
 }
 
 unsigned long pci_cardbus_resource_alignment(struct resource *res)
@@ -1121,6 +1125,7 @@ void __ref __pci_bus_size_bridges(struct pci_bus *bus,
 	unsigned long mask, prefmask, type2 = 0, type3 = 0;
 	resource_size_t additional_mem_size = 0, additional_io_size = 0;
 	struct resource *b_res;
+	int ret;
 
 	list_for_each_entry(dev, &bus->devices, bus_list) {
 		struct pci_bus *b = dev->subordinate;
@@ -1170,25 +1175,27 @@ void __ref __pci_bus_size_bridges(struct pci_bus *bus,
 		prefmask = IORESOURCE_MEM | IORESOURCE_PREFETCH;
 		if (b_res[2].flags & IORESOURCE_MEM_64) {
 			prefmask |= IORESOURCE_MEM_64;
-			if (pbus_size_mem(bus, prefmask, prefmask,
+			ret = pbus_size_mem(bus, prefmask, prefmask,
 				  prefmask, prefmask,
 				  realloc_head ? 0 : additional_mem_size,
-				  additional_mem_size, realloc_head)) {
-					/*
-					 * Success, with pref mmio64,
-					 * next will size non-pref or
-					 * non-mmio64 */
-					mask = prefmask;
-					type2 = prefmask & ~IORESOURCE_MEM_64;
-					type3 = prefmask & ~IORESOURCE_PREFETCH;
+				  additional_mem_size, realloc_head);
+			if (ret == 0) {
+				/*
+				 * Success, with pref mmio64,
+				 * next will size non-pref or
+				 * non-mmio64 */
+				mask = prefmask;
+				type2 = prefmask & ~IORESOURCE_MEM_64;
+				type3 = prefmask & ~IORESOURCE_PREFETCH;
 			}
 		}
 		if (!type2) {
 			prefmask &= ~IORESOURCE_MEM_64;
-			if (pbus_size_mem(bus, prefmask, prefmask,
+			ret = pbus_size_mem(bus, prefmask, prefmask,
 					 prefmask, prefmask,
 					 realloc_head ? 0 : additional_mem_size,
-					 additional_mem_size, realloc_head)) {
+					 additional_mem_size, realloc_head);
+			if (ret == 0) {
 				/* Success, next will size non-prefetch. */
 				mask = prefmask;
 			} else


^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [PATCH v10 3/4] PCI: Simplify __pci_assign_resource() coding style
  2014-05-20  3:45   ` [PATCH v10 0/4] PCI: Restrict 64-bit prefetchable bridge windows to 64-bit resources Bjorn Helgaas
  2014-05-20  3:45     ` [PATCH v10 1/4] " Bjorn Helgaas
  2014-05-20  3:45     ` [PATCH v10 2/4] PCI: Change pbus_size_mem() return values to be more conventional Bjorn Helgaas
@ 2014-05-20  3:46     ` Bjorn Helgaas
  2014-05-20  3:46     ` [PATCH v10 4/4] PCI: Add resource allocation comments Bjorn Helgaas
  2014-05-21  3:31     ` [PATCH v10 0/4] PCI: Restrict 64-bit prefetchable bridge windows to 64-bit resources Wei Yang
  4 siblings, 0 replies; 35+ messages in thread
From: Bjorn Helgaas @ 2014-05-20  3:46 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: talal, Wei Yang, Gavin Shan, Benjamin Herrenschmidt, amirv,
	eugenia, Guo Chao, linux-pci, Or Gerlitz, Jack Morgenstein

If an allocation succeeds, we can return success immediately.  Then we
don't have to test for success in the subsequent code.

No functional change.

Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/setup-res.c |   10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c
index 2473f091a9cc..3bdac9dc4a88 100644
--- a/drivers/pci/setup-res.c
+++ b/drivers/pci/setup-res.c
@@ -213,9 +213,10 @@ static int __pci_assign_resource(struct pci_bus *bus, struct pci_dev *dev,
 	ret = pci_bus_alloc_resource(bus, res, size, align, min,
 				     IORESOURCE_PREFETCH | IORESOURCE_MEM_64,
 				     pcibios_align_resource, dev);
+	if (ret == 0)
+		return 0;
 
-	if (ret < 0 &&
-	    (res->flags & (IORESOURCE_PREFETCH | IORESOURCE_MEM_64)) ==
+	if ((res->flags & (IORESOURCE_PREFETCH | IORESOURCE_MEM_64)) ==
 	     (IORESOURCE_PREFETCH | IORESOURCE_MEM_64)) {
 		/*
 		 * That failed.
@@ -225,10 +226,11 @@ static int __pci_assign_resource(struct pci_bus *bus, struct pci_dev *dev,
 		ret = pci_bus_alloc_resource(bus, res, size, align, min,
 					     IORESOURCE_PREFETCH,
 					     pcibios_align_resource, dev);
+		if (ret == 0)
+			return 0;
 	}
 
-	if (ret < 0 &&
-	    (res->flags & (IORESOURCE_PREFETCH | IORESOURCE_MEM_64))) {
+	if (res->flags & (IORESOURCE_PREFETCH | IORESOURCE_MEM_64)) {
 		/*
 		 * That failed.
 		 *


^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [PATCH v10 4/4] PCI: Add resource allocation comments
  2014-05-20  3:45   ` [PATCH v10 0/4] PCI: Restrict 64-bit prefetchable bridge windows to 64-bit resources Bjorn Helgaas
                       ` (2 preceding siblings ...)
  2014-05-20  3:46     ` [PATCH v10 3/4] PCI: Simplify __pci_assign_resource() coding style Bjorn Helgaas
@ 2014-05-20  3:46     ` Bjorn Helgaas
  2014-05-21  3:31     ` [PATCH v10 0/4] PCI: Restrict 64-bit prefetchable bridge windows to 64-bit resources Wei Yang
  4 siblings, 0 replies; 35+ messages in thread
From: Bjorn Helgaas @ 2014-05-20  3:46 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: talal, Wei Yang, Gavin Shan, Benjamin Herrenschmidt, amirv,
	eugenia, Guo Chao, linux-pci, Or Gerlitz, Jack Morgenstein

Add comments in the code to match the allocation strategy of 7c671426dfc3
("PCI: Restrict 64-bit prefetchable bridge windows to 64-bit resources").

No functional change.

Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/setup-bus.c |   58 +++++++++++++++++++++++++++++++++++------------
 drivers/pci/setup-res.c |   35 +++++++++++++++-------------
 2 files changed, 62 insertions(+), 31 deletions(-)

diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index caa07fe6a23a..758ffbcf24a8 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -1159,17 +1159,16 @@ void __ref __pci_bus_size_bridges(struct pci_bus *bus,
 			additional_io_size  = pci_hotplug_io_size;
 			additional_mem_size = pci_hotplug_mem_size;
 		}
-		/*
-		 * Follow thru
-		 */
+		/* Fall through */
 	default:
 		pbus_size_io(bus, realloc_head ? 0 : additional_io_size,
 			     additional_io_size, realloc_head);
-		/* If the bridge supports prefetchable range, size it
-		   separately. If it doesn't, or its prefetchable window
-		   has already been allocated by arch code, try
-		   non-prefetchable range for both types of PCI memory
-		   resources. */
+
+		/*
+		 * If there's a 64-bit prefetchable MMIO window, compute
+		 * the size required to put all 64-bit prefetchable
+		 * resources in it.
+		 */
 		b_res = &bus->self->resource[PCI_BRIDGE_RESOURCES];
 		mask = IORESOURCE_MEM;
 		prefmask = IORESOURCE_MEM | IORESOURCE_PREFETCH;
@@ -1179,29 +1178,58 @@ void __ref __pci_bus_size_bridges(struct pci_bus *bus,
 				  prefmask, prefmask,
 				  realloc_head ? 0 : additional_mem_size,
 				  additional_mem_size, realloc_head);
+
+			/*
+			 * If successful, all non-prefetchable resources
+			 * and any 32-bit prefetchable resources will go in
+			 * the non-prefetchable window.
+			 */
 			if (ret == 0) {
-				/*
-				 * Success, with pref mmio64,
-				 * next will size non-pref or
-				 * non-mmio64 */
 				mask = prefmask;
 				type2 = prefmask & ~IORESOURCE_MEM_64;
 				type3 = prefmask & ~IORESOURCE_PREFETCH;
 			}
 		}
+
+		/*
+		 * If there is no 64-bit prefetchable window, compute the
+		 * size required to put all prefetchable resources in the
+		 * 32-bit prefetchable window (if there is one).
+		 */
 		if (!type2) {
 			prefmask &= ~IORESOURCE_MEM_64;
 			ret = pbus_size_mem(bus, prefmask, prefmask,
 					 prefmask, prefmask,
 					 realloc_head ? 0 : additional_mem_size,
 					 additional_mem_size, realloc_head);
-			if (ret == 0) {
-				/* Success, next will size non-prefetch. */
+
+			/*
+			 * If successful, only non-prefetchable resources
+			 * will go in the non-prefetchable window.
+			 */
+			if (ret == 0)
 				mask = prefmask;
-			} else
+			else
 				additional_mem_size += additional_mem_size;
+
 			type2 = type3 = IORESOURCE_MEM;
 		}
+
+		/*
+		 * Compute the size required to put everything else in the
+		 * non-prefetchable window.  This includes:
+		 *
+		 *   - all non-prefetchable resources
+		 *   - 32-bit prefetchable resources if there's a 64-bit
+		 *     prefetchable window or no prefetchable window at all
+		 *   - 64-bit prefetchable resources if there's no
+		 *     prefetchable window at all
+		 *
+		 * Note that the strategy in __pci_assign_resource() must
+		 * match that used here.  Specifically, we cannot put a
+		 * 32-bit prefetchable resource in a 64-bit prefetchable
+		 * window.
+		 */
 		pbus_size_mem(bus, mask, IORESOURCE_MEM, type2, type3,
 				realloc_head ? 0 : additional_mem_size,
 				additional_mem_size, realloc_head);
diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c
index 3bdac9dc4a88..3da2542eb4df 100644
--- a/drivers/pci/setup-res.c
+++ b/drivers/pci/setup-res.c
@@ -209,20 +209,25 @@ static int __pci_assign_resource(struct pci_bus *bus, struct pci_dev *dev,
 
 	min = (res->flags & IORESOURCE_IO) ? PCIBIOS_MIN_IO : PCIBIOS_MIN_MEM;
 
-	/* First, try exact prefetching match.. */
+	/*
+	 * First, try exact prefetching match.  Even if a 64-bit
+	 * prefetchable bridge window is below 4GB, we can't put a 32-bit
+	 * prefetchable resource in it because pbus_size_mem() assumes a
+	 * 64-bit window will contain no 32-bit resources.  If we assign
+	 * things differently than they were sized, not everything will fit.
+	 */
 	ret = pci_bus_alloc_resource(bus, res, size, align, min,
 				     IORESOURCE_PREFETCH | IORESOURCE_MEM_64,
 				     pcibios_align_resource, dev);
 	if (ret == 0)
 		return 0;
 
+	/*
+	 * If the prefetchable window is only 32 bits wide, we can put
+	 * 64-bit prefetchable resources in it.
+	 */
 	if ((res->flags & (IORESOURCE_PREFETCH | IORESOURCE_MEM_64)) ==
 	     (IORESOURCE_PREFETCH | IORESOURCE_MEM_64)) {
-		/*
-		 * That failed.
-		 *
-		 * Try 32bit pref
-		 */
 		ret = pci_bus_alloc_resource(bus, res, size, align, min,
 					     IORESOURCE_PREFETCH,
 					     pcibios_align_resource, dev);
@@ -230,18 +235,16 @@ static int __pci_assign_resource(struct pci_bus *bus, struct pci_dev *dev,
 			return 0;
 	}
 
-	if (res->flags & (IORESOURCE_PREFETCH | IORESOURCE_MEM_64)) {
-		/*
-		 * That failed.
-		 *
-		 * But a prefetching area can handle a non-prefetching
-		 * window (it will just not perform as well).
-		 *
-		 * Also can put 64bit under 32bit range. (below 4g).
-		 */
+	/*
+	 * If we didn't find a better match, we can put any memory resource
+	 * in a non-prefetchable window.  If this resource is 32 bits and
+	 * non-prefetchable, the first call already tried the only possibility
+	 * so we don't need to try again.
+	 */
+	if (res->flags & (IORESOURCE_PREFETCH | IORESOURCE_MEM_64))
 		ret = pci_bus_alloc_resource(bus, res, size, align, min, 0,
 					     pcibios_align_resource, dev);
-	}
+
 	return ret;
 }
 


^ permalink raw reply related	[flat|nested] 35+ messages in thread

* Re: [PATCH v10 0/4] PCI: Restrict 64-bit prefetchable bridge windows to 64-bit resources
  2014-05-20  3:45   ` [PATCH v10 0/4] PCI: Restrict 64-bit prefetchable bridge windows to 64-bit resources Bjorn Helgaas
                       ` (3 preceding siblings ...)
  2014-05-20  3:46     ` [PATCH v10 4/4] PCI: Add resource allocation comments Bjorn Helgaas
@ 2014-05-21  3:31     ` Wei Yang
  2014-05-21  4:36       ` Bjorn Helgaas
  4 siblings, 1 reply; 35+ messages in thread
From: Wei Yang @ 2014-05-21  3:31 UTC (permalink / raw)
  To: Bjorn Helgaas
  Cc: Yinghai Lu, talal, Wei Yang, Gavin Shan, Benjamin Herrenschmidt,
	amirv, eugenia, Guo Chao, linux-pci, Or Gerlitz,
	Jack Morgenstein

Bjorn,

I tried to apply it on the lastest upstrea, but seems not work.

Which tree should I apply to? This one ?
git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci.git

On Mon, May 19, 2014 at 09:45:45PM -0600, Bjorn Helgaas wrote:
>I'd like to merge these for v3.16.  The first is Yinghai's v9 patch
>unchanged except to resolve minor merge conflicts, write a changelog, and
>fold in the mem64_mask removal posted previously as [1].
>
>The rest are cleanups and comment changes to try to make the code more
>readable.
>
>This does fix a real bug -- in some cases we can't assign space to large
>64-bit BARs even though there is plenty of 64-bit space available.  But I
>am concerned that we are giving up performance in some cases (32-bit
>prefetchable resources below a bridge with a 64-bit prefetchable window
>will no longer be prefetchable).  With some additional work, it should be
>possible to get that performance back.
>
>Comments welcome.
>
>[1] http://lkml.kernel.org/r/CAE9FiQUKYMPvXLsk84o-gH-j0-wiJGL0Wt1aTW2o4FPSm0PvbQ@mail.gmail.com
>
>---
>
>Bjorn Helgaas (3):
>      PCI: Change pbus_size_mem() return values to be more conventional
>      PCI: Simplify __pci_assign_resource() coding style
>      PCI: Add resource allocation comments
>
>Yinghai Lu (1):
>      PCI: Restrict 64-bit prefetchable bridge windows to 64-bit resources
>
>
> drivers/pci/setup-bus.c |  207 +++++++++++++++++++++++++++++++++--------------
> drivers/pci/setup-res.c |   41 +++++++--
> 2 files changed, 176 insertions(+), 72 deletions(-)

-- 
Richard Yang
Help you, Help me


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH v10 0/4] PCI: Restrict 64-bit prefetchable bridge windows to 64-bit resources
  2014-05-21  3:31     ` [PATCH v10 0/4] PCI: Restrict 64-bit prefetchable bridge windows to 64-bit resources Wei Yang
@ 2014-05-21  4:36       ` Bjorn Helgaas
  0 siblings, 0 replies; 35+ messages in thread
From: Bjorn Helgaas @ 2014-05-21  4:36 UTC (permalink / raw)
  To: Wei Yang, Guo Chao
  Cc: Yinghai Lu, talal, Gavin Shan, Benjamin Herrenschmidt,
	Amir Vadai, Eugenia Emantayev, linux-pci, Or Gerlitz,
	Jack Morgenstein

On Tue, May 20, 2014 at 9:31 PM, Wei Yang <weiyang@linux.vnet.ibm.com> wrote:
> Bjorn,
>
> I tried to apply it on the lastest upstrea, but seems not work.
>
> Which tree should I apply to? This one ?
> git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci.git

They are on top of my pci/resource branch, i.e.,
http://git.kernel.org/cgit/linux/kernel/git/helgaas/pci.git/log/?h=pci/resource

BTW, Guo, I'm waiting for your update to
https://bugzilla.kernel.org/show_bug.cgi?id=74151 .  I want to close
the loop with a specific example of how this patch fixes the problem.

> On Mon, May 19, 2014 at 09:45:45PM -0600, Bjorn Helgaas wrote:
>>I'd like to merge these for v3.16.  The first is Yinghai's v9 patch
>>unchanged except to resolve minor merge conflicts, write a changelog, and
>>fold in the mem64_mask removal posted previously as [1].
>>
>>The rest are cleanups and comment changes to try to make the code more
>>readable.
>>
>>This does fix a real bug -- in some cases we can't assign space to large
>>64-bit BARs even though there is plenty of 64-bit space available.  But I
>>am concerned that we are giving up performance in some cases (32-bit
>>prefetchable resources below a bridge with a 64-bit prefetchable window
>>will no longer be prefetchable).  With some additional work, it should be
>>possible to get that performance back.
>>
>>Comments welcome.
>>
>>[1] http://lkml.kernel.org/r/CAE9FiQUKYMPvXLsk84o-gH-j0-wiJGL0Wt1aTW2o4FPSm0PvbQ@mail.gmail.com
>>
>>---
>>
>>Bjorn Helgaas (3):
>>      PCI: Change pbus_size_mem() return values to be more conventional
>>      PCI: Simplify __pci_assign_resource() coding style
>>      PCI: Add resource allocation comments
>>
>>Yinghai Lu (1):
>>      PCI: Restrict 64-bit prefetchable bridge windows to 64-bit resources
>>
>>
>> drivers/pci/setup-bus.c |  207 +++++++++++++++++++++++++++++++++--------------
>> drivers/pci/setup-res.c |   41 +++++++--
>> 2 files changed, 176 insertions(+), 72 deletions(-)
>
> --
> Richard Yang
> Help you, Help me
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-pci" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH v10 2/4] PCI: Change pbus_size_mem() return values to be more conventional
  2014-05-20  3:45     ` [PATCH v10 2/4] PCI: Change pbus_size_mem() return values to be more conventional Bjorn Helgaas
@ 2014-05-22  8:20       ` Wei Yang
  2014-05-22 16:59         ` Bjorn Helgaas
  0 siblings, 1 reply; 35+ messages in thread
From: Wei Yang @ 2014-05-22  8:20 UTC (permalink / raw)
  To: Bjorn Helgaas
  Cc: Yinghai Lu, talal, Wei Yang, Gavin Shan, Benjamin Herrenschmidt,
	amirv, eugenia, Guo Chao, linux-pci, Or Gerlitz,
	Jack Morgenstein

On Mon, May 19, 2014 at 09:45:58PM -0600, Bjorn Helgaas wrote:
>pbus_size_mem() previously returned 0 for failure and 1 for success.
>Change it to return -ENOSPC for failure and 0 for success.
>
>No functional change.
>
>Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
>---
> drivers/pci/setup-bus.c |   35 +++++++++++++++++++++--------------
> 1 file changed, 21 insertions(+), 14 deletions(-)
>
>diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
>index 589f2261ab0e..caa07fe6a23a 100644
>--- a/drivers/pci/setup-bus.c
>+++ b/drivers/pci/setup-bus.c
>@@ -915,6 +915,10 @@ static inline resource_size_t calculate_mem_align(resource_size_t *aligns,
>  *
>  * Calculate the size of the bus and minimal alignment which
>  * guarantees that all child resources fit in this size.
>+ *
>+ * Returns -ENOSPC 0 if there's no available bus resource of the desired type.
                    ^^^
One small question, the 0 here means?


-- 
Richard Yang
Help you, Help me


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH v10 2/4] PCI: Change pbus_size_mem() return values to be more conventional
  2014-05-22  8:20       ` Wei Yang
@ 2014-05-22 16:59         ` Bjorn Helgaas
  0 siblings, 0 replies; 35+ messages in thread
From: Bjorn Helgaas @ 2014-05-22 16:59 UTC (permalink / raw)
  To: Wei Yang
  Cc: Yinghai Lu, talal, Gavin Shan, Benjamin Herrenschmidt,
	Amir Vadai, Eugenia Emantayev, Guo Chao, linux-pci, Or Gerlitz,
	Jack Morgenstein

On Thu, May 22, 2014 at 2:20 AM, Wei Yang <weiyang@linux.vnet.ibm.com> wrote:
> On Mon, May 19, 2014 at 09:45:58PM -0600, Bjorn Helgaas wrote:
>>pbus_size_mem() previously returned 0 for failure and 1 for success.
>>Change it to return -ENOSPC for failure and 0 for success.
>>
>>No functional change.
>>
>>Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
>>---
>> drivers/pci/setup-bus.c |   35 +++++++++++++++++++++--------------
>> 1 file changed, 21 insertions(+), 14 deletions(-)
>>
>>diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
>>index 589f2261ab0e..caa07fe6a23a 100644
>>--- a/drivers/pci/setup-bus.c
>>+++ b/drivers/pci/setup-bus.c
>>@@ -915,6 +915,10 @@ static inline resource_size_t calculate_mem_align(resource_size_t *aligns,
>>  *
>>  * Calculate the size of the bus and minimal alignment which
>>  * guarantees that all child resources fit in this size.
>>+ *
>>+ * Returns -ENOSPC 0 if there's no available bus resource of the desired type.
>                     ^^^
> One small question, the 0 here means?

Just a typo.  Fixed, thanks!

^ permalink raw reply	[flat|nested] 35+ messages in thread

end of thread, other threads:[~2014-05-22 17:00 UTC | newest]

Thread overview: 35+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <CAE9FiQXzRQ9S0mwwb58R2icBTtHHJ6NMVFMaxEgtVNP9mZVjZw@mail.gmail.com>
2014-03-07 20:08 ` [PATCH v7] PCI: Try best to allocate pref mmio 64bit above 4g Yinghai Lu
2014-04-04 22:43   ` Bjorn Helgaas
2014-04-08  2:57     ` Guo Chao
2014-04-08  7:18       ` Or Gerlitz
2014-04-08  7:41         ` Wei Yang
2014-04-08  7:55           ` Or Gerlitz
2014-04-08  8:22             ` Guo Chao
2014-04-08 15:02       ` Bjorn Helgaas
2014-04-09  7:52         ` Guo Chao
2014-04-10 17:26           ` Bjorn Helgaas
2014-04-10 21:23             ` Benjamin Herrenschmidt
2014-04-15 11:54             ` Guo Chao
2014-04-16  0:09               ` Bjorn Helgaas
2014-04-16  2:29                 ` Yinghai Lu
2014-04-16 17:06                   ` Bjorn Helgaas
2014-04-17  3:30                     ` Yinghai Lu
2014-04-16  4:16                 ` Yinghai Lu
2014-04-16 17:10                   ` Bjorn Helgaas
2014-04-16  6:30                 ` Benjamin Herrenschmidt
2014-04-16  6:33                   ` Benjamin Herrenschmidt
2014-04-16 17:15                     ` Bjorn Helgaas
2014-04-16 22:11   ` Bjorn Helgaas
2014-04-17  4:20     ` Yinghai Lu
2014-04-17 16:35       ` Yinghai Lu
2014-04-17  4:23 ` [PATCH v8] " Yinghai Lu
2014-04-17 16:40 ` [PATCH v9] " Yinghai Lu
2014-05-20  3:45   ` [PATCH v10 0/4] PCI: Restrict 64-bit prefetchable bridge windows to 64-bit resources Bjorn Helgaas
2014-05-20  3:45     ` [PATCH v10 1/4] " Bjorn Helgaas
2014-05-20  3:45     ` [PATCH v10 2/4] PCI: Change pbus_size_mem() return values to be more conventional Bjorn Helgaas
2014-05-22  8:20       ` Wei Yang
2014-05-22 16:59         ` Bjorn Helgaas
2014-05-20  3:46     ` [PATCH v10 3/4] PCI: Simplify __pci_assign_resource() coding style Bjorn Helgaas
2014-05-20  3:46     ` [PATCH v10 4/4] PCI: Add resource allocation comments Bjorn Helgaas
2014-05-21  3:31     ` [PATCH v10 0/4] PCI: Restrict 64-bit prefetchable bridge windows to 64-bit resources Wei Yang
2014-05-21  4:36       ` Bjorn Helgaas

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.