QEMU-Devel Archive on lore.kernel.org
 help / color / Atom feed
* [RFC for-5.1 0/4] Better handling of attempt NVLink2 unplug
@ 2020-03-26  5:40 David Gibson
  2020-03-26  5:40 ` [RFC for-5.1 1/4] spapr: Refactor locating NVLink2 devices for device tree creation David Gibson
                   ` (3 more replies)
  0 siblings, 4 replies; 14+ messages in thread
From: David Gibson @ 2020-03-26  5:40 UTC (permalink / raw)
  To: aik, groug; +Cc: David Gibson, qemu-ppc, clg, qemu-devel

Currently, attempting to unplug an NVLink2 device will generally
result in the guest crashing.  If you're lucky, it instead simply
won't work and remain in a "pending unplug" state indefinitely.

There is code we could we could theoretically improve in qemu to tear
these devices down better.  However since NVLink2 devices aren't hot
pluggable in hardware, the guest side drivers usually don't cope with
that anyway.

So, patch 4/4 blocks attempts to unplug NVLink2 devices.  The others
are some preliminary cleanups to get us towards there.

David Gibson (4):
  spapr: Refactor locating NVLink2 devices for device tree creation
  spapr: Helper to determine if a device is NVLink2 related
  spapr: Fix failure path for attempting to hot unplug PCI bridges
  spapr: Don't allow unplug of NVLink2 devices

 hw/ppc/spapr_pci.c          |   6 ++
 hw/ppc/spapr_pci_nvlink2.c  | 120 +++++++++++++++++++++++++-----------
 include/hw/pci-host/spapr.h |   1 +
 3 files changed, 91 insertions(+), 36 deletions(-)

-- 
2.25.1



^ permalink raw reply	[flat|nested] 14+ messages in thread

* [RFC for-5.1 1/4] spapr: Refactor locating NVLink2 devices for device tree creation
  2020-03-26  5:40 [RFC for-5.1 0/4] Better handling of attempt NVLink2 unplug David Gibson
@ 2020-03-26  5:40 ` David Gibson
  2020-03-26 11:57   ` Greg Kurz
  2020-03-26  5:40 ` [RFC for-5.1 2/4] spapr: Helper to determine if a device is NVLink2 related David Gibson
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 14+ messages in thread
From: David Gibson @ 2020-03-26  5:40 UTC (permalink / raw)
  To: aik, groug; +Cc: David Gibson, qemu-ppc, clg, qemu-devel

Currently spapr_phb_nvgpu_populate_pcidev_dt() works a little cryptically.
It steps through all the NVLink2 GPUs and NPUs and if they match the device
we're called for, we generate the relevant device tree information.

Make this a little more obvious by introducing helpers to determine it a
given PCI device is an NVLink2 GPU or NPU, returning the NVLink2 slot and
link number information as well.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 hw/ppc/spapr_pci_nvlink2.c | 115 +++++++++++++++++++++++++------------
 1 file changed, 79 insertions(+), 36 deletions(-)

diff --git a/hw/ppc/spapr_pci_nvlink2.c b/hw/ppc/spapr_pci_nvlink2.c
index 8332d5694e..7d3a685421 100644
--- a/hw/ppc/spapr_pci_nvlink2.c
+++ b/hw/ppc/spapr_pci_nvlink2.c
@@ -390,13 +390,12 @@ void spapr_phb_nvgpu_ram_populate_dt(SpaprPhbState *sphb, void *fdt)
 
 }
 
-void spapr_phb_nvgpu_populate_pcidev_dt(PCIDevice *dev, void *fdt, int offset,
-                                        SpaprPhbState *sphb)
+static bool is_nvgpu(PCIDevice *dev, SpaprPhbState *sphb, int *slot)
 {
-    int i, j;
+    int i;
 
     if (!sphb->nvgpus) {
-        return;
+        return false;
     }
 
     for (i = 0; i < sphb->nvgpus->num; ++i) {
@@ -406,47 +405,91 @@ void spapr_phb_nvgpu_populate_pcidev_dt(PCIDevice *dev, void *fdt, int offset,
         if (!nvslot->gpdev) {
             continue;
         }
+
         if (dev == nvslot->gpdev) {
-            uint32_t npus[nvslot->linknum];
+            if (slot) {
+                *slot = i;
+            }
+            return true;
+        }
+    }
 
-            for (j = 0; j < nvslot->linknum; ++j) {
-                PCIDevice *npdev = nvslot->links[j].npdev;
+    return false;
+}
 
-                npus[j] = cpu_to_be32(PHANDLE_PCIDEV(sphb, npdev));
-            }
-            _FDT(fdt_setprop(fdt, offset, "ibm,npu", npus,
-                             j * sizeof(npus[0])));
-            _FDT((fdt_setprop_cell(fdt, offset, "phandle",
-                                   PHANDLE_PCIDEV(sphb, dev))));
+static bool is_nvnpu(PCIDevice *dev, SpaprPhbState *sphb, int *slot, int *link)
+{
+    int i, j;
+
+    if (!sphb->nvgpus) {
+        return false;
+    }
+
+    for (i = 0; i < sphb->nvgpus->num; ++i) {
+        SpaprPhbPciNvGpuSlot *nvslot = &sphb->nvgpus->slots[i];
+
+        /* Skip "slot" without attached GPU */
+        if (!nvslot->gpdev) {
             continue;
         }
 
         for (j = 0; j < nvslot->linknum; ++j) {
-            if (dev != nvslot->links[j].npdev) {
-                continue;
+            if (dev == nvslot->links[j].npdev) {
+                if (slot) {
+                    *slot = i;
+                }
+                if (link) {
+                    *link = j;
+                }
+                return true;
             }
+        }
+    }
 
-            _FDT((fdt_setprop_cell(fdt, offset, "phandle",
-                                   PHANDLE_PCIDEV(sphb, dev))));
-            _FDT(fdt_setprop_cell(fdt, offset, "ibm,gpu",
-                                  PHANDLE_PCIDEV(sphb, nvslot->gpdev)));
-            _FDT((fdt_setprop_cell(fdt, offset, "ibm,nvlink",
-                                   PHANDLE_NVLINK(sphb, i, j))));
-            /*
-             * If we ever want to emulate GPU RAM at the same location as on
-             * the host - here is the encoding GPA->TGT:
-             *
-             * gta  = ((sphb->nv2_gpa >> 42) & 0x1) << 42;
-             * gta |= ((sphb->nv2_gpa >> 45) & 0x3) << 43;
-             * gta |= ((sphb->nv2_gpa >> 49) & 0x3) << 45;
-             * gta |= sphb->nv2_gpa & ((1UL << 43) - 1);
-             */
-            _FDT(fdt_setprop_cell(fdt, offset, "memory-region",
-                                  PHANDLE_GPURAM(sphb, i)));
-            _FDT(fdt_setprop_u64(fdt, offset, "ibm,device-tgt-addr",
-                                 nvslot->tgt));
-            _FDT(fdt_setprop_cell(fdt, offset, "ibm,nvlink-speed",
-                                  nvslot->links[j].link_speed));
+    return false;
+}
+
+void spapr_phb_nvgpu_populate_pcidev_dt(PCIDevice *dev, void *fdt, int offset,
+                                        SpaprPhbState *sphb)
+{
+    int slot, link;
+
+    if (is_nvgpu(dev, sphb, &slot)) {
+        SpaprPhbPciNvGpuSlot *nvslot = &sphb->nvgpus->slots[slot];
+        uint32_t npus[nvslot->linknum];
+
+        for (link = 0; link < nvslot->linknum; ++link) {
+            PCIDevice *npdev = nvslot->links[link].npdev;
+
+            npus[link] = cpu_to_be32(PHANDLE_PCIDEV(sphb, npdev));
         }
+        _FDT(fdt_setprop(fdt, offset, "ibm,npu", npus,
+                         link * sizeof(npus[0])));
+        _FDT((fdt_setprop_cell(fdt, offset, "phandle",
+                               PHANDLE_PCIDEV(sphb, dev))));
+    } else if (is_nvnpu(dev, sphb, &slot, &link)) {
+        SpaprPhbPciNvGpuSlot *nvslot = &sphb->nvgpus->slots[slot];
+
+        _FDT((fdt_setprop_cell(fdt, offset, "phandle",
+                               PHANDLE_PCIDEV(sphb, dev))));
+        _FDT(fdt_setprop_cell(fdt, offset, "ibm,gpu",
+                              PHANDLE_PCIDEV(sphb, nvslot->gpdev)));
+        _FDT((fdt_setprop_cell(fdt, offset, "ibm,nvlink",
+                               PHANDLE_NVLINK(sphb, slot, link))));
+        /*
+         * If we ever want to emulate GPU RAM at the same location as
+         * on the host - here is the encoding GPA->TGT:
+         *
+         * gta  = ((sphb->nv2_gpa >> 42) & 0x1) << 42;
+         * gta |= ((sphb->nv2_gpa >> 45) & 0x3) << 43;
+         * gta |= ((sphb->nv2_gpa >> 49) & 0x3) << 45;
+         * gta |= sphb->nv2_gpa & ((1UL << 43) - 1);
+         */
+        _FDT(fdt_setprop_cell(fdt, offset, "memory-region",
+                              PHANDLE_GPURAM(sphb, slot)));
+        _FDT(fdt_setprop_u64(fdt, offset, "ibm,device-tgt-addr",
+                             nvslot->tgt));
+        _FDT(fdt_setprop_cell(fdt, offset, "ibm,nvlink-speed",
+                              nvslot->links[link].link_speed));
     }
 }
-- 
2.25.1



^ permalink raw reply	[flat|nested] 14+ messages in thread

* [RFC for-5.1 2/4] spapr: Helper to determine if a device is NVLink2 related
  2020-03-26  5:40 [RFC for-5.1 0/4] Better handling of attempt NVLink2 unplug David Gibson
  2020-03-26  5:40 ` [RFC for-5.1 1/4] spapr: Refactor locating NVLink2 devices for device tree creation David Gibson
@ 2020-03-26  5:40 ` David Gibson
  2020-03-26 11:58   ` Greg Kurz
  2020-03-26  5:40 ` [RFC for-5.1 3/4] spapr: Fix failure path for attempting to hot unplug PCI bridges David Gibson
  2020-03-26  5:40 ` [RFC for-5.1 4/4] spapr: Don't allow unplug of NVLink2 devices David Gibson
  3 siblings, 1 reply; 14+ messages in thread
From: David Gibson @ 2020-03-26  5:40 UTC (permalink / raw)
  To: aik, groug; +Cc: David Gibson, qemu-ppc, clg, qemu-devel

This adds a simple exported helper function which determins if a given
(supposedly) PCI device is actually an NVLink2 device, which has some
special considerations.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 hw/ppc/spapr_pci_nvlink2.c  | 5 +++++
 include/hw/pci-host/spapr.h | 1 +
 2 files changed, 6 insertions(+)

diff --git a/hw/ppc/spapr_pci_nvlink2.c b/hw/ppc/spapr_pci_nvlink2.c
index 7d3a685421..0cec1ae02b 100644
--- a/hw/ppc/spapr_pci_nvlink2.c
+++ b/hw/ppc/spapr_pci_nvlink2.c
@@ -449,6 +449,11 @@ static bool is_nvnpu(PCIDevice *dev, SpaprPhbState *sphb, int *slot, int *link)
     return false;
 }
 
+bool spapr_phb_is_nvlink_dev(PCIDevice *dev, SpaprPhbState *sphb)
+{
+    return is_nvgpu(dev, sphb, NULL) || is_nvnpu(dev, sphb, NULL, NULL);
+}
+
 void spapr_phb_nvgpu_populate_pcidev_dt(PCIDevice *dev, void *fdt, int offset,
                                         SpaprPhbState *sphb)
 {
diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h
index 8877ff51fb..eaba4a5825 100644
--- a/include/hw/pci-host/spapr.h
+++ b/include/hw/pci-host/spapr.h
@@ -156,6 +156,7 @@ void spapr_phb_nvgpu_free(SpaprPhbState *sphb);
 void spapr_phb_nvgpu_populate_dt(SpaprPhbState *sphb, void *fdt, int bus_off,
                                  Error **errp);
 void spapr_phb_nvgpu_ram_populate_dt(SpaprPhbState *sphb, void *fdt);
+bool spapr_phb_is_nvlink_dev(PCIDevice *dev, SpaprPhbState *sphb);
 void spapr_phb_nvgpu_populate_pcidev_dt(PCIDevice *dev, void *fdt, int offset,
                                         SpaprPhbState *sphb);
 #else
-- 
2.25.1



^ permalink raw reply	[flat|nested] 14+ messages in thread

* [RFC for-5.1 3/4] spapr: Fix failure path for attempting to hot unplug PCI bridges
  2020-03-26  5:40 [RFC for-5.1 0/4] Better handling of attempt NVLink2 unplug David Gibson
  2020-03-26  5:40 ` [RFC for-5.1 1/4] spapr: Refactor locating NVLink2 devices for device tree creation David Gibson
  2020-03-26  5:40 ` [RFC for-5.1 2/4] spapr: Helper to determine if a device is NVLink2 related David Gibson
@ 2020-03-26  5:40 ` David Gibson
  2020-03-26 12:18   ` Greg Kurz
  2020-03-26  5:40 ` [RFC for-5.1 4/4] spapr: Don't allow unplug of NVLink2 devices David Gibson
  3 siblings, 1 reply; 14+ messages in thread
From: David Gibson @ 2020-03-26  5:40 UTC (permalink / raw)
  To: aik, groug; +Cc: David Gibson, qemu-ppc, clg, qemu-devel

For various technical reasons we can't currently allow unplug a PCI to PCI
bridge on the pseries machine.  spapr_pci_unplug_request() correctly
generates an error message if that's attempted.

But.. if the given errp is not error_abort or error_fatal, it doesn't
actually stop trying to unplug the bridge anyway.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 hw/ppc/spapr_pci.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index 709a52780d..55ca9dee1e 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -1663,6 +1663,7 @@ static void spapr_pci_unplug_request(HotplugHandler *plug_handler,
 
         if (pc->is_bridge) {
             error_setg(errp, "PCI: Hot unplug of PCI bridges not supported");
+            return;
         }
 
         /* ensure any other present functions are pending unplug */
-- 
2.25.1



^ permalink raw reply	[flat|nested] 14+ messages in thread

* [RFC for-5.1 4/4] spapr: Don't allow unplug of NVLink2 devices
  2020-03-26  5:40 [RFC for-5.1 0/4] Better handling of attempt NVLink2 unplug David Gibson
                   ` (2 preceding siblings ...)
  2020-03-26  5:40 ` [RFC for-5.1 3/4] spapr: Fix failure path for attempting to hot unplug PCI bridges David Gibson
@ 2020-03-26  5:40 ` David Gibson
  2020-03-26 12:27   ` Greg Kurz
  2020-03-28 12:32   ` Alexey Kardashevskiy
  3 siblings, 2 replies; 14+ messages in thread
From: David Gibson @ 2020-03-26  5:40 UTC (permalink / raw)
  To: aik, groug; +Cc: David Gibson, qemu-ppc, clg, qemu-devel

Currently, we can't properly handle unplug of NVLink2 devices, because we
don't have code to tear down their special memory resources.  There's not
a lot of impetus to implement that. Since hardware NVLink2 devices can't
be hot unplugged, the guest side drivers don't usually support unplug
anyway.

Therefore, simply prevent unplug of NVLink2 devices.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 hw/ppc/spapr_pci.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index 55ca9dee1e..5c8262413a 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -1666,6 +1666,11 @@ static void spapr_pci_unplug_request(HotplugHandler *plug_handler,
             return;
         }
 
+        if (spapr_phb_is_nvlink_dev(pdev, phb)) {
+            error_setg(errp, "PCI: Cannot unplug NVLink2 devices");
+            return;
+        }
+
         /* ensure any other present functions are pending unplug */
         if (PCI_FUNC(pdev->devfn) == 0) {
             for (i = 1; i < 8; i++) {
-- 
2.25.1



^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC for-5.1 1/4] spapr: Refactor locating NVLink2 devices for device tree creation
  2020-03-26  5:40 ` [RFC for-5.1 1/4] spapr: Refactor locating NVLink2 devices for device tree creation David Gibson
@ 2020-03-26 11:57   ` Greg Kurz
  2020-03-26 23:55     ` David Gibson
  0 siblings, 1 reply; 14+ messages in thread
From: Greg Kurz @ 2020-03-26 11:57 UTC (permalink / raw)
  To: David Gibson; +Cc: aik, qemu-ppc, clg, qemu-devel

On Thu, 26 Mar 2020 16:40:06 +1100
David Gibson <david@gibson.dropbear.id.au> wrote:

> Currently spapr_phb_nvgpu_populate_pcidev_dt() works a little cryptically.
> It steps through all the NVLink2 GPUs and NPUs and if they match the device
> we're called for, we generate the relevant device tree information.
> 
> Make this a little more obvious by introducing helpers to determine it a

... to determine if a

> given PCI device is an NVLink2 GPU or NPU, returning the NVLink2 slot and
> link number information as well.
> 
> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> ---

LGTM

Reviewed-by: Greg Kurz <groug@kaod.org>

>  hw/ppc/spapr_pci_nvlink2.c | 115 +++++++++++++++++++++++++------------
>  1 file changed, 79 insertions(+), 36 deletions(-)
> 
> diff --git a/hw/ppc/spapr_pci_nvlink2.c b/hw/ppc/spapr_pci_nvlink2.c
> index 8332d5694e..7d3a685421 100644
> --- a/hw/ppc/spapr_pci_nvlink2.c
> +++ b/hw/ppc/spapr_pci_nvlink2.c
> @@ -390,13 +390,12 @@ void spapr_phb_nvgpu_ram_populate_dt(SpaprPhbState *sphb, void *fdt)
>  
>  }
>  
> -void spapr_phb_nvgpu_populate_pcidev_dt(PCIDevice *dev, void *fdt, int offset,
> -                                        SpaprPhbState *sphb)
> +static bool is_nvgpu(PCIDevice *dev, SpaprPhbState *sphb, int *slot)
>  {
> -    int i, j;
> +    int i;
>  
>      if (!sphb->nvgpus) {
> -        return;
> +        return false;
>      }
>  
>      for (i = 0; i < sphb->nvgpus->num; ++i) {
> @@ -406,47 +405,91 @@ void spapr_phb_nvgpu_populate_pcidev_dt(PCIDevice *dev, void *fdt, int offset,
>          if (!nvslot->gpdev) {
>              continue;
>          }
> +
>          if (dev == nvslot->gpdev) {
> -            uint32_t npus[nvslot->linknum];
> +            if (slot) {
> +                *slot = i;
> +            }
> +            return true;
> +        }
> +    }
>  
> -            for (j = 0; j < nvslot->linknum; ++j) {
> -                PCIDevice *npdev = nvslot->links[j].npdev;
> +    return false;
> +}
>  
> -                npus[j] = cpu_to_be32(PHANDLE_PCIDEV(sphb, npdev));
> -            }
> -            _FDT(fdt_setprop(fdt, offset, "ibm,npu", npus,
> -                             j * sizeof(npus[0])));
> -            _FDT((fdt_setprop_cell(fdt, offset, "phandle",
> -                                   PHANDLE_PCIDEV(sphb, dev))));
> +static bool is_nvnpu(PCIDevice *dev, SpaprPhbState *sphb, int *slot, int *link)
> +{
> +    int i, j;
> +
> +    if (!sphb->nvgpus) {
> +        return false;
> +    }
> +
> +    for (i = 0; i < sphb->nvgpus->num; ++i) {
> +        SpaprPhbPciNvGpuSlot *nvslot = &sphb->nvgpus->slots[i];
> +
> +        /* Skip "slot" without attached GPU */
> +        if (!nvslot->gpdev) {
>              continue;
>          }
>  
>          for (j = 0; j < nvslot->linknum; ++j) {
> -            if (dev != nvslot->links[j].npdev) {
> -                continue;
> +            if (dev == nvslot->links[j].npdev) {
> +                if (slot) {
> +                    *slot = i;
> +                }
> +                if (link) {
> +                    *link = j;
> +                }
> +                return true;
>              }
> +        }
> +    }
>  
> -            _FDT((fdt_setprop_cell(fdt, offset, "phandle",
> -                                   PHANDLE_PCIDEV(sphb, dev))));
> -            _FDT(fdt_setprop_cell(fdt, offset, "ibm,gpu",
> -                                  PHANDLE_PCIDEV(sphb, nvslot->gpdev)));
> -            _FDT((fdt_setprop_cell(fdt, offset, "ibm,nvlink",
> -                                   PHANDLE_NVLINK(sphb, i, j))));
> -            /*
> -             * If we ever want to emulate GPU RAM at the same location as on
> -             * the host - here is the encoding GPA->TGT:
> -             *
> -             * gta  = ((sphb->nv2_gpa >> 42) & 0x1) << 42;
> -             * gta |= ((sphb->nv2_gpa >> 45) & 0x3) << 43;
> -             * gta |= ((sphb->nv2_gpa >> 49) & 0x3) << 45;
> -             * gta |= sphb->nv2_gpa & ((1UL << 43) - 1);
> -             */
> -            _FDT(fdt_setprop_cell(fdt, offset, "memory-region",
> -                                  PHANDLE_GPURAM(sphb, i)));
> -            _FDT(fdt_setprop_u64(fdt, offset, "ibm,device-tgt-addr",
> -                                 nvslot->tgt));
> -            _FDT(fdt_setprop_cell(fdt, offset, "ibm,nvlink-speed",
> -                                  nvslot->links[j].link_speed));
> +    return false;
> +}
> +
> +void spapr_phb_nvgpu_populate_pcidev_dt(PCIDevice *dev, void *fdt, int offset,
> +                                        SpaprPhbState *sphb)
> +{
> +    int slot, link;
> +
> +    if (is_nvgpu(dev, sphb, &slot)) {
> +        SpaprPhbPciNvGpuSlot *nvslot = &sphb->nvgpus->slots[slot];
> +        uint32_t npus[nvslot->linknum];
> +
> +        for (link = 0; link < nvslot->linknum; ++link) {
> +            PCIDevice *npdev = nvslot->links[link].npdev;
> +
> +            npus[link] = cpu_to_be32(PHANDLE_PCIDEV(sphb, npdev));
>          }
> +        _FDT(fdt_setprop(fdt, offset, "ibm,npu", npus,
> +                         link * sizeof(npus[0])));
> +        _FDT((fdt_setprop_cell(fdt, offset, "phandle",
> +                               PHANDLE_PCIDEV(sphb, dev))));
> +    } else if (is_nvnpu(dev, sphb, &slot, &link)) {
> +        SpaprPhbPciNvGpuSlot *nvslot = &sphb->nvgpus->slots[slot];
> +
> +        _FDT((fdt_setprop_cell(fdt, offset, "phandle",
> +                               PHANDLE_PCIDEV(sphb, dev))));
> +        _FDT(fdt_setprop_cell(fdt, offset, "ibm,gpu",
> +                              PHANDLE_PCIDEV(sphb, nvslot->gpdev)));
> +        _FDT((fdt_setprop_cell(fdt, offset, "ibm,nvlink",
> +                               PHANDLE_NVLINK(sphb, slot, link))));
> +        /*
> +         * If we ever want to emulate GPU RAM at the same location as
> +         * on the host - here is the encoding GPA->TGT:
> +         *
> +         * gta  = ((sphb->nv2_gpa >> 42) & 0x1) << 42;
> +         * gta |= ((sphb->nv2_gpa >> 45) & 0x3) << 43;
> +         * gta |= ((sphb->nv2_gpa >> 49) & 0x3) << 45;
> +         * gta |= sphb->nv2_gpa & ((1UL << 43) - 1);
> +         */
> +        _FDT(fdt_setprop_cell(fdt, offset, "memory-region",
> +                              PHANDLE_GPURAM(sphb, slot)));
> +        _FDT(fdt_setprop_u64(fdt, offset, "ibm,device-tgt-addr",
> +                             nvslot->tgt));
> +        _FDT(fdt_setprop_cell(fdt, offset, "ibm,nvlink-speed",
> +                              nvslot->links[link].link_speed));
>      }
>  }



^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC for-5.1 2/4] spapr: Helper to determine if a device is NVLink2 related
  2020-03-26  5:40 ` [RFC for-5.1 2/4] spapr: Helper to determine if a device is NVLink2 related David Gibson
@ 2020-03-26 11:58   ` Greg Kurz
  0 siblings, 0 replies; 14+ messages in thread
From: Greg Kurz @ 2020-03-26 11:58 UTC (permalink / raw)
  To: David Gibson; +Cc: aik, qemu-ppc, clg, qemu-devel

On Thu, 26 Mar 2020 16:40:07 +1100
David Gibson <david@gibson.dropbear.id.au> wrote:

> This adds a simple exported helper function which determins if a given
> (supposedly) PCI device is actually an NVLink2 device, which has some
> special considerations.
> 
> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> ---

Reviewed-by: Greg Kurz <groug@kaod.org>

>  hw/ppc/spapr_pci_nvlink2.c  | 5 +++++
>  include/hw/pci-host/spapr.h | 1 +
>  2 files changed, 6 insertions(+)
> 
> diff --git a/hw/ppc/spapr_pci_nvlink2.c b/hw/ppc/spapr_pci_nvlink2.c
> index 7d3a685421..0cec1ae02b 100644
> --- a/hw/ppc/spapr_pci_nvlink2.c
> +++ b/hw/ppc/spapr_pci_nvlink2.c
> @@ -449,6 +449,11 @@ static bool is_nvnpu(PCIDevice *dev, SpaprPhbState *sphb, int *slot, int *link)
>      return false;
>  }
>  
> +bool spapr_phb_is_nvlink_dev(PCIDevice *dev, SpaprPhbState *sphb)
> +{
> +    return is_nvgpu(dev, sphb, NULL) || is_nvnpu(dev, sphb, NULL, NULL);
> +}
> +
>  void spapr_phb_nvgpu_populate_pcidev_dt(PCIDevice *dev, void *fdt, int offset,
>                                          SpaprPhbState *sphb)
>  {
> diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h
> index 8877ff51fb..eaba4a5825 100644
> --- a/include/hw/pci-host/spapr.h
> +++ b/include/hw/pci-host/spapr.h
> @@ -156,6 +156,7 @@ void spapr_phb_nvgpu_free(SpaprPhbState *sphb);
>  void spapr_phb_nvgpu_populate_dt(SpaprPhbState *sphb, void *fdt, int bus_off,
>                                   Error **errp);
>  void spapr_phb_nvgpu_ram_populate_dt(SpaprPhbState *sphb, void *fdt);
> +bool spapr_phb_is_nvlink_dev(PCIDevice *dev, SpaprPhbState *sphb);
>  void spapr_phb_nvgpu_populate_pcidev_dt(PCIDevice *dev, void *fdt, int offset,
>                                          SpaprPhbState *sphb);
>  #else



^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC for-5.1 3/4] spapr: Fix failure path for attempting to hot unplug PCI bridges
  2020-03-26  5:40 ` [RFC for-5.1 3/4] spapr: Fix failure path for attempting to hot unplug PCI bridges David Gibson
@ 2020-03-26 12:18   ` Greg Kurz
  2020-03-26 23:54     ` David Gibson
  0 siblings, 1 reply; 14+ messages in thread
From: Greg Kurz @ 2020-03-26 12:18 UTC (permalink / raw)
  To: David Gibson; +Cc: aik, qemu-ppc, clg, qemu-devel

On Thu, 26 Mar 2020 16:40:08 +1100
David Gibson <david@gibson.dropbear.id.au> wrote:

> For various technical reasons we can't currently allow unplug a PCI to PCI
> bridge on the pseries machine.  spapr_pci_unplug_request() correctly
> generates an error message if that's attempted.
> 
> But.. if the given errp is not error_abort or error_fatal,

Which is the always case when trying to unplug a device AFAICT:

void qdev_unplug(DeviceState *dev, Error **errp)
{
    DeviceClass *dc = DEVICE_GET_CLASS(dev);
    HotplugHandler *hotplug_ctrl;
    HotplugHandlerClass *hdc;
    Error *local_err = NULL;

    [...]
    hdc = HOTPLUG_HANDLER_GET_CLASS(hotplug_ctrl);
    if (hdc->unplug_request) {
        hotplug_handler_unplug_request(hotplug_ctrl, dev, &local_err);

And anyway, spapr_pci_unplug_request() shouldn't rely on the caller
passing &error_fatal or &error_abort to do the right thing. Calling
error_setg() without returning right away is a dangerous practice
since it would cause a subsequent call to error_setg() with the
same errp to abort QEMU.

> it doesn't actually stop trying to unplug the bridge anyway.
> 

This looks like a bug fix that could go to 5.0 IMHO.

Maybe add this tag ?

   Fixes: 14e714900f6b "spapr: Allow hot plug/unplug of PCI bridges and devices under PCI bridges"

> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> ---

Reviewed-by: Greg Kurz <groug@kaod.org>

>  hw/ppc/spapr_pci.c | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
> index 709a52780d..55ca9dee1e 100644
> --- a/hw/ppc/spapr_pci.c
> +++ b/hw/ppc/spapr_pci.c
> @@ -1663,6 +1663,7 @@ static void spapr_pci_unplug_request(HotplugHandler *plug_handler,
>  
>          if (pc->is_bridge) {
>              error_setg(errp, "PCI: Hot unplug of PCI bridges not supported");
> +            return;
>          }
>  
>          /* ensure any other present functions are pending unplug */



^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC for-5.1 4/4] spapr: Don't allow unplug of NVLink2 devices
  2020-03-26  5:40 ` [RFC for-5.1 4/4] spapr: Don't allow unplug of NVLink2 devices David Gibson
@ 2020-03-26 12:27   ` Greg Kurz
  2020-03-26 23:56     ` David Gibson
  2020-03-28 12:32   ` Alexey Kardashevskiy
  1 sibling, 1 reply; 14+ messages in thread
From: Greg Kurz @ 2020-03-26 12:27 UTC (permalink / raw)
  To: David Gibson; +Cc: aik, qemu-ppc, clg, qemu-devel

On Thu, 26 Mar 2020 16:40:09 +1100
David Gibson <david@gibson.dropbear.id.au> wrote:

> Currently, we can't properly handle unplug of NVLink2 devices, because we
> don't have code to tear down their special memory resources.  There's not
> a lot of impetus to implement that. Since hardware NVLink2 devices can't
> be hot unplugged, the guest side drivers don't usually support unplug
> anyway.
> 
> Therefore, simply prevent unplug of NVLink2 devices.
> 

This could maybe considered as a valid fix for 5.0 since this prevents
guest crashes IIUC. But since this requires the two preliminary cleanup
patches, I understand you may prefer to postpone that to 5.1.

> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> ---

Reviewed-by: Greg Kurz <groug@kaod.org>

>  hw/ppc/spapr_pci.c | 5 +++++
>  1 file changed, 5 insertions(+)
> 
> diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
> index 55ca9dee1e..5c8262413a 100644
> --- a/hw/ppc/spapr_pci.c
> +++ b/hw/ppc/spapr_pci.c
> @@ -1666,6 +1666,11 @@ static void spapr_pci_unplug_request(HotplugHandler *plug_handler,
>              return;
>          }
>  
> +        if (spapr_phb_is_nvlink_dev(pdev, phb)) {
> +            error_setg(errp, "PCI: Cannot unplug NVLink2 devices");
> +            return;
> +        }
> +
>          /* ensure any other present functions are pending unplug */
>          if (PCI_FUNC(pdev->devfn) == 0) {
>              for (i = 1; i < 8; i++) {



^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC for-5.1 3/4] spapr: Fix failure path for attempting to hot unplug PCI bridges
  2020-03-26 12:18   ` Greg Kurz
@ 2020-03-26 23:54     ` David Gibson
  0 siblings, 0 replies; 14+ messages in thread
From: David Gibson @ 2020-03-26 23:54 UTC (permalink / raw)
  To: Greg Kurz; +Cc: aik, qemu-ppc, clg, qemu-devel


[-- Attachment #1: Type: text/plain, Size: 2439 bytes --]

On Thu, Mar 26, 2020 at 01:18:24PM +0100, Greg Kurz wrote:
> On Thu, 26 Mar 2020 16:40:08 +1100
> David Gibson <david@gibson.dropbear.id.au> wrote:
> 
> > For various technical reasons we can't currently allow unplug a PCI to PCI
> > bridge on the pseries machine.  spapr_pci_unplug_request() correctly
> > generates an error message if that's attempted.
> > 
> > But.. if the given errp is not error_abort or error_fatal,
> 
> Which is the always case when trying to unplug a device AFAICT:
> 
> void qdev_unplug(DeviceState *dev, Error **errp)
> {
>     DeviceClass *dc = DEVICE_GET_CLASS(dev);
>     HotplugHandler *hotplug_ctrl;
>     HotplugHandlerClass *hdc;
>     Error *local_err = NULL;
> 
>     [...]
>     hdc = HOTPLUG_HANDLER_GET_CLASS(hotplug_ctrl);
>     if (hdc->unplug_request) {
>         hotplug_handler_unplug_request(hotplug_ctrl, dev, &local_err);
> 
> And anyway, spapr_pci_unplug_request() shouldn't rely on the caller
> passing &error_fatal or &error_abort to do the right thing. Calling
> error_setg() without returning right away is a dangerous practice
> since it would cause a subsequent call to error_setg() with the
> same errp to abort QEMU.
> 
> > it doesn't actually stop trying to unplug the bridge anyway.
> > 
> 
> This looks like a bug fix that could go to 5.0 IMHO.

Fair point.  I've added the tag and moved to ppc-for-5.0.

> 
> Maybe add this tag ?
> 
>    Fixes: 14e714900f6b "spapr: Allow hot plug/unplug of PCI bridges and devices under PCI bridges"
> 
> > Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> > ---
> 
> Reviewed-by: Greg Kurz <groug@kaod.org>
> 
> >  hw/ppc/spapr_pci.c | 1 +
> >  1 file changed, 1 insertion(+)
> > 
> > diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
> > index 709a52780d..55ca9dee1e 100644
> > --- a/hw/ppc/spapr_pci.c
> > +++ b/hw/ppc/spapr_pci.c
> > @@ -1663,6 +1663,7 @@ static void spapr_pci_unplug_request(HotplugHandler *plug_handler,
> >  
> >          if (pc->is_bridge) {
> >              error_setg(errp, "PCI: Hot unplug of PCI bridges not supported");
> > +            return;
> >          }
> >  
> >          /* ensure any other present functions are pending unplug */
> 

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC for-5.1 1/4] spapr: Refactor locating NVLink2 devices for device tree creation
  2020-03-26 11:57   ` Greg Kurz
@ 2020-03-26 23:55     ` David Gibson
  0 siblings, 0 replies; 14+ messages in thread
From: David Gibson @ 2020-03-26 23:55 UTC (permalink / raw)
  To: Greg Kurz; +Cc: aik, qemu-ppc, clg, qemu-devel


[-- Attachment #1: Type: text/plain, Size: 7317 bytes --]

On Thu, Mar 26, 2020 at 12:57:38PM +0100, Greg Kurz wrote:
65;5803;1c> On Thu, 26 Mar 2020 16:40:06 +1100
> David Gibson <david@gibson.dropbear.id.au> wrote:
> 
> > Currently spapr_phb_nvgpu_populate_pcidev_dt() works a little cryptically.
> > It steps through all the NVLink2 GPUs and NPUs and if they match the device
> > we're called for, we generate the relevant device tree information.
> > 
> > Make this a little more obvious by introducing helpers to determine it a
> 
> ... to determine if a

Fixed, thanks.

> 
> > given PCI device is an NVLink2 GPU or NPU, returning the NVLink2 slot and
> > link number information as well.
> > 
> > Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> > ---
> 
> LGTM
> 
> Reviewed-by: Greg Kurz <groug@kaod.org>
> 
> >  hw/ppc/spapr_pci_nvlink2.c | 115 +++++++++++++++++++++++++------------
> >  1 file changed, 79 insertions(+), 36 deletions(-)
> > 
> > diff --git a/hw/ppc/spapr_pci_nvlink2.c b/hw/ppc/spapr_pci_nvlink2.c
> > index 8332d5694e..7d3a685421 100644
> > --- a/hw/ppc/spapr_pci_nvlink2.c
> > +++ b/hw/ppc/spapr_pci_nvlink2.c
> > @@ -390,13 +390,12 @@ void spapr_phb_nvgpu_ram_populate_dt(SpaprPhbState *sphb, void *fdt)
> >  
> >  }
> >  
> > -void spapr_phb_nvgpu_populate_pcidev_dt(PCIDevice *dev, void *fdt, int offset,
> > -                                        SpaprPhbState *sphb)
> > +static bool is_nvgpu(PCIDevice *dev, SpaprPhbState *sphb, int *slot)
> >  {
> > -    int i, j;
> > +    int i;
> >  
> >      if (!sphb->nvgpus) {
> > -        return;
> > +        return false;
> >      }
> >  
> >      for (i = 0; i < sphb->nvgpus->num; ++i) {
> > @@ -406,47 +405,91 @@ void spapr_phb_nvgpu_populate_pcidev_dt(PCIDevice *dev, void *fdt, int offset,
> >          if (!nvslot->gpdev) {
> >              continue;
> >          }
> > +
> >          if (dev == nvslot->gpdev) {
> > -            uint32_t npus[nvslot->linknum];
> > +            if (slot) {
> > +                *slot = i;
> > +            }
> > +            return true;
> > +        }
> > +    }
> >  
> > -            for (j = 0; j < nvslot->linknum; ++j) {
> > -                PCIDevice *npdev = nvslot->links[j].npdev;
> > +    return false;
> > +}
> >  
> > -                npus[j] = cpu_to_be32(PHANDLE_PCIDEV(sphb, npdev));
> > -            }
> > -            _FDT(fdt_setprop(fdt, offset, "ibm,npu", npus,
> > -                             j * sizeof(npus[0])));
> > -            _FDT((fdt_setprop_cell(fdt, offset, "phandle",
> > -                                   PHANDLE_PCIDEV(sphb, dev))));
> > +static bool is_nvnpu(PCIDevice *dev, SpaprPhbState *sphb, int *slot, int *link)
> > +{
> > +    int i, j;
> > +
> > +    if (!sphb->nvgpus) {
> > +        return false;
> > +    }
> > +
> > +    for (i = 0; i < sphb->nvgpus->num; ++i) {
> > +        SpaprPhbPciNvGpuSlot *nvslot = &sphb->nvgpus->slots[i];
> > +
> > +        /* Skip "slot" without attached GPU */
> > +        if (!nvslot->gpdev) {
> >              continue;
> >          }
> >  
> >          for (j = 0; j < nvslot->linknum; ++j) {
> > -            if (dev != nvslot->links[j].npdev) {
> > -                continue;
> > +            if (dev == nvslot->links[j].npdev) {
> > +                if (slot) {
> > +                    *slot = i;
> > +                }
> > +                if (link) {
> > +                    *link = j;
> > +                }
> > +                return true;
> >              }
> > +        }
> > +    }
> >  
> > -            _FDT((fdt_setprop_cell(fdt, offset, "phandle",
> > -                                   PHANDLE_PCIDEV(sphb, dev))));
> > -            _FDT(fdt_setprop_cell(fdt, offset, "ibm,gpu",
> > -                                  PHANDLE_PCIDEV(sphb, nvslot->gpdev)));
> > -            _FDT((fdt_setprop_cell(fdt, offset, "ibm,nvlink",
> > -                                   PHANDLE_NVLINK(sphb, i, j))));
> > -            /*
> > -             * If we ever want to emulate GPU RAM at the same location as on
> > -             * the host - here is the encoding GPA->TGT:
> > -             *
> > -             * gta  = ((sphb->nv2_gpa >> 42) & 0x1) << 42;
> > -             * gta |= ((sphb->nv2_gpa >> 45) & 0x3) << 43;
> > -             * gta |= ((sphb->nv2_gpa >> 49) & 0x3) << 45;
> > -             * gta |= sphb->nv2_gpa & ((1UL << 43) - 1);
> > -             */
> > -            _FDT(fdt_setprop_cell(fdt, offset, "memory-region",
> > -                                  PHANDLE_GPURAM(sphb, i)));
> > -            _FDT(fdt_setprop_u64(fdt, offset, "ibm,device-tgt-addr",
> > -                                 nvslot->tgt));
> > -            _FDT(fdt_setprop_cell(fdt, offset, "ibm,nvlink-speed",
> > -                                  nvslot->links[j].link_speed));
> > +    return false;
> > +}
> > +
> > +void spapr_phb_nvgpu_populate_pcidev_dt(PCIDevice *dev, void *fdt, int offset,
> > +                                        SpaprPhbState *sphb)
> > +{
> > +    int slot, link;
> > +
> > +    if (is_nvgpu(dev, sphb, &slot)) {
> > +        SpaprPhbPciNvGpuSlot *nvslot = &sphb->nvgpus->slots[slot];
> > +        uint32_t npus[nvslot->linknum];
> > +
> > +        for (link = 0; link < nvslot->linknum; ++link) {
> > +            PCIDevice *npdev = nvslot->links[link].npdev;
> > +
> > +            npus[link] = cpu_to_be32(PHANDLE_PCIDEV(sphb, npdev));
> >          }
> > +        _FDT(fdt_setprop(fdt, offset, "ibm,npu", npus,
> > +                         link * sizeof(npus[0])));
> > +        _FDT((fdt_setprop_cell(fdt, offset, "phandle",
> > +                               PHANDLE_PCIDEV(sphb, dev))));
> > +    } else if (is_nvnpu(dev, sphb, &slot, &link)) {
> > +        SpaprPhbPciNvGpuSlot *nvslot = &sphb->nvgpus->slots[slot];
> > +
> > +        _FDT((fdt_setprop_cell(fdt, offset, "phandle",
> > +                               PHANDLE_PCIDEV(sphb, dev))));
> > +        _FDT(fdt_setprop_cell(fdt, offset, "ibm,gpu",
> > +                              PHANDLE_PCIDEV(sphb, nvslot->gpdev)));
> > +        _FDT((fdt_setprop_cell(fdt, offset, "ibm,nvlink",
> > +                               PHANDLE_NVLINK(sphb, slot, link))));
> > +        /*
> > +         * If we ever want to emulate GPU RAM at the same location as
> > +         * on the host - here is the encoding GPA->TGT:
> > +         *
> > +         * gta  = ((sphb->nv2_gpa >> 42) & 0x1) << 42;
> > +         * gta |= ((sphb->nv2_gpa >> 45) & 0x3) << 43;
> > +         * gta |= ((sphb->nv2_gpa >> 49) & 0x3) << 45;
> > +         * gta |= sphb->nv2_gpa & ((1UL << 43) - 1);
> > +         */
> > +        _FDT(fdt_setprop_cell(fdt, offset, "memory-region",
> > +                              PHANDLE_GPURAM(sphb, slot)));
> > +        _FDT(fdt_setprop_u64(fdt, offset, "ibm,device-tgt-addr",
> > +                             nvslot->tgt));
> > +        _FDT(fdt_setprop_cell(fdt, offset, "ibm,nvlink-speed",
> > +                              nvslot->links[link].link_speed));
> >      }
> >  }
> 

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC for-5.1 4/4] spapr: Don't allow unplug of NVLink2 devices
  2020-03-26 12:27   ` Greg Kurz
@ 2020-03-26 23:56     ` David Gibson
  0 siblings, 0 replies; 14+ messages in thread
From: David Gibson @ 2020-03-26 23:56 UTC (permalink / raw)
  To: Greg Kurz; +Cc: aik, qemu-ppc, clg, qemu-devel


[-- Attachment #1: Type: text/plain, Size: 1912 bytes --]

On Thu, Mar 26, 2020 at 01:27:40PM +0100, Greg Kurz wrote:
> On Thu, 26 Mar 2020 16:40:09 +1100
> David Gibson <david@gibson.dropbear.id.au> wrote:
> 
> > Currently, we can't properly handle unplug of NVLink2 devices, because we
> > don't have code to tear down their special memory resources.  There's not
> > a lot of impetus to implement that. Since hardware NVLink2 devices can't
> > be hot unplugged, the guest side drivers don't usually support unplug
> > anyway.
> > 
> > Therefore, simply prevent unplug of NVLink2 devices.
> > 
> 
> This could maybe considered as a valid fix for 5.0 since this prevents
> guest crashes IIUC. But since this requires the two preliminary cleanup
> patches, I understand you may prefer to postpone that to 5.1.

Yeah, it's arguably a bug, but not a regression, so I'm inclined to
leave it to 5.1.

> 
> > Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> > ---
> 
> Reviewed-by: Greg Kurz <groug@kaod.org>
> 
> >  hw/ppc/spapr_pci.c | 5 +++++
> >  1 file changed, 5 insertions(+)
> > 
> > diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
> > index 55ca9dee1e..5c8262413a 100644
> > --- a/hw/ppc/spapr_pci.c
> > +++ b/hw/ppc/spapr_pci.c
> > @@ -1666,6 +1666,11 @@ static void spapr_pci_unplug_request(HotplugHandler *plug_handler,
> >              return;
> >          }
> >  
> > +        if (spapr_phb_is_nvlink_dev(pdev, phb)) {
> > +            error_setg(errp, "PCI: Cannot unplug NVLink2 devices");
> > +            return;
> > +        }
> > +
> >          /* ensure any other present functions are pending unplug */
> >          if (PCI_FUNC(pdev->devfn) == 0) {
> >              for (i = 1; i < 8; i++) {
> 

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC for-5.1 4/4] spapr: Don't allow unplug of NVLink2 devices
  2020-03-26  5:40 ` [RFC for-5.1 4/4] spapr: Don't allow unplug of NVLink2 devices David Gibson
  2020-03-26 12:27   ` Greg Kurz
@ 2020-03-28 12:32   ` Alexey Kardashevskiy
  2020-03-31  3:25     ` David Gibson
  1 sibling, 1 reply; 14+ messages in thread
From: Alexey Kardashevskiy @ 2020-03-28 12:32 UTC (permalink / raw)
  To: David Gibson, groug; +Cc: qemu-ppc, clg, qemu-devel



On 26/03/2020 16:40, David Gibson wrote:
> Currently, we can't properly handle unplug of NVLink2 devices, because we
> don't have code to tear down their special memory resources.  There's not
> a lot of impetus to implement that. Since hardware NVLink2 devices can't
> be hot unplugged, the guest side drivers don't usually support unplug
> anyway.
> 
> Therefore, simply prevent unplug of NVLink2 devices.
> 
> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> ---
>  hw/ppc/spapr_pci.c | 5 +++++
>  1 file changed, 5 insertions(+)
> 
> diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
> index 55ca9dee1e..5c8262413a 100644
> --- a/hw/ppc/spapr_pci.c
> +++ b/hw/ppc/spapr_pci.c
> @@ -1666,6 +1666,11 @@ static void spapr_pci_unplug_request(HotplugHandler *plug_handler,
>              return;
>          }
>  
> +        if (spapr_phb_is_nvlink_dev(pdev, phb)) {
> +            error_setg(errp, "PCI: Cannot unplug NVLink2 devices");
> +            return;
> +        }


Just this would do as well:

Object *po = OBJECT(pdev);
uint64_t tgt = object_property_get_uint(po, "nvlink2-tgt", NULL);

if (tgt) {
     error_setg(errp, "PCI: Cannot unplug NVLink2 devices");
     return;
}

honestly, I admin what 1/4 fixes is cryptic but since there is not going
to be any more new nvlinkX, this does not deserve this many patches imho.

	

> +
>          /* ensure any other present functions are pending unplug */
>          if (PCI_FUNC(pdev->devfn) == 0) {
>              for (i = 1; i < 8; i++) {
> 

-- 
Alexey


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC for-5.1 4/4] spapr: Don't allow unplug of NVLink2 devices
  2020-03-28 12:32   ` Alexey Kardashevskiy
@ 2020-03-31  3:25     ` David Gibson
  0 siblings, 0 replies; 14+ messages in thread
From: David Gibson @ 2020-03-31  3:25 UTC (permalink / raw)
  To: Alexey Kardashevskiy; +Cc: qemu-devel, qemu-ppc, groug, clg


[-- Attachment #1: Type: text/plain, Size: 1985 bytes --]

On Sat, Mar 28, 2020 at 11:32:18PM +1100, Alexey Kardashevskiy wrote:
> 
> 
> On 26/03/2020 16:40, David Gibson wrote:
> > Currently, we can't properly handle unplug of NVLink2 devices, because we
> > don't have code to tear down their special memory resources.  There's not
> > a lot of impetus to implement that. Since hardware NVLink2 devices can't
> > be hot unplugged, the guest side drivers don't usually support unplug
> > anyway.
> > 
> > Therefore, simply prevent unplug of NVLink2 devices.
> > 
> > Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> > ---
> >  hw/ppc/spapr_pci.c | 5 +++++
> >  1 file changed, 5 insertions(+)
> > 
> > diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
> > index 55ca9dee1e..5c8262413a 100644
> > --- a/hw/ppc/spapr_pci.c
> > +++ b/hw/ppc/spapr_pci.c
> > @@ -1666,6 +1666,11 @@ static void spapr_pci_unplug_request(HotplugHandler *plug_handler,
> >              return;
> >          }
> >  
> > +        if (spapr_phb_is_nvlink_dev(pdev, phb)) {
> > +            error_setg(errp, "PCI: Cannot unplug NVLink2 devices");
> > +            return;
> > +        }
> 
> 
> Just this would do as well:
> 
> Object *po = OBJECT(pdev);
> uint64_t tgt = object_property_get_uint(po, "nvlink2-tgt", NULL);
> 
> if (tgt) {
>      error_setg(errp, "PCI: Cannot unplug NVLink2 devices");
>      return;
> }
> 
> honestly, I admin what 1/4 fixes is cryptic but since there is not going
> to be any more new nvlinkX, this does not deserve this many patches
> imho.

Good point, that is a simpler approach.

> 
> 	
> 
> > +
> >          /* ensure any other present functions are pending unplug */
> >          if (PCI_FUNC(pdev->devfn) == 0) {
> >              for (i = 1; i < 8; i++) {
> > 
> 

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, back to index

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-03-26  5:40 [RFC for-5.1 0/4] Better handling of attempt NVLink2 unplug David Gibson
2020-03-26  5:40 ` [RFC for-5.1 1/4] spapr: Refactor locating NVLink2 devices for device tree creation David Gibson
2020-03-26 11:57   ` Greg Kurz
2020-03-26 23:55     ` David Gibson
2020-03-26  5:40 ` [RFC for-5.1 2/4] spapr: Helper to determine if a device is NVLink2 related David Gibson
2020-03-26 11:58   ` Greg Kurz
2020-03-26  5:40 ` [RFC for-5.1 3/4] spapr: Fix failure path for attempting to hot unplug PCI bridges David Gibson
2020-03-26 12:18   ` Greg Kurz
2020-03-26 23:54     ` David Gibson
2020-03-26  5:40 ` [RFC for-5.1 4/4] spapr: Don't allow unplug of NVLink2 devices David Gibson
2020-03-26 12:27   ` Greg Kurz
2020-03-26 23:56     ` David Gibson
2020-03-28 12:32   ` Alexey Kardashevskiy
2020-03-31  3:25     ` David Gibson

QEMU-Devel Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/qemu-devel/0 qemu-devel/git/0.git
	git clone --mirror https://lore.kernel.org/qemu-devel/1 qemu-devel/git/1.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 qemu-devel qemu-devel/ https://lore.kernel.org/qemu-devel \
		qemu-devel@nongnu.org
	public-inbox-index qemu-devel

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.nongnu.qemu-devel


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git