All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH qemu] spapr_pci: Add numa node id
@ 2016-07-27  8:03 Alexey Kardashevskiy
  2016-08-10  5:42 ` Alexey Kardashevskiy
                   ` (2 more replies)
  0 siblings, 3 replies; 10+ messages in thread
From: Alexey Kardashevskiy @ 2016-07-27  8:03 UTC (permalink / raw)
  To: qemu-devel; +Cc: Alexey Kardashevskiy, qemu-ppc, David Gibson, Michael Roth

This adds a numa id property to a PHB to allow linking passed PCI device
to CPU/memory. It is up to the management stack to do CPU/memory pinning
to the node with the actual PCI device.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---
 hw/ppc/spapr_pci.c          | 13 +++++++++++++
 include/hw/pci-host/spapr.h |  2 ++
 2 files changed, 15 insertions(+)

diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index 949c44f..af5394a 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -47,6 +47,7 @@
 #include "sysemu/device_tree.h"
 #include "sysemu/kvm.h"
 #include "sysemu/hostmem.h"
+#include "sysemu/numa.h"
 
 #include "hw/vfio/vfio.h"
 
@@ -1544,6 +1545,7 @@ static Property spapr_phb_properties[] = {
     DEFINE_PROP_BOOL("ddw", sPAPRPHBState, ddw_enabled, true),
     DEFINE_PROP_UINT64("pgsz", sPAPRPHBState, page_size_mask,
                        (1ULL << 12) | (1ULL << 16)),
+    DEFINE_PROP_UINT32("node", sPAPRPHBState, numa_node, -1),
     DEFINE_PROP_END_OF_LIST(),
 };
 
@@ -1805,6 +1807,11 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb,
         cpu_to_be32(1),
         cpu_to_be32(RTAS_IBM_RESET_PE_DMA_WINDOW)
     };
+    uint32_t associativity[] = {cpu_to_be32(0x4),
+                                cpu_to_be32(0x0),
+                                cpu_to_be32(0x0),
+                                cpu_to_be32(0x0),
+                                cpu_to_be32(phb->numa_node)};
     sPAPRTCETable *tcet;
     PCIBus *bus = PCI_HOST_BRIDGE(phb)->bus;
     sPAPRFDT s_fdt;
@@ -1837,6 +1844,12 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb,
                          &ddw_extensions, sizeof(ddw_extensions)));
     }
 
+    /* Advertise NUMA via ibm,associativity */
+    if (nb_numa_nodes > 1) {
+        _FDT(fdt_setprop(fdt, bus_off, "ibm,associativity", associativity,
+                         sizeof(associativity)));
+    }
+
     /* Build the interrupt-map, this must matches what is done
      * in pci_spapr_map_irq
      */
diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h
index 5adc603..53c4b2d 100644
--- a/include/hw/pci-host/spapr.h
+++ b/include/hw/pci-host/spapr.h
@@ -75,6 +75,8 @@ struct sPAPRPHBState {
     bool ddw_enabled;
     uint64_t page_size_mask;
     uint64_t dma64_win_addr;
+
+    uint32_t numa_node;
 };
 
 #define SPAPR_PCI_MAX_INDEX          255
-- 
2.5.0.rc3

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [Qemu-devel] [PATCH qemu] spapr_pci: Add numa node id
  2016-07-27  8:03 [Qemu-devel] [PATCH qemu] spapr_pci: Add numa node id Alexey Kardashevskiy
@ 2016-08-10  5:42 ` Alexey Kardashevskiy
  2016-09-13 23:29 ` Michael Roth
  2016-09-23  1:58 ` [Qemu-devel] [Qemu-ppc] " David Gibson
  2 siblings, 0 replies; 10+ messages in thread
From: Alexey Kardashevskiy @ 2016-08-10  5:42 UTC (permalink / raw)
  To: qemu-devel; +Cc: qemu-ppc, David Gibson, Michael Roth, Anton Blanchard

On 27/07/16 18:03, Alexey Kardashevskiy wrote:
> This adds a numa id property to a PHB to allow linking passed PCI device
> to CPU/memory. It is up to the management stack to do CPU/memory pinning
> to the node with the actual PCI device.
> 
> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>

Ping?


> ---
>  hw/ppc/spapr_pci.c          | 13 +++++++++++++
>  include/hw/pci-host/spapr.h |  2 ++
>  2 files changed, 15 insertions(+)
> 
> diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
> index 949c44f..af5394a 100644
> --- a/hw/ppc/spapr_pci.c
> +++ b/hw/ppc/spapr_pci.c
> @@ -47,6 +47,7 @@
>  #include "sysemu/device_tree.h"
>  #include "sysemu/kvm.h"
>  #include "sysemu/hostmem.h"
> +#include "sysemu/numa.h"
>  
>  #include "hw/vfio/vfio.h"
>  
> @@ -1544,6 +1545,7 @@ static Property spapr_phb_properties[] = {
>      DEFINE_PROP_BOOL("ddw", sPAPRPHBState, ddw_enabled, true),
>      DEFINE_PROP_UINT64("pgsz", sPAPRPHBState, page_size_mask,
>                         (1ULL << 12) | (1ULL << 16)),
> +    DEFINE_PROP_UINT32("node", sPAPRPHBState, numa_node, -1),
>      DEFINE_PROP_END_OF_LIST(),
>  };
>  
> @@ -1805,6 +1807,11 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb,
>          cpu_to_be32(1),
>          cpu_to_be32(RTAS_IBM_RESET_PE_DMA_WINDOW)
>      };
> +    uint32_t associativity[] = {cpu_to_be32(0x4),
> +                                cpu_to_be32(0x0),
> +                                cpu_to_be32(0x0),
> +                                cpu_to_be32(0x0),
> +                                cpu_to_be32(phb->numa_node)};
>      sPAPRTCETable *tcet;
>      PCIBus *bus = PCI_HOST_BRIDGE(phb)->bus;
>      sPAPRFDT s_fdt;
> @@ -1837,6 +1844,12 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb,
>                           &ddw_extensions, sizeof(ddw_extensions)));
>      }
>  
> +    /* Advertise NUMA via ibm,associativity */
> +    if (nb_numa_nodes > 1) {
> +        _FDT(fdt_setprop(fdt, bus_off, "ibm,associativity", associativity,
> +                         sizeof(associativity)));
> +    }
> +
>      /* Build the interrupt-map, this must matches what is done
>       * in pci_spapr_map_irq
>       */
> diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h
> index 5adc603..53c4b2d 100644
> --- a/include/hw/pci-host/spapr.h
> +++ b/include/hw/pci-host/spapr.h
> @@ -75,6 +75,8 @@ struct sPAPRPHBState {
>      bool ddw_enabled;
>      uint64_t page_size_mask;
>      uint64_t dma64_win_addr;
> +
> +    uint32_t numa_node;
>  };
>  
>  #define SPAPR_PCI_MAX_INDEX          255
> 


-- 
Alexey

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Qemu-devel] [PATCH qemu] spapr_pci: Add numa node id
  2016-07-27  8:03 [Qemu-devel] [PATCH qemu] spapr_pci: Add numa node id Alexey Kardashevskiy
  2016-08-10  5:42 ` Alexey Kardashevskiy
@ 2016-09-13 23:29 ` Michael Roth
  2016-09-14  9:39   ` Alexey Kardashevskiy
  2016-09-23  1:58 ` [Qemu-devel] [Qemu-ppc] " David Gibson
  2 siblings, 1 reply; 10+ messages in thread
From: Michael Roth @ 2016-09-13 23:29 UTC (permalink / raw)
  To: Alexey Kardashevskiy, qemu-devel; +Cc: qemu-ppc, David Gibson, sbhat

Quoting Alexey Kardashevskiy (2016-07-27 03:03:38)
> This adds a numa id property to a PHB to allow linking passed PCI device
> to CPU/memory. It is up to the management stack to do CPU/memory pinning
> to the node with the actual PCI device.

It looks like x86 relies on PCIBus->numa_node() method (via
pci_bus_numa_node()) to encode similar PCIBus affinities
into ACPI tables, and currently exposes it via
-device pci-[-express]-expander-bus,numa_node=X.

Maybe we should implement it using this existing
PCIBus->numa_node() interface?

We'd still have to expose numa_node as a spapr-pci-host-bridge
device option though. Not sure if there's a more common way
to expose it that might be easier for libvirt to discover. As it
stands we'd need to add spapr-pci-host-bridge to a libvirt
whitelist that currently only covers pci-expander-bus.

Cc'ing Shiva who was looking into the libvirt side.

One comment below:

> 
> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> ---
>  hw/ppc/spapr_pci.c          | 13 +++++++++++++
>  include/hw/pci-host/spapr.h |  2 ++
>  2 files changed, 15 insertions(+)
> 
> diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
> index 949c44f..af5394a 100644
> --- a/hw/ppc/spapr_pci.c
> +++ b/hw/ppc/spapr_pci.c
> @@ -47,6 +47,7 @@
>  #include "sysemu/device_tree.h"
>  #include "sysemu/kvm.h"
>  #include "sysemu/hostmem.h"
> +#include "sysemu/numa.h"
> 
>  #include "hw/vfio/vfio.h"
> 
> @@ -1544,6 +1545,7 @@ static Property spapr_phb_properties[] = {
>      DEFINE_PROP_BOOL("ddw", sPAPRPHBState, ddw_enabled, true),
>      DEFINE_PROP_UINT64("pgsz", sPAPRPHBState, page_size_mask,
>                         (1ULL << 12) | (1ULL << 16)),
> +    DEFINE_PROP_UINT32("node", sPAPRPHBState, numa_node, -1),
>      DEFINE_PROP_END_OF_LIST(),
>  };
> 
> @@ -1805,6 +1807,11 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb,
>          cpu_to_be32(1),
>          cpu_to_be32(RTAS_IBM_RESET_PE_DMA_WINDOW)
>      };
> +    uint32_t associativity[] = {cpu_to_be32(0x4),
> +                                cpu_to_be32(0x0),
> +                                cpu_to_be32(0x0),
> +                                cpu_to_be32(0x0),
> +                                cpu_to_be32(phb->numa_node)};
>      sPAPRTCETable *tcet;
>      PCIBus *bus = PCI_HOST_BRIDGE(phb)->bus;
>      sPAPRFDT s_fdt;
> @@ -1837,6 +1844,12 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb,
>                           &ddw_extensions, sizeof(ddw_extensions)));
>      }
> 
> +    /* Advertise NUMA via ibm,associativity */
> +    if (nb_numa_nodes > 1) {
> +        _FDT(fdt_setprop(fdt, bus_off, "ibm,associativity", associativity,
> +                         sizeof(associativity)));
> +    }

LoPAPR 15.3 seems to suggest that ibm,associativity-reference-points is
required alongside ibm,associativity for each DT node it appears in,
and since we hardcode "Form 1" affinity it should be done similarly as
the entry we place in the top-level DT node.

> +
>      /* Build the interrupt-map, this must matches what is done
>       * in pci_spapr_map_irq
>       */
> diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h
> index 5adc603..53c4b2d 100644
> --- a/include/hw/pci-host/spapr.h
> +++ b/include/hw/pci-host/spapr.h
> @@ -75,6 +75,8 @@ struct sPAPRPHBState {
>      bool ddw_enabled;
>      uint64_t page_size_mask;
>      uint64_t dma64_win_addr;
> +
> +    uint32_t numa_node;
>  };
> 
>  #define SPAPR_PCI_MAX_INDEX          255
> -- 
> 2.5.0.rc3
> 

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Qemu-devel] [PATCH qemu] spapr_pci: Add numa node id
  2016-09-13 23:29 ` Michael Roth
@ 2016-09-14  9:39   ` Alexey Kardashevskiy
  2016-09-14 12:03     ` Michael Roth
                       ` (2 more replies)
  0 siblings, 3 replies; 10+ messages in thread
From: Alexey Kardashevskiy @ 2016-09-14  9:39 UTC (permalink / raw)
  To: Michael Roth, qemu-devel; +Cc: qemu-ppc, David Gibson, sbhat, Bharata B Rao

On 14/09/16 09:29, Michael Roth wrote:
> Quoting Alexey Kardashevskiy (2016-07-27 03:03:38)
>> This adds a numa id property to a PHB to allow linking passed PCI device
>> to CPU/memory. It is up to the management stack to do CPU/memory pinning
>> to the node with the actual PCI device.
> 
> It looks like x86 relies on PCIBus->numa_node() method (via
> pci_bus_numa_node()) to encode similar PCIBus affinities
> into ACPI tables, and currently exposes it via
> -device pci-[-express]-expander-bus,numa_node=X.



Well, until we allow DMA windows per PCI bus (not per PHB as it is now),
this won't make much sense for us (unless I am missing something here).


> Maybe we should implement it using this existing
> PCIBus->numa_node() interface?
> 
> We'd still have to expose numa_node as a spapr-pci-host-bridge
> device option though. Not sure if there's a more common way
> to expose it that might be easier for libvirt to discover. As it
> stands we'd need to add spapr-pci-host-bridge to a libvirt
> whitelist that currently only covers pci-expander-bus.
> 
> Cc'ing Shiva who was looking into the libvirt side.
> 
> One comment below:
> 
>>
>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
>> ---
>>  hw/ppc/spapr_pci.c          | 13 +++++++++++++
>>  include/hw/pci-host/spapr.h |  2 ++
>>  2 files changed, 15 insertions(+)
>>
>> diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
>> index 949c44f..af5394a 100644
>> --- a/hw/ppc/spapr_pci.c
>> +++ b/hw/ppc/spapr_pci.c
>> @@ -47,6 +47,7 @@
>>  #include "sysemu/device_tree.h"
>>  #include "sysemu/kvm.h"
>>  #include "sysemu/hostmem.h"
>> +#include "sysemu/numa.h"
>>
>>  #include "hw/vfio/vfio.h"
>>
>> @@ -1544,6 +1545,7 @@ static Property spapr_phb_properties[] = {
>>      DEFINE_PROP_BOOL("ddw", sPAPRPHBState, ddw_enabled, true),
>>      DEFINE_PROP_UINT64("pgsz", sPAPRPHBState, page_size_mask,
>>                         (1ULL << 12) | (1ULL << 16)),
>> +    DEFINE_PROP_UINT32("node", sPAPRPHBState, numa_node, -1),
>>      DEFINE_PROP_END_OF_LIST(),
>>  };
>>
>> @@ -1805,6 +1807,11 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb,
>>          cpu_to_be32(1),
>>          cpu_to_be32(RTAS_IBM_RESET_PE_DMA_WINDOW)
>>      };
>> +    uint32_t associativity[] = {cpu_to_be32(0x4),
>> +                                cpu_to_be32(0x0),
>> +                                cpu_to_be32(0x0),
>> +                                cpu_to_be32(0x0),
>> +                                cpu_to_be32(phb->numa_node)};
>>      sPAPRTCETable *tcet;
>>      PCIBus *bus = PCI_HOST_BRIDGE(phb)->bus;
>>      sPAPRFDT s_fdt;
>> @@ -1837,6 +1844,12 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb,
>>                           &ddw_extensions, sizeof(ddw_extensions)));
>>      }
>>
>> +    /* Advertise NUMA via ibm,associativity */
>> +    if (nb_numa_nodes > 1) {
>> +        _FDT(fdt_setprop(fdt, bus_off, "ibm,associativity", associativity,
>> +                         sizeof(associativity)));
>> +    }
> 
> LoPAPR 15.3 seems to suggest that ibm,associativity-reference-points is
> required alongside ibm,associativity for each DT node it appears in,
> and since we hardcode "Form 1" affinity it should be done similarly as
> the entry we place in the top-level DT node.


Hm, okay, I'll add it. There is a question to Bharata - why do we have 4s
in spapr_create_fdt_skel()'s refpoints? Just a random pick?

vec5[5]==0x80 means we are doing "Form1" and these 4s are far/near distances?


>> +
>>      /* Build the interrupt-map, this must matches what is done
>>       * in pci_spapr_map_irq
>>       */
>> diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h
>> index 5adc603..53c4b2d 100644
>> --- a/include/hw/pci-host/spapr.h
>> +++ b/include/hw/pci-host/spapr.h
>> @@ -75,6 +75,8 @@ struct sPAPRPHBState {
>>      bool ddw_enabled;
>>      uint64_t page_size_mask;
>>      uint64_t dma64_win_addr;
>> +
>> +    uint32_t numa_node;
>>  };
>>
>>  #define SPAPR_PCI_MAX_INDEX          255
>> -- 
>> 2.5.0.rc3
>>
> 


-- 
Alexey

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Qemu-devel] [PATCH qemu] spapr_pci: Add numa node id
  2016-09-14  9:39   ` Alexey Kardashevskiy
@ 2016-09-14 12:03     ` Michael Roth
  2016-09-22  4:49       ` David Gibson
  2016-09-19  3:39     ` [Qemu-devel] " Bharata B Rao
  2016-09-22  4:48     ` David Gibson
  2 siblings, 1 reply; 10+ messages in thread
From: Michael Roth @ 2016-09-14 12:03 UTC (permalink / raw)
  To: Alexey Kardashevskiy, qemu-devel
  Cc: qemu-ppc, David Gibson, sbhat, Bharata B Rao

Quoting Alexey Kardashevskiy (2016-09-14 04:39:10)
> On 14/09/16 09:29, Michael Roth wrote:
> > Quoting Alexey Kardashevskiy (2016-07-27 03:03:38)
> >> This adds a numa id property to a PHB to allow linking passed PCI device
> >> to CPU/memory. It is up to the management stack to do CPU/memory pinning
> >> to the node with the actual PCI device.
> > 
> > It looks like x86 relies on PCIBus->numa_node() method (via
> > pci_bus_numa_node()) to encode similar PCIBus affinities
> > into ACPI tables, and currently exposes it via
> > -device pci-[-express]-expander-bus,numa_node=X.
> 
> 
> 
> Well, until we allow DMA windows per PCI bus (not per PHB as it is now),
> this won't make much sense for us (unless I am missing something here).

I didn't consider that it's not a bus-level setting; I think
you're right that re-using the interface to both store/retrieve doesn't
make much sense in that case.

My thought that was that since pci_bus_numa_node() could in theory come
to be relied upon by common PCI code, that we should use it as well. But
even if it doesn't make sense for us to use it, wouldn't it make sense to
still set PCIBus->numa_node (in addition to the PHB-wide value) in the
off-chance that common code does come to rely on pci_bus_numa_node()?

> 
> 
> > Maybe we should implement it using this existing
> > PCIBus->numa_node() interface?
> > 
> > We'd still have to expose numa_node as a spapr-pci-host-bridge
> > device option though. Not sure if there's a more common way
> > to expose it that might be easier for libvirt to discover. As it
> > stands we'd need to add spapr-pci-host-bridge to a libvirt
> > whitelist that currently only covers pci-expander-bus.
> > 
> > Cc'ing Shiva who was looking into the libvirt side.
> > 
> > One comment below:
> > 
> >>
> >> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> >> ---
> >>  hw/ppc/spapr_pci.c          | 13 +++++++++++++
> >>  include/hw/pci-host/spapr.h |  2 ++
> >>  2 files changed, 15 insertions(+)
> >>
> >> diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
> >> index 949c44f..af5394a 100644
> >> --- a/hw/ppc/spapr_pci.c
> >> +++ b/hw/ppc/spapr_pci.c
> >> @@ -47,6 +47,7 @@
> >>  #include "sysemu/device_tree.h"
> >>  #include "sysemu/kvm.h"
> >>  #include "sysemu/hostmem.h"
> >> +#include "sysemu/numa.h"
> >>
> >>  #include "hw/vfio/vfio.h"
> >>
> >> @@ -1544,6 +1545,7 @@ static Property spapr_phb_properties[] = {
> >>      DEFINE_PROP_BOOL("ddw", sPAPRPHBState, ddw_enabled, true),
> >>      DEFINE_PROP_UINT64("pgsz", sPAPRPHBState, page_size_mask,
> >>                         (1ULL << 12) | (1ULL << 16)),
> >> +    DEFINE_PROP_UINT32("node", sPAPRPHBState, numa_node, -1),
> >>      DEFINE_PROP_END_OF_LIST(),
> >>  };
> >>
> >> @@ -1805,6 +1807,11 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb,
> >>          cpu_to_be32(1),
> >>          cpu_to_be32(RTAS_IBM_RESET_PE_DMA_WINDOW)
> >>      };
> >> +    uint32_t associativity[] = {cpu_to_be32(0x4),
> >> +                                cpu_to_be32(0x0),
> >> +                                cpu_to_be32(0x0),
> >> +                                cpu_to_be32(0x0),
> >> +                                cpu_to_be32(phb->numa_node)};
> >>      sPAPRTCETable *tcet;
> >>      PCIBus *bus = PCI_HOST_BRIDGE(phb)->bus;
> >>      sPAPRFDT s_fdt;
> >> @@ -1837,6 +1844,12 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb,
> >>                           &ddw_extensions, sizeof(ddw_extensions)));
> >>      }
> >>
> >> +    /* Advertise NUMA via ibm,associativity */
> >> +    if (nb_numa_nodes > 1) {
> >> +        _FDT(fdt_setprop(fdt, bus_off, "ibm,associativity", associativity,
> >> +                         sizeof(associativity)));
> >> +    }
> > 
> > LoPAPR 15.3 seems to suggest that ibm,associativity-reference-points is
> > required alongside ibm,associativity for each DT node it appears in,
> > and since we hardcode "Form 1" affinity it should be done similarly as
> > the entry we place in the top-level DT node.
> 
> 
> Hm, okay, I'll add it. There is a question to Bharata - why do we have 4s
> in spapr_create_fdt_skel()'s refpoints? Just a random pick?
> 
> vec5[5]==0x80 means we are doing "Form1" and these 4s are far/near distances?
> 
> 
> >> +
> >>      /* Build the interrupt-map, this must matches what is done
> >>       * in pci_spapr_map_irq
> >>       */
> >> diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h
> >> index 5adc603..53c4b2d 100644
> >> --- a/include/hw/pci-host/spapr.h
> >> +++ b/include/hw/pci-host/spapr.h
> >> @@ -75,6 +75,8 @@ struct sPAPRPHBState {
> >>      bool ddw_enabled;
> >>      uint64_t page_size_mask;
> >>      uint64_t dma64_win_addr;
> >> +
> >> +    uint32_t numa_node;
> >>  };
> >>
> >>  #define SPAPR_PCI_MAX_INDEX          255
> >> -- 
> >> 2.5.0.rc3
> >>
> > 
> 
> 
> -- 
> Alexey
> 

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Qemu-devel] [PATCH qemu] spapr_pci: Add numa node id
  2016-09-14  9:39   ` Alexey Kardashevskiy
  2016-09-14 12:03     ` Michael Roth
@ 2016-09-19  3:39     ` Bharata B Rao
  2016-09-22  4:48     ` David Gibson
  2 siblings, 0 replies; 10+ messages in thread
From: Bharata B Rao @ 2016-09-19  3:39 UTC (permalink / raw)
  To: Alexey Kardashevskiy
  Cc: Michael Roth, qemu-devel, qemu-ppc, David Gibson, sbhat

On Wed, Sep 14, 2016 at 07:39:10PM +1000, Alexey Kardashevskiy wrote:
> On 14/09/16 09:29, Michael Roth wrote:
> > Quoting Alexey Kardashevskiy (2016-07-27 03:03:38)
> >> This adds a numa id property to a PHB to allow linking passed PCI device
> >> to CPU/memory. It is up to the management stack to do CPU/memory pinning
> >> to the node with the actual PCI device.
> > 
> > It looks like x86 relies on PCIBus->numa_node() method (via
> > pci_bus_numa_node()) to encode similar PCIBus affinities
> > into ACPI tables, and currently exposes it via
> > -device pci-[-express]-expander-bus,numa_node=X.
> 
> 
> 
> Well, until we allow DMA windows per PCI bus (not per PHB as it is now),
> this won't make much sense for us (unless I am missing something here).
> 
> 
> > Maybe we should implement it using this existing
> > PCIBus->numa_node() interface?
> > 
> > We'd still have to expose numa_node as a spapr-pci-host-bridge
> > device option though. Not sure if there's a more common way
> > to expose it that might be easier for libvirt to discover. As it
> > stands we'd need to add spapr-pci-host-bridge to a libvirt
> > whitelist that currently only covers pci-expander-bus.
> > 
> > Cc'ing Shiva who was looking into the libvirt side.
> > 
> > One comment below:
> > 
> >>
> >> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> >> ---
> >>  hw/ppc/spapr_pci.c          | 13 +++++++++++++
> >>  include/hw/pci-host/spapr.h |  2 ++
> >>  2 files changed, 15 insertions(+)
> >>
> >> diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
> >> index 949c44f..af5394a 100644
> >> --- a/hw/ppc/spapr_pci.c
> >> +++ b/hw/ppc/spapr_pci.c
> >> @@ -47,6 +47,7 @@
> >>  #include "sysemu/device_tree.h"
> >>  #include "sysemu/kvm.h"
> >>  #include "sysemu/hostmem.h"
> >> +#include "sysemu/numa.h"
> >>
> >>  #include "hw/vfio/vfio.h"
> >>
> >> @@ -1544,6 +1545,7 @@ static Property spapr_phb_properties[] = {
> >>      DEFINE_PROP_BOOL("ddw", sPAPRPHBState, ddw_enabled, true),
> >>      DEFINE_PROP_UINT64("pgsz", sPAPRPHBState, page_size_mask,
> >>                         (1ULL << 12) | (1ULL << 16)),
> >> +    DEFINE_PROP_UINT32("node", sPAPRPHBState, numa_node, -1),
> >>      DEFINE_PROP_END_OF_LIST(),
> >>  };
> >>
> >> @@ -1805,6 +1807,11 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb,
> >>          cpu_to_be32(1),
> >>          cpu_to_be32(RTAS_IBM_RESET_PE_DMA_WINDOW)
> >>      };
> >> +    uint32_t associativity[] = {cpu_to_be32(0x4),
> >> +                                cpu_to_be32(0x0),
> >> +                                cpu_to_be32(0x0),
> >> +                                cpu_to_be32(0x0),
> >> +                                cpu_to_be32(phb->numa_node)};
> >>      sPAPRTCETable *tcet;
> >>      PCIBus *bus = PCI_HOST_BRIDGE(phb)->bus;
> >>      sPAPRFDT s_fdt;
> >> @@ -1837,6 +1844,12 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb,
> >>                           &ddw_extensions, sizeof(ddw_extensions)));
> >>      }
> >>
> >> +    /* Advertise NUMA via ibm,associativity */
> >> +    if (nb_numa_nodes > 1) {
> >> +        _FDT(fdt_setprop(fdt, bus_off, "ibm,associativity", associativity,
> >> +                         sizeof(associativity)));
> >> +    }
> > 
> > LoPAPR 15.3 seems to suggest that ibm,associativity-reference-points is
> > required alongside ibm,associativity for each DT node it appears in,
> > and since we hardcode "Form 1" affinity it should be done similarly as
> > the entry we place in the top-level DT node.
> 
> 
> Hm, okay, I'll add it. There is a question to Bharata - why do we have 4s
> in spapr_create_fdt_skel()'s refpoints? Just a random pick?

I remember basing it on what I saw in an LPAR>

> 
> vec5[5]==0x80 means we are doing "Form1" and these 4s are far/near distances?

This comment from https://github.com/open-power/skiboot/blob/master/core/affinity.c should explain things:

/*
 *
 * We currently construct our associativity properties as such:
 *
 * - For "chip" devices (bridges, memory, ...), 4 entries:
 *
 *     - CCM node ID
 *     - HW card ID
 *     - HW module ID
 *     - Chip ID
 *
 *   The information is constructed based on the chip ID which (unlike
 *   pHyp) is our HW chip ID (aka "XSCOM" chip ID). We use it to retrieve
 *   the other properties from the corresponding chip/xscom node in the
 *   device-tree. If those properties are absent, 0 is used.
 *
 * - For "core" devices, we add a 5th entry:
 *
 *     - Core ID
 *
 *   Here too, we do not use the "cooked" HW processor ID from HDAT but
 *   instead use the real HW core ID which is basically the interrupt
 *   server number of thread 0 on that core.
 *
 *
 * The ibm,associativity-reference-points property is currently set to
 * 4,4 indicating that the chip ID is our only reference point. This
 * should be extended to encompass the node IDs eventually.
 */

Regards,
Bharata.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Qemu-devel] [PATCH qemu] spapr_pci: Add numa node id
  2016-09-14  9:39   ` Alexey Kardashevskiy
  2016-09-14 12:03     ` Michael Roth
  2016-09-19  3:39     ` [Qemu-devel] " Bharata B Rao
@ 2016-09-22  4:48     ` David Gibson
  2 siblings, 0 replies; 10+ messages in thread
From: David Gibson @ 2016-09-22  4:48 UTC (permalink / raw)
  To: Alexey Kardashevskiy
  Cc: Michael Roth, qemu-devel, qemu-ppc, sbhat, Bharata B Rao

[-- Attachment #1: Type: text/plain, Size: 5035 bytes --]

On Wed, Sep 14, 2016 at 07:39:10PM +1000, Alexey Kardashevskiy wrote:
> On 14/09/16 09:29, Michael Roth wrote:
> > Quoting Alexey Kardashevskiy (2016-07-27 03:03:38)
> >> This adds a numa id property to a PHB to allow linking passed PCI device
> >> to CPU/memory. It is up to the management stack to do CPU/memory pinning
> >> to the node with the actual PCI device.
> > 
> > It looks like x86 relies on PCIBus->numa_node() method (via
> > pci_bus_numa_node()) to encode similar PCIBus affinities
> > into ACPI tables, and currently exposes it via
> > -device pci-[-express]-expander-bus,numa_node=X.
> 
> Well, until we allow DMA windows per PCI bus (not per PHB as it is now),
> this won't make much sense for us (unless I am missing something
> here).

I can't actually see any sane way we could have NUMA associativity of
PCI at any finer granularity than the host bridge level.  I suspect
the only reason it works that way on x86 is due to some of the weird
stuff PC does to make what are effectively different host bridges
appear as different buses in a single logical domain.

> 
> > Maybe we should implement it using this existing
> > PCIBus->numa_node() interface?
> > 
> > We'd still have to expose numa_node as a spapr-pci-host-bridge
> > device option though. Not sure if there's a more common way
> > to expose it that might be easier for libvirt to discover. As it
> > stands we'd need to add spapr-pci-host-bridge to a libvirt
> > whitelist that currently only covers pci-expander-bus.
> > 
> > Cc'ing Shiva who was looking into the libvirt side.

I think we should change the actual name of the option to "numa_node"
to match the option used on the pxb, though.

> > 
> > One comment below:
> > 
> >>
> >> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> >> ---
> >>  hw/ppc/spapr_pci.c          | 13 +++++++++++++
> >>  include/hw/pci-host/spapr.h |  2 ++
> >>  2 files changed, 15 insertions(+)
> >>
> >> diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
> >> index 949c44f..af5394a 100644
> >> --- a/hw/ppc/spapr_pci.c
> >> +++ b/hw/ppc/spapr_pci.c
> >> @@ -47,6 +47,7 @@
> >>  #include "sysemu/device_tree.h"
> >>  #include "sysemu/kvm.h"
> >>  #include "sysemu/hostmem.h"
> >> +#include "sysemu/numa.h"
> >>
> >>  #include "hw/vfio/vfio.h"
> >>
> >> @@ -1544,6 +1545,7 @@ static Property spapr_phb_properties[] = {
> >>      DEFINE_PROP_BOOL("ddw", sPAPRPHBState, ddw_enabled, true),
> >>      DEFINE_PROP_UINT64("pgsz", sPAPRPHBState, page_size_mask,
> >>                         (1ULL << 12) | (1ULL << 16)),
> >> +    DEFINE_PROP_UINT32("node", sPAPRPHBState, numa_node, -1),
> >>      DEFINE_PROP_END_OF_LIST(),
> >>  };
> >>
> >> @@ -1805,6 +1807,11 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb,
> >>          cpu_to_be32(1),
> >>          cpu_to_be32(RTAS_IBM_RESET_PE_DMA_WINDOW)
> >>      };
> >> +    uint32_t associativity[] = {cpu_to_be32(0x4),
> >> +                                cpu_to_be32(0x0),
> >> +                                cpu_to_be32(0x0),
> >> +                                cpu_to_be32(0x0),
> >> +                                cpu_to_be32(phb->numa_node)};
> >>      sPAPRTCETable *tcet;
> >>      PCIBus *bus = PCI_HOST_BRIDGE(phb)->bus;
> >>      sPAPRFDT s_fdt;
> >> @@ -1837,6 +1844,12 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb,
> >>                           &ddw_extensions, sizeof(ddw_extensions)));
> >>      }
> >>
> >> +    /* Advertise NUMA via ibm,associativity */
> >> +    if (nb_numa_nodes > 1) {
> >> +        _FDT(fdt_setprop(fdt, bus_off, "ibm,associativity", associativity,
> >> +                         sizeof(associativity)));
> >> +    }
> > 
> > LoPAPR 15.3 seems to suggest that ibm,associativity-reference-points is
> > required alongside ibm,associativity for each DT node it appears in,
> > and since we hardcode "Form 1" affinity it should be done similarly as
> > the entry we place in the top-level DT node.
> 
> 
> Hm, okay, I'll add it. There is a question to Bharata - why do we have 4s
> in spapr_create_fdt_skel()'s refpoints? Just a random pick?
> 
> vec5[5]==0x80 means we are doing "Form1" and these 4s are far/near distances?
> 
> 
> >> +
> >>      /* Build the interrupt-map, this must matches what is done
> >>       * in pci_spapr_map_irq
> >>       */
> >> diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h
> >> index 5adc603..53c4b2d 100644
> >> --- a/include/hw/pci-host/spapr.h
> >> +++ b/include/hw/pci-host/spapr.h
> >> @@ -75,6 +75,8 @@ struct sPAPRPHBState {
> >>      bool ddw_enabled;
> >>      uint64_t page_size_mask;
> >>      uint64_t dma64_win_addr;
> >> +
> >> +    uint32_t numa_node;
> >>  };
> >>
> >>  #define SPAPR_PCI_MAX_INDEX          255
> >>
> > 
> 
> 

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Qemu-devel] [PATCH qemu] spapr_pci: Add numa node id
  2016-09-14 12:03     ` Michael Roth
@ 2016-09-22  4:49       ` David Gibson
  2016-09-23  1:59         ` [Qemu-devel] [Qemu-ppc] " David Gibson
  0 siblings, 1 reply; 10+ messages in thread
From: David Gibson @ 2016-09-22  4:49 UTC (permalink / raw)
  To: Michael Roth
  Cc: Alexey Kardashevskiy, qemu-devel, qemu-ppc, sbhat, Bharata B Rao

[-- Attachment #1: Type: text/plain, Size: 5592 bytes --]

On Wed, Sep 14, 2016 at 07:03:50AM -0500, Michael Roth wrote:
> Quoting Alexey Kardashevskiy (2016-09-14 04:39:10)
> > On 14/09/16 09:29, Michael Roth wrote:
> > > Quoting Alexey Kardashevskiy (2016-07-27 03:03:38)
> > >> This adds a numa id property to a PHB to allow linking passed PCI device
> > >> to CPU/memory. It is up to the management stack to do CPU/memory pinning
> > >> to the node with the actual PCI device.
> > > 
> > > It looks like x86 relies on PCIBus->numa_node() method (via
> > > pci_bus_numa_node()) to encode similar PCIBus affinities
> > > into ACPI tables, and currently exposes it via
> > > -device pci-[-express]-expander-bus,numa_node=X.
> > 
> > 
> > 
> > Well, until we allow DMA windows per PCI bus (not per PHB as it is now),
> > this won't make much sense for us (unless I am missing something here).
> 
> I didn't consider that it's not a bus-level setting; I think
> you're right that re-using the interface to both store/retrieve doesn't
> make much sense in that case.
> 
> My thought that was that since pci_bus_numa_node() could in theory come
> to be relied upon by common PCI code, that we should use it as well. But
> even if it doesn't make sense for us to use it, wouldn't it make sense to
> still set PCIBus->numa_node (in addition to the PHB-wide value) in the
> off-chance that common code does come to rely on
> pci_bus_numa_node()?

Yes, it would be a good idea to set the PCIBus node value to inherit
the one that's set for the host bridge, just in case any generic code
looks at it in future.

> 
> > 
> > 
> > > Maybe we should implement it using this existing
> > > PCIBus->numa_node() interface?
> > > 
> > > We'd still have to expose numa_node as a spapr-pci-host-bridge
> > > device option though. Not sure if there's a more common way
> > > to expose it that might be easier for libvirt to discover. As it
> > > stands we'd need to add spapr-pci-host-bridge to a libvirt
> > > whitelist that currently only covers pci-expander-bus.
> > > 
> > > Cc'ing Shiva who was looking into the libvirt side.
> > > 
> > > One comment below:
> > > 
> > >>
> > >> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> > >> ---
> > >>  hw/ppc/spapr_pci.c          | 13 +++++++++++++
> > >>  include/hw/pci-host/spapr.h |  2 ++
> > >>  2 files changed, 15 insertions(+)
> > >>
> > >> diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
> > >> index 949c44f..af5394a 100644
> > >> --- a/hw/ppc/spapr_pci.c
> > >> +++ b/hw/ppc/spapr_pci.c
> > >> @@ -47,6 +47,7 @@
> > >>  #include "sysemu/device_tree.h"
> > >>  #include "sysemu/kvm.h"
> > >>  #include "sysemu/hostmem.h"
> > >> +#include "sysemu/numa.h"
> > >>
> > >>  #include "hw/vfio/vfio.h"
> > >>
> > >> @@ -1544,6 +1545,7 @@ static Property spapr_phb_properties[] = {
> > >>      DEFINE_PROP_BOOL("ddw", sPAPRPHBState, ddw_enabled, true),
> > >>      DEFINE_PROP_UINT64("pgsz", sPAPRPHBState, page_size_mask,
> > >>                         (1ULL << 12) | (1ULL << 16)),
> > >> +    DEFINE_PROP_UINT32("node", sPAPRPHBState, numa_node, -1),
> > >>      DEFINE_PROP_END_OF_LIST(),
> > >>  };
> > >>
> > >> @@ -1805,6 +1807,11 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb,
> > >>          cpu_to_be32(1),
> > >>          cpu_to_be32(RTAS_IBM_RESET_PE_DMA_WINDOW)
> > >>      };
> > >> +    uint32_t associativity[] = {cpu_to_be32(0x4),
> > >> +                                cpu_to_be32(0x0),
> > >> +                                cpu_to_be32(0x0),
> > >> +                                cpu_to_be32(0x0),
> > >> +                                cpu_to_be32(phb->numa_node)};
> > >>      sPAPRTCETable *tcet;
> > >>      PCIBus *bus = PCI_HOST_BRIDGE(phb)->bus;
> > >>      sPAPRFDT s_fdt;
> > >> @@ -1837,6 +1844,12 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb,
> > >>                           &ddw_extensions, sizeof(ddw_extensions)));
> > >>      }
> > >>
> > >> +    /* Advertise NUMA via ibm,associativity */
> > >> +    if (nb_numa_nodes > 1) {
> > >> +        _FDT(fdt_setprop(fdt, bus_off, "ibm,associativity", associativity,
> > >> +                         sizeof(associativity)));
> > >> +    }
> > > 
> > > LoPAPR 15.3 seems to suggest that ibm,associativity-reference-points is
> > > required alongside ibm,associativity for each DT node it appears in,
> > > and since we hardcode "Form 1" affinity it should be done similarly as
> > > the entry we place in the top-level DT node.
> > 
> > 
> > Hm, okay, I'll add it. There is a question to Bharata - why do we have 4s
> > in spapr_create_fdt_skel()'s refpoints? Just a random pick?
> > 
> > vec5[5]==0x80 means we are doing "Form1" and these 4s are far/near distances?
> > 
> > 
> > >> +
> > >>      /* Build the interrupt-map, this must matches what is done
> > >>       * in pci_spapr_map_irq
> > >>       */
> > >> diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h
> > >> index 5adc603..53c4b2d 100644
> > >> --- a/include/hw/pci-host/spapr.h
> > >> +++ b/include/hw/pci-host/spapr.h
> > >> @@ -75,6 +75,8 @@ struct sPAPRPHBState {
> > >>      bool ddw_enabled;
> > >>      uint64_t page_size_mask;
> > >>      uint64_t dma64_win_addr;
> > >> +
> > >> +    uint32_t numa_node;
> > >>  };
> > >>
> > >>  #define SPAPR_PCI_MAX_INDEX          255
> > >>
> > > 
> > 
> > 
> 

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH qemu] spapr_pci: Add numa node id
  2016-07-27  8:03 [Qemu-devel] [PATCH qemu] spapr_pci: Add numa node id Alexey Kardashevskiy
  2016-08-10  5:42 ` Alexey Kardashevskiy
  2016-09-13 23:29 ` Michael Roth
@ 2016-09-23  1:58 ` David Gibson
  2 siblings, 0 replies; 10+ messages in thread
From: David Gibson @ 2016-09-23  1:58 UTC (permalink / raw)
  To: Alexey Kardashevskiy; +Cc: qemu-devel, qemu-ppc, Michael Roth

[-- Attachment #1: Type: text/plain, Size: 2935 bytes --]

On Wed, Jul 27, 2016 at 06:03:38PM +1000, Alexey Kardashevskiy wrote:
> This adds a numa id property to a PHB to allow linking passed PCI device
> to CPU/memory. It is up to the management stack to do CPU/memory pinning
> to the node with the actual PCI device.
> 
> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>

I've applied this to ppc-for-2.8, renaming the property to "numa_node"
to match the similar option for pxb.

> ---
>  hw/ppc/spapr_pci.c          | 13 +++++++++++++
>  include/hw/pci-host/spapr.h |  2 ++
>  2 files changed, 15 insertions(+)
> 
> diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
> index 949c44f..af5394a 100644
> --- a/hw/ppc/spapr_pci.c
> +++ b/hw/ppc/spapr_pci.c
> @@ -47,6 +47,7 @@
>  #include "sysemu/device_tree.h"
>  #include "sysemu/kvm.h"
>  #include "sysemu/hostmem.h"
> +#include "sysemu/numa.h"
>  
>  #include "hw/vfio/vfio.h"
>  
> @@ -1544,6 +1545,7 @@ static Property spapr_phb_properties[] = {
>      DEFINE_PROP_BOOL("ddw", sPAPRPHBState, ddw_enabled, true),
>      DEFINE_PROP_UINT64("pgsz", sPAPRPHBState, page_size_mask,
>                         (1ULL << 12) | (1ULL << 16)),
> +    DEFINE_PROP_UINT32("node", sPAPRPHBState, numa_node, -1),
>      DEFINE_PROP_END_OF_LIST(),
>  };
>  
> @@ -1805,6 +1807,11 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb,
>          cpu_to_be32(1),
>          cpu_to_be32(RTAS_IBM_RESET_PE_DMA_WINDOW)
>      };
> +    uint32_t associativity[] = {cpu_to_be32(0x4),
> +                                cpu_to_be32(0x0),
> +                                cpu_to_be32(0x0),
> +                                cpu_to_be32(0x0),
> +                                cpu_to_be32(phb->numa_node)};
>      sPAPRTCETable *tcet;
>      PCIBus *bus = PCI_HOST_BRIDGE(phb)->bus;
>      sPAPRFDT s_fdt;
> @@ -1837,6 +1844,12 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb,
>                           &ddw_extensions, sizeof(ddw_extensions)));
>      }
>  
> +    /* Advertise NUMA via ibm,associativity */
> +    if (nb_numa_nodes > 1) {
> +        _FDT(fdt_setprop(fdt, bus_off, "ibm,associativity", associativity,
> +                         sizeof(associativity)));
> +    }
> +
>      /* Build the interrupt-map, this must matches what is done
>       * in pci_spapr_map_irq
>       */
> diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h
> index 5adc603..53c4b2d 100644
> --- a/include/hw/pci-host/spapr.h
> +++ b/include/hw/pci-host/spapr.h
> @@ -75,6 +75,8 @@ struct sPAPRPHBState {
>      bool ddw_enabled;
>      uint64_t page_size_mask;
>      uint64_t dma64_win_addr;
> +
> +    uint32_t numa_node;
>  };
>  
>  #define SPAPR_PCI_MAX_INDEX          255

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Qemu-devel] [Qemu-ppc] [PATCH qemu] spapr_pci: Add numa node id
  2016-09-22  4:49       ` David Gibson
@ 2016-09-23  1:59         ` David Gibson
  0 siblings, 0 replies; 10+ messages in thread
From: David Gibson @ 2016-09-23  1:59 UTC (permalink / raw)
  To: Michael Roth; +Cc: sbhat, qemu-ppc, qemu-devel, Bharata B Rao

[-- Attachment #1: Type: text/plain, Size: 1975 bytes --]

On Thu, Sep 22, 2016 at 02:49:34PM +1000, David Gibson wrote:
> On Wed, Sep 14, 2016 at 07:03:50AM -0500, Michael Roth wrote:
> > Quoting Alexey Kardashevskiy (2016-09-14 04:39:10)
> > > On 14/09/16 09:29, Michael Roth wrote:
> > > > Quoting Alexey Kardashevskiy (2016-07-27 03:03:38)
> > > >> This adds a numa id property to a PHB to allow linking passed PCI device
> > > >> to CPU/memory. It is up to the management stack to do CPU/memory pinning
> > > >> to the node with the actual PCI device.
> > > > 
> > > > It looks like x86 relies on PCIBus->numa_node() method (via
> > > > pci_bus_numa_node()) to encode similar PCIBus affinities
> > > > into ACPI tables, and currently exposes it via
> > > > -device pci-[-express]-expander-bus,numa_node=X.
> > > 
> > > 
> > > 
> > > Well, until we allow DMA windows per PCI bus (not per PHB as it is now),
> > > this won't make much sense for us (unless I am missing something here).
> > 
> > I didn't consider that it's not a bus-level setting; I think
> > you're right that re-using the interface to both store/retrieve doesn't
> > make much sense in that case.
> > 
> > My thought that was that since pci_bus_numa_node() could in theory come
> > to be relied upon by common PCI code, that we should use it as well. But
> > even if it doesn't make sense for us to use it, wouldn't it make sense to
> > still set PCIBus->numa_node (in addition to the PHB-wide value) in the
> > off-chance that common code does come to rely on
> > pci_bus_numa_node()?
> 
> Yes, it would be a good idea to set the PCIBus node value to inherit
> the one that's set for the host bridge, just in case any generic code
> looks at it in future.

But that can be a followup patch, I've applied this to ppc-for-2.8
now.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2016-09-23  2:49 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-07-27  8:03 [Qemu-devel] [PATCH qemu] spapr_pci: Add numa node id Alexey Kardashevskiy
2016-08-10  5:42 ` Alexey Kardashevskiy
2016-09-13 23:29 ` Michael Roth
2016-09-14  9:39   ` Alexey Kardashevskiy
2016-09-14 12:03     ` Michael Roth
2016-09-22  4:49       ` David Gibson
2016-09-23  1:59         ` [Qemu-devel] [Qemu-ppc] " David Gibson
2016-09-19  3:39     ` [Qemu-devel] " Bharata B Rao
2016-09-22  4:48     ` David Gibson
2016-09-23  1:58 ` [Qemu-devel] [Qemu-ppc] " David Gibson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.