All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [RFCv2 0/2] Don't allocate vcpus on the same guest socket to different NUMA nodes
@ 2015-09-04  4:51 David Gibson
  2015-09-04  4:51 ` [Qemu-devel] [RFCv2 1/2] pseries: Fix incorrect calculation of threads per socket for chip-id David Gibson
  2015-09-04  4:51 ` [Qemu-devel] [RFCv2 2/2] spapr: Fix default NUMA node allocation for threads David Gibson
  0 siblings, 2 replies; 6+ messages in thread
From: David Gibson @ 2015-09-04  4:51 UTC (permalink / raw)
  To: aik, mdroth; +Cc: David Gibson, qemu-ppc, qemu-devel, agraf

As requested, here's an augmented version of the patch to stop guest sockets on
the pseries machine type from being split across NUMA nodes.  This
adds a new patch to first clean up the socket ID calculation that
exists for creating the ibm,chip-id property.

David Gibson (2):
  pseries: Fix incorrect calculation of threads per socket for chip-id
  spapr: Fix default NUMA node allocation for threads

 hw/ppc/spapr.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

-- 
2.4.3

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [Qemu-devel] [RFCv2 1/2] pseries: Fix incorrect calculation of threads per socket for chip-id
  2015-09-04  4:51 [Qemu-devel] [RFCv2 0/2] Don't allocate vcpus on the same guest socket to different NUMA nodes David Gibson
@ 2015-09-04  4:51 ` David Gibson
  2015-09-04  4:56   ` David Gibson
  2015-09-04  4:51 ` [Qemu-devel] [RFCv2 2/2] spapr: Fix default NUMA node allocation for threads David Gibson
  1 sibling, 1 reply; 6+ messages in thread
From: David Gibson @ 2015-09-04  4:51 UTC (permalink / raw)
  To: aik, mdroth; +Cc: David Gibson, qemu-ppc, qemu-devel, agraf

The device tree presented to pseries machine type guests includes an
ibm,chip-id property which gives essentially the socket number of each
vcpu core (individual vcpu threads don't get a node in the device
tree).

To calculate this, it uses a cpus_per_socket variable computed as
(smp_cpus / #sockets).  This is correct for the usual case where
smp_cpus == smp_threads * smp_cores * #sockets.

However, you can start QEMU with the number of cores and threads
mismatching the total number of vcpus (whether that _should_ be
permitted is a topic for another day).  It's a bit hard to say what
the "real" number of vcpus per socket here is, but for most purposes
(smp_threads * smp_cores) will more meaningfully match how QEMU
behaves with respect to socket boundaries.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 hw/ppc/spapr.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 783763f..dbc295b 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -595,8 +595,7 @@ static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset,
     uint32_t page_sizes_prop[64];
     size_t page_sizes_prop_size;
     QemuOpts *opts = qemu_opts_find(qemu_find_opts("smp-opts"), NULL);
-    unsigned sockets = opts ? qemu_opt_get_number(opts, "sockets", 0) : 0;
-    uint32_t cpus_per_socket = sockets ? (smp_cpus / sockets) : 1;
+    uint32_t vcpus_per_socket = smp_threads * smp_cores;
     uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)};
 
     _FDT((fdt_setprop_cell(fdt, offset, "reg", index)));
@@ -665,7 +664,7 @@ static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset,
     }
 
     _FDT((fdt_setprop_cell(fdt, offset, "ibm,chip-id",
-                           cs->cpu_index / cpus_per_socket)));
+                           cs->cpu_index / vcpus_per_socket)));
 
     _FDT((fdt_setprop(fdt, offset, "ibm,pft-size",
                       pft_size_prop, sizeof(pft_size_prop))));
-- 
2.4.3

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [Qemu-devel] [RFCv2 2/2] spapr: Fix default NUMA node allocation for threads
  2015-09-04  4:51 [Qemu-devel] [RFCv2 0/2] Don't allocate vcpus on the same guest socket to different NUMA nodes David Gibson
  2015-09-04  4:51 ` [Qemu-devel] [RFCv2 1/2] pseries: Fix incorrect calculation of threads per socket for chip-id David Gibson
@ 2015-09-04  4:51 ` David Gibson
  2015-09-05 10:12   ` Alexey Kardashevskiy
  1 sibling, 1 reply; 6+ messages in thread
From: David Gibson @ 2015-09-04  4:51 UTC (permalink / raw)
  To: aik, mdroth; +Cc: David Gibson, qemu-ppc, qemu-devel, agraf

At present, if guest numa nodes are requested, but the cpus in each node
are not specified, spapr just uses the default behaviour of assigning each
vcpu round-robin to nodes.

If smp_threads != 1, that will assign adjacent threads in a core to
different NUMA nodes.  As well as being just weird, that's a configuration
that can't be represented in the device tree we give to the guest, which
means the guest and qemu end up with different ideas of the NUMA topology.

This patch implements mc->cpu_index_to_socket_id in the spapr code to
make sure vcpus get assigned to nodes only at the socket granularity.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 hw/ppc/spapr.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index dbc295b..8adef2d 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -2162,6 +2162,13 @@ static HotplugHandler *spapr_get_hotpug_handler(MachineState *machine,
     return NULL;
 }
 
+static unsigned spapr_cpu_index_to_socket_id(unsigned cpu_index)
+{
+    /* Allocate to NUMA nodes on a "socket" basis (not that concept of
+     * socket means much for the paravirtualized PAPR platform) */
+    return cpu_index / smp_threads / smp_cores;
+}
+
 static void spapr_machine_class_init(ObjectClass *oc, void *data)
 {
     MachineClass *mc = MACHINE_CLASS(oc);
@@ -2183,6 +2190,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
     mc->get_hotplug_handler = spapr_get_hotpug_handler;
     hc->plug = spapr_machine_device_plug;
     hc->unplug = spapr_machine_device_unplug;
+    mc->cpu_index_to_socket_id = spapr_cpu_index_to_socket_id;
 
     smc->dr_lmb_enabled = false;
     fwc->get_dev_path = spapr_get_fw_dev_path;
-- 
2.4.3

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [Qemu-devel] [RFCv2 1/2] pseries: Fix incorrect calculation of threads per socket for chip-id
  2015-09-04  4:51 ` [Qemu-devel] [RFCv2 1/2] pseries: Fix incorrect calculation of threads per socket for chip-id David Gibson
@ 2015-09-04  4:56   ` David Gibson
  2015-09-05 10:12     ` Alexey Kardashevskiy
  0 siblings, 1 reply; 6+ messages in thread
From: David Gibson @ 2015-09-04  4:56 UTC (permalink / raw)
  To: aik, mdroth; +Cc: qemu-ppc, qemu-devel, agraf

[-- Attachment #1: Type: text/plain, Size: 2513 bytes --]

On Fri, Sep 04, 2015 at 02:51:39PM +1000, David Gibson wrote:
> The device tree presented to pseries machine type guests includes an
> ibm,chip-id property which gives essentially the socket number of each
> vcpu core (individual vcpu threads don't get a node in the device
> tree).
> 
> To calculate this, it uses a vcpus_per_socket variable computed as
> (smp_cpus / #sockets).  This is correct for the usual case where
> smp_cpus == smp_threads * smp_cores * #sockets.
> 
> However, you can start QEMU with the number of cores and threads
> mismatching the total number of vcpus (whether that _should_ be
> permitted is a topic for another day).  It's a bit hard to say what
> the "real" number of vcpus per socket here is, but for most purposes
> (smp_threads * smp_cores) will more meaningfully match how QEMU
> behaves with respect to socket boundaries.
> 
> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

Gah, sorry, there's a compile error in this.  I got sidetracked
between writing and posting and forgot I hadn't fixed it yet.

> ---
>  hw/ppc/spapr.c | 5 ++---
>  1 file changed, 2 insertions(+), 3 deletions(-)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 783763f..dbc295b 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -595,8 +595,7 @@ static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset,
>      uint32_t page_sizes_prop[64];
>      size_t page_sizes_prop_size;
>      QemuOpts *opts = qemu_opts_find(qemu_find_opts("smp-opts"), NULL);
> -    unsigned sockets = opts ? qemu_opt_get_number(opts, "sockets", 0) : 0;
> -    uint32_t cpus_per_socket = sockets ? (smp_cpus / sockets) : 1;
> +    uint32_t vcpus_per_socket = smp_threads * smp_cores;
>      uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)};
>  
>      _FDT((fdt_setprop_cell(fdt, offset, "reg", index)));
> @@ -665,7 +664,7 @@ static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset,
>      }
>  
>      _FDT((fdt_setprop_cell(fdt, offset, "ibm,chip-id",
> -                           cs->cpu_index / cpus_per_socket)));
> +                           cs->cpu_index / vcpus_per_socket)));
>  
>      _FDT((fdt_setprop(fdt, offset, "ibm,pft-size",
>                        pft_size_prop, sizeof(pft_size_prop))));

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [Qemu-devel] [RFCv2 1/2] pseries: Fix incorrect calculation of threads per socket for chip-id
  2015-09-04  4:56   ` David Gibson
@ 2015-09-05 10:12     ` Alexey Kardashevskiy
  0 siblings, 0 replies; 6+ messages in thread
From: Alexey Kardashevskiy @ 2015-09-05 10:12 UTC (permalink / raw)
  To: David Gibson, mdroth; +Cc: qemu-ppc, qemu-devel, agraf

On 09/04/2015 02:56 PM, David Gibson wrote:
> On Fri, Sep 04, 2015 at 02:51:39PM +1000, David Gibson wrote:
>> The device tree presented to pseries machine type guests includes an
>> ibm,chip-id property which gives essentially the socket number of each
>> vcpu core (individual vcpu threads don't get a node in the device
>> tree).
>>
>> To calculate this, it uses a vcpus_per_socket variable computed as
>> (smp_cpus / #sockets).  This is correct for the usual case where
>> smp_cpus == smp_threads * smp_cores * #sockets.
>>
>> However, you can start QEMU with the number of cores and threads
>> mismatching the total number of vcpus (whether that _should_ be
>> permitted is a topic for another day).  It's a bit hard to say what
>> the "real" number of vcpus per socket here is, but for most purposes
>> (smp_threads * smp_cores) will more meaningfully match how QEMU
>> behaves with respect to socket boundaries.
>>
>> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
>
> Gah, sorry, there's a compile error in this.  I got sidetracked
> between writing and posting and forgot I hadn't fixed it yet.

The error, I believe, is "opts" not being used anymore?
If so,

Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>	


>
>> ---
>>   hw/ppc/spapr.c | 5 ++---
>>   1 file changed, 2 insertions(+), 3 deletions(-)
>>
>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
>> index 783763f..dbc295b 100644
>> --- a/hw/ppc/spapr.c
>> +++ b/hw/ppc/spapr.c
>> @@ -595,8 +595,7 @@ static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset,
>>       uint32_t page_sizes_prop[64];
>>       size_t page_sizes_prop_size;
>>       QemuOpts *opts = qemu_opts_find(qemu_find_opts("smp-opts"), NULL);
>> -    unsigned sockets = opts ? qemu_opt_get_number(opts, "sockets", 0) : 0;
>> -    uint32_t cpus_per_socket = sockets ? (smp_cpus / sockets) : 1;
>> +    uint32_t vcpus_per_socket = smp_threads * smp_cores;
>>       uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)};
>>
>>       _FDT((fdt_setprop_cell(fdt, offset, "reg", index)));
>> @@ -665,7 +664,7 @@ static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset,
>>       }
>>
>>       _FDT((fdt_setprop_cell(fdt, offset, "ibm,chip-id",
>> -                           cs->cpu_index / cpus_per_socket)));
>> +                           cs->cpu_index / vcpus_per_socket)));
>>
>>       _FDT((fdt_setprop(fdt, offset, "ibm,pft-size",
>>                         pft_size_prop, sizeof(pft_size_prop))));
>


-- 
Alexey

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [Qemu-devel] [RFCv2 2/2] spapr: Fix default NUMA node allocation for threads
  2015-09-04  4:51 ` [Qemu-devel] [RFCv2 2/2] spapr: Fix default NUMA node allocation for threads David Gibson
@ 2015-09-05 10:12   ` Alexey Kardashevskiy
  0 siblings, 0 replies; 6+ messages in thread
From: Alexey Kardashevskiy @ 2015-09-05 10:12 UTC (permalink / raw)
  To: David Gibson, mdroth; +Cc: qemu-ppc, qemu-devel, agraf

On 09/04/2015 02:51 PM, David Gibson wrote:
> At present, if guest numa nodes are requested, but the cpus in each node
> are not specified, spapr just uses the default behaviour or assigning each
> vcpu round-robin to nodes.
>
> If smp_threads != 1, that will assign adjacent threads in a core to
> different NUMA nodes.  As well as being just weird, that's a configuration
> that can't be represented in the device tree we give to the guest, which
> means the guest and qemu end up with different ideas of the NUMA topology.
>
> This patch implements mc->cpu_index_to_socket_id in the spapr code to
> make sure vcpus get assigned to nodes only at the socket granularity.
>
> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>	


> ---
>   hw/ppc/spapr.c | 8 ++++++++
>   1 file changed, 8 insertions(+)
>
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index dbc295b..8adef2d 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -2162,6 +2162,13 @@ static HotplugHandler *spapr_get_hotpug_handler(MachineState *machine,
>       return NULL;
>   }
>
> +static unsigned spapr_cpu_index_to_socket_id(unsigned cpu_index)
> +{
> +    /* Allocate to NUMA nodes on a "socket" basis (not that concept of
> +     * socket means much for the paravirtualized PAPR platform) */
> +    return cpu_index / smp_threads / smp_cores;
> +}
> +
>   static void spapr_machine_class_init(ObjectClass *oc, void *data)
>   {
>       MachineClass *mc = MACHINE_CLASS(oc);
> @@ -2183,6 +2190,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
>       mc->get_hotplug_handler = spapr_get_hotpug_handler;
>       hc->plug = spapr_machine_device_plug;
>       hc->unplug = spapr_machine_device_unplug;
> +    mc->cpu_index_to_socket_id = spapr_cpu_index_to_socket_id;
>
>       smc->dr_lmb_enabled = false;
>       fwc->get_dev_path = spapr_get_fw_dev_path;
>


-- 
Alexey

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2015-09-05 10:12 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-09-04  4:51 [Qemu-devel] [RFCv2 0/2] Don't allocate vcpus on the same guest socket to different NUMA nodes David Gibson
2015-09-04  4:51 ` [Qemu-devel] [RFCv2 1/2] pseries: Fix incorrect calculation of threads per socket for chip-id David Gibson
2015-09-04  4:56   ` David Gibson
2015-09-05 10:12     ` Alexey Kardashevskiy
2015-09-04  4:51 ` [Qemu-devel] [RFCv2 2/2] spapr: Fix default NUMA node allocation for threads David Gibson
2015-09-05 10:12   ` Alexey Kardashevskiy

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.