* [RFC PATCH 0/2] Boot time cpupools
@ 2021-11-17  9:57 Luca Fancellu
  2021-11-17  9:57 ` [RFC PATCH 1/2] xen/cpupool: Create different cpupools at boot time Luca Fancellu
                   ` (2 more replies)
  0 siblings, 3 replies; 27+ messages in thread
From: Luca Fancellu @ 2021-11-17  9:57 UTC (permalink / raw)
  To: xen-devel
  Cc: bertrand.marquis, wei.chen, Stefano Stabellini, Julien Grall,
	Volodymyr Babchuk, Andrew Cooper, George Dunlap, Ian Jackson,
	Jan Beulich, Wei Liu, Juergen Gross, Dario Faggioli,
	Anthony PERARD

Currently Xen creates a default cpupool0 that contains all the CPUs brought up
during boot, and it assumes that the platform has only one kind of CPU.
This assumption does not hold on big.LITTLE platforms, and putting different
types of CPUs in the same cpupool can result in instability and security issues
for the domains running on the pool.

For this reason this series introduces an architecture-specific way to create
different cpupools at boot time. This is particularly useful on ARM big.LITTLE
platforms, where there might be the need to have a different cpupool for each
type of core, but systems using NUMA can likewise have a different cpupool for
each node.

To test this series, the hmp-unsafe Xen boot argument is passed to allow Xen
to bring up CPUs that differ from the boot core.

Luca Fancellu (2):
  xen/cpupool: Create different cpupools at boot time
  tools/cpupools: Give a name to unnamed cpupools

 tools/libs/light/libxl_utils.c | 13 ++++--
 xen/arch/arm/Kconfig           | 15 ++++++
 xen/arch/arm/Makefile          |  1 +
 xen/arch/arm/cpupool.c         | 84 ++++++++++++++++++++++++++++++++++
 xen/common/sched/cpupool.c     |  5 +-
 xen/include/xen/cpupool.h      | 30 ++++++++++++
 6 files changed, 142 insertions(+), 6 deletions(-)
 create mode 100644 xen/arch/arm/cpupool.c
 create mode 100644 xen/include/xen/cpupool.h

-- 
2.17.1




* [RFC PATCH 1/2] xen/cpupool: Create different cpupools at boot time
  2021-11-17  9:57 [RFC PATCH 0/2] Boot time cpupools Luca Fancellu
@ 2021-11-17  9:57 ` Luca Fancellu
  2021-11-17 11:05   ` Juergen Gross
  2021-11-18  2:59   ` Stefano Stabellini
  2021-11-17  9:57 ` [RFC PATCH 2/2] tools/cpupools: Give a name to unnamed cpupools Luca Fancellu
  2021-11-17 10:26 ` [RFC PATCH 0/2] Boot time cpupools Julien Grall
  2 siblings, 2 replies; 27+ messages in thread
From: Luca Fancellu @ 2021-11-17  9:57 UTC (permalink / raw)
  To: xen-devel
  Cc: bertrand.marquis, wei.chen, Stefano Stabellini, Julien Grall,
	Volodymyr Babchuk, Andrew Cooper, George Dunlap, Ian Jackson,
	Jan Beulich, Wei Liu, Juergen Gross, Dario Faggioli

Introduce an architecture-specific way to create different
cpupools at boot time. This is particularly useful on ARM
big.LITTLE systems, where there might be the need to have
different cpupools for each type of core, but systems
using NUMA can likewise have a different cpupool for each node.

Signed-off-by: Luca Fancellu <luca.fancellu@arm.com>
---
 xen/arch/arm/Kconfig       | 15 +++++++
 xen/arch/arm/Makefile      |  1 +
 xen/arch/arm/cpupool.c     | 84 ++++++++++++++++++++++++++++++++++++++
 xen/common/sched/cpupool.c |  5 ++-
 xen/include/xen/cpupool.h  | 30 ++++++++++++++
 5 files changed, 134 insertions(+), 1 deletion(-)
 create mode 100644 xen/arch/arm/cpupool.c
 create mode 100644 xen/include/xen/cpupool.h

diff --git a/xen/arch/arm/Kconfig b/xen/arch/arm/Kconfig
index ecfa6822e4..4d7cc9f3bc 100644
--- a/xen/arch/arm/Kconfig
+++ b/xen/arch/arm/Kconfig
@@ -33,6 +33,21 @@ config ACPI
 	  Advanced Configuration and Power Interface (ACPI) support for Xen is
 	  an alternative to device tree on ARM64.
 
+config BOOT_TIME_CPUPOOLS
+	bool "Create cpupools per cpu type at boot time."
+	depends on ARM
+	default n
+	help
+
+	  Creates, during boot, a cpu pool for each type of CPU found on
+	  the system. This option is useful on system with heterogeneous
+	  types of core.
+
+config MAX_BOOT_TIME_CPUPOOLS
+	depends on BOOT_TIME_CPUPOOLS
+	int "Maximum number of cpupools that can be created at boot time."
+	default 16
+
 config GICV3
 	bool "GICv3 driver"
 	depends on ARM_64 && !NEW_VGIC
diff --git a/xen/arch/arm/Makefile b/xen/arch/arm/Makefile
index 07f634508e..0bb8b84750 100644
--- a/xen/arch/arm/Makefile
+++ b/xen/arch/arm/Makefile
@@ -13,6 +13,7 @@ obj-$(CONFIG_HAS_ALTERNATIVE) += alternative.o
 obj-y += bootfdt.init.o
 obj-y += cpuerrata.o
 obj-y += cpufeature.o
+obj-$(CONFIG_BOOT_TIME_CPUPOOLS) += cpupool.o
 obj-y += decode.o
 obj-y += device.o
 obj-$(CONFIG_IOREQ_SERVER) += dm.o
diff --git a/xen/arch/arm/cpupool.c b/xen/arch/arm/cpupool.c
new file mode 100644
index 0000000000..550521e352
--- /dev/null
+++ b/xen/arch/arm/cpupool.c
@@ -0,0 +1,84 @@
+/******************************************************************************
+ * cpupool.c
+ *
+ * (C) 2021, arm
+ */
+
+#include <xen/cpumask.h>
+#include <xen/cpupool.h>
+#include <xen/err.h>
+#include <xen/sched.h>
+#include <asm/cpufeature.h>
+#include <asm/processor.h>
+#include "../../common/sched/private.h"
+
+typedef struct {
+    register_t     midr;
+    struct cpupool *pool;
+} pool_t;
+
+static pool_t __initdata pool_cpu_map[CONFIG_MAX_BOOT_TIME_CPUPOOLS];
+
+void __init arch_allocate_cpupools(const cpumask_t *cpu_online_map,
+                                   cpupool_create_t cpupool_create)
+{
+    unsigned int cpu, i;
+
+    /* First pool is the default pool0 associated with midr of the boot core */
+    pool_cpu_map[0].midr = system_cpuinfo.midr.bits;
+    pool_cpu_map[0].pool = cpupool0;
+
+    for_each_cpu ( cpu, cpu_online_map )
+    {
+        for ( i = 0; i < CONFIG_MAX_BOOT_TIME_CPUPOOLS; i++ )
+            if ( !pool_cpu_map[i].pool ||
+                 (cpu_data[cpu].midr.bits == pool_cpu_map[i].midr) )
+                break;
+
+        if ( i < CONFIG_MAX_BOOT_TIME_CPUPOOLS )
+        {
+            if ( !pool_cpu_map[i].pool )
+            {
+                /* There is no pool for this cpu midr, create it */
+                pool_cpu_map[i].midr = cpu_data[cpu].midr.bits;
+                debugtrace_printk("Create pool %u for cpu MIDR: 0x%"
+                                  PRIregister"\n", i, pool_cpu_map[i].midr);
+                pool_cpu_map[i].pool =
+                    cpupool_create(i, scheduler_get_default()->sched_id);
+                BUG_ON(IS_ERR(pool_cpu_map[i].pool));
+                cpupool_put(pool_cpu_map[i].pool);
+            }
+        }
+        else
+            panic("Could not create cpupool, maximum number reached (%u)",
+                  (unsigned int)(CONFIG_MAX_BOOT_TIME_CPUPOOLS));
+    }
+
+    /* Print useful information about the pools */
+    for ( i = 0; i < CONFIG_MAX_BOOT_TIME_CPUPOOLS; i++ )
+        if ( pool_cpu_map[i].pool )
+            printk(XENLOG_INFO "Pool-%u contains cpu with MIDR: 0x%"
+                   PRIregister"\n", i, pool_cpu_map[i].midr);
+}
+
+struct cpupool *__init arch_get_cpupool(unsigned int cpu)
+{
+    unsigned int i;
+
+    for ( i = 0; i < CONFIG_MAX_BOOT_TIME_CPUPOOLS; i++ )
+        if ( pool_cpu_map[i].pool &&
+             (cpu_data[cpu].midr.bits == pool_cpu_map[i].midr) )
+            return pool_cpu_map[i].pool;
+
+    return cpupool0;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/common/sched/cpupool.c b/xen/common/sched/cpupool.c
index 8c6e6eb9cc..7389f04e28 100644
--- a/xen/common/sched/cpupool.c
+++ b/xen/common/sched/cpupool.c
@@ -13,6 +13,7 @@
 
 #include <xen/cpu.h>
 #include <xen/cpumask.h>
+#include <xen/cpupool.h>
 #include <xen/guest_access.h>
 #include <xen/hypfs.h>
 #include <xen/init.h>
@@ -1231,12 +1232,14 @@ static int __init cpupool_init(void)
     cpupool_put(cpupool0);
     register_cpu_notifier(&cpu_nfb);
 
+    arch_allocate_cpupools(&cpu_online_map, cpupool_create);
+
     spin_lock(&cpupool_lock);
 
     cpumask_copy(&cpupool_free_cpus, &cpu_online_map);
 
     for_each_cpu ( cpu, &cpupool_free_cpus )
-        cpupool_assign_cpu_locked(cpupool0, cpu);
+        cpupool_assign_cpu_locked(arch_get_cpupool(cpu), cpu);
 
     spin_unlock(&cpupool_lock);
 
diff --git a/xen/include/xen/cpupool.h b/xen/include/xen/cpupool.h
new file mode 100644
index 0000000000..4b50af9e3d
--- /dev/null
+++ b/xen/include/xen/cpupool.h
@@ -0,0 +1,30 @@
+#ifndef __XEN_CPUPOOL_H
+#define __XEN_CPUPOOL_H
+
+#include <xen/sched.h>
+
+typedef struct cpupool* (*cpupool_create_t)(unsigned int, unsigned int);
+
+#ifdef CONFIG_BOOT_TIME_CPUPOOLS
+
+void arch_allocate_cpupools(const cpumask_t *cpu_online_map,
+                            cpupool_create_t cpupool_create);
+
+struct cpupool *arch_get_cpupool(unsigned int cpu);
+
+#else
+
+static inline __init
+void arch_allocate_cpupools(const cpumask_t *cpu_online_map,
+                            cpupool_create_t cpupool_create)
+{
+}
+
+static inline struct cpupool *__init arch_get_cpupool(unsigned int cpu)
+{
+    return cpupool0;
+}
+
+#endif
+
+#endif /* __XEN_CPUPOOL_H */
-- 
2.17.1




* [RFC PATCH 2/2] tools/cpupools: Give a name to unnamed cpupools
  2021-11-17  9:57 [RFC PATCH 0/2] Boot time cpupools Luca Fancellu
  2021-11-17  9:57 ` [RFC PATCH 1/2] xen/cpupool: Create different cpupools at boot time Luca Fancellu
@ 2021-11-17  9:57 ` Luca Fancellu
  2021-11-17 11:10   ` Juergen Gross
  2021-11-17 10:26 ` [RFC PATCH 0/2] Boot time cpupools Julien Grall
  2 siblings, 1 reply; 27+ messages in thread
From: Luca Fancellu @ 2021-11-17  9:57 UTC (permalink / raw)
  To: xen-devel
  Cc: bertrand.marquis, wei.chen, Ian Jackson, Wei Liu, Anthony PERARD,
	Juergen Gross

With the introduction of boot time cpupools, Xen can
create at boot time many cpupools other than the
cpupool with id 0.

Since these newly created cpupools can't have an
entry in Xenstore, name them with the same convention
used for cpupool 0: Pool-<cpupool id>.

Signed-off-by: Luca Fancellu <luca.fancellu@arm.com>
---
 tools/libs/light/libxl_utils.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/tools/libs/light/libxl_utils.c b/tools/libs/light/libxl_utils.c
index 4699c4a0a3..d97d91ca98 100644
--- a/tools/libs/light/libxl_utils.c
+++ b/tools/libs/light/libxl_utils.c
@@ -147,13 +147,16 @@ int libxl_cpupool_qualifier_to_cpupoolid(libxl_ctx *ctx, const char *p,
 char *libxl_cpupoolid_to_name(libxl_ctx *ctx, uint32_t poolid)
 {
     unsigned int len;
-    char path[strlen("/local/pool") + 12];
+    char buffer[strlen("/local/pool") + 12];
     char *s;
 
-    snprintf(path, sizeof(path), "/local/pool/%d/name", poolid);
-    s = xs_read(ctx->xsh, XBT_NULL, path, &len);
-    if (!s && (poolid == 0))
-        return strdup("Pool-0");
+    snprintf(buffer, sizeof(buffer), "/local/pool/%d/name", poolid);
+    s = xs_read(ctx->xsh, XBT_NULL, buffer, &len);
+    if (!s)
+    {
+        snprintf(buffer, sizeof(buffer), "Pool-%d", poolid);
+        return strdup(buffer);
+    }
     return s;
 }
 
-- 
2.17.1




* Re: [RFC PATCH 0/2] Boot time cpupools
  2021-11-17  9:57 [RFC PATCH 0/2] Boot time cpupools Luca Fancellu
  2021-11-17  9:57 ` [RFC PATCH 1/2] xen/cpupool: Create different cpupools at boot time Luca Fancellu
  2021-11-17  9:57 ` [RFC PATCH 2/2] tools/cpupools: Give a name to unnamed cpupools Luca Fancellu
@ 2021-11-17 10:26 ` Julien Grall
  2021-11-17 10:41   ` Juergen Gross
  2021-11-17 11:16   ` Bertrand Marquis
  2 siblings, 2 replies; 27+ messages in thread
From: Julien Grall @ 2021-11-17 10:26 UTC (permalink / raw)
  To: Luca Fancellu, xen-devel
  Cc: bertrand.marquis, wei.chen, Stefano Stabellini,
	Volodymyr Babchuk, Andrew Cooper, George Dunlap, Ian Jackson,
	Jan Beulich, Wei Liu, Juergen Gross, Dario Faggioli,
	Anthony PERARD

Hi Luca,

On 17/11/2021 09:57, Luca Fancellu wrote:
> Currently Xen creates a default cpupool0 that contains all the cpu brought up
> during boot and it assumes that the platform has only one kind of CPU.
> This assumption does not hold on big.LITTLE platform, but putting different
> type of CPU in the same cpupool can result in instability and security issues
> for the domains running on the pool.

I agree that you can't move a LITTLE vCPU to a big pCPU. However...

> 
> For this reason this serie introduces an architecture specific way to create
> different cpupool at boot time, this is particularly useful on ARM big.LITTLE
> platform where there might be the need to have different cpupools for each type
> of core, but also systems using NUMA can have different cpu pool for each node.

... from my understanding, all the vCPUs of a domain have to be in the 
same cpupool. So with this approach it is not possible:
    1) to have a mix of LITTLE and big vCPUs in the domain
    2) to create a domain spanning across two NUMA nodes

So I think we need to make sure that any solution we go through will 
not prevent us from implementing those setups.

I can see two options here:
   1) Allowing a domain's vCPUs to be in different cpupools
   2) Introducing CPU classes (see [1])

I can't remember why Dario suggested 2) rather than 1) in the past. 
@Dario, do you remember it?

Cheers,

[1] https://lore.kernel.org/xen-devel/1481135379.3445.142.camel@citrix.com/

> 
> To test this serie, the hmp_unsafe Xen boot argument is passed to allow Xen
> starting different CPUs from the boot core.
> 
> Luca Fancellu (2):
>    xen/cpupool: Create different cpupools at boot time
>    tools/cpupools: Give a name to unnamed cpupools
> 
>   tools/libs/light/libxl_utils.c | 13 ++++--
>   xen/arch/arm/Kconfig           | 15 ++++++
>   xen/arch/arm/Makefile          |  1 +
>   xen/arch/arm/cpupool.c         | 84 ++++++++++++++++++++++++++++++++++
>   xen/common/sched/cpupool.c     |  5 +-
>   xen/include/xen/cpupool.h      | 30 ++++++++++++
>   6 files changed, 142 insertions(+), 6 deletions(-)
>   create mode 100644 xen/arch/arm/cpupool.c
>   create mode 100644 xen/include/xen/cpupool.h
> 

-- 
Julien Grall



* Re: [RFC PATCH 0/2] Boot time cpupools
  2021-11-17 10:26 ` [RFC PATCH 0/2] Boot time cpupools Julien Grall
@ 2021-11-17 10:41   ` Juergen Gross
  2021-11-17 11:16   ` Bertrand Marquis
  1 sibling, 0 replies; 27+ messages in thread
From: Juergen Gross @ 2021-11-17 10:41 UTC (permalink / raw)
  To: Julien Grall, Luca Fancellu, xen-devel
  Cc: bertrand.marquis, wei.chen, Stefano Stabellini,
	Volodymyr Babchuk, Andrew Cooper, George Dunlap, Ian Jackson,
	Jan Beulich, Wei Liu, Dario Faggioli, Anthony PERARD



On 17.11.21 11:26, Julien Grall wrote:
> Hi Luca,
> 
> On 17/11/2021 09:57, Luca Fancellu wrote:
>> Currently Xen creates a default cpupool0 that contains all the cpu 
>> brought up
>> during boot and it assumes that the platform has only one kind of CPU.
>> This assumption does not hold on big.LITTLE platform, but putting 
>> different
>> type of CPU in the same cpupool can result in instability and security 
>> issues
>> for the domains running on the pool.
> 
> I agree that you can't move a LITTLE vCPU to a big pCPU. However...
> 
>>
>> For this reason this serie introduces an architecture specific way to 
>> create
>> different cpupool at boot time, this is particularly useful on ARM 
>> big.LITTLE
>> platform where there might be the need to have different cpupools for 
>> each type
>> of core, but also systems using NUMA can have different cpu pool for 
>> each node.
> 
> ... from my understanding, all the vCPUs of a domain have to be in the 
> same cpupool. So with this approach it is not possible:
>     1) to have a mix of LITTLE and big vCPUs in the domain
>     2) to create a domain spanning across two NUMA nodes
> 
> So I think we need to make sure that any solutions we go through will 
> not prevent us to implement those setups.
> 
> I can see two options here:
>    1) Allowing a domain vCPUs to be on a different cpupool
>    2) Introducing CPU class (see [1])
> 
> I can't remember why Dario suggested 2) rather than 1) in the past. 
> @Dario, do you remember it?

Having the vcpus of a domain in different cpupools would probably require
a major scheduling rework, as several pieces of accounting information are
per-cpupool today, with some of that data additionally being per-domain.
Not to mention the per-domain scheduling parameters, which would suddenly
need to cope with different schedulers (imagine one cpupool using credit2
and the other rt).

I'd rather use implicit pinning e.g. via a cpu class.


Juergen




* Re: [RFC PATCH 1/2] xen/cpupool: Create different cpupools at boot time
  2021-11-17  9:57 ` [RFC PATCH 1/2] xen/cpupool: Create different cpupools at boot time Luca Fancellu
@ 2021-11-17 11:05   ` Juergen Gross
  2021-11-18  3:01     ` Stefano Stabellini
  2021-11-18  2:59   ` Stefano Stabellini
  1 sibling, 1 reply; 27+ messages in thread
From: Juergen Gross @ 2021-11-17 11:05 UTC (permalink / raw)
  To: Luca Fancellu, xen-devel
  Cc: bertrand.marquis, wei.chen, Stefano Stabellini, Julien Grall,
	Volodymyr Babchuk, Andrew Cooper, George Dunlap, Ian Jackson,
	Jan Beulich, Wei Liu, Dario Faggioli



On 17.11.21 10:57, Luca Fancellu wrote:
> Introduce an architecture specific way to create different
> cpupool at boot time, this is particularly useful on ARM
> biglittle system where there might be the need to have
> different cpupools for each type of core, but also systems
> using NUMA can have different cpu pool for each node.
> 
> Signed-off-by: Luca Fancellu <luca.fancellu@arm.com>
> ---
>   xen/arch/arm/Kconfig       | 15 +++++++
>   xen/arch/arm/Makefile      |  1 +
>   xen/arch/arm/cpupool.c     | 84 ++++++++++++++++++++++++++++++++++++++
>   xen/common/sched/cpupool.c |  5 ++-
>   xen/include/xen/cpupool.h  | 30 ++++++++++++++
>   5 files changed, 134 insertions(+), 1 deletion(-)
>   create mode 100644 xen/arch/arm/cpupool.c
>   create mode 100644 xen/include/xen/cpupool.h
> 
> diff --git a/xen/arch/arm/Kconfig b/xen/arch/arm/Kconfig
> index ecfa6822e4..4d7cc9f3bc 100644
> --- a/xen/arch/arm/Kconfig
> +++ b/xen/arch/arm/Kconfig
> @@ -33,6 +33,21 @@ config ACPI
>   	  Advanced Configuration and Power Interface (ACPI) support for Xen is
>   	  an alternative to device tree on ARM64.
>   
> +config BOOT_TIME_CPUPOOLS
> +	bool "Create cpupools per cpu type at boot time."
> +	depends on ARM
> +	default n
> +	help
> +
> +	  Creates, during boot, a cpu pool for each type of CPU found on
> +	  the system. This option is useful on system with heterogeneous
> +	  types of core.
> +
> +config MAX_BOOT_TIME_CPUPOOLS
> +	depends on BOOT_TIME_CPUPOOLS
> +	int "Maximum number of cpupools that can be created at boot time."
> +	default 16
> +
>   config GICV3
>   	bool "GICv3 driver"
>   	depends on ARM_64 && !NEW_VGIC
> diff --git a/xen/arch/arm/Makefile b/xen/arch/arm/Makefile
> index 07f634508e..0bb8b84750 100644
> --- a/xen/arch/arm/Makefile
> +++ b/xen/arch/arm/Makefile
> @@ -13,6 +13,7 @@ obj-$(CONFIG_HAS_ALTERNATIVE) += alternative.o
>   obj-y += bootfdt.init.o
>   obj-y += cpuerrata.o
>   obj-y += cpufeature.o
> +obj-$(CONFIG_BOOT_TIME_CPUPOOLS) += cpupool.o
>   obj-y += decode.o
>   obj-y += device.o
>   obj-$(CONFIG_IOREQ_SERVER) += dm.o
> diff --git a/xen/arch/arm/cpupool.c b/xen/arch/arm/cpupool.c
> new file mode 100644
> index 0000000000..550521e352
> --- /dev/null
> +++ b/xen/arch/arm/cpupool.c
> @@ -0,0 +1,84 @@
> +/******************************************************************************
> + * cpupool.c
> + *
> + * (C) 2021, arm
> + */
> +
> +#include <xen/cpumask.h>
> +#include <xen/cpupool.h>
> +#include <xen/err.h>
> +#include <xen/sched.h>
> +#include <asm/cpufeature.h>
> +#include <asm/processor.h>
> +#include "../../common/sched/private.h"

No, please don't do that.

You should try to add architecture-agnostic service functions in
common/sched/cpupool.c, removing the need to include that file here.

> +
> +typedef struct {
> +    register_t     midr;
> +    struct cpupool *pool;
> +} pool_t;
> +
> +static pool_t __initdata pool_cpu_map[CONFIG_MAX_BOOT_TIME_CPUPOOLS];
> +
> +void __init arch_allocate_cpupools(const cpumask_t *cpu_online_map,
> +                                   cpupool_create_t cpupool_create)

Drop the cpupool_create parameter here and ...

> +{
> +    unsigned int cpu, i;
> +
> +    /* First pool is the default pool0 associated with midr of the boot core */
> +    pool_cpu_map[0].midr = system_cpuinfo.midr.bits;
> +    pool_cpu_map[0].pool = cpupool0;
> +
> +    for_each_cpu ( cpu, cpu_online_map )
> +    {
> +        for ( i = 0; i < CONFIG_MAX_BOOT_TIME_CPUPOOLS; i++ )
> +            if ( !pool_cpu_map[i].pool ||
> +                 (cpu_data[cpu].midr.bits == pool_cpu_map[i].midr) )
> +                break;
> +
> +        if ( i < CONFIG_MAX_BOOT_TIME_CPUPOOLS )
> +        {
> +            if ( !pool_cpu_map[i].pool )
> +            {
> +                /* There is no pool for this cpu midr, create it */
> +                pool_cpu_map[i].midr = cpu_data[cpu].midr.bits;
> +                debugtrace_printk("Create pool %u for cpu MIDR: 0x%"
> +                                  PRIregister"\n", i, pool_cpu_map[i].midr);
> +                pool_cpu_map[i].pool =
> +                    cpupool_create(i, scheduler_get_default()->sched_id);
> +                BUG_ON(IS_ERR(pool_cpu_map[i].pool));
> +                cpupool_put(pool_cpu_map[i].pool);

... call a new wrapper in common/sched/cpupool.c taking just the id as
parameter (e.g. "cpupool *cpupool_create_default(unsigned int id)")
which will use the default scheduler and do the cpupool_create() and
cpupool_put() calls internally.
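
An (untested) sketch of such a wrapper, living next to cpupool_create() in
common/sched/cpupool.c:

    struct cpupool *__init cpupool_create_default(unsigned int pool_id)
    {
        struct cpupool *pool;

        pool = cpupool_create(pool_id, scheduler_get_default()->sched_id);
        BUG_ON(IS_ERR(pool));
        /* Drop the creation reference, as the caller in the patch does. */
        cpupool_put(pool);

        return pool;
    }

The arch code would then only need to pass the pool id, and the private
scheduler header could stay private to common/sched.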

> +            }
> +        }
> +        else
> +            panic("Could not create cpupool, maximum number reached (%u)",
> +                  (unsigned int)(CONFIG_MAX_BOOT_TIME_CPUPOOLS));
> +    }
> +
> +    /* Print useful information about the pools */
> +    for ( i = 0; i < CONFIG_MAX_BOOT_TIME_CPUPOOLS; i++ )
> +        if ( pool_cpu_map[i].pool )
> +            printk(XENLOG_INFO "Pool-%u contains cpu with MIDR: 0x%"
> +                   PRIregister"\n", i, pool_cpu_map[i].midr);
> +}
> +
> +struct cpupool *__init arch_get_cpupool(unsigned int cpu)
> +{
> +    unsigned int i;
> +
> +    for ( i = 0; i < CONFIG_MAX_BOOT_TIME_CPUPOOLS; i++ )
> +        if ( pool_cpu_map[i].pool &&
> +             (cpu_data[cpu].midr.bits == pool_cpu_map[i].midr) )
> +            return pool_cpu_map[i].pool;
> +
> +    return cpupool0;
> +}
I don't think this return can be reached.

What about adding an "ASSERT_UNREACHABLE()" here and returning NULL instead?
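
That is, something along these lines:

    ASSERT_UNREACHABLE();
    return NULL;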

> +
> +/*
> + * Local variables:
> + * mode: C
> + * c-file-style: "BSD"
> + * c-basic-offset: 4
> + * tab-width: 4
> + * indent-tabs-mode: nil
> + * End:
> + */
> diff --git a/xen/common/sched/cpupool.c b/xen/common/sched/cpupool.c
> index 8c6e6eb9cc..7389f04e28 100644
> --- a/xen/common/sched/cpupool.c
> +++ b/xen/common/sched/cpupool.c
> @@ -13,6 +13,7 @@
>   
>   #include <xen/cpu.h>
>   #include <xen/cpumask.h>
> +#include <xen/cpupool.h>
>   #include <xen/guest_access.h>
>   #include <xen/hypfs.h>
>   #include <xen/init.h>
> @@ -1231,12 +1232,14 @@ static int __init cpupool_init(void)
>       cpupool_put(cpupool0);
>       register_cpu_notifier(&cpu_nfb);
>   
> +    arch_allocate_cpupools(&cpu_online_map, cpupool_create);
> +
>       spin_lock(&cpupool_lock);
>   
>       cpumask_copy(&cpupool_free_cpus, &cpu_online_map);
>   
>       for_each_cpu ( cpu, &cpupool_free_cpus )
> -        cpupool_assign_cpu_locked(cpupool0, cpu);
> +        cpupool_assign_cpu_locked(arch_get_cpupool(cpu), cpu);
>   
>       spin_unlock(&cpupool_lock);
>   
> diff --git a/xen/include/xen/cpupool.h b/xen/include/xen/cpupool.h
> new file mode 100644
> index 0000000000..4b50af9e3d
> --- /dev/null
> +++ b/xen/include/xen/cpupool.h
> @@ -0,0 +1,30 @@
> +#ifndef __XEN_CPUPOOL_H
> +#define __XEN_CPUPOOL_H
> +
> +#include <xen/sched.h>
> +
> +typedef struct cpupool* (*cpupool_create_t)(unsigned int, unsigned int);
> +
> +#ifdef CONFIG_BOOT_TIME_CPUPOOLS
> +
> +void arch_allocate_cpupools(const cpumask_t *cpu_online_map,
> +                            cpupool_create_t cpupool_create);
> +
> +struct cpupool *arch_get_cpupool(unsigned int cpu);
> +
> +#else
> +
> +static inline __init
> +void arch_allocate_cpupools(const cpumask_t *cpu_online_map,
> +                            cpupool_create_t cpupool_create)
> +{
> +}
> +
> +static inline struct cpupool *__init arch_get_cpupool(unsigned int cpu)
> +{
> +    return cpupool0;
> +}
> +
> +#endif
> +
> +#endif /* __XEN_CPUPOOL_H */
> 

I think this can all go into include/xen/sched.h.


Juergen



* Re: [RFC PATCH 2/2] tools/cpupools: Give a name to unnamed cpupools
  2021-11-17  9:57 ` [RFC PATCH 2/2] tools/cpupools: Give a name to unnamed cpupools Luca Fancellu
@ 2021-11-17 11:10   ` Juergen Gross
  2021-11-17 11:52     ` Luca Fancellu
  0 siblings, 1 reply; 27+ messages in thread
From: Juergen Gross @ 2021-11-17 11:10 UTC (permalink / raw)
  To: Luca Fancellu, xen-devel
  Cc: bertrand.marquis, wei.chen, Ian Jackson, Wei Liu, Anthony PERARD



On 17.11.21 10:57, Luca Fancellu wrote:
> With the introduction of boot time cpupools, Xen can
> create at boot time many cpupools different from the
> cpupool with id 0.
> 
> Since these newly created cpupools can't have an
> entry in Xenstore, name them with the same convention
> used for the cpupool 0: Pool-<cpupool id>.
> 
> Signed-off-by: Luca Fancellu <luca.fancellu@arm.com>
> ---
>   tools/libs/light/libxl_utils.c | 13 ++++++++-----
>   1 file changed, 8 insertions(+), 5 deletions(-)
> 
> diff --git a/tools/libs/light/libxl_utils.c b/tools/libs/light/libxl_utils.c
> index 4699c4a0a3..d97d91ca98 100644
> --- a/tools/libs/light/libxl_utils.c
> +++ b/tools/libs/light/libxl_utils.c
> @@ -147,13 +147,16 @@ int libxl_cpupool_qualifier_to_cpupoolid(libxl_ctx *ctx, const char *p,
>   char *libxl_cpupoolid_to_name(libxl_ctx *ctx, uint32_t poolid)
>   {
>       unsigned int len;
> -    char path[strlen("/local/pool") + 12];
> +    char buffer[strlen("/local/pool") + 12];
>       char *s;
>   
> -    snprintf(path, sizeof(path), "/local/pool/%d/name", poolid);
> -    s = xs_read(ctx->xsh, XBT_NULL, path, &len);
> -    if (!s && (poolid == 0))
> -        return strdup("Pool-0");
> +    snprintf(buffer, sizeof(buffer), "/local/pool/%d/name", poolid);
> +    s = xs_read(ctx->xsh, XBT_NULL, buffer, &len);
> +    if (!s)
> +    {
> +        snprintf(buffer, sizeof(buffer), "Pool-%d", poolid);
> +        return strdup(buffer);
> +    }
>       return s;
>   }
>   
> 

This breaks libxl_cpupoolid_is_valid(), as it will now always return
true, regardless of whether the poolid exists or not.
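
For reference, libxl_cpupoolid_is_valid() is (roughly) implemented on top
of the name lookup, so synthesizing a name for any poolid makes every id
look valid:

    int libxl_cpupoolid_is_valid(libxl_ctx *ctx, uint32_t poolid)
    {
        char *name = libxl_cpupoolid_to_name(ctx, poolid);
        int ret = (name != NULL);

        free(name);
        return ret;
    }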


Juergen



* Re: [RFC PATCH 0/2] Boot time cpupools
  2021-11-17 10:26 ` [RFC PATCH 0/2] Boot time cpupools Julien Grall
  2021-11-17 10:41   ` Juergen Gross
@ 2021-11-17 11:16   ` Bertrand Marquis
  2021-11-17 11:21     ` Juergen Gross
  2021-11-17 11:48     ` Julien Grall
  1 sibling, 2 replies; 27+ messages in thread
From: Bertrand Marquis @ 2021-11-17 11:16 UTC (permalink / raw)
  To: Julien Grall
  Cc: Luca Fancellu, Xen-devel, Wei Chen, Stefano Stabellini,
	Volodymyr Babchuk, Andrew Cooper, George Dunlap, Ian Jackson,
	Jan Beulich, Wei Liu, Juergen Gross, Dario Faggioli,
	Anthony PERARD

Hi Julien,

> On 17 Nov 2021, at 10:26, Julien Grall <julien@xen.org> wrote:
> 
> Hi Luca,
> 
> On 17/11/2021 09:57, Luca Fancellu wrote:
>> Currently Xen creates a default cpupool0 that contains all the cpu brought up
>> during boot and it assumes that the platform has only one kind of CPU.
>> This assumption does not hold on big.LITTLE platform, but putting different
>> type of CPU in the same cpupool can result in instability and security issues
>> for the domains running on the pool.
> 
> I agree that you can't move a LITTLE vCPU to a big pCPU. However...
> 
>> For this reason this serie introduces an architecture specific way to create
>> different cpupool at boot time, this is particularly useful on ARM big.LITTLE
>> platform where there might be the need to have different cpupools for each type
>> of core, but also systems using NUMA can have different cpu pool for each node.
> 
> ... from my understanding, all the vCPUs of a domain have to be in the same cpupool. So with this approach it is not possible:
>   1) to have a mix of LITTLE and big vCPUs in the domain
>   2) to create a domain spanning across two NUMA nodes
> 
> So I think we need to make sure that any solutions we go through will not prevent us to implement those setups.

The point of this patch is to make all cores available without breaking the current behaviour of existing systems.

Someone not using cpupools will keep running on the same cores as before.
Someone wanting to use the other cores could assign a guest to the other cpupool(s) (big.LITTLE is just an example with 2 types, but there are now systems with 3 types of cores).
Someone wanting to build something different can now create new cpupools in Dom0 and assign to them the cores they want, to build a guest having access to different types of cores.

The point here is just to make the “other” cores accessible and park them in cpupools so that the current behaviour is not changed.

> 
> I can see two options here:
>  1) Allowing a domain vCPUs to be on a different cpupool
>  2) Introducing CPU class (see [1])
> 
> I can't remember why Dario suggested 2) rather than 1) in the past. @Dario, do you remember it?

I think 1) is definitely interesting and something that could be looked at in the future.

This series just aims at making all cores available without breaking backward compatibility, which is a good improvement and does not contradict the 2 options you are suggesting.

Cheers
Bertrand

> 
> Cheers,
> 
> [1] https://lore.kernel.org/xen-devel/1481135379.3445.142.camel@citrix.com/
> 
>> To test this serie, the hmp_unsafe Xen boot argument is passed to allow Xen
>> starting different CPUs from the boot core.
>> Luca Fancellu (2):
>>   xen/cpupool: Create different cpupools at boot time
>>   tools/cpupools: Give a name to unnamed cpupools
>>  tools/libs/light/libxl_utils.c | 13 ++++--
>>  xen/arch/arm/Kconfig           | 15 ++++++
>>  xen/arch/arm/Makefile          |  1 +
>>  xen/arch/arm/cpupool.c         | 84 ++++++++++++++++++++++++++++++++++
>>  xen/common/sched/cpupool.c     |  5 +-
>>  xen/include/xen/cpupool.h      | 30 ++++++++++++
>>  6 files changed, 142 insertions(+), 6 deletions(-)
>>  create mode 100644 xen/arch/arm/cpupool.c
>>  create mode 100644 xen/include/xen/cpupool.h
> 
> -- 
> Julien Grall



* Re: [RFC PATCH 0/2] Boot time cpupools
  2021-11-17 11:16   ` Bertrand Marquis
@ 2021-11-17 11:21     ` Juergen Gross
  2021-11-17 11:48     ` Julien Grall
  1 sibling, 0 replies; 27+ messages in thread
From: Juergen Gross @ 2021-11-17 11:21 UTC (permalink / raw)
  To: Bertrand Marquis, Julien Grall
  Cc: Luca Fancellu, Xen-devel, Wei Chen, Stefano Stabellini,
	Volodymyr Babchuk, Andrew Cooper, George Dunlap, Ian Jackson,
	Jan Beulich, Wei Liu, Dario Faggioli, Anthony PERARD



On 17.11.21 12:16, Bertrand Marquis wrote:
> Hi Julien,
> 
>> On 17 Nov 2021, at 10:26, Julien Grall <julien@xen.org> wrote:
>>
>> Hi Luca,
>>
>> On 17/11/2021 09:57, Luca Fancellu wrote:
>>> Currently Xen creates a default cpupool0 that contains all the cpu brought up
>>> during boot and it assumes that the platform has only one kind of CPU.
>>> This assumption does not hold on big.LITTLE platform, but putting different
>>> type of CPU in the same cpupool can result in instability and security issues
>>> for the domains running on the pool.
>>
>> I agree that you can't move a LITTLE vCPU to a big pCPU. However...
>>
>>> For this reason this serie introduces an architecture specific way to create
>>> different cpupool at boot time, this is particularly useful on ARM big.LITTLE
>>> platform where there might be the need to have different cpupools for each type
>>> of core, but also systems using NUMA can have different cpu pool for each node.
>>
>> ... from my understanding, all the vCPUs of a domain have to be in the same cpupool. So with this approach it is not possible:
>>    1) to have a mix of LITTLE and big vCPUs in the domain
>>    2) to create a domain spanning across two NUMA nodes
>>
>> So I think we need to make sure that any solutions we go through will not prevent us to implement those setups.
> 
> The point of this patch is to make all cores available without breaking the current behaviour of existing system.

May I suggest adding a boot parameter, so this behavior can be controlled
by means other than compile-time configuration?
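
For example (option name made up here), following the usual Xen pattern:

    /* Hypothetical opt-in knob for creating per-CPU-type pools at boot. */
    static bool __initdata opt_boot_cpupools;
    boolean_param("boot-cpupools", opt_boot_cpupools);

with arch_allocate_cpupools() doing nothing unless opt_boot_cpupools is set.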

> 
> Someone not using cpupool will keep running on the same cores as before.
> Someone wanting to use the other cores could assign a guest to the other(s) cpupool (big.LITTLE is just an example with 2 but there are now cores with 3 types of cores).
> Someone wanting to build something different can now create new cpupools in Dom0 and assign the cores they want to is to build a guest having access to different types of cores.
> 
> The point here is just to make the “other” cores accessible and park them in cpupools so that current behaviour is not changed.
> 
>>
>> I can see two options here:
>>   1) Allowing a domain vCPUs to be on a different cpupool
>>   2) Introducing CPU class (see [1])
>>
>> I can't remember why Dario suggested 2) rather than 1) in the past. @Dario, do you remember it?
> 
> I think 1) is definitely interesting and something that could be looked at in the future.

From a scheduler point of view, this is IMO a nightmare.


Juergen



* Re: [RFC PATCH 0/2] Boot time cpupools
  2021-11-17 11:16   ` Bertrand Marquis
  2021-11-17 11:21     ` Juergen Gross
@ 2021-11-17 11:48     ` Julien Grall
  2021-11-17 12:07       ` Bertrand Marquis
  1 sibling, 1 reply; 27+ messages in thread
From: Julien Grall @ 2021-11-17 11:48 UTC (permalink / raw)
  To: Bertrand Marquis
  Cc: Luca Fancellu, Xen-devel, Wei Chen, Stefano Stabellini,
	Volodymyr Babchuk, Andrew Cooper, George Dunlap, Ian Jackson,
	Jan Beulich, Wei Liu, Juergen Gross, Dario Faggioli,
	Anthony PERARD

On 17/11/2021 11:16, Bertrand Marquis wrote:
> Hi Julien,

Hi,

>> On 17 Nov 2021, at 10:26, Julien Grall <julien@xen.org> wrote:
>>
>> Hi Luca,
>>
>> On 17/11/2021 09:57, Luca Fancellu wrote:
>>> Currently Xen creates a default cpupool0 that contains all the cpu brought up
>>> during boot and it assumes that the platform has only one kind of CPU.
>>> This assumption does not hold on big.LITTLE platform, but putting different
>>> type of CPU in the same cpupool can result in instability and security issues
>>> for the domains running on the pool.
>>
>> I agree that you can't move a LITTLE vCPU to a big pCPU. However...
>>
>>> For this reason this serie introduces an architecture specific way to create
>>> different cpupool at boot time, this is particularly useful on ARM big.LITTLE
>>> platform where there might be the need to have different cpupools for each type
>>> of core, but also systems using NUMA can have different cpu pool for each node.
>>
>> ... from my understanding, all the vCPUs of a domain have to be in the same cpupool. So with this approach it is not possible:
>>    1) to have a mix of LITTLE and big vCPUs in the domain
>>    2) to create a domain spanning across two NUMA nodes
>>
>> So I think we need to make sure that any solutions we go through will not prevent us to implement those setups.
> 
> The point of this patch is to make all cores available without breaking the current behaviour of existing system.

I might be missing some context here. By breaking the current behavior, do 
you mean users that may want to add "hmp-unsafe" on the command line?

Cheers,

-- 
Julien Grall



* Re: [RFC PATCH 2/2] tools/cpupools: Give a name to unnamed cpupools
  2021-11-17 11:10   ` Juergen Gross
@ 2021-11-17 11:52     ` Luca Fancellu
  2021-11-17 12:06       ` Juergen Gross
  0 siblings, 1 reply; 27+ messages in thread
From: Luca Fancellu @ 2021-11-17 11:52 UTC (permalink / raw)
  To: Juergen Gross
  Cc: Xen-devel, Bertrand Marquis, wei.chen, Ian Jackson, Wei Liu,
	Anthony PERARD



> On 17 Nov 2021, at 11:10, Juergen Gross <jgross@suse.com> wrote:
> 
> On 17.11.21 10:57, Luca Fancellu wrote:
>> With the introduction of boot time cpupools, Xen can
>> create at boot time many cpupools different from the
>> cpupool with id 0.
>> Since these newly created cpupools can't have an
>> entry in Xenstore, name them with the same convention
>> used for the cpupool 0: Pool-<cpupool id>.
>> Signed-off-by: Luca Fancellu <luca.fancellu@arm.com>
>> ---
>>  tools/libs/light/libxl_utils.c | 13 ++++++++-----
>>  1 file changed, 8 insertions(+), 5 deletions(-)
>> diff --git a/tools/libs/light/libxl_utils.c b/tools/libs/light/libxl_utils.c
>> index 4699c4a0a3..d97d91ca98 100644
>> --- a/tools/libs/light/libxl_utils.c
>> +++ b/tools/libs/light/libxl_utils.c
>> @@ -147,13 +147,16 @@ int libxl_cpupool_qualifier_to_cpupoolid(libxl_ctx *ctx, const char *p,
>>  char *libxl_cpupoolid_to_name(libxl_ctx *ctx, uint32_t poolid)
>>  {
>>      unsigned int len;
>> -    char path[strlen("/local/pool") + 12];
>> +    char buffer[strlen("/local/pool") + 12];
>>      char *s;
>>  -    snprintf(path, sizeof(path), "/local/pool/%d/name", poolid);
>> -    s = xs_read(ctx->xsh, XBT_NULL, path, &len);
>> -    if (!s && (poolid == 0))
>> -        return strdup("Pool-0");
>> +    snprintf(buffer, sizeof(buffer), "/local/pool/%d/name", poolid);
>> +    s = xs_read(ctx->xsh, XBT_NULL, buffer, &len);
>> +    if (!s)
>> +    {
>> +        snprintf(buffer, sizeof(buffer), "Pool-%d", poolid);
>> +        return strdup(buffer);
>> +    }
>>      return s;
>>  }
>>  
> 
> This breaks libxl_cpupoolid_is_valid(), as it will now return always
> true, regardless whether the poolid is existing or not.

Hi Juergen,

Yes, right. Do you think I can safely use xc_cpupool_getinfo(…) when there is no entry
in Xenstore?
I would check that the returned cpupool id is the same, and if it isn't, or if I get a null
result, then I would return NULL to ensure libxl_cpupoolid_is_valid(…) works again.

Cheers,
Luca

> 
> 
> Juergen




* Re: [RFC PATCH 2/2] tools/cpupools: Give a name to unnamed cpupools
  2021-11-17 11:52     ` Luca Fancellu
@ 2021-11-17 12:06       ` Juergen Gross
  0 siblings, 0 replies; 27+ messages in thread
From: Juergen Gross @ 2021-11-17 12:06 UTC (permalink / raw)
  To: Luca Fancellu
  Cc: Xen-devel, Bertrand Marquis, wei.chen, Ian Jackson, Wei Liu,
	Anthony PERARD



On 17.11.21 12:52, Luca Fancellu wrote:
> 
> 
>> On 17 Nov 2021, at 11:10, Juergen Gross <jgross@suse.com> wrote:
>>
>> On 17.11.21 10:57, Luca Fancellu wrote:
>>> With the introduction of boot time cpupools, Xen can
>>> create at boot time many cpupools different from the
>>> cpupool with id 0.
>>> Since these newly created cpupools can't have an
>>> entry in Xenstore, name them with the same convention
>>> used for the cpupool 0: Pool-<cpupool id>.
>>> Signed-off-by: Luca Fancellu <luca.fancellu@arm.com>
>>> ---
>>>   tools/libs/light/libxl_utils.c | 13 ++++++++-----
>>>   1 file changed, 8 insertions(+), 5 deletions(-)
>>> diff --git a/tools/libs/light/libxl_utils.c b/tools/libs/light/libxl_utils.c
>>> index 4699c4a0a3..d97d91ca98 100644
>>> --- a/tools/libs/light/libxl_utils.c
>>> +++ b/tools/libs/light/libxl_utils.c
>>> @@ -147,13 +147,16 @@ int libxl_cpupool_qualifier_to_cpupoolid(libxl_ctx *ctx, const char *p,
>>>   char *libxl_cpupoolid_to_name(libxl_ctx *ctx, uint32_t poolid)
>>>   {
>>>       unsigned int len;
>>> -    char path[strlen("/local/pool") + 12];
>>> +    char buffer[strlen("/local/pool") + 12];
>>>       char *s;
>>>   -    snprintf(path, sizeof(path), "/local/pool/%d/name", poolid);
>>> -    s = xs_read(ctx->xsh, XBT_NULL, path, &len);
>>> -    if (!s && (poolid == 0))
>>> -        return strdup("Pool-0");
>>> +    snprintf(buffer, sizeof(buffer), "/local/pool/%d/name", poolid);
>>> +    s = xs_read(ctx->xsh, XBT_NULL, buffer, &len);
>>> +    if (!s)
>>> +    {
>>> +        snprintf(buffer, sizeof(buffer), "Pool-%d", poolid);
>>> +        return strdup(buffer);
>>> +    }
>>>       return s;
>>>   }
>>>   
>>
>> This breaks libxl_cpupoolid_is_valid(), as it will now return always
>> true, regardless whether the poolid is existing or not.
> 
> Hi Juergen,
> 
> Yes right, do you think I can use safely xc_cpupool_getinfo(…) when there is no entry
> in xenstore?
> I would check that the returned cpupool id is the same and if it isn’t or if I get a null
> result, then I will return NULL to ensure libxl_cpupoolid_is_valid(…) works again.

An alternative might be to let tools/helpers/xen-init-dom0.c write
the missing cpupool entries (including for Pool-0) and drop the
poolid == 0 special casing from libxl_cpupoolid_to_name().

This should be rather easy by using xc_cpupool_getinfo() until it finds
no further cpupool.
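
A rough (untested) sketch of that loop, with a made-up helper name:

    /* Write "/local/pool/<id>/name" for every pool lacking a Xenstore entry. */
    static void init_cpupool_names(xc_interface *xch, struct xs_handle *xsh)
    {
        xc_cpupoolinfo_t *info;
        uint32_t poolid = 0;

        /* xc_cpupool_getinfo() returns the next pool with id >= poolid. */
        while ((info = xc_cpupool_getinfo(xch, poolid)) != NULL) {
            char path[64], name[32];
            unsigned int len;
            char *cur;

            snprintf(path, sizeof(path), "/local/pool/%u/name",
                     info->cpupool_id);
            cur = xs_read(xsh, XBT_NULL, path, &len);
            if (!cur) {
                snprintf(name, sizeof(name), "Pool-%u", info->cpupool_id);
                xs_write(xsh, XBT_NULL, path, name, strlen(name));
            } else {
                free(cur);
            }

            poolid = info->cpupool_id + 1;
            xc_cpupool_infofree(xch, info);
        }
    }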


Juergen



* Re: [RFC PATCH 0/2] Boot time cpupools
  2021-11-17 11:48     ` Julien Grall
@ 2021-11-17 12:07       ` Bertrand Marquis
  2021-11-17 19:10         ` Julien Grall
  0 siblings, 1 reply; 27+ messages in thread
From: Bertrand Marquis @ 2021-11-17 12:07 UTC (permalink / raw)
  To: Julien Grall
  Cc: Luca Fancellu, Xen-devel, Wei Chen, Stefano Stabellini,
	Volodymyr Babchuk, Andrew Cooper, George Dunlap, Ian Jackson,
	Jan Beulich, Wei Liu, Juergen Gross, Dario Faggioli,
	Anthony PERARD

Hi Julien,

> On 17 Nov 2021, at 11:48, Julien Grall <julien@xen.org> wrote:
> 
> On 17/11/2021 11:16, Bertrand Marquis wrote:
>> Hi Julien,
> 
> Hi,
> 
>>> On 17 Nov 2021, at 10:26, Julien Grall <julien@xen.org> wrote:
>>> 
>>> Hi Luca,
>>> 
>>> On 17/11/2021 09:57, Luca Fancellu wrote:
>>>> Currently Xen creates a default cpupool0 that contains all the cpu brought up
>>>> during boot and it assumes that the platform has only one kind of CPU.
>>>> This assumption does not hold on big.LITTLE platform, but putting different
>>>> type of CPU in the same cpupool can result in instability and security issues
>>>> for the domains running on the pool.
>>> 
>>> I agree that you can't move a LITTLE vCPU to a big pCPU. However...
>>> 
>>>> For this reason this serie introduces an architecture specific way to create
>>>> different cpupool at boot time, this is particularly useful on ARM big.LITTLE
>>>> platform where there might be the need to have different cpupools for each type
>>>> of core, but also systems using NUMA can have different cpu pool for each node.
>>> 
>>> ... from my understanding, all the vCPUs of a domain have to be in the same cpupool. So with this approach it is not possible:
>>>   1) to have a mix of LITTLE and big vCPUs in the domain
>>>   2) to create a domain spanning across two NUMA nodes
>>> 
>>> So I think we need to make sure that any solutions we go through will not prevent us to implement those setups.
>> The point of this patch is to make all cores available without breaking the current behaviour of existing system.
> 
> I might be missing some context here. By breaking current behavior, do you mean user that may want to add "hmp-unsafe" on the command line?

Right, with hmp-unsafe the behaviour is now the same as without, only the extra cores are parked in other cpupools.

So you have a point: the behaviour is in fact changed for someone who was using hmp-unsafe before, if this is activated.
The command line argument suggested by Juergen definitely makes sense here.

We could instead do the following:
- when this is activated in the configuration, boot all cores and park them in different pools (depending on the command line argument). The current behaviour does not change if the other pools are not used (but more cores will be on in Xen)
- when hmp-unsafe is on, this feature will be turned off (if activated in the configuration) and all cores will be added to the same pool.

What do you think?

Cheers
Bertrand

> 
> Cheers,
> 
> -- 
> Julien Grall




* Re: [RFC PATCH 0/2] Boot time cpupools
  2021-11-17 12:07       ` Bertrand Marquis
@ 2021-11-17 19:10         ` Julien Grall
  2021-11-18  2:19           ` Stefano Stabellini
  2021-11-18 15:29           ` Oleksandr Tyshchenko
  0 siblings, 2 replies; 27+ messages in thread
From: Julien Grall @ 2021-11-17 19:10 UTC (permalink / raw)
  To: Bertrand Marquis, Oleksandr Tyshchenko, Stefano Stabellini
  Cc: Luca Fancellu, Xen-devel, Wei Chen, Volodymyr Babchuk,
	George Dunlap, Juergen Gross, Dario Faggioli

(Pruning some CCs to just leave Arm and sched folks)

On 17/11/2021 12:07, Bertrand Marquis wrote:
> Hi Julien,

Hi Bertrand,

>> On 17 Nov 2021, at 11:48, Julien Grall <julien@xen.org> wrote:
>>
>> On 17/11/2021 11:16, Bertrand Marquis wrote:
>>> Hi Julien,
>>
>> Hi,
>>
>>>> On 17 Nov 2021, at 10:26, Julien Grall <julien@xen.org> wrote:
>>>>
>>>> Hi Luca,
>>>>
>>>> On 17/11/2021 09:57, Luca Fancellu wrote:
>>>>> Currently Xen creates a default cpupool0 that contains all the cpu brought up
>>>>> during boot and it assumes that the platform has only one kind of CPU.
>>>>> This assumption does not hold on big.LITTLE platform, but putting different
>>>>> type of CPU in the same cpupool can result in instability and security issues
>>>>> for the domains running on the pool.
>>>>
>>>> I agree that you can't move a LITTLE vCPU to a big pCPU. However...
>>>>
>>>>> For this reason this serie introduces an architecture specific way to create
>>>>> different cpupool at boot time, this is particularly useful on ARM big.LITTLE
>>>>> platform where there might be the need to have different cpupools for each type
>>>>> of core, but also systems using NUMA can have different cpu pool for each node.
>>>>
>>>> ... from my understanding, all the vCPUs of a domain have to be in the same cpupool. So with this approach it is not possible:
>>>>    1) to have a mix of LITTLE and big vCPUs in the domain
>>>>    2) to create a domain spanning across two NUMA nodes
>>>>
>>>> So I think we need to make sure that any solutions we go through will not prevent us to implement those setups.
>>> The point of this patch is to make all cores available without breaking the current behaviour of existing system.
>>
>> I might be missing some context here. By breaking current behavior, do you mean user that may want to add "hmp-unsafe" on the command line?
> 
> Right, with hmp-unsafe the behaviour is now the same as without, only extra cores are parked in other cpupools.
> 
> So you have a point in fact that behaviour is changed for someone who was using hmp-unsafe before if this is activated.
> The command line argument suggested by Jurgen definitely makes sense here.
> 
> We could instead do the following:
> - when this is activated in the configuration, boot all cores and park them in different pools (depending on command line argument). Current behaviour not change if other pools are not used (but more cores will be on in xen)

From my understanding, it is possible to move a pCPU in/out of a pool 
afterwards. So the security concern with big.LITTLE is still present, 
even though it would be difficult to hit.

I am also concerned that it would be more difficult to detect any 
misconfiguration. So I think this option would still need to be turned 
on only if hmp-unsafe and the new command line one are both on.

If we want to enable it without hmp-unsafe on, we would need to at least 
lock the pools.

> - when hmp-unsafe is on, this feature will be turned of (if activated in configuration) and all cores would be added in the same pool.
> 
> What do you think ?

I am split. On one hand, this makes it easier for someone to try 
big.LITTLE as you don't have to manually pin vCPUs. On the other hand, 
this is handling a single use-case, and you would need to use hmp-unsafe 
and pinning if you want to get a more exotic setup (e.g. a domain with 
big.LITTLE vCPUs).

Another possible solution is to pin dom0 vCPUs (AFAIK they are just 
sticky by default) and then create the pools from dom0 userspace. My 
assumption is for dom0less we would want to use pinning instead.

That said I would like to hear from Xilinx and EPAM as, IIRC, they are 
already using hmp-unsafe in production.

Cheers,

-- 
Julien Grall



* Re: [RFC PATCH 0/2] Boot time cpupools
  2021-11-17 19:10         ` Julien Grall
@ 2021-11-18  2:19           ` Stefano Stabellini
  2021-11-18  5:19             ` Juergen Gross
  2021-11-19 18:02             ` Julien Grall
  2021-11-18 15:29           ` Oleksandr Tyshchenko
  1 sibling, 2 replies; 27+ messages in thread
From: Stefano Stabellini @ 2021-11-18  2:19 UTC (permalink / raw)
  To: Julien Grall
  Cc: Bertrand Marquis, Oleksandr Tyshchenko, Stefano Stabellini,
	Luca Fancellu, Xen-devel, Wei Chen, Volodymyr Babchuk,
	George Dunlap, Juergen Gross, Dario Faggioli

On Wed, 17 Nov 2021, Julien Grall wrote:
> > > > > On 17 Nov 2021, at 10:26, Julien Grall <julien@xen.org> wrote:
> > > > > 
> > > > > Hi Luca,
> > > > > 
> > > > > On 17/11/2021 09:57, Luca Fancellu wrote:
> > > > > > Currently Xen creates a default cpupool0 that contains all the cpu
> > > > > > brought up
> > > > > > during boot and it assumes that the platform has only one kind of
> > > > > > CPU.
> > > > > > This assumption does not hold on big.LITTLE platform, but putting
> > > > > > different
> > > > > > type of CPU in the same cpupool can result in instability and
> > > > > > security issues
> > > > > > for the domains running on the pool.
> > > > > 
> > > > > I agree that you can't move a LITTLE vCPU to a big pCPU. However...
> > > > > 
> > > > > > For this reason this serie introduces an architecture specific way
> > > > > > to create
> > > > > > different cpupool at boot time, this is particularly useful on ARM
> > > > > > big.LITTLE
> > > > > > platform where there might be the need to have different cpupools
> > > > > > for each type
> > > > > > of core, but also systems using NUMA can have different cpu pool for
> > > > > > each node.
> > > > > 
> > > > > ... from my understanding, all the vCPUs of a domain have to be in the
> > > > > same cpupool. So with this approach it is not possible:
> > > > >    1) to have a mix of LITTLE and big vCPUs in the domain
> > > > >    2) to create a domain spanning across two NUMA nodes
> > > > > 
> > > > > So I think we need to make sure that any solutions we go through will
> > > > > not prevent us to implement those setups.
> > > > The point of this patch is to make all cores available without breaking
> > > > the current behaviour of existing system.
> > > 
> > > I might be missing some context here. By breaking current behavior, do you
> > > mean user that may want to add "hmp-unsafe" on the command line?
> > 
> > Right, with hmp-unsafe the behaviour is now the same as without, only extra
> > cores are parked in other cpupools.
> > 
> > So you have a point in fact that behaviour is changed for someone who was
> > using hmp-unsafe before if this is activated.
> > The command line argument suggested by Jurgen definitely makes sense here.
> > 
> > We could instead do the following:
> > - when this is activated in the configuration, boot all cores and park them
> > in different pools (depending on command line argument). Current behaviour
> > not change if other pools are not used (but more cores will be on in xen)
> 
> From my understanding, it is possible to move a pCPU in/out a pool afterwards.
> So the security concern with big.LITTLE is still present, even though it would
> be difficult to hit it.

As far as I know moving a pCPU in/out of a pool is something that cannot
happen automatically: it requires manual intervention from the user and it
is uncommon. We could print a warning or simply return an error to prevent
the action from happening. Or something like:

"This action might result in memory corruptions and invalid behavior. Do
you want to continue? [Y/N]


> I am also concerned that it would be more difficult to detect any
> misconfiguration. So I think this option would still need to be turned on only
> if hmp-unsafe and the new command line one are both on.
> 
> If we want to enable it without hmp-unsafe on, we would need to at least lock
> the pools.

Locking the pools is a good idea.

My preference is not to tie this feature to the hmp-unsafe command line
option; more on this below.


> > - when hmp-unsafe is on, this feature will be turned of (if activated in
> > configuration) and all cores would be added in the same pool.
> > 
> > What do you think ?
> 
> I am split. On one hand, this is making easier for someone to try big.LITTLE
> as you don't have manually pin vCPUs. On the other hand, this is handling a
> single use-case and you would need to use hmp-unsafe and pinning if you want
> to get more exotic setup (e.g. a domain with big.LITTLE).
> 
> Another possible solution is to pin dom0 vCPUs (AFAIK they are just sticky by
> default) and then create the pools from dom0 userspace. My assumption is for
> dom0less we would want to use pinning instead.
> 
> That said I would like to hear from Xilinx and EPAM as, IIRC, they are already
> using hmp-unsafe in production.

This discussion has been very interesting, it is cool to hear new ideas
like this one. I have a couple of thoughts to share.

First I think that the ability to create cpupools at boot time is
super important. It goes way beyond big.LITTLE. It would be incredibly
useful to separate real-time (sched=null) and non-real-time
(sched=credit2) workloads. I think it will only become more important
going forward, so I'd love to see an option to configure cpupools that
works for dom0less. It could be based on device tree properties rather
than kconfig options; see the sketch below.
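
For instance (binding entirely made up), a hypothetical node under /chosen
could describe each pool, parsed with something like:

    /*
     * Hypothetical dom0less binding:
     *
     *   cpupool_a {
     *       compatible = "xen,cpupool";
     *       cpupool-id = <1>;
     *       cpupool-sched = "null";
     *   };
     */
    static int __init parse_boot_cpupools(void)
    {
        struct dt_device_node *chosen, *node;

        chosen = dt_find_node_by_path("/chosen");
        if ( !chosen )
            return -ENOENT;

        dt_for_each_child_node ( chosen, node )
        {
            uint32_t pool_id;

            if ( !dt_device_is_compatible(node, "xen,cpupool") )
                continue;
            if ( !dt_property_read_u32(node, "cpupool-id", &pool_id) )
                continue;
            /* Create the pool and assign the CPUs it references here. */
        }

        return 0;
    }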

It is true that if we had the devicetree-based cpupool configuration I
mentioned, then somebody could use it to create cpupools matching
big.LITTLE. So "in theory" it solves the problem. However, I think that
for big.LITTLE it would be suboptimal. For big.LITTLE it would be best
if Xen configured the cpupools automatically rather than based on the
device tree configuration. That way, it is going to work automatically
without extra steps even in the simplest Xen setups.

So I think that it is a good idea to have a command line option (better
than a kconfig option) to trigger the MIDR-based cpupool creation at
boot time. The option could be called midr-cpupools=on/off or
hw-cpupools=on/off for example.

In terms of whether it should be the default or not, I don't feel
strongly about it. Unfortunately we (Xilinx) rely on a number of
non-default options already, so we are in the situation where we
have to be extra-careful about the options passed. I don't think that
adding one more would make a significant difference either way.


But my preference is *not* to tie the new command line option to
hmp-unsafe, because if you use midr-cpupools and don't mess with the
pools then it is actually safe. We could even lock the cpupools like
Julien suggested or warn/return error on changing the cpupools. In this
scenario, it would be detrimental to also pass hmp-unsafe: it would
allow actually unsafe configurations that the user wanted to avoid by
using midr-cpupools. It would end up disabling checks we could put in
place to make midr-cpupools safer.

So in short I think it should be:

- midr-cpupools alone
cpupools created at boot, warning/errors on changing cpupools

- midr-cpupools + hmp-unsafe
cpupools created at boot, changing cpupools is allowed (we could still
warn but no errors)

- hmp-unsafe alone
same as today with hmp-unsafe


For the default as I said I don't have a strong preference. I think
midr-cpupools could be "on" by default.


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [RFC PATCH 1/2] xen/cpupool: Create different cpupools at boot time
  2021-11-17  9:57 ` [RFC PATCH 1/2] xen/cpupool: Create different cpupools at boot time Luca Fancellu
  2021-11-17 11:05   ` Juergen Gross
@ 2021-11-18  2:59   ` Stefano Stabellini
  1 sibling, 0 replies; 27+ messages in thread
From: Stefano Stabellini @ 2021-11-18  2:59 UTC (permalink / raw)
  To: Luca Fancellu
  Cc: xen-devel, bertrand.marquis, wei.chen, Stefano Stabellini,
	Julien Grall, Volodymyr Babchuk, Andrew Cooper, George Dunlap,
	Ian Jackson, Jan Beulich, Wei Liu, Juergen Gross, Dario Faggioli

On Wed, 17 Nov 2021, Luca Fancellu wrote:
> Introduce an architecture specific way to create different
> cpupool at boot time, this is particularly useful on ARM
> biglittle system where there might be the need to have
> different cpupools for each type of core, but also systems
> using NUMA can have different cpu pool for each node.
> 
> Signed-off-by: Luca Fancellu <luca.fancellu@arm.com>
> ---
>  xen/arch/arm/Kconfig       | 15 +++++++
>  xen/arch/arm/Makefile      |  1 +
>  xen/arch/arm/cpupool.c     | 84 ++++++++++++++++++++++++++++++++++++++
>  xen/common/sched/cpupool.c |  5 ++-
>  xen/include/xen/cpupool.h  | 30 ++++++++++++++
>  5 files changed, 134 insertions(+), 1 deletion(-)
>  create mode 100644 xen/arch/arm/cpupool.c
>  create mode 100644 xen/include/xen/cpupool.h
> 
> diff --git a/xen/arch/arm/Kconfig b/xen/arch/arm/Kconfig
> index ecfa6822e4..4d7cc9f3bc 100644
> --- a/xen/arch/arm/Kconfig
> +++ b/xen/arch/arm/Kconfig
> @@ -33,6 +33,21 @@ config ACPI
>  	  Advanced Configuration and Power Interface (ACPI) support for Xen is
>  	  an alternative to device tree on ARM64.
>  
> +config BOOT_TIME_CPUPOOLS
> +	bool "Create cpupools per cpu type at boot time."
> +	depends on ARM
> +	default n
> +	help
> +
> +	  Creates, during boot, a cpu pool for each type of CPU found on
> +	  the system. This option is useful on system with heterogeneous
> +	  types of core.

Let's use a command line option instead (or in addition?). The code is
so small that I would probably not bother with the kconfig option.


> +config MAX_BOOT_TIME_CPUPOOLS
> +	depends on BOOT_TIME_CPUPOOLS
> +	int "Maximum number of cpupools that can be created at boot time."
> +	default 16

Maybe it could be just a #define
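
I.e. something like the following, keeping the default value from the
Kconfig option above:

    /* Upper bound on the number of cpupools created at boot time. */
    #define MAX_BOOT_TIME_CPUPOOLS 16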


>  config GICV3
>  	bool "GICv3 driver"
>  	depends on ARM_64 && !NEW_VGIC
> diff --git a/xen/arch/arm/Makefile b/xen/arch/arm/Makefile
> index 07f634508e..0bb8b84750 100644
> --- a/xen/arch/arm/Makefile
> +++ b/xen/arch/arm/Makefile
> @@ -13,6 +13,7 @@ obj-$(CONFIG_HAS_ALTERNATIVE) += alternative.o
>  obj-y += bootfdt.init.o
>  obj-y += cpuerrata.o
>  obj-y += cpufeature.o
> +obj-$(CONFIG_BOOT_TIME_CPUPOOLS) += cpupool.o
>  obj-y += decode.o
>  obj-y += device.o
>  obj-$(CONFIG_IOREQ_SERVER) += dm.o
> diff --git a/xen/arch/arm/cpupool.c b/xen/arch/arm/cpupool.c
> new file mode 100644
> index 0000000000..550521e352
> --- /dev/null
> +++ b/xen/arch/arm/cpupool.c
> @@ -0,0 +1,84 @@
> +/******************************************************************************
> + * cpupool.c
> + *
> + * (C) 2021, arm
> + */
> +
> +#include <xen/cpumask.h>
> +#include <xen/cpupool.h>
> +#include <xen/err.h>
> +#include <xen/sched.h>
> +#include <asm/cpufeature.h>
> +#include <asm/processor.h>
> +#include "../../common/sched/private.h"
> +
> +typedef struct {
> +    register_t     midr;
> +    struct cpupool *pool;
> +} pool_t;
> +
> +static pool_t __initdata pool_cpu_map[CONFIG_MAX_BOOT_TIME_CPUPOOLS];
> +
> +void __init arch_allocate_cpupools(const cpumask_t *cpu_online_map,
> +                                   cpupool_create_t cpupool_create)
> +{
> +    unsigned int cpu, i;
> +
> +    /* First pool is the default pool0 associated with midr of the boot core */
> +    pool_cpu_map[0].midr = system_cpuinfo.midr.bits;
> +    pool_cpu_map[0].pool = cpupool0;
> +
> +    for_each_cpu ( cpu, cpu_online_map )
> +    {
> +        for ( i = 0; i < CONFIG_MAX_BOOT_TIME_CPUPOOLS; i++ )
> +            if ( !pool_cpu_map[i].pool ||
> +                 (cpu_data[cpu].midr.bits == pool_cpu_map[i].midr) )
> +                break;

Please use { } for readability


> +        if ( i < CONFIG_MAX_BOOT_TIME_CPUPOOLS )
> +        {
> +            if ( !pool_cpu_map[i].pool )
> +            {
> +                /* There is no pool for this cpu midr, create it */
> +                pool_cpu_map[i].midr = cpu_data[cpu].midr.bits;
> +                debugtrace_printk("Create pool %u for cpu MIDR: 0x%"
> +                                  PRIregister"\n", i, pool_cpu_map[i].midr);
> +                pool_cpu_map[i].pool =
> +                    cpupool_create(i, scheduler_get_default()->sched_id);
> +                BUG_ON(IS_ERR(pool_cpu_map[i].pool));
> +                cpupool_put(pool_cpu_map[i].pool);
> +            }
> +        }
> +        else
> +            panic("Could not create cpupool, maximum number reached (%u)",
> +                  (unsigned int)(CONFIG_MAX_BOOT_TIME_CPUPOOLS));
> +    }
> +
> +    /* Print useful information about the pools */
> +    for ( i = 0; i < CONFIG_MAX_BOOT_TIME_CPUPOOLS; i++ )
> +        if ( pool_cpu_map[i].pool )
> +            printk(XENLOG_INFO "Pool-%u contains cpu with MIDR: 0x%"
> +                   PRIregister"\n", i, pool_cpu_map[i].midr);

Please use { } for readability


> +}
> +
> +struct cpupool *__init arch_get_cpupool(unsigned int cpu)
> +{
> +    unsigned int i;
> +
> +    for ( i = 0; i < CONFIG_MAX_BOOT_TIME_CPUPOOLS; i++ )
> +        if ( pool_cpu_map[i].pool &&
> +             (cpu_data[cpu].midr.bits == pool_cpu_map[i].midr) )
> +            return pool_cpu_map[i].pool;
> +
> +    return cpupool0;
> +}
> +
> +/*
> + * Local variables:
> + * mode: C
> + * c-file-style: "BSD"
> + * c-basic-offset: 4
> + * tab-width: 4
> + * indent-tabs-mode: nil
> + * End:
> + */
> diff --git a/xen/common/sched/cpupool.c b/xen/common/sched/cpupool.c
> index 8c6e6eb9cc..7389f04e28 100644
> --- a/xen/common/sched/cpupool.c
> +++ b/xen/common/sched/cpupool.c
> @@ -13,6 +13,7 @@
>  
>  #include <xen/cpu.h>
>  #include <xen/cpumask.h>
> +#include <xen/cpupool.h>
>  #include <xen/guest_access.h>
>  #include <xen/hypfs.h>
>  #include <xen/init.h>
> @@ -1231,12 +1232,14 @@ static int __init cpupool_init(void)
>      cpupool_put(cpupool0);
>      register_cpu_notifier(&cpu_nfb);
>  
> +    arch_allocate_cpupools(&cpu_online_map, cpupool_create);
> +
>      spin_lock(&cpupool_lock);
>  
>      cpumask_copy(&cpupool_free_cpus, &cpu_online_map);
>  
>      for_each_cpu ( cpu, &cpupool_free_cpus )
> -        cpupool_assign_cpu_locked(cpupool0, cpu);
> +        cpupool_assign_cpu_locked(arch_get_cpupool(cpu), cpu);
>  
>      spin_unlock(&cpupool_lock);
>  
> diff --git a/xen/include/xen/cpupool.h b/xen/include/xen/cpupool.h
> new file mode 100644
> index 0000000000..4b50af9e3d
> --- /dev/null
> +++ b/xen/include/xen/cpupool.h
> @@ -0,0 +1,30 @@
> +#ifndef __XEN_CPUPOOL_H
> +#define __XEN_CPUPOOL_H
> +
> +#include <xen/sched.h>
> +
> +typedef struct cpupool* (*cpupool_create_t)(unsigned int, unsigned int);
> +
> +#ifdef CONFIG_BOOT_TIME_CPUPOOLS
> +
> +void arch_allocate_cpupools(const cpumask_t *cpu_online_map,
> +                            cpupool_create_t cpupool_create);
> +
> +struct cpupool *arch_get_cpupool(unsigned int cpu);
> +
> +#else
> +
> +static inline __init
> +void arch_allocate_cpupools(const cpumask_t *cpu_online_map,
> +                            cpupool_create_t cpupool_create)
> +{
> +}
> +
> +static inline struct cpupool *__init arch_get_cpupool(unsigned int cpu)
> +{
> +    return cpupool0;
> +}
> +
> +#endif
> +
> +#endif /* __XEN_CPUPOOL_H */
> -- 
> 2.17.1
> 


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [RFC PATCH 1/2] xen/cpupool: Create different cpupools at boot time
  2021-11-17 11:05   ` Juergen Gross
@ 2021-11-18  3:01     ` Stefano Stabellini
  2021-11-18  5:08       ` Juergen Gross
  0 siblings, 1 reply; 27+ messages in thread
From: Stefano Stabellini @ 2021-11-18  3:01 UTC (permalink / raw)
  To: Juergen Gross
  Cc: Luca Fancellu, xen-devel, bertrand.marquis, wei.chen,
	Stefano Stabellini, Julien Grall, Volodymyr Babchuk,
	Andrew Cooper, George Dunlap, Ian Jackson, Jan Beulich, Wei Liu,
	Dario Faggioli

On Wed, 17 Nov 2021, Juergen Gross wrote:
> On 17.11.21 10:57, Luca Fancellu wrote:
> > Introduce an architecture specific way to create different
> > cpupool at boot time, this is particularly useful on ARM
> > biglittle system where there might be the need to have
> > different cpupools for each type of core, but also systems
> > using NUMA can have different cpu pool for each node.
> > 
> > Signed-off-by: Luca Fancellu <luca.fancellu@arm.com>
> > ---
> >   xen/arch/arm/Kconfig       | 15 +++++++
> >   xen/arch/arm/Makefile      |  1 +
> >   xen/arch/arm/cpupool.c     | 84 ++++++++++++++++++++++++++++++++++++++
> >   xen/common/sched/cpupool.c |  5 ++-
> >   xen/include/xen/cpupool.h  | 30 ++++++++++++++
> >   5 files changed, 134 insertions(+), 1 deletion(-)
> >   create mode 100644 xen/arch/arm/cpupool.c
> >   create mode 100644 xen/include/xen/cpupool.h
> > 
> > diff --git a/xen/arch/arm/Kconfig b/xen/arch/arm/Kconfig
> > index ecfa6822e4..4d7cc9f3bc 100644
> > --- a/xen/arch/arm/Kconfig
> > +++ b/xen/arch/arm/Kconfig
> > @@ -33,6 +33,21 @@ config ACPI
> >   	  Advanced Configuration and Power Interface (ACPI) support for Xen is
> >   	  an alternative to device tree on ARM64.
> >   +config BOOT_TIME_CPUPOOLS
> > +	bool "Create cpupools per cpu type at boot time."
> > +	depends on ARM
> > +	default n
> > +	help
> > +
> > +	  Creates, during boot, a cpu pool for each type of CPU found on
> > +	  the system. This option is useful on system with heterogeneous
> > +	  types of core.
> > +
> > +config MAX_BOOT_TIME_CPUPOOLS
> > +	depends on BOOT_TIME_CPUPOOLS
> > +	int "Maximum number of cpupools that can be created at boot time."
> > +	default 16
> > +
> >   config GICV3
> >   	bool "GICv3 driver"
> >   	depends on ARM_64 && !NEW_VGIC
> > diff --git a/xen/arch/arm/Makefile b/xen/arch/arm/Makefile
> > index 07f634508e..0bb8b84750 100644
> > --- a/xen/arch/arm/Makefile
> > +++ b/xen/arch/arm/Makefile
> > @@ -13,6 +13,7 @@ obj-$(CONFIG_HAS_ALTERNATIVE) += alternative.o
> >   obj-y += bootfdt.init.o
> >   obj-y += cpuerrata.o
> >   obj-y += cpufeature.o
> > +obj-$(CONFIG_BOOT_TIME_CPUPOOLS) += cpupool.o
> >   obj-y += decode.o
> >   obj-y += device.o
> >   obj-$(CONFIG_IOREQ_SERVER) += dm.o
> > diff --git a/xen/arch/arm/cpupool.c b/xen/arch/arm/cpupool.c
> > new file mode 100644
> > index 0000000000..550521e352
> > --- /dev/null
> > +++ b/xen/arch/arm/cpupool.c
> > @@ -0,0 +1,84 @@
> > +/******************************************************************************
> > + * cpupool.c
> > + *
> > + * (C) 2021, arm
> > + */
> > +
> > +#include <xen/cpumask.h>
> > +#include <xen/cpupool.h>
> > +#include <xen/err.h>
> > +#include <xen/sched.h>
> > +#include <asm/cpufeature.h>
> > +#include <asm/processor.h>
> > +#include "../../common/sched/private.h"
> 
> No, please don't do that.
> 
> You should try to add architecture agnostic service functions in
> common/sched/cpupool.c removing the need to include that file here.
> 
> > +
> > +typedef struct {
> > +    register_t     midr;
> > +    struct cpupool *pool;
> > +} pool_t;
> > +
> > +static pool_t __initdata pool_cpu_map[CONFIG_MAX_BOOT_TIME_CPUPOOLS];
> > +
> > +void __init arch_allocate_cpupools(const cpumask_t *cpu_online_map,
> > +                                   cpupool_create_t cpupool_create)
> 
> Drop the cpupool_create parameter here and ...
> 
> > +{
> > +    unsigned int cpu, i;
> > +
> > +    /* First pool is the default pool0 associated with midr of the boot
> > core */
> > +    pool_cpu_map[0].midr = system_cpuinfo.midr.bits;
> > +    pool_cpu_map[0].pool = cpupool0;
> > +
> > +    for_each_cpu ( cpu, cpu_online_map )
> > +    {
> > +        for ( i = 0; i < CONFIG_MAX_BOOT_TIME_CPUPOOLS; i++ )
> > +            if ( !pool_cpu_map[i].pool ||
> > +                 (cpu_data[cpu].midr.bits == pool_cpu_map[i].midr) )
> > +                break;
> > +
> > +        if ( i < CONFIG_MAX_BOOT_TIME_CPUPOOLS )
> > +        {
> > +            if ( !pool_cpu_map[i].pool )
> > +            {
> > +                /* There is no pool for this cpu midr, create it */
> > +                pool_cpu_map[i].midr = cpu_data[cpu].midr.bits;
> > +                debugtrace_printk("Create pool %u for cpu MIDR: 0x%"
> > +                                  PRIregister"\n", i,
> > pool_cpu_map[i].midr);
> > +                pool_cpu_map[i].pool =
> > +                    cpupool_create(i, scheduler_get_default()->sched_id);
> > +                BUG_ON(IS_ERR(pool_cpu_map[i].pool));
> > +                cpupool_put(pool_cpu_map[i].pool);
> 
> ... call a new wrapper in common/sched/cpupool.c taking just the id as
> parameter (e.g. "cpupool *cpupool_create_default(unsigned int id)")
> which will use the default scheduler and do the cpupool_create() and
> cpupool_put() calls internally.
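
Just to make sure I understand, a minimal sketch of the suggested wrapper
(the name is the one you proposed; the body is my assumption, reusing the
calls from the patch) would be something like:

    struct cpupool *__init cpupool_create_default(unsigned int id)
    {
        struct cpupool *pool;

        /* Create the pool with the default scheduler... */
        pool = cpupool_create(id, scheduler_get_default()->sched_id);
        BUG_ON(IS_ERR(pool));
        /* ...and drop the reference taken by cpupool_create(). */
        cpupool_put(pool);

        return pool;
    }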

What if sched=something is also passed on the command line? Shouldn't
that be used for all cpupools? Or maybe sched=something works because it
changes the "default scheduler"?


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [RFC PATCH 1/2] xen/cpupool: Create different cpupools at boot time
  2021-11-18  3:01     ` Stefano Stabellini
@ 2021-11-18  5:08       ` Juergen Gross
  0 siblings, 0 replies; 27+ messages in thread
From: Juergen Gross @ 2021-11-18  5:08 UTC (permalink / raw)
  To: Stefano Stabellini
  Cc: Luca Fancellu, xen-devel, bertrand.marquis, wei.chen,
	Julien Grall, Volodymyr Babchuk, Andrew Cooper, George Dunlap,
	Ian Jackson, Jan Beulich, Wei Liu, Dario Faggioli


[-- Attachment #1.1.1: Type: text/plain, Size: 5218 bytes --]

On 18.11.21 04:01, Stefano Stabellini wrote:
> On Wed, 17 Nov 2021, Juergen Gross wrote:
>> On 17.11.21 10:57, Luca Fancellu wrote:
>>> Introduce an architecture specific way to create different
>>> cpupool at boot time, this is particularly useful on ARM
>>> biglittle system where there might be the need to have
>>> different cpupools for each type of core, but also systems
>>> using NUMA can have different cpu pool for each node.
>>>
>>> Signed-off-by: Luca Fancellu <luca.fancellu@arm.com>
>>> ---
>>>    xen/arch/arm/Kconfig       | 15 +++++++
>>>    xen/arch/arm/Makefile      |  1 +
>>>    xen/arch/arm/cpupool.c     | 84 ++++++++++++++++++++++++++++++++++++++
>>>    xen/common/sched/cpupool.c |  5 ++-
>>>    xen/include/xen/cpupool.h  | 30 ++++++++++++++
>>>    5 files changed, 134 insertions(+), 1 deletion(-)
>>>    create mode 100644 xen/arch/arm/cpupool.c
>>>    create mode 100644 xen/include/xen/cpupool.h
>>>
>>> diff --git a/xen/arch/arm/Kconfig b/xen/arch/arm/Kconfig
>>> index ecfa6822e4..4d7cc9f3bc 100644
>>> --- a/xen/arch/arm/Kconfig
>>> +++ b/xen/arch/arm/Kconfig
>>> @@ -33,6 +33,21 @@ config ACPI
>>>    	  Advanced Configuration and Power Interface (ACPI) support for Xen is
>>>    	  an alternative to device tree on ARM64.
>>>    +config BOOT_TIME_CPUPOOLS
>>> +	bool "Create cpupools per cpu type at boot time."
>>> +	depends on ARM
>>> +	default n
>>> +	help
>>> +
>>> +	  Creates, during boot, a cpu pool for each type of CPU found on
>>> +	  the system. This option is useful on system with heterogeneous
>>> +	  types of core.
>>> +
>>> +config MAX_BOOT_TIME_CPUPOOLS
>>> +	depends on BOOT_TIME_CPUPOOLS
>>> +	int "Maximum number of cpupools that can be created at boot time."
>>> +	default 16
>>> +
>>>    config GICV3
>>>    	bool "GICv3 driver"
>>>    	depends on ARM_64 && !NEW_VGIC
>>> diff --git a/xen/arch/arm/Makefile b/xen/arch/arm/Makefile
>>> index 07f634508e..0bb8b84750 100644
>>> --- a/xen/arch/arm/Makefile
>>> +++ b/xen/arch/arm/Makefile
>>> @@ -13,6 +13,7 @@ obj-$(CONFIG_HAS_ALTERNATIVE) += alternative.o
>>>    obj-y += bootfdt.init.o
>>>    obj-y += cpuerrata.o
>>>    obj-y += cpufeature.o
>>> +obj-$(CONFIG_BOOT_TIME_CPUPOOLS) += cpupool.o
>>>    obj-y += decode.o
>>>    obj-y += device.o
>>>    obj-$(CONFIG_IOREQ_SERVER) += dm.o
>>> diff --git a/xen/arch/arm/cpupool.c b/xen/arch/arm/cpupool.c
>>> new file mode 100644
>>> index 0000000000..550521e352
>>> --- /dev/null
>>> +++ b/xen/arch/arm/cpupool.c
>>> @@ -0,0 +1,84 @@
>>> +/******************************************************************************
>>> + * cpupool.c
>>> + *
>>> + * (C) 2021, arm
>>> + */
>>> +
>>> +#include <xen/cpumask.h>
>>> +#include <xen/cpupool.h>
>>> +#include <xen/err.h>
>>> +#include <xen/sched.h>
>>> +#include <asm/cpufeature.h>
>>> +#include <asm/processor.h>
>>> +#include "../../common/sched/private.h"
>>
>> No, please don't do that.
>>
>> You should try to add architecture agnostic service functions in
>> common/sched/cpupool.c removing the need to include that file here.
>>
>>> +
>>> +typedef struct {
>>> +    register_t     midr;
>>> +    struct cpupool *pool;
>>> +} pool_t;
>>> +
>>> +static pool_t __initdata pool_cpu_map[CONFIG_MAX_BOOT_TIME_CPUPOOLS];
>>> +
>>> +void __init arch_allocate_cpupools(const cpumask_t *cpu_online_map,
>>> +                                   cpupool_create_t cpupool_create)
>>
>> Drop the cpupool_create parameter here and ...
>>
>>> +{
>>> +    unsigned int cpu, i;
>>> +
>>> +    /* First pool is the default pool0 associated with midr of the boot
>>> core */
>>> +    pool_cpu_map[0].midr = system_cpuinfo.midr.bits;
>>> +    pool_cpu_map[0].pool = cpupool0;
>>> +
>>> +    for_each_cpu ( cpu, cpu_online_map )
>>> +    {
>>> +        for ( i = 0; i < CONFIG_MAX_BOOT_TIME_CPUPOOLS; i++ )
>>> +            if ( !pool_cpu_map[i].pool ||
>>> +                 (cpu_data[cpu].midr.bits == pool_cpu_map[i].midr) )
>>> +                break;
>>> +
>>> +        if ( i < CONFIG_MAX_BOOT_TIME_CPUPOOLS )
>>> +        {
>>> +            if ( !pool_cpu_map[i].pool )
>>> +            {
>>> +                /* There is no pool for this cpu midr, create it */
>>> +                pool_cpu_map[i].midr = cpu_data[cpu].midr.bits;
>>> +                debugtrace_printk("Create pool %u for cpu MIDR: 0x%"
>>> +                                  PRIregister"\n", i,
>>> pool_cpu_map[i].midr);
>>> +                pool_cpu_map[i].pool =
>>> +                    cpupool_create(i, scheduler_get_default()->sched_id);
>>> +                BUG_ON(IS_ERR(pool_cpu_map[i].pool));
>>> +                cpupool_put(pool_cpu_map[i].pool);
>>
>> ... call a new wrapper in common/sched/cpupool.c taking just the id as
>> parameter (e.g. "cpupool *cpupool_create_default(unsigned int id)")
>> which will use the default scheduler and do the cpupool_create() and
>> cpupool_put() calls internally.
> 
> What if sched=something is also passed on the command line? Shouldn't
> that be used for all cpupools? Or maybe sched=something works because it
> changes the "default scheduler"?
> 

Yes.


Juergen

[-- Attachment #1.1.2: OpenPGP public key --]
[-- Type: application/pgp-keys, Size: 3135 bytes --]

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 495 bytes --]

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [RFC PATCH 0/2] Boot time cpupools
  2021-11-18  2:19           ` Stefano Stabellini
@ 2021-11-18  5:19             ` Juergen Gross
  2021-11-18 17:27               ` Stefano Stabellini
  2021-12-07  9:27               ` Luca Fancellu
  2021-11-19 18:02             ` Julien Grall
  1 sibling, 2 replies; 27+ messages in thread
From: Juergen Gross @ 2021-11-18  5:19 UTC (permalink / raw)
  To: Stefano Stabellini, Julien Grall
  Cc: Bertrand Marquis, Oleksandr Tyshchenko, Luca Fancellu, Xen-devel,
	Wei Chen, Volodymyr Babchuk, George Dunlap, Dario Faggioli


[-- Attachment #1.1.1: Type: text/plain, Size: 7985 bytes --]

On 18.11.21 03:19, Stefano Stabellini wrote:
> On Wed, 17 Nov 2021, Julien Grall wrote:
>>>>>> On 17 Nov 2021, at 10:26, Julien Grall <julien@xen.org> wrote:
>>>>>>
>>>>>> Hi Luca,
>>>>>>
>>>>>> On 17/11/2021 09:57, Luca Fancellu wrote:
>>>>>>> Currently Xen creates a default cpupool0 that contains all the cpu
>>>>>>> brought up
>>>>>>> during boot and it assumes that the platform has only one kind of
>>>>>>> CPU.
>>>>>>> This assumption does not hold on big.LITTLE platform, but putting
>>>>>>> different
>>>>>>> type of CPU in the same cpupool can result in instability and
>>>>>>> security issues
>>>>>>> for the domains running on the pool.
>>>>>>
>>>>>> I agree that you can't move a LITTLE vCPU to a big pCPU. However...
>>>>>>
>>>>>>> For this reason this serie introduces an architecture specific way
>>>>>>> to create
>>>>>>> different cpupool at boot time, this is particularly useful on ARM
>>>>>>> big.LITTLE
>>>>>>> platform where there might be the need to have different cpupools
>>>>>>> for each type
>>>>>>> of core, but also systems using NUMA can have different cpu pool for
>>>>>>> each node.
>>>>>>
>>>>>> ... from my understanding, all the vCPUs of a domain have to be in the
>>>>>> same cpupool. So with this approach it is not possible:
>>>>>>     1) to have a mix of LITTLE and big vCPUs in the domain
>>>>>>     2) to create a domain spanning across two NUMA nodes
>>>>>>
>>>>>> So I think we need to make sure that any solutions we go through will
>>>>>> not prevent us to implement those setups.
>>>>> The point of this patch is to make all cores available without breaking
>>>>> the current behaviour of existing system.
>>>>
>>>> I might be missing some context here. By breaking current behavior, do you
>>>> mean user that may want to add "hmp-unsafe" on the command line?
>>>
>>> Right, with hmp-unsafe the behaviour is now the same as without, only extra
>>> cores are parked in other cpupools.
>>>
>>> So you have a point in fact that behaviour is changed for someone who was
>>> using hmp-unsafe before if this is activated.
>>> The command line argument suggested by Jurgen definitely makes sense here.
>>>
>>> We could instead do the following:
>>> - when this is activated in the configuration, boot all cores and park them
>>> in different pools (depending on command line argument). Current behaviour
>>> not change if other pools are not used (but more cores will be on in xen)
>>
>>  From my understanding, it is possible to move a pCPU in/out a pool afterwards.
>> So the security concern with big.LITTLE is still present, even though it would
>> be difficult to hit it.
> 
> As far as I know moving a pCPU in/out of a pool is something that cannot
> happen automatically: it requires manual intervention to the user and it
> is uncommon. We could print a warning or simply return error to prevent
> the action from happening. Or something like:
> 
> "This action might result in memory corruptions and invalid behavior. Do
> you want to continue? [Y/N]

This should only be rejected if the source and target pool are not
compatible. So a cpupool could be given an attribute allowing only specific
cpus (and maybe domains?) in it.

Otherwise it would be impossible to create new cpupools after boot on
such a system and populate them with cpus.
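
As a rough sketch (the per-pool "allowed_midr" attribute is hypothetical,
nothing like it exists in Xen today):

    /* Hypothetical check: reject moving a pCPU into an incompatible pool. */
    static bool cpupool_cpu_compatible(const struct cpupool *c, unsigned int cpu)
    {
        return c->allowed_midr == cpu_data[cpu].midr.bits;
    }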

>> I am also concerned that it would be more difficult to detect any
>> misconfiguration. So I think this option would still need to be turned on only
>> if hmp-unsafe are the new command line one are both on.
>>
>> If we want to enable it without hmp-unsafe on, we would need to at least lock
>> the pools.
> 
> Locking the pools is a good idea.

This would be another option, yes.

> My preference is not to tie this feature to the hmp-unsafe command line,
> more on this below.

I agree.

>>> - when hmp-unsafe is on, this feature will be turned of (if activated in
>>> configuration) and all cores would be added in the same pool.
>>>
>>> What do you think ?
>>
>> I am split. On one hand, this is making easier for someone to try big.LITTLE
>> as you don't have manually pin vCPUs. On the other hand, this is handling a
>> single use-case and you would need to use hmp-unsafe and pinning if you want
>> to get more exotic setup (e.g. a domain with big.LITTLE).
>>
>> Another possible solution is to pin dom0 vCPUs (AFAIK they are just sticky by
>> default) and then create the pools from dom0 userspace. My assumption is for
>> dom0less we would want to use pinning instead.
>>
>> That said I would like to hear from Xilinx and EPAM as, IIRC, they are already
>> using hmp-unsafe in production.
> 
> This discussion has been very interesting, it is cool to hear new ideas
> like this one. I have a couple of thoughts to share.
> 
> First I think that the ability of creating cpupools at boot time is
> super important. It goes way beyond big.LITTLE. It would be incredibly
> useful to separate real-time (sched=null) and non-real-time
> (sched=credit2) workloads. I think it will only become more important
> going forward so I'd love to see an option to configure cpupools that
> works for dom0less. It could be based on device tree properties rather
> than kconfig options.

I think device tree AND command line option should be possible (think of
x86 here).

> It is true that if we had the devicetree-based cpupool configuration I
> mentioned, then somebody could use it to create cpupools matching
> big.LITTLE. So "in theory" it solves the problem. However, I think that
> for big.LITTLE it would be suboptimal. For big.LITTLE it would be best
> if Xen configured the cpupools automatically rather than based on the
> device tree configuration. That way, it is going to work automatically
> without extra steps even in the simplest Xen setups.
> 
> So I think that it is a good idea to have a command line option (better
> than a kconfig option) to trigger the MIDR-based cpupool creation at
> boot time. The option could be called midr-cpupools=on/off or
> hw-cpupools=on/off for example.

I'd rather go for:

cpupools=<options>

With e.g. <options>:

- "auto-midr": split system into cpupools based on MIDR
- "auto-numa": split system into cpupools based on NUMA nodes
- "cpus=<list of cpus>[,sched=<scheduler>]

This would be rather flexible without adding more and more options
doing similar things. Other sub-options could be added rather easily.
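
E.g. (illustrative only, the exact syntax would still need to be defined):

    cpupools=auto-midr
    cpupools=auto-numa
    cpupools=cpus=0-3,sched=null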

> In terms of whether it should be the default or not, I don't feel
> strongly about it. Unfortunately we (Xilinx) rely on a number of
> non-default options already so we are already in the situation where we
> have to be extra-careful at the options passed. I don't think that
> adding one more would make a significant difference either way.
> 
> 
> But my preference is *not* to tie the new command line option with
> hmp-unsafe because if you use midr-cpupools and don't mess with the
> pools then it is actually safe. We could even lock the cpupools like
> Julien suggested or warn/return error on changing the cpupools. In this
> scenario, it would be detrimental to also pass hmp-unsafe: it would
> allow actually unsafe configurations that the user wanted to avoid by
> using midr-cpupools. It would end up disabling checks we could put in
> place to make midr-cpupools safer.
> 
> So in short I think it should be:
> 
> - midr-cpupools alone
> cpupools created at boot, warning/errors on changing cpupools
> 
> - midr-cpupools + hmp-unsafe
> cpupools created at boot, changing cpupools is allowed (we could still
> warn but no errors)

I'd rather add an explicit ",locked" option to the above cpupools parameter.

> 
> - hmp-unsafe alone
> same as today with hmp-unsafe
> 
> 
> For the default as I said I don't have a strong preference. I think
> midr-cpupools could be "on" be default.
> 

What about making this a Kconfig option?
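
Something along these lines (name and wording made up, just to illustrate):

    config BOOT_TIME_CPUPOOLS_DEFAULT
    	bool "Create MIDR-based cpupools at boot by default"
    	default n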


Juergen

[-- Attachment #1.1.2: OpenPGP public key --]
[-- Type: application/pgp-keys, Size: 3135 bytes --]

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 495 bytes --]

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [RFC PATCH 0/2] Boot time cpupools
  2021-11-17 19:10         ` Julien Grall
  2021-11-18  2:19           ` Stefano Stabellini
@ 2021-11-18 15:29           ` Oleksandr Tyshchenko
  1 sibling, 0 replies; 27+ messages in thread
From: Oleksandr Tyshchenko @ 2021-11-18 15:29 UTC (permalink / raw)
  To: Julien Grall
  Cc: Bertrand Marquis, Oleksandr Tyshchenko, Stefano Stabellini,
	Luca Fancellu, Xen-devel, Wei Chen, Volodymyr Babchuk,
	George Dunlap, Juergen Gross, Dario Faggioli

[-- Attachment #1: Type: text/plain, Size: 5569 bytes --]

On Wed, Nov 17, 2021 at 9:11 PM Julien Grall <julien@xen.org> wrote:

Hi Julien, all

[Sorry for the possible format issues]

(Pruning some CCs to just leave Arm and sched folks)
>
> On 17/11/2021 12:07, Bertrand Marquis wrote:
> > Hi Julien,
>
> Hi Bertrand,
>
> >> On 17 Nov 2021, at 11:48, Julien Grall <julien@xen.org> wrote:
> >>
> >> On 17/11/2021 11:16, Bertrand Marquis wrote:
> >>> Hi Julien,
> >>
> >> Hi,
> >>
> >>>> On 17 Nov 2021, at 10:26, Julien Grall <julien@xen.org> wrote:
> >>>>
> >>>> Hi Luca,
> >>>>
> >>>> On 17/11/2021 09:57, Luca Fancellu wrote:
> >>>>> Currently Xen creates a default cpupool0 that contains all the cpu
> brought up
> >>>>> during boot and it assumes that the platform has only one kind of
> CPU.
> >>>>> This assumption does not hold on big.LITTLE platform, but putting
> different
> >>>>> type of CPU in the same cpupool can result in instability and
> security issues
> >>>>> for the domains running on the pool.
> >>>>
> >>>> I agree that you can't move a LITTLE vCPU to a big pCPU. However...
> >>>>
> >>>>> For this reason this serie introduces an architecture specific way
> to create
> >>>>> different cpupool at boot time, this is particularly useful on ARM
> big.LITTLE
> >>>>> platform where there might be the need to have different cpupools
> for each type
> >>>>> of core, but also systems using NUMA can have different cpu pool for
> each node.
> >>>>
> >>>> ... from my understanding, all the vCPUs of a domain have to be in
> the same cpupool. So with this approach it is not possible:
> >>>>    1) to have a mix of LITTLE and big vCPUs in the domain
> >>>>    2) to create a domain spanning across two NUMA nodes
> >>>>
> >>>> So I think we need to make sure that any solutions we go through will
> not prevent us to implement those setups.
> >>> The point of this patch is to make all cores available without
> breaking the current behaviour of existing system.
> >>
> >> I might be missing some context here. By breaking current behavior, do
> you mean user that may want to add "hmp-unsafe" on the command line?
> >
> > Right, with hmp-unsafe the behaviour is now the same as without, only
> extra cores are parked in other cpupools.
> >
> > So you have a point in fact that behaviour is changed for someone who
> was using hmp-unsafe before if this is activated.
> > The command line argument suggested by Jurgen definitely makes sense
> here.
> >
> > We could instead do the following:
> > - when this is activated in the configuration, boot all cores and park
> them in different pools (depending on command line argument). Current
> behaviour not change if other pools are not used (but more cores will be on
> in xen)
>
>  From my understanding, it is possible to move a pCPU in/out a pool
> afterwards. So the security concern with big.LITTLE is still present,
> even though it would be difficult to hit it.
>
> I am also concerned that it would be more difficult to detect any
> misconfiguration. So I think this option would still need to be turned
> on only if hmp-unsafe are the new command line one are both on.
>
> If we want to enable it without hmp-unsafe on, we would need to at least
> lock the pools.
>
> > - when hmp-unsafe is on, this feature will be turned of (if activated in
> configuration) and all cores would be added in the same pool.
> >
> > What do you think ?
>
> I am split. On one hand, this is making easier for someone to try
> big.LITTLE as you don't have manually pin vCPUs. On the other hand, this
> is handling a single use-case and you would need to use hmp-unsafe and
> pinning if you want to get more exotic setup (e.g. a domain with
> big.LITTLE).
>
> Another possible solution is to pin dom0 vCPUs (AFAIK they are just
> sticky by default) and then create the pools from dom0 userspace. My
> assumption is for dom0less we would want to use pinning instead.
>
> That said I would like to hear from Xilinx and EPAM as, IIRC, they are
> already using hmp-unsafe in production.
>


We have been using hmp-unsafe since its introduction. Yes, we are aware of
the possible consequences of enabling that option (as different CPU types
might have different errata, cache lines, PA bits (?), etc), so we are trying
to deal with it carefully.
In our target system we pin Dom0's vCPUs to pCPUs of the same type from
userspace via the "xl vcpu-pin" command; for other domains a more exotic
configuration can be used.
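
E.g. (illustrative; pinning all Dom0 vCPUs to the first four pCPUs, which are
assumed here to be of the same type):

    xl vcpu-pin Domain-0 all 0-3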

I share Stefano's opinion not to tie the new feature (boot time MIDR-based
cpupools) to the existing hmp-unsafe option, and to create a new command line
option to control it.

The proposed algorithm (copied here for convenience) looks fine to me.
"So in short I think it should be:
- midr-cpupools alone
cpupools created at boot, warning/errors on changing cpupools
- midr-cpupools + hmp-unsafe
cpupools created at boot, changing cpupools is allowed (we could still
warn but no errors)
- hmp-unsafe alone
same as today with hmp-unsafe"

For the default behaviour I also don't have a strong preference. One thing
is clear: the default behaviour should be safe. I think the command line
option is preferable to the config option, as the new feature can be
enabled/disabled without rebuilding Xen. Moreover, if we follow the
proposed algorithm route, the behaviour of the new feature at runtime
(whether changing cpupools is allowed or not) is going to depend on the
hmp-unsafe state, which is currently under command line control. But there
is no strong preference here either.



>
> Cheers,
>
> --
> Julien Grall
>
>

-- 
Regards,

Oleksandr Tyshchenko

[-- Attachment #2: Type: text/html, Size: 7573 bytes --]

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [RFC PATCH 0/2] Boot time cpupools
  2021-11-18  5:19             ` Juergen Gross
@ 2021-11-18 17:27               ` Stefano Stabellini
  2021-12-07  9:27               ` Luca Fancellu
  1 sibling, 0 replies; 27+ messages in thread
From: Stefano Stabellini @ 2021-11-18 17:27 UTC (permalink / raw)
  To: Juergen Gross
  Cc: Stefano Stabellini, Julien Grall, Bertrand Marquis,
	Oleksandr Tyshchenko, Luca Fancellu, Xen-devel, Wei Chen,
	Volodymyr Babchuk, George Dunlap, Dario Faggioli

I like all your suggestions below!


On Thu, 18 Nov 2021, Juergen Gross wrote:
> On 18.11.21 03:19, Stefano Stabellini wrote:
> > On Wed, 17 Nov 2021, Julien Grall wrote:
> > > > > > > On 17 Nov 2021, at 10:26, Julien Grall <julien@xen.org> wrote:
> > > > > > > 
> > > > > > > Hi Luca,
> > > > > > > 
> > > > > > > On 17/11/2021 09:57, Luca Fancellu wrote:
> > > > > > > > Currently Xen creates a default cpupool0 that contains all the
> > > > > > > > cpu
> > > > > > > > brought up
> > > > > > > > during boot and it assumes that the platform has only one kind
> > > > > > > > of
> > > > > > > > CPU.
> > > > > > > > This assumption does not hold on big.LITTLE platform, but
> > > > > > > > putting
> > > > > > > > different
> > > > > > > > type of CPU in the same cpupool can result in instability and
> > > > > > > > security issues
> > > > > > > > for the domains running on the pool.
> > > > > > > 
> > > > > > > I agree that you can't move a LITTLE vCPU to a big pCPU.
> > > > > > > However...
> > > > > > > 
> > > > > > > > For this reason this serie introduces an architecture specific
> > > > > > > > way
> > > > > > > > to create
> > > > > > > > different cpupool at boot time, this is particularly useful on
> > > > > > > > ARM
> > > > > > > > big.LITTLE
> > > > > > > > platform where there might be the need to have different
> > > > > > > > cpupools
> > > > > > > > for each type
> > > > > > > > of core, but also systems using NUMA can have different cpu pool
> > > > > > > > for
> > > > > > > > each node.
> > > > > > > 
> > > > > > > ... from my understanding, all the vCPUs of a domain have to be in
> > > > > > > the
> > > > > > > same cpupool. So with this approach it is not possible:
> > > > > > >     1) to have a mix of LITTLE and big vCPUs in the domain
> > > > > > >     2) to create a domain spanning across two NUMA nodes
> > > > > > > 
> > > > > > > So I think we need to make sure that any solutions we go through
> > > > > > > will
> > > > > > > not prevent us to implement those setups.
> > > > > > The point of this patch is to make all cores available without
> > > > > > breaking
> > > > > > the current behaviour of existing system.
> > > > > 
> > > > > I might be missing some context here. By breaking current behavior, do
> > > > > you
> > > > > mean user that may want to add "hmp-unsafe" on the command line?
> > > > 
> > > > Right, with hmp-unsafe the behaviour is now the same as without, only
> > > > extra
> > > > cores are parked in other cpupools.
> > > > 
> > > > So you have a point in fact that behaviour is changed for someone who
> > > > was
> > > > using hmp-unsafe before if this is activated.
> > > > The command line argument suggested by Jurgen definitely makes sense
> > > > here.
> > > > 
> > > > We could instead do the following:
> > > > - when this is activated in the configuration, boot all cores and park
> > > > them
> > > > in different pools (depending on command line argument). Current
> > > > behaviour
> > > > not change if other pools are not used (but more cores will be on in
> > > > xen)
> > > 
> > >  From my understanding, it is possible to move a pCPU in/out a pool
> > > afterwards.
> > > So the security concern with big.LITTLE is still present, even though it
> > > would
> > > be difficult to hit it.
> > 
> > As far as I know moving a pCPU in/out of a pool is something that cannot
> > happen automatically: it requires manual intervention to the user and it
> > is uncommon. We could print a warning or simply return error to prevent
> > the action from happening. Or something like:
> > 
> > "This action might result in memory corruptions and invalid behavior. Do
> > you want to continue? [Y/N]
> 
> This should only be rejected if the source and target pool are not
> compatible. So a cpupool could be attributed to allow only specific
> cpus (and maybe domains?) in it.

Yes you are right.


> Otherwise it would be impossible to create new cpupools after boot on
> such a system and populating them with cpus.
>
> > > I am also concerned that it would be more difficult to detect any
> > > misconfiguration. So I think this option would still need to be turned on
> > > only
> > > if hmp-unsafe are the new command line one are both on.
> > > 
> > > If we want to enable it without hmp-unsafe on, we would need to at least
> > > lock
> > > the pools.
> > 
> > Locking the pools is a good idea.
> 
> This would be another option, yes.
> 
> > My preference is not to tie this feature to the hmp-unsafe command line,
> > more on this below.
> 
> I agree.
> 
> > > > - when hmp-unsafe is on, this feature will be turned of (if activated in
> > > > configuration) and all cores would be added in the same pool.
> > > > 
> > > > What do you think ?
> > > 
> > > I am split. On one hand, this is making easier for someone to try
> > > big.LITTLE
> > > as you don't have manually pin vCPUs. On the other hand, this is handling
> > > a
> > > single use-case and you would need to use hmp-unsafe and pinning if you
> > > want
> > > to get more exotic setup (e.g. a domain with big.LITTLE).
> > > 
> > > Another possible solution is to pin dom0 vCPUs (AFAIK they are just sticky
> > > by
> > > default) and then create the pools from dom0 userspace. My assumption is
> > > for
> > > dom0less we would want to use pinning instead.
> > > 
> > > That said I would like to hear from Xilinx and EPAM as, IIRC, they are
> > > already
> > > using hmp-unsafe in production.
> > 
> > This discussion has been very interesting, it is cool to hear new ideas
> > like this one. I have a couple of thoughts to share.
> > 
> > First I think that the ability of creating cpupools at boot time is
> > super important. It goes way beyond big.LITTLE. It would be incredibly
> > useful to separate real-time (sched=null) and non-real-time
> > (sched=credit2) workloads. I think it will only become more important
> > going forward so I'd love to see an option to configure cpupools that
> > works for dom0less. It could be based on device tree properties rather
> > than kconfig options.
> 
> I think device tree AND command line option should be possible (think of
> x86 here).

Sure


> > It is true that if we had the devicetree-based cpupool configuration I
> > mentioned, then somebody could use it to create cpupools matching
> > big.LITTLE. So "in theory" it solves the problem. However, I think that
> > for big.LITTLE it would be suboptimal. For big.LITTLE it would be best
> > if Xen configured the cpupools automatically rather than based on the
> > device tree configuration. That way, it is going to work automatically
> > without extra steps even in the simplest Xen setups.
> > 
> > So I think that it is a good idea to have a command line option (better
> > than a kconfig option) to trigger the MIDR-based cpupool creation at
> > boot time. The option could be called midr-cpupools=on/off or
> > hw-cpupools=on/off for example.
> 
> I'd rather go for:
> 
> cpupools=<options>
> 
> With e.g. <options>:
> 
> - "auto-midr": split system into cpupools based on MIDR
> - "auto-numa": split system into cpupools based on NUMA nodes
> - "cpus=<list of cpus>[,sched=<scheduler>]
> 
> This would be rather flexible without adding more and more options
> doing similar things. Other sub-options could be added rather easily.

I like this


> > In terms of whether it should be the default or not, I don't feel
> > strongly about it. Unfortunately we (Xilinx) rely on a number of
> > non-default options already so we are already in the situation where we
> > have to be extra-careful at the options passed. I don't think that
> > adding one more would make a significant difference either way.
> > 
> > 
> > But my preference is *not* to tie the new command line option with
> > hmp-unsafe because if you use midr-cpupools and don't mess with the
> > pools then it is actually safe. We could even lock the cpupools like
> > Julien suggested or warn/return error on changing the cpupools. In this
> > scenario, it would be detrimental to also pass hmp-unsafe: it would
> > allow actually unsafe configurations that the user wanted to avoid by
> > using midr-cpupools. It would end up disabling checks we could put in
> > place to make midr-cpupools safer.
> > 
> > So in short I think it should be:
> > 
> > - midr-cpupools alone
> > cpupools created at boot, warning/errors on changing cpupools
> > 
> > - midr-cpupools + hmp-unsafe
> > cpupools created at boot, changing cpupools is allowed (we could still
> > warn but no errors)
> 
> I'd rather add an explicit ",locked" option to above cpupools parameter.

yeah that's better

 
> > 
> > - hmp-unsafe alone
> > same as today with hmp-unsafe
> > 
> > 
> > For the default as I said I don't have a strong preference. I think
> > midr-cpupools could be "on" be default.
> > 
> 
> What about making this a Kconfig option?

Could also be a good idea


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [RFC PATCH 0/2] Boot time cpupools
  2021-11-18  2:19           ` Stefano Stabellini
  2021-11-18  5:19             ` Juergen Gross
@ 2021-11-19 18:02             ` Julien Grall
  2021-11-19 18:55               ` Stefano Stabellini
  1 sibling, 1 reply; 27+ messages in thread
From: Julien Grall @ 2021-11-19 18:02 UTC (permalink / raw)
  To: Stefano Stabellini
  Cc: Bertrand Marquis, Oleksandr Tyshchenko, Luca Fancellu, Xen-devel,
	Wei Chen, Volodymyr Babchuk, George Dunlap, Juergen Gross,
	Dario Faggioli

Hi Stefano,

On 18/11/2021 02:19, Stefano Stabellini wrote:
> On Wed, 17 Nov 2021, Julien Grall wrote:
>>>>>> On 17 Nov 2021, at 10:26, Julien Grall <julien@xen.org> wrote:
>>>>>>
>>>>>> Hi Luca,
>>>>>>
>>>>>> On 17/11/2021 09:57, Luca Fancellu wrote:
>>>>>>> Currently Xen creates a default cpupool0 that contains all the cpu
>>>>>>> brought up
>>>>>>> during boot and it assumes that the platform has only one kind of
>>>>>>> CPU.
>>>>>>> This assumption does not hold on big.LITTLE platform, but putting
>>>>>>> different
>>>>>>> type of CPU in the same cpupool can result in instability and
>>>>>>> security issues
>>>>>>> for the domains running on the pool.
>>>>>>
>>>>>> I agree that you can't move a LITTLE vCPU to a big pCPU. However...
>>>>>>
>>>>>>> For this reason this serie introduces an architecture specific way
>>>>>>> to create
>>>>>>> different cpupool at boot time, this is particularly useful on ARM
>>>>>>> big.LITTLE
>>>>>>> platform where there might be the need to have different cpupools
>>>>>>> for each type
>>>>>>> of core, but also systems using NUMA can have different cpu pool for
>>>>>>> each node.
>>>>>>
>>>>>> ... from my understanding, all the vCPUs of a domain have to be in the
>>>>>> same cpupool. So with this approach it is not possible:
>>>>>>     1) to have a mix of LITTLE and big vCPUs in the domain
>>>>>>     2) to create a domain spanning across two NUMA nodes
>>>>>>
>>>>>> So I think we need to make sure that any solutions we go through will
>>>>>> not prevent us to implement those setups.
>>>>> The point of this patch is to make all cores available without breaking
>>>>> the current behaviour of existing system.
>>>>
>>>> I might be missing some context here. By breaking current behavior, do you
>>>> mean user that may want to add "hmp-unsafe" on the command line?
>>>
>>> Right, with hmp-unsafe the behaviour is now the same as without, only extra
>>> cores are parked in other cpupools.
>>>
>>> So you have a point in fact that behaviour is changed for someone who was
>>> using hmp-unsafe before if this is activated.
>>> The command line argument suggested by Jurgen definitely makes sense here.
>>>
>>> We could instead do the following:
>>> - when this is activated in the configuration, boot all cores and park them
>>> in different pools (depending on command line argument). Current behaviour
>>> not change if other pools are not used (but more cores will be on in xen)
>>
>>  From my understanding, it is possible to move a pCPU in/out a pool afterwards.
>> So the security concern with big.LITTLE is still present, even though it would
>> be difficult to hit it.
> 
> As far as I know moving a pCPU in/out of a pool is something that cannot
> happen automatically: it requires manual intervention to the user and it
> is uncommon. 
> We could print a warning or simply return error to prevent
> the action from happening. Or something like:
> 
> "This action might result in memory corruptions and invalid behavior. Do
> you want to continue? [Y/N]
> 
> 
>> I am also concerned that it would be more difficult to detect any
>> misconfiguration. So I think this option would still need to be turned on only
>> if hmp-unsafe are the new command line one are both on.
>>
>> If we want to enable it without hmp-unsafe on, we would need to at least lock
>> the pools.
> 
> Locking the pools is a good idea.
> 
> My preference is not to tie this feature to the hmp-unsafe command line,
> more on this below.

The only reason I suggested tying this to hmp-unsafe is if you (or anyone
else) really wanted to use a version where the pools are unlocked.

If we are going to implement the lock, then I think hmp-unsafe would
not be necessary for such a configuration.

> 
> 
>>> - when hmp-unsafe is on, this feature will be turned of (if activated in
>>> configuration) and all cores would be added in the same pool.
>>>
>>> What do you think ?
>>
>> I am split. On one hand, this is making easier for someone to try big.LITTLE
>> as you don't have manually pin vCPUs. On the other hand, this is handling a
>> single use-case and you would need to use hmp-unsafe and pinning if you want
>> to get more exotic setup (e.g. a domain with big.LITTLE).
>>
>> Another possible solution is to pin dom0 vCPUs (AFAIK they are just sticky by
>> default) and then create the pools from dom0 userspace. My assumption is for
>> dom0less we would want to use pinning instead.
>>
>> That said I would like to hear from Xilinx and EPAM as, IIRC, they are already
>> using hmp-unsafe in production.
> 
> This discussion has been very interesting, it is cool to hear new ideas
> like this one. I have a couple of thoughts to share.
> 
> First I think that the ability of creating cpupools at boot time is
> super important. It goes way beyond big.LITTLE. It would be incredibly
> useful to separate real-time (sched=null) and non-real-time
> (sched=credit2) workloads. I think it will only become more important
> going forward so I'd love to see an option to configure cpupools that
> works for dom0less. It could be based on device tree properties rather
> than kconfig options.
> 
> It is true that if we had the devicetree-based cpupool configuration I
> mentioned, then somebody could use it to create cpupools matching
> big.LITTLE. So "in theory" it solves the problem. However, I think that
> for big.LITTLE it would be suboptimal. For big.LITTLE it would be best
> if Xen configured the cpupools automatically rather than based on the
> device tree configuration. 

This brings up one question. How does Linux detect and use big.LITTLE? Is
there a Device-Tree binding?

[...]

> So I think that it is a good idea to have a command line option (better
> than a kconfig option) to trigger the MIDR-based cpupool creation at
> boot time. The option could be called midr-cpupools=on/off or
> hw-cpupools=on/off for example.
> In terms of whether it should be the default or not, I don't feel
> strongly about it.

On by default means this will be security supported and we need to be
reasonably confident this cannot be broken.

In this case, I am not only referring to moving a pCPU between pools but
also to Xen doing the right thing (e.g. finding the minimum cache line size).


[...]

> - midr-cpupools alone
> cpupools created at boot, warning/errors on changing cpupools
>
> - midr-cpupools + hmp-unsafe
> cpupools created at boot, changing cpupools is allowed (we could still
> warn but no errors)
> 
> - hmp-unsafe alone
> same as today with hmp-unsafe

I like Juergen's version better. But before agreeing on the command line,
I would like to understand how Linux is dealing with big.LITTLE today
(see my question above).

> 
> For the default as I said I don't have a strong preference. I think
> midr-cpupools could be "on" be default.

I think this should be off at the beginning until the feature is mature
enough. We are soon opening the tree again for the next development
cycle. So I think we have enough time to make sure everything works
properly and have it turned on by default before the next release.


Cheers,

-- 
Julien Grall


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [RFC PATCH 0/2] Boot time cpupools
  2021-11-19 18:02             ` Julien Grall
@ 2021-11-19 18:55               ` Stefano Stabellini
  2021-11-23 13:54                 ` Julien Grall
  0 siblings, 1 reply; 27+ messages in thread
From: Stefano Stabellini @ 2021-11-19 18:55 UTC (permalink / raw)
  To: Julien Grall
  Cc: Stefano Stabellini, Bertrand Marquis, Oleksandr Tyshchenko,
	Luca Fancellu, Xen-devel, Wei Chen, Volodymyr Babchuk,
	George Dunlap, Juergen Gross, Dario Faggioli

On Fri, 19 Nov 2021, Julien Grall wrote:
> Hi Stefano,
> 
> On 18/11/2021 02:19, Stefano Stabellini wrote:
> > On Wed, 17 Nov 2021, Julien Grall wrote:
> > > > > > > On 17 Nov 2021, at 10:26, Julien Grall <julien@xen.org> wrote:
> > > > > > > 
> > > > > > > Hi Luca,
> > > > > > > 
> > > > > > > On 17/11/2021 09:57, Luca Fancellu wrote:
> > > > > > > > Currently Xen creates a default cpupool0 that contains all the
> > > > > > > > cpu
> > > > > > > > brought up
> > > > > > > > during boot and it assumes that the platform has only one kind
> > > > > > > > of
> > > > > > > > CPU.
> > > > > > > > This assumption does not hold on big.LITTLE platform, but
> > > > > > > > putting
> > > > > > > > different
> > > > > > > > type of CPU in the same cpupool can result in instability and
> > > > > > > > security issues
> > > > > > > > for the domains running on the pool.
> > > > > > > 
> > > > > > > I agree that you can't move a LITTLE vCPU to a big pCPU.
> > > > > > > However...
> > > > > > > 
> > > > > > > > For this reason this serie introduces an architecture specific
> > > > > > > > way
> > > > > > > > to create
> > > > > > > > different cpupool at boot time, this is particularly useful on
> > > > > > > > ARM
> > > > > > > > big.LITTLE
> > > > > > > > platform where there might be the need to have different
> > > > > > > > cpupools
> > > > > > > > for each type
> > > > > > > > of core, but also systems using NUMA can have different cpu pool
> > > > > > > > for
> > > > > > > > each node.
> > > > > > > 
> > > > > > > ... from my understanding, all the vCPUs of a domain have to be in
> > > > > > > the
> > > > > > > same cpupool. So with this approach it is not possible:
> > > > > > >     1) to have a mix of LITTLE and big vCPUs in the domain
> > > > > > >     2) to create a domain spanning across two NUMA nodes
> > > > > > > 
> > > > > > > So I think we need to make sure that any solutions we go through
> > > > > > > will
> > > > > > > not prevent us to implement those setups.
> > > > > > The point of this patch is to make all cores available without
> > > > > > breaking
> > > > > > the current behaviour of existing system.
> > > > > 
> > > > > I might be missing some context here. By breaking current behavior, do
> > > > > you
> > > > > mean user that may want to add "hmp-unsafe" on the command line?
> > > > 
> > > > Right, with hmp-unsafe the behaviour is now the same as without, only
> > > > extra
> > > > cores are parked in other cpupools.
> > > > 
> > > > So you have a point in fact that behaviour is changed for someone who
> > > > was
> > > > using hmp-unsafe before if this is activated.
> > > > The command line argument suggested by Jurgen definitely makes sense
> > > > here.
> > > > 
> > > > We could instead do the following:
> > > > - when this is activated in the configuration, boot all cores and park
> > > > them
> > > > in different pools (depending on command line argument). Current
> > > > behaviour
> > > > not change if other pools are not used (but more cores will be on in
> > > > xen)
> > > 
> > >  From my understanding, it is possible to move a pCPU in/out a pool
> > > afterwards.
> > > So the security concern with big.LITTLE is still present, even though it
> > > would
> > > be difficult to hit it.
> > 
> > As far as I know moving a pCPU in/out of a pool is something that cannot
> > happen automatically: it requires manual intervention to the user and it
> > is uncommon. We could print a warning or simply return error to prevent
> > the action from happening. Or something like:
> > 
> > "This action might result in memory corruptions and invalid behavior. Do
> > you want to continue? [Y/N]
> > 
> > 
> > > I am also concerned that it would be more difficult to detect any
> > > misconfiguration. So I think this option would still need to be turned on
> > > only
> > > if hmp-unsafe are the new command line one are both on.
> > > 
> > > If we want to enable it without hmp-unsafe on, we would need to at least
> > > lock
> > > the pools.
> > 
> > Locking the pools is a good idea.
> > 
> > My preference is not to tie this feature to the hmp-unsafe command line,
> > more on this below.
> 
> The only reason I suggested to tie with hmp-unsafe is if you (or anyone else)
> really wanted to use a version where the pool are unlocked.
> 
> If we are going to implement the lock, then I think the hmp-unsafe would not
> be necessary for such configuration.
> 
> > 
> > 
> > > > - when hmp-unsafe is on, this feature will be turned off (if activated
> > > > in the configuration) and all cores would be added to the same pool.
> > > > 
> > > > What do you think ?
> > > 
> > > I am split. On one hand, this makes it easier for someone to try
> > > big.LITTLE
> > > as you don't have to manually pin vCPUs. On the other hand, this is
> > > handling a
> > > single use-case and you would need to use hmp-unsafe and pinning if you
> > > want
> > > to get a more exotic setup (e.g. a domain with big.LITTLE).
> > > 
> > > Another possible solution is to pin dom0 vCPUs (AFAIK they are just sticky
> > > by
> > > default) and then create the pools from dom0 userspace. My assumption is
> > > for
> > > dom0less we would want to use pinning instead.
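> > >
> > > A rough sketch of that userspace flow, assuming a pool config file
> > > (file name, pool name and cpu numbers made up):
> > >
> > >     # pool-little.cfg
> > >     name = "Pool-little"
> > >     sched = "credit2"
> > >     cpus = ["4", "5"]
> > >
> > >     xl vcpu-pin Domain-0 all 0-3
> > >     xl cpupool-create pool-little.cfg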
> > > 
> > > That said I would like to hear from Xilinx and EPAM as, IIRC, they are
> > > already
> > > using hmp-unsafe in production.
> > 
> > This discussion has been very interesting, it is cool to hear new ideas
> > like this one. I have a couple of thoughts to share.
> > 
> > First I think that the ability to create cpupools at boot time is
> > super important. It goes way beyond big.LITTLE. It would be incredibly
> > useful to separate real-time (sched=null) and non-real-time
> > (sched=credit2) workloads. I think it will only become more important
> > going forward so I'd love to see an option to configure cpupools that
> > works for dom0less. It could be based on device tree properties rather
> > than kconfig options.
> > 
> > It is true that if we had the devicetree-based cpupool configuration I
> > mentioned, then somebody could use it to create cpupools matching
> > big.LITTLE. So "in theory" it solves the problem. However, I think that
> > for big.LITTLE it would be suboptimal. For big.LITTLE it would be best
> > if Xen configured the cpupools automatically rather than based on the
> > device tree configuration. 
> 
> This brings one question. How does Linux detect and use big.LITTLE? Is there a
> Device-Tree binding?
> 
> [...]
> 
> > So I think that it is a good idea to have a command line option (better
> > than a kconfig option) to trigger the MIDR-based cpupool creation at
> > boot time. The option could be called midr-cpupools=on/off or
> > hw-cpupools=on/off for example.
> > In terms of whether it should be the default or not, I don't feel
> > strongly about it.
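> >
> > A minimal sketch of what that MIDR-based grouping could look like
> > (cpupool_assign_cpu() is a hypothetical helper, not an existing API;
> > cpu_data[] is Xen/Arm's per-CPU info):
> >
> >     /* One cpupool per distinct MIDR; pool 0 already exists. */
> >     static void __init midr_cpupools(void)
> >     {
> >         uint32_t seen[NR_CPUS];
> >         unsigned int cpu, nr_pools = 0;
> >
> >         for_each_online_cpu ( cpu )
> >         {
> >             uint32_t midr = cpu_data[cpu].midr.bits;
> >             unsigned int i;
> >
> >             for ( i = 0; i < nr_pools; i++ )
> >                 if ( seen[i] == midr )
> >                     break;
> >
> >             if ( i == nr_pools )
> >                 seen[nr_pools++] = midr;    /* first cpu of a new type */
> >
> >             cpupool_assign_cpu(i, cpu);     /* hypothetical helper */
> >         }
> >     }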
> 
> On by default means this will be security supported and we need to be reasonably
> confident this cannot be broken.
> 
> In this case, I am not only referring to moving a pCPU between pools but also
> Xen doing the right thing (e.g. finding the minimum cache line size).
> 
> 
> [...]
> 
> > - midr-cpupools alone
> > cpupools created at boot, warning/errors on changing cpupools
> > - midr-cpupools + hmp-unsafe
> > cpupools created at boot, changing cpupools is allowed (we could still
> > warn but no errors)
> > 
> > - hmp-unsafe alone
> > same as today with hmp-unsafe
> 
> I like Juergen's version better. But before agreeing on the command line, I
> would like to understand how Linux is dealing with big.LITTLE today (see my
> question above).

I also like Juergen's version better :-)

The device tree binding that covers big.LITTLE is the same that covers
cpus: Documentation/devicetree/bindings/arm/cpus.yaml

So basically, you can tell it is a big.LITTLE system because the
compatible strings differ between certain cpus, e.g.:

      cpu@0 {
        device_type = "cpu";
        compatible = "arm,cortex-a15";
        reg = <0x0>;
      };

      cpu@1 {
        device_type = "cpu";
        compatible = "arm,cortex-a15";
        reg = <0x1>;
      };

      cpu@100 {
        device_type = "cpu";
        compatible = "arm,cortex-a7";
        reg = <0x100>;
      };

      cpu@101 {
        device_type = "cpu";
        compatible = "arm,cortex-a7";
        reg = <0x101>;
      };


> > For the default as I said I don't have a strong preference. I think
> > midr-cpupools could be "on" by default.
> 
> I think this should be off at the beginning until the feature is mature
> enough. We are soon opening the tree again for the next development cycle. So
> I think we have enough time to make sure everything works properly to have it
> turned on by default before the next release.

That's fine


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [RFC PATCH 0/2] Boot time cpupools
  2021-11-19 18:55               ` Stefano Stabellini
@ 2021-11-23 13:54                 ` Julien Grall
  2021-11-23 14:45                   ` Bertrand Marquis
  0 siblings, 1 reply; 27+ messages in thread
From: Julien Grall @ 2021-11-23 13:54 UTC (permalink / raw)
  To: Stefano Stabellini
  Cc: Bertrand Marquis, Oleksandr Tyshchenko, Luca Fancellu, Xen-devel,
	Wei Chen, Volodymyr Babchuk, George Dunlap, Juergen Gross,
	Dario Faggioli

Hi Stefano,

On 19/11/2021 18:55, Stefano Stabellini wrote:
> On Fri, 19 Nov 2021, Julien Grall wrote:
>> I like Juergen's version better. But before agreeing on the command line, I
>> would like to understand how Linux is dealing with big.LITTLE today (see my
>> question above).
> 
> I also like Juergen's version better :-)
> 
> The device tree binding that covers big.LITTLE is the same that covers
> cpus: Documentation/devicetree/bindings/arm/cpus.yaml

Are you sure? I found 
Documentation/devicetree/bindings/arm/cpu-capacity.txt.

> 
> So basically, you can tell it is a big.LITTLE system because the
> compatible strings differ between certain cpus, e.g.:
> 
>        cpu@0 {
>          device_type = "cpu";
>          compatible = "arm,cortex-a15";
>          reg = <0x0>;
>        };
> 
>        cpu@1 {
>          device_type = "cpu";
>          compatible = "arm,cortex-a15";
>          reg = <0x1>;
>        };
> 
>        cpu@100 {
>          device_type = "cpu";
>          compatible = "arm,cortex-a7";
>          reg = <0x100>;
>        };
> 
>        cpu@101 {
>          device_type = "cpu";
>          compatible = "arm,cortex-a7";
>          reg = <0x101>;
>        };

I have not found any code in Linux using the compatible. Instead, they 
all seem to use the cpu-map (see drivers/base/arch_topology.c).

Anyway, to me it would be better to parse the Device-Tree rather than use
the MIDR. The advantage is that we can cover a wider range of cases (you
may have processors with the same MIDR but different frequencies). For
now, we could create a cpupool per cluster.
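
For instance, deriving the cluster from the MPIDR would be a one-liner
with the existing helper (assuming affinity level 1 is the cluster,
which does not hold for multi-threaded parts):

    unsigned int cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(cpu), 1);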

Cheers,

-- 
Julien Grall


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [RFC PATCH 0/2] Boot time cpupools
  2021-11-23 13:54                 ` Julien Grall
@ 2021-11-23 14:45                   ` Bertrand Marquis
  2021-11-23 22:01                     ` Stefano Stabellini
  0 siblings, 1 reply; 27+ messages in thread
From: Bertrand Marquis @ 2021-11-23 14:45 UTC (permalink / raw)
  To: Julien Grall
  Cc: Stefano Stabellini, Oleksandr Tyshchenko, Luca Fancellu,
	Xen-devel, Wei Chen, Volodymyr Babchuk, George Dunlap,
	Juergen Gross, Dario Faggioli

Hi Julien,

> On 23 Nov 2021, at 13:54, Julien Grall <julien@xen.org> wrote:
> 
> Hi Stefano,
> 
> On 19/11/2021 18:55, Stefano Stabellini wrote:
>> On Fri, 19 Nov 2021, Julien Grall wrote:
>>> I like Juergen's version better. But before agreeing on the command line, I
>>> would like to understand how Linux is dealing with big.LITTLE today (see my
>>> question above).
>> I also like Juergen's version better :-)
>> The device tree binding that covers big.LITTLE is the same that covers
>> cpus: Documentation/devicetree/bindings/arm/cpus.yaml
> 
> Are you sure? I found Documentation/devicetree/bindings/arm/cpu-capacity.txt.
> 
>> So basically, you can tell it is a big.LITTLE system because the
>> compatible strings differ between certain cpus, e.g.:
>>       cpu@0 {
>>         device_type = "cpu";
>>         compatible = "arm,cortex-a15";
>>         reg = <0x0>;
>>       };
>>       cpu@1 {
>>         device_type = "cpu";
>>         compatible = "arm,cortex-a15";
>>         reg = <0x1>;
>>       };
>>       cpu@100 {
>>         device_type = "cpu";
>>         compatible = "arm,cortex-a7";
>>         reg = <0x100>;
>>       };
>>       cpu@101 {
>>         device_type = "cpu";
>>         compatible = "arm,cortex-a7";
>>         reg = <0x101>;
>>       };
> 
> I have not found any code in Linux using the compatible. Instead, they all seem to use the cpu-map (see drivers/base/arch_topology.c).
> 
> Anyway, to me it would be better to parse the Device-Tree rather than use the MIDR. The advantage is that we can cover a wider range of cases (you may have processors with the same MIDR but different frequencies). For now, we could create a cpupool per cluster.

This is a really good idea as this could also work for NUMA systems.

So reg & ~0xff would give the cluster number?

We will probably end up splitting cores into different cpupools even if they are all the same, as there are several processors with multiple clusters of identical cores (I recall some NXP boards being like that).

Some device tree also have a cpu-map definition:
        cpu-map {
            cluster0 {
                core0 {
                    cpu = <&A57_0>;
                };
                core1 {
                    cpu = <&A57_1>;
                };
            };

            cluster1 {
                core0 {
                    cpu = <&A53_0>;
                };
                core1 {
                    cpu = <&A53_1>;
                };
                core2 {
                    cpu = <&A53_2>;
                };
                core3 {
                    cpu = <&A53_3>;
                };
            };
        };

@stefano: is the cpu-map something standard? Lots of device trees do have it in Linux now but I do not recall seeing it in older device trees.

Maybe using cpu-map could be a solution; we could say that device trees without a cpu-map are not supported.
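
A rough sketch of how Xen could walk it, using the existing device tree
accessors (pool_add_cpu() is a made-up helper):

    static void __init pools_from_cpu_map(const struct dt_device_node *map)
    {
        const struct dt_device_node *cluster, *core;
        unsigned int pool = 0;

        dt_for_each_child_node ( map, cluster )      /* cluster0, cluster1, ... */
        {
            dt_for_each_child_node ( cluster, core ) /* core0, core1, ... */
            {
                const struct dt_device_node *cpu =
                    dt_parse_phandle(core, "cpu", 0);

                if ( cpu )
                    pool_add_cpu(pool, cpu);         /* hypothetical helper */
            }
            pool++;
        }
    }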

Cheers
Bertrand



> 
> Cheers,
> 
> -- 
> Julien Grall



^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [RFC PATCH 0/2] Boot time cpupools
  2021-11-23 14:45                   ` Bertrand Marquis
@ 2021-11-23 22:01                     ` Stefano Stabellini
  0 siblings, 0 replies; 27+ messages in thread
From: Stefano Stabellini @ 2021-11-23 22:01 UTC (permalink / raw)
  To: Bertrand Marquis
  Cc: Julien Grall, Stefano Stabellini, Oleksandr Tyshchenko,
	Luca Fancellu, Xen-devel, Wei Chen, Volodymyr Babchuk,
	George Dunlap, Juergen Gross, Dario Faggioli

On Tue, 23 Nov 2021, Bertrand Marquis wrote:
> Hi Julien,
> 
> > On 23 Nov 2021, at 13:54, Julien Grall <julien@xen.org> wrote:
> > 
> > Hi Stefano,
> > 
> > On 19/11/2021 18:55, Stefano Stabellini wrote:
> >> On Fri, 19 Nov 2021, Julien Grall wrote:
> >>> I like Juergen's version better. But before agreeing on the command line, I
> >>> would like to understand how Linux is dealing with big.LITTLE today (see my
> >>> question above).
> >> I also like Juergen's version better :-)
> >> The device tree binding that covers big.LITTLE is the same that covers
> >> cpus: Documentation/devicetree/bindings/arm/cpus.yaml
> > 
> > Are you sure? I found Documentation/devicetree/bindings/arm/cpu-capacity.txt.
> > 
> >> So basically, you can tell it is a big.LITTLE system because the
> >> compatible strings differ between certain cpus, e.g.:
> >>       cpu@0 {
> >>         device_type = "cpu";
> >>         compatible = "arm,cortex-a15";
> >>         reg = <0x0>;
> >>       };
> >>       cpu@1 {
> >>         device_type = "cpu";
> >>         compatible = "arm,cortex-a15";
> >>         reg = <0x1>;
> >>       };
> >>       cpu@100 {
> >>         device_type = "cpu";
> >>         compatible = "arm,cortex-a7";
> >>         reg = <0x100>;
> >>       };
> >>       cpu@101 {
> >>         device_type = "cpu";
> >>         compatible = "arm,cortex-a7";
> >>         reg = <0x101>;
> >>       };
> > 
> > I have not found any code in Linux using the compatible. Instead, they all seem to use the cpu-map (see drivers/base/arch_topology.c).
> > 
> > Anyway, to me it would be better to parse the Device-Tree rather than use the MIDR. The advantage is that we can cover a wider range of cases (you may have processors with the same MIDR but different frequencies). For now, we could create a cpupool per cluster.
> 
> This is a really good idea as this could also work for NUMA systems.
> 
> So reg & ~0xff would give the cluster number?
> 
> We will probably end up splitting cores into different cpupools even if they are all the same, as there are several processors with multiple clusters of identical cores (I recall some NXP boards being like that).
> 
> Some device tree also have a cpu-map definition:
>         cpu-map {
>             cluster0 {
>                 core0 {
>                     cpu = <&A57_0>;
>                 };
>                 core1 {
>                     cpu = <&A57_1>;
>                 };
>             };
> 
>             cluster1 {
>                 core0 {
>                     cpu = <&A53_0>;
>                 };
>                 core1 {
>                     cpu = <&A53_1>;
>                 };
>                 core2 {
>                     cpu = <&A53_2>;
>                 };
>                 core3 {
>                     cpu = <&A53_3>;
>                 };
>             };
>         };
> 
> @stefano: is the cpu-map something standard? Lots of device trees do have it in Linux now but I do not recall seeing it in older device trees.
> Maybe using cpu-map could be a solution; we could say that device trees without a cpu-map are not supported.


cpu-map is newer than big.LITTLE support in Linux. See for instance
commit 4ab328f06. Before cpu-map was introduced, Linux used mostly the
MPIDR or sometimes the *machine* compatible string. I did find one
example of a board where the cpu compatible string is the same for both
big and LITTLE CPUs: arch/arm64/boot/dts/rockchip/rk3368.dtsi. That
could be the reason why the cpu compatible string is not used for
detecting big.LITTLE. Sorry about that, my earlier suggestion was wrong.

Yes I think using cpu-map would be fine. It seems to be widely used by
different vendors. (Even if something as generic as cpu-map should
really be under the devicetree.org spec, not under Linux, but anyway.)

Only one note: you mentioned NUMA. As you can see from
Documentation/devicetree/bindings/numa.txt, NUMA doesn't use cpu-map.
Instead, it uses numa-node-id and distance-map.
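
For reference, that binding looks roughly like this (abridged from the
example in the Linux numa.txt):

    cpu@0 {
        device_type = "cpu";
        numa-node-id = <0>;
    };

    cpu@100 {
        device_type = "cpu";
        numa-node-id = <1>;
    };

    distance-map {
        compatible = "numa-distance-map-v1";
        distance-matrix = <0 0 10>, <0 1 20>,
                          <1 0 20>, <1 1 10>;
    };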


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [RFC PATCH 0/2] Boot time cpupools
  2021-11-18  5:19             ` Juergen Gross
  2021-11-18 17:27               ` Stefano Stabellini
@ 2021-12-07  9:27               ` Luca Fancellu
  1 sibling, 0 replies; 27+ messages in thread
From: Luca Fancellu @ 2021-12-07  9:27 UTC (permalink / raw)
  To: Juergen Gross, Julien Grall, Stefano Stabellini, Oleksandr Tyshchenko
  Cc: Bertrand Marquis, Xen-devel, Wei Chen, Volodymyr Babchuk,
	George Dunlap, Dario Faggioli

Hi all,

Thank you for all your feedback; sorry for the late response. Given the number of suggestions, I’ve been working
on a proposal for boot time cpupools that I hope can work for everyone.

The feature will be enabled by CONFIG_BOOT_TIME_CPUPOOLS, so without it everything behaves as it does now.

When the feature is enabled, the code will check the device tree and use information from there to create the cpupools:

a72_1: cpu@0 {
        compatible = "arm,cortex-a72";
        reg = <0x0 0x0>;
        device_type = "cpu";
        [...]
};

a72_2: cpu@1 {
        compatible = "arm,cortex-a72";
        reg = <0x0 0x1>;
        device_type = "cpu";
        [...]
};

cpu@2 {
        compatible = "arm,cortex-a72";
        reg = <0x0 0x2>;
        device_type = "cpu";
        [...]
};

a53_1: cpu@100 {
        compatible = "arm,cortex-a53";
        reg = <0x0 0x100>;
        device_type = "cpu";
        [...]
};

a53_2: cpu@101 {
        compatible = "arm,cortex-a53";
        reg = <0x0 0x101>;
        device_type = "cpu";
        [...]
};

chosen {

    cpupool_a {
        compatible = "xen,cpupool";
        xen,cpupool-id = <0>;
        xen,cpupool-cpus = <&a72_1 &a72_2>;     
    };
    cpupool_b {
        compatible = "xen,cpupool";
        xen,cpupool-id = <1>;
        xen,cpupool-cpus = <&a53_1 &a53_2>;
        xen,cpupool-sched = "credit2";
    };
    
   […]

};

So for every node under chosen with the compatible “xen,cpupool”, a cpupool is created (if it doesn’t already exist).

Mandatory properties of that node are: 
 - “xen,cpupool-id” which identifies the id of the pool
 - “xen,cpupool-cpus” which lists the handles of the cpus

The optional property is “xen,cpupool-sched”, a string that identifies the scheduler. A cpupool with identifier
0 (zero) can’t have that property; it will get the default scheduler from Xen.
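
A minimal sketch of the per-node parsing, using Xen’s existing device tree
helpers (cpupool_record_cpu() is a hypothetical helper):

    static int __init parse_cpupool_node(const struct dt_device_node *node)
    {
        uint32_t pool_id;
        const char *sched = NULL;
        struct dt_device_node *cpu;
        unsigned int i = 0;

        if ( !dt_property_read_u32(node, "xen,cpupool-id", &pool_id) )
            return -EINVAL;                    /* mandatory property */

        /* Optional; when absent the pool keeps Xen's default scheduler. */
        dt_property_read_string(node, "xen,cpupool-sched", &sched);

        while ( (cpu = dt_parse_phandle(node, "xen,cpupool-cpus", i++)) )
            cpupool_record_cpu(pool_id, cpu);  /* hypothetical helper */

        return 0;
    }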

A set of rules is applied:

  1) The cpupool with id 0 is always created, whether or not it is listed in the DT
  2) The cpupool with id 0 must have at least one cpu; if it doesn’t, the system will panic.
  3) Every cpu that is not assigned to any cpupool will be automatically assigned to the cpupool with id 0
      (only cpus that are brought up by Xen); see the sketch after this list
  4) When a cpu is assigned to a cpupool in the DT, but the cpu is not up, the system will panic.
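
A sketch of the fallback pass for rule 3 (helper names are made up):

    for_each_online_cpu ( cpu )
        if ( !cpu_in_any_pool(cpu) )       /* hypothetical query */
            cpupool_record_cpu(0, cpu);    /* pool 0 picks up the leftovers */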

So, given this explanation, the above example will create a system with two cpupools:

 - cpupool with id 0 containing 3 a72 cpus (two are explicitly listed, one was not assigned to any other cpupool)
 - cpupool with id 1 containing 2 a53 cpus (cpus explicitly listed)

Clearly the above example works only if Xen is started using the hmp-unsafe=1 parameter; otherwise some cpus
won’t be started.


Given the above example, we might be able to have an option like this (“xen,domain-cpupool-id”) to assign
dom0less guests to cpupools:

chosen {

    cpupool_a {
        compatible = "xen,cpupool";
        xen,cpupool-id = <0>;
        xen,cpupool-cpus = <&a72_1 &a72_2>;     
    };
    cpupool_b {
        compatible = "xen,cpupool";
        xen,cpupool-id = <1>;
        xen,cpupool-cpus = <&a53_1 &a53_2>;
        xen,cpupool-sched = "credit2";
    };
    
    domU1 {
        #size-cells = <0x1>;
        #address-cells = <0x1>;
        compatible = "xen,domain";
        cpus = <0x1>;
        memory = <0x0 0xc0000>;
        xen,domain-cpupool-id = <1>;            /* Optional */
        vpl011;

        module@0 {
            compatible = "multiboot,kernel", "multiboot,module";
            […]
        };
    };

};


Any thoughts on this?

Cheers,
Luca




^ permalink raw reply	[flat|nested] 27+ messages in thread

end of thread

Thread overview: 27+ messages
2021-11-17  9:57 [RFC PATCH 0/2] Boot time cpupools Luca Fancellu
2021-11-17  9:57 ` [RFC PATCH 1/2] xen/cpupool: Create different cpupools at boot time Luca Fancellu
2021-11-17 11:05   ` Juergen Gross
2021-11-18  3:01     ` Stefano Stabellini
2021-11-18  5:08       ` Juergen Gross
2021-11-18  2:59   ` Stefano Stabellini
2021-11-17  9:57 ` [RFC PATCH 2/2] tools/cpupools: Give a name to unnamed cpupools Luca Fancellu
2021-11-17 11:10   ` Juergen Gross
2021-11-17 11:52     ` Luca Fancellu
2021-11-17 12:06       ` Juergen Gross
2021-11-17 10:26 ` [RFC PATCH 0/2] Boot time cpupools Julien Grall
2021-11-17 10:41   ` Juergen Gross
2021-11-17 11:16   ` Bertrand Marquis
2021-11-17 11:21     ` Juergen Gross
2021-11-17 11:48     ` Julien Grall
2021-11-17 12:07       ` Bertrand Marquis
2021-11-17 19:10         ` Julien Grall
2021-11-18  2:19           ` Stefano Stabellini
2021-11-18  5:19             ` Juergen Gross
2021-11-18 17:27               ` Stefano Stabellini
2021-12-07  9:27               ` Luca Fancellu
2021-11-19 18:02             ` Julien Grall
2021-11-19 18:55               ` Stefano Stabellini
2021-11-23 13:54                 ` Julien Grall
2021-11-23 14:45                   ` Bertrand Marquis
2021-11-23 22:01                     ` Stefano Stabellini
2021-11-18 15:29           ` Oleksandr Tyshchenko
