* [PATCH] xen/pvhvm: Support more than 32 VCPUs when migrating (v3).
@ 2015-07-10 18:37 Konrad Rzeszutek Wilk
  2015-07-10 18:57 ` Konrad Rzeszutek Wilk
  2015-07-10 18:57 ` Konrad Rzeszutek Wilk
  0 siblings, 2 replies; 12+ messages in thread
From: Konrad Rzeszutek Wilk @ 2015-07-10 18:37 UTC (permalink / raw)
  To: david.vrabel, boris.ostrovsky, xen-devel, linux-kernel
  Cc: Konrad Rzeszutek Wilk

When Xen migrates an HVM guest, by default its shared_info can
only hold up to 32 vCPUs. The VCPUOP_register_vcpu_info hypercall
was therefore introduced, which allows us to set up per-page areas
for vCPUs. This means we can boot a PVHVM guest with more than 32
vCPUs. During migration the per-cpu structure is freshly allocated
by the hypervisor (vcpu_info_mfn is set to INVALID_MFN) so that the
newly migrated guest can make a VCPUOP_register_vcpu_info hypercall.

Unfortunately we end up triggering this condition in Xen:
/* Run this command on yourself or on other offline VCPUS. */
 if ( (v != current) && !test_bit(_VPF_down, &v->pause_flags) )

which means we are unable to set up the per-cpu VCPU structures
for running vCPUs. The Linux PV code paths make this work by
iterating over every vCPU and doing the following:

 1) is the target vCPU up (VCPUOP_is_up hypercall)?
 2) if yes, then VCPUOP_down to pause it.
 3) VCPUOP_register_vcpu_info
 4) if we paused it in step 2, VCPUOP_up to bring it back up

But since VCPUOP_down, VCPUOP_is_up, and VCPUOP_up are
not allowed on HVM guests we can't do this. However, with the
Xen git commit f80c5623a126afc31e6bb9382268d579f0324a7a
("xen/x86: allow HVM guests to use hypercalls to bring up vCPUs")
we can. As such, first check whether VCPUOP_is_up is actually
available before attempting this dance.
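
Roughly, the dance plus that availability check boils down to the
sketch below (illustration only - the real code is the
xen_vcpu_restore() change in the patch that follows):

 /* Sketch only; see the actual xen_vcpu_restore() hunk below. */
 bool have_vcpuops =
 	HYPERVISOR_vcpu_op(VCPUOP_is_up, smp_processor_id(), NULL) != -ENOSYS;

 for_each_online_cpu(cpu) {
 	bool other = (cpu != smp_processor_id());
 	bool was_up = have_vcpuops &&
 		      HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL);

 	if (other && was_up)		/* 1) + 2): pause a running vCPU */
 		HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL);

 	xen_vcpu_setup(cpu);		/* 3): VCPUOP_register_vcpu_info */

 	if (other && was_up)		/* 4): bring it back up */
 		HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL);
 }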

As most of this dance code is already done in 'xen_setup_vcpu',
let's make it callable on both PV and HVM. This means moving one
of the checks out to 'xen_setup_runstate_info'.

Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/xen/enlighten.c | 23 +++++++++++++++++------
 arch/x86/xen/suspend.c   |  7 +------
 arch/x86/xen/time.c      |  3 +++
 3 files changed, 21 insertions(+), 12 deletions(-)

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 46957ea..187dec6 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -238,12 +238,23 @@ static void xen_vcpu_setup(int cpu)
 void xen_vcpu_restore(void)
 {
 	int cpu;
+	bool vcpuops = true;
+	const struct cpumask *mask;
 
-	for_each_possible_cpu(cpu) {
+	mask = xen_pv_domain() ? cpu_possible_mask : cpu_online_mask;
+
+	/* Only Xen 4.5 and higher supports this. */
+	if (HYPERVISOR_vcpu_op(VCPUOP_is_up, smp_processor_id(), NULL) == -ENOSYS)
+		vcpuops = false;
+
+	for_each_cpu(cpu, mask) {
 		bool other_cpu = (cpu != smp_processor_id());
-		bool is_up = HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL);
+		bool is_up = false;
 
-		if (other_cpu && is_up &&
+		if (vcpuops)
+			is_up = HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL);
+
+		if (vcpuops && other_cpu && is_up &&
 		    HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL))
 			BUG();
 
@@ -252,7 +263,7 @@ void xen_vcpu_restore(void)
 		if (have_vcpu_info_placement)
 			xen_vcpu_setup(cpu);
 
-		if (other_cpu && is_up &&
+		if (vcpuops && other_cpu && is_up &&
 		    HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL))
 			BUG();
 	}
@@ -1704,8 +1715,8 @@ void __ref xen_hvm_init_shared_info(void)
 	 * in that case multiple vcpus might be online. */
 	for_each_online_cpu(cpu) {
 		/* Leave it to be NULL. */
-		if (cpu >= MAX_VIRT_CPUS)
-			continue;
+		if (cpu >= MAX_VIRT_CPUS && cpu <= NR_CPUS)
+			per_cpu(xen_vcpu, cpu) = NULL; /* Triggers xen_vcpu_setup.*/
 		per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
 	}
 }
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
index 53b4c08..cd66397 100644
--- a/arch/x86/xen/suspend.c
+++ b/arch/x86/xen/suspend.c
@@ -31,15 +31,10 @@ static void xen_pv_pre_suspend(void)
 static void xen_hvm_post_suspend(int suspend_cancelled)
 {
 #ifdef CONFIG_XEN_PVHVM
-	int cpu;
 	xen_hvm_init_shared_info();
 	xen_callback_vector();
 	xen_unplug_emulated_devices();
-	if (xen_feature(XENFEAT_hvm_safe_pvclock)) {
-		for_each_online_cpu(cpu) {
-			xen_setup_runstate_info(cpu);
-		}
-	}
+	xen_vcpu_restore();
 #endif
 }
 
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 55da33b..6882d0c 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -105,6 +105,9 @@ void xen_setup_runstate_info(int cpu)
 {
 	struct vcpu_register_runstate_memory_area area;
 
+	if (xen_hvm_domain() && !(xen_feature(XENFEAT_hvm_safe_pvclock)))
+		return;
+
 	area.addr.v = &per_cpu(xen_runstate, cpu);
 
 	if (HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area,
-- 
2.1.0


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH] xen/pvhvm: Support more than 32 VCPUs when migrating (v3).
  2015-07-10 18:37 [PATCH] xen/pvhvm: Support more than 32 VCPUs when migrating (v3) Konrad Rzeszutek Wilk
@ 2015-07-10 18:57 ` Konrad Rzeszutek Wilk
  2015-11-12 16:40   ` Ian Campbell
                     ` (2 more replies)
  2015-07-10 18:57 ` Konrad Rzeszutek Wilk
  1 sibling, 3 replies; 12+ messages in thread
From: Konrad Rzeszutek Wilk @ 2015-07-10 18:57 UTC (permalink / raw)
  To: david.vrabel, boris.ostrovsky, xen-devel, linux-kernel

On Fri, Jul 10, 2015 at 02:37:46PM -0400, Konrad Rzeszutek Wilk wrote:
> When Xen migrates an HVM guest, by default its shared_info can
> only hold up to 32 CPUs. As such the hypercall
> VCPUOP_register_vcpu_info was introduced which allowed us to
> setup per-page areas for VCPUs. This means we can boot PVHVM
> guest with more than 32 VCPUs. During migration the per-cpu
> structure is allocated freshly by the hypervisor (vcpu_info_mfn
> is set to INVALID_MFN) so that the newly migrated guest
> can make an VCPUOP_register_vcpu_info hypercall.
> 
> Unfortunatly we end up triggering this condition in Xen:
> /* Run this command on yourself or on other offline VCPUS. */
>  if ( (v != current) && !test_bit(_VPF_down, &v->pause_flags) )
> 
> which means we are unable to setup the per-cpu VCPU structures
> for running vCPUS. The Linux PV code paths make this work by
> iterating over every vCPU with:
> 
>  1) is target CPU up (VCPUOP_is_up hypercall?)
>  2) if yes, then VCPUOP_down to pause it.
>  3) VCPUOP_register_vcpu_info
>  4) if it was down, then VCPUOP_up to bring it back up
> 
> But since VCPUOP_down, VCPUOP_is_up, and VCPUOP_up are
> not allowed on HVM guests we can't do this. However with the
> Xen git commit f80c5623a126afc31e6bb9382268d579f0324a7a
> ("xen/x86: allow HVM guests to use hypercalls to bring up vCPUs"")

<sigh> I was in my local tree which was Roger's 'hvm_without_dm_v3'
looking at patches and spotted this - and thought it was already in!

Sorry about this patch - and please ignore it until the VCPU_op*
can be used by HVM guests.

> we can do this. As such first check if VCPUOP_is_up is actually
> possible before trying this dance.
> 
> As most of this dance code is done already in 'xen_setup_vcpu'
> lets make it callable on both PV and HVM. This means moving one
> of the checks out to 'xen_setup_runstate_info'.
> 
> Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
> ---
>  arch/x86/xen/enlighten.c | 23 +++++++++++++++++------
>  arch/x86/xen/suspend.c   |  7 +------
>  arch/x86/xen/time.c      |  3 +++
>  3 files changed, 21 insertions(+), 12 deletions(-)
> 
> diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
> index 46957ea..187dec6 100644
> --- a/arch/x86/xen/enlighten.c
> +++ b/arch/x86/xen/enlighten.c
> @@ -238,12 +238,23 @@ static void xen_vcpu_setup(int cpu)
>  void xen_vcpu_restore(void)
>  {
>  	int cpu;
> +	bool vcpuops = true;
> +	const struct cpumask *mask;
>  
> -	for_each_possible_cpu(cpu) {
> +	mask = xen_pv_domain() ? cpu_possible_mask : cpu_online_mask;
> +
> +	/* Only Xen 4.5 and higher supports this. */
> +	if (HYPERVISOR_vcpu_op(VCPUOP_is_up, smp_processor_id(), NULL) == -ENOSYS)
> +		vcpuops = false;
> +
> +	for_each_cpu(cpu, mask) {
>  		bool other_cpu = (cpu != smp_processor_id());
> -		bool is_up = HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL);
> +		bool is_up = false;
>  
> -		if (other_cpu && is_up &&
> +		if (vcpuops)
> +			is_up = HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL);
> +
> +		if (vcpuops && other_cpu && is_up &&
>  		    HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL))
>  			BUG();
>  
> @@ -252,7 +263,7 @@ void xen_vcpu_restore(void)
>  		if (have_vcpu_info_placement)
>  			xen_vcpu_setup(cpu);
>  
> -		if (other_cpu && is_up &&
> +		if (vcpuops && other_cpu && is_up &&
>  		    HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL))
>  			BUG();
>  	}
> @@ -1704,8 +1715,8 @@ void __ref xen_hvm_init_shared_info(void)
>  	 * in that case multiple vcpus might be online. */
>  	for_each_online_cpu(cpu) {
>  		/* Leave it to be NULL. */
> -		if (cpu >= MAX_VIRT_CPUS)
> -			continue;
> +		if (cpu >= MAX_VIRT_CPUS && cpu <= NR_CPUS)
> +			per_cpu(xen_vcpu, cpu) = NULL; /* Triggers xen_vcpu_setup.*/
>  		per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
>  	}
>  }
> diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
> index 53b4c08..cd66397 100644
> --- a/arch/x86/xen/suspend.c
> +++ b/arch/x86/xen/suspend.c
> @@ -31,15 +31,10 @@ static void xen_pv_pre_suspend(void)
>  static void xen_hvm_post_suspend(int suspend_cancelled)
>  {
>  #ifdef CONFIG_XEN_PVHVM
> -	int cpu;
>  	xen_hvm_init_shared_info();
>  	xen_callback_vector();
>  	xen_unplug_emulated_devices();
> -	if (xen_feature(XENFEAT_hvm_safe_pvclock)) {
> -		for_each_online_cpu(cpu) {
> -			xen_setup_runstate_info(cpu);
> -		}
> -	}
> +	xen_vcpu_restore();
>  #endif
>  }
>  
> diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
> index 55da33b..6882d0c 100644
> --- a/arch/x86/xen/time.c
> +++ b/arch/x86/xen/time.c
> @@ -105,6 +105,9 @@ void xen_setup_runstate_info(int cpu)
>  {
>  	struct vcpu_register_runstate_memory_area area;
>  
> +	if (xen_hvm_domain() && !(xen_feature(XENFEAT_hvm_safe_pvclock)))
> +		return;
> +
>  	area.addr.v = &per_cpu(xen_runstate, cpu);
>  
>  	if (HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area,
> -- 
> 2.1.0
> 

^ permalink raw reply	[flat|nested] 12+ messages in thread


* Re: [PATCH] xen/pvhvm: Support more than 32 VCPUs when migrating (v3).
  2015-07-10 18:57 ` Konrad Rzeszutek Wilk
@ 2015-11-12 16:40   ` Ian Campbell
  2015-11-13 14:42     ` Konrad Rzeszutek Wilk
  2016-07-21 14:14   ` Konrad Rzeszutek Wilk
  2016-07-21 14:14   ` Konrad Rzeszutek Wilk
  2 siblings, 1 reply; 12+ messages in thread
From: Ian Campbell @ 2015-11-12 16:40 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk, david.vrabel, boris.ostrovsky, xen-devel

On Fri, 2015-07-10 at 14:57 -0400, Konrad Rzeszutek Wilk wrote:
> On Fri, Jul 10, 2015 at 02:37:46PM -0400, Konrad Rzeszutek Wilk wrote:
> > When Xen migrates an HVM guest, by default its shared_info can
> > only hold up to 32 CPUs. As such the hypercall
> > VCPUOP_register_vcpu_info was introduced which allowed us to
> > setup per-page areas for VCPUs. This means we can boot PVHVM
> > guest with more than 32 VCPUs. During migration the per-cpu
> > structure is allocated freshly by the hypervisor (vcpu_info_mfn
> > is set to INVALID_MFN) so that the newly migrated guest
> > can make an VCPUOP_register_vcpu_info hypercall.
> > 
> > Unfortunatly we end up triggering this condition in Xen:
> > /* Run this command on yourself or on other offline VCPUS. */
> >  if ( (v != current) && !test_bit(_VPF_down, &v->pause_flags) )
> > 
> > which means we are unable to setup the per-cpu VCPU structures
> > for running vCPUS. The Linux PV code paths make this work by
> > iterating over every vCPU with:
> > 
> >  1) is target CPU up (VCPUOP_is_up hypercall?)
> >  2) if yes, then VCPUOP_down to pause it.
> >  3) VCPUOP_register_vcpu_info
> >  4) if it was down, then VCPUOP_up to bring it back up
> > 
> > But since VCPUOP_down, VCPUOP_is_up, and VCPUOP_up are
> > not allowed on HVM guests we can't do this. However with the
> > Xen git commit f80c5623a126afc31e6bb9382268d579f0324a7a
> > ("xen/x86: allow HVM guests to use hypercalls to bring up vCPUs"")
> 
> <sigh> I was in my local tree which was Roger's 'hvm_without_dm_v3'
> looking at patches and spotted this - and thought it was already in!
> 
> Sorry about this patch - and please ignore it until the VCPU_op*
> can be used by HVM guests.

FYI I just tripped over this while implementing ARM save/restore (in that I
couldn't figure out HTF HVM VCPUs > MAX_LEGACY_VCPUS were getting their
vcpu_info re-registered, which turns out to be because they aren't...).

ARM also lacks the VCPU_up/down/is_up hypercalls. My plan there is simply to
use on_each_cpu to do it; I can get away with this on ARM because the
necessary infra (IPIs etc.) is provided by the h/w virt platform (i.e. it
looks native), so there is no reliance on Xen infra being fully up.
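
Something like the sketch below is what I have in mind (just a sketch;
xen_reregister_vcpu_info_local() is a made-up placeholder for whatever
ends up issuing the local VCPUOP_register_vcpu_info call):

 	/* Runs on each CPU via a native IPI, so no Xen events needed. */
 	static void xen_reregister_vcpu_info(void *unused)
 	{
 		xen_reregister_vcpu_info_local();	/* placeholder helper */
 	}
 	...
 	on_each_cpu(xen_reregister_vcpu_info, NULL, 1);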

Not sure if that is also true of x86/PVHVM but thought I would mention it
in case it seemed preferable to you.

Ian.


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] xen/pvhvm: Support more than 32 VCPUs when migrating (v3).
  2015-11-12 16:40   ` Ian Campbell
@ 2015-11-13 14:42     ` Konrad Rzeszutek Wilk
  2015-11-13 14:49       ` Ian Campbell
  0 siblings, 1 reply; 12+ messages in thread
From: Konrad Rzeszutek Wilk @ 2015-11-13 14:42 UTC (permalink / raw)
  To: Ian Campbell; +Cc: xen-devel, boris.ostrovsky, david.vrabel

On Thu, Nov 12, 2015 at 04:40:06PM +0000, Ian Campbell wrote:
> On Fri, 2015-07-10 at 14:57 -0400, Konrad Rzeszutek Wilk wrote:
> > On Fri, Jul 10, 2015 at 02:37:46PM -0400, Konrad Rzeszutek Wilk wrote:
> > > When Xen migrates an HVM guest, by default its shared_info can
> > > only hold up to 32 CPUs. As such the hypercall
> > > VCPUOP_register_vcpu_info was introduced which allowed us to
> > > setup per-page areas for VCPUs. This means we can boot PVHVM
> > > guest with more than 32 VCPUs. During migration the per-cpu
> > > structure is allocated freshly by the hypervisor (vcpu_info_mfn
> > > is set to INVALID_MFN) so that the newly migrated guest
> > > can make an VCPUOP_register_vcpu_info hypercall.
> > > 
> > > Unfortunatly we end up triggering this condition in Xen:
> > > /* Run this command on yourself or on other offline VCPUS. */
> > >  if ( (v != current) && !test_bit(_VPF_down, &v->pause_flags) )
> > > 
> > > which means we are unable to setup the per-cpu VCPU structures
> > > for running vCPUS. The Linux PV code paths make this work by
> > > iterating over every vCPU with:
> > > 
> > >  1) is target CPU up (VCPUOP_is_up hypercall?)
> > >  2) if yes, then VCPUOP_down to pause it.
> > >  3) VCPUOP_register_vcpu_info
> > >  4) if it was down, then VCPUOP_up to bring it back up
> > > 
> > > But since VCPUOP_down, VCPUOP_is_up, and VCPUOP_up are
> > > not allowed on HVM guests we can't do this. However with the
> > > Xen git commit f80c5623a126afc31e6bb9382268d579f0324a7a
> > > ("xen/x86: allow HVM guests to use hypercalls to bring up vCPUs"")
> > 
> > <sigh> I was in my local tree which was Roger's 'hvm_without_dm_v3'
> > looking at patches and spotted this - and thought it was already in!
> > 
> > Sorry about this patch - and please ignore it until the VCPU_op*
> > can be used by HVM guests.
> 
> FYI I just tripped over this while implementing ARM save/restore (in that I
> couldn't figure out HTF HVM VCPUs > MAX_LEGACY_VCPUS were getting their
> vcpu_info re-registered, which turns out to be because they aren't...).
> 
> ARM also lack the VCPU_up/down/is_up hypercalls. My plan there is simply to
> use on_each_cpu to do it, I can get away with this on ARM because the
> necessary infra (IPIs etc) are provided by the h/w virt platform (i.e. look
> native) so there is no reliance on Xen infra being fully up.
> 
> Not sure if that is also true of x86/PVHVM but thought I would mention it
> in case it seemed preferable to you.

Yes, but we have a hard limit of 32 CPUs on 'HVM' guests and on the
shared_info structure. Hence, to go above that we need to use the VCPU_op calls.

> 
> Ian.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] xen/pvhvm: Support more than 32 VCPUs when migrating (v3).
  2015-11-13 14:42     ` Konrad Rzeszutek Wilk
@ 2015-11-13 14:49       ` Ian Campbell
  2015-12-01 21:53         ` Konrad Rzeszutek Wilk
  0 siblings, 1 reply; 12+ messages in thread
From: Ian Campbell @ 2015-11-13 14:49 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk; +Cc: xen-devel, boris.ostrovsky, david.vrabel

On Fri, 2015-11-13 at 09:42 -0500, Konrad Rzeszutek Wilk wrote:
> On Thu, Nov 12, 2015 at 04:40:06PM +0000, Ian Campbell wrote:
> > On Fri, 2015-07-10 at 14:57 -0400, Konrad Rzeszutek Wilk wrote:
> > > On Fri, Jul 10, 2015 at 02:37:46PM -0400, Konrad Rzeszutek Wilk
> > > wrote:
> > > > When Xen migrates an HVM guest, by default its shared_info can
> > > > only hold up to 32 CPUs. As such the hypercall
> > > > VCPUOP_register_vcpu_info was introduced which allowed us to
> > > > setup per-page areas for VCPUs. This means we can boot PVHVM
> > > > guest with more than 32 VCPUs. During migration the per-cpu
> > > > structure is allocated freshly by the hypervisor (vcpu_info_mfn
> > > > is set to INVALID_MFN) so that the newly migrated guest
> > > > can make an VCPUOP_register_vcpu_info hypercall.
> > > > 
> > > > Unfortunatly we end up triggering this condition in Xen:
> > > > /* Run this command on yourself or on other offline VCPUS. */
> > > >  if ( (v != current) && !test_bit(_VPF_down, &v->pause_flags) )
> > > > 
> > > > which means we are unable to setup the per-cpu VCPU structures
> > > > for running vCPUS. The Linux PV code paths make this work by
> > > > iterating over every vCPU with:
> > > > 
> > > >  1) is target CPU up (VCPUOP_is_up hypercall?)
> > > >  2) if yes, then VCPUOP_down to pause it.
> > > >  3) VCPUOP_register_vcpu_info
> > > >  4) if it was down, then VCPUOP_up to bring it back up
> > > > 
> > > > But since VCPUOP_down, VCPUOP_is_up, and VCPUOP_up are
> > > > not allowed on HVM guests we can't do this. However with the
> > > > Xen git commit f80c5623a126afc31e6bb9382268d579f0324a7a
> > > > ("xen/x86: allow HVM guests to use hypercalls to bring up vCPUs"")
> > > 
> > > <sigh> I was in my local tree which was Roger's 'hvm_without_dm_v3'
> > > looking at patches and spotted this - and thought it was already in!
> > > 
> > > Sorry about this patch - and please ignore it until the VCPU_op*
> > > can be used by HVM guests.
> > 
> > FYI I just tripped over this while implementing ARM save/restore (in
> > that I
> > couldn't figure out HTF HVM VCPUs > MAX_LEGACY_VCPUS were getting their
> > vcpu_info re-registered, which turns out to be because they aren't...).
> > 
> > ARM also lack the VCPU_up/down/is_up hypercalls. My plan there is
> > simply to
> > use on_each_cpu to do it, I can get away with this on ARM because the
> > necessary infra (IPIs etc) are provided by the h/w virt platform (i.e.
> > look
> > native) so there is no reliance on Xen infra being fully up.
> > 
> > Not sure if that is also true of x86/PVHVM but thought I would mention
> > it
> > in case it seemed preferable to you.
> 
> Yes, but we have a hard-limit of 32 CPUs on 'HVM' guests and on the
> shared_info structure. Hence to go above that we need to use the VCPU_op
> calls.

I think you mean s/HVM/Linux PVHVM/? Because HVM_MAX_VCPUS in the
hypervisor is 128.

Ian.


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] xen/pvhvm: Support more than 32 VCPUs when migrating (v3).
  2015-11-13 14:49       ` Ian Campbell
@ 2015-12-01 21:53         ` Konrad Rzeszutek Wilk
  0 siblings, 0 replies; 12+ messages in thread
From: Konrad Rzeszutek Wilk @ 2015-12-01 21:53 UTC (permalink / raw)
  To: Ian Campbell; +Cc: xen-devel, boris.ostrovsky, david.vrabel

On Fri, Nov 13, 2015 at 02:49:22PM +0000, Ian Campbell wrote:
> On Fri, 2015-11-13 at 09:42 -0500, Konrad Rzeszutek Wilk wrote:
> > On Thu, Nov 12, 2015 at 04:40:06PM +0000, Ian Campbell wrote:
> > > On Fri, 2015-07-10 at 14:57 -0400, Konrad Rzeszutek Wilk wrote:
> > > > On Fri, Jul 10, 2015 at 02:37:46PM -0400, Konrad Rzeszutek Wilk
> > > > wrote:
> > > > > When Xen migrates an HVM guest, by default its shared_info can
> > > > > only hold up to 32 CPUs. As such the hypercall
> > > > > VCPUOP_register_vcpu_info was introduced which allowed us to
> > > > > setup per-page areas for VCPUs. This means we can boot PVHVM
> > > > > guest with more than 32 VCPUs. During migration the per-cpu
> > > > > structure is allocated freshly by the hypervisor (vcpu_info_mfn
> > > > > is set to INVALID_MFN) so that the newly migrated guest
> > > > > can make an VCPUOP_register_vcpu_info hypercall.
> > > > > 
> > > > > Unfortunatly we end up triggering this condition in Xen:
> > > > > /* Run this command on yourself or on other offline VCPUS. */
> > > > >  if ( (v != current) && !test_bit(_VPF_down, &v->pause_flags) )
> > > > > 
> > > > > which means we are unable to setup the per-cpu VCPU structures
> > > > > for running vCPUS. The Linux PV code paths make this work by
> > > > > iterating over every vCPU with:
> > > > > 
> > > > >  1) is target CPU up (VCPUOP_is_up hypercall?)
> > > > >  2) if yes, then VCPUOP_down to pause it.
> > > > >  3) VCPUOP_register_vcpu_info
> > > > >  4) if it was down, then VCPUOP_up to bring it back up
> > > > > 
> > > > > But since VCPUOP_down, VCPUOP_is_up, and VCPUOP_up are
> > > > > not allowed on HVM guests we can't do this. However with the
> > > > > Xen git commit f80c5623a126afc31e6bb9382268d579f0324a7a
> > > > > ("xen/x86: allow HVM guests to use hypercalls to bring up vCPUs"")
> > > > 
> > > > <sigh> I was in my local tree which was Roger's 'hvm_without_dm_v3'
> > > > looking at patches and spotted this - and thought it was already in!
> > > > 
> > > > Sorry about this patch - and please ignore it until the VCPU_op*
> > > > can be used by HVM guests.
> > > 
> > > FYI I just tripped over this while implementing ARM save/restore (in
> > > that I
> > > couldn't figure out HTF HVM VCPUs > MAX_LEGACY_VCPUS were getting their
> > > vcpu_info re-registered, which turns out to be because they aren't...).
> > > 
> > > ARM also lack the VCPU_up/down/is_up hypercalls. My plan there is
> > > simply to
> > > use on_each_cpu to do it, I can get away with this on ARM because the
> > > necessary infra (IPIs etc) are provided by the h/w virt platform (i.e.
> > > look
> > > native) so there is no reliance on Xen infra being fully up.
> > > 
> > > Not sure if that is also true of x86/PVHVM but thought I would mention
> > > it
> > > in case it seemed preferable to you.
> > 
> > Yes, but we have a hard-limit of 32 CPUs on 'HVM' guests and on the
> > shared_info structure. Hence to go above that we need to use the VCPU_op
> > calls.
> 
> I think you mean s/HVM/Linux PVHVM/? Because HVM_MAX_VCPUS in the
> hypervisor is 128.

Thank you. I had confused the CPU support with the event channel support.

The issue is the shared page and the events - by default the shared
page only has enough slots for 32 vCPUs. Anything above that and we
need to use the fancy VCPUOP hypercalls, which is exactly what we end
up doing during boot. But on save/restore we hit the problem that only
one of those hypercalls is allowed on HVM: VCPUOP_register_vcpu_info.

And that hypercall can only be called for vCPUs which are offline, or
by the vCPU itself. However, the restore path brings every CPU back up
from CPU0. We could send an IPI to each of the other CPUs so that it
would call VCPUOP_register_vcpu_info itself.. but IPIs use event
channels, and events do not work past 32 vCPUs unless you have already
called VCPUOP_register_vcpu_info.
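
To spell out where the 32 comes from (a from-memory sketch of the
public header, with the other fields elided): the shared page only
carries a fixed legacy vcpu_info array, so a vCPU at index 32 or above
has no slot there and its events can't be handled until
VCPUOP_register_vcpu_info gives it one:

 struct shared_info {
 	struct vcpu_info vcpu_info[32];	/* XEN_LEGACY_MAX_VCPUS / MAX_VIRT_CPUS */
 	/* ... event channel pending/mask words, wallclock, arch bits ... */
 };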

> 
> Ian.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] xen/pvhvm: Support more than 32 VCPUs when migrating (v3).
  2015-07-10 18:57 ` Konrad Rzeszutek Wilk
  2015-11-12 16:40   ` Ian Campbell
  2016-07-21 14:14   ` Konrad Rzeszutek Wilk
@ 2016-07-21 14:14   ` Konrad Rzeszutek Wilk
  2016-07-21 15:05     ` Boris Ostrovsky
  2016-07-21 15:05     ` Boris Ostrovsky
  2 siblings, 2 replies; 12+ messages in thread
From: Konrad Rzeszutek Wilk @ 2016-07-21 14:14 UTC (permalink / raw)
  To: david.vrabel, boris.ostrovsky, xen-devel, linux-kernel

On Fri, Jul 10, 2015 at 02:57:51PM -0400, Konrad Rzeszutek Wilk wrote:
> On Fri, Jul 10, 2015 at 02:37:46PM -0400, Konrad Rzeszutek Wilk wrote:
> > When Xen migrates an HVM guest, by default its shared_info can
> > only hold up to 32 CPUs. As such the hypercall
> > VCPUOP_register_vcpu_info was introduced which allowed us to
> > setup per-page areas for VCPUs. This means we can boot PVHVM
> > guest with more than 32 VCPUs. During migration the per-cpu
> > structure is allocated freshly by the hypervisor (vcpu_info_mfn
> > is set to INVALID_MFN) so that the newly migrated guest
> > can make an VCPUOP_register_vcpu_info hypercall.
> > 
> > Unfortunatly we end up triggering this condition in Xen:
> > /* Run this command on yourself or on other offline VCPUS. */
> >  if ( (v != current) && !test_bit(_VPF_down, &v->pause_flags) )
> > 
> > which means we are unable to setup the per-cpu VCPU structures
> > for running vCPUS. The Linux PV code paths make this work by
> > iterating over every vCPU with:
> > 
> >  1) is target CPU up (VCPUOP_is_up hypercall?)
> >  2) if yes, then VCPUOP_down to pause it.
> >  3) VCPUOP_register_vcpu_info
> >  4) if it was down, then VCPUOP_up to bring it back up
> > 
> > But since VCPUOP_down, VCPUOP_is_up, and VCPUOP_up are
> > not allowed on HVM guests we can't do this. However with the
> > Xen git commit f80c5623a126afc31e6bb9382268d579f0324a7a
> > ("xen/x86: allow HVM guests to use hypercalls to bring up vCPUs"")
> 
> <sigh> I was in my local tree which was Roger's 'hvm_without_dm_v3'
> looking at patches and spotted this - and thought it was already in!
> 
> Sorry about this patch - and please ignore it until the VCPU_op*
> can be used by HVM guests.

The corresponding patch is in the Xen tree (192df6f9122ddebc21d0a632c10da3453aeee1c2).

Could folks take a look at the patch please?

Without it you can't migrate a Linux guest with more than 32 vCPUs.

> 
> > we can do this. As such first check if VCPUOP_is_up is actually
> > possible before trying this dance.
> > 
> > As most of this dance code is done already in 'xen_setup_vcpu'
> > lets make it callable on both PV and HVM. This means moving one
> > of the checks out to 'xen_setup_runstate_info'.
> > 
> > Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
> > ---
> >  arch/x86/xen/enlighten.c | 23 +++++++++++++++++------
> >  arch/x86/xen/suspend.c   |  7 +------
> >  arch/x86/xen/time.c      |  3 +++
> >  3 files changed, 21 insertions(+), 12 deletions(-)
> > 
> > diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
> > index 46957ea..187dec6 100644
> > --- a/arch/x86/xen/enlighten.c
> > +++ b/arch/x86/xen/enlighten.c
> > @@ -238,12 +238,23 @@ static void xen_vcpu_setup(int cpu)
> >  void xen_vcpu_restore(void)
> >  {
> >  	int cpu;
> > +	bool vcpuops = true;
> > +	const struct cpumask *mask;
> >  
> > -	for_each_possible_cpu(cpu) {
> > +	mask = xen_pv_domain() ? cpu_possible_mask : cpu_online_mask;
> > +
> > +	/* Only Xen 4.5 and higher supports this. */
> > +	if (HYPERVISOR_vcpu_op(VCPUOP_is_up, smp_processor_id(), NULL) == -ENOSYS)
> > +		vcpuops = false;
> > +
> > +	for_each_cpu(cpu, mask) {
> >  		bool other_cpu = (cpu != smp_processor_id());
> > -		bool is_up = HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL);
> > +		bool is_up = false;
> >  
> > -		if (other_cpu && is_up &&
> > +		if (vcpuops)
> > +			is_up = HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL);
> > +
> > +		if (vcpuops && other_cpu && is_up &&
> >  		    HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL))
> >  			BUG();
> >  
> > @@ -252,7 +263,7 @@ void xen_vcpu_restore(void)
> >  		if (have_vcpu_info_placement)
> >  			xen_vcpu_setup(cpu);
> >  
> > -		if (other_cpu && is_up &&
> > +		if (vcpuops && other_cpu && is_up &&
> >  		    HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL))
> >  			BUG();
> >  	}
> > @@ -1704,8 +1715,8 @@ void __ref xen_hvm_init_shared_info(void)
> >  	 * in that case multiple vcpus might be online. */
> >  	for_each_online_cpu(cpu) {
> >  		/* Leave it to be NULL. */
> > -		if (cpu >= MAX_VIRT_CPUS)
> > -			continue;
> > +		if (cpu >= MAX_VIRT_CPUS && cpu <= NR_CPUS)
> > +			per_cpu(xen_vcpu, cpu) = NULL; /* Triggers xen_vcpu_setup.*/
> >  		per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
> >  	}
> >  }
> > diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
> > index 53b4c08..cd66397 100644
> > --- a/arch/x86/xen/suspend.c
> > +++ b/arch/x86/xen/suspend.c
> > @@ -31,15 +31,10 @@ static void xen_pv_pre_suspend(void)
> >  static void xen_hvm_post_suspend(int suspend_cancelled)
> >  {
> >  #ifdef CONFIG_XEN_PVHVM
> > -	int cpu;
> >  	xen_hvm_init_shared_info();
> >  	xen_callback_vector();
> >  	xen_unplug_emulated_devices();
> > -	if (xen_feature(XENFEAT_hvm_safe_pvclock)) {
> > -		for_each_online_cpu(cpu) {
> > -			xen_setup_runstate_info(cpu);
> > -		}
> > -	}
> > +	xen_vcpu_restore();
> >  #endif
> >  }
> >  
> > diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
> > index 55da33b..6882d0c 100644
> > --- a/arch/x86/xen/time.c
> > +++ b/arch/x86/xen/time.c
> > @@ -105,6 +105,9 @@ void xen_setup_runstate_info(int cpu)
> >  {
> >  	struct vcpu_register_runstate_memory_area area;
> >  
> > +	if (xen_hvm_domain() && !(xen_feature(XENFEAT_hvm_safe_pvclock)))
> > +		return;
> > +
> >  	area.addr.v = &per_cpu(xen_runstate, cpu);
> >  
> >  	if (HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area,
> > -- 
> > 2.1.0
> > 

^ permalink raw reply	[flat|nested] 12+ messages in thread


* Re: [PATCH] xen/pvhvm: Support more than 32 VCPUs when migrating (v3).
  2016-07-21 14:14   ` Konrad Rzeszutek Wilk
  2016-07-21 15:05     ` Boris Ostrovsky
@ 2016-07-21 15:05     ` Boris Ostrovsky
  1 sibling, 0 replies; 12+ messages in thread
From: Boris Ostrovsky @ 2016-07-21 15:05 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk, david.vrabel, xen-devel, linux-kernel

On 07/21/2016 10:14 AM, Konrad Rzeszutek Wilk wrote:
> On Fri, Jul 10, 2015 at 02:57:51PM -0400, Konrad Rzeszutek Wilk wrote:
>> On Fri, Jul 10, 2015 at 02:37:46PM -0400, Konrad Rzeszutek Wilk wrote:
>>> When Xen migrates an HVM guest, by default its shared_info can
>>> only hold up to 32 CPUs. As such the hypercall
>>> VCPUOP_register_vcpu_info was introduced which allowed us to
>>> setup per-page areas for VCPUs. This means we can boot PVHVM
>>> guest with more than 32 VCPUs. During migration the per-cpu
>>> structure is allocated freshly by the hypervisor (vcpu_info_mfn
>>> is set to INVALID_MFN) so that the newly migrated guest
>>> can make an VCPUOP_register_vcpu_info hypercall.
>>>
>>> Unfortunatly we end up triggering this condition in Xen:
>>> /* Run this command on yourself or on other offline VCPUS. */
>>>  if ( (v != current) && !test_bit(_VPF_down, &v->pause_flags) )
>>>
>>> which means we are unable to setup the per-cpu VCPU structures
>>> for running vCPUS. The Linux PV code paths make this work by
>>> iterating over every vCPU with:
>>>
>>>  1) is target CPU up (VCPUOP_is_up hypercall?)
>>>  2) if yes, then VCPUOP_down to pause it.
>>>  3) VCPUOP_register_vcpu_info
>>>  4) if it was down, then VCPUOP_up to bring it back up
>>>
>>> But since VCPUOP_down, VCPUOP_is_up, and VCPUOP_up are
>>> not allowed on HVM guests we can't do this. However with the
>>> Xen git commit f80c5623a126afc31e6bb9382268d579f0324a7a
>>> ("xen/x86: allow HVM guests to use hypercalls to bring up vCPUs"")
>> <sigh> I was in my local tree which was Roger's 'hvm_without_dm_v3'
>> looking at patches and spotted this - and thought it was already in!
>>
>> Sorry about this patch - and please ignore it until the VCPU_op*
>> can be used by HVM guests.
> The corresponding patch is in Xen tree (192df6f9122ddebc21d0a632c10da3453aeee1c2)
>
> Could folks take a look at the patch pls?
>
> Without it you can't migrate an Linux guest with more than 32 vCPUs.
>
>>> we can do this. As such first check if VCPUOP_is_up is actually
>>> possible before trying this dance.
>>>
>>> As most of this dance code is done already in 'xen_setup_vcpu'
>>> lets make it callable on both PV and HVM. This means moving one
>>> of the checks out to 'xen_setup_runstate_info'.
>>>
>>> Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
>>> ---
>>>  arch/x86/xen/enlighten.c | 23 +++++++++++++++++------
>>>  arch/x86/xen/suspend.c   |  7 +------
>>>  arch/x86/xen/time.c      |  3 +++
>>>  3 files changed, 21 insertions(+), 12 deletions(-)
>>>
>>> diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
>>> index 46957ea..187dec6 100644
>>> --- a/arch/x86/xen/enlighten.c
>>> +++ b/arch/x86/xen/enlighten.c
>>> @@ -238,12 +238,23 @@ static void xen_vcpu_setup(int cpu)
>>>  void xen_vcpu_restore(void)
>>>  {
>>>  	int cpu;
>>> +	bool vcpuops = true;
>>> +	const struct cpumask *mask;
>>>  
>>> -	for_each_possible_cpu(cpu) {
>>> +	mask = xen_pv_domain() ? cpu_possible_mask : cpu_online_mask;
>>> +
>>> +	/* Only Xen 4.5 and higher supports this. */
>>> +	if (HYPERVISOR_vcpu_op(VCPUOP_is_up, smp_processor_id(), NULL) == -ENOSYS)
>>> +		vcpuops = false;
>>> +
>>> +	for_each_cpu(cpu, mask) {
>>>  		bool other_cpu = (cpu != smp_processor_id());
>>> -		bool is_up = HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL);
>>> +		bool is_up = false;
>>>  
>>> -		if (other_cpu && is_up &&
>>> +		if (vcpuops)
>>> +			is_up = HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL);


You can just say
   is_up = (HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL) > 0);

and then you won't need vcpuops bool.
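
I.e. the loop body would become roughly (untested sketch):

 	for_each_cpu(cpu, mask) {
 		bool other_cpu = (cpu != smp_processor_id());
 		/* <= 0 covers both "vCPU is down" and "hypercall unsupported". */
 		bool is_up = (HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL) > 0);

 		if (other_cpu && is_up &&
 		    HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL))
 			BUG();
 		...
 	}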




>>> +
>>> +		if (vcpuops && other_cpu && is_up &&
>>>  		    HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL))
>>>  			BUG();
>>>  
>>> @@ -252,7 +263,7 @@ void xen_vcpu_restore(void)
>>>  		if (have_vcpu_info_placement)
>>>  			xen_vcpu_setup(cpu);
>>>  
>>> -		if (other_cpu && is_up &&
>>> +		if (vcpuops && other_cpu && is_up &&
>>>  		    HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL))
>>>  			BUG();
>>>  	}
>>> @@ -1704,8 +1715,8 @@ void __ref xen_hvm_init_shared_info(void)
>>>  	 * in that case multiple vcpus might be online. */
>>>  	for_each_online_cpu(cpu) {
>>>  		/* Leave it to be NULL. */
>>> -		if (cpu >= MAX_VIRT_CPUS)
>>> -			continue;
>>> +		if (cpu >= MAX_VIRT_CPUS && cpu <= NR_CPUS)
>>> +			per_cpu(xen_vcpu, cpu) = NULL; /* Triggers xen_vcpu_setup.*/
>>>  		per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];

I don't think I understand this change.

Can you have cpu > NR_CPUS? And isn't per_cpu(xen_vcpu, cpu) NULL
already (as the comment at the top suggests)?
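
As written, the new NULL assignment is overwritten unconditionally by
the very next line; if the intent was to skip the shared_info slot for
cpu >= MAX_VIRT_CPUS, presumably something along these lines was meant
(sketch only):

 	if (cpu >= MAX_VIRT_CPUS)
 		per_cpu(xen_vcpu, cpu) = NULL;	/* triggers xen_vcpu_setup() later */
 	else
 		per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];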


-boris

^ permalink raw reply	[flat|nested] 12+ messages in thread


