All of lore.kernel.org
* [PATCH v3 0/1] Introduce VCPUOP_reset_vcpu_info
@ 2014-08-19 10:04 Vitaly Kuznetsov
  2014-08-19 10:04 ` [PATCH v3 1/1] " Vitaly Kuznetsov
  2014-08-19 18:59 ` [PATCH v3 0/1] " David Vrabel
  0 siblings, 2 replies; 14+ messages in thread
From: Vitaly Kuznetsov @ 2014-08-19 10:04 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Jones, David Vrabel, Jan Beulich

The patch and guest code are based on the prototype by Konrad Rzeszutek Wilk.

VCPUOP_reset_vcpu_info is required to support kexec performed by an SMP PVHVM
guest. It was tested with the guest code listed below.

Note: the current Linux PVHVM code does not perform VCPUOP_register_vcpu_info
for VCPU0, so there is no need to perform VCPUOP_reset_vcpu_info for it. However,
registering vcpu_info for VCPU0 is supported on the hypervisor side, and it makes
sense to have matching support for the reset operation. The guest code listed
below contains a proof of concept which does VCPUOP_register_vcpu_info for VCPU0
on startup and VCPUOP_reset_vcpu_info on kexec. I don't think we want that part
in the upstream Linux kernel; however, other/future guests can use it.

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 4fd979e..ec5b152 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -171,7 +171,7 @@ static void clamp_max_cpus(void)
 #endif
 }
 
-static void xen_vcpu_setup(int cpu)
+void xen_vcpu_setup(int cpu)
 {
 	struct vcpu_register_vcpu_info info;
 	int err;
@@ -183,8 +183,6 @@ static void xen_vcpu_setup(int cpu)
 	 * This path is called twice on PVHVM - first during bootup via
 	 * smp_init -> xen_hvm_cpu_notify, and then if the VCPU is being
 	 * hotplugged: cpu_up -> xen_hvm_cpu_notify.
-	 * As we can only do the VCPUOP_register_vcpu_info once lets
-	 * not over-write its result.
 	 *
 	 * For PV it is called during restore (xen_vcpu_restore) and bootup
 	 * (xen_setup_vcpu_info_placement). The hotplug mechanism does not
@@ -210,10 +208,6 @@ static void xen_vcpu_setup(int cpu)
 	/* Check to see if the hypervisor will put the vcpu_info
 	   structure where we want it, which allows direct access via
 	   a percpu-variable.
-	   N.B. This hypercall can _only_ be called once per CPU. Subsequent
-	   calls will error out with -EINVAL. This is due to the fact that
-	   hypervisor has no unregister variant and this hypercall does not
-	   allow to over-write info.mfn and info.offset.
 	 */
 	err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, cpu, &info);
 
@@ -228,6 +222,22 @@ static void xen_vcpu_setup(int cpu)
 	}
 }
 
+void xen_teardown_vcpu_setup(int cpu)
+{
+	int err;
+
+	if (!have_vcpu_info_placement)
+		return;
+
+	err = HYPERVISOR_vcpu_op(VCPUOP_reset_vcpu_info, cpu, NULL);
+	if (err) {
+		xen_raw_printk("%s: VCPUOP_reset_vcpu_info rc: %d\n", __func__, err);
+		return;
+	}
+	if (cpu < MAX_VIRT_CPUS)
+		per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
+}
+
 /*
  * On restore, set the vcpu placement up again.
  * If it fails, then we're in a bad state, since
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index bc5e897..b05f91f 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -769,10 +769,33 @@ static void xen_hvm_cpu_die(unsigned int cpu)
 #ifdef CONFIG_KEXEC
 void xen_kexec_shutdown(void)
 {
+	int cpu;
+	cpumask_var_t cpu_offline_mask;
+
 	if (!kexec_in_progress)
 		return;
 
+	/* With CONFIG_CPUMASK_OFFSTACK the mask must be allocated before use */
+	if (!alloc_cpumask_var(&cpu_offline_mask, GFP_KERNEL))
+		return;
+
+	gnttab_suspend();
+
+	/* Stop all CPUs except for the first one */
+	disable_nonboot_cpus();
+
 	xen_hvm_reset_eventchannels();
+
+	cpumask_andnot(cpu_offline_mask, cpu_present_mask,
+		       cpu_online_mask);
+
+	for_each_cpu(cpu, cpu_offline_mask)
+		xen_teardown_vcpu_setup(cpu);
+
+	for_each_cpu(cpu, cpu_online_mask)
+		xen_teardown_vcpu_setup(cpu);
+
+	free_cpumask_var(cpu_offline_mask);
 }
 #endif
 
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index d083e82..36dd380 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -53,6 +53,7 @@ void xen_init_irq_ops(void);
 void xen_setup_timer(int cpu);
 void xen_setup_runstate_info(int cpu);
 void xen_teardown_timer(int cpu);
+void xen_teardown_vcpu_setup(int cpu);
 cycle_t xen_clocksource_read(void);
 void xen_setup_cpu_clockevents(void);
 void __init xen_init_time_ops(void);
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index f12fbca..3165ab3 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -1750,6 +1750,8 @@ void xen_hvm_reset_eventchannels(void)
 	}
 }
 
+void xen_vcpu_setup(int cpu);
+
 void __init xen_init_IRQ(void)
 {
 	int ret = -EINVAL;
@@ -1759,6 +1761,9 @@ void __init xen_init_IRQ(void)
 	if (ret < 0)
 		xen_evtchn_2l_init();
 
+	if (!xen_pv_domain())
+		xen_vcpu_setup(0);
+
 	evtchn_to_irq = kcalloc(EVTCHN_ROW(xen_evtchn_max_channels()),
 				sizeof(*evtchn_to_irq), GFP_KERNEL);
 	BUG_ON(!evtchn_to_irq);
diff --git a/include/xen/interface/vcpu.h b/include/xen/interface/vcpu.h
index b05288c..b65ca42 100644
--- a/include/xen/interface/vcpu.h
+++ b/include/xen/interface/vcpu.h
@@ -172,4 +172,10 @@ DEFINE_GUEST_HANDLE_STRUCT(vcpu_register_vcpu_info);
 
 /* Send an NMI to the specified VCPU. @extra_arg == NULL. */
 #define VCPUOP_send_nmi             11
+
+/*
+ * Reset all of the vcpu_info information from their previous location to the
+ * default one used at bootup.
+ */
+#define VCPUOP_reset_vcpu_info      14
 #endif /* __XEN_PUBLIC_VCPU_H__ */

Vitaly Kuznetsov (1):
  Introduce VCPUOP_reset_vcpu_info

 xen/arch/x86/hvm/hvm.c    |  1 +
 xen/common/domain.c       | 73 +++++++++++++++++++++++++++++++++++++++++------
 xen/include/public/vcpu.h | 19 ++++++++++++
 3 files changed, 85 insertions(+), 8 deletions(-)

-- 
1.9.3

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH v3 1/1] Introduce VCPUOP_reset_vcpu_info
  2014-08-19 10:04 [PATCH v3 0/1] Introduce VCPUOP_reset_vcpu_info Vitaly Kuznetsov
@ 2014-08-19 10:04 ` Vitaly Kuznetsov
  2014-08-19 10:21   ` Andrew Cooper
  2014-08-19 23:22   ` Jan Beulich
  2014-08-19 18:59 ` [PATCH v3 0/1] " David Vrabel
  1 sibling, 2 replies; 14+ messages in thread
From: Vitaly Kuznetsov @ 2014-08-19 10:04 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Jones, David Vrabel, Jan Beulich

When an SMP guest performs kexec/kdump, it tries issuing VCPUOP_register_vcpu_info.
This fails because vcpu_info is already registered. Introduce a new vcpu operation
to reset vcpu_info to its default state.

Based on the original patch by Konrad Rzeszutek Wilk.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
---
Changes from RFCv2:
- modify unmap_vcpu_info() to match both needs [Jan Beulich]
- support VCPUOP_reset_vcpu_info for current VCPU [Jan Beulich]
- improve description in public/vcpu.h [Jan Beulich]

Changes from RFCv1:
- Don't use unsuitable unmap_vcpu_info(), rewrite [Jan Beulich]
- Require FIFO ABI being in 2-level mode
- Describe limitations in include/public/vcpu.h [Jan Beulich]
---
 xen/arch/x86/hvm/hvm.c    |  1 +
 xen/common/domain.c       | 73 +++++++++++++++++++++++++++++++++++++++++------
 xen/include/public/vcpu.h | 19 ++++++++++++
 3 files changed, 85 insertions(+), 8 deletions(-)

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 216c3f2..7917272 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -3332,6 +3332,7 @@ static long hvm_vcpu_op(
     case VCPUOP_set_singleshot_timer:
     case VCPUOP_stop_singleshot_timer:
     case VCPUOP_register_vcpu_info:
+    case VCPUOP_reset_vcpu_info:
     case VCPUOP_register_vcpu_time_memory_area:
         rc = do_vcpu_op(cmd, vcpuid, arg);
         break;
diff --git a/xen/common/domain.c b/xen/common/domain.c
index 44e5cbe..9e10e1e 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -956,25 +956,52 @@ int map_vcpu_info(struct vcpu *v, unsigned long gfn, unsigned offset)
 }
 
 /*
- * Unmap the vcpu info page if the guest decided to place it somewhere
- * else.  This is only used from arch_domain_destroy, so there's no
- * need to do anything clever.
+ * Unmap the vcpu info page if the guest decided to place it somewhere else.
+ * There's no need to do anything clever when the domain is dying (when called
+ * from arch_domain_destroy) and we need to prepare for rebinding when it is not
+ * (in case VCPUOP_reset_vcpu_info was called).
  */
 void unmap_vcpu_info(struct vcpu *v)
 {
+    struct domain *d = v->domain;
     unsigned long mfn;
+    vcpu_info_t *old_info;
+    unsigned int i;
+
+    mfn = v->vcpu_info_mfn;
+    old_info = v->vcpu_info;
 
-    if ( v->vcpu_info_mfn == INVALID_MFN )
+    if ( mfn == INVALID_MFN )
         return;
 
-    mfn = v->vcpu_info_mfn;
+    if ( (!d->is_dying) && (v->vcpu_id < XEN_LEGACY_MAX_VCPUS) )
+    {
+        memcpy(&shared_info(d, vcpu_info[v->vcpu_id]), v->vcpu_info,
+               sizeof(vcpu_info_t));
+        v->vcpu_info = (vcpu_info_t *)&shared_info(d, vcpu_info[v->vcpu_id]);
+    }
+    else
+        v->vcpu_info = &dummy_vcpu_info;
     unmap_domain_page_global((void *)
-                             ((unsigned long)v->vcpu_info & PAGE_MASK));
+                             ((unsigned long)old_info & PAGE_MASK));
 
-    v->vcpu_info = &dummy_vcpu_info;
+    put_page_and_type(mfn_to_page(mfn));
     v->vcpu_info_mfn = INVALID_MFN;
 
-    put_page_and_type(mfn_to_page(mfn));
+    if ( (!d->is_dying) && (v->vcpu_id < XEN_LEGACY_MAX_VCPUS) &&
+         (test_bit(_VPF_down, &v->pause_flags)) )
+    {
+        /* Make sure vcpu_info was set */
+        smp_wmb();
+
+        /*
+         * Mark everything as being pending for all offline VCPUs to make sure
+         * nothing gets lost when this VCPU goes online again.
+         */
+        vcpu_info(v, evtchn_upcall_pending) = 1;
+        for ( i = 0; i < BITS_PER_EVTCHN_WORD(d); i++ )
+            set_bit(i, &vcpu_info(v, evtchn_pending_sel));
+    }
 }
 
 long do_vcpu_op(int cmd, int vcpuid, XEN_GUEST_HANDLE_PARAM(void) arg)
@@ -1116,6 +1143,36 @@ long do_vcpu_op(int cmd, int vcpuid, XEN_GUEST_HANDLE_PARAM(void) arg)
         break;
     }
 
+    case VCPUOP_reset_vcpu_info:
+    {
+
+        if ( (v != current) && !test_bit(_VPF_down, &v->pause_flags) )
+        {
+            printk(XENLOG_G_WARNING "%pv: VCPU is up or not current, "
+                   "refusing to reset vcpu_info!\n", v);
+            return -EBUSY;
+        }
+
+        if ( v->evtchn_fifo )
+        {
+            printk(XENLOG_G_WARNING "%pv: FIFO evtchn ABI is being used, "
+                   "refusing to reset vcpu_info!\n", v);
+            return -EBUSY;
+        }
+
+        if ( v->vcpu_info_mfn == INVALID_MFN )
+        {
+            rc = 0;
+            break;
+        }
+
+        domain_lock(d);
+        unmap_vcpu_info(v);
+        domain_unlock(d);
+        rc = 0;
+        break;
+    }
+
     case VCPUOP_register_runstate_memory_area:
     {
         struct vcpu_register_runstate_memory_area area;
diff --git a/xen/include/public/vcpu.h b/xen/include/public/vcpu.h
index e888daf..c4d166c 100644
--- a/xen/include/public/vcpu.h
+++ b/xen/include/public/vcpu.h
@@ -227,6 +227,25 @@ struct vcpu_register_time_memory_area {
 typedef struct vcpu_register_time_memory_area vcpu_register_time_memory_area_t;
 DEFINE_XEN_GUEST_HANDLE(vcpu_register_time_memory_area_t);
 
+/*
+ * Reset all of the vcpu_info information from their previous location
+ * to the default one used at bootup. The following prerequisites should be met:
+ *  1. Domain should be using 2-level event channel ABI. In case another
+ *     non-default ABI is in use the domain is supposed to switch back to
+ *     2-level ABI with EVTCHNOP_reset.
+ *  2. The operation is unsupported for non-current online VCPUs. If performed
+ *     during shutdown/kexec/... a guest domain is supposed to behave in the
+ *     following sequence:
+ *     - Choose special 'shutdown' VCPU, bring all other VCPUs down;
+ *     - Issue VCPUOP_reset_vcpu_info for all offline VCPUs;
+ *     - Issue VCPUOP_reset_vcpu_info for the 'shutdown' VCPU.
+ *
+ * After the call vcpu_info is reset to its default state: first
+ * XEN_LEGACY_MAX_VCPUS VCPUs will get it switched to shared_info and all other
+ * VCPUs will get dummy_vcpu_info.
+ */
+#define VCPUOP_reset_vcpu_info   14
+
 #endif /* __XEN_PUBLIC_VCPU_H__ */
 
 /*
-- 
1.9.3


* Re: [PATCH v3 1/1] Introduce VCPUOP_reset_vcpu_info
  2014-08-19 10:04 ` [PATCH v3 1/1] " Vitaly Kuznetsov
@ 2014-08-19 10:21   ` Andrew Cooper
  2014-08-19 23:22   ` Jan Beulich
  1 sibling, 0 replies; 14+ messages in thread
From: Andrew Cooper @ 2014-08-19 10:21 UTC (permalink / raw)
  To: Vitaly Kuznetsov, xen-devel; +Cc: Andrew Jones, David Vrabel, Jan Beulich

On 19/08/14 11:04, Vitaly Kuznetsov wrote:
> When an SMP guest performs kexec/kdump it tries issuing VCPUOP_register_vcpu_info.
> This fails due to vcpu_info already being registered. Introduce new vcpu operation
> to reset vcpu_info to its default state.
>
> Based on the original patch by Konrad Rzeszutek Wilk.
>
> Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
> ---
> Changes from RFCv2:
> - modify unmap_vcpu_info() to match both needs [Jan Beulich]
> - support VCPUOP_reset_vcpu_info for current VCPU [Jan Beulich]
> - improve description in public/vcpu.h [Jan Beulich]
>
> Changes from RFCv1:
> - Don't use unsuitable unmap_vcpu_info(), rewrite [Jan Beulich]
> - Require FIFO ABI being in 2-level mode
> - Describe limitations in include/public/vcpu.h [Jan Beulich]
> ---
>  xen/arch/x86/hvm/hvm.c    |  1 +
>  xen/common/domain.c       | 73 +++++++++++++++++++++++++++++++++++++++++------
>  xen/include/public/vcpu.h | 19 ++++++++++++
>  3 files changed, 85 insertions(+), 8 deletions(-)
>
> diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
> index 216c3f2..7917272 100644
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -3332,6 +3332,7 @@ static long hvm_vcpu_op(
>      case VCPUOP_set_singleshot_timer:
>      case VCPUOP_stop_singleshot_timer:
>      case VCPUOP_register_vcpu_info:
> +    case VCPUOP_reset_vcpu_info:
>      case VCPUOP_register_vcpu_time_memory_area:
>          rc = do_vcpu_op(cmd, vcpuid, arg);
>          break;
> diff --git a/xen/common/domain.c b/xen/common/domain.c
> index 44e5cbe..9e10e1e 100644
> --- a/xen/common/domain.c
> +++ b/xen/common/domain.c
> @@ -956,25 +956,52 @@ int map_vcpu_info(struct vcpu *v, unsigned long gfn, unsigned offset)
>  }
>  
>  /*
> - * Unmap the vcpu info page if the guest decided to place it somewhere
> - * else.  This is only used from arch_domain_destroy, so there's no
> - * need to do anything clever.
> + * Unmap the vcpu info page if the guest decided to place it somewhere else.
> + * There's no need to do anything clever when the domain is dying (when called
> + * from arch_domain_destroy) and we need to prepare for rebinding when it is not
> + * (in case VCPUOP_reset_vcpu_info was called).
>   */
>  void unmap_vcpu_info(struct vcpu *v)
>  {
> +    struct domain *d = v->domain;
>      unsigned long mfn;
> +    vcpu_info_t *old_info;
> +    unsigned int i;
> +
> +    mfn = v->vcpu_info_mfn;
> +    old_info = v->vcpu_info;
>  
> -    if ( v->vcpu_info_mfn == INVALID_MFN )
> +    if ( mfn == INVALID_MFN )
>          return;
>  
> -    mfn = v->vcpu_info_mfn;
> +    if ( (!d->is_dying) && (v->vcpu_id < XEN_LEGACY_MAX_VCPUS) )
> +    {
> +        memcpy(&shared_info(d, vcpu_info[v->vcpu_id]), v->vcpu_info,
> +               sizeof(vcpu_info_t));
> +        v->vcpu_info = (vcpu_info_t *)&shared_info(d, vcpu_info[v->vcpu_id]);
> +    }
> +    else
> +        v->vcpu_info = &dummy_vcpu_info;
>      unmap_domain_page_global((void *)
> -                             ((unsigned long)v->vcpu_info & PAGE_MASK));
> +                             ((unsigned long)old_info & PAGE_MASK));
>  
> -    v->vcpu_info = &dummy_vcpu_info;
> +    put_page_and_type(mfn_to_page(mfn));
>      v->vcpu_info_mfn = INVALID_MFN;
>  
> -    put_page_and_type(mfn_to_page(mfn));
> +    if ( (!d->is_dying) && (v->vcpu_id < XEN_LEGACY_MAX_VCPUS) &&
> +         (test_bit(_VPF_down, &v->pause_flags)) )
> +    {
> +        /* Make sure vcpu_info was set */
> +        smp_wmb();
> +
> +        /*
> +         * Mark everything as being pending for all offline VCPUs to make sure
> +         * nothing gets lost when this VCPU goes online again.
> +         */
> +        vcpu_info(v, evtchn_upcall_pending) = 1;
> +        for ( i = 0; i < BITS_PER_EVTCHN_WORD(d); i++ )
> +            set_bit(i, &vcpu_info(v, evtchn_pending_sel));
> +    }
>  }
>  
>  long do_vcpu_op(int cmd, int vcpuid, XEN_GUEST_HANDLE_PARAM(void) arg)
> @@ -1116,6 +1143,36 @@ long do_vcpu_op(int cmd, int vcpuid, XEN_GUEST_HANDLE_PARAM(void) arg)
>          break;
>      }
>  
> +    case VCPUOP_reset_vcpu_info:
> +    {
> +
> +        if ( (v != current) && !test_bit(_VPF_down, &v->pause_flags) )
> +        {
> +            printk(XENLOG_G_WARNING "%pv: VCPU is up or not current, "
> +                   "refusing to reset vcpu_info!\n", v);
> +            return -EBUSY;
> +        }
> +
> +        if ( v->evtchn_fifo )
> +        {
> +            printk(XENLOG_G_WARNING "%pv: FIFO evtchn ABI is being used, "
> +                   "refusing to reset vcpu_info!\n", v);
> +            return -EBUSY;
> +        }

These two checks should be in unmap_vcpu_info() similar to
map_vcpu_info(), and unmap_vcpu_info() should return an int.  I am not
sure whether the printks are really needed; a buggy/malicious guest is
very capable of causing logspam with them.

> +
> +        if ( v->vcpu_info_mfn == INVALID_MFN )
> +        {
> +            rc = 0;
> +            break;
> +        }

This check is not safe without the domain lock held.  Drop it entirely,
as the head of unmap_vcpu_info() already does it.

~Andrew

> +
> +        domain_lock(d);
> +        unmap_vcpu_info(v);
> +        domain_unlock(d);
> +        rc = 0;
> +        break;
> +    }
> +
>      case VCPUOP_register_runstate_memory_area:
>      {
>          struct vcpu_register_runstate_memory_area area;
> diff --git a/xen/include/public/vcpu.h b/xen/include/public/vcpu.h
> index e888daf..c4d166c 100644
> --- a/xen/include/public/vcpu.h
> +++ b/xen/include/public/vcpu.h
> @@ -227,6 +227,25 @@ struct vcpu_register_time_memory_area {
>  typedef struct vcpu_register_time_memory_area vcpu_register_time_memory_area_t;
>  DEFINE_XEN_GUEST_HANDLE(vcpu_register_time_memory_area_t);
>  
> +/*
> + * Reset all of the vcpu_info information from their previous location
> + * to the default one used at bootup. The following prerequisites should be met:
> + *  1. Domain should be using 2-level event channel ABI. In case another
> + *     non-default ABI is in use the domain is supposed to switch back to
> + *     2-level ABI with EVTCHNOP_reset.
> + *  2. The operation is unsupported for non-current online VCPUs. If performed
> + *     during shutdown/kexec/... a guest domain is supposed to behave in the
> + *     following sequence:
> + *     - Choose special 'shutdown' VCPU, bring all other VCPUs down;
> + *     - Issue VCPUOP_reset_vcpu_info for all offline VCPUs;
> + *     - Issue VCPUOP_reset_vcpu_info for the 'shutdown' VCPU.
> + *
> + * After the call vcpu_info is reset to its default state: first
> + * XEN_LEGACY_MAX_VCPUS VCPUs will get it switched to shared_info and all other
> + * VCPUs will get dummy_vcpu_info.
> + */
> +#define VCPUOP_reset_vcpu_info   14
> +
>  #endif /* __XEN_PUBLIC_VCPU_H__ */
>  
>  /*


* Re: [PATCH v3 0/1] Introduce VCPUOP_reset_vcpu_info
  2014-08-19 10:04 [PATCH v3 0/1] Introduce VCPUOP_reset_vcpu_info Vitaly Kuznetsov
  2014-08-19 10:04 ` [PATCH v3 1/1] " Vitaly Kuznetsov
@ 2014-08-19 18:59 ` David Vrabel
  2014-08-20  8:43   ` Vitaly Kuznetsov
  2014-08-20 13:37   ` Konrad Rzeszutek Wilk
  1 sibling, 2 replies; 14+ messages in thread
From: David Vrabel @ 2014-08-19 18:59 UTC (permalink / raw)
  To: Vitaly Kuznetsov; +Cc: xen-devel, Andrew Jones, David Vrabel, Jan Beulich

On 19/08/14 11:04, Vitaly Kuznetsov wrote:
> The patch and guest code are based on the prototype by Konrad Rzeszutek Wilk.
>
> VCPUOP_reset_vcpu_info is required to support kexec performed by smp pvhvm
> guest. It was tested with the guest code listed below.

Instead of having the guest tear down all these bits of setup, I think
it would be preferable to have the toolstack build a new domain with the
same memory contents as the original VM. The toolstack would then
start this new domain at the kexec entry point.

The advantage of this is that you don't need to add new hypercall sub-ops
to tear down all the bits and pieces, both for existing stuff and for
anything new that might be added.

David


* Re: [PATCH v3 1/1] Introduce VCPUOP_reset_vcpu_info
  2014-08-19 10:04 ` [PATCH v3 1/1] " Vitaly Kuznetsov
  2014-08-19 10:21   ` Andrew Cooper
@ 2014-08-19 23:22   ` Jan Beulich
  1 sibling, 0 replies; 14+ messages in thread
From: Jan Beulich @ 2014-08-19 23:22 UTC (permalink / raw)
  To: xen-devel, vkuznets; +Cc: drjones, david.vrabel

>>> Vitaly Kuznetsov <vkuznets@redhat.com> 08/19/14 2:20 PM >>>
>Changes from RFCv2:
>- modify unmap_vcpu_info() to match both needs [Jan Beulich]
>- support VCPUOP_reset_vcpu_info for current VCPU [Jan Beulich]

I don't think I had asked for this, and I'm not convinced this is correct to be
done. But anyway - see David's response for a better (more generic)
approach to this.

Jan


* Re: [PATCH v3 0/1] Introduce VCPUOP_reset_vcpu_info
  2014-08-19 18:59 ` [PATCH v3 0/1] " David Vrabel
@ 2014-08-20  8:43   ` Vitaly Kuznetsov
  2014-08-20 13:37   ` Konrad Rzeszutek Wilk
  1 sibling, 0 replies; 14+ messages in thread
From: Vitaly Kuznetsov @ 2014-08-20  8:43 UTC (permalink / raw)
  To: David Vrabel; +Cc: xen-devel, Andrew Jones, David Vrabel, Jan Beulich

David Vrabel <dvrabel@cantab.net> writes:

> On 19/08/14 11:04, Vitaly Kuznetsov wrote:
>> The patch and guest code are based on the prototype by Konrad Rzeszutek Wilk.
>>
>> VCPUOP_reset_vcpu_info is required to support kexec performed by smp pvhvm
>> guest. It was tested with the guest code listed below.
>
> Instead of having the guest teardown all these bits of  setup.  I
> think it would be preferable to have the toolstack build a new domain
> with the same memory contents from the original VM.  The toolstack
> would then start this new domain at the kexec entry point.
>
> The advantage of this is you don't need to add new hypercall sub-ops
> to teardown all bits and pieces, both for existing stuff and for
> anything new that might be added.

I agree this might be the more general approach to kexec. However, I
also think that having 'paired' operations in the hypervisor is a nice
thing to have: e.g. if we have EVTCHNOP_init_control then we need an op
to switch back; if there is VCPUOP_register_vcpu_info there should
be a sort of VCPUOP_reset_vcpu_info...

The other question is how the toolstack can determine that an HVM guest
is performing kexec/kdump. I can imagine some sort of a special
toolstack-handled call with the new entry point as a parameter...

>
> David

-- 
  Vitaly


* Re: [PATCH v3 0/1] Introduce VCPUOP_reset_vcpu_info
  2014-08-19 18:59 ` [PATCH v3 0/1] " David Vrabel
  2014-08-20  8:43   ` Vitaly Kuznetsov
@ 2014-08-20 13:37   ` Konrad Rzeszutek Wilk
  2014-08-20 21:57     ` Konrad Rzeszutek Wilk
  1 sibling, 1 reply; 14+ messages in thread
From: Konrad Rzeszutek Wilk @ 2014-08-20 13:37 UTC (permalink / raw)
  To: David Vrabel
  Cc: xen-devel, Vitaly Kuznetsov, Andrew Jones, Jan Beulich, David Vrabel

On Tue, Aug 19, 2014 at 07:59:52PM +0100, David Vrabel wrote:
> On 19/08/14 11:04, Vitaly Kuznetsov wrote:
> >The patch and guest code are based on the prototype by Konrad Rzeszutek Wilk.
> >
> >VCPUOP_reset_vcpu_info is required to support kexec performed by smp pvhvm
> >guest. It was tested with the guest code listed below.
> 
> Instead of having the guest teardown all these bits of  setup.  I think it
> would be preferable to have the toolstack build a new domain with the same
> memory contents from the original VM.  The toolstack would then start this
> new domain at the kexec entry point.

What about the kdump/crash case? We might crash at any time and would
want to start the kdump kernel, which hopefully can reset all of the VCPU
information such that it can boot with more than one VCPU.

> 
> The advantage of this is you don't need to add new hypercall sub-ops to
> teardown all bits and pieces, both for existing stuff and for anything new
> that might be added.

Sure, except that having setup and teardown paths provides nice
symmetrical states. Doing a 'kexec_guest' hypercall seems to be just
a workaround for that, giving up on the symmetry.

My feeling is that we really ought to have 'init' and 'teardown'
for every hypercall. That would also be good for testing the locking,
memory leaks, etc.

> 
> David
> 


* Re: [PATCH v3 0/1] Introduce VCPUOP_reset_vcpu_info
  2014-08-20 13:37   ` Konrad Rzeszutek Wilk
@ 2014-08-20 21:57     ` Konrad Rzeszutek Wilk
  2014-08-21 10:35       ` Vitaly Kuznetsov
  0 siblings, 1 reply; 14+ messages in thread
From: Konrad Rzeszutek Wilk @ 2014-08-20 21:57 UTC (permalink / raw)
  To: David Vrabel
  Cc: xen-devel, Vitaly Kuznetsov, Andrew Jones, Jan Beulich, David Vrabel

On Wed, Aug 20, 2014 at 09:37:42AM -0400, Konrad Rzeszutek Wilk wrote:
> On Tue, Aug 19, 2014 at 07:59:52PM +0100, David Vrabel wrote:
> > On 19/08/14 11:04, Vitaly Kuznetsov wrote:
> > >The patch and guest code are based on the prototype by Konrad Rzeszutek Wilk.
> > >
> > >VCPUOP_reset_vcpu_info is required to support kexec performed by smp pvhvm
> > >guest. It was tested with the guest code listed below.
> > 
> > Instead of having the guest teardown all these bits of  setup.  I think it
> > would be preferable to have the toolstack build a new domain with the same
> > memory contents from the original VM.  The toolstack would then start this
> > new domain at the kexec entry point.
> 
> What about kdump case /crash case? We might crash at anytime and would
> want to start the kdump kernel which hopefully can reset all of the VCPU
> information such that it can boot with more than one VCPU.
> 
> > 
> > The advantage of this is you don't need to add new hypercall sub-ops to
> > teardown all bits and pieces, both for existing stuff and for anything new
> > that might be added.
> 
> Sure, except that having an setup and teardown paths provide a nice
> symetrical states. Doing an 'kexec_guest' hypercall seems to be just
> a workaround that and giving up on the symmetry.
> 
> My feeling is that we really ought to have 'init' and 'teardown'
> for every hypercall. That would also be good to test the locking, memory
> leaks, etc.

We had a big discussion today at the Xen Developer Summit about this.
The one-hypercall option has the appeal that it resets the
guest to its initial boot state (minus whatever memory got ballooned out).
Its semantics are similar to a SCHEDOP_shutdown hypercall, but it would
be a warm-reset type.

I think that by going that route, instead of chasing down the different
states (event channels, grants, vcpus, pagetables, etc.), we would
wipe everything to a nice clean slate. Maybe the hypercall argument
should be called tabula_rasa :-)

The reasons I like this instead of separate de-alloc hypercalls are:
 1) Cool name (tabula_rasa!)
 2) It would not require complicated code paths to iterate over and tear
    down grants, events, etc.
 3) It is one simple hypercall that could be used by kdump/kexec with an
    understanding of its semantics: the guest would continue executing after
    the hypercall, it might change to a different VCPU (so executing on
    VCPU5 but now we are at VCPU0), IDTs and GDTs are reset to their initial
    states, ditto for callbacks, etc. And of course it would work on both
    PVHVM and PV (and PVH).

Thoughts?


* Re: [PATCH v3 0/1] Introduce VCPUOP_reset_vcpu_info
  2014-08-20 21:57     ` Konrad Rzeszutek Wilk
@ 2014-08-21 10:35       ` Vitaly Kuznetsov
  2014-08-22  2:27         ` Konrad Rzeszutek Wilk
  0 siblings, 1 reply; 14+ messages in thread
From: Vitaly Kuznetsov @ 2014-08-21 10:35 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk
  Cc: David Vrabel, xen-devel, Andrew Jones, David Vrabel, Jan Beulich

Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> writes:

> On Wed, Aug 20, 2014 at 09:37:42AM -0400, Konrad Rzeszutek Wilk wrote:
>> On Tue, Aug 19, 2014 at 07:59:52PM +0100, David Vrabel wrote:
>> > On 19/08/14 11:04, Vitaly Kuznetsov wrote:
>> > >The patch and guest code are based on the prototype by Konrad Rzeszutek Wilk.
>> > >
>> > >VCPUOP_reset_vcpu_info is required to support kexec performed by smp pvhvm
>> > >guest. It was tested with the guest code listed below.
>> > 
>> > Instead of having the guest teardown all these bits of  setup.  I think it
>> > would be preferable to have the toolstack build a new domain with the same
>> > memory contents from the original VM.  The toolstack would then start this
>> > new domain at the kexec entry point.
>> 
>> What about kdump case /crash case? We might crash at anytime and would
>> want to start the kdump kernel which hopefully can reset all of the VCPU
>> information such that it can boot with more than one VCPU.
>> 
>> > 
>> > The advantage of this is you don't need to add new hypercall sub-ops to
>> > teardown all bits and pieces, both for existing stuff and for anything new
>> > that might be added.
>> 
>> Sure, except that having an setup and teardown paths provide a nice
>> symetrical states. Doing an 'kexec_guest' hypercall seems to be just
>> a workaround that and giving up on the symmetry.
>> 
>> My feeling is that we really ought to have 'init' and 'teardown'
>> for every hypercall. That would also be good to test the locking, memory
>> leaks, etc.
>
> We had a big discussion today at the Xen Developer to talk about it.

I regret I missed this one :-)

> The one hypercall option has the appeal that it will reset the 
> guest to the initial boot state (minues whatever memory got ballooned out).
> The semantics of it are similar to a SCHEDOP_shutdown hypercall, but it would
> be a warm_reset type.
>
> I think that going that route and instead of chasing down different
> states (event channels, grants, vcpu's, pagetables, etc) we would
> wipe everything to a nice clean slate. Maybe the hypercall argument
> should be called tabula_rasa :-)
>
> The reason I like this instead of doing a seperate de-alloc hypercalls are:
>  1) Cool name (tabule_rasa!)
>  2) It would not require complicated code paths to iterate for tearing
>     down grants, events, etc.
>  3). It is one simple hypercall that could be used by kdump/kexec with an
>     understanding of its semantics: it would continue executing after this
>     hypercall, it might change the VCPU to a different one (so executing on
>     vCPU5 but now we are at VCPU0), IDT and GDTs are reset to their initial
>     states, ditto on callbacks, etc. And of course work on both PVHVM and PV(and PVH).
>
> Thoughts?

I think we don't necessary need new hypercall, new reason code for
SCHEDOP_shutdown should work (cool name can go there :-). The op we want
to implement is very similar to rename-restart, we need to copy ram and
vcpu contexts before destroying old domain.

Recreating the domain and copying all its memory should work, but it requires
the host to have free memory, which can be an issue for large guests. If we
try implementing 'reassigning' of memory without making a copy, that can
lead to the same issues we have now: mapped grants, shared info, ...

I can try this approach, but it's really hard for me to predict how long
it's going to take me. On the other hand, resetting vcpu_info *should*
be enough for the majority of use cases, so we'll have 'full kexec
support' relatively fast.

-- 
  Vitaly

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v3 0/1] Introduce VCPUOP_reset_vcpu_info
  2014-08-21 10:35       ` Vitaly Kuznetsov
@ 2014-08-22  2:27         ` Konrad Rzeszutek Wilk
  2014-08-22  9:08           ` Jan Beulich
  2014-08-25 13:50           ` Vitaly Kuznetsov
  0 siblings, 2 replies; 14+ messages in thread
From: Konrad Rzeszutek Wilk @ 2014-08-22  2:27 UTC (permalink / raw)
  To: Vitaly Kuznetsov
  Cc: David Vrabel, xen-devel, Andrew Jones, David Vrabel, Jan Beulich

On Thu, Aug 21, 2014 at 12:35:36PM +0200, Vitaly Kuznetsov wrote:
> Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> writes:
> 
> > On Wed, Aug 20, 2014 at 09:37:42AM -0400, Konrad Rzeszutek Wilk wrote:
> >> On Tue, Aug 19, 2014 at 07:59:52PM +0100, David Vrabel wrote:
> >> > On 19/08/14 11:04, Vitaly Kuznetsov wrote:
> >> > >The patch and guest code are based on the prototype by Konrad Rzeszutek Wilk.
> >> > >
> >> > >VCPUOP_reset_vcpu_info is required to support kexec performed by smp pvhvm
> >> > >guest. It was tested with the guest code listed below.
> >> > 
> >> > Instead of having the guest tear down all these bits of setup, I think it
> >> > would be preferable to have the toolstack build a new domain with the same
> >> > memory contents as the original VM.  The toolstack would then start this
> >> > new domain at the kexec entry point.
> >> 
> >> What about the kdump/crash case? We might crash at any time and would
> >> want to start the kdump kernel, which hopefully can reset all of the VCPU
> >> information such that it can boot with more than one VCPU.
> >> 
> >> > 
> >> > The advantage of this is you don't need to add new hypercall sub-ops to
> >> > tear down all the bits and pieces, both for existing stuff and for anything new
> >> > that might be added.
> >> 
> >> Sure, except that having setup and teardown paths provides nice
> >> symmetrical states. Doing a 'kexec_guest' hypercall seems to be just
> >> a workaround for that, giving up on the symmetry.
> >> 
> >> My feeling is that we really ought to have 'init' and 'teardown'
> >> for every hypercall. That would also be good to test the locking, memory
> >> leaks, etc.
> >
> > We had a big discussion today at the Xen Developer Summit about it.
> 
> I regret I missed this one :-)

<nods> It would have been good to have had you here. Would it be possible
for you to be present at the future Xen Hackathons?

> 
> > The one hypercall option has the appeal that it will reset the 
> > guest to the initial boot state (minus whatever memory got ballooned out).
> > The semantics of it are similar to a SCHEDOP_shutdown hypercall, but it would
> > be a warm_reset type.
> >
> > I think that going that route, instead of chasing down different
> > states (event channels, grants, vCPUs, pagetables, etc.), we would
> > wipe everything to a nice clean slate. Maybe the hypercall argument
> > should be called tabula_rasa :-)
> >
> > The reasons I like this instead of doing separate de-alloc hypercalls are:
> >  1) Cool name (tabula_rasa!)
> >  2) It would not require complicated code paths to iterate for tearing
> >     down grants, events, etc.
> >  3) It is one simple hypercall that could be used by kdump/kexec with an
> >     understanding of its semantics: it would continue executing after this
> >     hypercall, it might change the VCPU to a different one (so executing on
> >     VCPU5 but now we are at VCPU0), IDTs and GDTs are reset to their initial
> >     states, ditto on callbacks, etc. And of course it would work on both PVHVM and PV (and PVH).
> >
> > Thoughts?
> 
> I don't think we necessarily need a new hypercall; a new reason code for
> SCHEDOP_shutdown should work (the cool name can go there :-). The op we want
> to implement is very similar to rename-restart: we need to copy RAM and
> vCPU contexts before destroying the old domain.

<nods>
> 
> Recreating the domain and copying all its memory should work, but it requires
> the host to have free memory, which can be an issue for large guests. If we
> try implementing 'reassigning' of memory without making a copy, that can
> lead to the same issues we have now: mapped grants, shared info, ...

That is a good point. Especially with 512GB guests. David, Jan, thoughts?

> 
> I can try this approach, but it's really hard for me to predict how long
> it's going to take me. On the other hand, resetting vcpu_info *should*
> be enough for the majority of use cases, so we'll have 'full kexec
> support' relatively fast.

I think you are going to hit the grant usage as well, but perhaps not?

> 
> -- 
>   Vitaly


* Re: [PATCH v3 0/1] Introduce VCPUOP_reset_vcpu_info
  2014-08-22  2:27         ` Konrad Rzeszutek Wilk
@ 2014-08-22  9:08           ` Jan Beulich
  2014-08-22 12:41             ` David Vrabel
  2014-08-25 13:50           ` Vitaly Kuznetsov
  1 sibling, 1 reply; 14+ messages in thread
From: Jan Beulich @ 2014-08-22  9:08 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk, Vitaly Kuznetsov
  Cc: David Vrabel, xen-devel, Andrew Jones, David Vrabel

>>> On 22.08.14 at 04:27, <konrad.wilk@oracle.com> wrote:
> On Thu, Aug 21, 2014 at 12:35:36PM +0200, Vitaly Kuznetsov wrote:
>> Recreating the domain and copying all its memory should work, but it requires
>> the host to have free memory, which can be an issue for large guests. If we
>> try implementing 'reassigning' of memory without making a copy, that can
>> lead to the same issues we have now: mapped grants, shared info, ...
> 
> That is a good point. Especially with 512GB guests. David, Jan, thoughts?

No, the idea was really to re-use the memory rather than copy it.
Why would active grants or the use of shared info be a problem
(and particularly one worse than with the vCPU-info-reset
approach)?

Jan


* Re: [PATCH v3 0/1] Introduce VCPUOP_reset_vcpu_info
  2014-08-22  9:08           ` Jan Beulich
@ 2014-08-22 12:41             ` David Vrabel
  2014-08-22 13:23               ` Jan Beulich
  0 siblings, 1 reply; 14+ messages in thread
From: David Vrabel @ 2014-08-22 12:41 UTC (permalink / raw)
  To: Jan Beulich, Konrad Rzeszutek Wilk, Vitaly Kuznetsov
  Cc: David Vrabel, xen-devel, Andrew Jones

On 22/08/14 10:08, Jan Beulich wrote:
>>>> On 22.08.14 at 04:27, <konrad.wilk@oracle.com> wrote:
>> On Thu, Aug 21, 2014 at 12:35:36PM +0200, Vitaly Kuznetsov wrote:
>>> Recreating the domain and copying all its memory should work, but it requires
>>> the host to have free memory, which can be an issue for large guests. If we
>>> try implementing 'reassigning' of memory without making a copy, that can
>>> lead to the same issues we have now: mapped grants, shared info, ...
>>
>> That is a good point. Especially with 512GB guests. David, Jan, thoughts?
> 
> No, the idea was really to re-use the memory rather than copy it.
> Why would active grants or the use of shared info be a problem
> (and particularly one worse than with the vCPU-info-reset
> approach)?

An initial prototype that copies the memory may be a useful first step
as this will be straightforward (most of the bits can be borrowed from
save/restore).

If the domain has mapped granted pages then the new domain should not
retain the mappings (otherwise you will end up with a domain having
mappings of a grant that does not agree with the domain in the granter's
grant table).

If the domain has granted pages, it should probably copy those pages and
not reuse them (because updating the map tracking info is probably
non-trivial).

David


* Re: [PATCH v3 0/1] Introduce VCPUOP_reset_vcpu_info
  2014-08-22 12:41             ` David Vrabel
@ 2014-08-22 13:23               ` Jan Beulich
  0 siblings, 0 replies; 14+ messages in thread
From: Jan Beulich @ 2014-08-22 13:23 UTC (permalink / raw)
  To: David Vrabel, Konrad Rzeszutek Wilk, Vitaly Kuznetsov
  Cc: David Vrabel, xen-devel, Andrew Jones

>>> On 22.08.14 at 14:41, <david.vrabel@citrix.com> wrote:
> If the domain has mapped granted pages then the new domain should not
> retain the mappings (otherwise you will end up with a domain having
> mappings of a grant that does not agree with the domain in the granter's
> grant table).

Of course.

> If the domain has granted pages, it should probably copy those pages and
> not reuse them (because updating the map tracking info is probably
> non-trivial).

I think grants should be dropped in any event (and if the toolstack
establishes any, it should re-establish them). Those that have a
mapping elsewhere might indeed need to have their pages copied
if updating the maptrack is too cumbersome.

Jan


* Re: [PATCH v3 0/1] Introduce VCPUOP_reset_vcpu_info
  2014-08-22  2:27         ` Konrad Rzeszutek Wilk
  2014-08-22  9:08           ` Jan Beulich
@ 2014-08-25 13:50           ` Vitaly Kuznetsov
  1 sibling, 0 replies; 14+ messages in thread
From: Vitaly Kuznetsov @ 2014-08-25 13:50 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk
  Cc: David Vrabel, xen-devel, Andrew Jones, David Vrabel, Jan Beulich

Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> writes:

> On Thu, Aug 21, 2014 at 12:35:36PM +0200, Vitaly Kuznetsov wrote:
>> Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> writes:
>> 
>> > On Wed, Aug 20, 2014 at 09:37:42AM -0400, Konrad Rzeszutek Wilk wrote:
>> >> On Tue, Aug 19, 2014 at 07:59:52PM +0100, David Vrabel wrote:
>> >> > On 19/08/14 11:04, Vitaly Kuznetsov wrote:
>> >> > >The patch and guest code are based on the prototype by Konrad Rzeszutek Wilk.
>> >> > >
>> >> > >VCPUOP_reset_vcpu_info is required to support kexec performed by smp pvhvm
>> >> > >guest. It was tested with the guest code listed below.
>> >> > 
>> >> > Instead of having the guest tear down all these bits of setup, I think it
>> >> > would be preferable to have the toolstack build a new domain with the same
>> >> > memory contents as the original VM.  The toolstack would then start this
>> >> > new domain at the kexec entry point.
>> >> 
>> >> What about the kdump/crash case? We might crash at any time and would
>> >> want to start the kdump kernel, which hopefully can reset all of the VCPU
>> >> information such that it can boot with more than one VCPU.
>> >> 
>> >> > 
>> >> > The advantage of this is you don't need to add new hypercall sub-ops to
>> >> > tear down all the bits and pieces, both for existing stuff and for anything new
>> >> > that might be added.
>> >> 
>> >> Sure, except that having setup and teardown paths provides nice
>> >> symmetrical states. Doing a 'kexec_guest' hypercall seems to be just
>> >> a workaround for that, giving up on the symmetry.
>> >> 
>> >> My feeling is that we really ought to have 'init' and 'teardown'
>> >> for every hypercall. That would also be good to test the locking, memory
>> >> leaks, etc.
>> >
>> > We had a big discussion today at the Xen Developer Summit about it.
>> 
>> I regret I missed this one :-)
>
> <nods> It would have been good to have had you here. Would it be possible
> for you to be present at the future Xen Hackathons?
>

Sure, why not, especially if these hackathons are held somewhere in Europe :)

>> 
>> > The one hypercall option has the appeal that it will reset the 
>> > guest to the initial boot state (minus whatever memory got ballooned out).
>> > The semantics of it are similar to a SCHEDOP_shutdown hypercall, but it would
>> > be a warm_reset type.
>> >
>> > I think that going that route, instead of chasing down different
>> > states (event channels, grants, vCPUs, pagetables, etc.), we would
>> > wipe everything to a nice clean slate. Maybe the hypercall argument
>> > should be called tabula_rasa :-)
>> >
>> > The reasons I like this instead of doing separate de-alloc hypercalls are:
>> >  1) Cool name (tabula_rasa!)
>> >  2) It would not require complicated code paths to iterate for tearing
>> >     down grants, events, etc.
>> >  3) It is one simple hypercall that could be used by kdump/kexec with an
>> >     understanding of its semantics: it would continue executing after this
>> >     hypercall, it might change the VCPU to a different one (so executing on
>> >     VCPU5 but now we are at VCPU0), IDTs and GDTs are reset to their initial
>> >     states, ditto on callbacks, etc. And of course it would work on both PVHVM and PV (and PVH).
>> >
>> > Thoughts?
>> 
>> I don't think we necessarily need a new hypercall; a new reason code for
>> SCHEDOP_shutdown should work (the cool name can go there :-). The op we want
>> to implement is very similar to rename-restart: we need to copy RAM and
>> vCPU contexts before destroying the old domain.
>
> <nods>
>> 
>> Recreating the domain and copying all its memory should work, but it requires
>> the host to have free memory, which can be an issue for large guests. If we
>> try implementing 'reassigning' of memory without making a copy, that can
>> lead to the same issues we have now: mapped grants, shared info, ...

I just sent my 'copy based approach' series as a WIP/RFC, and I'm going to
take a look at how we can implement memory re-assigning.

-- 
  Vitaly


end of thread, other threads:[~2014-08-25 13:50 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-08-19 10:04 [PATCH v3 0/1] Introduce VCPUOP_reset_vcpu_info Vitaly Kuznetsov
2014-08-19 10:04 ` [PATCH v3 1/1] " Vitaly Kuznetsov
2014-08-19 10:21   ` Andrew Cooper
2014-08-19 23:22   ` Jan Beulich
2014-08-19 18:59 ` [PATCH v3 0/1] " David Vrabel
2014-08-20  8:43   ` Vitaly Kuznetsov
2014-08-20 13:37   ` Konrad Rzeszutek Wilk
2014-08-20 21:57     ` Konrad Rzeszutek Wilk
2014-08-21 10:35       ` Vitaly Kuznetsov
2014-08-22  2:27         ` Konrad Rzeszutek Wilk
2014-08-22  9:08           ` Jan Beulich
2014-08-22 12:41             ` David Vrabel
2014-08-22 13:23               ` Jan Beulich
2014-08-25 13:50           ` Vitaly Kuznetsov
