All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC PATCH] KVM: x86: Allow Qemu/KVM to use PVH entry point
@ 2017-11-28 19:34 ` Maran Wilson
  0 siblings, 0 replies; 41+ messages in thread
From: Maran Wilson @ 2017-11-28 19:34 UTC (permalink / raw)
  To: boris.ostrovsky, jgross, tglx, mingo, hpa, x86, xen-devel,
	linux-kernel, roger.pau, rkrcmar, JBeulich, andrew.cooper3,
	pbonzini, kvm

For certain applications it is desirable to rapidly boot a KVM virtual
machine. In cases where legacy hardware and software support within the
guest is not needed, Qemu should be able to boot directly into the
uncompressed Linux kernel binary without the need to run firmware.

There already exists an ABI to allow this for Xen PVH guests and the ABI is
supported by Linux and FreeBSD:

   https://xenbits.xen.org/docs/unstable/misc/hvmlite.html

This PoC patch enables Qemu to use that same entry point for booting KVM
guests.

Even though the code is still PoC quality, I'm sending this as an RFC now
since there are a number of different ways the specific implementation
details can be handled. I chose a shared code path for Xen and KVM guests
but could just as easily create a separate code path that is advertised by
a different ELF note for KVM. There also seems to be some flexibility in
how the e820 table data is passed and how (or if) it should be identified
as e820 data. As a starting point, I've chosen the options that seem to
result in the smallest patch with minimal to no changes required of the
x86/HVM direct boot ABI.
---
 arch/x86/xen/enlighten_pvh.c | 74 ++++++++++++++++++++++++++++++++------------
 1 file changed, 55 insertions(+), 19 deletions(-)

diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c
index 98ab176..d93f711 100644
--- a/arch/x86/xen/enlighten_pvh.c
+++ b/arch/x86/xen/enlighten_pvh.c
@@ -31,21 +31,46 @@ static void xen_pvh_arch_setup(void)
 		acpi_irq_model = ACPI_IRQ_MODEL_PLATFORM;
 }
 
-static void __init init_pvh_bootparams(void)
+static void __init init_pvh_bootparams(bool xen_guest)
 {
 	struct xen_memory_map memmap;
 	int rc;
 
 	memset(&pvh_bootparams, 0, sizeof(pvh_bootparams));
 
-	memmap.nr_entries = ARRAY_SIZE(pvh_bootparams.e820_table);
-	set_xen_guest_handle(memmap.buffer, pvh_bootparams.e820_table);
-	rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap);
-	if (rc) {
-		xen_raw_printk("XENMEM_memory_map failed (%d)\n", rc);
-		BUG();
+	if (xen_guest) {
+		memmap.nr_entries = ARRAY_SIZE(pvh_bootparams.e820_table);
+		set_xen_guest_handle(memmap.buffer, pvh_bootparams.e820_table);
+		rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap);
+		if (rc) {
+			xen_raw_printk("XENMEM_memory_map failed (%d)\n", rc);
+			BUG();
+		}
+		pvh_bootparams.e820_entries = memmap.nr_entries;
+	} else if (pvh_start_info.nr_modules > 1) {
+		/* The second module should be the e820 data for KVM guests */
+		struct hvm_modlist_entry *modaddr;
+		char e820_sig[] = "e820 data";
+		struct boot_e820_entry *ep;
+		struct e820_table *tp;
+		char *cmdline_str;
+		int idx;
+
+		modaddr = __va(pvh_start_info.modlist_paddr +
+			       sizeof(struct hvm_modlist_entry));
+		cmdline_str = __va(modaddr->cmdline_paddr);
+
+		if ((modaddr->cmdline_paddr) &&
+		    (!strncmp(e820_sig, cmdline_str, sizeof(e820_sig)))) {
+			tp = __va(modaddr->paddr);
+			ep = (struct boot_e820_entry *)tp->entries;
+
+			pvh_bootparams.e820_entries = tp->nr_entries;
+
+			for (idx = 0; idx < tp->nr_entries ; idx++, ep++)
+				pvh_bootparams.e820_table[idx] = *ep;
+		}
 	}
-	pvh_bootparams.e820_entries = memmap.nr_entries;
 
 	if (pvh_bootparams.e820_entries < E820_MAX_ENTRIES_ZEROPAGE - 1) {
 		pvh_bootparams.e820_table[pvh_bootparams.e820_entries].addr =
@@ -55,8 +80,9 @@ static void __init init_pvh_bootparams(void)
 		pvh_bootparams.e820_table[pvh_bootparams.e820_entries].type =
 			E820_TYPE_RESERVED;
 		pvh_bootparams.e820_entries++;
-	} else
+	} else if (xen_guest) {
 		xen_raw_printk("Warning: Can fit ISA range into e820\n");
+	}
 
 	pvh_bootparams.hdr.cmd_line_ptr =
 		pvh_start_info.cmdline_paddr;
@@ -76,7 +102,7 @@ static void __init init_pvh_bootparams(void)
 	 * environment (i.e. hardware_subarch 0).
 	 */
 	pvh_bootparams.hdr.version = 0x212;
-	pvh_bootparams.hdr.type_of_loader = (9 << 4) | 0; /* Xen loader */
+	pvh_bootparams.hdr.type_of_loader = ((xen_guest ? 0x9 : 0xb) << 4) | 0;
 }
 
 /*
@@ -85,22 +111,32 @@ static void __init init_pvh_bootparams(void)
  */
 void __init xen_prepare_pvh(void)
 {
-	u32 msr;
+
+	u32 msr = xen_cpuid_base();
 	u64 pfn;
+	bool xen_guest = msr ? true : false;
 
 	if (pvh_start_info.magic != XEN_HVM_START_MAGIC_VALUE) {
-		xen_raw_printk("Error: Unexpected magic value (0x%08x)\n",
-				pvh_start_info.magic);
+		if (xen_guest)
+			xen_raw_printk("Error: Unexpected magic value (0x%08x)\n",
+					pvh_start_info.magic);
 		BUG();
 	}
 
-	xen_pvh = 1;
+	if (xen_guest) {
+		xen_pvh = 1;
+
+		msr = cpuid_ebx(msr + 2);
+		pfn = __pa(hypercall_page);
+		wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32));
+
+	} else if (!hypervisor_cpuid_base("KVMKVMKVM\0\0\0", 0)) {
+		BUG();
+	}
 
-	msr = cpuid_ebx(xen_cpuid_base() + 2);
-	pfn = __pa(hypercall_page);
-	wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32));
+	init_pvh_bootparams(xen_guest);
 
-	init_pvh_bootparams();
+	if (xen_guest)
+		x86_init.oem.arch_setup = xen_pvh_arch_setup;
 
-	x86_init.oem.arch_setup = xen_pvh_arch_setup;
 }
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 41+ messages in thread

* [RFC PATCH] KVM: x86: Allow Qemu/KVM to use PVH entry point
@ 2017-11-28 19:34 ` Maran Wilson
  0 siblings, 0 replies; 41+ messages in thread
From: Maran Wilson @ 2017-11-28 19:34 UTC (permalink / raw)
  To: boris.ostrovsky, jgross, tglx, mingo, hpa, x86, xen-devel,
	linux-kernel, roger.pau, rkrcmar, JBeulich, andrew.cooper3,
	pbonzini, kvm

For certain applications it is desirable to rapidly boot a KVM virtual
machine. In cases where legacy hardware and software support within the
guest is not needed, Qemu should be able to boot directly into the
uncompressed Linux kernel binary without the need to run firmware.

There already exists an ABI to allow this for Xen PVH guests and the ABI is
supported by Linux and FreeBSD:

   https://xenbits.xen.org/docs/unstable/misc/hvmlite.html

This PoC patch enables Qemu to use that same entry point for booting KVM
guests.

Even though the code is still PoC quality, I'm sending this as an RFC now
since there are a number of different ways the specific implementation
details can be handled. I chose a shared code path for Xen and KVM guests
but could just as easily create a separate code path that is advertised by
a different ELF note for KVM. There also seems to be some flexibility in
how the e820 table data is passed and how (or if) it should be identified
as e820 data. As a starting point, I've chosen the options that seem to
result in the smallest patch with minimal to no changes required of the
x86/HVM direct boot ABI.
---
 arch/x86/xen/enlighten_pvh.c | 74 ++++++++++++++++++++++++++++++++------------
 1 file changed, 55 insertions(+), 19 deletions(-)

diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c
index 98ab176..d93f711 100644
--- a/arch/x86/xen/enlighten_pvh.c
+++ b/arch/x86/xen/enlighten_pvh.c
@@ -31,21 +31,46 @@ static void xen_pvh_arch_setup(void)
 		acpi_irq_model = ACPI_IRQ_MODEL_PLATFORM;
 }
 
-static void __init init_pvh_bootparams(void)
+static void __init init_pvh_bootparams(bool xen_guest)
 {
 	struct xen_memory_map memmap;
 	int rc;
 
 	memset(&pvh_bootparams, 0, sizeof(pvh_bootparams));
 
-	memmap.nr_entries = ARRAY_SIZE(pvh_bootparams.e820_table);
-	set_xen_guest_handle(memmap.buffer, pvh_bootparams.e820_table);
-	rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap);
-	if (rc) {
-		xen_raw_printk("XENMEM_memory_map failed (%d)\n", rc);
-		BUG();
+	if (xen_guest) {
+		memmap.nr_entries = ARRAY_SIZE(pvh_bootparams.e820_table);
+		set_xen_guest_handle(memmap.buffer, pvh_bootparams.e820_table);
+		rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap);
+		if (rc) {
+			xen_raw_printk("XENMEM_memory_map failed (%d)\n", rc);
+			BUG();
+		}
+		pvh_bootparams.e820_entries = memmap.nr_entries;
+	} else if (pvh_start_info.nr_modules > 1) {
+		/* The second module should be the e820 data for KVM guests */
+		struct hvm_modlist_entry *modaddr;
+		char e820_sig[] = "e820 data";
+		struct boot_e820_entry *ep;
+		struct e820_table *tp;
+		char *cmdline_str;
+		int idx;
+
+		modaddr = __va(pvh_start_info.modlist_paddr +
+			       sizeof(struct hvm_modlist_entry));
+		cmdline_str = __va(modaddr->cmdline_paddr);
+
+		if ((modaddr->cmdline_paddr) &&
+		    (!strncmp(e820_sig, cmdline_str, sizeof(e820_sig)))) {
+			tp = __va(modaddr->paddr);
+			ep = (struct boot_e820_entry *)tp->entries;
+
+			pvh_bootparams.e820_entries = tp->nr_entries;
+
+			for (idx = 0; idx < tp->nr_entries ; idx++, ep++)
+				pvh_bootparams.e820_table[idx] = *ep;
+		}
 	}
-	pvh_bootparams.e820_entries = memmap.nr_entries;
 
 	if (pvh_bootparams.e820_entries < E820_MAX_ENTRIES_ZEROPAGE - 1) {
 		pvh_bootparams.e820_table[pvh_bootparams.e820_entries].addr =
@@ -55,8 +80,9 @@ static void __init init_pvh_bootparams(void)
 		pvh_bootparams.e820_table[pvh_bootparams.e820_entries].type =
 			E820_TYPE_RESERVED;
 		pvh_bootparams.e820_entries++;
-	} else
+	} else if (xen_guest) {
 		xen_raw_printk("Warning: Can fit ISA range into e820\n");
+	}
 
 	pvh_bootparams.hdr.cmd_line_ptr =
 		pvh_start_info.cmdline_paddr;
@@ -76,7 +102,7 @@ static void __init init_pvh_bootparams(void)
 	 * environment (i.e. hardware_subarch 0).
 	 */
 	pvh_bootparams.hdr.version = 0x212;
-	pvh_bootparams.hdr.type_of_loader = (9 << 4) | 0; /* Xen loader */
+	pvh_bootparams.hdr.type_of_loader = ((xen_guest ? 0x9 : 0xb) << 4) | 0;
 }
 
 /*
@@ -85,22 +111,32 @@ static void __init init_pvh_bootparams(void)
  */
 void __init xen_prepare_pvh(void)
 {
-	u32 msr;
+
+	u32 msr = xen_cpuid_base();
 	u64 pfn;
+	bool xen_guest = msr ? true : false;
 
 	if (pvh_start_info.magic != XEN_HVM_START_MAGIC_VALUE) {
-		xen_raw_printk("Error: Unexpected magic value (0x%08x)\n",
-				pvh_start_info.magic);
+		if (xen_guest)
+			xen_raw_printk("Error: Unexpected magic value (0x%08x)\n",
+					pvh_start_info.magic);
 		BUG();
 	}
 
-	xen_pvh = 1;
+	if (xen_guest) {
+		xen_pvh = 1;
+
+		msr = cpuid_ebx(msr + 2);
+		pfn = __pa(hypercall_page);
+		wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32));
+
+	} else if (!hypervisor_cpuid_base("KVMKVMKVM\0\0\0", 0)) {
+		BUG();
+	}
 
-	msr = cpuid_ebx(xen_cpuid_base() + 2);
-	pfn = __pa(hypercall_page);
-	wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32));
+	init_pvh_bootparams(xen_guest);
 
-	init_pvh_bootparams();
+	if (xen_guest)
+		x86_init.oem.arch_setup = xen_pvh_arch_setup;
 
-	x86_init.oem.arch_setup = xen_pvh_arch_setup;
 }
-- 
1.8.3.1


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply related	[flat|nested] 41+ messages in thread

* Re: [RFC PATCH] KVM: x86: Allow Qemu/KVM to use PVH entry point
  2017-11-28 19:34 ` Maran Wilson
  (?)
@ 2017-11-28 19:41   ` Andrew Cooper
  -1 siblings, 0 replies; 41+ messages in thread
From: Andrew Cooper @ 2017-11-28 19:41 UTC (permalink / raw)
  To: Maran Wilson, boris.ostrovsky, jgross, tglx, mingo, hpa, x86,
	xen-devel, linux-kernel, roger.pau, rkrcmar, JBeulich, pbonzini,
	kvm

On 28/11/17 19:34, Maran Wilson wrote:
> For certain applications it is desirable to rapidly boot a KVM virtual
> machine. In cases where legacy hardware and software support within the
> guest is not needed, Qemu should be able to boot directly into the
> uncompressed Linux kernel binary without the need to run firmware.
>
> There already exists an ABI to allow this for Xen PVH guests and the ABI is
> supported by Linux and FreeBSD:
>
>    https://xenbits.xen.org/docs/unstable/misc/hvmlite.html

Just FYI, this link has recently become stale, following some cleanup. 
The document is now:

https://xenbits.xen.org/docs/unstable/misc/pvh.html

~Andrew

>
> This PoC patch enables Qemu to use that same entry point for booting KVM
> guests.
>
> Even though the code is still PoC quality, I'm sending this as an RFC now
> since there are a number of different ways the specific implementation
> details can be handled. I chose a shared code path for Xen and KVM guests
> but could just as easily create a separate code path that is advertised by
> a different ELF note for KVM. There also seems to be some flexibility in
> how the e820 table data is passed and how (or if) it should be identified
> as e820 data. As a starting point, I've chosen the options that seem to
> result in the smallest patch with minimal to no changes required of the
> x86/HVM direct boot ABI.
> ---
>  arch/x86/xen/enlighten_pvh.c | 74 ++++++++++++++++++++++++++++++++------------
>  1 file changed, 55 insertions(+), 19 deletions(-)
>
> diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c
> index 98ab176..d93f711 100644
> --- a/arch/x86/xen/enlighten_pvh.c
> +++ b/arch/x86/xen/enlighten_pvh.c
> @@ -31,21 +31,46 @@ static void xen_pvh_arch_setup(void)
>  		acpi_irq_model = ACPI_IRQ_MODEL_PLATFORM;
>  }
>  
> -static void __init init_pvh_bootparams(void)
> +static void __init init_pvh_bootparams(bool xen_guest)
>  {
>  	struct xen_memory_map memmap;
>  	int rc;
>  
>  	memset(&pvh_bootparams, 0, sizeof(pvh_bootparams));
>  
> -	memmap.nr_entries = ARRAY_SIZE(pvh_bootparams.e820_table);
> -	set_xen_guest_handle(memmap.buffer, pvh_bootparams.e820_table);
> -	rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap);
> -	if (rc) {
> -		xen_raw_printk("XENMEM_memory_map failed (%d)\n", rc);
> -		BUG();
> +	if (xen_guest) {
> +		memmap.nr_entries = ARRAY_SIZE(pvh_bootparams.e820_table);
> +		set_xen_guest_handle(memmap.buffer, pvh_bootparams.e820_table);
> +		rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap);
> +		if (rc) {
> +			xen_raw_printk("XENMEM_memory_map failed (%d)\n", rc);
> +			BUG();
> +		}
> +		pvh_bootparams.e820_entries = memmap.nr_entries;
> +	} else if (pvh_start_info.nr_modules > 1) {
> +		/* The second module should be the e820 data for KVM guests */
> +		struct hvm_modlist_entry *modaddr;
> +		char e820_sig[] = "e820 data";
> +		struct boot_e820_entry *ep;
> +		struct e820_table *tp;
> +		char *cmdline_str;
> +		int idx;
> +
> +		modaddr = __va(pvh_start_info.modlist_paddr +
> +			       sizeof(struct hvm_modlist_entry));
> +		cmdline_str = __va(modaddr->cmdline_paddr);
> +
> +		if ((modaddr->cmdline_paddr) &&
> +		    (!strncmp(e820_sig, cmdline_str, sizeof(e820_sig)))) {
> +			tp = __va(modaddr->paddr);
> +			ep = (struct boot_e820_entry *)tp->entries;
> +
> +			pvh_bootparams.e820_entries = tp->nr_entries;
> +
> +			for (idx = 0; idx < tp->nr_entries ; idx++, ep++)
> +				pvh_bootparams.e820_table[idx] = *ep;
> +		}
>  	}
> -	pvh_bootparams.e820_entries = memmap.nr_entries;
>  
>  	if (pvh_bootparams.e820_entries < E820_MAX_ENTRIES_ZEROPAGE - 1) {
>  		pvh_bootparams.e820_table[pvh_bootparams.e820_entries].addr =
> @@ -55,8 +80,9 @@ static void __init init_pvh_bootparams(void)
>  		pvh_bootparams.e820_table[pvh_bootparams.e820_entries].type =
>  			E820_TYPE_RESERVED;
>  		pvh_bootparams.e820_entries++;
> -	} else
> +	} else if (xen_guest) {
>  		xen_raw_printk("Warning: Can fit ISA range into e820\n");
> +	}
>  
>  	pvh_bootparams.hdr.cmd_line_ptr =
>  		pvh_start_info.cmdline_paddr;
> @@ -76,7 +102,7 @@ static void __init init_pvh_bootparams(void)
>  	 * environment (i.e. hardware_subarch 0).
>  	 */
>  	pvh_bootparams.hdr.version = 0x212;
> -	pvh_bootparams.hdr.type_of_loader = (9 << 4) | 0; /* Xen loader */
> +	pvh_bootparams.hdr.type_of_loader = ((xen_guest ? 0x9 : 0xb) << 4) | 0;
>  }
>  
>  /*
> @@ -85,22 +111,32 @@ static void __init init_pvh_bootparams(void)
>   */
>  void __init xen_prepare_pvh(void)
>  {
> -	u32 msr;
> +
> +	u32 msr = xen_cpuid_base();
>  	u64 pfn;
> +	bool xen_guest = msr ? true : false;
>  
>  	if (pvh_start_info.magic != XEN_HVM_START_MAGIC_VALUE) {
> -		xen_raw_printk("Error: Unexpected magic value (0x%08x)\n",
> -				pvh_start_info.magic);
> +		if (xen_guest)
> +			xen_raw_printk("Error: Unexpected magic value (0x%08x)\n",
> +					pvh_start_info.magic);
>  		BUG();
>  	}
>  
> -	xen_pvh = 1;
> +	if (xen_guest) {
> +		xen_pvh = 1;
> +
> +		msr = cpuid_ebx(msr + 2);
> +		pfn = __pa(hypercall_page);
> +		wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32));
> +
> +	} else if (!hypervisor_cpuid_base("KVMKVMKVM\0\0\0", 0)) {
> +		BUG();
> +	}
>  
> -	msr = cpuid_ebx(xen_cpuid_base() + 2);
> -	pfn = __pa(hypercall_page);
> -	wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32));
> +	init_pvh_bootparams(xen_guest);
>  
> -	init_pvh_bootparams();
> +	if (xen_guest)
> +		x86_init.oem.arch_setup = xen_pvh_arch_setup;
>  
> -	x86_init.oem.arch_setup = xen_pvh_arch_setup;
>  }

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [RFC PATCH] KVM: x86: Allow Qemu/KVM to use PVH entry point
@ 2017-11-28 19:41   ` Andrew Cooper
  0 siblings, 0 replies; 41+ messages in thread
From: Andrew Cooper @ 2017-11-28 19:41 UTC (permalink / raw)
  To: Maran Wilson, boris.ostrovsky, jgross, tglx, mingo, hpa, x86,
	xen-devel, linux-kernel, roger.pau, rkrcmar, JBeulich, pbonzini,
	kvm

On 28/11/17 19:34, Maran Wilson wrote:
> For certain applications it is desirable to rapidly boot a KVM virtual
> machine. In cases where legacy hardware and software support within the
> guest is not needed, Qemu should be able to boot directly into the
> uncompressed Linux kernel binary without the need to run firmware.
>
> There already exists an ABI to allow this for Xen PVH guests and the ABI is
> supported by Linux and FreeBSD:
>
>    https://xenbits.xen.org/docs/unstable/misc/hvmlite.html

Just FYI, this link has recently become stale, following some cleanup. 
The document is now:

https://xenbits.xen.org/docs/unstable/misc/pvh.html

~Andrew

>
> This PoC patch enables Qemu to use that same entry point for booting KVM
> guests.
>
> Even though the code is still PoC quality, I'm sending this as an RFC now
> since there are a number of different ways the specific implementation
> details can be handled. I chose a shared code path for Xen and KVM guests
> but could just as easily create a separate code path that is advertised by
> a different ELF note for KVM. There also seems to be some flexibility in
> how the e820 table data is passed and how (or if) it should be identified
> as e820 data. As a starting point, I've chosen the options that seem to
> result in the smallest patch with minimal to no changes required of the
> x86/HVM direct boot ABI.
> ---
>  arch/x86/xen/enlighten_pvh.c | 74 ++++++++++++++++++++++++++++++++------------
>  1 file changed, 55 insertions(+), 19 deletions(-)
>
> diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c
> index 98ab176..d93f711 100644
> --- a/arch/x86/xen/enlighten_pvh.c
> +++ b/arch/x86/xen/enlighten_pvh.c
> @@ -31,21 +31,46 @@ static void xen_pvh_arch_setup(void)
>  		acpi_irq_model = ACPI_IRQ_MODEL_PLATFORM;
>  }
>  
> -static void __init init_pvh_bootparams(void)
> +static void __init init_pvh_bootparams(bool xen_guest)
>  {
>  	struct xen_memory_map memmap;
>  	int rc;
>  
>  	memset(&pvh_bootparams, 0, sizeof(pvh_bootparams));
>  
> -	memmap.nr_entries = ARRAY_SIZE(pvh_bootparams.e820_table);
> -	set_xen_guest_handle(memmap.buffer, pvh_bootparams.e820_table);
> -	rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap);
> -	if (rc) {
> -		xen_raw_printk("XENMEM_memory_map failed (%d)\n", rc);
> -		BUG();
> +	if (xen_guest) {
> +		memmap.nr_entries = ARRAY_SIZE(pvh_bootparams.e820_table);
> +		set_xen_guest_handle(memmap.buffer, pvh_bootparams.e820_table);
> +		rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap);
> +		if (rc) {
> +			xen_raw_printk("XENMEM_memory_map failed (%d)\n", rc);
> +			BUG();
> +		}
> +		pvh_bootparams.e820_entries = memmap.nr_entries;
> +	} else if (pvh_start_info.nr_modules > 1) {
> +		/* The second module should be the e820 data for KVM guests */
> +		struct hvm_modlist_entry *modaddr;
> +		char e820_sig[] = "e820 data";
> +		struct boot_e820_entry *ep;
> +		struct e820_table *tp;
> +		char *cmdline_str;
> +		int idx;
> +
> +		modaddr = __va(pvh_start_info.modlist_paddr +
> +			       sizeof(struct hvm_modlist_entry));
> +		cmdline_str = __va(modaddr->cmdline_paddr);
> +
> +		if ((modaddr->cmdline_paddr) &&
> +		    (!strncmp(e820_sig, cmdline_str, sizeof(e820_sig)))) {
> +			tp = __va(modaddr->paddr);
> +			ep = (struct boot_e820_entry *)tp->entries;
> +
> +			pvh_bootparams.e820_entries = tp->nr_entries;
> +
> +			for (idx = 0; idx < tp->nr_entries ; idx++, ep++)
> +				pvh_bootparams.e820_table[idx] = *ep;
> +		}
>  	}
> -	pvh_bootparams.e820_entries = memmap.nr_entries;
>  
>  	if (pvh_bootparams.e820_entries < E820_MAX_ENTRIES_ZEROPAGE - 1) {
>  		pvh_bootparams.e820_table[pvh_bootparams.e820_entries].addr =
> @@ -55,8 +80,9 @@ static void __init init_pvh_bootparams(void)
>  		pvh_bootparams.e820_table[pvh_bootparams.e820_entries].type =
>  			E820_TYPE_RESERVED;
>  		pvh_bootparams.e820_entries++;
> -	} else
> +	} else if (xen_guest) {
>  		xen_raw_printk("Warning: Can fit ISA range into e820\n");
> +	}
>  
>  	pvh_bootparams.hdr.cmd_line_ptr =
>  		pvh_start_info.cmdline_paddr;
> @@ -76,7 +102,7 @@ static void __init init_pvh_bootparams(void)
>  	 * environment (i.e. hardware_subarch 0).
>  	 */
>  	pvh_bootparams.hdr.version = 0x212;
> -	pvh_bootparams.hdr.type_of_loader = (9 << 4) | 0; /* Xen loader */
> +	pvh_bootparams.hdr.type_of_loader = ((xen_guest ? 0x9 : 0xb) << 4) | 0;
>  }
>  
>  /*
> @@ -85,22 +111,32 @@ static void __init init_pvh_bootparams(void)
>   */
>  void __init xen_prepare_pvh(void)
>  {
> -	u32 msr;
> +
> +	u32 msr = xen_cpuid_base();
>  	u64 pfn;
> +	bool xen_guest = msr ? true : false;
>  
>  	if (pvh_start_info.magic != XEN_HVM_START_MAGIC_VALUE) {
> -		xen_raw_printk("Error: Unexpected magic value (0x%08x)\n",
> -				pvh_start_info.magic);
> +		if (xen_guest)
> +			xen_raw_printk("Error: Unexpected magic value (0x%08x)\n",
> +					pvh_start_info.magic);
>  		BUG();
>  	}
>  
> -	xen_pvh = 1;
> +	if (xen_guest) {
> +		xen_pvh = 1;
> +
> +		msr = cpuid_ebx(msr + 2);
> +		pfn = __pa(hypercall_page);
> +		wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32));
> +
> +	} else if (!hypervisor_cpuid_base("KVMKVMKVM\0\0\0", 0)) {
> +		BUG();
> +	}
>  
> -	msr = cpuid_ebx(xen_cpuid_base() + 2);
> -	pfn = __pa(hypercall_page);
> -	wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32));
> +	init_pvh_bootparams(xen_guest);
>  
> -	init_pvh_bootparams();
> +	if (xen_guest)
> +		x86_init.oem.arch_setup = xen_pvh_arch_setup;
>  
> -	x86_init.oem.arch_setup = xen_pvh_arch_setup;
>  }


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [RFC PATCH] KVM: x86: Allow Qemu/KVM to use PVH entry point
@ 2017-11-28 19:41   ` Andrew Cooper
  0 siblings, 0 replies; 41+ messages in thread
From: Andrew Cooper @ 2017-11-28 19:41 UTC (permalink / raw)
  To: Maran Wilson, boris.ostrovsky, jgross, tglx, mingo, hpa, x86,
	xen-devel, linux-kernel, roger.pau, rkrcmar, JBeulich, pbonzini,
	kvm

On 28/11/17 19:34, Maran Wilson wrote:
> For certain applications it is desirable to rapidly boot a KVM virtual
> machine. In cases where legacy hardware and software support within the
> guest is not needed, Qemu should be able to boot directly into the
> uncompressed Linux kernel binary without the need to run firmware.
>
> There already exists an ABI to allow this for Xen PVH guests and the ABI is
> supported by Linux and FreeBSD:
>
>    https://xenbits.xen.org/docs/unstable/misc/hvmlite.html

Just FYI, this link has recently become stale, following some cleanup. 
The document is now:

https://xenbits.xen.org/docs/unstable/misc/pvh.html

~Andrew

>
> This PoC patch enables Qemu to use that same entry point for booting KVM
> guests.
>
> Even though the code is still PoC quality, I'm sending this as an RFC now
> since there are a number of different ways the specific implementation
> details can be handled. I chose a shared code path for Xen and KVM guests
> but could just as easily create a separate code path that is advertised by
> a different ELF note for KVM. There also seems to be some flexibility in
> how the e820 table data is passed and how (or if) it should be identified
> as e820 data. As a starting point, I've chosen the options that seem to
> result in the smallest patch with minimal to no changes required of the
> x86/HVM direct boot ABI.
> ---
>  arch/x86/xen/enlighten_pvh.c | 74 ++++++++++++++++++++++++++++++++------------
>  1 file changed, 55 insertions(+), 19 deletions(-)
>
> diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c
> index 98ab176..d93f711 100644
> --- a/arch/x86/xen/enlighten_pvh.c
> +++ b/arch/x86/xen/enlighten_pvh.c
> @@ -31,21 +31,46 @@ static void xen_pvh_arch_setup(void)
>  		acpi_irq_model = ACPI_IRQ_MODEL_PLATFORM;
>  }
>  
> -static void __init init_pvh_bootparams(void)
> +static void __init init_pvh_bootparams(bool xen_guest)
>  {
>  	struct xen_memory_map memmap;
>  	int rc;
>  
>  	memset(&pvh_bootparams, 0, sizeof(pvh_bootparams));
>  
> -	memmap.nr_entries = ARRAY_SIZE(pvh_bootparams.e820_table);
> -	set_xen_guest_handle(memmap.buffer, pvh_bootparams.e820_table);
> -	rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap);
> -	if (rc) {
> -		xen_raw_printk("XENMEM_memory_map failed (%d)\n", rc);
> -		BUG();
> +	if (xen_guest) {
> +		memmap.nr_entries = ARRAY_SIZE(pvh_bootparams.e820_table);
> +		set_xen_guest_handle(memmap.buffer, pvh_bootparams.e820_table);
> +		rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap);
> +		if (rc) {
> +			xen_raw_printk("XENMEM_memory_map failed (%d)\n", rc);
> +			BUG();
> +		}
> +		pvh_bootparams.e820_entries = memmap.nr_entries;
> +	} else if (pvh_start_info.nr_modules > 1) {
> +		/* The second module should be the e820 data for KVM guests */
> +		struct hvm_modlist_entry *modaddr;
> +		char e820_sig[] = "e820 data";
> +		struct boot_e820_entry *ep;
> +		struct e820_table *tp;
> +		char *cmdline_str;
> +		int idx;
> +
> +		modaddr = __va(pvh_start_info.modlist_paddr +
> +			       sizeof(struct hvm_modlist_entry));
> +		cmdline_str = __va(modaddr->cmdline_paddr);
> +
> +		if ((modaddr->cmdline_paddr) &&
> +		    (!strncmp(e820_sig, cmdline_str, sizeof(e820_sig)))) {
> +			tp = __va(modaddr->paddr);
> +			ep = (struct boot_e820_entry *)tp->entries;
> +
> +			pvh_bootparams.e820_entries = tp->nr_entries;
> +
> +			for (idx = 0; idx < tp->nr_entries ; idx++, ep++)
> +				pvh_bootparams.e820_table[idx] = *ep;
> +		}
>  	}
> -	pvh_bootparams.e820_entries = memmap.nr_entries;
>  
>  	if (pvh_bootparams.e820_entries < E820_MAX_ENTRIES_ZEROPAGE - 1) {
>  		pvh_bootparams.e820_table[pvh_bootparams.e820_entries].addr =
> @@ -55,8 +80,9 @@ static void __init init_pvh_bootparams(void)
>  		pvh_bootparams.e820_table[pvh_bootparams.e820_entries].type =
>  			E820_TYPE_RESERVED;
>  		pvh_bootparams.e820_entries++;
> -	} else
> +	} else if (xen_guest) {
>  		xen_raw_printk("Warning: Can fit ISA range into e820\n");
> +	}
>  
>  	pvh_bootparams.hdr.cmd_line_ptr =
>  		pvh_start_info.cmdline_paddr;
> @@ -76,7 +102,7 @@ static void __init init_pvh_bootparams(void)
>  	 * environment (i.e. hardware_subarch 0).
>  	 */
>  	pvh_bootparams.hdr.version = 0x212;
> -	pvh_bootparams.hdr.type_of_loader = (9 << 4) | 0; /* Xen loader */
> +	pvh_bootparams.hdr.type_of_loader = ((xen_guest ? 0x9 : 0xb) << 4) | 0;
>  }
>  
>  /*
> @@ -85,22 +111,32 @@ static void __init init_pvh_bootparams(void)
>   */
>  void __init xen_prepare_pvh(void)
>  {
> -	u32 msr;
> +
> +	u32 msr = xen_cpuid_base();
>  	u64 pfn;
> +	bool xen_guest = msr ? true : false;
>  
>  	if (pvh_start_info.magic != XEN_HVM_START_MAGIC_VALUE) {
> -		xen_raw_printk("Error: Unexpected magic value (0x%08x)\n",
> -				pvh_start_info.magic);
> +		if (xen_guest)
> +			xen_raw_printk("Error: Unexpected magic value (0x%08x)\n",
> +					pvh_start_info.magic);
>  		BUG();
>  	}
>  
> -	xen_pvh = 1;
> +	if (xen_guest) {
> +		xen_pvh = 1;
> +
> +		msr = cpuid_ebx(msr + 2);
> +		pfn = __pa(hypercall_page);
> +		wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32));
> +
> +	} else if (!hypervisor_cpuid_base("KVMKVMKVM\0\0\0", 0)) {
> +		BUG();
> +	}
>  
> -	msr = cpuid_ebx(xen_cpuid_base() + 2);
> -	pfn = __pa(hypercall_page);
> -	wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32));
> +	init_pvh_bootparams(xen_guest);
>  
> -	init_pvh_bootparams();
> +	if (xen_guest)
> +		x86_init.oem.arch_setup = xen_pvh_arch_setup;
>  
> -	x86_init.oem.arch_setup = xen_pvh_arch_setup;
>  }


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [RFC PATCH] KVM: x86: Allow Qemu/KVM to use PVH entry point
  2017-11-28 19:34 ` Maran Wilson
                   ` (2 preceding siblings ...)
  (?)
@ 2017-11-28 19:58 ` Christoph Hellwig
  -1 siblings, 0 replies; 41+ messages in thread
From: Christoph Hellwig @ 2017-11-28 19:58 UTC (permalink / raw)
  To: Maran Wilson
  Cc: boris.ostrovsky, jgross, tglx, mingo, hpa, x86, xen-devel,
	linux-kernel, roger.pau, rkrcmar, JBeulich, andrew.cooper3,
	pbonzini, kvm

On Tue, Nov 28, 2017 at 11:34:42AM -0800, Maran Wilson wrote:
> This PoC patch enables Qemu to use that same entry point for booting KVM
> guests.

Nice.  I do a lot of -kernel boots in qemu/kvm for testing, and
speeding this further up would be great.

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [RFC PATCH] KVM: x86: Allow Qemu/KVM to use PVH entry point
  2017-11-28 19:34 ` Maran Wilson
  (?)
  (?)
@ 2017-11-28 19:58 ` Christoph Hellwig
  -1 siblings, 0 replies; 41+ messages in thread
From: Christoph Hellwig @ 2017-11-28 19:58 UTC (permalink / raw)
  To: Maran Wilson
  Cc: jgross, kvm, pbonzini, rkrcmar, andrew.cooper3, x86,
	linux-kernel, mingo, JBeulich, hpa, xen-devel, boris.ostrovsky,
	tglx, roger.pau

On Tue, Nov 28, 2017 at 11:34:42AM -0800, Maran Wilson wrote:
> This PoC patch enables Qemu to use that same entry point for booting KVM
> guests.

Nice.  I do a lot of -kernel boots in qemu/kvm for testing, and
speeding this further up would be great.

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [RFC PATCH] KVM: x86: Allow Qemu/KVM to use PVH entry point
  2017-11-28 19:34 ` Maran Wilson
                   ` (3 preceding siblings ...)
  (?)
@ 2017-11-29  8:21 ` Juergen Gross
  2017-11-29  8:50   ` Roger Pau Monné
                     ` (3 more replies)
  -1 siblings, 4 replies; 41+ messages in thread
From: Juergen Gross @ 2017-11-29  8:21 UTC (permalink / raw)
  To: Maran Wilson, boris.ostrovsky, tglx, mingo, hpa, x86, xen-devel,
	linux-kernel, roger.pau, rkrcmar, JBeulich, andrew.cooper3,
	pbonzini, kvm

On 28/11/17 20:34, Maran Wilson wrote:
> For certain applications it is desirable to rapidly boot a KVM virtual
> machine. In cases where legacy hardware and software support within the
> guest is not needed, Qemu should be able to boot directly into the
> uncompressed Linux kernel binary without the need to run firmware.
> 
> There already exists an ABI to allow this for Xen PVH guests and the ABI is
> supported by Linux and FreeBSD:
> 
>    https://xenbits.xen.org/docs/unstable/misc/hvmlite.html
> 
> This PoC patch enables Qemu to use that same entry point for booting KVM
> guests.
> 
> Even though the code is still PoC quality, I'm sending this as an RFC now
> since there are a number of different ways the specific implementation
> details can be handled. I chose a shared code path for Xen and KVM guests
> but could just as easily create a separate code path that is advertised by
> a different ELF note for KVM. There also seems to be some flexibility in
> how the e820 table data is passed and how (or if) it should be identified
> as e820 data. As a starting point, I've chosen the options that seem to
> result in the smallest patch with minimal to no changes required of the
> x86/HVM direct boot ABI.

I like the idea.

I'd rather split up the different hypervisor types early and use a
common set of service functions instead of special casing xen_guest
everywhere. This would make it much easier to support the KVM PVH
boot without the need to configure the kernel with CONFIG_XEN.

Another option would be to use the same boot path as with grub: set
the boot params in zeropage and start at startup_32.


Juergen

> ---
>  arch/x86/xen/enlighten_pvh.c | 74 ++++++++++++++++++++++++++++++++------------
>  1 file changed, 55 insertions(+), 19 deletions(-)
> 
> diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c
> index 98ab176..d93f711 100644
> --- a/arch/x86/xen/enlighten_pvh.c
> +++ b/arch/x86/xen/enlighten_pvh.c
> @@ -31,21 +31,46 @@ static void xen_pvh_arch_setup(void)
>  		acpi_irq_model = ACPI_IRQ_MODEL_PLATFORM;
>  }
>  
> -static void __init init_pvh_bootparams(void)
> +static void __init init_pvh_bootparams(bool xen_guest)
>  {
>  	struct xen_memory_map memmap;
>  	int rc;
>  
>  	memset(&pvh_bootparams, 0, sizeof(pvh_bootparams));
>  
> -	memmap.nr_entries = ARRAY_SIZE(pvh_bootparams.e820_table);
> -	set_xen_guest_handle(memmap.buffer, pvh_bootparams.e820_table);
> -	rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap);
> -	if (rc) {
> -		xen_raw_printk("XENMEM_memory_map failed (%d)\n", rc);
> -		BUG();
> +	if (xen_guest) {
> +		memmap.nr_entries = ARRAY_SIZE(pvh_bootparams.e820_table);
> +		set_xen_guest_handle(memmap.buffer, pvh_bootparams.e820_table);
> +		rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap);
> +		if (rc) {
> +			xen_raw_printk("XENMEM_memory_map failed (%d)\n", rc);
> +			BUG();
> +		}
> +		pvh_bootparams.e820_entries = memmap.nr_entries;
> +	} else if (pvh_start_info.nr_modules > 1) {
> +		/* The second module should be the e820 data for KVM guests */
> +		struct hvm_modlist_entry *modaddr;
> +		char e820_sig[] = "e820 data";
> +		struct boot_e820_entry *ep;
> +		struct e820_table *tp;
> +		char *cmdline_str;
> +		int idx;
> +
> +		modaddr = __va(pvh_start_info.modlist_paddr +
> +			       sizeof(struct hvm_modlist_entry));
> +		cmdline_str = __va(modaddr->cmdline_paddr);
> +
> +		if ((modaddr->cmdline_paddr) &&
> +		    (!strncmp(e820_sig, cmdline_str, sizeof(e820_sig)))) {
> +			tp = __va(modaddr->paddr);
> +			ep = (struct boot_e820_entry *)tp->entries;
> +
> +			pvh_bootparams.e820_entries = tp->nr_entries;
> +
> +			for (idx = 0; idx < tp->nr_entries ; idx++, ep++)
> +				pvh_bootparams.e820_table[idx] = *ep;
> +		}
>  	}
> -	pvh_bootparams.e820_entries = memmap.nr_entries;
>  
>  	if (pvh_bootparams.e820_entries < E820_MAX_ENTRIES_ZEROPAGE - 1) {
>  		pvh_bootparams.e820_table[pvh_bootparams.e820_entries].addr =
> @@ -55,8 +80,9 @@ static void __init init_pvh_bootparams(void)
>  		pvh_bootparams.e820_table[pvh_bootparams.e820_entries].type =
>  			E820_TYPE_RESERVED;
>  		pvh_bootparams.e820_entries++;
> -	} else
> +	} else if (xen_guest) {
>  		xen_raw_printk("Warning: Can fit ISA range into e820\n");
> +	}
>  
>  	pvh_bootparams.hdr.cmd_line_ptr =
>  		pvh_start_info.cmdline_paddr;
> @@ -76,7 +102,7 @@ static void __init init_pvh_bootparams(void)
>  	 * environment (i.e. hardware_subarch 0).
>  	 */
>  	pvh_bootparams.hdr.version = 0x212;
> -	pvh_bootparams.hdr.type_of_loader = (9 << 4) | 0; /* Xen loader */
> +	pvh_bootparams.hdr.type_of_loader = ((xen_guest ? 0x9 : 0xb) << 4) | 0;
>  }
>  
>  /*
> @@ -85,22 +111,32 @@ static void __init init_pvh_bootparams(void)
>   */
>  void __init xen_prepare_pvh(void)
>  {
> -	u32 msr;
> +
> +	u32 msr = xen_cpuid_base();
>  	u64 pfn;
> +	bool xen_guest = msr ? true : false;
>  
>  	if (pvh_start_info.magic != XEN_HVM_START_MAGIC_VALUE) {
> -		xen_raw_printk("Error: Unexpected magic value (0x%08x)\n",
> -				pvh_start_info.magic);
> +		if (xen_guest)
> +			xen_raw_printk("Error: Unexpected magic value (0x%08x)\n",
> +					pvh_start_info.magic);
>  		BUG();
>  	}
>  
> -	xen_pvh = 1;
> +	if (xen_guest) {
> +		xen_pvh = 1;
> +
> +		msr = cpuid_ebx(msr + 2);
> +		pfn = __pa(hypercall_page);
> +		wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32));
> +
> +	} else if (!hypervisor_cpuid_base("KVMKVMKVM\0\0\0", 0)) {
> +		BUG();
> +	}
>  
> -	msr = cpuid_ebx(xen_cpuid_base() + 2);
> -	pfn = __pa(hypercall_page);
> -	wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32));
> +	init_pvh_bootparams(xen_guest);
>  
> -	init_pvh_bootparams();
> +	if (xen_guest)
> +		x86_init.oem.arch_setup = xen_pvh_arch_setup;
>  
> -	x86_init.oem.arch_setup = xen_pvh_arch_setup;
>  }
> 

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [RFC PATCH] KVM: x86: Allow Qemu/KVM to use PVH entry point
  2017-11-28 19:34 ` Maran Wilson
                   ` (4 preceding siblings ...)
  (?)
@ 2017-11-29  8:21 ` Juergen Gross
  -1 siblings, 0 replies; 41+ messages in thread
From: Juergen Gross @ 2017-11-29  8:21 UTC (permalink / raw)
  To: Maran Wilson, boris.ostrovsky, tglx, mingo, hpa, x86, xen-devel,
	linux-kernel, roger.pau, rkrcmar, JBeulich, andrew.cooper3,
	pbonzini, kvm

On 28/11/17 20:34, Maran Wilson wrote:
> For certain applications it is desirable to rapidly boot a KVM virtual
> machine. In cases where legacy hardware and software support within the
> guest is not needed, Qemu should be able to boot directly into the
> uncompressed Linux kernel binary without the need to run firmware.
> 
> There already exists an ABI to allow this for Xen PVH guests and the ABI is
> supported by Linux and FreeBSD:
> 
>    https://xenbits.xen.org/docs/unstable/misc/hvmlite.html
> 
> This PoC patch enables Qemu to use that same entry point for booting KVM
> guests.
> 
> Even though the code is still PoC quality, I'm sending this as an RFC now
> since there are a number of different ways the specific implementation
> details can be handled. I chose a shared code path for Xen and KVM guests
> but could just as easily create a separate code path that is advertised by
> a different ELF note for KVM. There also seems to be some flexibility in
> how the e820 table data is passed and how (or if) it should be identified
> as e820 data. As a starting point, I've chosen the options that seem to
> result in the smallest patch with minimal to no changes required of the
> x86/HVM direct boot ABI.

I like the idea.

I'd rather split up the different hypervisor types early and use a
common set of service functions instead of special casing xen_guest
everywhere. This would make it much easier to support the KVM PVH
boot without the need to configure the kernel with CONFIG_XEN.

Another option would be to use the same boot path as with grub: set
the boot params in zeropage and start at startup_32.


Juergen

> ---
>  arch/x86/xen/enlighten_pvh.c | 74 ++++++++++++++++++++++++++++++++------------
>  1 file changed, 55 insertions(+), 19 deletions(-)
> 
> diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c
> index 98ab176..d93f711 100644
> --- a/arch/x86/xen/enlighten_pvh.c
> +++ b/arch/x86/xen/enlighten_pvh.c
> @@ -31,21 +31,46 @@ static void xen_pvh_arch_setup(void)
>  		acpi_irq_model = ACPI_IRQ_MODEL_PLATFORM;
>  }
>  
> -static void __init init_pvh_bootparams(void)
> +static void __init init_pvh_bootparams(bool xen_guest)
>  {
>  	struct xen_memory_map memmap;
>  	int rc;
>  
>  	memset(&pvh_bootparams, 0, sizeof(pvh_bootparams));
>  
> -	memmap.nr_entries = ARRAY_SIZE(pvh_bootparams.e820_table);
> -	set_xen_guest_handle(memmap.buffer, pvh_bootparams.e820_table);
> -	rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap);
> -	if (rc) {
> -		xen_raw_printk("XENMEM_memory_map failed (%d)\n", rc);
> -		BUG();
> +	if (xen_guest) {
> +		memmap.nr_entries = ARRAY_SIZE(pvh_bootparams.e820_table);
> +		set_xen_guest_handle(memmap.buffer, pvh_bootparams.e820_table);
> +		rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap);
> +		if (rc) {
> +			xen_raw_printk("XENMEM_memory_map failed (%d)\n", rc);
> +			BUG();
> +		}
> +		pvh_bootparams.e820_entries = memmap.nr_entries;
> +	} else if (pvh_start_info.nr_modules > 1) {
> +		/* The second module should be the e820 data for KVM guests */
> +		struct hvm_modlist_entry *modaddr;
> +		char e820_sig[] = "e820 data";
> +		struct boot_e820_entry *ep;
> +		struct e820_table *tp;
> +		char *cmdline_str;
> +		int idx;
> +
> +		modaddr = __va(pvh_start_info.modlist_paddr +
> +			       sizeof(struct hvm_modlist_entry));
> +		cmdline_str = __va(modaddr->cmdline_paddr);
> +
> +		if ((modaddr->cmdline_paddr) &&
> +		    (!strncmp(e820_sig, cmdline_str, sizeof(e820_sig)))) {
> +			tp = __va(modaddr->paddr);
> +			ep = (struct boot_e820_entry *)tp->entries;
> +
> +			pvh_bootparams.e820_entries = tp->nr_entries;
> +
> +			for (idx = 0; idx < tp->nr_entries ; idx++, ep++)
> +				pvh_bootparams.e820_table[idx] = *ep;
> +		}
>  	}
> -	pvh_bootparams.e820_entries = memmap.nr_entries;
>  
>  	if (pvh_bootparams.e820_entries < E820_MAX_ENTRIES_ZEROPAGE - 1) {
>  		pvh_bootparams.e820_table[pvh_bootparams.e820_entries].addr =
> @@ -55,8 +80,9 @@ static void __init init_pvh_bootparams(void)
>  		pvh_bootparams.e820_table[pvh_bootparams.e820_entries].type =
>  			E820_TYPE_RESERVED;
>  		pvh_bootparams.e820_entries++;
> -	} else
> +	} else if (xen_guest) {
>  		xen_raw_printk("Warning: Can fit ISA range into e820\n");
> +	}
>  
>  	pvh_bootparams.hdr.cmd_line_ptr =
>  		pvh_start_info.cmdline_paddr;
> @@ -76,7 +102,7 @@ static void __init init_pvh_bootparams(void)
>  	 * environment (i.e. hardware_subarch 0).
>  	 */
>  	pvh_bootparams.hdr.version = 0x212;
> -	pvh_bootparams.hdr.type_of_loader = (9 << 4) | 0; /* Xen loader */
> +	pvh_bootparams.hdr.type_of_loader = ((xen_guest ? 0x9 : 0xb) << 4) | 0;
>  }
>  
>  /*
> @@ -85,22 +111,32 @@ static void __init init_pvh_bootparams(void)
>   */
>  void __init xen_prepare_pvh(void)
>  {
> -	u32 msr;
> +
> +	u32 msr = xen_cpuid_base();
>  	u64 pfn;
> +	bool xen_guest = msr ? true : false;
>  
>  	if (pvh_start_info.magic != XEN_HVM_START_MAGIC_VALUE) {
> -		xen_raw_printk("Error: Unexpected magic value (0x%08x)\n",
> -				pvh_start_info.magic);
> +		if (xen_guest)
> +			xen_raw_printk("Error: Unexpected magic value (0x%08x)\n",
> +					pvh_start_info.magic);
>  		BUG();
>  	}
>  
> -	xen_pvh = 1;
> +	if (xen_guest) {
> +		xen_pvh = 1;
> +
> +		msr = cpuid_ebx(msr + 2);
> +		pfn = __pa(hypercall_page);
> +		wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32));
> +
> +	} else if (!hypervisor_cpuid_base("KVMKVMKVM\0\0\0", 0)) {
> +		BUG();
> +	}
>  
> -	msr = cpuid_ebx(xen_cpuid_base() + 2);
> -	pfn = __pa(hypercall_page);
> -	wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32));
> +	init_pvh_bootparams(xen_guest);
>  
> -	init_pvh_bootparams();
> +	if (xen_guest)
> +		x86_init.oem.arch_setup = xen_pvh_arch_setup;
>  
> -	x86_init.oem.arch_setup = xen_pvh_arch_setup;
>  }
> 


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [RFC PATCH] KVM: x86: Allow Qemu/KVM to use PVH entry point
  2017-11-29  8:21 ` Juergen Gross
  2017-11-29  8:50   ` Roger Pau Monné
@ 2017-11-29  8:50   ` Roger Pau Monné
  2017-11-29 14:03     ` Boris Ostrovsky
  2017-11-29 14:03     ` Boris Ostrovsky
  2017-11-29 17:24   ` Maran Wilson
  2017-11-29 17:24   ` Maran Wilson
  3 siblings, 2 replies; 41+ messages in thread
From: Roger Pau Monné @ 2017-11-29  8:50 UTC (permalink / raw)
  To: Juergen Gross
  Cc: Maran Wilson, boris.ostrovsky, tglx, mingo, hpa, x86, xen-devel,
	linux-kernel, rkrcmar, JBeulich, andrew.cooper3, pbonzini, kvm

On Wed, Nov 29, 2017 at 09:21:59AM +0100, Juergen Gross wrote:
> On 28/11/17 20:34, Maran Wilson wrote:
> > For certain applications it is desirable to rapidly boot a KVM virtual
> > machine. In cases where legacy hardware and software support within the
> > guest is not needed, Qemu should be able to boot directly into the
> > uncompressed Linux kernel binary without the need to run firmware.
> > 
> > There already exists an ABI to allow this for Xen PVH guests and the ABI is
> > supported by Linux and FreeBSD:
> > 
> >    https://xenbits.xen.org/docs/unstable/misc/hvmlite.html

I would also add a link to:

http://xenbits.xen.org/docs/unstable/hypercall/x86_64/include,public,arch-x86,hvm,start_info.h.html#Struct_hvm_start_info

> > This PoC patch enables Qemu to use that same entry point for booting KVM
> > guests.
> > 
> > Even though the code is still PoC quality, I'm sending this as an RFC now
> > since there are a number of different ways the specific implementation
> > details can be handled. I chose a shared code path for Xen and KVM guests
> > but could just as easily create a separate code path that is advertised by
> > a different ELF note for KVM. There also seems to be some flexibility in
> > how the e820 table data is passed and how (or if) it should be identified
> > as e820 data. As a starting point, I've chosen the options that seem to
> > result in the smallest patch with minimal to no changes required of the
> > x86/HVM direct boot ABI.
> 
> I like the idea.
> 
> I'd rather split up the different hypervisor types early and use a
> common set of service functions instead of special casing xen_guest
> everywhere. This would make it much easier to support the KVM PVH
> boot without the need to configure the kernel with CONFIG_XEN.
> 
> Another option would be to use the same boot path as with grub: set
> the boot params in zeropage and start at startup_32.

I think I prefer this approach since AFAICT it should allow for
greater code share with the common boot path.

> 
> Juergen
> 
> > ---
> >  arch/x86/xen/enlighten_pvh.c | 74 ++++++++++++++++++++++++++++++++------------
> >  1 file changed, 55 insertions(+), 19 deletions(-)
> > 
> > diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c
> > index 98ab176..d93f711 100644
> > --- a/arch/x86/xen/enlighten_pvh.c
> > +++ b/arch/x86/xen/enlighten_pvh.c
> > @@ -31,21 +31,46 @@ static void xen_pvh_arch_setup(void)
> >  		acpi_irq_model = ACPI_IRQ_MODEL_PLATFORM;
> >  }
> >  
> > -static void __init init_pvh_bootparams(void)
> > +static void __init init_pvh_bootparams(bool xen_guest)
> >  {
> >  	struct xen_memory_map memmap;
> >  	int rc;
> >  
> >  	memset(&pvh_bootparams, 0, sizeof(pvh_bootparams));
> >  
> > -	memmap.nr_entries = ARRAY_SIZE(pvh_bootparams.e820_table);
> > -	set_xen_guest_handle(memmap.buffer, pvh_bootparams.e820_table);
> > -	rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap);
> > -	if (rc) {
> > -		xen_raw_printk("XENMEM_memory_map failed (%d)\n", rc);
> > -		BUG();
> > +	if (xen_guest) {
> > +		memmap.nr_entries = ARRAY_SIZE(pvh_bootparams.e820_table);
> > +		set_xen_guest_handle(memmap.buffer, pvh_bootparams.e820_table);
> > +		rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap);
> > +		if (rc) {
> > +			xen_raw_printk("XENMEM_memory_map failed (%d)\n", rc);
> > +			BUG();
> > +		}
> > +		pvh_bootparams.e820_entries = memmap.nr_entries;
> > +	} else if (pvh_start_info.nr_modules > 1) {
> > +		/* The second module should be the e820 data for KVM guests */

I don't think this is desirable. You might want to boot other OSes
using this method, and they might want to pass more than one module.

IMHO the hvm_start_info structure should be bumped to contain a
pointer to the memory map. Note that there's a 'version' field that
can be used for that. Even on Xen we might want to pass the memory map
in such a way instead of using the hypercall.

Thanks, Roger.

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [RFC PATCH] KVM: x86: Allow Qemu/KVM to use PVH entry point
  2017-11-29  8:21 ` Juergen Gross
@ 2017-11-29  8:50   ` Roger Pau Monné
  2017-11-29  8:50   ` Roger Pau Monné
                     ` (2 subsequent siblings)
  3 siblings, 0 replies; 41+ messages in thread
From: Roger Pau Monné @ 2017-11-29  8:50 UTC (permalink / raw)
  To: Juergen Gross
  Cc: rkrcmar, kvm, pbonzini, Maran Wilson, andrew.cooper3, x86,
	linux-kernel, mingo, JBeulich, hpa, xen-devel, boris.ostrovsky,
	tglx

On Wed, Nov 29, 2017 at 09:21:59AM +0100, Juergen Gross wrote:
> On 28/11/17 20:34, Maran Wilson wrote:
> > For certain applications it is desirable to rapidly boot a KVM virtual
> > machine. In cases where legacy hardware and software support within the
> > guest is not needed, Qemu should be able to boot directly into the
> > uncompressed Linux kernel binary without the need to run firmware.
> > 
> > There already exists an ABI to allow this for Xen PVH guests and the ABI is
> > supported by Linux and FreeBSD:
> > 
> >    https://xenbits.xen.org/docs/unstable/misc/hvmlite.html

I would also add a link to:

http://xenbits.xen.org/docs/unstable/hypercall/x86_64/include,public,arch-x86,hvm,start_info.h.html#Struct_hvm_start_info

> > This PoC patch enables Qemu to use that same entry point for booting KVM
> > guests.
> > 
> > Even though the code is still PoC quality, I'm sending this as an RFC now
> > since there are a number of different ways the specific implementation
> > details can be handled. I chose a shared code path for Xen and KVM guests
> > but could just as easily create a separate code path that is advertised by
> > a different ELF note for KVM. There also seems to be some flexibility in
> > how the e820 table data is passed and how (or if) it should be identified
> > as e820 data. As a starting point, I've chosen the options that seem to
> > result in the smallest patch with minimal to no changes required of the
> > x86/HVM direct boot ABI.
> 
> I like the idea.
> 
> I'd rather split up the different hypervisor types early and use a
> common set of service functions instead of special casing xen_guest
> everywhere. This would make it much easier to support the KVM PVH
> boot without the need to configure the kernel with CONFIG_XEN.
> 
> Another option would be to use the same boot path as with grub: set
> the boot params in zeropage and start at startup_32.

I think I prefer this approach since AFAICT it should allow for
greater code share with the common boot path.

> 
> Juergen
> 
> > ---
> >  arch/x86/xen/enlighten_pvh.c | 74 ++++++++++++++++++++++++++++++++------------
> >  1 file changed, 55 insertions(+), 19 deletions(-)
> > 
> > diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c
> > index 98ab176..d93f711 100644
> > --- a/arch/x86/xen/enlighten_pvh.c
> > +++ b/arch/x86/xen/enlighten_pvh.c
> > @@ -31,21 +31,46 @@ static void xen_pvh_arch_setup(void)
> >  		acpi_irq_model = ACPI_IRQ_MODEL_PLATFORM;
> >  }
> >  
> > -static void __init init_pvh_bootparams(void)
> > +static void __init init_pvh_bootparams(bool xen_guest)
> >  {
> >  	struct xen_memory_map memmap;
> >  	int rc;
> >  
> >  	memset(&pvh_bootparams, 0, sizeof(pvh_bootparams));
> >  
> > -	memmap.nr_entries = ARRAY_SIZE(pvh_bootparams.e820_table);
> > -	set_xen_guest_handle(memmap.buffer, pvh_bootparams.e820_table);
> > -	rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap);
> > -	if (rc) {
> > -		xen_raw_printk("XENMEM_memory_map failed (%d)\n", rc);
> > -		BUG();
> > +	if (xen_guest) {
> > +		memmap.nr_entries = ARRAY_SIZE(pvh_bootparams.e820_table);
> > +		set_xen_guest_handle(memmap.buffer, pvh_bootparams.e820_table);
> > +		rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap);
> > +		if (rc) {
> > +			xen_raw_printk("XENMEM_memory_map failed (%d)\n", rc);
> > +			BUG();
> > +		}
> > +		pvh_bootparams.e820_entries = memmap.nr_entries;
> > +	} else if (pvh_start_info.nr_modules > 1) {
> > +		/* The second module should be the e820 data for KVM guests */

I don't think this is desirable. You might want to boot other OSes
using this method, and they might want to pass more than one module.

IMHO the hvm_start_info structure should be bumped to contain a
pointer to the memory map. Note that there's a 'version' field that
can be used for that. Even on Xen we might want to pass the memory map
in such a way instead of using the hypercall.

Thanks, Roger.

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [RFC PATCH] KVM: x86: Allow Qemu/KVM to use PVH entry point
  2017-11-28 19:34 ` Maran Wilson
                   ` (5 preceding siblings ...)
  (?)
@ 2017-11-29  8:59 ` Paolo Bonzini
  2017-11-29 17:14   ` Maran Wilson
  2017-11-29 17:14   ` Maran Wilson
  -1 siblings, 2 replies; 41+ messages in thread
From: Paolo Bonzini @ 2017-11-29  8:59 UTC (permalink / raw)
  To: Maran Wilson, boris.ostrovsky, jgross, tglx, mingo, hpa, x86,
	xen-devel, linux-kernel, roger.pau, rkrcmar, JBeulich,
	andrew.cooper3, kvm

On 28/11/2017 20:34, Maran Wilson wrote:
> For certain applications it is desirable to rapidly boot a KVM virtual
> machine. In cases where legacy hardware and software support within the
> guest is not needed, Qemu should be able to boot directly into the
> uncompressed Linux kernel binary without the need to run firmware.
> 
> There already exists an ABI to allow this for Xen PVH guests and the ABI is
> supported by Linux and FreeBSD:
> 
>    https://xenbits.xen.org/docs/unstable/misc/hvmlite.html
> 
> This PoC patch enables Qemu to use that same entry point for booting KVM
> guests.

Nice!  So QEMU would parse the ELF file just like for multiboot, find
the ELF note, and then prepare an hvmlite boot info struct instead of
the multiboot one?  There would then be a new option ROM, very similar
to multiboot.S.

Thanks,

Paolo

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [RFC PATCH] KVM: x86: Allow Qemu/KVM to use PVH entry point
  2017-11-28 19:34 ` Maran Wilson
                   ` (6 preceding siblings ...)
  (?)
@ 2017-11-29  8:59 ` Paolo Bonzini
  -1 siblings, 0 replies; 41+ messages in thread
From: Paolo Bonzini @ 2017-11-29  8:59 UTC (permalink / raw)
  To: Maran Wilson, boris.ostrovsky, jgross, tglx, mingo, hpa, x86,
	xen-devel, linux-kernel, roger.pau, rkrcmar, JBeulich,
	andrew.cooper3, kvm

On 28/11/2017 20:34, Maran Wilson wrote:
> For certain applications it is desirable to rapidly boot a KVM virtual
> machine. In cases where legacy hardware and software support within the
> guest is not needed, Qemu should be able to boot directly into the
> uncompressed Linux kernel binary without the need to run firmware.
> 
> There already exists an ABI to allow this for Xen PVH guests and the ABI is
> supported by Linux and FreeBSD:
> 
>    https://xenbits.xen.org/docs/unstable/misc/hvmlite.html
> 
> This PoC patch enables Qemu to use that same entry point for booting KVM
> guests.

Nice!  So QEMU would parse the ELF file just like for multiboot, find
the ELF note, and then prepare an hvmlite boot info struct instead of
the multiboot one?  There would then be a new option ROM, very similar
to multiboot.S.

Thanks,

Paolo

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [RFC PATCH] KVM: x86: Allow Qemu/KVM to use PVH entry point
  2017-11-29  8:50   ` Roger Pau Monné
@ 2017-11-29 14:03     ` Boris Ostrovsky
  2017-11-29 14:11       ` Juergen Gross
  2017-11-29 14:11       ` Juergen Gross
  2017-11-29 14:03     ` Boris Ostrovsky
  1 sibling, 2 replies; 41+ messages in thread
From: Boris Ostrovsky @ 2017-11-29 14:03 UTC (permalink / raw)
  To: Roger Pau Monné, Juergen Gross
  Cc: Maran Wilson, tglx, mingo, hpa, x86, xen-devel, linux-kernel,
	rkrcmar, JBeulich, andrew.cooper3, pbonzini, kvm

On 11/29/2017 03:50 AM, Roger Pau Monné wrote:
> On Wed, Nov 29, 2017 at 09:21:59AM +0100, Juergen Gross wrote:
>> On 28/11/17 20:34, Maran Wilson wrote:
>>> For certain applications it is desirable to rapidly boot a KVM virtual
>>> machine. In cases where legacy hardware and software support within the
>>> guest is not needed, Qemu should be able to boot directly into the
>>> uncompressed Linux kernel binary without the need to run firmware.
>>>
>>> There already exists an ABI to allow this for Xen PVH guests and the ABI is
>>> supported by Linux and FreeBSD:
>>>
>>>    https://xenbits.xen.org/docs/unstable/misc/hvmlite.html
> I would also add a link to:
>
> http://xenbits.xen.org/docs/unstable/hypercall/x86_64/include,public,arch-x86,hvm,start_info.h.html#Struct_hvm_start_info
>
>>> This PoC patch enables Qemu to use that same entry point for booting KVM
>>> guests.
>>>
>>> Even though the code is still PoC quality, I'm sending this as an RFC now
>>> since there are a number of different ways the specific implementation
>>> details can be handled. I chose a shared code path for Xen and KVM guests
>>> but could just as easily create a separate code path that is advertised by
>>> a different ELF note for KVM. There also seems to be some flexibility in
>>> how the e820 table data is passed and how (or if) it should be identified
>>> as e820 data. As a starting point, I've chosen the options that seem to
>>> result in the smallest patch with minimal to no changes required of the
>>> x86/HVM direct boot ABI.
>> I like the idea.
>>
>> I'd rather split up the different hypervisor types early and use a
>> common set of service functions instead of special casing xen_guest
>> everywhere. This would make it much easier to support the KVM PVH
>> boot without the need to configure the kernel with CONFIG_XEN.
>>
>> Another option would be to use the same boot path as with grub: set
>> the boot params in zeropage and start at startup_32.
> I think I prefer this approach since AFAICT it should allow for
> greater code share with the common boot path.

zeropage is x86/Linux-specific so we'd need some sort of firmware (like
grub) between a hypervisor and Linux to convert hvm_start_info to
bootparams.

-boris

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [RFC PATCH] KVM: x86: Allow Qemu/KVM to use PVH entry point
  2017-11-29  8:50   ` Roger Pau Monné
  2017-11-29 14:03     ` Boris Ostrovsky
@ 2017-11-29 14:03     ` Boris Ostrovsky
  1 sibling, 0 replies; 41+ messages in thread
From: Boris Ostrovsky @ 2017-11-29 14:03 UTC (permalink / raw)
  To: Roger Pau Monné, Juergen Gross
  Cc: rkrcmar, kvm, pbonzini, Maran Wilson, andrew.cooper3, x86,
	linux-kernel, mingo, JBeulich, hpa, xen-devel, tglx

On 11/29/2017 03:50 AM, Roger Pau Monné wrote:
> On Wed, Nov 29, 2017 at 09:21:59AM +0100, Juergen Gross wrote:
>> On 28/11/17 20:34, Maran Wilson wrote:
>>> For certain applications it is desirable to rapidly boot a KVM virtual
>>> machine. In cases where legacy hardware and software support within the
>>> guest is not needed, Qemu should be able to boot directly into the
>>> uncompressed Linux kernel binary without the need to run firmware.
>>>
>>> There already exists an ABI to allow this for Xen PVH guests and the ABI is
>>> supported by Linux and FreeBSD:
>>>
>>>    https://xenbits.xen.org/docs/unstable/misc/hvmlite.html
> I would also add a link to:
>
> http://xenbits.xen.org/docs/unstable/hypercall/x86_64/include,public,arch-x86,hvm,start_info.h.html#Struct_hvm_start_info
>
>>> This PoC patch enables Qemu to use that same entry point for booting KVM
>>> guests.
>>>
>>> Even though the code is still PoC quality, I'm sending this as an RFC now
>>> since there are a number of different ways the specific implementation
>>> details can be handled. I chose a shared code path for Xen and KVM guests
>>> but could just as easily create a separate code path that is advertised by
>>> a different ELF note for KVM. There also seems to be some flexibility in
>>> how the e820 table data is passed and how (or if) it should be identified
>>> as e820 data. As a starting point, I've chosen the options that seem to
>>> result in the smallest patch with minimal to no changes required of the
>>> x86/HVM direct boot ABI.
>> I like the idea.
>>
>> I'd rather split up the different hypervisor types early and use a
>> common set of service functions instead of special casing xen_guest
>> everywhere. This would make it much easier to support the KVM PVH
>> boot without the need to configure the kernel with CONFIG_XEN.
>>
>> Another option would be to use the same boot path as with grub: set
>> the boot params in zeropage and start at startup_32.
> I think I prefer this approach since AFAICT it should allow for
> greater code share with the common boot path.

zeropage is x86/Linux-specific so we'd need some sort of firmware (like
grub) between a hypervisor and Linux to convert hvm_start_info to
bootparams.

-boris

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [RFC PATCH] KVM: x86: Allow Qemu/KVM to use PVH entry point
  2017-11-29 14:03     ` Boris Ostrovsky
  2017-11-29 14:11       ` Juergen Gross
@ 2017-11-29 14:11       ` Juergen Gross
  2017-11-29 14:18         ` Roger Pau Monné
  2017-11-29 14:18         ` Roger Pau Monné
  1 sibling, 2 replies; 41+ messages in thread
From: Juergen Gross @ 2017-11-29 14:11 UTC (permalink / raw)
  To: Boris Ostrovsky, Roger Pau Monné
  Cc: Maran Wilson, tglx, mingo, hpa, x86, xen-devel, linux-kernel,
	rkrcmar, JBeulich, andrew.cooper3, pbonzini, kvm

On 29/11/17 15:03, Boris Ostrovsky wrote:
> On 11/29/2017 03:50 AM, Roger Pau Monné wrote:
>> On Wed, Nov 29, 2017 at 09:21:59AM +0100, Juergen Gross wrote:
>>> On 28/11/17 20:34, Maran Wilson wrote:
>>>> For certain applications it is desirable to rapidly boot a KVM virtual
>>>> machine. In cases where legacy hardware and software support within the
>>>> guest is not needed, Qemu should be able to boot directly into the
>>>> uncompressed Linux kernel binary without the need to run firmware.
>>>>
>>>> There already exists an ABI to allow this for Xen PVH guests and the ABI is
>>>> supported by Linux and FreeBSD:
>>>>
>>>>    https://xenbits.xen.org/docs/unstable/misc/hvmlite.html
>> I would also add a link to:
>>
>> http://xenbits.xen.org/docs/unstable/hypercall/x86_64/include,public,arch-x86,hvm,start_info.h.html#Struct_hvm_start_info
>>
>>>> This PoC patch enables Qemu to use that same entry point for booting KVM
>>>> guests.
>>>>
>>>> Even though the code is still PoC quality, I'm sending this as an RFC now
>>>> since there are a number of different ways the specific implementation
>>>> details can be handled. I chose a shared code path for Xen and KVM guests
>>>> but could just as easily create a separate code path that is advertised by
>>>> a different ELF note for KVM. There also seems to be some flexibility in
>>>> how the e820 table data is passed and how (or if) it should be identified
>>>> as e820 data. As a starting point, I've chosen the options that seem to
>>>> result in the smallest patch with minimal to no changes required of the
>>>> x86/HVM direct boot ABI.
>>> I like the idea.
>>>
>>> I'd rather split up the different hypervisor types early and use a
>>> common set of service functions instead of special casing xen_guest
>>> everywhere. This would make it much easier to support the KVM PVH
>>> boot without the need to configure the kernel with CONFIG_XEN.
>>>
>>> Another option would be to use the same boot path as with grub: set
>>> the boot params in zeropage and start at startup_32.
>> I think I prefer this approach since AFAICT it should allow for
>> greater code share with the common boot path.
> 
> zeropage is x86/Linux-specific so we'd need some sort of firmware (like
> grub) between a hypervisor and Linux to convert hvm_start_info to
> bootparams.

qemu?


Juergen

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [RFC PATCH] KVM: x86: Allow Qemu/KVM to use PVH entry point
  2017-11-29 14:03     ` Boris Ostrovsky
@ 2017-11-29 14:11       ` Juergen Gross
  2017-11-29 14:11       ` Juergen Gross
  1 sibling, 0 replies; 41+ messages in thread
From: Juergen Gross @ 2017-11-29 14:11 UTC (permalink / raw)
  To: Boris Ostrovsky, Roger Pau Monné
  Cc: rkrcmar, kvm, pbonzini, Maran Wilson, andrew.cooper3, x86,
	linux-kernel, mingo, JBeulich, hpa, xen-devel, tglx

On 29/11/17 15:03, Boris Ostrovsky wrote:
> On 11/29/2017 03:50 AM, Roger Pau Monné wrote:
>> On Wed, Nov 29, 2017 at 09:21:59AM +0100, Juergen Gross wrote:
>>> On 28/11/17 20:34, Maran Wilson wrote:
>>>> For certain applications it is desirable to rapidly boot a KVM virtual
>>>> machine. In cases where legacy hardware and software support within the
>>>> guest is not needed, Qemu should be able to boot directly into the
>>>> uncompressed Linux kernel binary without the need to run firmware.
>>>>
>>>> There already exists an ABI to allow this for Xen PVH guests and the ABI is
>>>> supported by Linux and FreeBSD:
>>>>
>>>>    https://xenbits.xen.org/docs/unstable/misc/hvmlite.html
>> I would also add a link to:
>>
>> http://xenbits.xen.org/docs/unstable/hypercall/x86_64/include,public,arch-x86,hvm,start_info.h.html#Struct_hvm_start_info
>>
>>>> This PoC patch enables Qemu to use that same entry point for booting KVM
>>>> guests.
>>>>
>>>> Even though the code is still PoC quality, I'm sending this as an RFC now
>>>> since there are a number of different ways the specific implementation
>>>> details can be handled. I chose a shared code path for Xen and KVM guests
>>>> but could just as easily create a separate code path that is advertised by
>>>> a different ELF note for KVM. There also seems to be some flexibility in
>>>> how the e820 table data is passed and how (or if) it should be identified
>>>> as e820 data. As a starting point, I've chosen the options that seem to
>>>> result in the smallest patch with minimal to no changes required of the
>>>> x86/HVM direct boot ABI.
>>> I like the idea.
>>>
>>> I'd rather split up the different hypervisor types early and use a
>>> common set of service functions instead of special casing xen_guest
>>> everywhere. This would make it much easier to support the KVM PVH
>>> boot without the need to configure the kernel with CONFIG_XEN.
>>>
>>> Another option would be to use the same boot path as with grub: set
>>> the boot params in zeropage and start at startup_32.
>> I think I prefer this approach since AFAICT it should allow for
>> greater code share with the common boot path.
> 
> zeropage is x86/Linux-specific so we'd need some sort of firmware (like
> grub) between a hypervisor and Linux to convert hvm_start_info to
> bootparams.

qemu?


Juergen


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [RFC PATCH] KVM: x86: Allow Qemu/KVM to use PVH entry point
  2017-11-29 14:11       ` Juergen Gross
@ 2017-11-29 14:18         ` Roger Pau Monné
  2017-11-29 14:25           ` Boris Ostrovsky
  2017-11-29 14:25           ` Boris Ostrovsky
  2017-11-29 14:18         ` Roger Pau Monné
  1 sibling, 2 replies; 41+ messages in thread
From: Roger Pau Monné @ 2017-11-29 14:18 UTC (permalink / raw)
  To: Juergen Gross
  Cc: Boris Ostrovsky, Maran Wilson, tglx, mingo, hpa, x86, xen-devel,
	linux-kernel, rkrcmar, JBeulich, andrew.cooper3, pbonzini, kvm

On Wed, Nov 29, 2017 at 03:11:12PM +0100, Juergen Gross wrote:
> On 29/11/17 15:03, Boris Ostrovsky wrote:
> > On 11/29/2017 03:50 AM, Roger Pau Monné wrote:
> >> On Wed, Nov 29, 2017 at 09:21:59AM +0100, Juergen Gross wrote:
> >>> On 28/11/17 20:34, Maran Wilson wrote:
> >>>> For certain applications it is desirable to rapidly boot a KVM virtual
> >>>> machine. In cases where legacy hardware and software support within the
> >>>> guest is not needed, Qemu should be able to boot directly into the
> >>>> uncompressed Linux kernel binary without the need to run firmware.
> >>>>
> >>>> There already exists an ABI to allow this for Xen PVH guests and the ABI is
> >>>> supported by Linux and FreeBSD:
> >>>>
> >>>>    https://xenbits.xen.org/docs/unstable/misc/hvmlite.html
> >> I would also add a link to:
> >>
> >> http://xenbits.xen.org/docs/unstable/hypercall/x86_64/include,public,arch-x86,hvm,start_info.h.html#Struct_hvm_start_info
> >>
> >>>> This PoC patch enables Qemu to use that same entry point for booting KVM
> >>>> guests.
> >>>>
> >>>> Even though the code is still PoC quality, I'm sending this as an RFC now
> >>>> since there are a number of different ways the specific implementation
> >>>> details can be handled. I chose a shared code path for Xen and KVM guests
> >>>> but could just as easily create a separate code path that is advertised by
> >>>> a different ELF note for KVM. There also seems to be some flexibility in
> >>>> how the e820 table data is passed and how (or if) it should be identified
> >>>> as e820 data. As a starting point, I've chosen the options that seem to
> >>>> result in the smallest patch with minimal to no changes required of the
> >>>> x86/HVM direct boot ABI.
> >>> I like the idea.
> >>>
> >>> I'd rather split up the different hypervisor types early and use a
> >>> common set of service functions instead of special casing xen_guest
> >>> everywhere. This would make it much easier to support the KVM PVH
> >>> boot without the need to configure the kernel with CONFIG_XEN.
> >>>
> >>> Another option would be to use the same boot path as with grub: set
> >>> the boot params in zeropage and start at startup_32.
> >> I think I prefer this approach since AFAICT it should allow for
> >> greater code share with the common boot path.
> > 
> > zeropage is x86/Linux-specific so we'd need some sort of firmware (like
> > grub) between a hypervisor and Linux to convert hvm_start_info to
> > bootparams.
> 
> qemu?

But then it won't be using the PVH entry point, and would just use the
native one?

My understanding was that the PVH shim inside of Linux will prepare a
zero-page when booted using the PVH entry point, and then jump into
the native boot path.

Roger.

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [RFC PATCH] KVM: x86: Allow Qemu/KVM to use PVH entry point
  2017-11-29 14:11       ` Juergen Gross
  2017-11-29 14:18         ` Roger Pau Monné
@ 2017-11-29 14:18         ` Roger Pau Monné
  1 sibling, 0 replies; 41+ messages in thread
From: Roger Pau Monné @ 2017-11-29 14:18 UTC (permalink / raw)
  To: Juergen Gross
  Cc: rkrcmar, kvm, pbonzini, Maran Wilson, andrew.cooper3, x86,
	linux-kernel, mingo, JBeulich, hpa, xen-devel, Boris Ostrovsky,
	tglx

On Wed, Nov 29, 2017 at 03:11:12PM +0100, Juergen Gross wrote:
> On 29/11/17 15:03, Boris Ostrovsky wrote:
> > On 11/29/2017 03:50 AM, Roger Pau Monné wrote:
> >> On Wed, Nov 29, 2017 at 09:21:59AM +0100, Juergen Gross wrote:
> >>> On 28/11/17 20:34, Maran Wilson wrote:
> >>>> For certain applications it is desirable to rapidly boot a KVM virtual
> >>>> machine. In cases where legacy hardware and software support within the
> >>>> guest is not needed, Qemu should be able to boot directly into the
> >>>> uncompressed Linux kernel binary without the need to run firmware.
> >>>>
> >>>> There already exists an ABI to allow this for Xen PVH guests and the ABI is
> >>>> supported by Linux and FreeBSD:
> >>>>
> >>>>    https://xenbits.xen.org/docs/unstable/misc/hvmlite.html
> >> I would also add a link to:
> >>
> >> http://xenbits.xen.org/docs/unstable/hypercall/x86_64/include,public,arch-x86,hvm,start_info.h.html#Struct_hvm_start_info
> >>
> >>>> This PoC patch enables Qemu to use that same entry point for booting KVM
> >>>> guests.
> >>>>
> >>>> Even though the code is still PoC quality, I'm sending this as an RFC now
> >>>> since there are a number of different ways the specific implementation
> >>>> details can be handled. I chose a shared code path for Xen and KVM guests
> >>>> but could just as easily create a separate code path that is advertised by
> >>>> a different ELF note for KVM. There also seems to be some flexibility in
> >>>> how the e820 table data is passed and how (or if) it should be identified
> >>>> as e820 data. As a starting point, I've chosen the options that seem to
> >>>> result in the smallest patch with minimal to no changes required of the
> >>>> x86/HVM direct boot ABI.
> >>> I like the idea.
> >>>
> >>> I'd rather split up the different hypervisor types early and use a
> >>> common set of service functions instead of special casing xen_guest
> >>> everywhere. This would make it much easier to support the KVM PVH
> >>> boot without the need to configure the kernel with CONFIG_XEN.
> >>>
> >>> Another option would be to use the same boot path as with grub: set
> >>> the boot params in zeropage and start at startup_32.
> >> I think I prefer this approach since AFAICT it should allow for
> >> greater code share with the common boot path.
> > 
> > zeropage is x86/Linux-specific so we'd need some sort of firmware (like
> > grub) between a hypervisor and Linux to convert hvm_start_info to
> > bootparams.
> 
> qemu?

But then it won't be using the PVH entry point, and would just use the
native one?

My understanding was that the PVH shim inside of Linux will prepare a
zero-page when booted using the PVH entry point, and then jump into
the native boot path.

Roger.

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [RFC PATCH] KVM: x86: Allow Qemu/KVM to use PVH entry point
  2017-11-29 14:18         ` Roger Pau Monné
@ 2017-11-29 14:25           ` Boris Ostrovsky
  2017-11-29 14:44             ` Paolo Bonzini
  2017-11-29 14:44             ` Paolo Bonzini
  2017-11-29 14:25           ` Boris Ostrovsky
  1 sibling, 2 replies; 41+ messages in thread
From: Boris Ostrovsky @ 2017-11-29 14:25 UTC (permalink / raw)
  To: Roger Pau Monné, Juergen Gross
  Cc: Maran Wilson, tglx, mingo, hpa, x86, xen-devel, linux-kernel,
	rkrcmar, JBeulich, andrew.cooper3, pbonzini, kvm

On 11/29/2017 09:18 AM, Roger Pau Monné wrote:
> On Wed, Nov 29, 2017 at 03:11:12PM +0100, Juergen Gross wrote:
>> On 29/11/17 15:03, Boris Ostrovsky wrote:
>>> On 11/29/2017 03:50 AM, Roger Pau Monné wrote:
>>>> On Wed, Nov 29, 2017 at 09:21:59AM +0100, Juergen Gross wrote:
>>>>> On 28/11/17 20:34, Maran Wilson wrote:
>>>>>> For certain applications it is desirable to rapidly boot a KVM virtual
>>>>>> machine. In cases where legacy hardware and software support within the
>>>>>> guest is not needed, Qemu should be able to boot directly into the
>>>>>> uncompressed Linux kernel binary without the need to run firmware.
>>>>>>
>>>>>> There already exists an ABI to allow this for Xen PVH guests and the ABI is
>>>>>> supported by Linux and FreeBSD:
>>>>>>
>>>>>>    https://xenbits.xen.org/docs/unstable/misc/hvmlite.html
>>>> I would also add a link to:
>>>>
>>>> http://xenbits.xen.org/docs/unstable/hypercall/x86_64/include,public,arch-x86,hvm,start_info.h.html#Struct_hvm_start_info
>>>>
>>>>>> This PoC patch enables Qemu to use that same entry point for booting KVM
>>>>>> guests.
>>>>>>
>>>>>> Even though the code is still PoC quality, I'm sending this as an RFC now
>>>>>> since there are a number of different ways the specific implementation
>>>>>> details can be handled. I chose a shared code path for Xen and KVM guests
>>>>>> but could just as easily create a separate code path that is advertised by
>>>>>> a different ELF note for KVM. There also seems to be some flexibility in
>>>>>> how the e820 table data is passed and how (or if) it should be identified
>>>>>> as e820 data. As a starting point, I've chosen the options that seem to
>>>>>> result in the smallest patch with minimal to no changes required of the
>>>>>> x86/HVM direct boot ABI.
>>>>> I like the idea.
>>>>>
>>>>> I'd rather split up the different hypervisor types early and use a
>>>>> common set of service functions instead of special casing xen_guest
>>>>> everywhere. This would make it much easier to support the KVM PVH
>>>>> boot without the need to configure the kernel with CONFIG_XEN.
>>>>>
>>>>> Another option would be to use the same boot path as with grub: set
>>>>> the boot params in zeropage and start at startup_32.
>>>> I think I prefer this approach since AFAICT it should allow for
>>>> greater code share with the common boot path.
>>> zeropage is x86/Linux-specific so we'd need some sort of firmware (like
>>> grub) between a hypervisor and Linux to convert hvm_start_info to
>>> bootparams.
>> qemu?

I think KVM folks didn't want to do this. I can't find the thread but I
believe it was somewhere during Clear Containers discussion. Paolo?


> But then it won't be using the PVH entry point, and would just use the
> native one?
>
> My understanding was that the PVH shim inside of Linux will prepare a
> zero-page when booted using the PVH entry point, and then jump into
> the native boot path.

Right, but that's not what Juergen's second option is. IIUC, with that
option Linux starts with zeropage already prepared. No shim in the kernel.

-boris

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [RFC PATCH] KVM: x86: Allow Qemu/KVM to use PVH entry point
  2017-11-29 14:18         ` Roger Pau Monné
  2017-11-29 14:25           ` Boris Ostrovsky
@ 2017-11-29 14:25           ` Boris Ostrovsky
  1 sibling, 0 replies; 41+ messages in thread
From: Boris Ostrovsky @ 2017-11-29 14:25 UTC (permalink / raw)
  To: Roger Pau Monné, Juergen Gross
  Cc: rkrcmar, kvm, pbonzini, Maran Wilson, andrew.cooper3, x86,
	linux-kernel, mingo, JBeulich, hpa, xen-devel, tglx

On 11/29/2017 09:18 AM, Roger Pau Monné wrote:
> On Wed, Nov 29, 2017 at 03:11:12PM +0100, Juergen Gross wrote:
>> On 29/11/17 15:03, Boris Ostrovsky wrote:
>>> On 11/29/2017 03:50 AM, Roger Pau Monné wrote:
>>>> On Wed, Nov 29, 2017 at 09:21:59AM +0100, Juergen Gross wrote:
>>>>> On 28/11/17 20:34, Maran Wilson wrote:
>>>>>> For certain applications it is desirable to rapidly boot a KVM virtual
>>>>>> machine. In cases where legacy hardware and software support within the
>>>>>> guest is not needed, Qemu should be able to boot directly into the
>>>>>> uncompressed Linux kernel binary without the need to run firmware.
>>>>>>
>>>>>> There already exists an ABI to allow this for Xen PVH guests and the ABI is
>>>>>> supported by Linux and FreeBSD:
>>>>>>
>>>>>>    https://xenbits.xen.org/docs/unstable/misc/hvmlite.html
>>>> I would also add a link to:
>>>>
>>>> http://xenbits.xen.org/docs/unstable/hypercall/x86_64/include,public,arch-x86,hvm,start_info.h.html#Struct_hvm_start_info
>>>>
>>>>>> This PoC patch enables Qemu to use that same entry point for booting KVM
>>>>>> guests.
>>>>>>
>>>>>> Even though the code is still PoC quality, I'm sending this as an RFC now
>>>>>> since there are a number of different ways the specific implementation
>>>>>> details can be handled. I chose a shared code path for Xen and KVM guests
>>>>>> but could just as easily create a separate code path that is advertised by
>>>>>> a different ELF note for KVM. There also seems to be some flexibility in
>>>>>> how the e820 table data is passed and how (or if) it should be identified
>>>>>> as e820 data. As a starting point, I've chosen the options that seem to
>>>>>> result in the smallest patch with minimal to no changes required of the
>>>>>> x86/HVM direct boot ABI.
>>>>> I like the idea.
>>>>>
>>>>> I'd rather split up the different hypervisor types early and use a
>>>>> common set of service functions instead of special casing xen_guest
>>>>> everywhere. This would make it much easier to support the KVM PVH
>>>>> boot without the need to configure the kernel with CONFIG_XEN.
>>>>>
>>>>> Another option would be to use the same boot path as with grub: set
>>>>> the boot params in zeropage and start at startup_32.
>>>> I think I prefer this approach since AFAICT it should allow for
>>>> greater code share with the common boot path.
>>> zeropage is x86/Linux-specific so we'd need some sort of firmware (like
>>> grub) between a hypervisor and Linux to convert hvm_start_info to
>>> bootparams.
>> qemu?

I think KVM folks didn't want to do this. I can't find the thread but I
believe it was somewhere during Clear Containers discussion. Paolo?


> But then it won't be using the PVH entry point, and would just use the
> native one?
>
> My understanding was that the PVH shim inside of Linux will prepare a
> zero-page when booted using the PVH entry point, and then jump into
> the native boot path.

Right, but that's not what Juergen's second option is. IIUC, with that
option Linux starts with zeropage already prepared. No shim in the kernel.

-boris


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [RFC PATCH] KVM: x86: Allow Qemu/KVM to use PVH entry point
  2017-11-29 14:25           ` Boris Ostrovsky
  2017-11-29 14:44             ` Paolo Bonzini
@ 2017-11-29 14:44             ` Paolo Bonzini
  2017-11-29 14:47               ` Juergen Gross
                                 ` (3 more replies)
  1 sibling, 4 replies; 41+ messages in thread
From: Paolo Bonzini @ 2017-11-29 14:44 UTC (permalink / raw)
  To: Boris Ostrovsky, Roger Pau Monné, Juergen Gross
  Cc: Maran Wilson, tglx, mingo, hpa, x86, xen-devel, linux-kernel,
	rkrcmar, JBeulich, andrew.cooper3, kvm

On 29/11/2017 15:25, Boris Ostrovsky wrote:
>>>> zeropage is x86/Linux-specific so we'd need some sort of firmware (like
>>>> grub) between a hypervisor and Linux to convert hvm_start_info to
>>>> bootparams.
>>> qemu?
>
> I think KVM folks didn't want to do this. I can't find the thread but I
> believe it was somewhere during Clear Containers discussion. Paolo?

QEMU is the right place to parse the ELF file and save it in memory.
You would have to teach QEMU to find the Xen note in ELF-format kernels
(just like it looks for the multiboot header), and use a different
option ROM ("pvhboot.c" for example).

However I don't like to bypass the BIOS; for -kernel, KVM starts the
guest with an option ROM (linuxboot-dma.c or multiboot.S in QEMU
sources) that takes care of boot.

In either case, you would have a new option ROM.  It could either be
very simple and similar to multiboot.S, or it could be larger and do the
same task as xen-pvh.S and enlighten_pvh.c (then get the address of
startup_32 or startup_64 from FW_CFG_KERNEL_ENTRY and jump there).  The
ugly part is that the option ROM would have to know more details about
what it is going to boot, including for example whether it's 32-bit or
64-bit, so I don't really think it is a good idea.

I actually like this patch, except that I'd get the e820 memory map from
fw_cfg (see the first part of
https://github.com/bonzini/qboot/blob/master/fw_cfg.c, and extract_e820
in https://github.com/bonzini/qboot/blob/master/main.c) instead of the
second module.

Thanks,

Paolo

> 
>> But then it won't be using the PVH entry point, and would just use the
>> native one?
>>
>> My understanding was that the PVH shim inside of Linux will prepare a
>> zero-page when booted using the PVH entry point, and then jump into
>> the native boot path.
> Right, but that's not what Juergen's second option is. IIUC, with that
> option Linux starts with zeropage already prepared. No shim in the kernel.

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [RFC PATCH] KVM: x86: Allow Qemu/KVM to use PVH entry point
  2017-11-29 14:25           ` Boris Ostrovsky
@ 2017-11-29 14:44             ` Paolo Bonzini
  2017-11-29 14:44             ` Paolo Bonzini
  1 sibling, 0 replies; 41+ messages in thread
From: Paolo Bonzini @ 2017-11-29 14:44 UTC (permalink / raw)
  To: Boris Ostrovsky, Roger Pau Monné, Juergen Gross
  Cc: rkrcmar, kvm, Maran Wilson, andrew.cooper3, x86, linux-kernel,
	mingo, JBeulich, hpa, xen-devel, tglx

On 29/11/2017 15:25, Boris Ostrovsky wrote:
>>>> zeropage is x86/Linux-specific so we'd need some sort of firmware (like
>>>> grub) between a hypervisor and Linux to convert hvm_start_info to
>>>> bootparams.
>>> qemu?
>
> I think KVM folks didn't want to do this. I can't find the thread but I
> believe it was somewhere during Clear Containers discussion. Paolo?

QEMU is the right place to parse the ELF file and save it in memory.
You would have to teach QEMU to find the Xen note in ELF-format kernels
(just like it looks for the multiboot header), and use a different
option ROM ("pvhboot.c" for example).

However I don't like to bypass the BIOS; for -kernel, KVM starts the
guest with an option ROM (linuxboot-dma.c or multiboot.S in QEMU
sources) that takes care of boot.

In either case, you would have a new option ROM.  It could either be
very simple and similar to multiboot.S, or it could be larger and do the
same task as xen-pvh.S and enlighten_pvh.c (then get the address of
startup_32 or startup_64 from FW_CFG_KERNEL_ENTRY and jump there).  The
ugly part is that the option ROM would have to know more details about
what it is going to boot, including for example whether it's 32-bit or
64-bit, so I don't really think it is a good idea.

I actually like this patch, except that I'd get the e820 memory map from
fw_cfg (see the first part of
https://github.com/bonzini/qboot/blob/master/fw_cfg.c, and extract_e820
in https://github.com/bonzini/qboot/blob/master/main.c) instead of the
second module.

Thanks,

Paolo

> 
>> But then it won't be using the PVH entry point, and would just use the
>> native one?
>>
>> My understanding was that the PVH shim inside of Linux will prepare a
>> zero-page when booted using the PVH entry point, and then jump into
>> the native boot path.
> Right, but that's not what Juergen's second option is. IIUC, with that
> option Linux starts with zeropage already prepared. No shim in the kernel.


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [RFC PATCH] KVM: x86: Allow Qemu/KVM to use PVH entry point
  2017-11-29 14:44             ` Paolo Bonzini
  2017-11-29 14:47               ` Juergen Gross
@ 2017-11-29 14:47               ` Juergen Gross
  2017-11-29 14:50                 ` Paolo Bonzini
                                   ` (3 more replies)
  2017-11-30 18:23               ` Maran Wilson
  2017-11-30 18:23               ` Maran Wilson
  3 siblings, 4 replies; 41+ messages in thread
From: Juergen Gross @ 2017-11-29 14:47 UTC (permalink / raw)
  To: Paolo Bonzini, Boris Ostrovsky, Roger Pau Monné
  Cc: Maran Wilson, tglx, mingo, hpa, x86, xen-devel, linux-kernel,
	rkrcmar, JBeulich, andrew.cooper3, kvm

On 29/11/17 15:44, Paolo Bonzini wrote:
> On 29/11/2017 15:25, Boris Ostrovsky wrote:
>>>>> zeropage is x86/Linux-specific so we'd need some sort of firmware (like
>>>>> grub) between a hypervisor and Linux to convert hvm_start_info to
>>>>> bootparams.
>>>> qemu?
>>
>> I think KVM folks didn't want to do this. I can't find the thread but I
>> believe it was somewhere during Clear Containers discussion. Paolo?
> 
> QEMU is the right place to parse the ELF file and save it in memory.
> You would have to teach QEMU to find the Xen note in ELF-format kernels
> (just like it looks for the multiboot header), and use a different
> option ROM ("pvhboot.c" for example).
> 
> However I don't like to bypass the BIOS; for -kernel, KVM starts the
> guest with an option ROM (linuxboot-dma.c or multiboot.S in QEMU
> sources) that takes care of boot.
> 
> In either case, you would have a new option ROM.  It could either be
> very simple and similar to multiboot.S, or it could be larger and do the
> same task as xen-pvh.S and enlighten_pvh.c (then get the address of
> startup_32 or startup_64 from FW_CFG_KERNEL_ENTRY and jump there).  The
> ugly part is that the option ROM would have to know more details about
> what it is going to boot, including for example whether it's 32-bit or
> 64-bit, so I don't really think it is a good idea.

As grub2 doesn't have to know, qemu shouldn't have to know either.


Juergen

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [RFC PATCH] KVM: x86: Allow Qemu/KVM to use PVH entry point
  2017-11-29 14:44             ` Paolo Bonzini
@ 2017-11-29 14:47               ` Juergen Gross
  2017-11-29 14:47               ` Juergen Gross
                                 ` (2 subsequent siblings)
  3 siblings, 0 replies; 41+ messages in thread
From: Juergen Gross @ 2017-11-29 14:47 UTC (permalink / raw)
  To: Paolo Bonzini, Boris Ostrovsky, Roger Pau Monné
  Cc: rkrcmar, kvm, Maran Wilson, andrew.cooper3, x86, linux-kernel,
	mingo, JBeulich, hpa, xen-devel, tglx

On 29/11/17 15:44, Paolo Bonzini wrote:
> On 29/11/2017 15:25, Boris Ostrovsky wrote:
>>>>> zeropage is x86/Linux-specific so we'd need some sort of firmware (like
>>>>> grub) between a hypervisor and Linux to convert hvm_start_info to
>>>>> bootparams.
>>>> qemu?
>>
>> I think KVM folks didn't want to do this. I can't find the thread but I
>> believe it was somewhere during Clear Containers discussion. Paolo?
> 
> QEMU is the right place to parse the ELF file and save it in memory.
> You would have to teach QEMU to find the Xen note in ELF-format kernels
> (just like it looks for the multiboot header), and use a different
> option ROM ("pvhboot.c" for example).
> 
> However I don't like to bypass the BIOS; for -kernel, KVM starts the
> guest with an option ROM (linuxboot-dma.c or multiboot.S in QEMU
> sources) that takes care of boot.
> 
> In either case, you would have a new option ROM.  It could either be
> very simple and similar to multiboot.S, or it could be larger and do the
> same task as xen-pvh.S and enlighten_pvh.c (then get the address of
> startup_32 or startup_64 from FW_CFG_KERNEL_ENTRY and jump there).  The
> ugly part is that the option ROM would have to know more details about
> what it is going to boot, including for example whether it's 32-bit or
> 64-bit, so I don't really think it is a good idea.

As grub2 doesn't have to know, qemu shouldn't have to know either.


Juergen

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [RFC PATCH] KVM: x86: Allow Qemu/KVM to use PVH entry point
  2017-11-29 14:47               ` Juergen Gross
  2017-11-29 14:50                 ` Paolo Bonzini
@ 2017-11-29 14:50                 ` Paolo Bonzini
  2017-11-29 14:52                 ` Andrew Cooper
  2017-11-29 14:52                 ` Andrew Cooper
  3 siblings, 0 replies; 41+ messages in thread
From: Paolo Bonzini @ 2017-11-29 14:50 UTC (permalink / raw)
  To: Juergen Gross, Boris Ostrovsky, Roger Pau Monné
  Cc: Maran Wilson, tglx, mingo, hpa, x86, xen-devel, linux-kernel,
	rkrcmar, JBeulich, andrew.cooper3, kvm

On 29/11/2017 15:47, Juergen Gross wrote:
> On 29/11/17 15:44, Paolo Bonzini wrote:
>> In either case, you would have a new option ROM.  It could either be
>> very simple and similar to multiboot.S, or it could be larger and do the
>> same task as xen-pvh.S and enlighten_pvh.c (then get the address of
>> startup_32 or startup_64 from FW_CFG_KERNEL_ENTRY and jump there).  The
>> ugly part is that the option ROM would have to know more details about
>> what it is going to boot, including for example whether it's 32-bit or
>> 64-bit, so I don't really think it is a good idea.
> 
> As grub2 doesn't have to know, qemu shouldn't have to know either.

That would be exactly what linuxboot-dma.c does already, but it's slower
than PVH because Linux has to uncompress itself.

The above thought experiment would make QEMU able to boot a PVH kernel
without any changes to Linux.

Paolo

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [RFC PATCH] KVM: x86: Allow Qemu/KVM to use PVH entry point
  2017-11-29 14:47               ` Juergen Gross
@ 2017-11-29 14:50                 ` Paolo Bonzini
  2017-11-29 14:50                 ` Paolo Bonzini
                                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 41+ messages in thread
From: Paolo Bonzini @ 2017-11-29 14:50 UTC (permalink / raw)
  To: Juergen Gross, Boris Ostrovsky, Roger Pau Monné
  Cc: rkrcmar, kvm, Maran Wilson, andrew.cooper3, x86, linux-kernel,
	mingo, JBeulich, hpa, xen-devel, tglx

On 29/11/2017 15:47, Juergen Gross wrote:
> On 29/11/17 15:44, Paolo Bonzini wrote:
>> In either case, you would have a new option ROM.  It could either be
>> very simple and similar to multiboot.S, or it could be larger and do the
>> same task as xen-pvh.S and enlighten_pvh.c (then get the address of
>> startup_32 or startup_64 from FW_CFG_KERNEL_ENTRY and jump there).  The
>> ugly part is that the option ROM would have to know more details about
>> what it is going to boot, including for example whether it's 32-bit or
>> 64-bit, so I don't really think it is a good idea.
> 
> As grub2 doesn't have to know, qemu shouldn't have to know either.

That would be exactly what linuxboot-dma.c does already, but it's slower
than PVH because Linux has to uncompress itself.

The above thought experiment would make QEMU able to boot a PVH kernel
without any changes to Linux.

Paolo

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [RFC PATCH] KVM: x86: Allow Qemu/KVM to use PVH entry point
  2017-11-29 14:47               ` Juergen Gross
                                   ` (2 preceding siblings ...)
  2017-11-29 14:52                 ` Andrew Cooper
@ 2017-11-29 14:52                 ` Andrew Cooper
  3 siblings, 0 replies; 41+ messages in thread
From: Andrew Cooper @ 2017-11-29 14:52 UTC (permalink / raw)
  To: Juergen Gross, Paolo Bonzini, Boris Ostrovsky, Roger Pau Monné
  Cc: Maran Wilson, tglx, mingo, hpa, x86, xen-devel, linux-kernel,
	rkrcmar, JBeulich, kvm

On 29/11/17 14:47, Juergen Gross wrote:
> On 29/11/17 15:44, Paolo Bonzini wrote:
>> On 29/11/2017 15:25, Boris Ostrovsky wrote:
>>>>>> zeropage is x86/Linux-specific so we'd need some sort of firmware (like
>>>>>> grub) between a hypervisor and Linux to convert hvm_start_info to
>>>>>> bootparams.
>>>>> qemu?
>>> I think KVM folks didn't want to do this. I can't find the thread but I
>>> believe it was somewhere during Clear Containers discussion. Paolo?
>> QEMU is the right place to parse the ELF file and save it in memory.
>> You would have to teach QEMU to find the Xen note in ELF-format kernels
>> (just like it looks for the multiboot header), and use a different
>> option ROM ("pvhboot.c" for example).
>>
>> However I don't like to bypass the BIOS; for -kernel, KVM starts the
>> guest with an option ROM (linuxboot-dma.c or multiboot.S in QEMU
>> sources) that takes care of boot.
>>
>> In either case, you would have a new option ROM.  It could either be
>> very simple and similar to multiboot.S, or it could be larger and do the
>> same task as xen-pvh.S and enlighten_pvh.c (then get the address of
>> startup_32 or startup_64 from FW_CFG_KERNEL_ENTRY and jump there).  The
>> ugly part is that the option ROM would have to know more details about
>> what it is going to boot, including for example whether it's 32-bit or
>> 64-bit, so I don't really think it is a good idea.
> As grub2 doesn't have to know, qemu shouldn't have to know either.

An underlying requirement for this boot protocol was to remove the
requirement for a priori knowledge of the eventual mode of the guest,
which plagues Xen PV guests.  (One way or another, we need to parse the
kernel which will end up running to work out how to build the domain for
it.)

32bit flat mode is easy to set up, sufficiently large for any reasonable
bootstrapping, and provides no restrictions to what the eventual guest
wants to do.

~Andrew

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [RFC PATCH] KVM: x86: Allow Qemu/KVM to use PVH entry point
  2017-11-29 14:47               ` Juergen Gross
  2017-11-29 14:50                 ` Paolo Bonzini
  2017-11-29 14:50                 ` Paolo Bonzini
@ 2017-11-29 14:52                 ` Andrew Cooper
  2017-11-29 14:52                 ` Andrew Cooper
  3 siblings, 0 replies; 41+ messages in thread
From: Andrew Cooper @ 2017-11-29 14:52 UTC (permalink / raw)
  To: Juergen Gross, Paolo Bonzini, Boris Ostrovsky, Roger Pau Monné
  Cc: rkrcmar, kvm, Maran Wilson, x86, linux-kernel, mingo, JBeulich,
	hpa, xen-devel, tglx

On 29/11/17 14:47, Juergen Gross wrote:
> On 29/11/17 15:44, Paolo Bonzini wrote:
>> On 29/11/2017 15:25, Boris Ostrovsky wrote:
>>>>>> zeropage is x86/Linux-specific so we'd need some sort of firmware (like
>>>>>> grub) between a hypervisor and Linux to convert hvm_start_info to
>>>>>> bootparams.
>>>>> qemu?
>>> I think KVM folks didn't want to do this. I can't find the thread but I
>>> believe it was somewhere during Clear Containers discussion. Paolo?
>> QEMU is the right place to parse the ELF file and save it in memory.
>> You would have to teach QEMU to find the Xen note in ELF-format kernels
>> (just like it looks for the multiboot header), and use a different
>> option ROM ("pvhboot.c" for example).
>>
>> However I don't like to bypass the BIOS; for -kernel, KVM starts the
>> guest with an option ROM (linuxboot-dma.c or multiboot.S in QEMU
>> sources) that takes care of boot.
>>
>> In either case, you would have a new option ROM.  It could either be
>> very simple and similar to multiboot.S, or it could be larger and do the
>> same task as xen-pvh.S and enlighten_pvh.c (then get the address of
>> startup_32 or startup_64 from FW_CFG_KERNEL_ENTRY and jump there).  The
>> ugly part is that the option ROM would have to know more details about
>> what it is going to boot, including for example whether it's 32-bit or
>> 64-bit, so I don't really think it is a good idea.
> As grub2 doesn't have to know, qemu shouldn't have to know either.

An underlying requirement for this boot protocol was to remove the
requirement for a priori knowledge of the eventual mode of the guest,
which plagues Xen PV guests.  (One way or another, we need to parse the
kernel which will end up running to work out how to build the domain for
it.)

32bit flat mode is easy to set up, sufficiently large for any reasonable
bootstrapping, and provides no restrictions to what the eventual guest
wants to do.

~Andrew

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [RFC PATCH] KVM: x86: Allow Qemu/KVM to use PVH entry point
  2017-11-29  8:59 ` Paolo Bonzini
  2017-11-29 17:14   ` Maran Wilson
@ 2017-11-29 17:14   ` Maran Wilson
  2017-11-29 17:16     ` Paolo Bonzini
  2017-11-29 17:16     ` Paolo Bonzini
  1 sibling, 2 replies; 41+ messages in thread
From: Maran Wilson @ 2017-11-29 17:14 UTC (permalink / raw)
  To: Paolo Bonzini, boris.ostrovsky, jgross, tglx, mingo, hpa, x86,
	xen-devel, linux-kernel, roger.pau, rkrcmar, JBeulich,
	andrew.cooper3, kvm

On 11/29/2017 12:59 AM, Paolo Bonzini wrote:
> On 28/11/2017 20:34, Maran Wilson wrote:
>> For certain applications it is desirable to rapidly boot a KVM virtual
>> machine. In cases where legacy hardware and software support within the
>> guest is not needed, Qemu should be able to boot directly into the
>> uncompressed Linux kernel binary without the need to run firmware.
>>
>> There already exists an ABI to allow this for Xen PVH guests and the ABI is
>> supported by Linux and FreeBSD:
>>
>>     https://xenbits.xen.org/docs/unstable/misc/hvmlite.html
>>
>> This PoC patch enables Qemu to use that same entry point for booting KVM
>> guests.
> Nice!  So QEMU would parse the ELF file just like for multiboot, find
> the ELF note, and then prepare an hvmlite boot info struct instead of
> the multiboot one?

Yes, exactly.

> There would then be a new option ROM, very similar
> to multiboot.S.

That is one option. I guess this gets into a discussion about the QEMU 
side of the upcoming patches that would follow ...

I'm currently just initializing the CPU state in QEMU for testing since 
there is such minimal (non Linux specific) setup that is required by the 
ABI.  And (borrowing from the Intel clear container patches) that VM 
setup is only performed when user selects the "nofw" option with the q35 
model. But yeah, if folks think it important to move all such machine 
state initialization out of QEMU and into an option ROM, I can look into 
coding it up that way for the QEMU patches.

Thanks,
-Maran

> Thanks,
>
> Paolo

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [RFC PATCH] KVM: x86: Allow Qemu/KVM to use PVH entry point
  2017-11-29  8:59 ` Paolo Bonzini
@ 2017-11-29 17:14   ` Maran Wilson
  2017-11-29 17:14   ` Maran Wilson
  1 sibling, 0 replies; 41+ messages in thread
From: Maran Wilson @ 2017-11-29 17:14 UTC (permalink / raw)
  To: Paolo Bonzini, boris.ostrovsky, jgross, tglx, mingo, hpa, x86,
	xen-devel, linux-kernel, roger.pau, rkrcmar, JBeulich,
	andrew.cooper3, kvm

On 11/29/2017 12:59 AM, Paolo Bonzini wrote:
> On 28/11/2017 20:34, Maran Wilson wrote:
>> For certain applications it is desirable to rapidly boot a KVM virtual
>> machine. In cases where legacy hardware and software support within the
>> guest is not needed, Qemu should be able to boot directly into the
>> uncompressed Linux kernel binary without the need to run firmware.
>>
>> There already exists an ABI to allow this for Xen PVH guests and the ABI is
>> supported by Linux and FreeBSD:
>>
>>     https://xenbits.xen.org/docs/unstable/misc/hvmlite.html
>>
>> This PoC patch enables Qemu to use that same entry point for booting KVM
>> guests.
> Nice!  So QEMU would parse the ELF file just like for multiboot, find
> the ELF note, and then prepare an hvmlite boot info struct instead of
> the multiboot one?

Yes, exactly.

> There would then be a new option ROM, very similar
> to multiboot.S.

That is one option. I guess this gets into a discussion about the QEMU 
side of the upcoming patches that would follow ...

I'm currently just initializing the CPU state in QEMU for testing since 
there is such minimal (non Linux specific) setup that is required by the 
ABI.  And (borrowing from the Intel clear container patches) that VM 
setup is only performed when user selects the "nofw" option with the q35 
model. But yeah, if folks think it important to move all such machine 
state initialization out of QEMU and into an option ROM, I can look into 
coding it up that way for the QEMU patches.

Thanks,
-Maran

> Thanks,
>
> Paolo


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [RFC PATCH] KVM: x86: Allow Qemu/KVM to use PVH entry point
  2017-11-29 17:14   ` Maran Wilson
  2017-11-29 17:16     ` Paolo Bonzini
@ 2017-11-29 17:16     ` Paolo Bonzini
  1 sibling, 0 replies; 41+ messages in thread
From: Paolo Bonzini @ 2017-11-29 17:16 UTC (permalink / raw)
  To: Maran Wilson, boris.ostrovsky, jgross, tglx, mingo, hpa, x86,
	xen-devel, linux-kernel, roger.pau, rkrcmar, JBeulich,
	andrew.cooper3, kvm

On 29/11/2017 18:14, Maran Wilson wrote:
> That is one option. I guess this gets into a discussion about the QEMU
> side of the upcoming patches that would follow ...
> 
> I'm currently just initializing the CPU state in QEMU for testing since
> there is such minimal (non Linux specific) setup that is required by the
> ABI.  And (borrowing from the Intel clear container patches) that VM
> setup is only performed when user selects the "nofw" option with the q35
> model. But yeah, if folks think it important to move all such machine
> state initialization out of QEMU and into an option ROM, I can look into
> coding it up that way for the QEMU patches.

Yes, please do an option ROM.  I'll take care of porting it to qboot.

Thanks,

Paolo

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [RFC PATCH] KVM: x86: Allow Qemu/KVM to use PVH entry point
  2017-11-29 17:14   ` Maran Wilson
@ 2017-11-29 17:16     ` Paolo Bonzini
  2017-11-29 17:16     ` Paolo Bonzini
  1 sibling, 0 replies; 41+ messages in thread
From: Paolo Bonzini @ 2017-11-29 17:16 UTC (permalink / raw)
  To: Maran Wilson, boris.ostrovsky, jgross, tglx, mingo, hpa, x86,
	xen-devel, linux-kernel, roger.pau, rkrcmar, JBeulich,
	andrew.cooper3, kvm

On 29/11/2017 18:14, Maran Wilson wrote:
> That is one option. I guess this gets into a discussion about the QEMU
> side of the upcoming patches that would follow ...
> 
> I'm currently just initializing the CPU state in QEMU for testing since
> there is such minimal (non Linux specific) setup that is required by the
> ABI.  And (borrowing from the Intel clear container patches) that VM
> setup is only performed when user selects the "nofw" option with the q35
> model. But yeah, if folks think it important to move all such machine
> state initialization out of QEMU and into an option ROM, I can look into
> coding it up that way for the QEMU patches.

Yes, please do an option ROM.  I'll take care of porting it to qboot.

Thanks,

Paolo

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [RFC PATCH] KVM: x86: Allow Qemu/KVM to use PVH entry point
  2017-11-29  8:21 ` Juergen Gross
                     ` (2 preceding siblings ...)
  2017-11-29 17:24   ` Maran Wilson
@ 2017-11-29 17:24   ` Maran Wilson
  3 siblings, 0 replies; 41+ messages in thread
From: Maran Wilson @ 2017-11-29 17:24 UTC (permalink / raw)
  To: Juergen Gross, boris.ostrovsky, tglx, mingo, hpa, x86, xen-devel,
	linux-kernel, roger.pau, rkrcmar, JBeulich, andrew.cooper3,
	pbonzini, kvm

On 11/29/2017 12:21 AM, Juergen Gross wrote:
> On 28/11/17 20:34, Maran Wilson wrote:
>> For certain applications it is desirable to rapidly boot a KVM virtual
>> machine. In cases where legacy hardware and software support within the
>> guest is not needed, Qemu should be able to boot directly into the
>> uncompressed Linux kernel binary without the need to run firmware.
>>
>> There already exists an ABI to allow this for Xen PVH guests and the ABI is
>> supported by Linux and FreeBSD:
>>
>>     https://xenbits.xen.org/docs/unstable/misc/hvmlite.html
>>
>> This PoC patch enables Qemu to use that same entry point for booting KVM
>> guests.
>>
>> Even though the code is still PoC quality, I'm sending this as an RFC now
>> since there are a number of different ways the specific implementation
>> details can be handled. I chose a shared code path for Xen and KVM guests
>> but could just as easily create a separate code path that is advertised by
>> a different ELF note for KVM. There also seems to be some flexibility in
>> how the e820 table data is passed and how (or if) it should be identified
>> as e820 data. As a starting point, I've chosen the options that seem to
>> result in the smallest patch with minimal to no changes required of the
>> x86/HVM direct boot ABI.
> I like the idea.
>
> I'd rather split up the different hypervisor types early and use a
> common set of service functions instead of special casing xen_guest
> everywhere. This would make it much easier to support the KVM PVH
> boot without the need to configure the kernel with CONFIG_XEN.

Thanks for the feedback. I'll try doing something like that as this 
patch moves from proof of concept to a real proposal.

> Another option would be to use the same boot path as with grub: set
> the boot params in zeropage and start at startup_32.

I think others have already responded about that. The main thing I was
trying to avoid was adding any Linux-specific initialization (like
zeropage) to QEMU, especially since this PVH entry point already exists
in Linux.

Thanks,
-Maran

>
> Juergen
>
>> ---
>>   arch/x86/xen/enlighten_pvh.c | 74 ++++++++++++++++++++++++++++++++------------
>>   1 file changed, 55 insertions(+), 19 deletions(-)
>>
>> diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c
>> index 98ab176..d93f711 100644
>> --- a/arch/x86/xen/enlighten_pvh.c
>> +++ b/arch/x86/xen/enlighten_pvh.c
>> @@ -31,21 +31,46 @@ static void xen_pvh_arch_setup(void)
>>   		acpi_irq_model = ACPI_IRQ_MODEL_PLATFORM;
>>   }
>>   
>> -static void __init init_pvh_bootparams(void)
>> +static void __init init_pvh_bootparams(bool xen_guest)
>>   {
>>   	struct xen_memory_map memmap;
>>   	int rc;
>>   
>>   	memset(&pvh_bootparams, 0, sizeof(pvh_bootparams));
>>   
>> -	memmap.nr_entries = ARRAY_SIZE(pvh_bootparams.e820_table);
>> -	set_xen_guest_handle(memmap.buffer, pvh_bootparams.e820_table);
>> -	rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap);
>> -	if (rc) {
>> -		xen_raw_printk("XENMEM_memory_map failed (%d)\n", rc);
>> -		BUG();
>> +	if (xen_guest) {
>> +		memmap.nr_entries = ARRAY_SIZE(pvh_bootparams.e820_table);
>> +		set_xen_guest_handle(memmap.buffer, pvh_bootparams.e820_table);
>> +		rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap);
>> +		if (rc) {
>> +			xen_raw_printk("XENMEM_memory_map failed (%d)\n", rc);
>> +			BUG();
>> +		}
>> +		pvh_bootparams.e820_entries = memmap.nr_entries;
>> +	} else if (pvh_start_info.nr_modules > 1) {
>> +		/* The second module should be the e820 data for KVM guests */
>> +		struct hvm_modlist_entry *modaddr;
>> +		char e820_sig[] = "e820 data";
>> +		struct boot_e820_entry *ep;
>> +		struct e820_table *tp;
>> +		char *cmdline_str;
>> +		int idx;
>> +
>> +		modaddr = __va(pvh_start_info.modlist_paddr +
>> +			       sizeof(struct hvm_modlist_entry));
>> +		cmdline_str = __va(modaddr->cmdline_paddr);
>> +
>> +		if ((modaddr->cmdline_paddr) &&
>> +		    (!strncmp(e820_sig, cmdline_str, sizeof(e820_sig)))) {
>> +			tp = __va(modaddr->paddr);
>> +			ep = (struct boot_e820_entry *)tp->entries;
>> +
>> +			pvh_bootparams.e820_entries = tp->nr_entries;
>> +
>> +			for (idx = 0; idx < tp->nr_entries ; idx++, ep++)
>> +				pvh_bootparams.e820_table[idx] = *ep;
>> +		}
>>   	}
>> -	pvh_bootparams.e820_entries = memmap.nr_entries;
>>   
>>   	if (pvh_bootparams.e820_entries < E820_MAX_ENTRIES_ZEROPAGE - 1) {
>>   		pvh_bootparams.e820_table[pvh_bootparams.e820_entries].addr =
>> @@ -55,8 +80,9 @@ static void __init init_pvh_bootparams(void)
>>   		pvh_bootparams.e820_table[pvh_bootparams.e820_entries].type =
>>   			E820_TYPE_RESERVED;
>>   		pvh_bootparams.e820_entries++;
>> -	} else
>> +	} else if (xen_guest) {
>>   		xen_raw_printk("Warning: Can fit ISA range into e820\n");
>> +	}
>>   
>>   	pvh_bootparams.hdr.cmd_line_ptr =
>>   		pvh_start_info.cmdline_paddr;
>> @@ -76,7 +102,7 @@ static void __init init_pvh_bootparams(void)
>>   	 * environment (i.e. hardware_subarch 0).
>>   	 */
>>   	pvh_bootparams.hdr.version = 0x212;
>> -	pvh_bootparams.hdr.type_of_loader = (9 << 4) | 0; /* Xen loader */
>> +	pvh_bootparams.hdr.type_of_loader = ((xen_guest ? 0x9 : 0xb) << 4) | 0;
>>   }
>>   
>>   /*
>> @@ -85,22 +111,32 @@ static void __init init_pvh_bootparams(void)
>>    */
>>   void __init xen_prepare_pvh(void)
>>   {
>> -	u32 msr;
>> +
>> +	u32 msr = xen_cpuid_base();
>>   	u64 pfn;
>> +	bool xen_guest = msr ? true : false;
>>   
>>   	if (pvh_start_info.magic != XEN_HVM_START_MAGIC_VALUE) {
>> -		xen_raw_printk("Error: Unexpected magic value (0x%08x)\n",
>> -				pvh_start_info.magic);
>> +		if (xen_guest)
>> +			xen_raw_printk("Error: Unexpected magic value (0x%08x)\n",
>> +					pvh_start_info.magic);
>>   		BUG();
>>   	}
>>   
>> -	xen_pvh = 1;
>> +	if (xen_guest) {
>> +		xen_pvh = 1;
>> +
>> +		msr = cpuid_ebx(msr + 2);
>> +		pfn = __pa(hypercall_page);
>> +		wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32));
>> +
>> +	} else if (!hypervisor_cpuid_base("KVMKVMKVM\0\0\0", 0)) {
>> +		BUG();
>> +	}
>>   
>> -	msr = cpuid_ebx(xen_cpuid_base() + 2);
>> -	pfn = __pa(hypercall_page);
>> -	wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32));
>> +	init_pvh_bootparams(xen_guest);
>>   
>> -	init_pvh_bootparams();
>> +	if (xen_guest)
>> +		x86_init.oem.arch_setup = xen_pvh_arch_setup;
>>   
>> -	x86_init.oem.arch_setup = xen_pvh_arch_setup;
>>   }
>>

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [RFC PATCH] KVM: x86: Allow Qemu/KVM to use PVH entry point
  2017-11-29  8:21 ` Juergen Gross
  2017-11-29  8:50   ` Roger Pau Monné
  2017-11-29  8:50   ` Roger Pau Monné
@ 2017-11-29 17:24   ` Maran Wilson
  2017-11-29 17:24   ` Maran Wilson
  3 siblings, 0 replies; 41+ messages in thread
From: Maran Wilson @ 2017-11-29 17:24 UTC (permalink / raw)
  To: Juergen Gross, boris.ostrovsky, tglx, mingo, hpa, x86, xen-devel,
	linux-kernel, roger.pau, rkrcmar, JBeulich, andrew.cooper3,
	pbonzini, kvm

On 11/29/2017 12:21 AM, Juergen Gross wrote:
> On 28/11/17 20:34, Maran Wilson wrote:
>> For certain applications it is desirable to rapidly boot a KVM virtual
>> machine. In cases where legacy hardware and software support within the
>> guest is not needed, Qemu should be able to boot directly into the
>> uncompressed Linux kernel binary without the need to run firmware.
>>
>> There already exists an ABI to allow this for Xen PVH guests and the ABI is
>> supported by Linux and FreeBSD:
>>
>>     https://xenbits.xen.org/docs/unstable/misc/hvmlite.html
>>
>> This PoC patch enables Qemu to use that same entry point for booting KVM
>> guests.
>>
>> Even though the code is still PoC quality, I'm sending this as an RFC now
>> since there are a number of different ways the specific implementation
>> details can be handled. I chose a shared code path for Xen and KVM guests
>> but could just as easily create a separate code path that is advertised by
>> a different ELF note for KVM. There also seems to be some flexibility in
>> how the e820 table data is passed and how (or if) it should be identified
>> as e820 data. As a starting point, I've chosen the options that seem to
>> result in the smallest patch with minimal to no changes required of the
>> x86/HVM direct boot ABI.
> I like the idea.
>
> I'd rather split up the different hypervisor types early and use a
> common set of service functions instead of special casing xen_guest
> everywhere. This would make it much easier to support the KVM PVH
> boot without the need to configure the kernel with CONFIG_XEN.

Thanks for the feedback. I'll try doing something like that as this 
patch moves from proof of concept to a real proposal.

> Another option would be to use the same boot path as with grub: set
> the boot params in zeropage and start at startup_32.

I think others have already responded about that. The main thing I was
trying to avoid was adding any Linux-specific initialization (like
zeropage) to QEMU, especially since this PVH entry point already exists
in Linux.

Thanks,
-Maran

>
> Juergen
>
>> ---
>>   arch/x86/xen/enlighten_pvh.c | 74 ++++++++++++++++++++++++++++++++------------
>>   1 file changed, 55 insertions(+), 19 deletions(-)
>>
>> diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c
>> index 98ab176..d93f711 100644
>> --- a/arch/x86/xen/enlighten_pvh.c
>> +++ b/arch/x86/xen/enlighten_pvh.c
>> @@ -31,21 +31,46 @@ static void xen_pvh_arch_setup(void)
>>   		acpi_irq_model = ACPI_IRQ_MODEL_PLATFORM;
>>   }
>>   
>> -static void __init init_pvh_bootparams(void)
>> +static void __init init_pvh_bootparams(bool xen_guest)
>>   {
>>   	struct xen_memory_map memmap;
>>   	int rc;
>>   
>>   	memset(&pvh_bootparams, 0, sizeof(pvh_bootparams));
>>   
>> -	memmap.nr_entries = ARRAY_SIZE(pvh_bootparams.e820_table);
>> -	set_xen_guest_handle(memmap.buffer, pvh_bootparams.e820_table);
>> -	rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap);
>> -	if (rc) {
>> -		xen_raw_printk("XENMEM_memory_map failed (%d)\n", rc);
>> -		BUG();
>> +	if (xen_guest) {
>> +		memmap.nr_entries = ARRAY_SIZE(pvh_bootparams.e820_table);
>> +		set_xen_guest_handle(memmap.buffer, pvh_bootparams.e820_table);
>> +		rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap);
>> +		if (rc) {
>> +			xen_raw_printk("XENMEM_memory_map failed (%d)\n", rc);
>> +			BUG();
>> +		}
>> +		pvh_bootparams.e820_entries = memmap.nr_entries;
>> +	} else if (pvh_start_info.nr_modules > 1) {
>> +		/* The second module should be the e820 data for KVM guests */
>> +		struct hvm_modlist_entry *modaddr;
>> +		char e820_sig[] = "e820 data";
>> +		struct boot_e820_entry *ep;
>> +		struct e820_table *tp;
>> +		char *cmdline_str;
>> +		int idx;
>> +
>> +		modaddr = __va(pvh_start_info.modlist_paddr +
>> +			       sizeof(struct hvm_modlist_entry));
>> +		cmdline_str = __va(modaddr->cmdline_paddr);
>> +
>> +		if ((modaddr->cmdline_paddr) &&
>> +		    (!strncmp(e820_sig, cmdline_str, sizeof(e820_sig)))) {
>> +			tp = __va(modaddr->paddr);
>> +			ep = (struct boot_e820_entry *)tp->entries;
>> +
>> +			pvh_bootparams.e820_entries = tp->nr_entries;
>> +
>> +			for (idx = 0; idx < tp->nr_entries ; idx++, ep++)
>> +				pvh_bootparams.e820_table[idx] = *ep;
>> +		}
>>   	}
>> -	pvh_bootparams.e820_entries = memmap.nr_entries;
>>   
>>   	if (pvh_bootparams.e820_entries < E820_MAX_ENTRIES_ZEROPAGE - 1) {
>>   		pvh_bootparams.e820_table[pvh_bootparams.e820_entries].addr =
>> @@ -55,8 +80,9 @@ static void __init init_pvh_bootparams(void)
>>   		pvh_bootparams.e820_table[pvh_bootparams.e820_entries].type =
>>   			E820_TYPE_RESERVED;
>>   		pvh_bootparams.e820_entries++;
>> -	} else
>> +	} else if (xen_guest) {
>>   		xen_raw_printk("Warning: Can fit ISA range into e820\n");
>> +	}
>>   
>>   	pvh_bootparams.hdr.cmd_line_ptr =
>>   		pvh_start_info.cmdline_paddr;
>> @@ -76,7 +102,7 @@ static void __init init_pvh_bootparams(void)
>>   	 * environment (i.e. hardware_subarch 0).
>>   	 */
>>   	pvh_bootparams.hdr.version = 0x212;
>> -	pvh_bootparams.hdr.type_of_loader = (9 << 4) | 0; /* Xen loader */
>> +	pvh_bootparams.hdr.type_of_loader = ((xen_guest ? 0x9 : 0xb) << 4) | 0;
>>   }
>>   
>>   /*
>> @@ -85,22 +111,32 @@ static void __init init_pvh_bootparams(void)
>>    */
>>   void __init xen_prepare_pvh(void)
>>   {
>> -	u32 msr;
>> +
>> +	u32 msr = xen_cpuid_base();
>>   	u64 pfn;
>> +	bool xen_guest = msr ? true : false;
>>   
>>   	if (pvh_start_info.magic != XEN_HVM_START_MAGIC_VALUE) {
>> -		xen_raw_printk("Error: Unexpected magic value (0x%08x)\n",
>> -				pvh_start_info.magic);
>> +		if (xen_guest)
>> +			xen_raw_printk("Error: Unexpected magic value (0x%08x)\n",
>> +					pvh_start_info.magic);
>>   		BUG();
>>   	}
>>   
>> -	xen_pvh = 1;
>> +	if (xen_guest) {
>> +		xen_pvh = 1;
>> +
>> +		msr = cpuid_ebx(msr + 2);
>> +		pfn = __pa(hypercall_page);
>> +		wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32));
>> +
>> +	} else if (!hypervisor_cpuid_base("KVMKVMKVM\0\0\0", 0)) {
>> +		BUG();
>> +	}
>>   
>> -	msr = cpuid_ebx(xen_cpuid_base() + 2);
>> -	pfn = __pa(hypercall_page);
>> -	wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32));
>> +	init_pvh_bootparams(xen_guest);
>>   
>> -	init_pvh_bootparams();
>> +	if (xen_guest)
>> +		x86_init.oem.arch_setup = xen_pvh_arch_setup;
>>   
>> -	x86_init.oem.arch_setup = xen_pvh_arch_setup;
>>   }
>>


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [RFC PATCH] KVM: x86: Allow Qemu/KVM to use PVH entry point
  2017-11-29 14:44             ` Paolo Bonzini
                                 ` (2 preceding siblings ...)
  2017-11-30 18:23               ` Maran Wilson
@ 2017-11-30 18:23               ` Maran Wilson
  2017-12-01  8:08                 ` Paolo Bonzini
  2017-12-01  8:08                 ` Paolo Bonzini
  3 siblings, 2 replies; 41+ messages in thread
From: Maran Wilson @ 2017-11-30 18:23 UTC (permalink / raw)
  To: Paolo Bonzini, Boris Ostrovsky, Roger Pau Monné, Juergen Gross
  Cc: tglx, mingo, hpa, x86, xen-devel, linux-kernel, rkrcmar,
	JBeulich, andrew.cooper3, kvm

On 11/29/2017 6:44 AM, Paolo Bonzini wrote:
> I actually like this patch, except that I'd get the e820 memory map from
> fw_cfg (see the first part of
> https://github.com/bonzini/qboot/blob/master/fw_cfg.c, and extract_e820
> in https://github.com/bonzini/qboot/blob/master/main.c) instead of the
> second module.

Hi Paolo,

I want to make sure I understand exactly what you are suggesting...

Are you saying the Linux PVH entry code (such as init_pvh_bootparams()) 
should use the fw_cfg interface to read the e820 memory map data and put 
it into the zeropage? Basically, keeping the patch very much like it 
already is, just extracting the e820 data via the fw_cfg interface 
instead of from the second module of start_info struct?

If that is the case, I guess I'm a bit hesitant to throw the QEMU 
specific fw_cfg interface into the mix on the Linux PVH side when the 
existing PVH ABI already seems to contain an interface for passing 
modules/blobs to the guest. But if you feel there is a compelling reason 
to use the fw_cfg interface here, I'm happy to explore that approach 
further.

Thanks,
-Maran

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [RFC PATCH] KVM: x86: Allow Qemu/KVM to use PVH entry point
  2017-11-29 14:44             ` Paolo Bonzini
  2017-11-29 14:47               ` Juergen Gross
  2017-11-29 14:47               ` Juergen Gross
@ 2017-11-30 18:23               ` Maran Wilson
  2017-11-30 18:23               ` Maran Wilson
  3 siblings, 0 replies; 41+ messages in thread
From: Maran Wilson @ 2017-11-30 18:23 UTC (permalink / raw)
  To: Paolo Bonzini, Boris Ostrovsky, Roger Pau Monné, Juergen Gross
  Cc: kvm, rkrcmar, andrew.cooper3, x86, linux-kernel, mingo, JBeulich,
	hpa, xen-devel, tglx

On 11/29/2017 6:44 AM, Paolo Bonzini wrote:
> I actually like this patch, except that I'd get the e820 memory map from
> fw_cfg (see the first part of
> https://github.com/bonzini/qboot/blob/master/fw_cfg.c, and extract_e820
> in https://github.com/bonzini/qboot/blob/master/main.c) instead of the
> second module.

Hi Paolo,

I want to make sure I understand exactly what you are suggesting...

Are you saying the Linux PVH entry code (such as init_pvh_bootparams()) 
should use the fw_cfg interface to read the e820 memory map data and put 
it into the zeropage? Basically, keeping the patch very much like it 
already is, just extracting the e820 data via the fw_cfg interface 
instead of from the second module of start_info struct?

If that is the case, I guess I'm a bit hesitant to throw the QEMU 
specific fw_cfg interface into the mix on the Linux PVH side when the 
existing PVH ABI already seems to contain an interface for passing 
modules/blobs to the guest. But if you feel there is a compelling reason 
to use the fw_cfg interface here, I'm happy to explore that approach 
further.

Thanks,
-Maran

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [RFC PATCH] KVM: x86: Allow Qemu/KVM to use PVH entry point
  2017-11-30 18:23               ` Maran Wilson
  2017-12-01  8:08                 ` Paolo Bonzini
@ 2017-12-01  8:08                 ` Paolo Bonzini
  2017-12-07 23:03                   ` Maran Wilson
  2017-12-07 23:03                   ` Maran Wilson
  1 sibling, 2 replies; 41+ messages in thread
From: Paolo Bonzini @ 2017-12-01  8:08 UTC (permalink / raw)
  To: Maran Wilson, Boris Ostrovsky, Roger Pau Monné, Juergen Gross
  Cc: tglx, mingo, hpa, x86, xen-devel, linux-kernel, rkrcmar,
	JBeulich, andrew.cooper3, kvm

On 30/11/2017 19:23, Maran Wilson wrote:
> Are you saying the Linux PVH entry code (such as init_pvh_bootparams())
> should use the fw_cfg interface to read the e820 memory map data and put
> it into the zeropage? Basically, keeping the patch very much like it
> already is, just extracting the e820 data via the fw_cfg interface
> instead of from the second module of start_info struct?

Yes.

> If that is the case, I guess I'm a bit hesitant to throw the QEMU
> specific fw_cfg interface into the mix on the Linux PVH side when the
> existing PVH ABI already seems to contain an interface for passing
> modules/blobs to the guest. But if you feel there is a compelling reason
> to use the fw_cfg interface here, I'm happy to explore that approach
> further.

I think the same holds true for Xen, but it is still using a hypercall 
to get the memory map.  In the end, using fw_cfg seems closest to what 
the Xen code does.

There are other possibilities:

1) define a v2 PVH ABI that includes the e820 map.

2) modify enlighten_pvh.c to get the start info in multiboot format,
something like:

diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c
index 98ab17673454..656e41449db0 100644
--- a/arch/x86/xen/enlighten_pvh.c
+++ b/arch/x86/xen/enlighten_pvh.c
@@ -88,19 +88,22 @@ void __init xen_prepare_pvh(void)
 	u32 msr;
 	u64 pfn;
 
-	if (pvh_start_info.magic != XEN_HVM_START_MAGIC_VALUE) {
+	if (pvh_start_info.magic == XEN_HVM_START_MAGIC_VALUE) {
+		xen_pvh = 1;
+
+		init_pvh_bootparams_xen();
+
+		msr = cpuid_ebx(xen_cpuid_base() + 2);
+		pfn = __pa(hypercall_page);
+		wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32));
+
+		x86_init.oem.arch_setup = xen_pvh_arch_setup;
+	} else if (pvh_start_info.magic == MULTIBOOT_INFO_MAGIC_VALUE) {
+		init_pvh_bootparams_multiboot();
+
+	} else {
 		xen_raw_printk("Error: Unexpected magic value (0x%08x)\n",
 				pvh_start_info.magic);
 		BUG();
 	}
-
-	xen_pvh = 1;
-
-	msr = cpuid_ebx(xen_cpuid_base() + 2);
-	pfn = __pa(hypercall_page);
-	wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32));
-
-	init_pvh_bootparams();
-
-	x86_init.oem.arch_setup = xen_pvh_arch_setup;
 }


Note that this would *not* be a multiboot-format kernel, as it would
still have the Xen PVH ELF note.  It would just reuse the format of
the start info struct.

However, I think it is simpler to just use the e820 memory map from
fw_cfg.

Paolo

^ permalink raw reply related	[flat|nested] 41+ messages in thread

* Re: [RFC PATCH] KVM: x86: Allow Qemu/KVM to use PVH entry point
  2017-11-30 18:23               ` Maran Wilson
@ 2017-12-01  8:08                 ` Paolo Bonzini
  2017-12-01  8:08                 ` Paolo Bonzini
  1 sibling, 0 replies; 41+ messages in thread
From: Paolo Bonzini @ 2017-12-01  8:08 UTC (permalink / raw)
  To: Maran Wilson, Boris Ostrovsky, Roger Pau Monné, Juergen Gross
  Cc: kvm, rkrcmar, andrew.cooper3, x86, linux-kernel, mingo, JBeulich,
	hpa, xen-devel, tglx

On 30/11/2017 19:23, Maran Wilson wrote:
> Are you saying the Linux PVH entry code (such as init_pvh_bootparams())
> should use the fw_cfg interface to read the e820 memory map data and put
> it into the zeropage? Basically, keeping the patch very much like it
> already is, just extracting the e820 data via the fw_cfg interface
> instead of from the second module of start_info struct?

Yes.

> If that is the case, I guess I'm a bit hesitant to throw the QEMU
> specific fw_cfg interface into the mix on the Linux PVH side when the
> existing PVH ABI already seems to contain an interface for passing
> modules/blobs to the guest. But if you feel there is a compelling reason
> to use the fw_cfg interface here, I'm happy to explore that approach
> further.

I think the same holds true for Xen, but it is still using a hypercall 
to get the memory map.  In the end, using fw_cfg seems closest to what 
the Xen code does.

There are other possibilities:

1) define a v2 PVH ABI that includes the e820 map.

2) modify enlighten_pvh.c to get the start info in multiboot format,
something like:

diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c
index 98ab17673454..656e41449db0 100644
--- a/arch/x86/xen/enlighten_pvh.c
+++ b/arch/x86/xen/enlighten_pvh.c
@@ -88,19 +88,22 @@ void __init xen_prepare_pvh(void)
 	u32 msr;
 	u64 pfn;
 
-	if (pvh_start_info.magic != XEN_HVM_START_MAGIC_VALUE) {
+	if (pvh_start_info.magic == XEN_HVM_START_MAGIC_VALUE) {
+		xen_pvh = 1;
+
+		init_pvh_bootparams_xen();
+
+		msr = cpuid_ebx(xen_cpuid_base() + 2);
+		pfn = __pa(hypercall_page);
+		wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32));
+
+		x86_init.oem.arch_setup = xen_pvh_arch_setup;
+	} else if (pvh_start_info.magic == MULTIBOOT_INFO_MAGIC_VALUE) {
+		init_pvh_bootparams_multiboot();
+
+	} else {
 		xen_raw_printk("Error: Unexpected magic value (0x%08x)\n",
 				pvh_start_info.magic);
 		BUG();
 	}
-
-	xen_pvh = 1;
-
-	msr = cpuid_ebx(xen_cpuid_base() + 2);
-	pfn = __pa(hypercall_page);
-	wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32));
-
-	init_pvh_bootparams();
-
-	x86_init.oem.arch_setup = xen_pvh_arch_setup;
 }


Note that this would *not* be a multiboot-format kernel, as it would
still have the Xen PVH ELF note.  It would just reuse the format of
the start info struct.

However, I think it is simpler to just use the e820 memory map from
fw_cfg.

Paolo

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply related	[flat|nested] 41+ messages in thread

* Re: [RFC PATCH] KVM: x86: Allow Qemu/KVM to use PVH entry point
  2017-12-01  8:08                 ` Paolo Bonzini
@ 2017-12-07 23:03                   ` Maran Wilson
  2017-12-07 23:03                   ` Maran Wilson
  1 sibling, 0 replies; 41+ messages in thread
From: Maran Wilson @ 2017-12-07 23:03 UTC (permalink / raw)
  To: Paolo Bonzini, Boris Ostrovsky, Roger Pau Monné, Juergen Gross
  Cc: tglx, mingo, hpa, x86, xen-devel, linux-kernel, rkrcmar,
	JBeulich, andrew.cooper3, kvm

Just FYI: I sent out a v2 of this patch but in doing so I moved a few 
people from the "to" line to the "cc" line.

For anyone who previously did not comment but still wanted to follow the 
discussion, here's the link to the v2 email:

https://lkml.org/lkml/2017/12/7/1624

Thanks,
-Maran

On 12/1/2017 12:08 AM, Paolo Bonzini wrote:
> On 30/11/2017 19:23, Maran Wilson wrote:
>> Are you saying the Linux PVH entry code (such as init_pvh_bootparams())
>> should use the fw_cfg interface to read the e820 memory map data and put
>> it into the zeropage? Basically, keeping the patch very much like it
>> already is, just extracting the e820 data via the fw_cfg interface
>> instead of from the second module of start_info struct?
> Yes.
>
>> If that is the case, I guess I'm a bit hesitant to throw the QEMU
>> specific fw_cfg interface into the mix on the Linux PVH side when the
>> existing PVH ABI already seems to contain an interface for passing
>> modules/blobs to the guest. But if you feel there is a compelling reason
>> to use the fw_cfg interface here, I'm happy to explore that approach
>> further.
> I think the same holds true for Xen, but it is still using a hypercall
> to get the memory map.  In the end, using fw_cfg seems closest to what
> the Xen code does.
>
> There are other possibilities:
>
> 1) defining a v2 PVH ABI that includes the e820 map would also be a
> possibility.
>
> 2) modify enlighten_pvh.c to get the start info in multiboot format,
> something like:
>
> diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c
> index 98ab17673454..656e41449db0 100644
> --- a/arch/x86/xen/enlighten_pvh.c
> +++ b/arch/x86/xen/enlighten_pvh.c
> @@ -88,19 +88,22 @@ void __init xen_prepare_pvh(void)
>   	u32 msr;
>   	u64 pfn;
>   
> -	if (pvh_start_info.magic != XEN_HVM_START_MAGIC_VALUE) {
> +	if (pvh_start_info.magic == XEN_HVM_START_MAGIC_VALUE) {
> +		xen_pvh = 1;
> +
> +		init_pvh_bootparams_xen();
> +
> +		msr = cpuid_ebx(xen_cpuid_base() + 2);
> +		pfn = __pa(hypercall_page);
> +		wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32));
> +
> +		x86_init.oem.arch_setup = xen_pvh_arch_setup;
> +	} else if (pvh_start_info.magic == MULTIBOOT_INFO_MAGIC_VALUE) {
> +		init_pvh_bootparams_multiboot();
> +
> +	} else {
>   		xen_raw_printk("Error: Unexpected magic value (0x%08x)\n",
>   				pvh_start_info.magic);
>   		BUG();
>   	}
> -
> -	xen_pvh = 1;
> -
> -	msr = cpuid_ebx(xen_cpuid_base() + 2);
> -	pfn = __pa(hypercall_page);
> -	wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32));
> -
> -	init_pvh_bootparams();
> -
> -	x86_init.oem.arch_setup = xen_pvh_arch_setup;
>   }
>
>
> Note that this would *not* be a multiboot-format kernel, as it would
> still have the Xen PVH ELF note.  It would just reuse the format of
> the start info struct.
>
> However, I think it is simpler to just use the e820 memory map from
> fw_cfg.
>
> Paolo

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [RFC PATCH] KVM: x86: Allow Qemu/KVM to use PVH entry point
  2017-12-01  8:08                 ` Paolo Bonzini
  2017-12-07 23:03                   ` Maran Wilson
@ 2017-12-07 23:03                   ` Maran Wilson
  1 sibling, 0 replies; 41+ messages in thread
From: Maran Wilson @ 2017-12-07 23:03 UTC (permalink / raw)
  To: Paolo Bonzini, Boris Ostrovsky, Roger Pau Monné, Juergen Gross
  Cc: kvm, rkrcmar, andrew.cooper3, x86, linux-kernel, mingo, JBeulich,
	hpa, xen-devel, tglx

Just FYI: I sent out a v2 of this patch but in doing so I moved a few 
people from the "to" line to the "cc" line.

For anyone who previously did not comment but still wanted to follow the 
discussion, here's the link to the v2 email:

https://lkml.org/lkml/2017/12/7/1624

Thanks,
-Maran

On 12/1/2017 12:08 AM, Paolo Bonzini wrote:
> On 30/11/2017 19:23, Maran Wilson wrote:
>> Are you saying the Linux PVH entry code (such as init_pvh_bootparams())
>> should use the fw_cfg interface to read the e820 memory map data and put
>> it into the zeropage? Basically, keeping the patch very much like it
>> already is, just extracting the e820 data via the fw_cfg interface
>> instead of from the second module of start_info struct?
> Yes.
>
>> If that is the case, I guess I'm a bit hesitant to throw the QEMU
>> specific fw_cfg interface into the mix on the Linux PVH side when the
>> existing PVH ABI already seems to contain an interface for passing
>> modules/blobs to the guest. But if you feel there is a compelling reason
>> to use the fw_cfg interface here, I'm happy to explore that approach
>> further.
> I think the same holds true for Xen, but it is still using a hypercall
> to get the memory map.  In the end, using fw_cfg seems closest to what
> the Xen code does.
>
> There are other possibilities:
>
> 1) defining a v2 PVH ABI that includes the e820 map would also be a
> possibility.
>
> 2) modify enlighten_pvh.c to get the start info in multiboot format,
> something like:
>
> diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c
> index 98ab17673454..656e41449db0 100644
> --- a/arch/x86/xen/enlighten_pvh.c
> +++ b/arch/x86/xen/enlighten_pvh.c
> @@ -88,19 +88,22 @@ void __init xen_prepare_pvh(void)
>   	u32 msr;
>   	u64 pfn;
>   
> -	if (pvh_start_info.magic != XEN_HVM_START_MAGIC_VALUE) {
> +	if (pvh_start_info.magic == XEN_HVM_START_MAGIC_VALUE) {
> +		xen_pvh = 1;
> +
> +		init_pvh_bootparams_xen();
> +
> +		msr = cpuid_ebx(xen_cpuid_base() + 2);
> +		pfn = __pa(hypercall_page);
> +		wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32));
> +
> +		x86_init.oem.arch_setup = xen_pvh_arch_setup;
> +	} else if (pvh_start_info.magic == MULTIBOOT_INFO_MAGIC_VALUE) {
> +		init_pvh_bootparams_multiboot();
> +
> +	} else {
>   		xen_raw_printk("Error: Unexpected magic value (0x%08x)\n",
>   				pvh_start_info.magic);
>   		BUG();
>   	}
> -
> -	xen_pvh = 1;
> -
> -	msr = cpuid_ebx(xen_cpuid_base() + 2);
> -	pfn = __pa(hypercall_page);
> -	wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32));
> -
> -	init_pvh_bootparams();
> -
> -	x86_init.oem.arch_setup = xen_pvh_arch_setup;
>   }
>
>
> Note that this would *not* be a multiboot-format kernel, as it would
> still have the Xen PVH ELF note.  It would just reuse the format of
> the start info struct.
>
> However, I think it is simpler to just use the e820 memory map from
> fw_cfg.
>
> Paolo


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 41+ messages in thread

end of thread, other threads:[~2017-12-07 23:06 UTC | newest]

Thread overview: 41+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-11-28 19:34 [RFC PATCH] KVM: x86: Allow Qemu/KVM to use PVH entry point Maran Wilson
2017-11-28 19:34 ` Maran Wilson
2017-11-28 19:41 ` Andrew Cooper
2017-11-28 19:41   ` Andrew Cooper
2017-11-28 19:41   ` Andrew Cooper
2017-11-28 19:58 ` Christoph Hellwig
2017-11-28 19:58 ` Christoph Hellwig
2017-11-29  8:21 ` Juergen Gross
2017-11-29  8:50   ` Roger Pau Monné
2017-11-29  8:50   ` Roger Pau Monné
2017-11-29 14:03     ` Boris Ostrovsky
2017-11-29 14:11       ` Juergen Gross
2017-11-29 14:11       ` Juergen Gross
2017-11-29 14:18         ` Roger Pau Monné
2017-11-29 14:25           ` Boris Ostrovsky
2017-11-29 14:44             ` Paolo Bonzini
2017-11-29 14:44             ` Paolo Bonzini
2017-11-29 14:47               ` Juergen Gross
2017-11-29 14:47               ` Juergen Gross
2017-11-29 14:50                 ` Paolo Bonzini
2017-11-29 14:50                 ` Paolo Bonzini
2017-11-29 14:52                 ` Andrew Cooper
2017-11-29 14:52                 ` Andrew Cooper
2017-11-30 18:23               ` Maran Wilson
2017-11-30 18:23               ` Maran Wilson
2017-12-01  8:08                 ` Paolo Bonzini
2017-12-01  8:08                 ` Paolo Bonzini
2017-12-07 23:03                   ` Maran Wilson
2017-12-07 23:03                   ` Maran Wilson
2017-11-29 14:25           ` Boris Ostrovsky
2017-11-29 14:18         ` Roger Pau Monné
2017-11-29 14:03     ` Boris Ostrovsky
2017-11-29 17:24   ` Maran Wilson
2017-11-29 17:24   ` Maran Wilson
2017-11-29  8:21 ` Juergen Gross
2017-11-29  8:59 ` Paolo Bonzini
2017-11-29 17:14   ` Maran Wilson
2017-11-29 17:14   ` Maran Wilson
2017-11-29 17:16     ` Paolo Bonzini
2017-11-29 17:16     ` Paolo Bonzini
2017-11-29  8:59 ` Paolo Bonzini

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.