linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v5] hyperv: root partition faults writing to VP ASSIST MSR PAGE
@ 2021-07-31 12:05 Praveen Kumar
  2021-07-31 20:36 ` Sunil Muthuswamy
  2021-08-02 12:51 ` Wei Liu
  0 siblings, 2 replies; 4+ messages in thread
From: Praveen Kumar @ 2021-07-31 12:05 UTC (permalink / raw)
  To: linux-hyperv, linux-kernel
  Cc: kys, haiyangz, sthemmin, wei.liu, decui, tglx, mingo, bp, x86,
	hpa, viremana, sunilmut, nunodasneves

For the root partition, the VP assist pages are pre-determined by the
hypervisor. The root kernel is not allowed to change them to
different locations. Thus, we are getting the stack trace below,
because in the current implementation the root kernel tries to write
to the VP assist page MSR.

[ 2.778197] unchecked MSR access error: WRMSR to 0x40000073 (tried to
write 0x0000000145ac5001) at rIP: 0xffffffff810c1084
(native_write_msr+0x4/0x30)
[ 2.784867] Call Trace:
[ 2.791507] hv_cpu_init+0xf1/0x1c0
[ 2.798144] ? hyperv_report_panic+0xd0/0xd0
[ 2.804806] cpuhp_invoke_callback+0x11a/0x440
[ 2.811465] ? hv_resume+0x90/0x90
[ 2.818137] cpuhp_issue_call+0x126/0x130
[ 2.824782] __cpuhp_setup_state_cpuslocked+0x102/0x2b0
[ 2.831427] ? hyperv_report_panic+0xd0/0xd0
[ 2.838075] ? hyperv_report_panic+0xd0/0xd0
[ 2.844723] ? hv_resume+0x90/0x90
[ 2.851375] __cpuhp_setup_state+0x3d/0x90
[ 2.858030] hyperv_init+0x14e/0x410
[ 2.864689] ? enable_IR_x2apic+0x190/0x1a0
[ 2.871349] apic_intr_mode_init+0x8b/0x100
[ 2.878017] x86_late_time_init+0x20/0x30
[ 2.884675] start_kernel+0x459/0x4fb
[ 2.891329] secondary_startup_64_no_verify+0xb0/0xbb

Since the hypervisor already provides the VP assist page for the root
partition, we need to memremap the memory from the hypervisor for the
root kernel to use. The mapping is done in hv_cpu_init during bringup
and is unmapped in hv_cpu_die during teardown.

Signed-off-by: Praveen Kumar <kumarpraveen@linux.microsoft.com>
---
 arch/x86/hyperv/hv_init.c          | 64 ++++++++++++++++++++----------
 arch/x86/include/asm/hyperv-tlfs.h |  9 +++++
 2 files changed, 53 insertions(+), 20 deletions(-)

changelog:
v1: initial patch
v2: commit message changes, removal of HV_MSR_APIC_ACCESS_AVAILABLE
    check and addition of null check before reading the VP assist MSR
    for root partition
v3: added new data structure to handle VP ASSIST MSR page and done
    handling in hv_cpu_init and hv_cpu_die
v4: better code alignment, VP ASSIST handling correction for root
    partition in hv_cpu_die and renaming of hv_vp_assist_msr_contents
    attribute
v5: disable VP ASSIST page for root partition during hv_cpu_die
---
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 6f247e7e07eb..a46bd92c532a 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -44,6 +44,7 @@ EXPORT_SYMBOL_GPL(hv_vp_assist_page);
 
 static int hv_cpu_init(unsigned int cpu)
 {
+	union hv_vp_assist_msr_contents msr = {0};
 	struct hv_vp_assist_page **hvp = &hv_vp_assist_page[smp_processor_id()];
 	int ret;
 
@@ -54,25 +55,34 @@ static int hv_cpu_init(unsigned int cpu)
 	if (!hv_vp_assist_page)
 		return 0;
 
-	/*
-	 * The VP ASSIST PAGE is an "overlay" page (see Hyper-V TLFS's Section
-	 * 5.2.1 "GPA Overlay Pages"). Here it must be zeroed out to make sure
-	 * we always write the EOI MSR in hv_apic_eoi_write() *after* the
-	 * EOI optimization is disabled in hv_cpu_die(), otherwise a CPU may
-	 * not be stopped in the case of CPU offlining and the VM will hang.
-	 */
 	if (!*hvp) {
-		*hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL | __GFP_ZERO);
-	}
-
-	if (*hvp) {
-		u64 val;
-
-		val = vmalloc_to_pfn(*hvp);
-		val = (val << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT) |
-			HV_X64_MSR_VP_ASSIST_PAGE_ENABLE;
-
-		wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, val);
+		if (hv_root_partition) {
+			/*
+			 * For Root partition we get the hypervisor provided VP ASSIST
+			 * PAGE, instead of allocating a new page.
+			 */
+			rdmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64);
+			*hvp = memremap(msr.pfn <<
+					HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT,
+					PAGE_SIZE, MEMREMAP_WB);
+		} else {
+			/*
+			 * The VP ASSIST PAGE is an "overlay" page (see Hyper-V TLFS's
+			 * Section 5.2.1 "GPA Overlay Pages"). Here it must be zeroed
+			 * out to make sure we always write the EOI MSR in
+			 * hv_apic_eoi_write() *after* the EOI optimization is disabled
+			 * in hv_cpu_die(), otherwise a CPU may not be stopped in the
+			 * case of CPU offlining and the VM will hang.
+			 */
+			*hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL | __GFP_ZERO);
+			if (*hvp)
+				msr.pfn = vmalloc_to_pfn(*hvp);
+		}
+		WARN_ON(!(*hvp));
+		if (*hvp) {
+			msr.enable = 1;
+			wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64);
+		}
 	}
 
 	return 0;
@@ -170,8 +180,22 @@ static int hv_cpu_die(unsigned int cpu)
 
 	hv_common_cpu_die(cpu);
 
-	if (hv_vp_assist_page && hv_vp_assist_page[cpu])
-		wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, 0);
+	if (hv_vp_assist_page && hv_vp_assist_page[cpu]) {
+		union hv_vp_assist_msr_contents msr = {0};
+		if (hv_root_partition) {
+			/*
+			 * For Root partition the VP ASSIST page is mapped to
+			 * hypervisor provided page, and thus, we unmap the
+			 * page here and nullify it, so that in future we have
+			 * correct page address mapped in hv_cpu_init.
+			 */
+			memunmap(hv_vp_assist_page[cpu]);
+			hv_vp_assist_page[cpu] = NULL;
+			rdmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64);
+			msr.enable = 0;
+		}
+		wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64);
+	}
 
 	if (hv_reenlightenment_cb == NULL)
 		return 0;
diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index f1366ce609e3..2322d6bd5883 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -288,6 +288,15 @@ union hv_x64_msr_hypercall_contents {
 	} __packed;
 };
 
+union hv_vp_assist_msr_contents {
+	u64 as_uint64;
+	struct {
+		u64 enable:1;
+		u64 reserved:11;
+		u64 pfn:52;
+	} __packed;
+};
+
 struct hv_reenlightenment_control {
 	__u64 vector:8;
 	__u64 reserved1:8;
-- 
2.25.1


^ permalink raw reply	[flat|nested] 4+ messages in thread

* RE: [PATCH v5] hyperv: root partition faults writing to VP ASSIST MSR PAGE
  2021-07-31 12:05 [PATCH v5] hyperv: root partition faults writing to VP ASSIST MSR PAGE Praveen Kumar
@ 2021-07-31 20:36 ` Sunil Muthuswamy
  2021-08-02 12:51 ` Wei Liu
  1 sibling, 0 replies; 4+ messages in thread
From: Sunil Muthuswamy @ 2021-07-31 20:36 UTC (permalink / raw)
  To: Praveen Kumar, linux-hyperv, linux-kernel
  Cc: KY Srinivasan, Haiyang Zhang, Stephen Hemminger, wei.liu,
	Dexuan Cui, tglx, mingo, bp, x86, hpa, viremana, nunodasneves

> For Root partition the VP assist pages are pre-determined by the
> hypervisor. The Root kernel is not allowed to change them to
> different locations. And thus, we are getting below stack as in
> current implementation Root is trying to perform write to specific
> MSR.
> 
> [ 2.778197] unchecked MSR access error: WRMSR to 0x40000073 (tried to
> write 0x0000000145ac5001) at rIP: 0xffffffff810c1084
> (native_write_msr+0x4/0x30)
> [ 2.784867] Call Trace:
> [ 2.791507] hv_cpu_init+0xf1/0x1c0
> [ 2.798144] ? hyperv_report_panic+0xd0/0xd0
> [ 2.804806] cpuhp_invoke_callback+0x11a/0x440
> [ 2.811465] ? hv_resume+0x90/0x90
> [ 2.818137] cpuhp_issue_call+0x126/0x130
> [ 2.824782] __cpuhp_setup_state_cpuslocked+0x102/0x2b0
> [ 2.831427] ? hyperv_report_panic+0xd0/0xd0
> [ 2.838075] ? hyperv_report_panic+0xd0/0xd0
> [ 2.844723] ? hv_resume+0x90/0x90
> [ 2.851375] __cpuhp_setup_state+0x3d/0x90
> [ 2.858030] hyperv_init+0x14e/0x410
> [ 2.864689] ? enable_IR_x2apic+0x190/0x1a0
> [ 2.871349] apic_intr_mode_init+0x8b/0x100
> [ 2.878017] x86_late_time_init+0x20/0x30
> [ 2.884675] start_kernel+0x459/0x4fb
> [ 2.891329] secondary_startup_64_no_verify+0xb0/0xbb
> 
> Since, the hypervisor already provides the VP assist page for root
> partition, we need to memremap the memory from hypervisor for root
> kernel to use. The mapping is done in hv_cpu_init during bringup and
> is unmapped in hv_cpu_die during teardown.
> 
> Signed-off-by: Praveen Kumar <kumarpraveen@linux.microsoft.com>
> ---
>  arch/x86/hyperv/hv_init.c          | 64 ++++++++++++++++++++----------
>  arch/x86/include/asm/hyperv-tlfs.h |  9 +++++
>  2 files changed, 53 insertions(+), 20 deletions(-)
> 
> changelog:
> v1: initial patch
> v2: commit message changes, removal of HV_MSR_APIC_ACCESS_AVAILABLE
>     check and addition of null check before reading the VP assist MSR
>     for root partition
> v3: added new data structure to handle VP ASSIST MSR page and done
>     handling in hv_cpu_init and hv_cpu_die
> v4: better code alignment, VP ASSIST handling correction for root
>     partition in hv_cpu_die and renaming of hv_vp_assist_msr_contents
>     attribute
> v5: disable VP ASSIST page for root partition during hv_cpu_die
> ---
> diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
> index 6f247e7e07eb..a46bd92c532a 100644
> --- a/arch/x86/hyperv/hv_init.c
> +++ b/arch/x86/hyperv/hv_init.c
> @@ -44,6 +44,7 @@ EXPORT_SYMBOL_GPL(hv_vp_assist_page);
> 
>  static int hv_cpu_init(unsigned int cpu)
>  {
> +	union hv_vp_assist_msr_contents msr = {0};
>  	struct hv_vp_assist_page **hvp = &hv_vp_assist_page[smp_processor_id()];
>  	int ret;
> 
> @@ -54,25 +55,34 @@ static int hv_cpu_init(unsigned int cpu)
>  	if (!hv_vp_assist_page)
>  		return 0;
> 
> -	/*
> -	 * The VP ASSIST PAGE is an "overlay" page (see Hyper-V TLFS's Section
> -	 * 5.2.1 "GPA Overlay Pages"). Here it must be zeroed out to make sure
> -	 * we always write the EOI MSR in hv_apic_eoi_write() *after* the
> -	 * EOI optimization is disabled in hv_cpu_die(), otherwise a CPU may
> -	 * not be stopped in the case of CPU offlining and the VM will hang.
> -	 */
>  	if (!*hvp) {
> -		*hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL | __GFP_ZERO);
> -	}
> -
> -	if (*hvp) {
> -		u64 val;
> -
> -		val = vmalloc_to_pfn(*hvp);
> -		val = (val << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT) |
> -			HV_X64_MSR_VP_ASSIST_PAGE_ENABLE;
> -
> -		wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, val);
> +		if (hv_root_partition) {
> +			/*
> +			 * For Root partition we get the hypervisor provided VP ASSIST
> +			 * PAGE, instead of allocating a new page.
> +			 */
> +			rdmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64);
> +			*hvp = memremap(msr.pfn <<
> +					HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT,
> +					PAGE_SIZE, MEMREMAP_WB);
> +		} else {
> +			/*
> +			 * The VP ASSIST PAGE is an "overlay" page (see Hyper-V TLFS's
> +			 * Section 5.2.1 "GPA Overlay Pages"). Here it must be zeroed
> +			 * out to make sure we always write the EOI MSR in
> +			 * hv_apic_eoi_write() *after* the EOI optimization is disabled
> +			 * in hv_cpu_die(), otherwise a CPU may not be stopped in the
> +			 * case of CPU offlining and the VM will hang.
> +			 */
> +			*hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL | __GFP_ZERO);
> +			if (*hvp)
> +				msr.pfn = vmalloc_to_pfn(*hvp);
> +		}
> +		WARN_ON(!(*hvp));
> +		if (*hvp) {
> +			msr.enable = 1;
> +			wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64);
> +		}
>  	}
> 
>  	return 0;
> @@ -170,8 +180,22 @@ static int hv_cpu_die(unsigned int cpu)
> 
>  	hv_common_cpu_die(cpu);
> 
> -	if (hv_vp_assist_page && hv_vp_assist_page[cpu])
> -		wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, 0);
> +	if (hv_vp_assist_page && hv_vp_assist_page[cpu]) {
> +		union hv_vp_assist_msr_contents msr = {0};
> +		if (hv_root_partition) {
> +			/*
> +			 * For Root partition the VP ASSIST page is mapped to
> +			 * hypervisor provided page, and thus, we unmap the
> +			 * page here and nullify it, so that in future we have
> +			 * correct page address mapped in hv_cpu_init.
> +			 */
> +			memunmap(hv_vp_assist_page[cpu]);
> +			hv_vp_assist_page[cpu] = NULL;
> +			rdmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64);
> +			msr.enable = 0;
> +		}
> +		wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64);
> +	}
> 
>  	if (hv_reenlightenment_cb == NULL)
>  		return 0;
> diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
> index f1366ce609e3..2322d6bd5883 100644
> --- a/arch/x86/include/asm/hyperv-tlfs.h
> +++ b/arch/x86/include/asm/hyperv-tlfs.h
> @@ -288,6 +288,15 @@ union hv_x64_msr_hypercall_contents {
>  	} __packed;
>  };
> 
> +union hv_vp_assist_msr_contents {
> +	u64 as_uint64;
> +	struct {
> +		u64 enable:1;
> +		u64 reserved:11;
> +		u64 pfn:52;
> +	} __packed;
> +};
> +
>  struct hv_reenlightenment_control {
>  	__u64 vector:8;
>  	__u64 reserved1:8;
> --
> 2.25.1

Reviewed-by: Sunil Muthuswamy <sunilmut@microsoft.com>

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v5] hyperv: root partition faults writing to VP ASSIST MSR PAGE
  2021-07-31 12:05 [PATCH v5] hyperv: root partition faults writing to VP ASSIST MSR PAGE Praveen Kumar
  2021-07-31 20:36 ` Sunil Muthuswamy
@ 2021-08-02 12:51 ` Wei Liu
  2021-08-04 12:20   ` Wei Liu
  1 sibling, 1 reply; 4+ messages in thread
From: Wei Liu @ 2021-08-02 12:51 UTC (permalink / raw)
  To: Praveen Kumar
  Cc: linux-hyperv, linux-kernel, kys, haiyangz, sthemmin, wei.liu,
	decui, tglx, mingo, bp, x86, hpa, viremana, sunilmut,
	nunodasneves

On Sat, Jul 31, 2021 at 05:35:19PM +0530, Praveen Kumar wrote:
> For Root partition the VP assist pages are pre-determined by the
> hypervisor. The Root kernel is not allowed to change them to
> different locations. And thus, we are getting below stack as in
> current implementation Root is trying to perform write to specific
> MSR.
> 
> [ 2.778197] unchecked MSR access error: WRMSR to 0x40000073 (tried to
> write 0x0000000145ac5001) at rIP: 0xffffffff810c1084
> (native_write_msr+0x4/0x30)
> [ 2.784867] Call Trace:
> [ 2.791507] hv_cpu_init+0xf1/0x1c0
> [ 2.798144] ? hyperv_report_panic+0xd0/0xd0
> [ 2.804806] cpuhp_invoke_callback+0x11a/0x440
> [ 2.811465] ? hv_resume+0x90/0x90
> [ 2.818137] cpuhp_issue_call+0x126/0x130
> [ 2.824782] __cpuhp_setup_state_cpuslocked+0x102/0x2b0
> [ 2.831427] ? hyperv_report_panic+0xd0/0xd0
> [ 2.838075] ? hyperv_report_panic+0xd0/0xd0
> [ 2.844723] ? hv_resume+0x90/0x90
> [ 2.851375] __cpuhp_setup_state+0x3d/0x90
> [ 2.858030] hyperv_init+0x14e/0x410
> [ 2.864689] ? enable_IR_x2apic+0x190/0x1a0
> [ 2.871349] apic_intr_mode_init+0x8b/0x100
> [ 2.878017] x86_late_time_init+0x20/0x30
> [ 2.884675] start_kernel+0x459/0x4fb
> [ 2.891329] secondary_startup_64_no_verify+0xb0/0xbb
> 
> Since, the hypervisor already provides the VP assist page for root
> partition, we need to memremap the memory from hypervisor for root
> kernel to use. The mapping is done in hv_cpu_init during bringup and
> is unmapped in hv_cpu_die during teardown.
> 
> Signed-off-by: Praveen Kumar <kumarpraveen@linux.microsoft.com>

Looks good. I can fix a few styling issues in code and comments when I
commit this patch.

Wei.

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v5] hyperv: root partition faults writing to VP ASSIST MSR PAGE
  2021-08-02 12:51 ` Wei Liu
@ 2021-08-04 12:20   ` Wei Liu
  0 siblings, 0 replies; 4+ messages in thread
From: Wei Liu @ 2021-08-04 12:20 UTC (permalink / raw)
  To: Praveen Kumar
  Cc: linux-hyperv, linux-kernel, kys, haiyangz, sthemmin, wei.liu,
	decui, tglx, mingo, bp, x86, hpa, viremana, sunilmut,
	nunodasneves

On Mon, Aug 02, 2021 at 12:51:33PM +0000, Wei Liu wrote:
> On Sat, Jul 31, 2021 at 05:35:19PM +0530, Praveen Kumar wrote:
> > For Root partition the VP assist pages are pre-determined by the
> > hypervisor. The Root kernel is not allowed to change them to
> > different locations. And thus, we are getting below stack as in
> > current implementation Root is trying to perform write to specific
> > MSR.
> > 
> > [ 2.778197] unchecked MSR access error: WRMSR to 0x40000073 (tried to
> > write 0x0000000145ac5001) at rIP: 0xffffffff810c1084
> > (native_write_msr+0x4/0x30)
> > [ 2.784867] Call Trace:
> > [ 2.791507] hv_cpu_init+0xf1/0x1c0
> > [ 2.798144] ? hyperv_report_panic+0xd0/0xd0
> > [ 2.804806] cpuhp_invoke_callback+0x11a/0x440
> > [ 2.811465] ? hv_resume+0x90/0x90
> > [ 2.818137] cpuhp_issue_call+0x126/0x130
> > [ 2.824782] __cpuhp_setup_state_cpuslocked+0x102/0x2b0
> > [ 2.831427] ? hyperv_report_panic+0xd0/0xd0
> > [ 2.838075] ? hyperv_report_panic+0xd0/0xd0
> > [ 2.844723] ? hv_resume+0x90/0x90
> > [ 2.851375] __cpuhp_setup_state+0x3d/0x90
> > [ 2.858030] hyperv_init+0x14e/0x410
> > [ 2.864689] ? enable_IR_x2apic+0x190/0x1a0
> > [ 2.871349] apic_intr_mode_init+0x8b/0x100
> > [ 2.878017] x86_late_time_init+0x20/0x30
> > [ 2.884675] start_kernel+0x459/0x4fb
> > [ 2.891329] secondary_startup_64_no_verify+0xb0/0xbb
> > 
> > Since, the hypervisor already provides the VP assist page for root
> > partition, we need to memremap the memory from hypervisor for root
> > kernel to use. The mapping is done in hv_cpu_init during bringup and
> > is unmapped in hv_cpu_die during teardown.
> > 
> > Signed-off-by: Praveen Kumar <kumarpraveen@linux.microsoft.com>
> 
> Looks good. I can fix a few styling issues in code and comments when I
> commit this patch.

Applied to hyperv-next. Thanks.

Wei.

> 
> Wei.

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2021-08-04 12:21 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-07-31 12:05 [PATCH v5] hyperv: root partition faults writing to VP ASSIST MSR PAGE Praveen Kumar
2021-07-31 20:36 ` Sunil Muthuswamy
2021-08-02 12:51 ` Wei Liu
2021-08-04 12:20   ` Wei Liu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).