All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 00/15] KVM: X86: Miscellaneous cleanups
@ 2021-11-18 11:07 Lai Jiangshan
  2021-11-18 11:08 ` [PATCH 01/15] KVM: VMX: Use x86 core API to access to fs_base and inactive gs_base Lai Jiangshan
                   ` (14 more replies)
  0 siblings, 15 replies; 30+ messages in thread
From: Lai Jiangshan @ 2021-11-18 11:07 UTC (permalink / raw)
  To: linux-kernel; +Cc: kvm, Lai Jiangshan

From: Lai Jiangshan <laijs@linux.alibaba.com>

All are just cleanups and independent of each other except the last 3
patches.

Lai Jiangshan (15):
  KVM: VMX: Use x86 core API to access to fs_base and inactive gs_base
  KVM: VMX: Avoid to rdmsrl(MSR_IA32_SYSENTER_ESP)
  KVM: VMX: Update msr value after kvm_set_user_return_msr() succeeds
  KVM: VMX: Save HOST_CR3 in vmx_prepare_switch_to_guest()
  KVM: VMX: Add document to state that write to uret msr should always
    be intercepted
  KVM: VMX: Use kvm_set_msr_common() for MSR_IA32_TSC_ADJUST in the
    default way
  KVM: VMX: Change comments about vmx_get_msr()
  KVM: SVM: Rename get_max_npt_level() to get_npt_level()
  KVM: SVM: Allocate sd->save_area with __GFP_ZERO
  KVM: X86: Skip allocating pae_root for vcpu->arch.guest_mmu when
    !tdp_enabled
  KVM: X86: Fix comment in __kvm_mmu_create()
  KVM: X86: Remove unused declaration of __kvm_mmu_free_some_pages()
  KVM: X86: Remove useless code to set role.gpte_is_8_bytes when
    role.direct
  KVM: X86: Calculate quadrant when !role.gpte_is_8_bytes
  KVM: X86: Always set gpte_is_8_bytes when direct map

 Documentation/virt/kvm/mmu.rst  |  2 +-
 arch/x86/include/asm/kvm_host.h | 11 ------
 arch/x86/kernel/process_64.c    |  2 ++
 arch/x86/kvm/mmu/mmu.c          | 12 ++++---
 arch/x86/kvm/svm/svm.c          | 12 +++----
 arch/x86/kvm/vmx/nested.c       |  8 +----
 arch/x86/kvm/vmx/vmx.c          | 64 +++++++++++++++++++--------------
 7 files changed, 53 insertions(+), 58 deletions(-)

-- 
2.19.1.6.gb485710b


^ permalink raw reply	[flat|nested] 30+ messages in thread

* [PATCH 01/15] KVM: VMX: Use x86 core API to access to fs_base and inactive gs_base
  2021-11-18 11:07 [PATCH 00/15] KVM: X86: Miscellaneous cleanups Lai Jiangshan
@ 2021-11-18 11:08 ` Lai Jiangshan
  2021-11-18 16:05   ` Paolo Bonzini
  2021-11-21 15:17   ` Thomas Gleixner
  2021-11-18 11:08 ` [PATCH 02/15] KVM: VMX: Avoid to rdmsrl(MSR_IA32_SYSENTER_ESP) Lai Jiangshan
                   ` (13 subsequent siblings)
  14 siblings, 2 replies; 30+ messages in thread
From: Lai Jiangshan @ 2021-11-18 11:08 UTC (permalink / raw)
  To: linux-kernel
  Cc: kvm, Lai Jiangshan, x86, Paolo Bonzini, Sean Christopherson,
	Vitaly Kuznetsov, Wanpeng Li, Jim Mattson, Joerg Roedel,
	Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen,
	H. Peter Anvin, Kees Cook

From: Lai Jiangshan <laijs@linux.alibaba.com>

And they use FSGSBASE instructions when enabled.

Cc: x86@kernel.org
Signed-off-by: Lai Jiangshan <laijs@linux.alibaba.com>
---
 arch/x86/include/asm/kvm_host.h | 10 ----------
 arch/x86/kernel/process_64.c    |  2 ++
 arch/x86/kvm/vmx/vmx.c          | 14 +++++++-------
 3 files changed, 9 insertions(+), 17 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 1fcb345bc107..4cbb402f5636 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1808,16 +1808,6 @@ static inline void kvm_load_ldt(u16 sel)
 	asm("lldt %0" : : "rm"(sel));
 }
 
-#ifdef CONFIG_X86_64
-static inline unsigned long read_msr(unsigned long msr)
-{
-	u64 value;
-
-	rdmsrl(msr, value);
-	return value;
-}
-#endif
-
 static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code)
 {
 	kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 3402edec236c..296bd5c2e38b 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -443,6 +443,7 @@ unsigned long x86_gsbase_read_cpu_inactive(void)
 
 	return gsbase;
 }
+EXPORT_SYMBOL_GPL(x86_gsbase_read_cpu_inactive);
 
 void x86_gsbase_write_cpu_inactive(unsigned long gsbase)
 {
@@ -456,6 +457,7 @@ void x86_gsbase_write_cpu_inactive(unsigned long gsbase)
 		wrmsrl(MSR_KERNEL_GS_BASE, gsbase);
 	}
 }
+EXPORT_SYMBOL_GPL(x86_gsbase_write_cpu_inactive);
 
 unsigned long x86_fsbase_read_task(struct task_struct *task)
 {
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 3127c66a1651..48a34d1a2989 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1156,11 +1156,11 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
 	} else {
 		savesegment(fs, fs_sel);
 		savesegment(gs, gs_sel);
-		fs_base = read_msr(MSR_FS_BASE);
-		vmx->msr_host_kernel_gs_base = read_msr(MSR_KERNEL_GS_BASE);
+		fs_base = x86_fsbase_read_cpu();
+		vmx->msr_host_kernel_gs_base = x86_gsbase_read_cpu_inactive();
 	}
 
-	wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
+	x86_gsbase_write_cpu_inactive(vmx->msr_guest_kernel_gs_base);
 #else
 	savesegment(fs, fs_sel);
 	savesegment(gs, gs_sel);
@@ -1184,7 +1184,7 @@ static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx)
 	++vmx->vcpu.stat.host_state_reload;
 
 #ifdef CONFIG_X86_64
-	rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
+	vmx->msr_guest_kernel_gs_base = x86_gsbase_read_cpu_inactive();
 #endif
 	if (host_state->ldt_sel || (host_state->gs_sel & 7)) {
 		kvm_load_ldt(host_state->ldt_sel);
@@ -1204,7 +1204,7 @@ static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx)
 #endif
 	invalidate_tss_limit();
 #ifdef CONFIG_X86_64
-	wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
+	x86_gsbase_write_cpu_inactive(vmx->msr_host_kernel_gs_base);
 #endif
 	load_fixmap_gdt(raw_smp_processor_id());
 	vmx->guest_state_loaded = false;
@@ -1216,7 +1216,7 @@ static u64 vmx_read_guest_kernel_gs_base(struct vcpu_vmx *vmx)
 {
 	preempt_disable();
 	if (vmx->guest_state_loaded)
-		rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
+		vmx->msr_guest_kernel_gs_base = x86_gsbase_read_cpu_inactive();
 	preempt_enable();
 	return vmx->msr_guest_kernel_gs_base;
 }
@@ -1225,7 +1225,7 @@ static void vmx_write_guest_kernel_gs_base(struct vcpu_vmx *vmx, u64 data)
 {
 	preempt_disable();
 	if (vmx->guest_state_loaded)
-		wrmsrl(MSR_KERNEL_GS_BASE, data);
+		x86_gsbase_write_cpu_inactive(data);
 	preempt_enable();
 	vmx->msr_guest_kernel_gs_base = data;
 }
-- 
2.19.1.6.gb485710b


^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 02/15] KVM: VMX: Avoid to rdmsrl(MSR_IA32_SYSENTER_ESP)
  2021-11-18 11:07 [PATCH 00/15] KVM: X86: Miscellaneous cleanups Lai Jiangshan
  2021-11-18 11:08 ` [PATCH 01/15] KVM: VMX: Use x86 core API to access to fs_base and inactive gs_base Lai Jiangshan
@ 2021-11-18 11:08 ` Lai Jiangshan
  2021-11-18 11:18   ` Paolo Bonzini
  2021-11-18 11:08 ` [PATCH 03/15] KVM: VMX: Update msr value after kvm_set_user_return_msr() succeeds Lai Jiangshan
                   ` (12 subsequent siblings)
  14 siblings, 1 reply; 30+ messages in thread
From: Lai Jiangshan @ 2021-11-18 11:08 UTC (permalink / raw)
  To: linux-kernel
  Cc: kvm, Lai Jiangshan, Paolo Bonzini, Sean Christopherson,
	Vitaly Kuznetsov, Wanpeng Li, Jim Mattson, Joerg Roedel,
	Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
	H. Peter Anvin

From: Lai Jiangshan <laijs@linux.alibaba.com>

The value of host MSR_IA32_SYSENTER_ESP is known to be constant for
each CPU: (cpu_entry_stack(cpu) + 1) when 32-bit syscall is enabled or
NULL if 32-bit syscall is not enabled.

So rdmsrl() can be avoided for the first case and both rdmsrl() and
vmcs_writel() can be avoided for the second case.

Signed-off-by: Lai Jiangshan <laijs@linux.alibaba.com>
---
 arch/x86/kvm/vmx/vmx.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 48a34d1a2989..4e4a33226edb 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1271,7 +1271,6 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu,
 
 	if (!already_loaded) {
 		void *gdt = get_current_gdt_ro();
-		unsigned long sysenter_esp;
 
 		/*
 		 * Flush all EPTP/VPID contexts, the new pCPU may have stale
@@ -1287,8 +1286,11 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu,
 			    (unsigned long)&get_cpu_entry_area(cpu)->tss.x86_tss);
 		vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt);   /* 22.2.4 */
 
-		rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp);
-		vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */
+		if (IS_ENABLED(CONFIG_IA32_EMULATION) || IS_ENABLED(CONFIG_X86_32)) {
+			/* 22.2.3 */
+			vmcs_writel(HOST_IA32_SYSENTER_ESP,
+				    (unsigned long)(cpu_entry_stack(cpu) + 1));
+		}
 
 		vmx->loaded_vmcs->cpu = cpu;
 	}
@@ -4021,6 +4023,8 @@ void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
 
 	rdmsr(MSR_IA32_SYSENTER_CS, low32, high32);
 	vmcs_write32(HOST_IA32_SYSENTER_CS, low32);
+	rdmsrl(MSR_IA32_SYSENTER_ESP, tmpl);
+	vmcs_writel(HOST_IA32_SYSENTER_ESP, tmpl);
 	rdmsrl(MSR_IA32_SYSENTER_EIP, tmpl);
 	vmcs_writel(HOST_IA32_SYSENTER_EIP, tmpl);   /* 22.2.3 */
 
-- 
2.19.1.6.gb485710b


^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 03/15] KVM: VMX: Update msr value after kvm_set_user_return_msr() succeeds
  2021-11-18 11:07 [PATCH 00/15] KVM: X86: Miscellaneous cleanups Lai Jiangshan
  2021-11-18 11:08 ` [PATCH 01/15] KVM: VMX: Use x86 core API to access to fs_base and inactive gs_base Lai Jiangshan
  2021-11-18 11:08 ` [PATCH 02/15] KVM: VMX: Avoid to rdmsrl(MSR_IA32_SYSENTER_ESP) Lai Jiangshan
@ 2021-11-18 11:08 ` Lai Jiangshan
  2021-11-18 11:08 ` [PATCH 04/15] KVM: VMX: Save HOST_CR3 in vmx_prepare_switch_to_guest() Lai Jiangshan
                   ` (11 subsequent siblings)
  14 siblings, 0 replies; 30+ messages in thread
From: Lai Jiangshan @ 2021-11-18 11:08 UTC (permalink / raw)
  To: linux-kernel
  Cc: kvm, Lai Jiangshan, Paolo Bonzini, Sean Christopherson,
	Vitaly Kuznetsov, Wanpeng Li, Jim Mattson, Joerg Roedel,
	Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
	H. Peter Anvin

From: Lai Jiangshan <laijs@linux.alibaba.com>

Avoid modifying msr->data before kvm_set_user_return_msr() succeeds.

Signed-off-by: Lai Jiangshan <laijs@linux.alibaba.com>
---
 arch/x86/kvm/vmx/vmx.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 4e4a33226edb..fc7aa7f30ad5 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -602,15 +602,13 @@ static int vmx_set_guest_uret_msr(struct vcpu_vmx *vmx,
 	unsigned int slot = msr - vmx->guest_uret_msrs;
 	int ret = 0;
 
-	u64 old_msr_data = msr->data;
-	msr->data = data;
 	if (msr->load_into_hardware) {
 		preempt_disable();
-		ret = kvm_set_user_return_msr(slot, msr->data, msr->mask);
+		ret = kvm_set_user_return_msr(slot, data, msr->mask);
 		preempt_enable();
-		if (ret)
-			msr->data = old_msr_data;
 	}
+	if (!ret)
+		msr->data = data;
 	return ret;
 }
 
-- 
2.19.1.6.gb485710b


^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 04/15] KVM: VMX: Save HOST_CR3 in vmx_prepare_switch_to_guest()
  2021-11-18 11:07 [PATCH 00/15] KVM: X86: Miscellaneous cleanups Lai Jiangshan
                   ` (2 preceding siblings ...)
  2021-11-18 11:08 ` [PATCH 03/15] KVM: VMX: Update msr value after kvm_set_user_return_msr() succeeds Lai Jiangshan
@ 2021-11-18 11:08 ` Lai Jiangshan
  2021-11-18 11:08 ` [PATCH 05/15] KVM: VMX: Add document to state that write to uret msr should always be intercepted Lai Jiangshan
                   ` (10 subsequent siblings)
  14 siblings, 0 replies; 30+ messages in thread
From: Lai Jiangshan @ 2021-11-18 11:08 UTC (permalink / raw)
  To: linux-kernel
  Cc: kvm, Lai Jiangshan, Paolo Bonzini, Sean Christopherson,
	Vitaly Kuznetsov, Wanpeng Li, Jim Mattson, Joerg Roedel,
	Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
	H. Peter Anvin

From: Lai Jiangshan <laijs@linux.alibaba.com>

The host CR3 in the vcpu thread can only be changed when scheduling.
Moving the code into vmx_prepare_switch_to_guest() makes the code
simpler.

Signed-off-by: Lai Jiangshan <laijs@linux.alibaba.com>
---
 arch/x86/kvm/vmx/nested.c |  8 +-------
 arch/x86/kvm/vmx/vmx.c    | 17 ++++++++++-------
 2 files changed, 11 insertions(+), 14 deletions(-)

diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 2d9565b37fe0..d8d0dbc4fc18 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -3028,7 +3028,7 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
 static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	unsigned long cr3, cr4;
+	unsigned long cr4;
 	bool vm_fail;
 
 	if (!nested_early_check)
@@ -3051,12 +3051,6 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
 	 */
 	vmcs_writel(GUEST_RFLAGS, 0);
 
-	cr3 = __get_current_cr3_fast();
-	if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) {
-		vmcs_writel(HOST_CR3, cr3);
-		vmx->loaded_vmcs->host_state.cr3 = cr3;
-	}
-
 	cr4 = cr4_read_shadow();
 	if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) {
 		vmcs_writel(HOST_CR4, cr4);
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index fc7aa7f30ad5..e8a41fdc3c4d 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1103,6 +1103,7 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
 #ifdef CONFIG_X86_64
 	int cpu = raw_smp_processor_id();
 #endif
+	unsigned long cr3;
 	unsigned long fs_base, gs_base;
 	u16 fs_sel, gs_sel;
 	int i;
@@ -1167,6 +1168,14 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
 #endif
 
 	vmx_set_host_fs_gs(host_state, fs_sel, gs_sel, fs_base, gs_base);
+
+	/* Host CR3 including its PCID is stable when guest state is loaded. */
+	cr3 = __get_current_cr3_fast();
+	if (unlikely(cr3 != host_state->cr3)) {
+		vmcs_writel(HOST_CR3, cr3);
+		host_state->cr3 = cr3;
+	}
+
 	vmx->guest_state_loaded = true;
 }
 
@@ -6590,7 +6599,7 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
 static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	unsigned long cr3, cr4;
+	unsigned long cr4;
 
 	/* Record the guest's net vcpu time for enforced NMI injections. */
 	if (unlikely(!enable_vnmi &&
@@ -6634,12 +6643,6 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	if (kvm_register_is_dirty(vcpu, VCPU_REGS_RIP))
 		vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
 
-	cr3 = __get_current_cr3_fast();
-	if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) {
-		vmcs_writel(HOST_CR3, cr3);
-		vmx->loaded_vmcs->host_state.cr3 = cr3;
-	}
-
 	cr4 = cr4_read_shadow();
 	if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) {
 		vmcs_writel(HOST_CR4, cr4);
-- 
2.19.1.6.gb485710b


^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 05/15] KVM: VMX: Add document to state that write to uret msr should always be intercepted
  2021-11-18 11:07 [PATCH 00/15] KVM: X86: Miscellaneous cleanups Lai Jiangshan
                   ` (3 preceding siblings ...)
  2021-11-18 11:08 ` [PATCH 04/15] KVM: VMX: Save HOST_CR3 in vmx_prepare_switch_to_guest() Lai Jiangshan
@ 2021-11-18 11:08 ` Lai Jiangshan
  2021-11-18 16:05   ` Paolo Bonzini
  2021-11-18 11:08 ` [PATCH 06/15] KVM: VMX: Use kvm_set_msr_common() for MSR_IA32_TSC_ADJUST in the default way Lai Jiangshan
                   ` (9 subsequent siblings)
  14 siblings, 1 reply; 30+ messages in thread
From: Lai Jiangshan @ 2021-11-18 11:08 UTC (permalink / raw)
  To: linux-kernel
  Cc: kvm, Lai Jiangshan, Paolo Bonzini, Sean Christopherson,
	Vitaly Kuznetsov, Wanpeng Li, Jim Mattson, Joerg Roedel,
	Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
	H. Peter Anvin

From: Lai Jiangshan <laijs@linux.alibaba.com>

And adds a corresponding sanity check code.

Signed-off-by: Lai Jiangshan <laijs@linux.alibaba.com>
---
 arch/x86/kvm/vmx/vmx.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index e8a41fdc3c4d..cd081219b668 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -3703,13 +3703,21 @@ void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
 	if (!cpu_has_vmx_msr_bitmap())
 		return;
 
+	/*
+	 * Writes to uret msrs should always be intercepted because the
+	 * mechanism must know the current value.  Sanity check to avoid any
+	 * inadvertent mistake in coding.
+	 */
+	if (WARN_ON_ONCE(vmx_find_uret_msr(vmx, msr) && (type & MSR_TYPE_W)))
+		return;
+
 	if (static_branch_unlikely(&enable_evmcs))
 		evmcs_touch_msr_bitmap();
 
 	/*
 	 * Mark the desired intercept state in shadow bitmap, this is needed
 	 * for resync when the MSR filters change.
-	*/
+	 */
 	if (is_valid_passthrough_msr(msr)) {
 		int idx = possible_passthrough_msr_slot(msr);
 
-- 
2.19.1.6.gb485710b


^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 06/15] KVM: VMX: Use kvm_set_msr_common() for MSR_IA32_TSC_ADJUST in the default way
  2021-11-18 11:07 [PATCH 00/15] KVM: X86: Miscellaneous cleanups Lai Jiangshan
                   ` (4 preceding siblings ...)
  2021-11-18 11:08 ` [PATCH 05/15] KVM: VMX: Add document to state that write to uret msr should always be intercepted Lai Jiangshan
@ 2021-11-18 11:08 ` Lai Jiangshan
  2021-11-18 11:08 ` [PATCH 07/15] KVM: VMX: Change comments about vmx_get_msr() Lai Jiangshan
                   ` (8 subsequent siblings)
  14 siblings, 0 replies; 30+ messages in thread
From: Lai Jiangshan @ 2021-11-18 11:08 UTC (permalink / raw)
  To: linux-kernel
  Cc: kvm, Lai Jiangshan, Paolo Bonzini, Sean Christopherson,
	Vitaly Kuznetsov, Wanpeng Li, Jim Mattson, Joerg Roedel,
	Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
	H. Peter Anvin

From: Lai Jiangshan <laijs@linux.alibaba.com>

MSR_IA32_TSC_ADJUST can be left to the default way, which also uses
kvm_set_msr_common().

Signed-off-by: Lai Jiangshan <laijs@linux.alibaba.com>
---
 arch/x86/kvm/vmx/vmx.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index cd081219b668..a0efc1e74311 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -2104,9 +2104,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		}
 		ret = kvm_set_msr_common(vcpu, msr_info);
 		break;
-	case MSR_IA32_TSC_ADJUST:
-		ret = kvm_set_msr_common(vcpu, msr_info);
-		break;
 	case MSR_IA32_MCG_EXT_CTL:
 		if ((!msr_info->host_initiated &&
 		     !(to_vmx(vcpu)->msr_ia32_feature_control &
-- 
2.19.1.6.gb485710b


^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 07/15] KVM: VMX: Change comments about vmx_get_msr()
  2021-11-18 11:07 [PATCH 00/15] KVM: X86: Miscellaneous cleanups Lai Jiangshan
                   ` (5 preceding siblings ...)
  2021-11-18 11:08 ` [PATCH 06/15] KVM: VMX: Use kvm_set_msr_common() for MSR_IA32_TSC_ADJUST in the default way Lai Jiangshan
@ 2021-11-18 11:08 ` Lai Jiangshan
  2021-11-18 11:08 ` [PATCH 08/15] KVM: SVM: Rename get_max_npt_level() to get_npt_level() Lai Jiangshan
                   ` (7 subsequent siblings)
  14 siblings, 0 replies; 30+ messages in thread
From: Lai Jiangshan @ 2021-11-18 11:08 UTC (permalink / raw)
  To: linux-kernel
  Cc: kvm, Lai Jiangshan, Paolo Bonzini, Sean Christopherson,
	Vitaly Kuznetsov, Wanpeng Li, Jim Mattson, Joerg Roedel,
	Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
	H. Peter Anvin

From: Lai Jiangshan <laijs@linux.alibaba.com>

The variable name is changed in the code.

Signed-off-by: Lai Jiangshan <laijs@linux.alibaba.com>
---
 arch/x86/kvm/vmx/vmx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index a0efc1e74311..c6d9c50ea5d4 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1757,7 +1757,7 @@ static int vmx_get_msr_feature(struct kvm_msr_entry *msr)
 }
 
 /*
- * Reads an msr value (of 'msr_index') into 'pdata'.
+ * Reads an msr value (of 'msr_info->index') into 'msr_info->data'.
  * Returns 0 on success, non-0 otherwise.
  * Assumes vcpu_load() was already called.
  */
-- 
2.19.1.6.gb485710b


^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 08/15] KVM: SVM: Rename get_max_npt_level() to get_npt_level()
  2021-11-18 11:07 [PATCH 00/15] KVM: X86: Miscellaneous cleanups Lai Jiangshan
                   ` (6 preceding siblings ...)
  2021-11-18 11:08 ` [PATCH 07/15] KVM: VMX: Change comments about vmx_get_msr() Lai Jiangshan
@ 2021-11-18 11:08 ` Lai Jiangshan
  2021-11-18 11:08 ` [PATCH 09/15] KVM: SVM: Allocate sd->save_area with __GFP_ZERO Lai Jiangshan
                   ` (6 subsequent siblings)
  14 siblings, 0 replies; 30+ messages in thread
From: Lai Jiangshan @ 2021-11-18 11:08 UTC (permalink / raw)
  To: linux-kernel
  Cc: kvm, Lai Jiangshan, Paolo Bonzini, Sean Christopherson,
	Vitaly Kuznetsov, Wanpeng Li, Jim Mattson, Joerg Roedel,
	Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
	H. Peter Anvin

From: Lai Jiangshan <laijs@linux.alibaba.com>

It returns the only proper NPT level, so the "max" in the name
is not appropriate.

Signed-off-by: Lai Jiangshan <laijs@linux.alibaba.com>
---
 arch/x86/kvm/svm/svm.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 943da8a7d850..33b434fd5d9b 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -265,7 +265,7 @@ u32 svm_msrpm_offset(u32 msr)
 
 #define MAX_INST_SIZE 15
 
-static int get_max_npt_level(void)
+static int get_npt_level(void)
 {
 #ifdef CONFIG_X86_64
 	return pgtable_l5_enabled() ? PT64_ROOT_5LEVEL : PT64_ROOT_4LEVEL;
@@ -1029,9 +1029,9 @@ static __init int svm_hardware_setup(void)
 	if (!boot_cpu_has(X86_FEATURE_NPT))
 		npt_enabled = false;
 
-	/* Force VM NPT level equal to the host's max NPT level */
-	kvm_configure_mmu(npt_enabled, get_max_npt_level(),
-			  get_max_npt_level(), PG_LEVEL_1G);
+	/* Force VM NPT level equal to the host's paging level */
+	kvm_configure_mmu(npt_enabled, get_npt_level(),
+			  get_npt_level(), PG_LEVEL_1G);
 	pr_info("kvm: Nested Paging %sabled\n", npt_enabled ? "en" : "dis");
 
 	/* Note, SEV setup consumes npt_enabled. */
-- 
2.19.1.6.gb485710b


^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 09/15] KVM: SVM: Allocate sd->save_area with __GFP_ZERO
  2021-11-18 11:07 [PATCH 00/15] KVM: X86: Miscellaneous cleanups Lai Jiangshan
                   ` (7 preceding siblings ...)
  2021-11-18 11:08 ` [PATCH 08/15] KVM: SVM: Rename get_max_npt_level() to get_npt_level() Lai Jiangshan
@ 2021-11-18 11:08 ` Lai Jiangshan
  2021-11-18 11:08 ` [PATCH 10/15] KVM: X86: Skip allocating pae_root for vcpu->arch.guest_mmu when !tdp_enabled Lai Jiangshan
                   ` (5 subsequent siblings)
  14 siblings, 0 replies; 30+ messages in thread
From: Lai Jiangshan @ 2021-11-18 11:08 UTC (permalink / raw)
  To: linux-kernel
  Cc: kvm, Lai Jiangshan, Paolo Bonzini, Sean Christopherson,
	Vitaly Kuznetsov, Wanpeng Li, Jim Mattson, Joerg Roedel,
	Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
	H. Peter Anvin

From: Lai Jiangshan <laijs@linux.alibaba.com>

And remove clear_page() on it.

Signed-off-by: Lai Jiangshan <laijs@linux.alibaba.com>
---
 arch/x86/kvm/svm/svm.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 33b434fd5d9b..d855ba664fc2 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -585,12 +585,10 @@ static int svm_cpu_init(int cpu)
 	if (!sd)
 		return ret;
 	sd->cpu = cpu;
-	sd->save_area = alloc_page(GFP_KERNEL);
+	sd->save_area = alloc_page(GFP_KERNEL | __GFP_ZERO);
 	if (!sd->save_area)
 		goto free_cpu_data;
 
-	clear_page(page_address(sd->save_area));
-
 	ret = sev_cpu_init(sd);
 	if (ret)
 		goto free_save_area;
-- 
2.19.1.6.gb485710b


^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 10/15] KVM: X86: Skip allocating pae_root for vcpu->arch.guest_mmu when !tdp_enabled
  2021-11-18 11:07 [PATCH 00/15] KVM: X86: Miscellaneous cleanups Lai Jiangshan
                   ` (8 preceding siblings ...)
  2021-11-18 11:08 ` [PATCH 09/15] KVM: SVM: Allocate sd->save_area with __GFP_ZERO Lai Jiangshan
@ 2021-11-18 11:08 ` Lai Jiangshan
  2021-11-18 11:08 ` [PATCH 11/15] KVM: X86: Fix comment in __kvm_mmu_create() Lai Jiangshan
                   ` (4 subsequent siblings)
  14 siblings, 0 replies; 30+ messages in thread
From: Lai Jiangshan @ 2021-11-18 11:08 UTC (permalink / raw)
  To: linux-kernel
  Cc: kvm, Lai Jiangshan, Paolo Bonzini, Sean Christopherson,
	Vitaly Kuznetsov, Wanpeng Li, Jim Mattson, Joerg Roedel,
	Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
	H. Peter Anvin

From: Lai Jiangshan <laijs@linux.alibaba.com>

It is never used.

Signed-off-by: Lai Jiangshan <laijs@linux.alibaba.com>
---
 arch/x86/kvm/mmu/mmu.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 8f0035517450..e1fe8af874ef 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -5501,6 +5501,10 @@ static int __kvm_mmu_create(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
 	for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
 		mmu->prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID;
 
+	/* vcpu->arch.guest_mmu isn't used when !tdp_enabled. */
+	if (!tdp_enabled && mmu == &vcpu->arch.guest_mmu)
+		return 0;
+
 	/*
 	 * When using PAE paging, the four PDPTEs are treated as 'root' pages,
 	 * while the PDP table is a per-vCPU construct that's allocated at MMU
-- 
2.19.1.6.gb485710b


^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 11/15] KVM: X86: Fix comment in __kvm_mmu_create()
  2021-11-18 11:07 [PATCH 00/15] KVM: X86: Miscellaneous cleanups Lai Jiangshan
                   ` (9 preceding siblings ...)
  2021-11-18 11:08 ` [PATCH 10/15] KVM: X86: Skip allocating pae_root for vcpu->arch.guest_mmu when !tdp_enabled Lai Jiangshan
@ 2021-11-18 11:08 ` Lai Jiangshan
  2021-11-18 11:08 ` [PATCH 12/15] KVM: X86: Remove unused declaration of __kvm_mmu_free_some_pages() Lai Jiangshan
                   ` (3 subsequent siblings)
  14 siblings, 0 replies; 30+ messages in thread
From: Lai Jiangshan @ 2021-11-18 11:08 UTC (permalink / raw)
  To: linux-kernel
  Cc: kvm, Lai Jiangshan, Paolo Bonzini, Sean Christopherson,
	Vitaly Kuznetsov, Wanpeng Li, Jim Mattson, Joerg Roedel,
	Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
	H. Peter Anvin

From: Lai Jiangshan <laijs@linux.alibaba.com>

The allocation of special roots is moved to mmu_alloc_special_roots().

Signed-off-by: Lai Jiangshan <laijs@linux.alibaba.com>
---
 arch/x86/kvm/mmu/mmu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index e1fe8af874ef..64599fa333c4 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -5514,7 +5514,7 @@ static int __kvm_mmu_create(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
 	 * generally doesn't use PAE paging and can skip allocating the PDP
 	 * table.  The main exception, handled here, is SVM's 32-bit NPT.  The
 	 * other exception is for shadowing L1's 32-bit or PAE NPT on 64-bit
-	 * KVM; that horror is handled on-demand by mmu_alloc_shadow_roots().
+	 * KVM; that horror is handled on-demand by mmu_alloc_special_roots().
 	 */
 	if (tdp_enabled && kvm_mmu_get_tdp_level(vcpu) > PT32E_ROOT_LEVEL)
 		return 0;
-- 
2.19.1.6.gb485710b


^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 12/15] KVM: X86: Remove unused declaration of __kvm_mmu_free_some_pages()
  2021-11-18 11:07 [PATCH 00/15] KVM: X86: Miscellaneous cleanups Lai Jiangshan
                   ` (10 preceding siblings ...)
  2021-11-18 11:08 ` [PATCH 11/15] KVM: X86: Fix comment in __kvm_mmu_create() Lai Jiangshan
@ 2021-11-18 11:08 ` Lai Jiangshan
  2021-11-18 11:08 ` [PATCH 13/15] KVM: X86: Remove useless code to set role.gpte_is_8_bytes when role.direct Lai Jiangshan
                   ` (2 subsequent siblings)
  14 siblings, 0 replies; 30+ messages in thread
From: Lai Jiangshan @ 2021-11-18 11:08 UTC (permalink / raw)
  To: linux-kernel
  Cc: kvm, Lai Jiangshan, Paolo Bonzini, Sean Christopherson,
	Vitaly Kuznetsov, Wanpeng Li, Jim Mattson, Joerg Roedel,
	Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
	H. Peter Anvin

From: Lai Jiangshan <laijs@linux.alibaba.com>

The body of __kvm_mmu_free_some_pages() has been removed.

Signed-off-by: Lai Jiangshan <laijs@linux.alibaba.com>
---
 arch/x86/include/asm/kvm_host.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 4cbb402f5636..eb6ef0209ee6 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1760,7 +1760,6 @@ void kvm_inject_nmi(struct kvm_vcpu *vcpu);
 void kvm_update_dr7(struct kvm_vcpu *vcpu);
 
 int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn);
-void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
 void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 			ulong roots_to_free);
 void kvm_mmu_free_guest_mode_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu);
-- 
2.19.1.6.gb485710b


^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 13/15] KVM: X86: Remove useless code to set role.gpte_is_8_bytes when role.direct
  2021-11-18 11:07 [PATCH 00/15] KVM: X86: Miscellaneous cleanups Lai Jiangshan
                   ` (11 preceding siblings ...)
  2021-11-18 11:08 ` [PATCH 12/15] KVM: X86: Remove unused declaration of __kvm_mmu_free_some_pages() Lai Jiangshan
@ 2021-11-18 11:08 ` Lai Jiangshan
  2021-11-18 11:08 ` [PATCH 14/15] KVM: X86: Calculate quadrant when !role.gpte_is_8_bytes Lai Jiangshan
  2021-11-18 11:08 ` [PATCH 15/15] KVM: X86: Always set gpte_is_8_bytes when direct map Lai Jiangshan
  14 siblings, 0 replies; 30+ messages in thread
From: Lai Jiangshan @ 2021-11-18 11:08 UTC (permalink / raw)
  To: linux-kernel
  Cc: kvm, Lai Jiangshan, Paolo Bonzini, Sean Christopherson,
	Vitaly Kuznetsov, Wanpeng Li, Jim Mattson, Joerg Roedel,
	Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
	H. Peter Anvin

From: Lai Jiangshan <laijs@linux.alibaba.com>

role.gpte_is_8_bytes is unused when role.direct is set, so this code is
currently useless.

And role.gpte_is_8_bytes is going to be used to indicate whether the
gpte is 4 bytes even when role.direct is true with shadow paging, so
this code has to be removed in preparation.

Signed-off-by: Lai Jiangshan <laijs@linux.alibaba.com>
---
 arch/x86/kvm/mmu/mmu.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 64599fa333c4..db8b64971f25 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -2082,8 +2082,6 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 	role = vcpu->arch.mmu->mmu_role.base;
 	role.level = level;
 	role.direct = direct;
-	if (role.direct)
-		role.gpte_is_8_bytes = true;
 	role.access = access;
 	if (!direct_mmu && vcpu->arch.mmu->root_level <= PT32_ROOT_LEVEL) {
 		quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level));
-- 
2.19.1.6.gb485710b


^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 14/15] KVM: X86: Calculate quadrant when !role.gpte_is_8_bytes
  2021-11-18 11:07 [PATCH 00/15] KVM: X86: Miscellaneous cleanups Lai Jiangshan
                   ` (12 preceding siblings ...)
  2021-11-18 11:08 ` [PATCH 13/15] KVM: X86: Remove useless code to set role.gpte_is_8_bytes when role.direct Lai Jiangshan
@ 2021-11-18 11:08 ` Lai Jiangshan
  2021-11-18 11:08 ` [PATCH 15/15] KVM: X86: Always set gpte_is_8_bytes when direct map Lai Jiangshan
  14 siblings, 0 replies; 30+ messages in thread
From: Lai Jiangshan @ 2021-11-18 11:08 UTC (permalink / raw)
  To: linux-kernel
  Cc: kvm, Lai Jiangshan, Paolo Bonzini, Sean Christopherson,
	Vitaly Kuznetsov, Wanpeng Li, Jim Mattson, Joerg Roedel,
	Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
	H. Peter Anvin

From: Lai Jiangshan <laijs@linux.alibaba.com>

role.quadrant is only valid when the gpte size is 4 bytes and is only
calculated when the gpte size is 4 bytes.

Although "vcpu->arch.mmu->root_level <= PT32_ROOT_LEVEL" also means the
gpte size is 4 bytes, using "!role.gpte_is_8_bytes" is clearer.

Signed-off-by: Lai Jiangshan <laijs@linux.alibaba.com>
---
 arch/x86/kvm/mmu/mmu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index db8b64971f25..6948f2d696c3 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -2083,7 +2083,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 	role.level = level;
 	role.direct = direct;
 	role.access = access;
-	if (!direct_mmu && vcpu->arch.mmu->root_level <= PT32_ROOT_LEVEL) {
+	if (!direct_mmu && !role.gpte_is_8_bytes) {
 		quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level));
 		quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1;
 		role.quadrant = quadrant;
-- 
2.19.1.6.gb485710b


^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 15/15] KVM: X86: Always set gpte_is_8_bytes when direct map
  2021-11-18 11:07 [PATCH 00/15] KVM: X86: Miscellaneous cleanups Lai Jiangshan
                   ` (13 preceding siblings ...)
  2021-11-18 11:08 ` [PATCH 14/15] KVM: X86: Calculate quadrant when !role.gpte_is_8_bytes Lai Jiangshan
@ 2021-11-18 11:08 ` Lai Jiangshan
  2021-11-18 11:12   ` Paolo Bonzini
  14 siblings, 1 reply; 30+ messages in thread
From: Lai Jiangshan @ 2021-11-18 11:08 UTC (permalink / raw)
  To: linux-kernel
  Cc: kvm, Lai Jiangshan, Paolo Bonzini, Jonathan Corbet,
	Sean Christopherson, Vitaly Kuznetsov, Wanpeng Li, Jim Mattson,
	Joerg Roedel, Thomas Gleixner, Ingo Molnar, Borislav Petkov,
	Dave Hansen, x86, H. Peter Anvin, linux-doc

From: Lai Jiangshan <laijs@linux.alibaba.com>

When using the direct map, gpte_is_8_bytes has no meaning, but it is true
for all other cases except direct map when non-paging.

Setting gpte_is_8_bytes to true when non-paging ensures that
!gpte_is_8_bytes means 32-bit gptes for shadow paging.

Signed-off-by: Lai Jiangshan <laijs@linux.alibaba.com>
---
 Documentation/virt/kvm/mmu.rst | 2 +-
 arch/x86/kvm/mmu/mmu.c         | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/Documentation/virt/kvm/mmu.rst b/Documentation/virt/kvm/mmu.rst
index f60f5488e121..5d1086602759 100644
--- a/Documentation/virt/kvm/mmu.rst
+++ b/Documentation/virt/kvm/mmu.rst
@@ -179,7 +179,7 @@ Shadow pages contain the following information:
     unpinned it will be destroyed.
   role.gpte_is_8_bytes:
     Reflects the size of the guest PTE for which the page is valid, i.e. '1'
-    if 64-bit gptes are in use, '0' if 32-bit gptes are in use.
+    if direct map or 64-bit gptes are in use, '0' if 32-bit gptes are in use.
   role.efer_nx:
     Contains the value of efer.nx for which the page is valid.
   role.cr0_wp:
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 6948f2d696c3..0c92cbc07320 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -2083,7 +2083,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 	role.level = level;
 	role.direct = direct;
 	role.access = access;
-	if (!direct_mmu && !role.gpte_is_8_bytes) {
+	if (!role.gpte_is_8_bytes) {
 		quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level));
 		quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1;
 		role.quadrant = quadrant;
@@ -4777,7 +4777,7 @@ kvm_calc_shadow_root_page_role_common(struct kvm_vcpu *vcpu,
 
 	role.base.smep_andnot_wp = role.ext.cr4_smep && !____is_cr0_wp(regs);
 	role.base.smap_andnot_wp = role.ext.cr4_smap && !____is_cr0_wp(regs);
-	role.base.gpte_is_8_bytes = ____is_cr0_pg(regs) && ____is_cr4_pae(regs);
+	role.base.gpte_is_8_bytes = !____is_cr0_pg(regs) || ____is_cr4_pae(regs);
 
 	return role;
 }
-- 
2.19.1.6.gb485710b


^ permalink raw reply related	[flat|nested] 30+ messages in thread

* Re: [PATCH 15/15] KVM: X86: Always set gpte_is_8_bytes when direct map
  2021-11-18 11:08 ` [PATCH 15/15] KVM: X86: Always set gpte_is_8_bytes when direct map Lai Jiangshan
@ 2021-11-18 11:12   ` Paolo Bonzini
  2021-11-18 14:34     ` Lai Jiangshan
  0 siblings, 1 reply; 30+ messages in thread
From: Paolo Bonzini @ 2021-11-18 11:12 UTC (permalink / raw)
  To: Lai Jiangshan, linux-kernel
  Cc: kvm, Lai Jiangshan, Jonathan Corbet, Sean Christopherson,
	Vitaly Kuznetsov, Wanpeng Li, Jim Mattson, Joerg Roedel,
	Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
	H. Peter Anvin, linux-doc

On 11/18/21 12:08, Lai Jiangshan wrote:
> From: Lai Jiangshan <laijs@linux.alibaba.com>
> 
> When direct map, gpte_is_8_bytes has no meaning, but it is true for all
> other cases except direct map when nonpaping.
> 
> Setting gpte_is_8_bytes to true when nonpaping can ensure that
> !gpte_is_8_bytes means 32-bit gptes for shadow paging.

Then the right thing to do would be to rename it to has_4_byte_gptes and 
invert the direction.  But as things stand, it's a bit more confusing to 
make gpte_is_8_bytes=1 if there are no guest PTEs at all.

Paolo

> Signed-off-by: Lai Jiangshan <laijs@linux.alibaba.com>
> ---
>   Documentation/virt/kvm/mmu.rst | 2 +-
>   arch/x86/kvm/mmu/mmu.c         | 4 ++--
>   2 files changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/Documentation/virt/kvm/mmu.rst b/Documentation/virt/kvm/mmu.rst
> index f60f5488e121..5d1086602759 100644
> --- a/Documentation/virt/kvm/mmu.rst
> +++ b/Documentation/virt/kvm/mmu.rst
> @@ -179,7 +179,7 @@ Shadow pages contain the following information:
>       unpinned it will be destroyed.
>     role.gpte_is_8_bytes:
>       Reflects the size of the guest PTE for which the page is valid, i.e. '1'
> -    if 64-bit gptes are in use, '0' if 32-bit gptes are in use.
> +    if direct map or 64-bit gptes are in use, '0' if 32-bit gptes are in use.
>     role.efer_nx:
>       Contains the value of efer.nx for which the page is valid.
>     role.cr0_wp:
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index 6948f2d696c3..0c92cbc07320 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -2083,7 +2083,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
>   	role.level = level;
>   	role.direct = direct;
>   	role.access = access;
> -	if (!direct_mmu && !role.gpte_is_8_bytes) {
> +	if (!role.gpte_is_8_bytes) {
>   		quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level));
>   		quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1;
>   		role.quadrant = quadrant;
> @@ -4777,7 +4777,7 @@ kvm_calc_shadow_root_page_role_common(struct kvm_vcpu *vcpu,
>   
>   	role.base.smep_andnot_wp = role.ext.cr4_smep && !____is_cr0_wp(regs);
>   	role.base.smap_andnot_wp = role.ext.cr4_smap && !____is_cr0_wp(regs);
> -	role.base.gpte_is_8_bytes = ____is_cr0_pg(regs) && ____is_cr4_pae(regs);
> +	role.base.gpte_is_8_bytes = !____is_cr0_pg(regs) || ____is_cr4_pae(regs);
>   
>   	return role;
>   }
> 


^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 02/15] KVM: VMX: Avoid to rdmsrl(MSR_IA32_SYSENTER_ESP)
  2021-11-18 11:08 ` [PATCH 02/15] KVM: VMX: Avoid to rdmsrl(MSR_IA32_SYSENTER_ESP) Lai Jiangshan
@ 2021-11-18 11:18   ` Paolo Bonzini
  2021-11-18 14:17     ` Lai Jiangshan
  0 siblings, 1 reply; 30+ messages in thread
From: Paolo Bonzini @ 2021-11-18 11:18 UTC (permalink / raw)
  To: Lai Jiangshan, linux-kernel
  Cc: kvm, Lai Jiangshan, Sean Christopherson, Vitaly Kuznetsov,
	Wanpeng Li, Jim Mattson, Joerg Roedel, Thomas Gleixner,
	Ingo Molnar, Borislav Petkov, Dave Hansen, x86, H. Peter Anvin

On 11/18/21 12:08, Lai Jiangshan wrote:
> From: Lai Jiangshan <laijs@linux.alibaba.com>
> 
> The value of host MSR_IA32_SYSENTER_ESP is known to be constant for
> each CPU: (cpu_entry_stack(cpu) + 1) when 32-bit syscall is enabled, or
> NULL if 32-bit syscall is not enabled.
> 
> So rdmsrl() can be avoided for the first case and both rdmsrl() and
> vmcs_writel() can be avoided for the second case.
> 
> Signed-off-by: Lai Jiangshan <laijs@linux.alibaba.com>

Then it's not constant host state, isn't?  The hunk in 
vmx_set_constant_host_state seems wrong.

Paolo

> ---
>   arch/x86/kvm/vmx/vmx.c | 10 +++++++---
>   1 file changed, 7 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 48a34d1a2989..4e4a33226edb 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -1271,7 +1271,6 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu,
>   
>   	if (!already_loaded) {
>   		void *gdt = get_current_gdt_ro();
> -		unsigned long sysenter_esp;
>   
>   		/*
>   		 * Flush all EPTP/VPID contexts, the new pCPU may have stale
> @@ -1287,8 +1286,11 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu,
>   			    (unsigned long)&get_cpu_entry_area(cpu)->tss.x86_tss);
>   		vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt);   /* 22.2.4 */
>   
> -		rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp);
> -		vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */
> +		if (IS_ENABLED(CONFIG_IA32_EMULATION) || IS_ENABLED(CONFIG_X86_32)) {
> +			/* 22.2.3 */
> +			vmcs_writel(HOST_IA32_SYSENTER_ESP,
> +				    (unsigned long)(cpu_entry_stack(cpu) + 1));
> +		}
>   
>   		vmx->loaded_vmcs->cpu = cpu;
>   	}
> @@ -4021,6 +4023,8 @@ void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
>   
>   	rdmsr(MSR_IA32_SYSENTER_CS, low32, high32);
>   	vmcs_write32(HOST_IA32_SYSENTER_CS, low32);
> +	rdmsrl(MSR_IA32_SYSENTER_ESP, tmpl);
> +	vmcs_writel(HOST_IA32_SYSENTER_ESP, tmpl);
>   	rdmsrl(MSR_IA32_SYSENTER_EIP, tmpl);
>   	vmcs_writel(HOST_IA32_SYSENTER_EIP, tmpl);   /* 22.2.3 */



^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 02/15] KVM: VMX: Avoid to rdmsrl(MSR_IA32_SYSENTER_ESP)
  2021-11-18 11:18   ` Paolo Bonzini
@ 2021-11-18 14:17     ` Lai Jiangshan
  2021-11-18 14:38       ` Paolo Bonzini
  0 siblings, 1 reply; 30+ messages in thread
From: Lai Jiangshan @ 2021-11-18 14:17 UTC (permalink / raw)
  To: Paolo Bonzini, Lai Jiangshan, linux-kernel
  Cc: kvm, Sean Christopherson, Vitaly Kuznetsov, Wanpeng Li,
	Jim Mattson, Joerg Roedel, Thomas Gleixner, Ingo Molnar,
	Borislav Petkov, Dave Hansen, x86, H. Peter Anvin



On 2021/11/18 19:18, Paolo Bonzini wrote:
> On 11/18/21 12:08, Lai Jiangshan wrote:
>> From: Lai Jiangshan <laijs@linux.alibaba.com>
>>
>> The value of host MSR_IA32_SYSENTER_ESP is known to be constant for
>> each CPU: (cpu_entry_stack(cpu) + 1) when 32-bit syscall is enabled, or
>> NULL if 32-bit syscall is not enabled.
>>
>> So rdmsrl() can be avoided for the first case and both rdmsrl() and
>> vmcs_writel() can be avoided for the second case.
>>
>> Signed-off-by: Lai Jiangshan <laijs@linux.alibaba.com>
> 
> Then it's not constant host state, isn't?  The hunk in vmx_set_constant_host_state seems wrong.
> 

The change in vmx_vcpu_load_vmcs() handles only the percpu constant case:
(cpu_entry_stack(cpu) + 1), it doesn't handle the case where
MSR_IA32_SYSENTER_ESP is NULL.

The change in vmx_set_constant_host_state() handles the case where
MSR_IA32_SYSENTER_ESP is NULL; it is indeed constant host state in this case.
If that is not the case, the added code in vmx_vcpu_load_vmcs() will override
it safely.

If an else branch with "vmcs_writel(HOST_IA32_SYSENTER_ESP, 0);" is added to
vmx_vcpu_load_vmcs(), we will not need to change vmx_set_constant_host_state().

-		rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp);
-		vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */
+		if (IS_ENABLED(CONFIG_IA32_EMULATION) || IS_ENABLED(CONFIG_X86_32)) {
+			/* 22.2.3 */
+			vmcs_writel(HOST_IA32_SYSENTER_ESP,
+				    (unsigned long)(cpu_entry_stack(cpu) + 1));
+		} else {
+			vmcs_writel(HOST_IA32_SYSENTER_ESP, 0);
+		}


I'm paranoid about the case where vmcs.HOST_IA32_SYSENTER_ESP is not reset to
NULL when the vcpu is reset.  So either the hunk in vmx_set_constant_host_state()
or the else branch in vmx_vcpu_load_vmcs() is required. I chose to change
vmx_set_constant_host_state() so that we can avoid a vmcs_writel() in the
case where CONFIG_IA32_EMULATION is off on x86_64.

> Paolo
> 
>> ---
>>   arch/x86/kvm/vmx/vmx.c | 10 +++++++---
>>   1 file changed, 7 insertions(+), 3 deletions(-)
>>
>> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
>> index 48a34d1a2989..4e4a33226edb 100644
>> --- a/arch/x86/kvm/vmx/vmx.c
>> +++ b/arch/x86/kvm/vmx/vmx.c
>> @@ -1271,7 +1271,6 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu,
>>       if (!already_loaded) {
>>           void *gdt = get_current_gdt_ro();
>> -        unsigned long sysenter_esp;
>>           /*
>>            * Flush all EPTP/VPID contexts, the new pCPU may have stale
>> @@ -1287,8 +1286,11 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu,
>>                   (unsigned long)&get_cpu_entry_area(cpu)->tss.x86_tss);
>>           vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt);   /* 22.2.4 */
>> -        rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp);
>> -        vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */
>> +        if (IS_ENABLED(CONFIG_IA32_EMULATION) || IS_ENABLED(CONFIG_X86_32)) {
>> +            /* 22.2.3 */
>> +            vmcs_writel(HOST_IA32_SYSENTER_ESP,
>> +                    (unsigned long)(cpu_entry_stack(cpu) + 1));
>> +        }
>>           vmx->loaded_vmcs->cpu = cpu;
>>       }
>> @@ -4021,6 +4023,8 @@ void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
>>       rdmsr(MSR_IA32_SYSENTER_CS, low32, high32);
>>       vmcs_write32(HOST_IA32_SYSENTER_CS, low32);
>> +    rdmsrl(MSR_IA32_SYSENTER_ESP, tmpl);
>> +    vmcs_writel(HOST_IA32_SYSENTER_ESP, tmpl);
>>       rdmsrl(MSR_IA32_SYSENTER_EIP, tmpl);
>>       vmcs_writel(HOST_IA32_SYSENTER_EIP, tmpl);   /* 22.2.3 */
> 

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 15/15] KVM: X86: Always set gpte_is_8_bytes when direct map
  2021-11-18 11:12   ` Paolo Bonzini
@ 2021-11-18 14:34     ` Lai Jiangshan
  2021-11-18 15:01       ` Paolo Bonzini
  0 siblings, 1 reply; 30+ messages in thread
From: Lai Jiangshan @ 2021-11-18 14:34 UTC (permalink / raw)
  To: Paolo Bonzini, Lai Jiangshan, linux-kernel
  Cc: kvm, Jonathan Corbet, Sean Christopherson, Vitaly Kuznetsov,
	Wanpeng Li, Jim Mattson, Joerg Roedel, Thomas Gleixner,
	Ingo Molnar, Borislav Petkov, Dave Hansen, x86, H. Peter Anvin,
	linux-doc



On 2021/11/18 19:12, Paolo Bonzini wrote:
> On 11/18/21 12:08, Lai Jiangshan wrote:
>> From: Lai Jiangshan <laijs@linux.alibaba.com>
>>
>> When direct map, gpte_is_8_bytes has no meaning, but it is true for all
>> other cases except direct map when nonpaping.
>>
>> Setting gpte_is_8_bytes to true when nonpaping can ensure that
>> !gpte_is_8_bytes means 32-bit gptes for shadow paging.
> 
> Then the right thing to do would be to rename it to has_4_byte_gptes and invert the direction.  But as things stand, 
> it's a bit more confusing to make gpte_is_8_bytes=1 if there are no guest PTEs at all.
> 

I will make the last 3 patches be a separated patchset and will do the rename.

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 02/15] KVM: VMX: Avoid to rdmsrl(MSR_IA32_SYSENTER_ESP)
  2021-11-18 14:17     ` Lai Jiangshan
@ 2021-11-18 14:38       ` Paolo Bonzini
  0 siblings, 0 replies; 30+ messages in thread
From: Paolo Bonzini @ 2021-11-18 14:38 UTC (permalink / raw)
  To: Lai Jiangshan, Lai Jiangshan, linux-kernel
  Cc: kvm, Sean Christopherson, Vitaly Kuznetsov, Wanpeng Li,
	Jim Mattson, Joerg Roedel, Thomas Gleixner, Ingo Molnar,
	Borislav Petkov, Dave Hansen, x86, H. Peter Anvin

On 11/18/21 15:17, Lai Jiangshan wrote:
> 
> The change in vmx_vcpu_load_vmcs() handles only the percpu constant case:
> (cpu_entry_stack(cpu) + 1), it doesn't handle the case where
> MSR_IA32_SYSENTER_ESP is NULL.
> 
> The change in vmx_set_constant_host_state() handles the case where
> MSR_IA32_SYSENTER_ESP is NULL, it does be constant host state in this case.
> If it is not the case, the added code in vmx_vcpu_load_vmcs() will override
> it safely.
> 
> If an else branch with "vmcs_writel(HOST_IA32_SYSENTER_ESP, 0);" is 
> added to
> vmx_vcpu_load_vmcs(), we will not need to change 
> vmx_set_constant_host_state().

We can change vmx_set_constant_host_state to write 0.

Paolo


^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 15/15] KVM: X86: Always set gpte_is_8_bytes when direct map
  2021-11-18 14:34     ` Lai Jiangshan
@ 2021-11-18 15:01       ` Paolo Bonzini
  2021-11-19 10:30         ` Lai Jiangshan
  0 siblings, 1 reply; 30+ messages in thread
From: Paolo Bonzini @ 2021-11-18 15:01 UTC (permalink / raw)
  To: Lai Jiangshan, Lai Jiangshan, linux-kernel
  Cc: kvm, Jonathan Corbet, Sean Christopherson, Vitaly Kuznetsov,
	Wanpeng Li, Jim Mattson, Joerg Roedel, Thomas Gleixner,
	Ingo Molnar, Borislav Petkov, Dave Hansen, x86, H. Peter Anvin,
	linux-doc

On 11/18/21 15:34, Lai Jiangshan wrote:
> 
> 
> On 2021/11/18 19:12, Paolo Bonzini wrote:
>> On 11/18/21 12:08, Lai Jiangshan wrote:
>>> From: Lai Jiangshan <laijs@linux.alibaba.com>
>>>
>>> When direct map, gpte_is_8_bytes has no meaning, but it is true for all
>>> other cases except direct map when nonpaping.
>>>
>>> Setting gpte_is_8_bytes to true when nonpaping can ensure that
>>> !gpte_is_8_bytes means 32-bit gptes for shadow paging.
>>
>> Then the right thing to do would be to rename it to has_4_byte_gptes 
>> and invert the direction.  But as things stand, it's a bit more 
>> confusing to make gpte_is_8_bytes=1 if there are no guest PTEs at all.
>>
> 
> I will make the last 3 patches be a separated patchset and will do the 
> rename.

Patches 13 and 14 are fine actually.

Paolo


^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 05/15] KVM: VMX: Add document to state that write to uret msr should always be intercepted
  2021-11-18 11:08 ` [PATCH 05/15] KVM: VMX: Add document to state that write to uret msr should always be intercepted Lai Jiangshan
@ 2021-11-18 16:05   ` Paolo Bonzini
  2021-12-07 20:38     ` Sean Christopherson
  0 siblings, 1 reply; 30+ messages in thread
From: Paolo Bonzini @ 2021-11-18 16:05 UTC (permalink / raw)
  To: Lai Jiangshan, linux-kernel
  Cc: kvm, Lai Jiangshan, Sean Christopherson, Vitaly Kuznetsov,
	Wanpeng Li, Jim Mattson, Joerg Roedel, Thomas Gleixner,
	Ingo Molnar, Borislav Petkov, Dave Hansen, x86, H. Peter Anvin

On 11/18/21 12:08, Lai Jiangshan wrote:
> From: Lai Jiangshan <laijs@linux.alibaba.com>
> 
> And adds a corresponding sanity check code.
> 
> Signed-off-by: Lai Jiangshan <laijs@linux.alibaba.com>
> ---
>   arch/x86/kvm/vmx/vmx.c | 10 +++++++++-
>   1 file changed, 9 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index e8a41fdc3c4d..cd081219b668 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -3703,13 +3703,21 @@ void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
>   	if (!cpu_has_vmx_msr_bitmap())
>   		return;
>   
> +	/*
> +	 * Write to uret msr should always be intercepted due to the mechanism
> +	 * must know the current value.  Santity check to avoid any inadvertent
> +	 * mistake in coding.
> +	 */
> +	if (WARN_ON_ONCE(vmx_find_uret_msr(vmx, msr) && (type & MSR_TYPE_W)))
> +		return;
> +

I'm not sure about this one, it's relatively expensive to call 
vmx_find_uret_msr.

User-return MSRs and disable-intercept MSRs are almost the opposite: 
uret is for MSRs that the host (not even the processor) never uses, 
disable-intercept is for MSRs that the guest reads/writes often.  As 
such it seems almost impossible that they overlap.

Paolo


^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 01/15] KVM: VMX: Use x86 core API to access to fs_base and inactive gs_base
  2021-11-18 11:08 ` [PATCH 01/15] KVM: VMX: Use x86 core API to access to fs_base and inactive gs_base Lai Jiangshan
@ 2021-11-18 16:05   ` Paolo Bonzini
  2021-11-21 15:17   ` Thomas Gleixner
  1 sibling, 0 replies; 30+ messages in thread
From: Paolo Bonzini @ 2021-11-18 16:05 UTC (permalink / raw)
  To: Lai Jiangshan, linux-kernel
  Cc: kvm, Lai Jiangshan, x86, Sean Christopherson, Vitaly Kuznetsov,
	Wanpeng Li, Jim Mattson, Joerg Roedel, Thomas Gleixner,
	Ingo Molnar, Borislav Petkov, Dave Hansen, H. Peter Anvin,
	Kees Cook

On 11/18/21 12:08, Lai Jiangshan wrote:
> From: Lai Jiangshan <laijs@linux.alibaba.com>
> 
> And they use FSGSBASE instructions when enabled.
> 
> Cc: x86@kernel.org
> Signed-off-by: Lai Jiangshan <laijs@linux.alibaba.com>

This needs ACK from x86 maintainers.

I queued 2-4 and 6-14.

Paolo

> ---
>   arch/x86/include/asm/kvm_host.h | 10 ----------
>   arch/x86/kernel/process_64.c    |  2 ++
>   arch/x86/kvm/vmx/vmx.c          | 14 +++++++-------
>   3 files changed, 9 insertions(+), 17 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 1fcb345bc107..4cbb402f5636 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1808,16 +1808,6 @@ static inline void kvm_load_ldt(u16 sel)
>   	asm("lldt %0" : : "rm"(sel));
>   }
>   
> -#ifdef CONFIG_X86_64
> -static inline unsigned long read_msr(unsigned long msr)
> -{
> -	u64 value;
> -
> -	rdmsrl(msr, value);
> -	return value;
> -}
> -#endif
> -
>   static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code)
>   {
>   	kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
> diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
> index 3402edec236c..296bd5c2e38b 100644
> --- a/arch/x86/kernel/process_64.c
> +++ b/arch/x86/kernel/process_64.c
> @@ -443,6 +443,7 @@ unsigned long x86_gsbase_read_cpu_inactive(void)
>   
>   	return gsbase;
>   }
> +EXPORT_SYMBOL_GPL(x86_gsbase_read_cpu_inactive);
>   
>   void x86_gsbase_write_cpu_inactive(unsigned long gsbase)
>   {
> @@ -456,6 +457,7 @@ void x86_gsbase_write_cpu_inactive(unsigned long gsbase)
>   		wrmsrl(MSR_KERNEL_GS_BASE, gsbase);
>   	}
>   }
> +EXPORT_SYMBOL_GPL(x86_gsbase_write_cpu_inactive);
>   
>   unsigned long x86_fsbase_read_task(struct task_struct *task)
>   {
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 3127c66a1651..48a34d1a2989 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -1156,11 +1156,11 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
>   	} else {
>   		savesegment(fs, fs_sel);
>   		savesegment(gs, gs_sel);
> -		fs_base = read_msr(MSR_FS_BASE);
> -		vmx->msr_host_kernel_gs_base = read_msr(MSR_KERNEL_GS_BASE);
> +		fs_base = x86_fsbase_read_cpu();
> +		vmx->msr_host_kernel_gs_base = x86_gsbase_read_cpu_inactive();
>   	}
>   
> -	wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
> +	x86_gsbase_write_cpu_inactive(vmx->msr_guest_kernel_gs_base);
>   #else
>   	savesegment(fs, fs_sel);
>   	savesegment(gs, gs_sel);
> @@ -1184,7 +1184,7 @@ static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx)
>   	++vmx->vcpu.stat.host_state_reload;
>   
>   #ifdef CONFIG_X86_64
> -	rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
> +	vmx->msr_guest_kernel_gs_base = x86_gsbase_read_cpu_inactive();
>   #endif
>   	if (host_state->ldt_sel || (host_state->gs_sel & 7)) {
>   		kvm_load_ldt(host_state->ldt_sel);
> @@ -1204,7 +1204,7 @@ static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx)
>   #endif
>   	invalidate_tss_limit();
>   #ifdef CONFIG_X86_64
> -	wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
> +	x86_gsbase_write_cpu_inactive(vmx->msr_host_kernel_gs_base);
>   #endif
>   	load_fixmap_gdt(raw_smp_processor_id());
>   	vmx->guest_state_loaded = false;
> @@ -1216,7 +1216,7 @@ static u64 vmx_read_guest_kernel_gs_base(struct vcpu_vmx *vmx)
>   {
>   	preempt_disable();
>   	if (vmx->guest_state_loaded)
> -		rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
> +		vmx->msr_guest_kernel_gs_base = x86_gsbase_read_cpu_inactive();
>   	preempt_enable();
>   	return vmx->msr_guest_kernel_gs_base;
>   }
> @@ -1225,7 +1225,7 @@ static void vmx_write_guest_kernel_gs_base(struct vcpu_vmx *vmx, u64 data)
>   {
>   	preempt_disable();
>   	if (vmx->guest_state_loaded)
> -		wrmsrl(MSR_KERNEL_GS_BASE, data);
> +		x86_gsbase_write_cpu_inactive(data);
>   	preempt_enable();
>   	vmx->msr_guest_kernel_gs_base = data;
>   }
> 


^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 15/15] KVM: X86: Always set gpte_is_8_bytes when direct map
  2021-11-18 15:01       ` Paolo Bonzini
@ 2021-11-19 10:30         ` Lai Jiangshan
  2021-11-19 10:34           ` Paolo Bonzini
  0 siblings, 1 reply; 30+ messages in thread
From: Lai Jiangshan @ 2021-11-19 10:30 UTC (permalink / raw)
  To: Paolo Bonzini, Lai Jiangshan, linux-kernel
  Cc: kvm, Jonathan Corbet, Sean Christopherson, Vitaly Kuznetsov,
	Wanpeng Li, Jim Mattson, Joerg Roedel, Thomas Gleixner,
	Ingo Molnar, Borislav Petkov, Dave Hansen, x86, H. Peter Anvin,
	linux-doc



On 2021/11/18 23:01, Paolo Bonzini wrote:
> On 11/18/21 15:34, Lai Jiangshan wrote:
>>
>>
>> On 2021/11/18 19:12, Paolo Bonzini wrote:
>>> On 11/18/21 12:08, Lai Jiangshan wrote:
>>>> From: Lai Jiangshan <laijs@linux.alibaba.com>
>>>>
>>>> When direct map, gpte_is_8_bytes has no meaning, but it is true for all
>>>> other cases except direct map when nonpaping.
>>>>
>>>> Setting gpte_is_8_bytes to true when nonpaping can ensure that
>>>> !gpte_is_8_bytes means 32-bit gptes for shadow paging.
>>>
>>> Then the right thing to do would be to rename it to has_4_byte_gptes and invert the direction.  But as things stand, 
>>> it's a bit more confusing to make gpte_is_8_bytes=1 if there are no guest PTEs at all.
>>>
>>
>> I will make the last 3 patches be a separated patchset and will do the rename.
> 
> Patches 13 and 14 are fine actually.
> 

Hello

Since 13 and 14 are queued, could you also queue this one? I will do the rename
separately in the next patchset.  I found that the intent of this patch would be
hidden in the lengthy squashed patch (of this patch and the renaming patch).

Thanks
Lai

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 15/15] KVM: X86: Always set gpte_is_8_bytes when direct map
  2021-11-19 10:30         ` Lai Jiangshan
@ 2021-11-19 10:34           ` Paolo Bonzini
  2021-11-19 10:42             ` Lai Jiangshan
  0 siblings, 1 reply; 30+ messages in thread
From: Paolo Bonzini @ 2021-11-19 10:34 UTC (permalink / raw)
  To: Lai Jiangshan, Lai Jiangshan, linux-kernel
  Cc: kvm, Jonathan Corbet, Sean Christopherson, Vitaly Kuznetsov,
	Wanpeng Li, Jim Mattson, Joerg Roedel, Thomas Gleixner,
	Ingo Molnar, Borislav Petkov, Dave Hansen, x86, H. Peter Anvin,
	linux-doc

On 11/19/21 11:30, Lai Jiangshan wrote:
>> 
> 
> Hello
> 
> Since 13, and 14 is queued, could you also queue this one and I will
> do the rename separately in the next patchset.  I found that the
> intent of this patch is hidden in the lengthened squashed patch (of
> this patch and the renaming patch).

Then you can do the renaming first?

Paolo


^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 15/15] KVM: X86: Always set gpte_is_8_bytes when direct map
  2021-11-19 10:34           ` Paolo Bonzini
@ 2021-11-19 10:42             ` Lai Jiangshan
  0 siblings, 0 replies; 30+ messages in thread
From: Lai Jiangshan @ 2021-11-19 10:42 UTC (permalink / raw)
  To: Paolo Bonzini, Lai Jiangshan, linux-kernel
  Cc: kvm, Jonathan Corbet, Sean Christopherson, Vitaly Kuznetsov,
	Wanpeng Li, Jim Mattson, Joerg Roedel, Thomas Gleixner,
	Ingo Molnar, Borislav Petkov, Dave Hansen, x86, H. Peter Anvin,
	linux-doc



On 2021/11/19 18:34, Paolo Bonzini wrote:
> On 11/19/21 11:30, Lai Jiangshan wrote:
>>>
>>
>> Hello
>>
>> Since 13, and 14 is queued, could you also queue this one and I will
>> do the rename separately in the next patchset.  I found that the
>> intent of this patch is hidden in the lengthened squashed patch (of
>> this patch and the renaming patch).
> 
> Then you can do the renaming first?
> 

Then I would prefer to make the code match the name for the first time
when the name is introduced, which means it would be a squashed patch.

I will not fuss about it and will send the squashed patch.

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 01/15] KVM: VMX: Use x86 core API to access to fs_base and inactive gs_base
  2021-11-18 11:08 ` [PATCH 01/15] KVM: VMX: Use x86 core API to access to fs_base and inactive gs_base Lai Jiangshan
  2021-11-18 16:05   ` Paolo Bonzini
@ 2021-11-21 15:17   ` Thomas Gleixner
  2021-11-22  3:27     ` Lai Jiangshan
  1 sibling, 1 reply; 30+ messages in thread
From: Thomas Gleixner @ 2021-11-21 15:17 UTC (permalink / raw)
  To: Lai Jiangshan, linux-kernel
  Cc: kvm, Lai Jiangshan, x86, Paolo Bonzini, Sean Christopherson,
	Vitaly Kuznetsov, Wanpeng Li, Jim Mattson, Joerg Roedel,
	Ingo Molnar, Borislav Petkov, Dave Hansen, H. Peter Anvin,
	Kees Cook

Lai,

On Thu, Nov 18 2021 at 19:08, Lai Jiangshan wrote:
> From: Lai Jiangshan <laijs@linux.alibaba.com>
>
> And they use FSGSBASE instructions when enabled.

That's really not a proper explanation for adding yet more exports.

Thanks,

        tglx



^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 01/15] KVM: VMX: Use x86 core API to access to fs_base and inactive gs_base
  2021-11-21 15:17   ` Thomas Gleixner
@ 2021-11-22  3:27     ` Lai Jiangshan
  0 siblings, 0 replies; 30+ messages in thread
From: Lai Jiangshan @ 2021-11-22  3:27 UTC (permalink / raw)
  To: Thomas Gleixner, Lai Jiangshan, linux-kernel
  Cc: kvm, x86, Paolo Bonzini, Sean Christopherson, Vitaly Kuznetsov,
	Wanpeng Li, Jim Mattson, Joerg Roedel, Ingo Molnar,
	Borislav Petkov, Dave Hansen, H. Peter Anvin, Kees Cook



On 2021/11/21 23:17, Thomas Gleixner wrote:
> Lai,
> 
> On Thu, Nov 18 2021 at 19:08, Lai Jiangshan wrote:
>> From: Lai Jiangshan <laijs@linux.alibaba.com>
>>
>> And they use FSGSBASE instructions when enabled.
> 
> That's really not a proper explanation for adding yet more exports.
> 

Hello

---
When a vCPU thread is rescheduled, 1 rdmsr and 2 wrmsr are issued for
MSR_KERNEL_GS_BASE.

In the scheduler, the core kernel uses x86_gsbase_[read|write]_cpu_inactive()
to accelerate access to the inactive GSBASE, but when the scheduler calls
into the preemption notifier in kvm, {rd|wr}msr(MSR_KERNEL_GS_BASE) is used.

To make the way kvm accesses the inactive GSBASE consistent with the
scheduler, kvm is changed to use the x86 core API to access fs_base and
the inactive gs_base.  These use FSGSBASE instructions when enabled.

It would add 2 more exports, but it doesn't expose any extra software or
hardware resources since they can already be accessed via {rd|wr}msr.
---

Not so persuasive.  If it needs to be accelerated in the preemption notifier,
there are some other more aggressive ways.

Thanks
Lai

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 05/15] KVM: VMX: Add document to state that write to uret msr should always be intercepted
  2021-11-18 16:05   ` Paolo Bonzini
@ 2021-12-07 20:38     ` Sean Christopherson
  0 siblings, 0 replies; 30+ messages in thread
From: Sean Christopherson @ 2021-12-07 20:38 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: Lai Jiangshan, linux-kernel, kvm, Lai Jiangshan,
	Vitaly Kuznetsov, Wanpeng Li, Jim Mattson, Joerg Roedel,
	Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
	H. Peter Anvin

On Thu, Nov 18, 2021, Paolo Bonzini wrote:
> On 11/18/21 12:08, Lai Jiangshan wrote:
> > From: Lai Jiangshan <laijs@linux.alibaba.com>
> > 
> > And adds a corresponding sanity check code.
> > 
> > Signed-off-by: Lai Jiangshan <laijs@linux.alibaba.com>
> > ---
> >   arch/x86/kvm/vmx/vmx.c | 10 +++++++++-
> >   1 file changed, 9 insertions(+), 1 deletion(-)
> > 
> > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> > index e8a41fdc3c4d..cd081219b668 100644
> > --- a/arch/x86/kvm/vmx/vmx.c
> > +++ b/arch/x86/kvm/vmx/vmx.c
> > @@ -3703,13 +3703,21 @@ void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
> >   	if (!cpu_has_vmx_msr_bitmap())
> >   		return;
> > +	/*
> > +	 * Write to uret msr should always be intercepted because the mechanism
> > +	 * must know the current value.  Sanity check to avoid any inadvertent
> > +	 * mistake in coding.
> > +	 */
> > +	if (WARN_ON_ONCE(vmx_find_uret_msr(vmx, msr) && (type & MSR_TYPE_W)))
> > +		return;
> > +
> 
> I'm not sure about this one, it's relatively expensive to call
> vmx_find_uret_msr.
> 
> User-return MSRs and disable-intercept MSRs are almost the opposite: uret is
> for MSRs that the host (not even the processor) never uses,
> disable-intercept is for MSRs that the guest reads/writes often.  As such it
> seems almost impossible that they overlap.

And they aren't fundamentally mutually exclusive, e.g. KVM could pass-through an
MSR and then do RDMSR in vmx_prepare_switch_to_host() to refresh the uret data
with the current (guest) value.  It'd be silly, but it would work.

^ permalink raw reply	[flat|nested] 30+ messages in thread

end of thread, other threads:[~2021-12-07 20:38 UTC | newest]

Thread overview: 30+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-11-18 11:07 [PATCH 00/15] KVM: X86: Miscellaneous cleanups Lai Jiangshan
2021-11-18 11:08 ` [PATCH 01/15] KVM: VMX: Use x86 core API to access to fs_base and inactive gs_base Lai Jiangshan
2021-11-18 16:05   ` Paolo Bonzini
2021-11-21 15:17   ` Thomas Gleixner
2021-11-22  3:27     ` Lai Jiangshan
2021-11-18 11:08 ` [PATCH 02/15] KVM: VMX: Avoid to rdmsrl(MSR_IA32_SYSENTER_ESP) Lai Jiangshan
2021-11-18 11:18   ` Paolo Bonzini
2021-11-18 14:17     ` Lai Jiangshan
2021-11-18 14:38       ` Paolo Bonzini
2021-11-18 11:08 ` [PATCH 03/15] KVM: VMX: Update msr value after kvm_set_user_return_msr() succeeds Lai Jiangshan
2021-11-18 11:08 ` [PATCH 04/15] KVM: VMX: Save HOST_CR3 in vmx_prepare_switch_to_guest() Lai Jiangshan
2021-11-18 11:08 ` [PATCH 05/15] KVM: VMX: Add document to state that write to uret msr should always be intercepted Lai Jiangshan
2021-11-18 16:05   ` Paolo Bonzini
2021-12-07 20:38     ` Sean Christopherson
2021-11-18 11:08 ` [PATCH 06/15] KVM: VMX: Use kvm_set_msr_common() for MSR_IA32_TSC_ADJUST in the default way Lai Jiangshan
2021-11-18 11:08 ` [PATCH 07/15] KVM: VMX: Change comments about vmx_get_msr() Lai Jiangshan
2021-11-18 11:08 ` [PATCH 08/15] KVM: SVM: Rename get_max_npt_level() to get_npt_level() Lai Jiangshan
2021-11-18 11:08 ` [PATCH 09/15] KVM: SVM: Allocate sd->save_area with __GFP_ZERO Lai Jiangshan
2021-11-18 11:08 ` [PATCH 10/15] KVM: X86: Skip allocating pae_root for vcpu->arch.guest_mmu when !tdp_enabled Lai Jiangshan
2021-11-18 11:08 ` [PATCH 11/15] KVM: X86: Fix comment in __kvm_mmu_create() Lai Jiangshan
2021-11-18 11:08 ` [PATCH 12/15] KVM: X86: Remove unused declaration of __kvm_mmu_free_some_pages() Lai Jiangshan
2021-11-18 11:08 ` [PATCH 13/15] KVM: X86: Remove useless code to set role.gpte_is_8_bytes when role.direct Lai Jiangshan
2021-11-18 11:08 ` [PATCH 14/15] KVM: X86: Calculate quadrant when !role.gpte_is_8_bytes Lai Jiangshan
2021-11-18 11:08 ` [PATCH 15/15] KVM: X86: Always set gpte_is_8_bytes when direct map Lai Jiangshan
2021-11-18 11:12   ` Paolo Bonzini
2021-11-18 14:34     ` Lai Jiangshan
2021-11-18 15:01       ` Paolo Bonzini
2021-11-19 10:30         ` Lai Jiangshan
2021-11-19 10:34           ` Paolo Bonzini
2021-11-19 10:42             ` Lai Jiangshan

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.