All of lore.kernel.org
 help / color / mirror / Atom feed
From: Zeng Guang <guang.zeng@intel.com>
To: Paolo Bonzini <pbonzini@redhat.com>,
	Sean Christopherson <seanjc@google.com>,
	Vitaly Kuznetsov <vkuznets@redhat.com>,
	Wanpeng Li <wanpengli@tencent.com>,
	Jim Mattson <jmattson@google.com>, Joerg Roedel <joro@8bytes.org>,
	kvm@vger.kernel.org, Dave Hansen <dave.hansen@linux.intel.com>,
	Tony Luck <tony.luck@intel.com>,
	Kan Liang <kan.liang@linux.intel.com>,
	Thomas Gleixner <tglx@linutronix.de>,
	Ingo Molnar <mingo@redhat.com>, Borislav Petkov <bp@alien8.de>,
	"H. Peter Anvin" <hpa@zytor.com>,
	Kim Phillips <kim.phillips@amd.com>,
	Jarkko Sakkinen <jarkko@kernel.org>,
	Jethro Beekman <jethro@fortanix.com>,
	Kai Huang <kai.huang@intel.com>
Cc: x86@kernel.org, linux-kernel@vger.kernel.org,
	Robert Hu <robert.hu@intel.com>, Gao Chao <chao.gao@intel.com>,
	Zeng Guang <guang.zeng@intel.com>
Subject: [PATCH v5 8/8] KVM: VMX: Resize PID-ponter table on demand for IPI virtualization
Date: Fri, 31 Dec 2021 22:28:49 +0800	[thread overview]
Message-ID: <20211231142849.611-9-guang.zeng@intel.com> (raw)
In-Reply-To: <20211231142849.611-1-guang.zeng@intel.com>

Current kvm allocates 8 pages in advance for Posted Interrupt
Descriptor pointer (PID-pointer) table to accommodate vCPUs
with APIC ID up to KVM_MAX_VCPU_IDS - 1. This policy wastes
some memory because most of VMs have less than 512 vCPUs and
then just need one page.

To reduce the memory consumption of most of VMs, KVM initially
allocates one page for PID-pointer table for each VM and bumps
up the table on demand according to the maximum APIC ID of all
vCPUs of a VM. Bumping up PID-pointer table involves allocating
a new table, requesting all vCPUs to update related VMCS fields
and freeing the old table.

In worst case that new memory allocation fails, KVM keep using
the present PID-pointer table. Thus IPI virtualization won't
take effect to those vCPUs not set in the table without impact
on others.

Signed-off-by: Zeng Guang <guang.zeng@intel.com>
---
 arch/x86/include/asm/kvm-x86-ops.h |  1 +
 arch/x86/include/asm/kvm_host.h    |  3 ++
 arch/x86/kvm/vmx/vmx.c             | 77 +++++++++++++++++++++++++-----
 arch/x86/kvm/vmx/vmx.h             |  6 +++
 arch/x86/kvm/x86.c                 |  2 +
 5 files changed, 78 insertions(+), 11 deletions(-)

diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index cefe1d81e2e8..847246f2537d 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -121,6 +121,7 @@ KVM_X86_OP_NULL(enable_direct_tlbflush)
 KVM_X86_OP_NULL(migrate_timers)
 KVM_X86_OP(msr_filter_changed)
 KVM_X86_OP_NULL(complete_emulated_msr)
+KVM_X86_OP(update_ipiv_pid_table)
 
 #undef KVM_X86_OP
 #undef KVM_X86_OP_NULL
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 753bf2a7cebc..24990d4e94c4 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -102,6 +102,8 @@
 #define KVM_REQ_MSR_FILTER_CHANGED	KVM_ARCH_REQ(29)
 #define KVM_REQ_UPDATE_CPU_DIRTY_LOGGING \
 	KVM_ARCH_REQ_FLAGS(30, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_PID_TABLE_UPDATE \
+	KVM_ARCH_REQ_FLAGS(31, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
 
 #define CR0_RESERVED_BITS                                               \
 	(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
@@ -1494,6 +1496,7 @@ struct kvm_x86_ops {
 
 	void (*vcpu_deliver_sipi_vector)(struct kvm_vcpu *vcpu, u8 vector);
 	void (*update_ipiv_pid_entry)(struct kvm_vcpu *vcpu, u8 old_id, u8 new_id);
+	void (*update_ipiv_pid_table)(struct kvm_vcpu *vcpu);
 };
 
 struct kvm_x86_nested_ops {
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index f21ce15c5eb8..fb8e2b52b5f7 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -228,8 +228,9 @@ static const struct {
 
 #define L1D_CACHE_ORDER 4
 
-/* PID(Posted-Interrupt Descriptor)-pointer table entry is 64-bit long */
-#define MAX_PID_TABLE_ORDER get_order(KVM_MAX_VCPU_IDS * sizeof(u64))
+/* Each entry in PID(Posted-Interrupt Descriptor)-pointer table is 8 bytes */
+#define table_index_to_size(index) ((index) << 3)
+#define table_size_to_index(size) ((size) >> 3)
 #define PID_TABLE_ENTRY_VALID 1
 
 static void *vmx_l1d_flush_pages;
@@ -4332,6 +4333,42 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
 	return exec_control;
 }
 
+static int vmx_alloc_pid_table(struct kvm_vmx *kvm_vmx, int order)
+{
+	u64 *pid_table;
+
+	pid_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
+	if (!pid_table)
+		return -ENOMEM;
+
+	kvm_vmx->pid_table = pid_table;
+	kvm_vmx->pid_last_index = table_size_to_index(PAGE_SIZE << order) - 1;
+	return 0;
+}
+
+static int vmx_expand_pid_table(struct kvm_vmx *kvm_vmx, int entry_idx)
+{
+	u64 *last_pid_table;
+	int last_table_size, new_order;
+
+	if (entry_idx <= kvm_vmx->pid_last_index)
+		return 0;
+
+	last_pid_table = kvm_vmx->pid_table;
+	last_table_size = table_index_to_size(kvm_vmx->pid_last_index + 1);
+	new_order = get_order(table_index_to_size(entry_idx + 1));
+
+	if (vmx_alloc_pid_table(kvm_vmx, new_order))
+		return -ENOMEM;
+
+	memcpy(kvm_vmx->pid_table, last_pid_table, last_table_size);
+	kvm_make_all_cpus_request(&kvm_vmx->kvm, KVM_REQ_PID_TABLE_UPDATE);
+
+	/* Now old PID table can be freed safely as no vCPU is using it. */
+	free_pages((unsigned long)last_pid_table, get_order(last_table_size));
+	return 0;
+}
+
 #define VMX_XSS_EXIT_BITMAP 0
 
 static void init_vmcs(struct vcpu_vmx *vmx)
@@ -4370,10 +4407,19 @@ static void init_vmcs(struct vcpu_vmx *vmx)
 		vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc)));
 
 		if (enable_ipiv) {
-			WRITE_ONCE(kvm_vmx->pid_table[vcpu->vcpu_id],
-				__pa(&vmx->pi_desc) | PID_TABLE_ENTRY_VALID);
+			down_write(&kvm_vmx->pid_table_lock);
+
+			/*
+			 * In case new memory allocation for PID table fails,
+			 * skip setting Posted-Interrupt descriptor of current
+			 * vCPU which index is beyond present table limit.
+			 */
+			if (!vmx_expand_pid_table(kvm_vmx, vcpu->vcpu_id))
+				WRITE_ONCE(kvm_vmx->pid_table[vcpu->vcpu_id],
+					__pa(&vmx->pi_desc) | PID_TABLE_ENTRY_VALID);
 			vmcs_write64(PID_POINTER_TABLE, __pa(kvm_vmx->pid_table));
 			vmcs_write16(LAST_PID_POINTER_INDEX, kvm_vmx->pid_last_index);
+			up_write(&kvm_vmx->pid_table_lock);
 		}
 	}
 
@@ -7001,14 +7047,11 @@ static int vmx_vm_init(struct kvm *kvm)
 	}
 
 	if (enable_ipiv) {
-		struct page *pages;
+		struct kvm_vmx *kvm_vmx = to_kvm_vmx(kvm);
 
-		pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, MAX_PID_TABLE_ORDER);
-		if (!pages)
+		if (vmx_alloc_pid_table(kvm_vmx, 0))
 			return -ENOMEM;
-
-		to_kvm_vmx(kvm)->pid_table = (void *)page_address(pages);
-		to_kvm_vmx(kvm)->pid_last_index = KVM_MAX_VCPU_IDS - 1;
+		init_rwsem(&kvm_vmx->pid_table_lock);
 	}
 
 	return 0;
@@ -7630,7 +7673,18 @@ static void vmx_vm_destroy(struct kvm *kvm)
 	struct kvm_vmx *kvm_vmx = to_kvm_vmx(kvm);
 
 	if (kvm_vmx->pid_table)
-		free_pages((unsigned long)kvm_vmx->pid_table, MAX_PID_TABLE_ORDER);
+		free_pages((unsigned long)kvm_vmx->pid_table,
+			get_order(table_index_to_size(kvm_vmx->pid_last_index)));
+}
+
+static void vmx_update_ipiv_pid_table(struct kvm_vcpu *vcpu)
+{
+	struct kvm_vmx *kvm_vmx = to_kvm_vmx(vcpu->kvm);
+
+	down_read(&kvm_vmx->pid_table_lock);
+	vmcs_write64(PID_POINTER_TABLE, __pa(kvm_vmx->pid_table));
+	vmcs_write16(LAST_PID_POINTER_INDEX, kvm_vmx->pid_last_index);
+	up_read(&kvm_vmx->pid_table_lock);
 }
 
 static void vmx_update_ipiv_pid_entry(struct kvm_vcpu *vcpu, u8 old_id, u8 new_id)
@@ -7782,6 +7836,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
 
 	.vcpu_deliver_sipi_vector = kvm_vcpu_deliver_sipi_vector,
 	.update_ipiv_pid_entry = vmx_update_ipiv_pid_entry,
+	.update_ipiv_pid_table = vmx_update_ipiv_pid_table,
 };
 
 static __init void vmx_setup_user_return_msrs(void)
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index c8ae1458eb9e..8c437a7be08a 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -356,6 +356,12 @@ struct kvm_vmx {
 	/* PID table for IPI virtualization */
 	u64 *pid_table;
 	u16 pid_last_index;
+	/*
+	 * Protects accesses to pid_table and pid_last_index.
+	 * Request to reallocate and update PID table could
+	 * happen on multiple vCPUs simultaneously.
+	 */
+	struct rw_semaphore pid_table_lock;
 };
 
 bool nested_vmx_allowed(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9a2972fdae82..97ec2adb76bd 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9783,6 +9783,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 
 		if (kvm_check_request(KVM_REQ_UPDATE_CPU_DIRTY_LOGGING, vcpu))
 			static_call(kvm_x86_update_cpu_dirty_logging)(vcpu);
+		if (kvm_check_request(KVM_REQ_PID_TABLE_UPDATE, vcpu))
+			static_call(kvm_x86_update_ipiv_pid_table)(vcpu);
 	}
 
 	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win ||
-- 
2.27.0


  parent reply	other threads:[~2021-12-31 14:59 UTC|newest]

Thread overview: 47+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-12-31 14:28 [PATCH v5 0/8] IPI virtualization support for VM Zeng Guang
2021-12-31 14:28 ` [PATCH v5 1/8] x86/cpu: Add new VMX feature, Tertiary VM-Execution control Zeng Guang
2021-12-31 14:28 ` [PATCH v5 2/8] KVM: VMX: Extend BUILD_CONTROLS_SHADOW macro to support 64-bit variation Zeng Guang
2021-12-31 14:28 ` [PATCH v5 3/8] KVM: VMX: Detect Tertiary VM-Execution control when setup VMCS config Zeng Guang
2021-12-31 14:28 ` [PATCH v5 4/8] KVM: VMX: dump_vmcs() reports tertiary_exec_control field as well Zeng Guang
2022-01-13 21:03   ` Sean Christopherson
2022-01-14  4:19     ` Zeng Guang
2022-01-20  1:06       ` Sean Christopherson
2022-01-20  5:34         ` Zeng Guang
2021-12-31 14:28 ` [PATCH v5 5/8] KVM: x86: Support interrupt dispatch in x2APIC mode with APIC-write VM exit Zeng Guang
2022-01-13 21:29   ` Sean Christopherson
2022-01-14  7:52     ` Zeng Guang
2022-01-14 17:34       ` Sean Christopherson
2022-01-15  2:08         ` Zeng Guang
2022-01-18  0:44           ` Yuan Yao
2022-01-18  3:06             ` Zeng Guang
2022-01-18 18:17           ` Sean Christopherson
2022-01-19  2:48             ` Zeng Guang
2021-12-31 14:28 ` [PATCH v5 6/8] KVM: VMX: enable IPI virtualization Zeng Guang
2022-01-13 21:47   ` Sean Christopherson
2022-01-14  5:36     ` Zeng Guang
2021-12-31 14:28 ` [PATCH v5 7/8] KVM: VMX: Update PID-pointer table entry when APIC ID is changed Zeng Guang
2022-01-05 19:13   ` Tom Lendacky
2022-01-06  1:44     ` Zeng Guang
2022-01-06 14:06       ` Tom Lendacky
2022-01-07  8:05         ` Zeng Guang
2022-01-07  8:31           ` Maxim Levitsky
2022-01-10  7:45             ` Chao Gao
2022-01-10 22:24               ` Maxim Levitsky
2022-01-13 22:19                 ` Sean Christopherson
2022-01-14  2:58                   ` Chao Gao
2022-01-14  8:17                     ` Maxim Levitsky
2022-01-17  3:17                       ` Chao Gao
2022-02-02 23:23                   ` Sean Christopherson
2022-02-03 20:22                     ` Sean Christopherson
2022-02-23  6:10                       ` Chao Gao
2022-02-23 10:26                         ` Maxim Levitsky
2022-01-14  0:22               ` Yuan Yao
2021-12-31 14:28 ` Zeng Guang [this message]
2022-01-13 22:09   ` [PATCH v5 8/8] KVM: VMX: Resize PID-ponter table on demand for IPI virtualization Sean Christopherson
2022-01-14 15:59     ` Zeng Guang
2022-01-14 16:18       ` Sean Christopherson
2022-01-17 15:04         ` Zeng Guang
2022-01-18 17:15           ` Sean Christopherson
2022-01-19  7:55             ` Zeng Guang
2022-01-20  1:01               ` Sean Christopherson
2022-01-24 16:40                 ` Zeng Guang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20211231142849.611-9-guang.zeng@intel.com \
    --to=guang.zeng@intel.com \
    --cc=bp@alien8.de \
    --cc=chao.gao@intel.com \
    --cc=dave.hansen@linux.intel.com \
    --cc=hpa@zytor.com \
    --cc=jarkko@kernel.org \
    --cc=jethro@fortanix.com \
    --cc=jmattson@google.com \
    --cc=joro@8bytes.org \
    --cc=kai.huang@intel.com \
    --cc=kan.liang@linux.intel.com \
    --cc=kim.phillips@amd.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=robert.hu@intel.com \
    --cc=seanjc@google.com \
    --cc=tglx@linutronix.de \
    --cc=tony.luck@intel.com \
    --cc=vkuznets@redhat.com \
    --cc=wanpengli@tencent.com \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.