From: Vipin Sharma <vipinsh@google.com>
To: dmatlack@google.com, bgardon@google.com, seanjc@google.com,
	pbonzini@redhat.com
Cc: kvm@vger.kernel.org, linux-kernel@vger.kernel.org,
	Vipin Sharma <vipinsh@google.com>
Subject: [Patch v2 1/2] KVM: x86/mmu: Allocate page table pages on TDP splits during dirty log enable on the underlying page's numa node
Date: Thu,  1 Dec 2022 11:57:17 -0800
Message-ID: <20221201195718.1409782-2-vipinsh@google.com>
In-Reply-To: <20221201195718.1409782-1-vipinsh@google.com>

Huge pages are split when dirty logging is enabled. New page table
pages are allocated based on the current thread's NUMA node or
mempolicy, which causes inefficient page table accesses if the
underlying page is on a different NUMA node.

Allocate page table pages on the same NUMA node as the underlying huge
page when dirty logging is enabled and huge pages are split.
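
A condensed sketch of the flow (the real code is in the diff below;
the standalone helper name alloc_spt_on_backing_node() is illustrative
only, it does not exist in the patch):

	/* Pick the node backing the huge page being split, then
	 * allocate the new page table page on that node.
	 */
	static void *alloc_spt_on_backing_node(struct tdp_iter *iter,
					       gfp_t gfp)
	{
		/* Node of the page the huge SPTE points at, or
		 * NUMA_NO_NODE if the pfn is not backed by a
		 * refcounted page, in which case alloc_pages_node()
		 * falls back to the local node.
		 */
		int nid = kvm_pfn_to_refcounted_page_nid(
					spte_to_pfn(iter->old_spte));

		return kvm_mmu_get_free_page(nid, gfp);
	}

The behavior defaults to on and, since the new numa_aware_pagetable
module parameter is 0644, should be togglable at runtime via
/sys/module/kvm/parameters/numa_aware_pagetable.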

During the pre-copy phase of live migration of a 416-vCPU, 11 TiB
memory VM on an 8-node host, performance gains in the range of 130%
to 150% were observed.

Suggested-by: David Matlack <dmatlack@google.com>
Signed-off-by: Vipin Sharma <vipinsh@google.com>
---
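Note: sleeping or reclaiming is not allowed while the MMU lock is
held, so the split path below tries the allocation twice. A distilled
sketch of that pattern (simplified from tdp_mmu_alloc_sp_for_split();
the standalone function below is illustrative, not the literal code):

	static struct kvm_mmu_page *sp_for_split_sketch(struct kvm *kvm,
							struct tdp_iter *iter,
							int nid, bool shared)
	{
		struct kvm_mmu_page *sp;

		/* Cheap attempt: no sleeping or reclaim under the MMU lock. */
		sp = __tdp_mmu_alloc_sp_for_split(nid, GFP_NOWAIT | __GFP_ACCOUNT);
		if (sp)
			return sp;

		/* Drop the lock so the retry may sleep and reclaim. */
		if (shared)
			read_unlock(&kvm->mmu_lock);
		else
			write_unlock(&kvm->mmu_lock);

		/* SPTEs may have changed while the lock was dropped;
		 * tell the caller to restart its walk.
		 */
		iter->yielded = true;
		sp = __tdp_mmu_alloc_sp_for_split(nid, GFP_KERNEL_ACCOUNT);

		if (shared)
			read_lock(&kvm->mmu_lock);
		else
			write_lock(&kvm->mmu_lock);

		return sp;
	}
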
 arch/x86/kvm/mmu.h         |  1 +
 arch/x86/kvm/mmu/mmu.c     | 19 +++++++++++++++++++
 arch/x86/kvm/mmu/tdp_mmu.c | 12 ++++++++----
 include/linux/kvm_host.h   | 15 +++++++++++++++
 4 files changed, 43 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 6bdaacb6faa0..c960fb096e5c 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -119,6 +119,7 @@ void kvm_mmu_unload(struct kvm_vcpu *vcpu);
 void kvm_mmu_free_obsolete_roots(struct kvm_vcpu *vcpu);
 void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
 void kvm_mmu_sync_prev_roots(struct kvm_vcpu *vcpu);
+void *kvm_mmu_get_free_page(int nid, gfp_t gfp);
 
 static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu)
 {
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 4736d7849c60..0554dfc55553 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -90,6 +90,9 @@ __MODULE_PARM_TYPE(nx_huge_pages_recovery_period_ms, "uint");
 static bool __read_mostly force_flush_and_sync_on_reuse;
 module_param_named(flush_on_reuse, force_flush_and_sync_on_reuse, bool, 0644);
 
+static bool __read_mostly numa_aware_pagetable = true;
+module_param_named(numa_aware_pagetable, numa_aware_pagetable, bool, 0644);
+
 /*
  * When setting this variable to true it enables Two-Dimensional-Paging
  * where the hardware walks 2 page tables:
@@ -6984,3 +6987,19 @@ void kvm_mmu_pre_destroy_vm(struct kvm *kvm)
 	if (kvm->arch.nx_huge_page_recovery_thread)
 		kthread_stop(kvm->arch.nx_huge_page_recovery_thread);
 }
+
+void *kvm_mmu_get_free_page(int nid, gfp_t gfp)
+{
+	struct page *spt_page;
+	void *address = NULL;
+
+	if (numa_aware_pagetable) {
+		spt_page = alloc_pages_node(nid, gfp, 0);
+		if (spt_page)
+			address = page_address(spt_page);
+	} else {
+		address = (void *)__get_free_page(gfp);
+	}
+
+	return address;
+}
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 771210ce5181..1607afbfcc0b 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -1413,7 +1413,7 @@ bool kvm_tdp_mmu_wrprot_slot(struct kvm *kvm,
 	return spte_set;
 }
 
-static struct kvm_mmu_page *__tdp_mmu_alloc_sp_for_split(gfp_t gfp)
+static struct kvm_mmu_page *__tdp_mmu_alloc_sp_for_split(int nid, gfp_t gfp)
 {
 	struct kvm_mmu_page *sp;
 
@@ -1423,7 +1423,8 @@ static struct kvm_mmu_page *__tdp_mmu_alloc_sp_for_split(gfp_t gfp)
 	if (!sp)
 		return NULL;
 
-	sp->spt = (void *)__get_free_page(gfp);
+	sp->spt = kvm_mmu_get_free_page(nid, gfp);
+
 	if (!sp->spt) {
 		kmem_cache_free(mmu_page_header_cache, sp);
 		return NULL;
@@ -1437,6 +1438,9 @@ static struct kvm_mmu_page *tdp_mmu_alloc_sp_for_split(struct kvm *kvm,
 						       bool shared)
 {
 	struct kvm_mmu_page *sp;
+	int nid;
+
+	nid = kvm_pfn_to_refcounted_page_nid(spte_to_pfn(iter->old_spte));
 
 	/*
 	 * Since we are allocating while under the MMU lock we have to be
@@ -1447,7 +1451,7 @@ static struct kvm_mmu_page *tdp_mmu_alloc_sp_for_split(struct kvm *kvm,
 	 * If this allocation fails we drop the lock and retry with reclaim
 	 * allowed.
 	 */
-	sp = __tdp_mmu_alloc_sp_for_split(GFP_NOWAIT | __GFP_ACCOUNT);
+	sp = __tdp_mmu_alloc_sp_for_split(nid, GFP_NOWAIT | __GFP_ACCOUNT);
 	if (sp)
 		return sp;
 
@@ -1459,7 +1463,7 @@ static struct kvm_mmu_page *tdp_mmu_alloc_sp_for_split(struct kvm *kvm,
 		write_unlock(&kvm->mmu_lock);
 
 	iter->yielded = true;
-	sp = __tdp_mmu_alloc_sp_for_split(GFP_KERNEL_ACCOUNT);
+	sp = __tdp_mmu_alloc_sp_for_split(nid, GFP_KERNEL_ACCOUNT);
 
 	if (shared)
 		read_lock(&kvm->mmu_lock);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 8f874a964313..558ded73f660 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1596,6 +1596,21 @@ void kvm_arch_sync_events(struct kvm *kvm);
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
 
 struct page *kvm_pfn_to_refcounted_page(kvm_pfn_t pfn);
+
+/*
+ * Returns the nid of a 'struct page' if pfn is valid and backed by a refcounted
+ * page, NUMA_NO_NODE otherwise.
+ */
+static inline int kvm_pfn_to_refcounted_page_nid(kvm_pfn_t pfn)
+{
+	struct page *page = kvm_pfn_to_refcounted_page(pfn);
+
+	if (page)
+		return page_to_nid(page);
+	else
+		return NUMA_NO_NODE;
+}
+
 bool kvm_is_zone_device_page(struct page *page);
 
 struct kvm_irq_ack_notifier {
-- 
2.39.0.rc0.267.gcb52ba06e7-goog

