[PATCH RFC v1 1/9] KVM: Introduce pinning flag to hva_to_pfn*

From: Nikunj A Dadhania <nikunj@amd.com>
To: Paolo Bonzini <pbonzini@redhat.com>
Cc: Sean Christopherson <seanjc@google.com>,
	Vitaly Kuznetsov <vkuznets@redhat.com>,
	Wanpeng Li <wanpengli@tencent.com>,
	Jim Mattson <jmattson@google.com>, Joerg Roedel <joro@8bytes.org>,
	Brijesh Singh <brijesh.singh@amd.com>,
	Tom Lendacky <thomas.lendacky@amd.com>,
	Peter Gonda <pgonda@google.com>, Bharata B Rao <bharata@amd.com>,
	"Maciej S . Szmigiero" <mail@maciej.szmigiero.name>,
	Mingwei Zhang <mizhang@google.com>,
	"David Hildenbrand" <david@redhat.com>, <kvm@vger.kernel.org>,
	<linux-kernel@vger.kernel.org>,
	Nikunj A Dadhania <nikunj@amd.com>
Subject: [PATCH RFC v1 1/9] KVM: Introduce pinning flag to hva_to_pfn*
Date: Tue, 8 Mar 2022 10:08:49 +0530	[thread overview]
Message-ID: <20220308043857.13652-2-nikunj@amd.com> (raw)
In-Reply-To: <20220308043857.13652-1-nikunj@amd.com>

KVM allocates pages with get_user_pages_* (that use FOLL_GET). For
long term pinning of guest pages pin_user_pages_* (that use FOLL_PIN)
need to be used. Add a flag to hva_to_pfn* to allocate pinned pages
when the memslot represents encrypted memory.

Suggested-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Nikunj A Dadhania <nikunj@amd.com>
---
 include/linux/kvm_host.h |  6 ++++
 virt/kvm/kvm_main.c      | 63 ++++++++++++++++++++++++++++++----------
 virt/kvm/kvm_mm.h        |  2 +-
 virt/kvm/pfncache.c      |  2 +-
 4 files changed, 56 insertions(+), 17 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index f11039944c08..c23022960d51 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -55,6 +55,7 @@
  * include/linux/kvm_h.
  */
 #define KVM_MEMSLOT_INVALID	(1UL << 16)
+#define KVM_MEMSLOT_ENCRYPTED   (1UL << 17)
 
 /*
  * Bit 63 of the memslot generation number is an "update in-progress flag",
@@ -583,6 +584,11 @@ static inline unsigned long *kvm_second_dirty_bitmap(struct kvm_memory_slot *mem
 	return memslot->dirty_bitmap + len / sizeof(*memslot->dirty_bitmap);
 }
 
+static inline bool memslot_is_encrypted(const struct kvm_memory_slot *slot)
+{
+	return slot && (slot->flags & KVM_MEMSLOT_ENCRYPTED);
+}
+
 #ifndef KVM_DIRTY_LOG_MANUAL_CAPS
 #define KVM_DIRTY_LOG_MANUAL_CAPS KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE
 #endif
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 0afc016cc54d..c035fe6b39ec 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2381,9 +2381,10 @@ static inline int check_user_page_hwpoison(unsigned long addr)
  * only part that runs if we can in atomic context.
  */
 static bool hva_to_pfn_fast(unsigned long addr, bool write_fault,
-			    bool *writable, kvm_pfn_t *pfn)
+			    bool *writable, kvm_pfn_t *pfn, bool use_pin)
 {
 	struct page *page[1];
+	bool ret;
 
 	/*
 	 * Fast pin a writable pfn only if it is a write fault request
@@ -2393,7 +2394,12 @@ static bool hva_to_pfn_fast(unsigned long addr, bool write_fault,
 	if (!(write_fault || writable))
 		return false;
 
-	if (get_user_page_fast_only(addr, FOLL_WRITE, page)) {
+	if (!use_pin)
+		ret = get_user_page_fast_only(addr, FOLL_WRITE, page);
+	else
+		ret = pin_user_pages_fast_only(addr, 1, FOLL_WRITE | FOLL_LONGTERM, page);
+
+	if (ret) {
 		*pfn = page_to_pfn(page[0]);
 
 		if (writable)
@@ -2409,9 +2415,9 @@ static bool hva_to_pfn_fast(unsigned long addr, bool write_fault,
  * 1 indicates success, -errno is returned if error is detected.
  */
 static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault,
-			   bool *writable, kvm_pfn_t *pfn)
+			   bool *writable, kvm_pfn_t *pfn, bool use_pin)
 {
-	unsigned int flags = FOLL_HWPOISON;
+	unsigned int flags = 0;
 	struct page *page;
 	int npages = 0;
 
@@ -2422,20 +2428,41 @@ static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault,
 
 	if (write_fault)
 		flags |= FOLL_WRITE;
-	if (async)
-		flags |= FOLL_NOWAIT;
 
-	npages = get_user_pages_unlocked(addr, 1, &page, flags);
+	if (!use_pin) {
+		flags |= FOLL_HWPOISON;
+		if (async)
+			flags |= FOLL_NOWAIT;
+
+		npages = get_user_pages_unlocked(addr, 1, &page, flags);
+	} else {
+		/*
+		 * FOLL_LONGTERM is not supported in pin_user_pages_unlocked,
+		 * use *_fast instead.
+		 */
+		flags |= FOLL_LONGTERM;
+		npages = pin_user_pages_fast(addr, 1, flags, &page);
+	}
+
 	if (npages != 1)
 		return npages;
 
 	/* map read fault as writable if possible */
 	if (unlikely(!write_fault) && writable) {
 		struct page *wpage;
+		bool ret;
+
+		if (!use_pin)
+			ret = get_user_page_fast_only(addr, FOLL_WRITE, &wpage);
+		else
+			ret = pin_user_pages_fast_only(addr, 1, FOLL_WRITE | FOLL_LONGTERM, &wpage);
 
-		if (get_user_page_fast_only(addr, FOLL_WRITE, &wpage)) {
+		if (ret) {
 			*writable = true;
-			put_page(page);
+			if (!use_pin)
+				put_page(page);
+			else
+				unpin_user_page(page);
 			page = wpage;
 		}
 	}
@@ -2541,7 +2568,7 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma,
  *     whether the mapping is writable.
  */
 kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
-		     bool write_fault, bool *writable)
+		     bool write_fault, bool *writable, bool use_pin)
 {
 	struct vm_area_struct *vma;
 	kvm_pfn_t pfn = 0;
@@ -2550,13 +2577,13 @@ kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
 	/* we can do it either atomically or asynchronously, not both */
 	BUG_ON(atomic && async);
 
-	if (hva_to_pfn_fast(addr, write_fault, writable, &pfn))
+	if (hva_to_pfn_fast(addr, write_fault, writable, &pfn, use_pin))
 		return pfn;
 
 	if (atomic)
 		return KVM_PFN_ERR_FAULT;
 
-	npages = hva_to_pfn_slow(addr, async, write_fault, writable, &pfn);
+	npages = hva_to_pfn_slow(addr, async, write_fault, writable, &pfn, use_pin);
 	if (npages == 1)
 		return pfn;
 
@@ -2616,7 +2643,7 @@ kvm_pfn_t __gfn_to_pfn_memslot(const struct kvm_memory_slot *slot, gfn_t gfn,
 	}
 
 	return hva_to_pfn(addr, atomic, async, write_fault,
-			  writable);
+			  writable, memslot_is_encrypted(slot));
 }
 EXPORT_SYMBOL_GPL(__gfn_to_pfn_memslot);
 
@@ -2788,8 +2815,14 @@ EXPORT_SYMBOL_GPL(kvm_release_page_clean);
 
 void kvm_release_pfn_clean(kvm_pfn_t pfn)
 {
-	if (!is_error_noslot_pfn(pfn) && !kvm_is_reserved_pfn(pfn))
-		put_page(pfn_to_page(pfn));
+	if (!is_error_noslot_pfn(pfn) && !kvm_is_reserved_pfn(pfn)) {
+		struct page *page = pfn_to_page(pfn);
+
+		if (page_maybe_dma_pinned(page))
+			unpin_user_page(page);
+		else
+			put_page(page);
+	}
 }
 EXPORT_SYMBOL_GPL(kvm_release_pfn_clean);
 
diff --git a/virt/kvm/kvm_mm.h b/virt/kvm/kvm_mm.h
index 34ca40823260..b1a5e379949b 100644
--- a/virt/kvm/kvm_mm.h
+++ b/virt/kvm/kvm_mm.h
@@ -25,7 +25,7 @@
 #endif /* KVM_HAVE_MMU_RWLOCK */
 
 kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
-		     bool write_fault, bool *writable);
+		     bool write_fault, bool *writable, bool use_pin);
 
 #ifdef CONFIG_HAVE_KVM_PFNCACHE
 void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm,
diff --git a/virt/kvm/pfncache.c b/virt/kvm/pfncache.c
index ce878f4be4da..44384f06c81b 100644
--- a/virt/kvm/pfncache.c
+++ b/virt/kvm/pfncache.c
@@ -135,7 +135,7 @@ static kvm_pfn_t hva_to_pfn_retry(struct kvm *kvm, unsigned long uhva)
 		smp_rmb();
 
 		/* We always request a writeable mapping */
-		new_pfn = hva_to_pfn(uhva, false, NULL, true, NULL);
+		new_pfn = hva_to_pfn(uhva, false, NULL, true, NULL, false);
 		if (is_error_noslot_pfn(new_pfn))
 			break;
 
-- 
2.32.0