From: Paolo Bonzini <pbonzini@redhat.com>
To: linux-kernel@vger.kernel.org, kvm@vger.kernel.org
Cc: seanjc@google.com, michael.roth@amd.com,
	isaku.yamahata@intel.com, thomas.lendacky@amd.com
Subject: [PATCH 18/21] KVM: x86: Add gmem hook for initializing memory
Date: Tue, 27 Feb 2024 18:20:57 -0500
Message-ID: <20240227232100.478238-19-pbonzini@redhat.com>
In-Reply-To: <20240227232100.478238-1-pbonzini@redhat.com>

guest_memfd pages are generally expected to be in some arch-defined
initial state prior to being used for guest memory. For SEV-SNP this
initial state is 'private', or 'guest-owned', and moving pages into
it requires additional operations, namely updating the corresponding
entries in the RMP table.

Allow for an arch-defined hook to handle updates of this sort, and
implement one for x86 so that vendor implementations like AMD SVM can
register a kvm_x86_ops callback to handle these updates for SEV-SNP
guests.

The preparation callback is always called when allocating/grabbing
folios via gmem, and it is up to the architecture to keep track of
whether or not the pages are already in the expected state (e.g. by
consulting the RMP table in the case of SEV-SNP).
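
Building on the sketch above, the body of the prepare hook could keep
that tracking implicit by consulting the RMP directly (here
snp_lookup_rmpentry() is assumed to behave like an RMP-lookup helper
from the SNP host support; names remain illustrative):

  int level, rc;
  bool assigned;

  /* Query the current RMP state for this pfn. */
  rc = snp_lookup_rmpentry(pfn, &assigned, &level);
  if (rc)
          return rc;

  /* Already guest-owned: nothing to do, the hook is idempotent. */
  if (assigned)
          return 0;

  /* Otherwise transition the page to the private state. */
  return snp_make_page_private(kvm, gfn, pfn, max_order);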

In some cases it is necessary to defer the preparation of the pages,
for example to handle in-place encryption of the initial guest memory
payload before marking these pages as 'private'/'guest-owned'. To
support this, also add a helper that performs the same function as
kvm_gmem_get_pfn() but bypasses the preparation callback, so that the
pages can be accessed beforehand.
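
The intended calling sequence for the deferred case is roughly the
following (sketch only; snp_encrypt_initial_payload() stands in for
whatever in-place encryption step the backend performs):

  kvm_pfn_t pfn;
  int max_order, rc;

  /* Grab the page without invoking the gmem_prepare hook. */
  rc = kvm_gmem_get_uninit_pfn(kvm, slot, gfn, &pfn, &max_order);
  if (rc)
          return rc;

  /*
   * Copy and encrypt the initial payload while the page is still
   * writable by the host; the page becomes private/guest-owned in
   * the RMP as part of the encryption operation.
   */
  rc = snp_encrypt_initial_payload(kvm, gfn, pfn, src);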

Link: https://lore.kernel.org/lkml/ZLqVdvsF11Ddo7Dq@google.com/
Co-developed-by: Michael Roth <michael.roth@amd.com>
Signed-off-by: Michael Roth <michael.roth@amd.com>
Message-Id: <20231230172351.574091-5-michael.roth@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm-x86-ops.h |  1 +
 arch/x86/include/asm/kvm_host.h    |  1 +
 arch/x86/kvm/x86.c                 |  6 +++
 include/linux/kvm_host.h           | 14 ++++++
 virt/kvm/Kconfig                   |  4 ++
 virt/kvm/guest_memfd.c             | 72 +++++++++++++++++++++++++++---
 6 files changed, 92 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index ac8b7614e79d..adfaad15e7e6 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -139,6 +139,7 @@ KVM_X86_OP(complete_emulated_msr)
 KVM_X86_OP(vcpu_deliver_sipi_vector)
 KVM_X86_OP_OPTIONAL_RET0(vcpu_get_apicv_inhibit_reasons);
 KVM_X86_OP_OPTIONAL(get_untagged_addr)
+KVM_X86_OP_OPTIONAL_RET0(gmem_prepare)
 
 #undef KVM_X86_OP
 #undef KVM_X86_OP_OPTIONAL
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 7de8a3f2a118..6d873d08f739 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1804,6 +1804,7 @@ struct kvm_x86_ops {
 	unsigned long (*vcpu_get_apicv_inhibit_reasons)(struct kvm_vcpu *vcpu);
 
 	gva_t (*get_untagged_addr)(struct kvm_vcpu *vcpu, gva_t gva, unsigned int flags);
+	int (*gmem_prepare)(struct kvm *kvm, kvm_pfn_t pfn, gfn_t gfn, int max_order);
 };
 
 struct kvm_x86_nested_ops {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f10a5a617120..eff532ea59c9 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -13598,6 +13598,12 @@ bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_arch_no_poll);
 
+#ifdef CONFIG_HAVE_KVM_GMEM_PREPARE
+int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_order)
+{
+	return static_call(kvm_x86_gmem_prepare)(kvm, pfn, gfn, max_order);
+}
+#endif
 
 int kvm_spec_ctrl_test_value(u64 value)
 {
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 97afe4519772..03bf616b7308 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -2434,6 +2434,8 @@ static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn)
 #ifdef CONFIG_KVM_PRIVATE_MEM
 int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
 		     gfn_t gfn, kvm_pfn_t *pfn, int *max_order);
+int kvm_gmem_get_uninit_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
+		            gfn_t gfn, kvm_pfn_t *pfn, int *max_order);
 #else
 static inline int kvm_gmem_get_pfn(struct kvm *kvm,
 				   struct kvm_memory_slot *slot, gfn_t gfn,
@@ -2442,6 +2444,18 @@ static inline int kvm_gmem_get_pfn(struct kvm *kvm,
 	KVM_BUG_ON(1, kvm);
 	return -EIO;
 }
+
+static inline int kvm_gmem_get_uninit_pfn(struct kvm *kvm,
+				          struct kvm_memory_slot *slot, gfn_t gfn,
+				          kvm_pfn_t *pfn, int *max_order)
+{
+	KVM_BUG_ON(1, kvm);
+	return -EIO;
+}
 #endif /* CONFIG_KVM_PRIVATE_MEM */
 
+#ifdef CONFIG_HAVE_KVM_GMEM_PREPARE
+int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_order);
+#endif
+
 #endif
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index a11e9c80fac9..dcce0c3b5b13 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -111,3 +111,7 @@ config KVM_GENERIC_PRIVATE_MEM
        select KVM_GENERIC_MEMORY_ATTRIBUTES
        select KVM_PRIVATE_MEM
        bool
+
+config HAVE_KVM_GMEM_PREPARE
+       bool
+       depends on KVM_PRIVATE_MEM
diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
index de0d5a5c210c..7ec7afafc960 100644
--- a/virt/kvm/guest_memfd.c
+++ b/virt/kvm/guest_memfd.c
@@ -13,12 +13,50 @@ struct kvm_gmem {
 	struct list_head entry;
 };
 
-static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index)
+static int kvm_gmem_prepare_folio(struct inode *inode, pgoff_t index, struct folio *folio)
+{
+#ifdef CONFIG_HAVE_KVM_GMEM_PREPARE
+	struct list_head *gmem_list = &inode->i_mapping->i_private_list;
+	struct kvm_gmem *gmem;
+
+	list_for_each_entry(gmem, gmem_list, entry) {
+		struct kvm_memory_slot *slot;
+		struct kvm *kvm = gmem->kvm;
+		struct page *page;
+		kvm_pfn_t pfn;
+		gfn_t gfn;
+		int rc;
+
+		slot = xa_load(&gmem->bindings, index);
+		if (!slot)
+			continue;
+
+		page = folio_file_page(folio, index);
+		pfn = page_to_pfn(page);
+		gfn = slot->base_gfn + index - slot->gmem.pgoff;
+		rc = kvm_arch_gmem_prepare(kvm, gfn, pfn, compound_order(compound_head(page)));
+		if (rc) {
+			pr_warn_ratelimited("gmem: Failed to prepare folio for index %lx, error %d.\n",
+					    index, rc);
+			return rc;
+		}
+	}
+
+#endif
+	return 0;
+}
+
+static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index, bool prepare)
 {
 	struct folio *folio;
+	fgf_t fgp_flags = FGP_LOCK | FGP_ACCESSED | FGP_CREAT;
+
+	if (!prepare)
+		fgp_flags |= FGP_CREAT_ONLY;
 
 	/* TODO: Support huge pages. */
-	folio = filemap_grab_folio(inode->i_mapping, index);
+	folio = __filemap_get_folio(inode->i_mapping, index, fgp_flags,
+				    mapping_gfp_mask(inode->i_mapping));
 	if (IS_ERR_OR_NULL(folio))
 		return folio;
 
@@ -41,6 +79,15 @@ static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index)
 		folio_mark_uptodate(folio);
 	}
 
+	if (prepare) {
+		int r = kvm_gmem_prepare_folio(inode, index, folio);
+		if (r < 0) {
+			folio_unlock(folio);
+			folio_put(folio);
+			return ERR_PTR(r);
+		}
+	}
+
 	/*
 	 * Ignore accessed, referenced, and dirty flags.  The memory is
 	 * unevictable and there is no storage to write back to.
@@ -145,7 +192,7 @@ static long kvm_gmem_allocate(struct inode *inode, loff_t offset, loff_t len)
 			break;
 		}
 
-		folio = kvm_gmem_get_folio(inode, index);
+		folio = kvm_gmem_get_folio(inode, index, true);
 		if (IS_ERR_OR_NULL(folio)) {
 			r = folio ? PTR_ERR(folio) : -ENOMEM;
 			break;
@@ -482,8 +529,8 @@ void kvm_gmem_unbind(struct kvm_memory_slot *slot)
 	fput(file);
 }
 
-int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
-		     gfn_t gfn, kvm_pfn_t *pfn, int *max_order)
+static int __kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
+		       gfn_t gfn, kvm_pfn_t *pfn, int *max_order, bool prepare)
 {
 	pgoff_t index = gfn - slot->base_gfn + slot->gmem.pgoff;
 	struct kvm_gmem *gmem;
@@ -503,7 +550,7 @@ int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
 		goto out_fput;
 	}
 
-	folio = kvm_gmem_get_folio(file_inode(file), index);
+	folio = kvm_gmem_get_folio(file_inode(file), index, prepare);
 	if (!folio) {
 		r = -ENOMEM;
 		goto out_fput;
@@ -529,4 +576,17 @@ int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
 
 	return r;
 }
+
+int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
+		     gfn_t gfn, kvm_pfn_t *pfn, int *max_order)
+{
+	return __kvm_gmem_get_pfn(kvm, slot, gfn, pfn, max_order, true);
+}
 EXPORT_SYMBOL_GPL(kvm_gmem_get_pfn);
+
+int kvm_gmem_get_uninit_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
+		            gfn_t gfn, kvm_pfn_t *pfn, int *max_order)
+{
+	return __kvm_gmem_get_pfn(kvm, slot, gfn, pfn, max_order, false);
+}
+EXPORT_SYMBOL_GPL(kvm_gmem_get_uninit_pfn);
-- 
2.39.0


