From: "Kirill A. Shutemov" <kirill@shutemov.name>
To: Dave Hansen <dave.hansen@intel.com>
Cc: kirill.shutemov@linux.intel.com, Dexuan Cui <decui@microsoft.com>,
	ak@linux.intel.com, arnd@arndb.de, bp@alien8.de,
	brijesh.singh@amd.com, dan.j.williams@intel.com,
	dave.hansen@linux.intel.com, haiyangz@microsoft.com,
	hpa@zytor.com, jane.chu@oracle.com, kys@microsoft.com,
	linux-arch@vger.kernel.org, linux-hyperv@vger.kernel.org,
	luto@kernel.org, mingo@redhat.com, peterz@infradead.org,
	rostedt@goodmis.org, sathyanarayanan.kuppuswamy@linux.intel.com,
	seanjc@google.com, tglx@linutronix.de, tony.luck@intel.com,
	wei.liu@kernel.org, x86@kernel.org, mikelley@microsoft.com,
	linux-kernel@vger.kernel.org, Tianyu.Lan@microsoft.com
Subject: Re: [PATCH v6 2/6] x86/tdx: Support vmalloc() for tdx_enc_status_changed()
Date: Thu, 25 May 2023 22:08:12 +0300
Message-ID: <20230525190812.bz5hg5k3uaibtcys@box>
In-Reply-To: <20230523232851.a3djqxmpjyfghbvc@box.shutemov.name>

On Wed, May 24, 2023 at 02:28:51AM +0300, kirill.shutemov@linux.intel.com wrote:
> On Tue, May 23, 2023 at 03:43:15PM -0700, Dave Hansen wrote:
> > On 5/23/23 15:37, kirill.shutemov@linux.intel.com wrote:
> > >> How does this work with load_unaligned_zeropad()?  Couldn't it be
> > >> running around poking at one of these vmalloc()'d pages via the direct
> > >> map during a shared->private conversion before the page has been accepted?
> > > Alias processing in __change_page_attr_set_clr() will change direct
> > > mapping if you call it on vmalloc()ed memory. I think we are safe wrt
> > > load_unaligned_zeropad() here.
> > 
> > We're *eventually* OK:
> > 
> > >         /* Notify hypervisor that we are about to set/clr encryption attribute. */
> > >         x86_platform.guest.enc_status_change_prepare(addr, numpages, enc);
> > > 
> > >         ret = __change_page_attr_set_clr(&cpa, 1);
> > 
> > But what about in the middle between enc_status_change_prepare() and
> > __change_page_attr_set_clr()?  Don't the direct map and the
> > shared/private status of the page diverge in there?
> 
> Hmm. Maybe we would need to go through making the range in direct mapping
> non-present before notifying VMM about the change.
> 
> I need to look at this again in the morning.

Okay, I finally got around to it.

Private->Shared conversion is safe: we first set the shared bit in the
direct mapping and in all aliases, and only then call MapGPA from
enc_status_change_finish(). So at no point do we have a privately mapped
page that has already been converted to shared with MapGPA.

Shared->Private is not safe. As with Private->Shared, we adjust the
direct mapping before notifying the VMM and accepting the memory, so
there is a short window when memory is privately mapped but neither
mapped into the SEPT nor accepted. That is a problem, as it can race
with load_unaligned_zeropad().

Shared->Private conversions are rare. I see only one call in total
during boot in my setup, but it is worth fixing anyway.


The patch below fixes the issue by hooking up enc_status_change_prepare()
and doing the Shared->Private conversion there.

enc_status_change_finish() now covers only the Private->Shared
conversion.
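
In other words, the intended ordering is (a pseudocode sketch of
__set_memory_enc_pgtable(), not the literal kernel code):

	/*
	 * Shared->Private (enc == true): MapGPA and accept the memory
	 * in enc_status_change_prepare(), i.e. before the direct
	 * mapping flips to private, so a private mapping of unaccepted
	 * memory never exists.
	 */
	if (!x86_platform.guest.enc_status_change_prepare(addr, numpages, enc))
		return -EIO;

	ret = __change_page_attr_set_clr(&cpa, 1);	/* flip the mappings */

	/*
	 * Private->Shared (enc == false): MapGPA in
	 * enc_status_change_finish(), i.e. after the direct mapping is
	 * already shared, so a private mapping of a shared page never
	 * exists either.
	 */
	if (!x86_platform.guest.enc_status_change_finish(addr, numpages, enc))
		return -EIO;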

The patch is on top of the unaccepted memory patchset, which is a more
convenient base. I will rebase onto Linus' tree if the approach looks
sane to you.

Any comments?

diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
index 32501277ef84..b73ec2449c64 100644
--- a/arch/x86/coco/tdx/tdx.c
+++ b/arch/x86/coco/tdx/tdx.c
@@ -713,16 +713,32 @@ static bool tdx_cache_flush_required(void)
 	return true;
 }
 
+static bool tdx_enc_status_change_prepare(unsigned long vaddr, int numpages,
+					  bool enc)
+{
+	phys_addr_t start = __pa(vaddr);
+	phys_addr_t end = __pa(vaddr + numpages * PAGE_SIZE);
+
+	if (!enc)
+		return true;
+
+	return tdx_enc_status_changed_phys(start, end, enc);
+}
+
 /*
  * Inform the VMM of the guest's intent for this physical page: shared with
  * the VMM or private to the guest.  The VMM is expected to change its mapping
  * of the page in response.
  */
-static bool tdx_enc_status_changed(unsigned long vaddr, int numpages, bool enc)
+static bool tdx_enc_status_change_finish(unsigned long vaddr, int numpages,
+					 bool enc)
 {
 	phys_addr_t start = __pa(vaddr);
 	phys_addr_t end = __pa(vaddr + numpages * PAGE_SIZE);
 
+	if (enc)
+		return true;
+
 	return tdx_enc_status_changed_phys(start, end, enc);
 }
 
@@ -753,9 +769,10 @@ void __init tdx_early_init(void)
 	 */
 	physical_mask &= cc_mask - 1;
 
-	x86_platform.guest.enc_cache_flush_required = tdx_cache_flush_required;
-	x86_platform.guest.enc_tlb_flush_required   = tdx_tlb_flush_required;
-	x86_platform.guest.enc_status_change_finish = tdx_enc_status_changed;
+	x86_platform.guest.enc_cache_flush_required  = tdx_cache_flush_required;
+	x86_platform.guest.enc_tlb_flush_required    = tdx_tlb_flush_required;
+	x86_platform.guest.enc_status_change_prepare = tdx_enc_status_change_prepare;
+	x86_platform.guest.enc_status_change_finish  = tdx_enc_status_change_finish;
 
 	pr_info("Guest detected\n");
 }
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 88085f369ff6..1ca9701917c5 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -150,7 +150,7 @@ struct x86_init_acpi {
  * @enc_cache_flush_required	Returns true if a cache flush is needed before changing page encryption status
  */
 struct x86_guest {
-	void (*enc_status_change_prepare)(unsigned long vaddr, int npages, bool enc);
+	bool (*enc_status_change_prepare)(unsigned long vaddr, int npages, bool enc);
 	bool (*enc_status_change_finish)(unsigned long vaddr, int npages, bool enc);
 	bool (*enc_tlb_flush_required)(bool enc);
 	bool (*enc_cache_flush_required)(void);
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index d82f4fa2f1bf..64664311ac2b 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -130,8 +130,8 @@ struct x86_cpuinit_ops x86_cpuinit = {
 
 static void default_nmi_init(void) { };
 
-static void enc_status_change_prepare_noop(unsigned long vaddr, int npages, bool enc) { }
-static bool enc_status_change_finish_noop(unsigned long vaddr, int npages, bool enc) { return false; }
+static bool enc_status_change_prepare_noop(unsigned long vaddr, int npages, bool enc) { return true; }
+static bool enc_status_change_finish_noop(unsigned long vaddr, int npages, bool enc) { return true; }
 static bool enc_tlb_flush_required_noop(bool enc) { return false; }
 static bool enc_cache_flush_required_noop(void) { return false; }
 static bool is_private_mmio_noop(u64 addr) {return false; }
diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c
index e0b51c09109f..4f95c449a406 100644
--- a/arch/x86/mm/mem_encrypt_amd.c
+++ b/arch/x86/mm/mem_encrypt_amd.c
@@ -319,7 +319,7 @@ static void enc_dec_hypercall(unsigned long vaddr, int npages, bool enc)
 #endif
 }
 
-static void amd_enc_status_change_prepare(unsigned long vaddr, int npages, bool enc)
+static bool amd_enc_status_change_prepare(unsigned long vaddr, int npages, bool enc)
 {
 	/*
 	 * To maintain the security guarantees of SEV-SNP guests, make sure
@@ -327,6 +327,8 @@ static void amd_enc_status_change_prepare(unsigned long vaddr, int npages, bool
 	 */
 	if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP) && !enc)
 		snp_set_memory_shared(vaddr, npages);
+
+	return true;
 }
 
 /* Return true unconditionally: return value doesn't matter for the SEV side */
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index 7159cf787613..b8f48ebe753c 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -2151,7 +2151,8 @@ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc)
 		cpa_flush(&cpa, x86_platform.guest.enc_cache_flush_required());
 
 	/* Notify hypervisor that we are about to set/clr encryption attribute. */
-	x86_platform.guest.enc_status_change_prepare(addr, numpages, enc);
+	if (!x86_platform.guest.enc_status_change_prepare(addr, numpages, enc))
+		return -EIO;
 
 	ret = __change_page_attr_set_clr(&cpa, 1);
 
-- 
  Kiryl Shutsemau / Kirill A. Shutemov
