[PATCH v9 4/6] x86/coco: Add cc_decrypted_alloc/free() interfaces

From: Kuppuswamy Sathyanarayanan  <sathyanarayanan.kuppuswamy@linux.intel.com>
To: Thomas Gleixner <tglx@linutronix.de>,
	Ingo Molnar <mingo@redhat.com>, Borislav Petkov <bp@alien8.de>,
	Dave Hansen <dave.hansen@linux.intel.com>,
	x86@kernel.org
Cc: "H . Peter Anvin" <hpa@zytor.com>,
	Kuppuswamy Sathyanarayanan 
	<sathyanarayanan.kuppuswamy@linux.intel.com>,
	"Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>,
	Tony Luck <tony.luck@intel.com>, Andi Kleen <ak@linux.intel.com>,
	Kai Huang <kai.huang@intel.com>,
	Wander Lairson Costa <wander@redhat.com>,
	Isaku Yamahata <isaku.yamahata@gmail.com>,
	marcelo.cerri@canonical.com, tim.gardner@canonical.com,
	khalid.elmously@canonical.com, philip.cox@canonical.com,
	linux-kernel@vger.kernel.org
Subject: [PATCH v9 4/6] x86/coco: Add cc_decrypted_alloc/free() interfaces
Date: Wed, 27 Jul 2022 20:44:18 -0700	[thread overview]
Message-ID: <20220728034420.648314-5-sathyanarayanan.kuppuswamy@linux.intel.com> (raw)
In-Reply-To: <20220728034420.648314-1-sathyanarayanan.kuppuswamy@linux.intel.com>

Confidential computing platforms, such as AMD SEV and Intel TDX,
protect memory from VMM access. Any memory that is required for
communication with the VMM must be explicitly shared. Sharing involves
adjusting page table entries to indicate that the memory is shared and
notifying the VMM about the change.

set_memory_decrypted() converts memory to shared. Before freeing
memory it has to be converted back with set_memory_encrypted().

The interface works fine for long-term allocations, but it causes
problems for frequent short-lived allocations. Conversion takes time,
and modifying the direct mapping fractures it, which degrades
performance over time.

Direct mapping modifications can be avoided by creating a vmap that
maps allocated pages as shared while direct mapping is untouched.

But having a private mapping of shared memory causes problems too.
Any access to such memory via the private mapping in a TDX guest would
trigger an unrecoverable SEPT violation and terminate the virtual
machine. load_unaligned_zeropad() is known to issue such unwanted
loads across page boundaries, which can trigger the issue.

It can also be fixed by allocating a guard page in front of any memory
that has to be converted to shared, so load_unaligned_zeropad() will
roll off to the guard page instead. But it is wasteful and does not
address the cost of the memory conversion.

The next logical step is to introduce a pool of shared memory that can
share a single guard page and makes conversion less frequent.

Fortunately, the kernel already has such a pool of memory: the SWIOTLB
buffer is used by the DMA API to allocate memory for I/O. The buffer is
allocated once during boot, so direct mapping fracturing is not an
issue and there is no need for vmap tricks.

Tapping into the SWIOTLB pool requires a device structure and use of
the DMA API. Provide a couple of simple helpers to allocate and free
shared memory that hide the required plumbing.

Signed-off-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
---
 arch/x86/coco/Makefile      |  2 +-
 arch/x86/coco/mem.c         | 90 +++++++++++++++++++++++++++++++++++++
 arch/x86/include/asm/coco.h | 10 +++++
 3 files changed, 101 insertions(+), 1 deletion(-)
 create mode 100644 arch/x86/coco/mem.c

diff --git a/arch/x86/coco/Makefile b/arch/x86/coco/Makefile
index c816acf78b6a..96fc4ec4497f 100644
--- a/arch/x86/coco/Makefile
+++ b/arch/x86/coco/Makefile
@@ -3,6 +3,6 @@ CFLAGS_REMOVE_core.o	= -pg
 KASAN_SANITIZE_core.o	:= n
 CFLAGS_core.o		+= -fno-stack-protector
 
-obj-y += core.o
+obj-y += core.o mem.o
 
 obj-$(CONFIG_INTEL_TDX_GUEST)	+= tdx/
diff --git a/arch/x86/coco/mem.c b/arch/x86/coco/mem.c
new file mode 100644
index 000000000000..78bcce11452e
--- /dev/null
+++ b/arch/x86/coco/mem.c
@@ -0,0 +1,90 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Confidential Computing Decrypted Memory Allocator
+ *
+ * Copyright (C) 2022 Intel Corporation, Inc.
+ *
+ */
+
+#undef pr_fmt
+#define pr_fmt(fmt)     "cc/mem: " fmt
+
+#include <linux/export.h>
+#include <linux/mm.h>
+#include <linux/cc_platform.h>
+#include <linux/set_memory.h>
+#include <linux/platform_device.h>
+#include <linux/dma-mapping.h>
+#include <linux/dma-direct.h>
+
+#include <asm/coco.h>
+#include <asm/processor.h>
+
+#define CC_MEM_DRIVER		"ccmem"
+
+static struct platform_device *mem_pdev;
+
+/*
+ * Translate a kernel virtual address into a DMA address for the cc
+ * memory platform device: virt_to_phys() followed by phys_to_dma().
+ * Dereferences mem_pdev, so callers must check that it is set.
+ */
+static inline dma_addr_t virt_to_dma(void *vaddr)
+{
+	phys_addr_t paddr = virt_to_phys(vaddr);
+
+	return phys_to_dma(&mem_pdev->dev, paddr);
+}
+
+/**
+ * cc_decrypted_alloc() - Allocate decrypted memory of the given size
+ * @size: number of bytes to allocate
+ * @gfp: allocation flags, passed through to dma_alloc_coherent()
+ *
+ * The buffer comes from dma_alloc_coherent() on the "ccmem" platform
+ * device and is meant to be released with cc_decrypted_free().
+ *
+ * Return: virtual address of the buffer, or NULL if the platform device
+ * is not registered or the allocation fails.
+ */
+void *cc_decrypted_alloc(size_t size, gfp_t gfp)
+{
+	dma_addr_t handle;
+	void *vaddr;
+
+	if (!mem_pdev)
+		return NULL;
+
+	vaddr = dma_alloc_coherent(&mem_pdev->dev, size, &handle, gfp);
+	if (!vaddr)
+		return NULL;
+
+	/*
+	 * Since we rely on virt_to_dma() in cc_decrypted_free() to
+	 * calculate DMA address, make sure address translation works.
+	 * Only checked on success: a failed allocation leaves no valid
+	 * handle to compare against and must not trip the assertion.
+	 */
+	VM_BUG_ON(virt_to_dma(vaddr) != handle);
+
+	return vaddr;
+}
+
+/**
+ * cc_decrypted_free() - Free memory obtained from cc_decrypted_alloc()
+ * @addr: virtual address returned by cc_decrypted_alloc(); NULL is a no-op
+ * @size: size in bytes of the allocation, passed to dma_free_coherent()
+ *
+ * Does nothing if the "ccmem" platform device was never registered.
+ */
+void cc_decrypted_free(void *addr, size_t size)
+{
+	if (!mem_pdev || !addr)
+		return;
+
+	/*
+	 * dma_free_coherent() takes the DMA handle, not a physical address.
+	 * Recover it with virt_to_dma() so it goes through the same
+	 * phys_to_dma() translation that cc_decrypted_alloc() verifies.
+	 */
+	dma_free_coherent(&mem_pdev->dev, size, addr, virt_to_dma(addr));
+}
+
+/* Platform driver for the "ccmem" device; only the driver name is set. */
+static struct platform_driver cc_mem_driver = {
+	.driver = {
+		.name = CC_MEM_DRIVER,
+	},
+};
+
+/*
+ * Register the "ccmem" platform driver and a matching platform device,
+ * set a 64-bit coherent DMA mask on the device and publish it in
+ * mem_pdev for cc_decrypted_alloc()/cc_decrypted_free().
+ *
+ * Returns 0 on success, -ENODEV when
+ * cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT) is false, -EIO if the
+ * coherent mask cannot be set, or the error from driver/device
+ * registration. Every failure path undoes the registrations made so far.
+ */
+static int __init cc_mem_init(void)
+{
+	struct platform_device *pdev;
+	int ret;
+
+	if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
+		return -ENODEV;
+
+	ret = platform_driver_register(&cc_mem_driver);
+	if (ret)
+		return ret;
+
+	pdev = platform_device_register_simple(CC_MEM_DRIVER, -1, NULL, 0);
+	if (IS_ERR(pdev)) {
+		ret = PTR_ERR(pdev);
+		goto err_unregister_driver;
+	}
+
+	if (dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64))) {
+		ret = -EIO;
+		goto err_unregister_device;
+	}
+
+	/* Publish the device only after it is fully configured. */
+	mem_pdev = pdev;
+
+	return 0;
+
+err_unregister_device:
+	platform_device_unregister(pdev);
+err_unregister_driver:
+	platform_driver_unregister(&cc_mem_driver);
+	return ret;
+}
+device_initcall(cc_mem_init);
diff --git a/arch/x86/include/asm/coco.h b/arch/x86/include/asm/coco.h
index 3d98c3a60d34..74e10213289c 100644
--- a/arch/x86/include/asm/coco.h
+++ b/arch/x86/include/asm/coco.h
@@ -17,6 +17,8 @@ void cc_set_mask(u64 mask);
 #ifdef CONFIG_ARCH_HAS_CC_PLATFORM
 u64 cc_mkenc(u64 val);
 u64 cc_mkdec(u64 val);
+/* Allocate/free decrypted memory; implemented in arch/x86/coco/mem.c. */
+void *cc_decrypted_alloc(size_t size, gfp_t gfp);
+void cc_decrypted_free(void *addr, size_t size);
 #else
 static inline u64 cc_mkenc(u64 val)
 {
@@ -27,6 +29,14 @@ static inline u64 cc_mkdec(u64 val)
 {
 	return val;
 }
+
+/* Stub for !CONFIG_ARCH_HAS_CC_PLATFORM: always returns NULL. */
+static inline void *cc_decrypted_alloc(size_t size, gfp_t gfp)
+{
+	return NULL;
+}
+
+/* Stub for !CONFIG_ARCH_HAS_CC_PLATFORM: does nothing. */
+static inline void cc_decrypted_free(void *addr, size_t size) { }
+
 #endif
 
 #endif /* _ASM_X86_COCO_H */
-- 
2.25.1