[PATCH -next] memregion: Add arch_flush_memregion() interface

* [PATCH -next] memregion: Add arch_flush_memregion() interface
@ 2022-08-29 21:29 Davidlohr Bueso
  2022-08-29 23:02 ` Dan Williams
                   ` (3 more replies)
  0 siblings, 4 replies; 20+ messages in thread
From: Davidlohr Bueso @ 2022-08-29 21:29 UTC (permalink / raw)
  To: dan.j.williams
  Cc: x86, nvdimm, linux-cxl, peterz, bp, akpm, dave.jiang,
	Jonathan.Cameron, vishal.l.verma, ira.weiny, a.manzanares,
	linux-kernel, dave

With CXL security features, global CPU cache flushing nvdimm requirements
are no longer specific to that subsystem, even beyond the scope of
security_ops. CXL will need such semantics for features not necessarily
limited to persistent memory.

The functionality this is enabling is to be able to instantaneously
secure erase potentially terabytes of memory at once and the kernel
needs to be sure that none of the data from before the secure is still
present in the cache. It is also used when unlocking a memory device
where speculative reads and firmware accesses could have cached poison
from before the device was unlocked.

This capability is typically only used once per-boot (for unlock), or
once per bare metal provisioning event (secure erase), like when handing
off the system to another tenant or decommissioning a device.

Users must first call arch_has_flush_memregion() to know whether this
functionality is available on the architecture. Only enable it on x86-64
via the wbinvd() hammer.

Signed-off-by: Davidlohr Bueso <dave@stgolabs.net>
---

Changes from v2 (https://lore.kernel.org/all/20220819171024.1766857-1-dave@stgolabs.net/):
- Redid to use memregion based interfaces + VMM check on x86 (Dan)
- Restricted the flushing to x86-64.

Note: Since we still are dealing with a physical "range" at this level,
added the spa range for nfit even though this is unused.

 arch/x86/Kconfig             |  1 +
 arch/x86/mm/pat/set_memory.c | 14 +++++++++++
 drivers/acpi/nfit/intel.c    | 45 ++++++++++++++++++------------------
 include/linux/memregion.h    | 25 ++++++++++++++++++++
 lib/Kconfig                  |  3 +++
 5 files changed, 65 insertions(+), 23 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index f9920f1341c8..594e6b6a4925 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -81,6 +81,7 @@ config X86
 	select ARCH_HAS_KCOV			if X86_64
 	select ARCH_HAS_MEM_ENCRYPT
 	select ARCH_HAS_MEMBARRIER_SYNC_CORE
+	select ARCH_HAS_MEMREGION_INVALIDATE    if X86_64
 	select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
 	select ARCH_HAS_PMEM_API		if X86_64
 	select ARCH_HAS_PTE_DEVMAP		if X86_64
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index 1abd5438f126..18463cb704fb 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -330,6 +330,20 @@ void arch_invalidate_pmem(void *addr, size_t size)
 EXPORT_SYMBOL_GPL(arch_invalidate_pmem);
 #endif
 
+#ifdef CONFIG_ARCH_HAS_MEMREGION_INVALIDATE
+bool arch_has_flush_memregion(void)
+{
+	return !cpu_feature_enabled(X86_FEATURE_HYPERVISOR);
+}
+EXPORT_SYMBOL(arch_has_flush_memregion);
+
+void arch_flush_memregion(phys_addr_t phys, resource_size_t size)
+{
+	wbinvd_on_all_cpus();
+}
+EXPORT_SYMBOL(arch_flush_memregion);
+#endif
+
 static void __cpa_flush_all(void *arg)
 {
 	unsigned long cache = (unsigned long)arg;
diff --git a/drivers/acpi/nfit/intel.c b/drivers/acpi/nfit/intel.c
index 8dd792a55730..32e622f51cde 100644
--- a/drivers/acpi/nfit/intel.c
+++ b/drivers/acpi/nfit/intel.c
@@ -3,6 +3,7 @@
 #include <linux/libnvdimm.h>
 #include <linux/ndctl.h>
 #include <linux/acpi.h>
+#include <linux/memregion.h>
 #include <asm/smp.h>
 #include "intel.h"
 #include "nfit.h"
@@ -190,12 +191,11 @@ static int intel_security_change_key(struct nvdimm *nvdimm,
 	}
 }
 
-static void nvdimm_invalidate_cache(void);
-
 static int __maybe_unused intel_security_unlock(struct nvdimm *nvdimm,
 		const struct nvdimm_key_data *key_data)
 {
 	struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
+	struct acpi_nfit_system_address *spa = nfit_mem->spa_dcr;
 	struct {
 		struct nd_cmd_pkg pkg;
 		struct nd_intel_unlock_unit cmd;
@@ -213,6 +213,9 @@ static int __maybe_unused intel_security_unlock(struct nvdimm *nvdimm,
 	if (!test_bit(NVDIMM_INTEL_UNLOCK_UNIT, &nfit_mem->dsm_mask))
 		return -ENOTTY;
 
+	if (!arch_has_flush_memregion())
+		return -EINVAL;
+
 	memcpy(nd_cmd.cmd.passphrase, key_data->data,
 			sizeof(nd_cmd.cmd.passphrase));
 	rc = nvdimm_ctl(nvdimm, ND_CMD_CALL, &nd_cmd, sizeof(nd_cmd), NULL);
@@ -228,7 +231,7 @@ static int __maybe_unused intel_security_unlock(struct nvdimm *nvdimm,
 	}
 
 	/* DIMM unlocked, invalidate all CPU caches before we read it */
-	nvdimm_invalidate_cache();
+	arch_flush_memregion(spa->address, spa->length);
 
 	return 0;
 }
@@ -279,6 +282,7 @@ static int __maybe_unused intel_security_erase(struct nvdimm *nvdimm,
 {
 	int rc;
 	struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
+	struct acpi_nfit_system_address *spa = nfit_mem->spa_dcr;
 	unsigned int cmd = ptype == NVDIMM_MASTER ?
 		NVDIMM_INTEL_MASTER_SECURE_ERASE : NVDIMM_INTEL_SECURE_ERASE;
 	struct {
@@ -297,8 +301,11 @@ static int __maybe_unused intel_security_erase(struct nvdimm *nvdimm,
 	if (!test_bit(cmd, &nfit_mem->dsm_mask))
 		return -ENOTTY;
 
+	if (!arch_has_flush_memregion())
+		return -EINVAL;
+
 	/* flush all cache before we erase DIMM */
-	nvdimm_invalidate_cache();
+	arch_flush_memregion(spa->address, spa->length);
 	memcpy(nd_cmd.cmd.passphrase, key->data,
 			sizeof(nd_cmd.cmd.passphrase));
 	rc = nvdimm_ctl(nvdimm, ND_CMD_CALL, &nd_cmd, sizeof(nd_cmd), NULL);
@@ -318,7 +325,7 @@ static int __maybe_unused intel_security_erase(struct nvdimm *nvdimm,
 	}
 
 	/* DIMM erased, invalidate all CPU caches before we read it */
-	nvdimm_invalidate_cache();
+	arch_flush_memregion(spa->address, spa->length);
 	return 0;
 }
 
@@ -326,6 +333,7 @@ static int __maybe_unused intel_security_query_overwrite(struct nvdimm *nvdimm)
 {
 	int rc;
 	struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
+	struct acpi_nfit_system_address *spa = nfit_mem->spa_dcr;
 	struct {
 		struct nd_cmd_pkg pkg;
 		struct nd_intel_query_overwrite cmd;
@@ -341,6 +349,9 @@ static int __maybe_unused intel_security_query_overwrite(struct nvdimm *nvdimm)
 	if (!test_bit(NVDIMM_INTEL_QUERY_OVERWRITE, &nfit_mem->dsm_mask))
 		return -ENOTTY;
 
+	if (!arch_has_flush_memregion())
+		return -EINVAL;
+
 	rc = nvdimm_ctl(nvdimm, ND_CMD_CALL, &nd_cmd, sizeof(nd_cmd), NULL);
 	if (rc < 0)
 		return rc;
@@ -355,7 +366,7 @@ static int __maybe_unused intel_security_query_overwrite(struct nvdimm *nvdimm)
 	}
 
 	/* flush all cache before we make the nvdimms available */
-	nvdimm_invalidate_cache();
+	arch_flush_memregion(spa->address, spa->length);
 	return 0;
 }
 
@@ -364,6 +375,7 @@ static int __maybe_unused intel_security_overwrite(struct nvdimm *nvdimm,
 {
 	int rc;
 	struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
+	struct acpi_nfit_system_address *spa = nfit_mem->spa_dcr;
 	struct {
 		struct nd_cmd_pkg pkg;
 		struct nd_intel_overwrite cmd;
@@ -380,8 +392,11 @@ static int __maybe_unused intel_security_overwrite(struct nvdimm *nvdimm,
 	if (!test_bit(NVDIMM_INTEL_OVERWRITE, &nfit_mem->dsm_mask))
 		return -ENOTTY;
 
+	if (!arch_has_flush_memregion())
+		return -EINVAL;
+
 	/* flush all cache before we erase DIMM */
-	nvdimm_invalidate_cache();
+	arch_flush_memregion(spa->address, spa->length);
 	memcpy(nd_cmd.cmd.passphrase, nkey->data,
 			sizeof(nd_cmd.cmd.passphrase));
 	rc = nvdimm_ctl(nvdimm, ND_CMD_CALL, &nd_cmd, sizeof(nd_cmd), NULL);
@@ -401,22 +416,6 @@ static int __maybe_unused intel_security_overwrite(struct nvdimm *nvdimm,
 	}
 }
 
-/*
- * TODO: define a cross arch wbinvd equivalent when/if
- * NVDIMM_FAMILY_INTEL command support arrives on another arch.
- */
-#ifdef CONFIG_X86
-static void nvdimm_invalidate_cache(void)
-{
-	wbinvd_on_all_cpus();
-}
-#else
-static void nvdimm_invalidate_cache(void)
-{
-	WARN_ON_ONCE("cache invalidation required after unlock\n");
-}
-#endif
-
 static const struct nvdimm_security_ops __intel_security_ops = {
 	.get_flags = intel_security_flags,
 	.freeze = intel_security_freeze,
diff --git a/include/linux/memregion.h b/include/linux/memregion.h
index c04c4fd2e209..c35201c0696f 100644
--- a/include/linux/memregion.h
+++ b/include/linux/memregion.h
@@ -20,4 +20,29 @@ static inline void memregion_free(int id)
 {
 }
 #endif
+
+/*
+ * Device memory technologies like NVDIMM and CXL have events like
+ * secure erase and dynamic region provision that can invalidate an
+ * entire physical memory address range at once. Limit that
+ * functionality to architectures that have an efficient way to
+ * writeback and invalidate potentially terabytes of memory at once.
+ *
+ * To ensure this, users must first call arch_has_flush_memregion()
+ * before anything, to verify the operation is feasible.
+ */
+#ifdef CONFIG_ARCH_HAS_MEMREGION_INVALIDATE
+void arch_flush_memregion(phys_addr_t phys, resource_size_t size);
+bool arch_has_flush_memregion(void);
+#else
+static inline bool arch_has_flush_memregion(void)
+{
+       return false;
+}
+static inline void arch_flush_memregion(phys_addr_t phys, resource_size_t size)
+{
+	WARN_ON_ONCE("cache invalidation required");
+}
+#endif
+
 #endif /* _MEMREGION_H_ */
diff --git a/lib/Kconfig b/lib/Kconfig
index dc1ab2ed1dc6..8319e7731e7b 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -662,6 +662,9 @@ config ARCH_HAS_PMEM_API
 config MEMREGION
 	bool
 
+config ARCH_HAS_MEMREGION_INVALIDATE
+       bool
+
 config ARCH_HAS_MEMREMAP_COMPAT_ALIGN
 	bool
 
-- 
2.37.2


^ permalink raw reply related	[flat|nested] 20+ messages in thread