All of lore.kernel.org
 help / color / mirror / Atom feed
From: Tom Lendacky <thomas.lendacky@amd.com>
To: x86@kernel.org, linux-kernel@vger.kernel.org,
	linux-arch@vger.kernel.org, linux-efi@vger.kernel.org,
	linux-doc@vger.kernel.org, linux-mm@kvack.org,
	kvm@vger.kernel.org, kasan-dev@googlegroups.com
Cc: "Radim Krčmář" <rkrcmar@redhat.com>,
	"Arnd Bergmann" <arnd@arndb.de>,
	"Jonathan Corbet" <corbet@lwn.net>,
	"Matt Fleming" <matt@codeblueprint.co.uk>,
	"Konrad Rzeszutek Wilk" <konrad.wilk@oracle.com>,
	"Andrey Ryabinin" <aryabinin@virtuozzo.com>,
	"Ingo Molnar" <mingo@redhat.com>,
	"Borislav Petkov" <bp@alien8.de>,
	"Andy Lutomirski" <luto@kernel.org>,
	"H. Peter Anvin" <hpa@zytor.com>,
	"Paolo Bonzini" <pbonzini@redhat.com>,
	"Alexander Potapenko" <glider@google.com>,
	"Thomas Gleixner" <tglx@linutronix.de>,
	"Dmitry Vyukov" <dvyukov@google.com>,
	"Rik van Riel" <riel@redhat.com>,
	"Larry Woodman" <lwoodman@redhat.com>,
	"Dave Young" <dyoung@redhat.com>,
	"Toshimitsu Kani" <toshi.kani@hpe.com>,
	"Michael S. Tsirkin" <mst@redhat.com>,
	"Brijesh Singh" <brijesh.singh@amd.com>,
	kexec@lists.infradead.org
Subject: [PATCH v10 31/38] x86/mm, kexec: Allow kexec to be used with SME
Date: Mon, 17 Jul 2017 16:10:28 -0500	[thread overview]
Message-ID: <b95ff075db3e7cd545313f2fb609a49619a09625.1500319216.git.thomas.lendacky@amd.com> (raw)
In-Reply-To: <cover.1500319216.git.thomas.lendacky@amd.com>

Provide support so that kexec can be used to boot a kernel when SME is
enabled.

Support is needed to allocate pages for kexec without encryption.  This
is needed in order to be able to reboot in the kernel in the same manner
as originally booted.

Additionally, when shutting down all of the CPUs we need to be sure to
flush the caches and then halt. This is needed when booting from a state
where SME was not active into a state where SME is active (or vice-versa).
Without these steps, it is possible for cache lines to exist for the same
physical location but tagged both with and without the encryption bit. This
can cause random memory corruption when caches are flushed depending on
which cacheline is written last.

Cc: <kexec@lists.infradead.org>
Reviewed-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
---
 arch/x86/include/asm/init.h          |  1 +
 arch/x86/include/asm/kexec.h         |  8 ++++++++
 arch/x86/include/asm/pgtable_types.h |  1 +
 arch/x86/kernel/machine_kexec_64.c   | 22 +++++++++++++++++++++-
 arch/x86/kernel/process.c            | 17 +++++++++++++++--
 arch/x86/mm/ident_map.c              | 12 ++++++++----
 include/linux/kexec.h                |  8 ++++++++
 kernel/kexec_core.c                  | 12 +++++++++++-
 8 files changed, 73 insertions(+), 8 deletions(-)

diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h
index 474eb8c..05c4aa0 100644
--- a/arch/x86/include/asm/init.h
+++ b/arch/x86/include/asm/init.h
@@ -7,6 +7,7 @@ struct x86_mapping_info {
 	unsigned long page_flag;	 /* page flag for PMD or PUD entry */
 	unsigned long offset;		 /* ident mapping offset */
 	bool direct_gbpages;		 /* PUD level 1GB page support */
+	unsigned long kernpg_flag;	 /* kernel pagetable flag override */
 };
 
 int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index 70ef205..e8183ac 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -207,6 +207,14 @@ struct kexec_entry64_regs {
 	uint64_t r15;
 	uint64_t rip;
 };
+
+extern int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages,
+				       gfp_t gfp);
+#define arch_kexec_post_alloc_pages arch_kexec_post_alloc_pages
+
+extern void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages);
+#define arch_kexec_pre_free_pages arch_kexec_pre_free_pages
+
 #endif
 
 typedef void crash_vmclear_fn(void);
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 32095af..830992f 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -213,6 +213,7 @@ enum page_cache_mode {
 #define PAGE_KERNEL		__pgprot(__PAGE_KERNEL | _PAGE_ENC)
 #define PAGE_KERNEL_RO		__pgprot(__PAGE_KERNEL_RO | _PAGE_ENC)
 #define PAGE_KERNEL_EXEC	__pgprot(__PAGE_KERNEL_EXEC | _PAGE_ENC)
+#define PAGE_KERNEL_EXEC_NOENC	__pgprot(__PAGE_KERNEL_EXEC)
 #define PAGE_KERNEL_RX		__pgprot(__PAGE_KERNEL_RX | _PAGE_ENC)
 #define PAGE_KERNEL_NOCACHE	__pgprot(__PAGE_KERNEL_NOCACHE | _PAGE_ENC)
 #define PAGE_KERNEL_LARGE	__pgprot(__PAGE_KERNEL_LARGE | _PAGE_ENC)
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index cb0a304..9cf8daa 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -87,7 +87,7 @@ static int init_transition_pgtable(struct kimage *image, pgd_t *pgd)
 		set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE));
 	}
 	pte = pte_offset_kernel(pmd, vaddr);
-	set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC));
+	set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC_NOENC));
 	return 0;
 err:
 	free_transition_pgtable(image);
@@ -115,6 +115,7 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
 		.alloc_pgt_page	= alloc_pgt_page,
 		.context	= image,
 		.page_flag	= __PAGE_KERNEL_LARGE_EXEC,
+		.kernpg_flag	= _KERNPG_TABLE_NOENC,
 	};
 	unsigned long mstart, mend;
 	pgd_t *level4p;
@@ -602,3 +603,22 @@ void arch_kexec_unprotect_crashkres(void)
 {
 	kexec_mark_crashkres(false);
 }
+
+int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages, gfp_t gfp)
+{
+	/*
+	 * If SME is active we need to be sure that kexec pages are
+	 * not encrypted because when we boot to the new kernel the
+	 * pages won't be accessed encrypted (initially).
+	 */
+	return set_memory_decrypted((unsigned long)vaddr, pages);
+}
+
+void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages)
+{
+	/*
+	 * If SME is active we need to reset the pages back to being
+	 * an encrypted mapping before freeing them.
+	 */
+	set_memory_encrypted((unsigned long)vaddr, pages);
+}
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 3ca1980..bd6b85f 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -355,6 +355,7 @@ bool xen_set_default_idle(void)
 	return ret;
 }
 #endif
+
 void stop_this_cpu(void *dummy)
 {
 	local_irq_disable();
@@ -365,8 +366,20 @@ void stop_this_cpu(void *dummy)
 	disable_local_APIC();
 	mcheck_cpu_clear(this_cpu_ptr(&cpu_info));
 
-	for (;;)
-		halt();
+	for (;;) {
+		/*
+		 * Use wbinvd followed by hlt to stop the processor. This
+		 * provides support for kexec on a processor that supports
+		 * SME. With kexec, going from SME inactive to SME active
+		 * requires clearing cache entries so that addresses without
+		 * the encryption bit set don't corrupt the same physical
+		 * address that has the encryption bit set when caches are
+		 * flushed. To achieve this a wbinvd is performed followed by
+		 * a hlt. Even if the processor is not in the kexec/SME
+		 * scenario this only adds a wbinvd to a halting processor.
+		 */
+		asm volatile("wbinvd; hlt" : : : "memory");
+	}
 }
 
 /*
diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c
index adab159..31cea98 100644
--- a/arch/x86/mm/ident_map.c
+++ b/arch/x86/mm/ident_map.c
@@ -51,7 +51,7 @@ static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page,
 		if (!pmd)
 			return -ENOMEM;
 		ident_pmd_init(info, pmd, addr, next);
-		set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
+		set_pud(pud, __pud(__pa(pmd) | info->kernpg_flag));
 	}
 
 	return 0;
@@ -79,7 +79,7 @@ static int ident_p4d_init(struct x86_mapping_info *info, p4d_t *p4d_page,
 		if (!pud)
 			return -ENOMEM;
 		ident_pud_init(info, pud, addr, next);
-		set_p4d(p4d, __p4d(__pa(pud) | _KERNPG_TABLE));
+		set_p4d(p4d, __p4d(__pa(pud) | info->kernpg_flag));
 	}
 
 	return 0;
@@ -93,6 +93,10 @@ int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
 	unsigned long next;
 	int result;
 
+	/* Set the default pagetable flags if not supplied */
+	if (!info->kernpg_flag)
+		info->kernpg_flag = _KERNPG_TABLE;
+
 	for (; addr < end; addr = next) {
 		pgd_t *pgd = pgd_page + pgd_index(addr);
 		p4d_t *p4d;
@@ -116,14 +120,14 @@ int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
 		if (result)
 			return result;
 		if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
-			set_pgd(pgd, __pgd(__pa(p4d) | _KERNPG_TABLE));
+			set_pgd(pgd, __pgd(__pa(p4d) | info->kernpg_flag));
 		} else {
 			/*
 			 * With p4d folded, pgd is equal to p4d.
 			 * The pgd entry has to point to the pud page table in this case.
 			 */
 			pud_t *pud = pud_offset(p4d, 0);
-			set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
+			set_pgd(pgd, __pgd(__pa(pud) | info->kernpg_flag));
 		}
 	}
 
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index dd056fa..2b7590f 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -327,6 +327,14 @@ static inline void *boot_phys_to_virt(unsigned long entry)
 	return phys_to_virt(boot_phys_to_phys(entry));
 }
 
+#ifndef arch_kexec_post_alloc_pages
+static inline int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages, gfp_t gfp) { return 0; }
+#endif
+
+#ifndef arch_kexec_pre_free_pages
+static inline void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages) { }
+#endif
+
 #else /* !CONFIG_KEXEC_CORE */
 struct pt_regs;
 struct task_struct;
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index 1ae7c41..20fef1a 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -301,7 +301,7 @@ static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order)
 {
 	struct page *pages;
 
-	pages = alloc_pages(gfp_mask, order);
+	pages = alloc_pages(gfp_mask & ~__GFP_ZERO, order);
 	if (pages) {
 		unsigned int count, i;
 
@@ -310,6 +310,13 @@ static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order)
 		count = 1 << order;
 		for (i = 0; i < count; i++)
 			SetPageReserved(pages + i);
+
+		arch_kexec_post_alloc_pages(page_address(pages), count,
+					    gfp_mask);
+
+		if (gfp_mask & __GFP_ZERO)
+			for (i = 0; i < count; i++)
+				clear_highpage(pages + i);
 	}
 
 	return pages;
@@ -321,6 +328,9 @@ static void kimage_free_pages(struct page *page)
 
 	order = page_private(page);
 	count = 1 << order;
+
+	arch_kexec_pre_free_pages(page_address(page), count);
+
 	for (i = 0; i < count; i++)
 		ClearPageReserved(page + i);
 	__free_pages(page, order);
-- 
1.9.1

WARNING: multiple messages have this Message-ID (diff)
From: Tom Lendacky <thomas.lendacky@amd.com>
To: x86@kernel.org, linux-kernel@vger.kernel.org,
	linux-arch@vger.kernel.org, linux-efi@vger.kernel.org,
	linux-doc@vger.kernel.org, linux-mm@kvack.org,
	kvm@vger.kernel.org, kasan-dev@googlegroups.com
Cc: "Radim Krčmář" <rkrcmar@redhat.com>,
	"Arnd Bergmann" <arnd@arndb.de>,
	"Jonathan Corbet" <corbet@lwn.net>,
	"Matt Fleming" <matt@codeblueprint.co.uk>,
	"Konrad Rzeszutek Wilk" <konrad.wilk@oracle.com>,
	"Andrey Ryabinin" <aryabinin@virtuozzo.com>,
	"Ingo Molnar" <mingo@redhat.com>,
	"Borislav Petkov" <bp@alien8.de>,
	"Andy Lutomirski" <luto@kernel.org>,
	"H. Peter Anvin" <hpa@zytor.com>,
	"Paolo Bonzini" <pbonzini@redhat.com>,
	"Alexander Potapenko" <glider@google.com>,
	"Thomas Gleixner" <tglx@linutronix.de>,
	"Dmitry Vyukov" <dvyukov@google.com>,
	"Rik van Riel" <riel@redhat.com>,
	"Larry Woodman" <lwoodman@redhat.com>,
	"Dave Young" <dyoung@redhat.com>,
	"Toshimitsu Kani" <toshi.kani@hpe.com>,
	"Michael S. Tsirkin" <mst@redhat.com>
Subject: [PATCH v10 31/38] x86/mm, kexec: Allow kexec to be used with SME
Date: Mon, 17 Jul 2017 16:10:28 -0500	[thread overview]
Message-ID: <b95ff075db3e7cd545313f2fb609a49619a09625.1500319216.git.thomas.lendacky@amd.com> (raw)
In-Reply-To: <cover.1500319216.git.thomas.lendacky@amd.com>

Provide support so that kexec can be used to boot a kernel when SME is
enabled.

Support is needed to allocate pages for kexec without encryption.  This
is needed in order to be able to reboot in the kernel in the same manner
as originally booted.

Additionally, when shutting down all of the CPUs we need to be sure to
flush the caches and then halt. This is needed when booting from a state
where SME was not active into a state where SME is active (or vice-versa).
Without these steps, it is possible for cache lines to exist for the same
physical location but tagged both with and without the encryption bit. This
can cause random memory corruption when caches are flushed depending on
which cacheline is written last.

Cc: <kexec@lists.infradead.org>
Reviewed-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
---
 arch/x86/include/asm/init.h          |  1 +
 arch/x86/include/asm/kexec.h         |  8 ++++++++
 arch/x86/include/asm/pgtable_types.h |  1 +
 arch/x86/kernel/machine_kexec_64.c   | 22 +++++++++++++++++++++-
 arch/x86/kernel/process.c            | 17 +++++++++++++++--
 arch/x86/mm/ident_map.c              | 12 ++++++++----
 include/linux/kexec.h                |  8 ++++++++
 kernel/kexec_core.c                  | 12 +++++++++++-
 8 files changed, 73 insertions(+), 8 deletions(-)

diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h
index 474eb8c..05c4aa0 100644
--- a/arch/x86/include/asm/init.h
+++ b/arch/x86/include/asm/init.h
@@ -7,6 +7,7 @@ struct x86_mapping_info {
 	unsigned long page_flag;	 /* page flag for PMD or PUD entry */
 	unsigned long offset;		 /* ident mapping offset */
 	bool direct_gbpages;		 /* PUD level 1GB page support */
+	unsigned long kernpg_flag;	 /* kernel pagetable flag override */
 };
 
 int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index 70ef205..e8183ac 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -207,6 +207,14 @@ struct kexec_entry64_regs {
 	uint64_t r15;
 	uint64_t rip;
 };
+
+extern int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages,
+				       gfp_t gfp);
+#define arch_kexec_post_alloc_pages arch_kexec_post_alloc_pages
+
+extern void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages);
+#define arch_kexec_pre_free_pages arch_kexec_pre_free_pages
+
 #endif
 
 typedef void crash_vmclear_fn(void);
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 32095af..830992f 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -213,6 +213,7 @@ enum page_cache_mode {
 #define PAGE_KERNEL		__pgprot(__PAGE_KERNEL | _PAGE_ENC)
 #define PAGE_KERNEL_RO		__pgprot(__PAGE_KERNEL_RO | _PAGE_ENC)
 #define PAGE_KERNEL_EXEC	__pgprot(__PAGE_KERNEL_EXEC | _PAGE_ENC)
+#define PAGE_KERNEL_EXEC_NOENC	__pgprot(__PAGE_KERNEL_EXEC)
 #define PAGE_KERNEL_RX		__pgprot(__PAGE_KERNEL_RX | _PAGE_ENC)
 #define PAGE_KERNEL_NOCACHE	__pgprot(__PAGE_KERNEL_NOCACHE | _PAGE_ENC)
 #define PAGE_KERNEL_LARGE	__pgprot(__PAGE_KERNEL_LARGE | _PAGE_ENC)
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index cb0a304..9cf8daa 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -87,7 +87,7 @@ static int init_transition_pgtable(struct kimage *image, pgd_t *pgd)
 		set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE));
 	}
 	pte = pte_offset_kernel(pmd, vaddr);
-	set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC));
+	set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC_NOENC));
 	return 0;
 err:
 	free_transition_pgtable(image);
@@ -115,6 +115,7 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
 		.alloc_pgt_page	= alloc_pgt_page,
 		.context	= image,
 		.page_flag	= __PAGE_KERNEL_LARGE_EXEC,
+		.kernpg_flag	= _KERNPG_TABLE_NOENC,
 	};
 	unsigned long mstart, mend;
 	pgd_t *level4p;
@@ -602,3 +603,22 @@ void arch_kexec_unprotect_crashkres(void)
 {
 	kexec_mark_crashkres(false);
 }
+
+int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages, gfp_t gfp)
+{
+	/*
+	 * If SME is active we need to be sure that kexec pages are
+	 * not encrypted because when we boot to the new kernel the
+	 * pages won't be accessed encrypted (initially).
+	 */
+	return set_memory_decrypted((unsigned long)vaddr, pages);
+}
+
+void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages)
+{
+	/*
+	 * If SME is active we need to reset the pages back to being
+	 * an encrypted mapping before freeing them.
+	 */
+	set_memory_encrypted((unsigned long)vaddr, pages);
+}
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 3ca1980..bd6b85f 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -355,6 +355,7 @@ bool xen_set_default_idle(void)
 	return ret;
 }
 #endif
+
 void stop_this_cpu(void *dummy)
 {
 	local_irq_disable();
@@ -365,8 +366,20 @@ void stop_this_cpu(void *dummy)
 	disable_local_APIC();
 	mcheck_cpu_clear(this_cpu_ptr(&cpu_info));
 
-	for (;;)
-		halt();
+	for (;;) {
+		/*
+		 * Use wbinvd followed by hlt to stop the processor. This
+		 * provides support for kexec on a processor that supports
+		 * SME. With kexec, going from SME inactive to SME active
+		 * requires clearing cache entries so that addresses without
+		 * the encryption bit set don't corrupt the same physical
+		 * address that has the encryption bit set when caches are
+		 * flushed. To achieve this a wbinvd is performed followed by
+		 * a hlt. Even if the processor is not in the kexec/SME
+		 * scenario this only adds a wbinvd to a halting processor.
+		 */
+		asm volatile("wbinvd; hlt" : : : "memory");
+	}
 }
 
 /*
diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c
index adab159..31cea98 100644
--- a/arch/x86/mm/ident_map.c
+++ b/arch/x86/mm/ident_map.c
@@ -51,7 +51,7 @@ static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page,
 		if (!pmd)
 			return -ENOMEM;
 		ident_pmd_init(info, pmd, addr, next);
-		set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
+		set_pud(pud, __pud(__pa(pmd) | info->kernpg_flag));
 	}
 
 	return 0;
@@ -79,7 +79,7 @@ static int ident_p4d_init(struct x86_mapping_info *info, p4d_t *p4d_page,
 		if (!pud)
 			return -ENOMEM;
 		ident_pud_init(info, pud, addr, next);
-		set_p4d(p4d, __p4d(__pa(pud) | _KERNPG_TABLE));
+		set_p4d(p4d, __p4d(__pa(pud) | info->kernpg_flag));
 	}
 
 	return 0;
@@ -93,6 +93,10 @@ int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
 	unsigned long next;
 	int result;
 
+	/* Set the default pagetable flags if not supplied */
+	if (!info->kernpg_flag)
+		info->kernpg_flag = _KERNPG_TABLE;
+
 	for (; addr < end; addr = next) {
 		pgd_t *pgd = pgd_page + pgd_index(addr);
 		p4d_t *p4d;
@@ -116,14 +120,14 @@ int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
 		if (result)
 			return result;
 		if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
-			set_pgd(pgd, __pgd(__pa(p4d) | _KERNPG_TABLE));
+			set_pgd(pgd, __pgd(__pa(p4d) | info->kernpg_flag));
 		} else {
 			/*
 			 * With p4d folded, pgd is equal to p4d.
 			 * The pgd entry has to point to the pud page table in this case.
 			 */
 			pud_t *pud = pud_offset(p4d, 0);
-			set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
+			set_pgd(pgd, __pgd(__pa(pud) | info->kernpg_flag));
 		}
 	}
 
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index dd056fa..2b7590f 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -327,6 +327,14 @@ static inline void *boot_phys_to_virt(unsigned long entry)
 	return phys_to_virt(boot_phys_to_phys(entry));
 }
 
+#ifndef arch_kexec_post_alloc_pages
+static inline int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages, gfp_t gfp) { return 0; }
+#endif
+
+#ifndef arch_kexec_pre_free_pages
+static inline void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages) { }
+#endif
+
 #else /* !CONFIG_KEXEC_CORE */
 struct pt_regs;
 struct task_struct;
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index 1ae7c41..20fef1a 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -301,7 +301,7 @@ static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order)
 {
 	struct page *pages;
 
-	pages = alloc_pages(gfp_mask, order);
+	pages = alloc_pages(gfp_mask & ~__GFP_ZERO, order);
 	if (pages) {
 		unsigned int count, i;
 
@@ -310,6 +310,13 @@ static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order)
 		count = 1 << order;
 		for (i = 0; i < count; i++)
 			SetPageReserved(pages + i);
+
+		arch_kexec_post_alloc_pages(page_address(pages), count,
+					    gfp_mask);
+
+		if (gfp_mask & __GFP_ZERO)
+			for (i = 0; i < count; i++)
+				clear_highpage(pages + i);
 	}
 
 	return pages;
@@ -321,6 +328,9 @@ static void kimage_free_pages(struct page *page)
 
 	order = page_private(page);
 	count = 1 << order;
+
+	arch_kexec_pre_free_pages(page_address(page), count);
+
 	for (i = 0; i < count; i++)
 		ClearPageReserved(page + i);
 	__free_pages(page, order);
-- 
1.9.1

WARNING: multiple messages have this Message-ID (diff)
From: Tom Lendacky <thomas.lendacky@amd.com>
To: x86@kernel.org, linux-kernel@vger.kernel.org,
	linux-arch@vger.kernel.org, linux-efi@vger.kernel.org,
	linux-doc@vger.kernel.org, linux-mm@kvack.org,
	kvm@vger.kernel.org, kasan-dev@googlegroups.com
Cc: "Radim Krčmář" <rkrcmar@redhat.com>,
	"Arnd Bergmann" <arnd@arndb.de>,
	"Jonathan Corbet" <corbet@lwn.net>,
	"Matt Fleming" <matt@codeblueprint.co.uk>,
	"Konrad Rzeszutek Wilk" <konrad.wilk@oracle.com>,
	"Andrey Ryabinin" <aryabinin@virtuozzo.com>,
	"Ingo Molnar" <mingo@redhat.com>,
	"Borislav Petkov" <bp@alien8.de>,
	"Andy Lutomirski" <luto@kernel.org>,
	"H. Peter Anvin" <hpa@zytor.com>,
	"Paolo Bonzini" <pbonzini@redhat.com>,
	"Alexander Potapenko" <glider@google.com>,
	"Thomas Gleixner" <tglx@linutronix.de>,
	"Dmitry Vyukov" <dvyukov@google.com>,
	"Rik van Riel" <riel@redhat.com>,
	"Larry Woodman" <lwoodman@redhat.com>,
	"Dave Young" <dyoung@redhat.com>,
	"Toshimitsu Kani" <toshi.kani@hpe.com>,
	"Michael S. Tsirkin" <mst@redhat.com>,
	"Brijesh Singh" <brijesh.singh@amd.com>,
	kexec@lists.infradead.org
Subject: [PATCH v10 31/38] x86/mm, kexec: Allow kexec to be used with SME
Date: Mon, 17 Jul 2017 16:10:28 -0500	[thread overview]
Message-ID: <b95ff075db3e7cd545313f2fb609a49619a09625.1500319216.git.thomas.lendacky@amd.com> (raw)
In-Reply-To: <cover.1500319216.git.thomas.lendacky@amd.com>

Provide support so that kexec can be used to boot a kernel when SME is
enabled.

Support is needed to allocate pages for kexec without encryption.  This
is needed in order to be able to reboot in the kernel in the same manner
as originally booted.

Additionally, when shutting down all of the CPUs we need to be sure to
flush the caches and then halt. This is needed when booting from a state
where SME was not active into a state where SME is active (or vice-versa).
Without these steps, it is possible for cache lines to exist for the same
physical location but tagged both with and without the encryption bit. This
can cause random memory corruption when caches are flushed depending on
which cacheline is written last.

Cc: <kexec@lists.infradead.org>
Reviewed-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
---
 arch/x86/include/asm/init.h          |  1 +
 arch/x86/include/asm/kexec.h         |  8 ++++++++
 arch/x86/include/asm/pgtable_types.h |  1 +
 arch/x86/kernel/machine_kexec_64.c   | 22 +++++++++++++++++++++-
 arch/x86/kernel/process.c            | 17 +++++++++++++++--
 arch/x86/mm/ident_map.c              | 12 ++++++++----
 include/linux/kexec.h                |  8 ++++++++
 kernel/kexec_core.c                  | 12 +++++++++++-
 8 files changed, 73 insertions(+), 8 deletions(-)

diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h
index 474eb8c..05c4aa0 100644
--- a/arch/x86/include/asm/init.h
+++ b/arch/x86/include/asm/init.h
@@ -7,6 +7,7 @@ struct x86_mapping_info {
 	unsigned long page_flag;	 /* page flag for PMD or PUD entry */
 	unsigned long offset;		 /* ident mapping offset */
 	bool direct_gbpages;		 /* PUD level 1GB page support */
+	unsigned long kernpg_flag;	 /* kernel pagetable flag override */
 };
 
 int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index 70ef205..e8183ac 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -207,6 +207,14 @@ struct kexec_entry64_regs {
 	uint64_t r15;
 	uint64_t rip;
 };
+
+extern int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages,
+				       gfp_t gfp);
+#define arch_kexec_post_alloc_pages arch_kexec_post_alloc_pages
+
+extern void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages);
+#define arch_kexec_pre_free_pages arch_kexec_pre_free_pages
+
 #endif
 
 typedef void crash_vmclear_fn(void);
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 32095af..830992f 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -213,6 +213,7 @@ enum page_cache_mode {
 #define PAGE_KERNEL		__pgprot(__PAGE_KERNEL | _PAGE_ENC)
 #define PAGE_KERNEL_RO		__pgprot(__PAGE_KERNEL_RO | _PAGE_ENC)
 #define PAGE_KERNEL_EXEC	__pgprot(__PAGE_KERNEL_EXEC | _PAGE_ENC)
+#define PAGE_KERNEL_EXEC_NOENC	__pgprot(__PAGE_KERNEL_EXEC)
 #define PAGE_KERNEL_RX		__pgprot(__PAGE_KERNEL_RX | _PAGE_ENC)
 #define PAGE_KERNEL_NOCACHE	__pgprot(__PAGE_KERNEL_NOCACHE | _PAGE_ENC)
 #define PAGE_KERNEL_LARGE	__pgprot(__PAGE_KERNEL_LARGE | _PAGE_ENC)
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index cb0a304..9cf8daa 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -87,7 +87,7 @@ static int init_transition_pgtable(struct kimage *image, pgd_t *pgd)
 		set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE));
 	}
 	pte = pte_offset_kernel(pmd, vaddr);
-	set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC));
+	set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC_NOENC));
 	return 0;
 err:
 	free_transition_pgtable(image);
@@ -115,6 +115,7 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
 		.alloc_pgt_page	= alloc_pgt_page,
 		.context	= image,
 		.page_flag	= __PAGE_KERNEL_LARGE_EXEC,
+		.kernpg_flag	= _KERNPG_TABLE_NOENC,
 	};
 	unsigned long mstart, mend;
 	pgd_t *level4p;
@@ -602,3 +603,22 @@ void arch_kexec_unprotect_crashkres(void)
 {
 	kexec_mark_crashkres(false);
 }
+
+int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages, gfp_t gfp)
+{
+	/*
+	 * If SME is active we need to be sure that kexec pages are
+	 * not encrypted because when we boot to the new kernel the
+	 * pages won't be accessed encrypted (initially).
+	 */
+	return set_memory_decrypted((unsigned long)vaddr, pages);
+}
+
+void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages)
+{
+	/*
+	 * If SME is active we need to reset the pages back to being
+	 * an encrypted mapping before freeing them.
+	 */
+	set_memory_encrypted((unsigned long)vaddr, pages);
+}
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 3ca1980..bd6b85f 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -355,6 +355,7 @@ bool xen_set_default_idle(void)
 	return ret;
 }
 #endif
+
 void stop_this_cpu(void *dummy)
 {
 	local_irq_disable();
@@ -365,8 +366,20 @@ void stop_this_cpu(void *dummy)
 	disable_local_APIC();
 	mcheck_cpu_clear(this_cpu_ptr(&cpu_info));
 
-	for (;;)
-		halt();
+	for (;;) {
+		/*
+		 * Use wbinvd followed by hlt to stop the processor. This
+		 * provides support for kexec on a processor that supports
+		 * SME. With kexec, going from SME inactive to SME active
+		 * requires clearing cache entries so that addresses without
+		 * the encryption bit set don't corrupt the same physical
+		 * address that has the encryption bit set when caches are
+		 * flushed. To achieve this a wbinvd is performed followed by
+		 * a hlt. Even if the processor is not in the kexec/SME
+		 * scenario this only adds a wbinvd to a halting processor.
+		 */
+		asm volatile("wbinvd; hlt" : : : "memory");
+	}
 }
 
 /*
diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c
index adab159..31cea98 100644
--- a/arch/x86/mm/ident_map.c
+++ b/arch/x86/mm/ident_map.c
@@ -51,7 +51,7 @@ static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page,
 		if (!pmd)
 			return -ENOMEM;
 		ident_pmd_init(info, pmd, addr, next);
-		set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
+		set_pud(pud, __pud(__pa(pmd) | info->kernpg_flag));
 	}
 
 	return 0;
@@ -79,7 +79,7 @@ static int ident_p4d_init(struct x86_mapping_info *info, p4d_t *p4d_page,
 		if (!pud)
 			return -ENOMEM;
 		ident_pud_init(info, pud, addr, next);
-		set_p4d(p4d, __p4d(__pa(pud) | _KERNPG_TABLE));
+		set_p4d(p4d, __p4d(__pa(pud) | info->kernpg_flag));
 	}
 
 	return 0;
@@ -93,6 +93,10 @@ int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
 	unsigned long next;
 	int result;
 
+	/* Set the default pagetable flags if not supplied */
+	if (!info->kernpg_flag)
+		info->kernpg_flag = _KERNPG_TABLE;
+
 	for (; addr < end; addr = next) {
 		pgd_t *pgd = pgd_page + pgd_index(addr);
 		p4d_t *p4d;
@@ -116,14 +120,14 @@ int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
 		if (result)
 			return result;
 		if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
-			set_pgd(pgd, __pgd(__pa(p4d) | _KERNPG_TABLE));
+			set_pgd(pgd, __pgd(__pa(p4d) | info->kernpg_flag));
 		} else {
 			/*
 			 * With p4d folded, pgd is equal to p4d.
 			 * The pgd entry has to point to the pud page table in this case.
 			 */
 			pud_t *pud = pud_offset(p4d, 0);
-			set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
+			set_pgd(pgd, __pgd(__pa(pud) | info->kernpg_flag));
 		}
 	}
 
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index dd056fa..2b7590f 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -327,6 +327,14 @@ static inline void *boot_phys_to_virt(unsigned long entry)
 	return phys_to_virt(boot_phys_to_phys(entry));
 }
 
+#ifndef arch_kexec_post_alloc_pages
+static inline int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages, gfp_t gfp) { return 0; }
+#endif
+
+#ifndef arch_kexec_pre_free_pages
+static inline void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages) { }
+#endif
+
 #else /* !CONFIG_KEXEC_CORE */
 struct pt_regs;
 struct task_struct;
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index 1ae7c41..20fef1a 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -301,7 +301,7 @@ static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order)
 {
 	struct page *pages;
 
-	pages = alloc_pages(gfp_mask, order);
+	pages = alloc_pages(gfp_mask & ~__GFP_ZERO, order);
 	if (pages) {
 		unsigned int count, i;
 
@@ -310,6 +310,13 @@ static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order)
 		count = 1 << order;
 		for (i = 0; i < count; i++)
 			SetPageReserved(pages + i);
+
+		arch_kexec_post_alloc_pages(page_address(pages), count,
+					    gfp_mask);
+
+		if (gfp_mask & __GFP_ZERO)
+			for (i = 0; i < count; i++)
+				clear_highpage(pages + i);
 	}
 
 	return pages;
@@ -321,6 +328,9 @@ static void kimage_free_pages(struct page *page)
 
 	order = page_private(page);
 	count = 1 << order;
+
+	arch_kexec_pre_free_pages(page_address(page), count);
+
 	for (i = 0; i < count; i++)
 		ClearPageReserved(page + i);
 	__free_pages(page, order);
-- 
1.9.1

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

WARNING: multiple messages have this Message-ID (diff)
From: Tom Lendacky <thomas.lendacky@amd.com>
To: x86@kernel.org, linux-kernel@vger.kernel.org,
	linux-arch@vger.kernel.org, linux-efi@vger.kernel.org,
	linux-doc@vger.kernel.org, linux-mm@kvack.org,
	kvm@vger.kernel.org, kasan-dev@googlegroups.com
Cc: "Rik van Riel" <riel@redhat.com>,
	"Radim Krčmář" <rkrcmar@redhat.com>,
	"Toshimitsu Kani" <toshi.kani@hpe.com>,
	"Arnd Bergmann" <arnd@arndb.de>,
	"Konrad Rzeszutek Wilk" <konrad.wilk@oracle.com>,
	"Matt Fleming" <matt@codeblueprint.co.uk>,
	"Michael S. Tsirkin" <mst@redhat.com>,
	"Jonathan Corbet" <corbet@lwn.net>,
	kexec@lists.infradead.org, "Paolo Bonzini" <pbonzini@redhat.com>,
	"Larry Woodman" <lwoodman@redhat.com>,
	"Brijesh Singh" <brijesh.singh@amd.com>,
	"Ingo Molnar" <mingo@redhat.com>,
	"Borislav Petkov" <bp@alien8.de>,
	"Andy Lutomirski" <luto@kernel.org>,
	"H. Peter Anvin" <hpa@zytor.com>,
	"Andrey Ryabinin" <aryabinin@virtuozzo.com>,
	"Alexander Potapenko" <glider@google.com>,
	"Dave Young" <dyoung@redhat.com>,
	"Thomas Gleixner" <tglx@linutronix.de>,
	"Dmitry Vyukov" <dvyukov@google.com>
Subject: [PATCH v10 31/38] x86/mm, kexec: Allow kexec to be used with SME
Date: Mon, 17 Jul 2017 16:10:28 -0500	[thread overview]
Message-ID: <b95ff075db3e7cd545313f2fb609a49619a09625.1500319216.git.thomas.lendacky@amd.com> (raw)
In-Reply-To: <cover.1500319216.git.thomas.lendacky@amd.com>

Provide support so that kexec can be used to boot a kernel when SME is
enabled.

Support is needed to allocate pages for kexec without encryption.  This
is needed in order to be able to reboot in the kernel in the same manner
as originally booted.

Additionally, when shutting down all of the CPUs we need to be sure to
flush the caches and then halt. This is needed when booting from a state
where SME was not active into a state where SME is active (or vice-versa).
Without these steps, it is possible for cache lines to exist for the same
physical location but tagged both with and without the encryption bit. This
can cause random memory corruption when caches are flushed depending on
which cacheline is written last.

Cc: <kexec@lists.infradead.org>
Reviewed-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
---
 arch/x86/include/asm/init.h          |  1 +
 arch/x86/include/asm/kexec.h         |  8 ++++++++
 arch/x86/include/asm/pgtable_types.h |  1 +
 arch/x86/kernel/machine_kexec_64.c   | 22 +++++++++++++++++++++-
 arch/x86/kernel/process.c            | 17 +++++++++++++++--
 arch/x86/mm/ident_map.c              | 12 ++++++++----
 include/linux/kexec.h                |  8 ++++++++
 kernel/kexec_core.c                  | 12 +++++++++++-
 8 files changed, 73 insertions(+), 8 deletions(-)

diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h
index 474eb8c..05c4aa0 100644
--- a/arch/x86/include/asm/init.h
+++ b/arch/x86/include/asm/init.h
@@ -7,6 +7,7 @@ struct x86_mapping_info {
 	unsigned long page_flag;	 /* page flag for PMD or PUD entry */
 	unsigned long offset;		 /* ident mapping offset */
 	bool direct_gbpages;		 /* PUD level 1GB page support */
+	unsigned long kernpg_flag;	 /* kernel pagetable flag override */
 };
 
 int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index 70ef205..e8183ac 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -207,6 +207,14 @@ struct kexec_entry64_regs {
 	uint64_t r15;
 	uint64_t rip;
 };
+
+extern int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages,
+				       gfp_t gfp);
+#define arch_kexec_post_alloc_pages arch_kexec_post_alloc_pages
+
+extern void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages);
+#define arch_kexec_pre_free_pages arch_kexec_pre_free_pages
+
 #endif
 
 typedef void crash_vmclear_fn(void);
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 32095af..830992f 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -213,6 +213,7 @@ enum page_cache_mode {
 #define PAGE_KERNEL		__pgprot(__PAGE_KERNEL | _PAGE_ENC)
 #define PAGE_KERNEL_RO		__pgprot(__PAGE_KERNEL_RO | _PAGE_ENC)
 #define PAGE_KERNEL_EXEC	__pgprot(__PAGE_KERNEL_EXEC | _PAGE_ENC)
+#define PAGE_KERNEL_EXEC_NOENC	__pgprot(__PAGE_KERNEL_EXEC)
 #define PAGE_KERNEL_RX		__pgprot(__PAGE_KERNEL_RX | _PAGE_ENC)
 #define PAGE_KERNEL_NOCACHE	__pgprot(__PAGE_KERNEL_NOCACHE | _PAGE_ENC)
 #define PAGE_KERNEL_LARGE	__pgprot(__PAGE_KERNEL_LARGE | _PAGE_ENC)
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index cb0a304..9cf8daa 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -87,7 +87,7 @@ static int init_transition_pgtable(struct kimage *image, pgd_t *pgd)
 		set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE));
 	}
 	pte = pte_offset_kernel(pmd, vaddr);
-	set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC));
+	set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC_NOENC));
 	return 0;
 err:
 	free_transition_pgtable(image);
@@ -115,6 +115,7 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
 		.alloc_pgt_page	= alloc_pgt_page,
 		.context	= image,
 		.page_flag	= __PAGE_KERNEL_LARGE_EXEC,
+		.kernpg_flag	= _KERNPG_TABLE_NOENC,
 	};
 	unsigned long mstart, mend;
 	pgd_t *level4p;
@@ -602,3 +603,22 @@ void arch_kexec_unprotect_crashkres(void)
 {
 	kexec_mark_crashkres(false);
 }
+
+int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages, gfp_t gfp)
+{
+	/*
+	 * If SME is active we need to be sure that kexec pages are
+	 * not encrypted because when we boot to the new kernel the
+	 * pages won't be accessed encrypted (initially).
+	 */
+	return set_memory_decrypted((unsigned long)vaddr, pages);
+}
+
+void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages)
+{
+	/*
+	 * If SME is active we need to reset the pages back to being
+	 * an encrypted mapping before freeing them.
+	 */
+	set_memory_encrypted((unsigned long)vaddr, pages);
+}
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 3ca1980..bd6b85f 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -355,6 +355,7 @@ bool xen_set_default_idle(void)
 	return ret;
 }
 #endif
+
 void stop_this_cpu(void *dummy)
 {
 	local_irq_disable();
@@ -365,8 +366,20 @@ void stop_this_cpu(void *dummy)
 	disable_local_APIC();
 	mcheck_cpu_clear(this_cpu_ptr(&cpu_info));
 
-	for (;;)
-		halt();
+	for (;;) {
+		/*
+		 * Use wbinvd followed by hlt to stop the processor. This
+		 * provides support for kexec on a processor that supports
+		 * SME. With kexec, going from SME inactive to SME active
+		 * requires clearing cache entries so that addresses without
+		 * the encryption bit set don't corrupt the same physical
+		 * address that has the encryption bit set when caches are
+		 * flushed. To achieve this a wbinvd is performed followed by
+		 * a hlt. Even if the processor is not in the kexec/SME
+		 * scenario this only adds a wbinvd to a halting processor.
+		 */
+		asm volatile("wbinvd; hlt" : : : "memory");
+	}
 }
 
 /*
diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c
index adab159..31cea98 100644
--- a/arch/x86/mm/ident_map.c
+++ b/arch/x86/mm/ident_map.c
@@ -51,7 +51,7 @@ static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page,
 		if (!pmd)
 			return -ENOMEM;
 		ident_pmd_init(info, pmd, addr, next);
-		set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
+		set_pud(pud, __pud(__pa(pmd) | info->kernpg_flag));
 	}
 
 	return 0;
@@ -79,7 +79,7 @@ static int ident_p4d_init(struct x86_mapping_info *info, p4d_t *p4d_page,
 		if (!pud)
 			return -ENOMEM;
 		ident_pud_init(info, pud, addr, next);
-		set_p4d(p4d, __p4d(__pa(pud) | _KERNPG_TABLE));
+		set_p4d(p4d, __p4d(__pa(pud) | info->kernpg_flag));
 	}
 
 	return 0;
@@ -93,6 +93,10 @@ int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
 	unsigned long next;
 	int result;
 
+	/* Set the default pagetable flags if not supplied */
+	if (!info->kernpg_flag)
+		info->kernpg_flag = _KERNPG_TABLE;
+
 	for (; addr < end; addr = next) {
 		pgd_t *pgd = pgd_page + pgd_index(addr);
 		p4d_t *p4d;
@@ -116,14 +120,14 @@ int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
 		if (result)
 			return result;
 		if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
-			set_pgd(pgd, __pgd(__pa(p4d) | _KERNPG_TABLE));
+			set_pgd(pgd, __pgd(__pa(p4d) | info->kernpg_flag));
 		} else {
 			/*
 			 * With p4d folded, pgd is equal to p4d.
 			 * The pgd entry has to point to the pud page table in this case.
 			 */
 			pud_t *pud = pud_offset(p4d, 0);
-			set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
+			set_pgd(pgd, __pgd(__pa(pud) | info->kernpg_flag));
 		}
 	}
 
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index dd056fa..2b7590f 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -327,6 +327,14 @@ static inline void *boot_phys_to_virt(unsigned long entry)
 	return phys_to_virt(boot_phys_to_phys(entry));
 }
 
+#ifndef arch_kexec_post_alloc_pages
+static inline int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages, gfp_t gfp) { return 0; }
+#endif
+
+#ifndef arch_kexec_pre_free_pages
+static inline void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages) { }
+#endif
+
 #else /* !CONFIG_KEXEC_CORE */
 struct pt_regs;
 struct task_struct;
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index 1ae7c41..20fef1a 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -301,7 +301,7 @@ static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order)
 {
 	struct page *pages;
 
-	pages = alloc_pages(gfp_mask, order);
+	pages = alloc_pages(gfp_mask & ~__GFP_ZERO, order);
 	if (pages) {
 		unsigned int count, i;
 
@@ -310,6 +310,13 @@ static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order)
 		count = 1 << order;
 		for (i = 0; i < count; i++)
 			SetPageReserved(pages + i);
+
+		arch_kexec_post_alloc_pages(page_address(pages), count,
+					    gfp_mask);
+
+		if (gfp_mask & __GFP_ZERO)
+			for (i = 0; i < count; i++)
+				clear_highpage(pages + i);
 	}
 
 	return pages;
@@ -321,6 +328,9 @@ static void kimage_free_pages(struct page *page)
 
 	order = page_private(page);
 	count = 1 << order;
+
+	arch_kexec_pre_free_pages(page_address(page), count);
+
 	for (i = 0; i < count; i++)
 		ClearPageReserved(page + i);
 	__free_pages(page, order);
-- 
1.9.1


_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

  parent reply	other threads:[~2017-07-17 21:13 UTC|newest]

Thread overview: 163+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-07-17 21:09 [PATCH v10 00/38] x86: Secure Memory Encryption (AMD) Tom Lendacky
2017-07-17 21:09 ` Tom Lendacky
2017-07-17 21:09 ` Tom Lendacky
2017-07-17 21:09 ` Tom Lendacky
2017-07-17 21:09 ` [PATCH v10 01/38] x86: Document AMD Secure Memory Encryption (SME) Tom Lendacky
2017-07-17 21:09   ` Tom Lendacky
2017-07-18 10:47   ` [tip:x86/mm] x86/cpu/AMD: " tip-bot for Tom Lendacky
2017-07-17 21:09 ` [PATCH v10 02/38] x86/mm/pat: Set write-protect cache mode for full PAT support Tom Lendacky
2017-07-17 21:09   ` Tom Lendacky
2017-07-18 10:47   ` [tip:x86/mm] " tip-bot for Tom Lendacky
2017-07-17 21:10 ` [PATCH v10 03/38] x86, mpparse, x86/acpi, x86/PCI, x86/dmi, SFI: Use memremap for RAM mappings Tom Lendacky
2017-07-17 21:10   ` Tom Lendacky
2017-07-18 10:48   ` [tip:x86/mm] x86, mpparse, x86/acpi, x86/PCI, x86/dmi, SFI: Use memremap() " tip-bot for Tom Lendacky
2017-07-17 21:10 ` [PATCH v10 04/38] x86/CPU/AMD: Add the Secure Memory Encryption CPU feature Tom Lendacky
2017-07-17 21:10   ` Tom Lendacky
2017-07-18 10:48   ` [tip:x86/mm] x86/cpu/AMD: " tip-bot for Tom Lendacky
2017-07-17 21:10 ` [PATCH v10 05/38] x86/CPU/AMD: Handle SME reduction in physical address size Tom Lendacky
2017-07-17 21:10   ` Tom Lendacky
2017-07-18 10:48   ` [tip:x86/mm] x86/cpu/AMD: " tip-bot for Tom Lendacky
2017-07-17 21:10 ` [PATCH v10 06/38] x86/mm: Add Secure Memory Encryption (SME) support Tom Lendacky
2017-07-17 21:10   ` Tom Lendacky
2017-07-18 10:49   ` [tip:x86/mm] " tip-bot for Tom Lendacky
2017-07-17 21:10 ` [PATCH v10 07/38] x86/mm: Remove phys_to_virt() usage in ioremap() Tom Lendacky
2017-07-17 21:10   ` Tom Lendacky
2017-07-18 10:49   ` [tip:x86/mm] " tip-bot for Tom Lendacky
2017-07-17 21:10 ` [PATCH v10 08/38] x86/mm: Add support to enable SME in early boot processing Tom Lendacky
2017-07-17 21:10   ` Tom Lendacky
2017-07-18 10:50   ` [tip:x86/mm] " tip-bot for Tom Lendacky
2017-07-17 21:10 ` [PATCH v10 09/38] x86/mm: Simplify p[g4um]d_page() macros Tom Lendacky
2017-07-17 21:10   ` Tom Lendacky
2017-07-18 10:50   ` [tip:x86/mm] " tip-bot for Tom Lendacky
2017-07-17 21:10 ` [PATCH v10 10/38] x86/mm: Provide general kernel support for memory encryption Tom Lendacky
2017-07-17 21:10   ` Tom Lendacky
2017-07-18 10:50   ` [tip:x86/mm] " tip-bot for Tom Lendacky
2017-07-17 21:10 ` [PATCH v10 11/38] x86/mm: Add SME support for read_cr3_pa() Tom Lendacky
2017-07-17 21:10   ` Tom Lendacky
2017-07-18 10:51   ` [tip:x86/mm] " tip-bot for Tom Lendacky
2017-07-17 21:10 ` [PATCH v10 12/38] x86/mm: Extend early_memremap() support with additional attrs Tom Lendacky
2017-07-17 21:10   ` Tom Lendacky
2017-07-18 10:51   ` [tip:x86/mm] " tip-bot for Tom Lendacky
2017-07-17 21:10 ` [PATCH v10 13/38] x86/mm: Add support for early encrypt/decrypt of memory Tom Lendacky
2017-07-17 21:10   ` Tom Lendacky
2017-07-18 10:51   ` [tip:x86/mm] x86/mm: Add support for early encryption/decryption " tip-bot for Tom Lendacky
2017-10-25 17:34     ` Dave Hansen
2017-10-26 13:05       ` Tom Lendacky
2017-10-26 14:08         ` Dave Hansen
2017-07-17 21:10 ` [PATCH v10 14/38] x86/mm: Insure that boot memory areas are mapped properly Tom Lendacky
2017-07-17 21:10   ` Tom Lendacky
2017-07-18 10:52   ` [tip:x86/mm] " tip-bot for Tom Lendacky
2017-07-17 21:10 ` [PATCH v10 15/38] x86/boot/e820: Add support to determine the E820 type of an address Tom Lendacky
2017-07-17 21:10   ` Tom Lendacky
2017-07-18 10:52   ` [tip:x86/mm] " tip-bot for Tom Lendacky
2017-07-17 21:10 ` [PATCH v10 16/38] efi: Add an EFI table address match function Tom Lendacky
2017-07-17 21:10   ` Tom Lendacky
2017-07-18 10:53   ` [tip:x86/mm] " tip-bot for Tom Lendacky
2017-07-17 21:10 ` [PATCH v10 17/38] efi: Update efi_mem_type() to return an error rather than 0 Tom Lendacky
2017-07-17 21:10   ` Tom Lendacky
2017-07-18 10:53   ` [tip:x86/mm] " tip-bot for Tom Lendacky
2017-07-17 21:10 ` [PATCH v10 18/38] x86/efi: Update EFI pagetable creation to work with SME Tom Lendacky
2017-07-17 21:10   ` Tom Lendacky
2017-07-18 10:53   ` [tip:x86/mm] " tip-bot for Tom Lendacky
2017-07-17 21:10 ` [PATCH v10 19/38] x86/mm: Add support to access boot related data in the clear Tom Lendacky
2017-07-17 21:10   ` Tom Lendacky
2017-07-18 10:54   ` [tip:x86/mm] " tip-bot for Tom Lendacky
2017-07-17 21:10 ` [PATCH v10 20/38] x86, mpparse: Use memremap to map the mpf and mpc data Tom Lendacky
2017-07-17 21:10   ` Tom Lendacky
2017-07-18 10:54   ` [tip:x86/mm] x86/boot: Use memremap() to map the MPF and MPC data tip-bot for Tom Lendacky
2017-07-20  9:23     ` [PATCH] x86/boot: Fix memremap() related build failure Ingo Molnar
2017-07-20 10:27       ` [tip:x86/mm] " tip-bot for Ingo Molnar
2017-11-03 15:12   ` [PATCH v10 20/38] x86, mpparse: Use memremap to map the mpf and mpc data Tomeu Vizoso
2017-11-03 15:12     ` Tomeu Vizoso
2017-11-03 15:12     ` Tomeu Vizoso
2017-11-03 15:31     ` Tom Lendacky
2017-11-03 15:31       ` Tom Lendacky
2017-11-03 15:31       ` Tom Lendacky
2017-11-06  7:13       ` Tomeu Vizoso
2017-11-06  7:13         ` Tomeu Vizoso
2017-11-06  7:13         ` Tomeu Vizoso
2017-07-17 21:10 ` [PATCH v10 21/38] x86/mm: Add support to access persistent memory in the clear Tom Lendacky
2017-07-17 21:10   ` Tom Lendacky
2017-07-18 10:54   ` [tip:x86/mm] " tip-bot for Tom Lendacky
2017-07-17 21:10 ` [PATCH v10 22/38] x86/mm: Add support for changing the memory encryption attribute Tom Lendacky
2017-07-17 21:10   ` Tom Lendacky
2017-07-18 10:55   ` [tip:x86/mm] " tip-bot for Tom Lendacky
2017-07-17 21:10 ` [PATCH v10 23/38] x86/realmode: Decrypt trampoline area if memory encryption is active Tom Lendacky
2017-07-17 21:10   ` Tom Lendacky
2017-07-18 10:55   ` [tip:x86/mm] " tip-bot for Tom Lendacky
2017-07-17 21:10 ` [PATCH v10 24/38] x86, swiotlb: Add memory encryption support Tom Lendacky
2017-07-17 21:10   ` Tom Lendacky
2017-07-17 21:10   ` Tom Lendacky
2017-07-18 10:56   ` [tip:x86/mm] " tip-bot for Tom Lendacky
2017-07-17 21:10 ` [PATCH v10 25/38] swiotlb: Add warnings for use of bounce buffers with SME Tom Lendacky
2017-07-17 21:10   ` Tom Lendacky
2017-07-17 21:10   ` Tom Lendacky
2017-07-18 10:56   ` [tip:x86/mm] " tip-bot for Tom Lendacky
2017-07-17 21:10 ` [PATCH v10 26/38] x86/CPU/AMD: Make the microcode level available earlier in the boot Tom Lendacky
2017-07-17 21:10   ` Tom Lendacky
2017-07-18 10:56   ` [tip:x86/mm] x86/cpu/AMD: " tip-bot for Tom Lendacky
2017-07-17 21:10 ` [PATCH v10 27/38] iommu/amd: Allow the AMD IOMMU to work with memory encryption Tom Lendacky
2017-07-17 21:10   ` Tom Lendacky
2017-07-17 21:10   ` Tom Lendacky
2017-07-18 10:57   ` [tip:x86/mm] " tip-bot for Tom Lendacky
2017-07-18 10:57     ` tip-bot for Tom Lendacky
2017-07-17 21:10 ` [PATCH v10 28/38] x86, realmode: Check for memory encryption on the APs Tom Lendacky
2017-07-17 21:10   ` Tom Lendacky
2017-07-17 21:10   ` Tom Lendacky
2017-07-18 10:57   ` [tip:x86/mm] x86/boot/realmode: " tip-bot for Tom Lendacky
2017-07-17 21:10 ` [PATCH v10 29/38] x86, drm, fbdev: Do not specify encrypted memory for video mappings Tom Lendacky
2017-07-17 21:10   ` Tom Lendacky
2017-07-18 10:57   ` [tip:x86/mm] " tip-bot for Tom Lendacky
2017-07-17 21:10 ` [PATCH v10 30/38] kvm: x86: svm: Support Secure Memory Encryption within KVM Tom Lendacky
2017-07-17 21:10   ` Tom Lendacky
2017-07-18 10:58   ` [tip:x86/mm] kvm/x86/svm: " tip-bot for Tom Lendacky
2017-07-17 21:10 ` Tom Lendacky [this message]
2017-07-17 21:10   ` [PATCH v10 31/38] x86/mm, kexec: Allow kexec to be used with SME Tom Lendacky
2017-07-17 21:10   ` Tom Lendacky
2017-07-17 21:10   ` Tom Lendacky
2017-07-18 10:58   ` [tip:x86/mm] " tip-bot for Tom Lendacky
2017-07-18 10:58     ` tip-bot for Tom Lendacky
2017-07-17 21:10 ` [PATCH v10 32/38] xen/x86: Remove SME feature in PV guests Tom Lendacky
2017-07-17 21:10 ` Tom Lendacky
2017-07-17 21:10   ` Tom Lendacky
2017-07-17 21:10   ` Tom Lendacky
2017-07-18 10:58   ` [tip:x86/mm] " tip-bot for Tom Lendacky
2017-07-18 10:58     ` tip-bot for Tom Lendacky
2017-07-17 21:10 ` [PATCH v10 33/38] x86/mm: Use proper encryption attributes with /dev/mem Tom Lendacky
2017-07-17 21:10   ` Tom Lendacky
2017-07-18 10:59   ` [tip:x86/mm] " tip-bot for Tom Lendacky
2017-07-17 21:10 ` [PATCH v10 34/38] x86/mm: Create native_make_p4d() for PGTABLE_LEVELS <= 4 Tom Lendacky
2017-07-17 21:10   ` Tom Lendacky
2017-07-18 10:59   ` [tip:x86/mm] " tip-bot for Tom Lendacky
2017-07-17 21:10 ` [PATCH v10 35/38] x86/mm: Add support to encrypt the kernel in-place Tom Lendacky
2017-07-17 21:10   ` Tom Lendacky
2017-07-18 11:00   ` [tip:x86/mm] " tip-bot for Tom Lendacky
2017-07-17 21:10 ` [PATCH v10 36/38] x86/boot: Add early cmdline parsing for options with arguments Tom Lendacky
2017-07-17 21:10   ` Tom Lendacky
2017-07-17 21:10   ` Tom Lendacky
2017-07-18 11:00   ` [tip:x86/mm] " tip-bot for Tom Lendacky
2017-07-17 21:10 ` [PATCH v10 37/38] compiler-gcc.h: Introduce __nostackp function attribute Tom Lendacky
2017-07-17 21:10   ` Tom Lendacky
2017-07-17 21:10   ` Tom Lendacky
2017-07-18  9:36   ` Ingo Molnar
2017-07-18  9:36     ` Ingo Molnar
2017-07-18  9:36     ` Ingo Molnar
2017-07-18 13:56     ` Tom Lendacky
2017-07-18 13:56       ` Tom Lendacky
2017-07-18 13:56       ` Tom Lendacky
2017-07-18 11:00   ` [tip:x86/mm] compiler-gcc.h: Introduce __nostackprotector " tip-bot for Tom Lendacky
2017-07-18 18:27   ` tip-bot for Tom Lendacky
2017-07-17 21:10 ` [PATCH v10 38/38] x86/mm: Add support to make use of Secure Memory Encryption Tom Lendacky
2017-07-17 21:10   ` Tom Lendacky
2017-07-18 11:01   ` [tip:x86/mm] " tip-bot for Tom Lendacky
2017-07-18 18:28   ` tip-bot for Tom Lendacky
2017-07-18 12:03 ` [PATCH v10 00/38] x86: Secure Memory Encryption (AMD) Thomas Gleixner
2017-07-18 12:03   ` Thomas Gleixner
2017-07-18 12:03   ` Thomas Gleixner
2017-07-18 12:03   ` Thomas Gleixner
2017-07-18 14:02   ` Tom Lendacky
2017-07-18 14:02   ` Tom Lendacky
2017-07-18 14:02     ` Tom Lendacky
2017-07-18 14:02     ` Tom Lendacky
2017-07-18 14:02     ` Tom Lendacky
2017-07-18 12:03 ` Thomas Gleixner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=b95ff075db3e7cd545313f2fb609a49619a09625.1500319216.git.thomas.lendacky@amd.com \
    --to=thomas.lendacky@amd.com \
    --cc=arnd@arndb.de \
    --cc=aryabinin@virtuozzo.com \
    --cc=bp@alien8.de \
    --cc=brijesh.singh@amd.com \
    --cc=corbet@lwn.net \
    --cc=dvyukov@google.com \
    --cc=dyoung@redhat.com \
    --cc=glider@google.com \
    --cc=hpa@zytor.com \
    --cc=kasan-dev@googlegroups.com \
    --cc=kexec@lists.infradead.org \
    --cc=konrad.wilk@oracle.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-arch@vger.kernel.org \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-efi@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=luto@kernel.org \
    --cc=lwoodman@redhat.com \
    --cc=matt@codeblueprint.co.uk \
    --cc=mingo@redhat.com \
    --cc=mst@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=riel@redhat.com \
    --cc=rkrcmar@redhat.com \
    --cc=tglx@linutronix.de \
    --cc=toshi.kani@hpe.com \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.