* [RFC Part1 PATCH v3 02/17] x86/CPU/AMD: Add the Secure Encrypted Virtualization CPU feature
2017-07-24 19:07 [RFC Part1 PATCH v3 00/17] x86: Secure Encrypted Virtualization (AMD) Brijesh Singh
@ 2017-07-24 19:07 ` Brijesh Singh
[not found] ` <20170724190757.11278-3-brijesh.singh-5C7GfCeVMHo@public.gmane.org>
2017-07-24 19:07 ` [RFC Part1 PATCH v3 03/17] x86/mm: Secure Encrypted Virtualization (SEV) support Brijesh Singh
` (8 subsequent siblings)
9 siblings, 1 reply; 79+ messages in thread
From: Brijesh Singh @ 2017-07-24 19:07 UTC (permalink / raw)
To: linux-kernel, x86, linux-efi, linuxppc-dev, kvm
Cc: Thomas Gleixner, Ingo Molnar, H . Peter Anvin, Borislav Petkov,
Andy Lutomirski, Tony Luck, Piotr Luc, Tom Lendacky, Fenghua Yu,
Lu Baolu, Reza Arbab, David Howells, Matt Fleming,
Kirill A . Shutemov, Laura Abbott, Ard Biesheuvel, Andrew Morton,
Eric Biederman, Benjamin Herrenschmidt, Paul Mackerras
From: Tom Lendacky <thomas.lendacky@amd.com>
Update the CPU features to include identifying and reporting on the
Secure Encrypted Virtualization (SEV) feature. SEV is identified by
CPUID 0x8000001f, but requires BIOS support to enable it (set bit 23 of
MSR_K8_SYSCFG and set bit 0 of MSR_K7_HWCR). Only show the SEV feature
as available if reported by CPUID and enabled by BIOS.
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
---
arch/x86/include/asm/cpufeatures.h | 1 +
arch/x86/include/asm/msr-index.h | 2 ++
arch/x86/kernel/cpu/amd.c | 30 +++++++++++++++++++++++++-----
arch/x86/kernel/cpu/scattered.c | 1 +
4 files changed, 29 insertions(+), 5 deletions(-)
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 14f0f29..b6ae647 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -197,6 +197,7 @@
#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */
+#define X86_FEATURE_SEV ( 7*32+11) /* AMD Secure Encrypted Virtualization */
#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 17f5c12..e399d68 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -378,6 +378,8 @@
#define MSR_K7_PERFCTR3 0xc0010007
#define MSR_K7_CLK_CTL 0xc001001b
#define MSR_K7_HWCR 0xc0010015
+#define MSR_K7_HWCR_SMMLOCK_BIT 0
+#define MSR_K7_HWCR_SMMLOCK BIT_ULL(MSR_K7_HWCR_SMMLOCK_BIT)
#define MSR_K7_FID_VID_CTL 0xc0010041
#define MSR_K7_FID_VID_STATUS 0xc0010042
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 110ca5d..c413f04 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -618,11 +618,16 @@ static void early_init_amd(struct cpuinfo_x86 *c)
set_cpu_bug(c, X86_BUG_AMD_E400);
/*
- * BIOS support is required for SME. If BIOS has enabled SME then
- * adjust x86_phys_bits by the SME physical address space reduction
- * value. If BIOS has not enabled SME then don't advertise the
- * feature (set in scattered.c). Also, since the SME support requires
- * long mode, don't advertise the feature under CONFIG_X86_32.
+ * BIOS support is required for SME and SEV.
+ * For SME: If BIOS has enabled SME then adjust x86_phys_bits by
+ * the SME physical address space reduction value.
+ * If BIOS has not enabled SME then don't advertise the
+ * SME feature (set in scattered.c).
+ * For SEV: If BIOS has not enabled SEV then don't advertise the
+ * SEV feature (set in scattered.c).
+ *
+ * In all cases, since support for SME and SEV requires long mode,
+ * don't advertise the feature under CONFIG_X86_32.
*/
if (cpu_has(c, X86_FEATURE_SME)) {
u64 msr;
@@ -637,6 +642,21 @@ static void early_init_amd(struct cpuinfo_x86 *c)
clear_cpu_cap(c, X86_FEATURE_SME);
}
}
+
+ if (cpu_has(c, X86_FEATURE_SEV)) {
+ if (IS_ENABLED(CONFIG_X86_32)) {
+ clear_cpu_cap(c, X86_FEATURE_SEV);
+ } else {
+ u64 syscfg, hwcr;
+
+ /* Check if SEV is enabled */
+ rdmsrl(MSR_K8_SYSCFG, syscfg);
+ rdmsrl(MSR_K7_HWCR, hwcr);
+ if (!(syscfg & MSR_K8_SYSCFG_MEM_ENCRYPT) ||
+ !(hwcr & MSR_K7_HWCR_SMMLOCK))
+ clear_cpu_cap(c, X86_FEATURE_SEV);
+ }
+ }
}
static void init_amd_k8(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 05459ad..63a78d5 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -32,6 +32,7 @@ static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_CPB, CPUID_EDX, 9, 0x80000007, 0 },
{ X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 },
{ X86_FEATURE_SME, CPUID_EAX, 0, 0x8000001f, 0 },
+ { X86_FEATURE_SEV, CPUID_EAX, 1, 0x8000001f, 0 },
{ 0, 0, 0, 0, 0 }
};
--
2.9.4
^ permalink raw reply related [flat|nested] 79+ messages in thread
* [RFC Part1 PATCH v3 03/17] x86/mm: Secure Encrypted Virtualization (SEV) support
2017-07-24 19:07 [RFC Part1 PATCH v3 00/17] x86: Secure Encrypted Virtualization (AMD) Brijesh Singh
2017-07-24 19:07 ` [RFC Part1 PATCH v3 02/17] x86/CPU/AMD: Add the Secure Encrypted Virtualization CPU feature Brijesh Singh
@ 2017-07-24 19:07 ` Brijesh Singh
[not found] ` <20170724190757.11278-4-brijesh.singh-5C7GfCeVMHo@public.gmane.org>
2017-07-24 19:07 ` [RFC Part1 PATCH v3 07/17] x86/mm: Include SEV for encryption memory attribute changes Brijesh Singh
` (7 subsequent siblings)
9 siblings, 1 reply; 79+ messages in thread
From: Brijesh Singh @ 2017-07-24 19:07 UTC (permalink / raw)
To: linux-kernel, x86, linux-efi, linuxppc-dev, kvm
Cc: Thomas Gleixner, Ingo Molnar, H . Peter Anvin, Borislav Petkov,
Andy Lutomirski, Tony Luck, Piotr Luc, Tom Lendacky, Fenghua Yu,
Lu Baolu, Reza Arbab, David Howells, Matt Fleming,
Kirill A . Shutemov, Laura Abbott, Ard Biesheuvel, Andrew Morton,
Eric Biederman, Benjamin Herrenschmidt, Paul Mackerras
From: Tom Lendacky <thomas.lendacky@amd.com>
Provide support for Secure Encrypted Virtualization (SEV). This initial
support defines a flag that is used by the kernel to determine if it is
running with SEV active.
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
---
arch/x86/include/asm/mem_encrypt.h | 2 ++
arch/x86/mm/mem_encrypt.c | 3 +++
include/linux/mem_encrypt.h | 8 +++++++-
3 files changed, 12 insertions(+), 1 deletion(-)
diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
index 8e618fc..9274ec7 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -22,6 +22,7 @@
#ifdef CONFIG_AMD_MEM_ENCRYPT
extern unsigned long sme_me_mask;
+extern unsigned int sev_enabled;
void sme_encrypt_execute(unsigned long encrypted_kernel_vaddr,
unsigned long decrypted_kernel_vaddr,
@@ -50,6 +51,7 @@ void swiotlb_set_mem_attributes(void *vaddr, unsigned long size);
#else /* !CONFIG_AMD_MEM_ENCRYPT */
#define sme_me_mask 0UL
+#define sev_enabled 0
static inline void __init sme_early_encrypt(resource_size_t paddr,
unsigned long size) { }
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 0fbd092..1e4643e 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -40,6 +40,9 @@ static char sme_cmdline_off[] __initdata = "off";
unsigned long sme_me_mask __section(.data) = 0;
EXPORT_SYMBOL_GPL(sme_me_mask);
+unsigned int sev_enabled __section(.data) = 0;
+EXPORT_SYMBOL_GPL(sev_enabled);
+
/* Buffer used for early in-place encryption by BSP, no locking needed */
static char sme_early_buffer[PAGE_SIZE] __aligned(PAGE_SIZE);
diff --git a/include/linux/mem_encrypt.h b/include/linux/mem_encrypt.h
index 1255f09..ea0831a 100644
--- a/include/linux/mem_encrypt.h
+++ b/include/linux/mem_encrypt.h
@@ -22,12 +22,18 @@
#else /* !CONFIG_ARCH_HAS_MEM_ENCRYPT */
#define sme_me_mask 0UL
+#define sev_enabled 0
#endif /* CONFIG_ARCH_HAS_MEM_ENCRYPT */
static inline bool sme_active(void)
{
- return !!sme_me_mask;
+ return (sme_me_mask && !sev_enabled);
+}
+
+static inline bool sev_active(void)
+{
+ return (sme_me_mask && sev_enabled);
}
static inline unsigned long sme_get_me_mask(void)
--
2.9.4
^ permalink raw reply related [flat|nested] 79+ messages in thread
* [RFC Part1 PATCH v3 07/17] x86/mm: Include SEV for encryption memory attribute changes
2017-07-24 19:07 [RFC Part1 PATCH v3 00/17] x86: Secure Encrypted Virtualization (AMD) Brijesh Singh
2017-07-24 19:07 ` [RFC Part1 PATCH v3 02/17] x86/CPU/AMD: Add the Secure Encrypted Virtualization CPU feature Brijesh Singh
2017-07-24 19:07 ` [RFC Part1 PATCH v3 03/17] x86/mm: Secure Encrypted Virtualization (SEV) support Brijesh Singh
@ 2017-07-24 19:07 ` Brijesh Singh
[not found] ` <20170724190757.11278-8-brijesh.singh-5C7GfCeVMHo@public.gmane.org>
2017-07-24 19:07 ` [RFC Part1 PATCH v3 08/17] x86/efi: Access EFI data as encrypted when SEV is active Brijesh Singh
` (6 subsequent siblings)
9 siblings, 1 reply; 79+ messages in thread
From: Brijesh Singh @ 2017-07-24 19:07 UTC (permalink / raw)
To: linux-kernel, x86, linux-efi, linuxppc-dev, kvm
Cc: Thomas Gleixner, Ingo Molnar, H . Peter Anvin, Borislav Petkov,
Andy Lutomirski, Tony Luck, Piotr Luc, Tom Lendacky, Fenghua Yu,
Lu Baolu, Reza Arbab, David Howells, Matt Fleming,
Kirill A . Shutemov, Laura Abbott, Ard Biesheuvel, Andrew Morton,
Eric Biederman, Benjamin Herrenschmidt, Paul Mackerras
From: Tom Lendacky <thomas.lendacky@amd.com>
The current code checks only for sme_active() when determining whether
to perform the encryption attribute change. Include sev_active() in this
check so that memory attribute changes can occur under SME and SEV.
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
---
arch/x86/mm/pageattr.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index dfb7d65..b726b23 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -1781,8 +1781,8 @@ static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc)
unsigned long start;
int ret;
- /* Nothing to do if the SME is not active */
- if (!sme_active())
+ /* Nothing to do if SME and SEV are not active */
+ if (!sme_active() && !sev_active())
return 0;
/* Should not be working on unaligned addresses */
--
2.9.4
^ permalink raw reply related [flat|nested] 79+ messages in thread
* [RFC Part1 PATCH v3 08/17] x86/efi: Access EFI data as encrypted when SEV is active
2017-07-24 19:07 [RFC Part1 PATCH v3 00/17] x86: Secure Encrypted Virtualization (AMD) Brijesh Singh
` (2 preceding siblings ...)
2017-07-24 19:07 ` [RFC Part1 PATCH v3 07/17] x86/mm: Include SEV for encryption memory attribute changes Brijesh Singh
@ 2017-07-24 19:07 ` Brijesh Singh
[not found] ` <20170724190757.11278-9-brijesh.singh-5C7GfCeVMHo@public.gmane.org>
2017-07-24 19:07 ` [RFC Part1 PATCH v3 10/17] resource: Provide resource struct in resource walk callback Brijesh Singh
` (5 subsequent siblings)
9 siblings, 1 reply; 79+ messages in thread
From: Brijesh Singh @ 2017-07-24 19:07 UTC (permalink / raw)
To: linux-kernel, x86, linux-efi, linuxppc-dev, kvm
Cc: Thomas Gleixner, Ingo Molnar, H . Peter Anvin, Borislav Petkov,
Andy Lutomirski, Tony Luck, Piotr Luc, Tom Lendacky, Fenghua Yu,
Lu Baolu, Reza Arbab, David Howells, Matt Fleming,
Kirill A . Shutemov, Laura Abbott, Ard Biesheuvel, Andrew Morton,
Eric Biederman, Benjamin Herrenschmidt, Paul Mackerras
From: Tom Lendacky <thomas.lendacky@amd.com>
EFI data is encrypted when the kernel is run under SEV. Update the
page table references to be sure the EFI memory areas are accessed
encrypted.
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
---
arch/x86/platform/efi/efi_64.c | 15 ++++++++++++++-
1 file changed, 14 insertions(+), 1 deletion(-)
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 12e8388..1ecb3f6 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -32,6 +32,7 @@
#include <linux/reboot.h>
#include <linux/slab.h>
#include <linux/ucs2_string.h>
+#include <linux/mem_encrypt.h>
#include <asm/setup.h>
#include <asm/page.h>
@@ -369,7 +370,10 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
* as trim_bios_range() will reserve the first page and isolate it away
* from memory allocators anyway.
*/
- if (kernel_map_pages_in_pgd(pgd, 0x0, 0x0, 1, _PAGE_RW)) {
+ pf = _PAGE_RW;
+ if (sev_active())
+ pf |= _PAGE_ENC;
+ if (kernel_map_pages_in_pgd(pgd, 0x0, 0x0, 1, pf)) {
pr_err("Failed to create 1:1 mapping for the first page!\n");
return 1;
}
@@ -412,6 +416,9 @@ static void __init __map_region(efi_memory_desc_t *md, u64 va)
if (!(md->attribute & EFI_MEMORY_WB))
flags |= _PAGE_PCD;
+ if (sev_active())
+ flags |= _PAGE_ENC;
+
pfn = md->phys_addr >> PAGE_SHIFT;
if (kernel_map_pages_in_pgd(pgd, pfn, va, md->num_pages, flags))
pr_warn("Error mapping PA 0x%llx -> VA 0x%llx!\n",
@@ -511,6 +518,9 @@ static int __init efi_update_mappings(efi_memory_desc_t *md, unsigned long pf)
pgd_t *pgd = efi_pgd;
int err1, err2;
+ if (sev_active())
+ pf |= _PAGE_ENC;
+
/* Update the 1:1 mapping */
pfn = md->phys_addr >> PAGE_SHIFT;
err1 = kernel_map_pages_in_pgd(pgd, pfn, md->phys_addr, md->num_pages, pf);
@@ -589,6 +599,9 @@ void __init efi_runtime_update_mappings(void)
(md->type != EFI_RUNTIME_SERVICES_CODE))
pf |= _PAGE_RW;
+ if (sev_active())
+ pf |= _PAGE_ENC;
+
efi_update_mappings(md, pf);
}
}
--
2.9.4
^ permalink raw reply related [flat|nested] 79+ messages in thread
* [RFC Part1 PATCH v3 10/17] resource: Provide resource struct in resource walk callback
2017-07-24 19:07 [RFC Part1 PATCH v3 00/17] x86: Secure Encrypted Virtualization (AMD) Brijesh Singh
` (3 preceding siblings ...)
2017-07-24 19:07 ` [RFC Part1 PATCH v3 08/17] x86/efi: Access EFI data as encrypted when SEV is active Brijesh Singh
@ 2017-07-24 19:07 ` Brijesh Singh
2017-07-31 8:26 ` Borislav Petkov
2017-07-31 22:19 ` Kees Cook
2017-07-24 19:07 ` [RFC Part1 PATCH v3 11/17] x86/mm, resource: Use PAGE_KERNEL protection for ioremap of memory pages Brijesh Singh
` (4 subsequent siblings)
9 siblings, 2 replies; 79+ messages in thread
From: Brijesh Singh @ 2017-07-24 19:07 UTC (permalink / raw)
To: linux-kernel, x86, linux-efi, linuxppc-dev, kvm
Cc: Thomas Gleixner, Ingo Molnar, H . Peter Anvin, Borislav Petkov,
Andy Lutomirski, Tony Luck, Piotr Luc, Tom Lendacky, Fenghua Yu,
Lu Baolu, Reza Arbab, David Howells, Matt Fleming,
Kirill A . Shutemov, Laura Abbott, Ard Biesheuvel, Andrew Morton,
Eric Biederman, Benjamin Herrenschmidt, Paul Mackerras
From: Tom Lendacky <thomas.lendacky@amd.com>
In prep for a new function that will need additional resource information
during the resource walk, update the resource walk callback to pass the
resource structure. Since the current callback start and end arguments
are pulled from the resource structure, the callback functions can obtain
them from the resource structure directly.
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
---
arch/powerpc/kernel/machine_kexec_file_64.c | 12 +++++++++---
arch/x86/kernel/crash.c | 18 +++++++++---------
arch/x86/kernel/pmem.c | 2 +-
include/linux/ioport.h | 4 ++--
include/linux/kexec.h | 2 +-
kernel/kexec_file.c | 5 +++--
kernel/resource.c | 9 +++++----
7 files changed, 30 insertions(+), 22 deletions(-)
diff --git a/arch/powerpc/kernel/machine_kexec_file_64.c b/arch/powerpc/kernel/machine_kexec_file_64.c
index 992c0d2..e4395f9 100644
--- a/arch/powerpc/kernel/machine_kexec_file_64.c
+++ b/arch/powerpc/kernel/machine_kexec_file_64.c
@@ -91,11 +91,13 @@ int arch_kimage_file_post_load_cleanup(struct kimage *image)
* and that value will be returned. If all free regions are visited without
* func returning non-zero, then zero will be returned.
*/
-int arch_kexec_walk_mem(struct kexec_buf *kbuf, int (*func)(u64, u64, void *))
+int arch_kexec_walk_mem(struct kexec_buf *kbuf,
+ int (*func)(struct resource *, void *))
{
int ret = 0;
u64 i;
phys_addr_t mstart, mend;
+ struct resource res = { };
if (kbuf->top_down) {
for_each_free_mem_range_reverse(i, NUMA_NO_NODE, 0,
@@ -105,7 +107,9 @@ int arch_kexec_walk_mem(struct kexec_buf *kbuf, int (*func)(u64, u64, void *))
* range while in kexec, end points to the last byte
* in the range.
*/
- ret = func(mstart, mend - 1, kbuf);
+ res.start = mstart;
+ res.end = mend - 1;
+ ret = func(&res, kbuf);
if (ret)
break;
}
@@ -117,7 +121,9 @@ int arch_kexec_walk_mem(struct kexec_buf *kbuf, int (*func)(u64, u64, void *))
* range while in kexec, end points to the last byte
* in the range.
*/
- ret = func(mstart, mend - 1, kbuf);
+ res.start = mstart;
+ res.end = mend - 1;
+ ret = func(&res, kbuf);
if (ret)
break;
}
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 44404e2..815008c 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -209,7 +209,7 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
}
#ifdef CONFIG_KEXEC_FILE
-static int get_nr_ram_ranges_callback(u64 start, u64 end, void *arg)
+static int get_nr_ram_ranges_callback(struct resource *res, void *arg)
{
unsigned int *nr_ranges = arg;
@@ -342,7 +342,7 @@ static int elf_header_exclude_ranges(struct crash_elf_data *ced,
return ret;
}
-static int prepare_elf64_ram_headers_callback(u64 start, u64 end, void *arg)
+static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg)
{
struct crash_elf_data *ced = arg;
Elf64_Ehdr *ehdr;
@@ -355,7 +355,7 @@ static int prepare_elf64_ram_headers_callback(u64 start, u64 end, void *arg)
ehdr = ced->ehdr;
/* Exclude unwanted mem ranges */
- ret = elf_header_exclude_ranges(ced, start, end);
+ ret = elf_header_exclude_ranges(ced, res->start, res->end);
if (ret)
return ret;
@@ -518,14 +518,14 @@ static int add_e820_entry(struct boot_params *params, struct e820_entry *entry)
return 0;
}
-static int memmap_entry_callback(u64 start, u64 end, void *arg)
+static int memmap_entry_callback(struct resource *res, void *arg)
{
struct crash_memmap_data *cmd = arg;
struct boot_params *params = cmd->params;
struct e820_entry ei;
- ei.addr = start;
- ei.size = end - start + 1;
+ ei.addr = res->start;
+ ei.size = res->end - res->start + 1;
ei.type = cmd->type;
add_e820_entry(params, &ei);
@@ -619,12 +619,12 @@ int crash_setup_memmap_entries(struct kimage *image, struct boot_params *params)
return ret;
}
-static int determine_backup_region(u64 start, u64 end, void *arg)
+static int determine_backup_region(struct resource *res, void *arg)
{
struct kimage *image = arg;
- image->arch.backup_src_start = start;
- image->arch.backup_src_sz = end - start + 1;
+ image->arch.backup_src_start = res->start;
+ image->arch.backup_src_sz = res->end - res->start + 1;
/* Expecting only one range for backup region */
return 1;
diff --git a/arch/x86/kernel/pmem.c b/arch/x86/kernel/pmem.c
index 0c5315d..297bb71 100644
--- a/arch/x86/kernel/pmem.c
+++ b/arch/x86/kernel/pmem.c
@@ -6,7 +6,7 @@
#include <linux/init.h>
#include <linux/ioport.h>
-static int found(u64 start, u64 end, void *data)
+static int found(struct resource *res, void *data)
{
return 1;
}
diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 6230064..1c66b9c 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -269,10 +269,10 @@ walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages,
void *arg, int (*func)(unsigned long, unsigned long, void *));
extern int
walk_system_ram_res(u64 start, u64 end, void *arg,
- int (*func)(u64, u64, void *));
+ int (*func)(struct resource *, void *));
extern int
walk_iomem_res_desc(unsigned long desc, unsigned long flags, u64 start, u64 end,
- void *arg, int (*func)(u64, u64, void *));
+ void *arg, int (*func)(struct resource *, void *));
/* True if any part of r1 overlaps r2 */
static inline bool resource_overlaps(struct resource *r1, struct resource *r2)
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index 2b7590f..d672f95 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -159,7 +159,7 @@ struct kexec_buf {
};
int __weak arch_kexec_walk_mem(struct kexec_buf *kbuf,
- int (*func)(u64, u64, void *));
+ int (*func)(struct resource *, void *));
extern int kexec_add_buffer(struct kexec_buf *kbuf);
int kexec_locate_mem_hole(struct kexec_buf *kbuf);
#endif /* CONFIG_KEXEC_FILE */
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index 9f48f44..e5bcd94 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -406,9 +406,10 @@ static int locate_mem_hole_bottom_up(unsigned long start, unsigned long end,
return 1;
}
-static int locate_mem_hole_callback(u64 start, u64 end, void *arg)
+static int locate_mem_hole_callback(struct resource *res, void *arg)
{
struct kexec_buf *kbuf = (struct kexec_buf *)arg;
+ u64 start = res->start, end = res->end;
unsigned long sz = end - start + 1;
/* Returning 0 will take to next memory range */
@@ -437,7 +438,7 @@ static int locate_mem_hole_callback(u64 start, u64 end, void *arg)
* func returning non-zero, then zero will be returned.
*/
int __weak arch_kexec_walk_mem(struct kexec_buf *kbuf,
- int (*func)(u64, u64, void *))
+ int (*func)(struct resource *, void *))
{
if (kbuf->image->type == KEXEC_TYPE_CRASH)
return walk_iomem_res_desc(crashk_res.desc,
diff --git a/kernel/resource.c b/kernel/resource.c
index 7b20b3e..5f9ee7bb0 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -403,14 +403,15 @@ static int find_next_iomem_res(struct resource *res, unsigned long desc,
static int __walk_iomem_res_desc(struct resource *res, unsigned long desc,
bool first_level_children_only,
- void *arg, int (*func)(u64, u64, void *))
+ void *arg,
+ int (*func)(struct resource *, void *))
{
u64 orig_end = res->end;
int ret = -1;
while ((res->start < res->end) &&
!find_next_iomem_res(res, desc, first_level_children_only)) {
- ret = (*func)(res->start, res->end, arg);
+ ret = (*func)(res, arg);
if (ret)
break;
@@ -436,7 +437,7 @@ static int __walk_iomem_res_desc(struct resource *res, unsigned long desc,
* <linux/ioport.h> and set it in 'desc' of a target resource entry.
*/
int walk_iomem_res_desc(unsigned long desc, unsigned long flags, u64 start,
- u64 end, void *arg, int (*func)(u64, u64, void *))
+ u64 end, void *arg, int (*func)(struct resource *, void *))
{
struct resource res;
@@ -455,7 +456,7 @@ int walk_iomem_res_desc(unsigned long desc, unsigned long flags, u64 start,
* ranges.
*/
int walk_system_ram_res(u64 start, u64 end, void *arg,
- int (*func)(u64, u64, void *))
+ int (*func)(struct resource *, void *))
{
struct resource res;
--
2.9.4
^ permalink raw reply related [flat|nested] 79+ messages in thread
* Re: [RFC Part1 PATCH v3 10/17] resource: Provide resource struct in resource walk callback
2017-07-24 19:07 ` [RFC Part1 PATCH v3 10/17] resource: Provide resource struct in resource walk callback Brijesh Singh
@ 2017-07-31 8:26 ` Borislav Petkov
2017-07-31 22:19 ` Kees Cook
1 sibling, 0 replies; 79+ messages in thread
From: Borislav Petkov @ 2017-07-31 8:26 UTC (permalink / raw)
To: Brijesh Singh
Cc: linux-kernel, x86, linux-efi, linuxppc-dev, kvm, Thomas Gleixner,
Ingo Molnar, H . Peter Anvin, Andy Lutomirski, Tony Luck,
Piotr Luc, Tom Lendacky, Fenghua Yu, Lu Baolu, Reza Arbab,
David Howells, Matt Fleming, Kirill A . Shutemov, Laura Abbott,
Ard Biesheuvel, Andrew Morton, Eric
On Mon, Jul 24, 2017 at 02:07:50PM -0500, Brijesh Singh wrote:
> From: Tom Lendacky <thomas.lendacky@amd.com>
>
> In prep for a new function that will need additional resource information
> during the resource walk, update the resource walk callback to pass the
> resource structure. Since the current callback start and end arguments
> are pulled from the resource structure, the callback functions can obtain
> them from the resource structure directly.
>
> Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
> Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
> ---
> arch/powerpc/kernel/machine_kexec_file_64.c | 12 +++++++++---
> arch/x86/kernel/crash.c | 18 +++++++++---------
> arch/x86/kernel/pmem.c | 2 +-
> include/linux/ioport.h | 4 ++--
> include/linux/kexec.h | 2 +-
> kernel/kexec_file.c | 5 +++--
> kernel/resource.c | 9 +++++----
> 7 files changed, 30 insertions(+), 22 deletions(-)
Reviewed-by: Borislav Petkov <bp@suse.de>
--
Regards/Gruss,
Boris.
SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nürnberg)
--
^ permalink raw reply [flat|nested] 79+ messages in thread
* Re: [RFC Part1 PATCH v3 10/17] resource: Provide resource struct in resource walk callback
2017-07-24 19:07 ` [RFC Part1 PATCH v3 10/17] resource: Provide resource struct in resource walk callback Brijesh Singh
2017-07-31 8:26 ` Borislav Petkov
@ 2017-07-31 22:19 ` Kees Cook
1 sibling, 0 replies; 79+ messages in thread
From: Kees Cook @ 2017-07-31 22:19 UTC (permalink / raw)
To: Brijesh Singh
Cc: LKML, x86, linux-efi, linuxppc-dev, KVM, Thomas Gleixner,
Ingo Molnar, H . Peter Anvin, Borislav Petkov, Andy Lutomirski,
Tony Luck, Piotr Luc, Tom Lendacky, Fenghua Yu, Lu Baolu,
Reza Arbab, David Howells, Matt Fleming, Kirill A . Shutemov,
Laura Abbott
On Mon, Jul 24, 2017 at 12:07 PM, Brijesh Singh <brijesh.singh@amd.com> wrote:
> From: Tom Lendacky <thomas.lendacky@amd.com>
>
> In prep for a new function that will need additional resource information
> during the resource walk, update the resource walk callback to pass the
> resource structure. Since the current callback start and end arguments
> are pulled from the resource structure, the callback functions can obtain
> them from the resource structure directly.
>
> Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
> Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
This is a nice clean up even without the refactoring need. :)
Reviewed-by: Kees Cook <keescook@chromium.org>
Thanks!
-Kees
--
Kees Cook
Pixel Security
^ permalink raw reply [flat|nested] 79+ messages in thread
* [RFC Part1 PATCH v3 11/17] x86/mm, resource: Use PAGE_KERNEL protection for ioremap of memory pages
2017-07-24 19:07 [RFC Part1 PATCH v3 00/17] x86: Secure Encrypted Virtualization (AMD) Brijesh Singh
` (4 preceding siblings ...)
2017-07-24 19:07 ` [RFC Part1 PATCH v3 10/17] resource: Provide resource struct in resource walk callback Brijesh Singh
@ 2017-07-24 19:07 ` Brijesh Singh
[not found] ` <20170724190757.11278-12-brijesh.singh-5C7GfCeVMHo@public.gmane.org>
2017-07-24 19:07 ` [RFC Part1 PATCH v3 13/17] x86/io: Unroll string I/O when SEV is active Brijesh Singh
` (3 subsequent siblings)
9 siblings, 1 reply; 79+ messages in thread
From: Brijesh Singh @ 2017-07-24 19:07 UTC (permalink / raw)
To: linux-kernel, x86, linux-efi, linuxppc-dev, kvm
Cc: Thomas Gleixner, Ingo Molnar, H . Peter Anvin, Borislav Petkov,
Andy Lutomirski, Tony Luck, Piotr Luc, Tom Lendacky, Fenghua Yu,
Lu Baolu, Reza Arbab, David Howells, Matt Fleming,
Kirill A . Shutemov, Laura Abbott, Ard Biesheuvel, Andrew Morton,
Eric Biederman, Benjamin Herrenschmidt, Paul Mackerras
From: Tom Lendacky <thomas.lendacky@amd.com>
In order for memory pages to be properly mapped when SEV is active, we
need to use the PAGE_KERNEL protection attribute as the base protection.
This will ensure that memory mapping of, e.g. ACPI tables, receives the
proper mapping attributes.
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
---
arch/x86/mm/ioremap.c | 28 ++++++++++++++++++++++++++++
include/linux/ioport.h | 3 +++
kernel/resource.c | 17 +++++++++++++++++
3 files changed, 48 insertions(+)
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index c0be7cf..7b27332 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -69,6 +69,26 @@ static int __ioremap_check_ram(unsigned long start_pfn, unsigned long nr_pages,
return 0;
}
+static int __ioremap_res_desc_other(struct resource *res, void *arg)
+{
+ return (res->desc != IORES_DESC_NONE);
+}
+
+/*
+ * This function returns true if the target memory is marked as
+ * IORESOURCE_MEM and IORESOURCE_BUSY and described as other than
+ * IORES_DESC_NONE (e.g. IORES_DESC_ACPI_TABLES).
+ */
+static bool __ioremap_check_if_mem(resource_size_t addr, unsigned long size)
+{
+ u64 start, end;
+
+ start = (u64)addr;
+ end = start + size - 1;
+
+ return (walk_mem_res(start, end, NULL, __ioremap_res_desc_other) == 1);
+}
+
/*
* Remap an arbitrary physical address space into the kernel virtual
* address space. It transparently creates kernel huge I/O mapping when
@@ -146,7 +166,15 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
pcm = new_pcm;
}
+ /*
+ * If the page being mapped is in memory and SEV is active then
+ * make sure the memory encryption attribute is enabled in the
+ * resulting mapping.
+ */
prot = PAGE_KERNEL_IO;
+ if (sev_active() && __ioremap_check_if_mem(phys_addr, size))
+ prot = pgprot_encrypted(prot);
+
switch (pcm) {
case _PAGE_CACHE_MODE_UC:
default:
diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 1c66b9c..297f5b8 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -268,6 +268,9 @@ extern int
walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages,
void *arg, int (*func)(unsigned long, unsigned long, void *));
extern int
+walk_mem_res(u64 start, u64 end, void *arg,
+ int (*func)(struct resource *, void *));
+extern int
walk_system_ram_res(u64 start, u64 end, void *arg,
int (*func)(struct resource *, void *));
extern int
diff --git a/kernel/resource.c b/kernel/resource.c
index 5f9ee7bb0..ec3fa0c 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -468,6 +468,23 @@ int walk_system_ram_res(u64 start, u64 end, void *arg,
arg, func);
}
+/*
+ * This function calls the @func callback against all memory ranges, which
+ * are ranges marked as IORESOURCE_MEM and IORESOURCE_BUSY.
+ */
+int walk_mem_res(u64 start, u64 end, void *arg,
+ int (*func)(struct resource *, void *))
+{
+ struct resource res;
+
+ res.start = start;
+ res.end = end;
+ res.flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+
+ return __walk_iomem_res_desc(&res, IORES_DESC_NONE, true,
+ arg, func);
+}
+
#if !defined(CONFIG_ARCH_HAS_WALK_MEMORY)
/*
--
2.9.4
^ permalink raw reply related [flat|nested] 79+ messages in thread
* [RFC Part1 PATCH v3 13/17] x86/io: Unroll string I/O when SEV is active
2017-07-24 19:07 [RFC Part1 PATCH v3 00/17] x86: Secure Encrypted Virtualization (AMD) Brijesh Singh
` (5 preceding siblings ...)
2017-07-24 19:07 ` [RFC Part1 PATCH v3 11/17] x86/mm, resource: Use PAGE_KERNEL protection for ioremap of memory pages Brijesh Singh
@ 2017-07-24 19:07 ` Brijesh Singh
2017-07-25 9:51 ` David Laight
2017-07-24 19:07 ` [RFC Part1 PATCH v3 15/17] x86: Add support for changing memory encryption attribute in early boot Brijesh Singh
` (2 subsequent siblings)
9 siblings, 1 reply; 79+ messages in thread
From: Brijesh Singh @ 2017-07-24 19:07 UTC (permalink / raw)
To: linux-kernel, x86, linux-efi, linuxppc-dev, kvm
Cc: Thomas Gleixner, Ingo Molnar, H . Peter Anvin, Borislav Petkov,
Andy Lutomirski, Tony Luck, Piotr Luc, Tom Lendacky, Fenghua Yu,
Lu Baolu, Reza Arbab, David Howells, Matt Fleming,
Kirill A . Shutemov, Laura Abbott, Ard Biesheuvel, Andrew Morton,
Eric Biederman, Benjamin Herrenschmidt, Paul Mackerras
From: Tom Lendacky <thomas.lendacky@amd.com>
Secure Encrypted Virtualization (SEV) does not support string I/O, so
unroll the string I/O operation into a loop operating on one element at
a time.
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
---
arch/x86/include/asm/io.h | 26 ++++++++++++++++++++++----
1 file changed, 22 insertions(+), 4 deletions(-)
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index e080a39..2f3c002 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -327,14 +327,32 @@ static inline unsigned type in##bwl##_p(int port) \
\
static inline void outs##bwl(int port, const void *addr, unsigned long count) \
{ \
- asm volatile("rep; outs" #bwl \
- : "+S"(addr), "+c"(count) : "d"(port)); \
+ if (sev_active()) { \
+ unsigned type *value = (unsigned type *)addr; \
+ while (count) { \
+ out##bwl(*value, port); \
+ value++; \
+ count--; \
+ } \
+ } else { \
+ asm volatile("rep; outs" #bwl \
+ : "+S"(addr), "+c"(count) : "d"(port)); \
+ } \
} \
\
static inline void ins##bwl(int port, void *addr, unsigned long count) \
{ \
- asm volatile("rep; ins" #bwl \
- : "+D"(addr), "+c"(count) : "d"(port)); \
+ if (sev_active()) { \
+ unsigned type *value = (unsigned type *)addr; \
+ while (count) { \
+ *value = in##bwl(port); \
+ value++; \
+ count--; \
+ } \
+ } else { \
+ asm volatile("rep; ins" #bwl \
+ : "+D"(addr), "+c"(count) : "d"(port)); \
+ } \
}
BUILDIO(b, b, char)
--
2.9.4
^ permalink raw reply related [flat|nested] 79+ messages in thread
* RE: [RFC Part1 PATCH v3 13/17] x86/io: Unroll string I/O when SEV is active
2017-07-24 19:07 ` [RFC Part1 PATCH v3 13/17] x86/io: Unroll string I/O when SEV is active Brijesh Singh
@ 2017-07-25 9:51 ` David Laight
2017-07-26 10:45 ` Arnd Bergmann
0 siblings, 1 reply; 79+ messages in thread
From: David Laight @ 2017-07-25 9:51 UTC (permalink / raw)
To: 'Brijesh Singh', linux-kernel, x86, linux-efi, linuxppc-dev, kvm
Cc: Fenghua Yu, Matt Fleming, David Howells, Paul Mackerras,
H . Peter Anvin, Christoph Lameter, Jonathan Corbet,
Radim Krčmář, Piotr Luc, Ingo Molnar, Dave Airlie,
Borislav Petkov, Tom Lendacky, Kees Cook, Arnd Bergmann,
Konrad Rzeszutek Wilk, Reza Arbab, Andy Lutomirski,
Thomas Gleixner, Laura Abbott
From: Brijesh Singh
> Sent: 24 July 2017 20:08
> From: Tom Lendacky <thomas.lendacky@amd.com>
>
> Secure Encrypted Virtualization (SEV) does not support string I/O, so
> unroll the string I/O operation into a loop operating on one element at
> a time.
>
> Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
> Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
> ---
> arch/x86/include/asm/io.h | 26 ++++++++++++++++++++++----
> 1 file changed, 22 insertions(+), 4 deletions(-)
>
> diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
> index e080a39..2f3c002 100644
> --- a/arch/x86/include/asm/io.h
> +++ b/arch/x86/include/asm/io.h
> @@ -327,14 +327,32 @@ static inline unsigned type in##bwl##_p(int port) \
> \
> static inline void outs##bwl(int port, const void *addr, unsigned long count) \
> {
Is it even worth leaving these as inline functions?
Given the speed of IO cycles it is unlikely that the cost of calling a real
function will be significant.
The code bloat reduction will be significant.
David
^ permalink raw reply [flat|nested] 79+ messages in thread
* Re: [RFC Part1 PATCH v3 13/17] x86/io: Unroll string I/O when SEV is active
2017-07-25 9:51 ` David Laight
@ 2017-07-26 10:45 ` Arnd Bergmann
[not found] ` <CAK8P3a3h7JpmkW7W=HwqAuWWmro=ngj6HSeiiML_=T82x-FtZQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
0 siblings, 1 reply; 79+ messages in thread
From: Arnd Bergmann @ 2017-07-26 10:45 UTC (permalink / raw)
To: David Laight
Cc: Brijesh Singh, linux-kernel, x86, linux-efi, linuxppc-dev, kvm,
Fenghua Yu, Matt Fleming, David Howells, Paul Mackerras,
H . Peter Anvin, Christoph Lameter, Jonathan Corbet,
Radim Krčmář, Piotr Luc, Ingo Molnar, Dave Airlie,
Borislav Petkov
On Tue, Jul 25, 2017 at 11:51 AM, David Laight <David.Laight@aculab.com> wrote:
> From: Brijesh Singh
>> Sent: 24 July 2017 20:08
>> From: Tom Lendacky <thomas.lendacky@amd.com>
>>
>> Secure Encrypted Virtualization (SEV) does not support string I/O, so
>> unroll the string I/O operation into a loop operating on one element at
>> a time.
>>
>> Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
>> Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
>> ---
>> arch/x86/include/asm/io.h | 26 ++++++++++++++++++++++----
>> 1 file changed, 22 insertions(+), 4 deletions(-)
>>
>> diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
>> index e080a39..2f3c002 100644
>> --- a/arch/x86/include/asm/io.h
>> +++ b/arch/x86/include/asm/io.h
>> @@ -327,14 +327,32 @@ static inline unsigned type in##bwl##_p(int port) \
>> \
>> static inline void outs##bwl(int port, const void *addr, unsigned long count) \
>> {
This will clash with a fix I did to add a "memory" clobber
for the traditional implementation, see
https://patchwork.kernel.org/patch/9854573/
> Is it even worth leaving these as inline functions?
> Given the speed of IO cycles it is unlikely that the cost of calling a real
> function will be significant.
> The code bloat reduction will be significant.
I think the smallest code would be the original "rep insb" etc, which
should be smaller than a function call, unlike the loop. Then again,
there is a rather small number of affected device drivers, almost all
of them for ancient hardware that you won't even build in a 64-bit
x86 kernel, see the list below. The only user I found that is actually
still relevant is drivers/tty/hvc/hvc_xen.c, which uses it for the early
console.
Arnd
---
Full list for reference
$ git grep -wl '\(__ide_\|\)\(in\|out\)s\(b\|w\|l\)' drivers sound/
drivers/atm/horizon.c
drivers/cdrom/gdrom.c
drivers/gpu/drm/vmwgfx/vmwgfx_msg.h
drivers/ide/ide-io-std.c
drivers/isdn/hardware/avm/avmcard.h
drivers/isdn/hardware/eicon/divasmain.c
drivers/isdn/hardware/mISDN/avmfritz.c
drivers/isdn/hardware/mISDN/iohelper.h
drivers/isdn/hardware/mISDN/netjet.c
drivers/isdn/hardware/mISDN/w6692.c
drivers/isdn/hisax/asuscom.c
drivers/isdn/hisax/avm_a1.c
drivers/isdn/hisax/avm_a1p.c
drivers/isdn/hisax/avm_pci.c
drivers/isdn/hisax/diva.c
drivers/isdn/hisax/elsa.c
drivers/isdn/hisax/gazel.c
drivers/isdn/hisax/hisax_fcpcipnp.c
drivers/isdn/hisax/ix1_micro.c
drivers/isdn/hisax/mic.c
drivers/isdn/hisax/netjet.c
drivers/isdn/hisax/niccy.c
drivers/isdn/hisax/saphir.c
drivers/isdn/hisax/sedlbauer.c
drivers/isdn/hisax/sportster.c
drivers/isdn/hisax/teles3.c
drivers/isdn/hisax/w6692.c
drivers/isdn/hisax/w6692.h
drivers/media/rc/winbond-cir.c
drivers/mtd/spi-nor/aspeed-smc.c
drivers/net/appletalk/cops.c
drivers/net/arcnet/arcdevice.h
drivers/net/arcnet/com20020.c
drivers/net/ethernet/3com/3c509.c
drivers/net/ethernet/3com/3c515.c
drivers/net/ethernet/3com/3c574_cs.c
drivers/net/ethernet/3com/3c589_cs.c
drivers/net/ethernet/8390/axnet_cs.c
drivers/net/ethernet/8390/mcf8390.c
drivers/net/ethernet/8390/ne.c
drivers/net/ethernet/8390/ne2k-pci.c
drivers/net/ethernet/8390/pcnet_cs.c
drivers/net/ethernet/8390/smc-ultra.c
drivers/net/ethernet/amd/nmclan_cs.c
drivers/net/ethernet/fujitsu/fmvj18x_cs.c
drivers/net/ethernet/hp/hp100.c
drivers/net/ethernet/smsc/smc9194.c
drivers/net/ethernet/smsc/smc91c92_cs.c
drivers/net/ethernet/smsc/smc91x.h
drivers/net/ethernet/xircom/xirc2ps_cs.c
drivers/net/sb1000.c
drivers/net/wan/sbni.c
drivers/net/wireless/cisco/airo.c
drivers/net/wireless/intersil/hostap/hostap_cs.c
drivers/net/wireless/intersil/hostap/hostap_plx.c
drivers/net/wireless/marvell/libertas/if_cs.c
drivers/net/wireless/wl3501_cs.c
drivers/parport/parport_pc.c
drivers/pcmcia/m32r_cfc.c
drivers/scsi/NCR53c406a.c
drivers/scsi/aha152x.c
drivers/scsi/eata_pio.c
drivers/scsi/fdomain.c
drivers/scsi/g_NCR5380.c
drivers/scsi/imm.c
drivers/scsi/nsp32_io.h
drivers/scsi/pcmcia/nsp_io.h
drivers/scsi/pcmcia/sym53c500_cs.c
drivers/scsi/ppa.c
drivers/scsi/qlogicfas408.c
drivers/scsi/sym53c416.c
drivers/staging/comedi/drivers/adl_pci9111.c
drivers/staging/comedi/drivers/cb_pcidas.c
drivers/staging/comedi/drivers/das16m1.c
drivers/staging/comedi/drivers/das1800.c
drivers/staging/iio/adc/ad7606_par.c
drivers/tty/hvc/hvc_xen.c
drivers/tty/isicom.c
drivers/tty/rocket_int.h
drivers/usb/host/isp1362.h
drivers/usb/musb/blackfin.c
sound/drivers/opl4/opl4_lib.c
sound/isa/gus/gus_dram.c
sound/isa/gus/gus_pcm.c
^ permalink raw reply [flat|nested] 79+ messages in thread
* [RFC Part1 PATCH v3 15/17] x86: Add support for changing memory encryption attribute in early boot
2017-07-24 19:07 [RFC Part1 PATCH v3 00/17] x86: Secure Encrypted Virtualization (AMD) Brijesh Singh
` (6 preceding siblings ...)
2017-07-24 19:07 ` [RFC Part1 PATCH v3 13/17] x86/io: Unroll string I/O when SEV is active Brijesh Singh
@ 2017-07-24 19:07 ` Brijesh Singh
2017-08-28 10:51 ` Borislav Petkov
2017-07-24 19:07 ` [RFC Part1 PATCH v3 16/17] X86/KVM: Provide support to create Guest and HV shared per-CPU variables Brijesh Singh
[not found] ` <20170724190757.11278-1-brijesh.singh-5C7GfCeVMHo@public.gmane.org>
9 siblings, 1 reply; 79+ messages in thread
From: Brijesh Singh @ 2017-07-24 19:07 UTC (permalink / raw)
To: linux-kernel, x86, linux-efi, linuxppc-dev, kvm
Cc: Thomas Gleixner, Ingo Molnar, H . Peter Anvin, Borislav Petkov,
Andy Lutomirski, Tony Luck, Piotr Luc, Tom Lendacky, Fenghua Yu,
Lu Baolu, Reza Arbab, David Howells, Matt Fleming,
Kirill A . Shutemov, Laura Abbott, Ard Biesheuvel, Andrew Morton,
Eric Biederman, Benjamin Herrenschmidt, Paul Mackerras
Some KVM-specific custom MSRs shares the guest physical address with
hypervisor. When SEV is active, the shared physical address must be mapped
with encryption attribute cleared so that both hypervsior and guest can
access the data.
Add APIs to change memory encryption attribute in early boot code.
Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
---
arch/x86/include/asm/mem_encrypt.h | 17 ++++++
arch/x86/mm/mem_encrypt.c | 117 +++++++++++++++++++++++++++++++++++++
2 files changed, 134 insertions(+)
diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
index 9cb6472..30b539e 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -46,6 +46,11 @@ void __init sme_early_init(void);
void __init sme_encrypt_kernel(void);
void __init sme_enable(struct boot_params *bp);
+int __init early_set_memory_decrypted(resource_size_t paddr,
+ unsigned long size);
+int __init early_set_memory_encrypted(resource_size_t paddr,
+ unsigned long size);
+
/* Architecture __weak replacement functions */
void __init mem_encrypt_init(void);
@@ -69,6 +74,18 @@ static inline void __init sme_early_init(void) { }
static inline void __init sme_encrypt_kernel(void) { }
static inline void __init sme_enable(struct boot_params *bp) { }
+static inline int __init early_set_memory_decrypted(resource_size_t paddr,
+ unsigned long size)
+{
+ return 0;
+}
+
+static inline int __init early_set_memory_encrypted(resource_size_t paddr,
+ unsigned long size)
+{
+ return 0;
+}
+
#endif /* CONFIG_AMD_MEM_ENCRYPT */
/*
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index ed8780e..d174b1c 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -28,6 +28,8 @@
#include <asm/msr.h>
#include <asm/cmdline.h>
+#include "mm_internal.h"
+
static char sme_cmdline_arg[] __initdata = "mem_encrypt";
static char sme_cmdline_on[] __initdata = "on";
static char sme_cmdline_off[] __initdata = "off";
@@ -257,6 +259,121 @@ static void sme_free(struct device *dev, size_t size, void *vaddr,
swiotlb_free_coherent(dev, size, vaddr, dma_handle);
}
+static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
+{
+ pgprot_t old_prot, new_prot;
+ unsigned long pfn;
+ pte_t new_pte;
+
+ switch (level) {
+ case PG_LEVEL_4K:
+ pfn = pte_pfn(*kpte);
+ old_prot = pte_pgprot(*kpte);
+ break;
+ case PG_LEVEL_2M:
+ pfn = pmd_pfn(*(pmd_t *)kpte);
+ old_prot = pmd_pgprot(*(pmd_t *)kpte);
+ break;
+ case PG_LEVEL_1G:
+ pfn = pud_pfn(*(pud_t *)kpte);
+ old_prot = pud_pgprot(*(pud_t *)kpte);
+ break;
+ default:
+ return;
+ }
+
+ new_prot = old_prot;
+ if (enc)
+ pgprot_val(new_prot) |= _PAGE_ENC;
+ else
+ pgprot_val(new_prot) &= ~_PAGE_ENC;
+
+ /* if prot is same then do nothing */
+ if (pgprot_val(old_prot) == pgprot_val(new_prot))
+ return;
+
+ new_pte = pfn_pte(pfn, new_prot);
+ set_pte_atomic(kpte, new_pte);
+}
+
+static int __init early_set_memory_enc_dec(resource_size_t paddr,
+ unsigned long size, bool enc)
+{
+ unsigned long vaddr, vaddr_end, vaddr_next;
+ unsigned long psize, pmask;
+ int split_page_size_mask;
+ pte_t *kpte;
+ int level;
+
+ vaddr = (unsigned long)__va(paddr);
+ vaddr_next = vaddr;
+ vaddr_end = vaddr + size;
+
+ /*
+ * We are going to change the physical page attribute from C=1 to C=0
+ * or vice versa. Flush the caches to ensure that data is written into
+ * memory with correct C-bit before we change attribute.
+ */
+ clflush_cache_range(__va(paddr), size);
+
+ for (; vaddr < vaddr_end; vaddr = vaddr_next) {
+ kpte = lookup_address(vaddr, &level);
+ if (!kpte || pte_none(*kpte))
+ return 1;
+
+ if (level == PG_LEVEL_4K) {
+ __set_clr_pte_enc(kpte, level, enc);
+ vaddr_next = (vaddr & PAGE_MASK) + PAGE_SIZE;
+ continue;
+ }
+
+ psize = page_level_size(level);
+ pmask = page_level_mask(level);
+
+ /*
+ * Check, whether we can change the large page in one go.
+ * We request a split, when the address is not aligned and
+ * the number of pages to set/clear encryption bit is smaller
+ * than the number of pages in the large page.
+ */
+ if (vaddr == (vaddr & pmask) &&
+ ((vaddr_end - vaddr) >= psize)) {
+ __set_clr_pte_enc(kpte, level, enc);
+ vaddr_next = (vaddr & pmask) + psize;
+ continue;
+ }
+
+ /*
+ * virtual address is part of large page, create the page table
+ * mapping to use smaller pages (4K or 2M). If virtual address
+ * is part of 2M page then we request splitting the large page
+ * into 4K, similarly 1GB large page is requested to split into
+ * 2M pages.
+ */
+ if (level == PG_LEVEL_2M)
+ split_page_size_mask = 0;
+ else
+ split_page_size_mask = 1 << PG_LEVEL_2M;
+
+ kernel_physical_mapping_init(__pa(vaddr & pmask),
+ __pa((vaddr_end & pmask) + psize),
+ split_page_size_mask);
+ }
+
+ __flush_tlb_all();
+ return 0;
+}
+
+int __init early_set_memory_decrypted(resource_size_t paddr, unsigned long size)
+{
+ return early_set_memory_enc_dec(paddr, size, false);
+}
+
+int __init early_set_memory_encrypted(resource_size_t paddr, unsigned long size)
+{
+ return early_set_memory_enc_dec(paddr, size, true);
+}
+
static const struct dma_map_ops sme_dma_ops = {
.alloc = sme_alloc,
.free = sme_free,
--
2.9.4
^ permalink raw reply related [flat|nested] 79+ messages in thread
* Re: [RFC Part1 PATCH v3 15/17] x86: Add support for changing memory encryption attribute in early boot
2017-07-24 19:07 ` [RFC Part1 PATCH v3 15/17] x86: Add support for changing memory encryption attribute in early boot Brijesh Singh
@ 2017-08-28 10:51 ` Borislav Petkov
2017-08-28 11:49 ` Brijesh Singh
0 siblings, 1 reply; 79+ messages in thread
From: Borislav Petkov @ 2017-08-28 10:51 UTC (permalink / raw)
To: Brijesh Singh, Tom Lendacky
Cc: linux-kernel, x86, linux-efi, linuxppc-dev, kvm, Thomas Gleixner,
Ingo Molnar, H . Peter Anvin, Andy Lutomirski, Tony Luck,
Piotr Luc, Fenghua Yu, Lu Baolu, Reza Arbab, David Howells,
Matt Fleming, Kirill A . Shutemov, Laura Abbott, Ard Biesheuvel,
Andrew Morton, Eric Biederman, Benjamin
On Mon, Jul 24, 2017 at 02:07:55PM -0500, Brijesh Singh wrote:
> Some KVM-specific custom MSRs shares the guest physical address with
s/shares/share/
> hypervisor.
"the hypervisor."
> When SEV is active, the shared physical address must be mapped
> with encryption attribute cleared so that both hypervsior and guest can
> access the data.
>
> Add APIs to change memory encryption attribute in early boot code.
>
> Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
> ---
> arch/x86/include/asm/mem_encrypt.h | 17 ++++++
> arch/x86/mm/mem_encrypt.c | 117 +++++++++++++++++++++++++++++++++++++
> 2 files changed, 134 insertions(+)
...
> +static int __init early_set_memory_enc_dec(resource_size_t paddr,
> + unsigned long size, bool enc)
> +{
> + unsigned long vaddr, vaddr_end, vaddr_next;
> + unsigned long psize, pmask;
> + int split_page_size_mask;
> + pte_t *kpte;
> + int level;
> +
> + vaddr = (unsigned long)__va(paddr);
> + vaddr_next = vaddr;
> + vaddr_end = vaddr + size;
> +
> + /*
> + * We are going to change the physical page attribute from C=1 to C=0
> + * or vice versa. Flush the caches to ensure that data is written into
> + * memory with correct C-bit before we change attribute.
> + */
> + clflush_cache_range(__va(paddr), size);
> +
> + for (; vaddr < vaddr_end; vaddr = vaddr_next) {
> + kpte = lookup_address(vaddr, &level);
> + if (!kpte || pte_none(*kpte))
> + return 1;
Return before flushing TLBs? Perhaps you mean
ret = 1;
goto out;
here and out does
__flush_tlb_all();
return ret;
?
--
Regards/Gruss,
Boris.
SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nürnberg)
--
^ permalink raw reply [flat|nested] 79+ messages in thread
* Re: [RFC Part1 PATCH v3 15/17] x86: Add support for changing memory encryption attribute in early boot
2017-08-28 10:51 ` Borislav Petkov
@ 2017-08-28 11:49 ` Brijesh Singh
0 siblings, 0 replies; 79+ messages in thread
From: Brijesh Singh @ 2017-08-28 11:49 UTC (permalink / raw)
To: Borislav Petkov, Tom Lendacky
Cc: brijesh.singh, linux-kernel, x86, linux-efi, linuxppc-dev, kvm,
Thomas Gleixner, Ingo Molnar, H . Peter Anvin, Andy Lutomirski,
Tony Luck, Piotr Luc, Fenghua Yu, Lu Baolu, Reza Arbab,
David Howells, Matt Fleming, Kirill A . Shutemov, Laura Abbott,
Ard Biesheuvel, Andrew Morton, Eric Biederman
Hi Boris,
On 8/28/17 5:51 AM, Borislav Petkov wrote:
[..]
> +static int __init early_set_memory_enc_dec(resource_size_t paddr,
>> + unsigned long size, bool enc)
>> +{
>> + unsigned long vaddr, vaddr_end, vaddr_next;
>> + unsigned long psize, pmask;
>> + int split_page_size_mask;
>> + pte_t *kpte;
>> + int level;
>> +
>> + vaddr = (unsigned long)__va(paddr);
>> + vaddr_next = vaddr;
>> + vaddr_end = vaddr + size;
>> +
>> + /*
>> + * We are going to change the physical page attribute from C=1 to C=0
>> + * or vice versa. Flush the caches to ensure that data is written into
>> + * memory with correct C-bit before we change attribute.
>> + */
>> + clflush_cache_range(__va(paddr), size);
>> +
>> + for (; vaddr < vaddr_end; vaddr = vaddr_next) {
>> + kpte = lookup_address(vaddr, &level);
>> + if (!kpte || pte_none(*kpte))
>> + return 1;
> Return before flushing TLBs? Perhaps you mean
>
> ret = 1;
> goto out;
>
> here and out does
>
> __flush_tlb_all();
> return ret;
thanks, good catch. I will fix in next rev.
-Brijesh
^ permalink raw reply [flat|nested] 79+ messages in thread
* [RFC Part1 PATCH v3 16/17] X86/KVM: Provide support to create Guest and HV shared per-CPU variables
2017-07-24 19:07 [RFC Part1 PATCH v3 00/17] x86: Secure Encrypted Virtualization (AMD) Brijesh Singh
` (7 preceding siblings ...)
2017-07-24 19:07 ` [RFC Part1 PATCH v3 15/17] x86: Add support for changing memory encryption attribute in early boot Brijesh Singh
@ 2017-07-24 19:07 ` Brijesh Singh
2017-08-29 10:22 ` Borislav Petkov
[not found] ` <20170724190757.11278-1-brijesh.singh-5C7GfCeVMHo@public.gmane.org>
9 siblings, 1 reply; 79+ messages in thread
From: Brijesh Singh @ 2017-07-24 19:07 UTC (permalink / raw)
To: linux-kernel, x86, linux-efi, linuxppc-dev, kvm
Cc: Thomas Gleixner, Ingo Molnar, H . Peter Anvin, Borislav Petkov,
Andy Lutomirski, Tony Luck, Piotr Luc, Tom Lendacky, Fenghua Yu,
Lu Baolu, Reza Arbab, David Howells, Matt Fleming,
Kirill A . Shutemov, Laura Abbott, Ard Biesheuvel, Andrew Morton,
Eric Biederman, Benjamin Herrenschmidt, Paul Mackerras
Some KVM specific MSR's (steal-time, asyncpf, avic_eio) allocates per-CPU
variable at compile time and share its physical address with hypervisor.
It presents a challege when SEV is active in guest OS, when SEV is active,
the guest memory is encrypted with guest key hence hypervisor will not
able to modify the guest memory. When SEV is active, we need to clear the
encryption attribute (aka C-bit) of shared physical addresses so that both
guest and hypervisor can access the data.
To solve this problem, I have tried these three options:
1) Convert the static per-CPU to dynamic per-CPU allocation and when SEV
is detected clear the C-bit from the page table. But while doing so I
found that per-CPU dynamic allocator was not ready when kvm_guest_cpu_init
was called.
2) Since the C-bit works on PAGE_SIZE hence add some extra padding to
'struct kvm-steal-time' to make it PAGE_SIZE and then at runtime
clear the encryption attribute of the full PAGE. The downside of this -
we need to modify structure which may break the compatibility.
3) Define a new per-CPU section (.data..percpu.hv_shared) which will be
used to hold the compile time shared per-CPU variables. When SEV is
detected we map this section without C-bit.
This patch implements #3. It introduces a new DEFINE_PER_CPU_HV_SHAHRED
macro to create a compile time per-CPU variable. When SEV is detected we
clear the C-bit from the shared per-CPU variable.
Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
---
arch/x86/kernel/kvm.c | 46 ++++++++++++++++++++++++++++++++++++---
include/asm-generic/vmlinux.lds.h | 3 +++
include/linux/percpu-defs.h | 12 ++++++++++
3 files changed, 58 insertions(+), 3 deletions(-)
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 71c17a5..1f6fec8 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -75,8 +75,8 @@ static int parse_no_kvmclock_vsyscall(char *arg)
early_param("no-kvmclock-vsyscall", parse_no_kvmclock_vsyscall);
-static DEFINE_PER_CPU(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64);
-static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64);
+static DEFINE_PER_CPU_HV_SHARED(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64);
+static DEFINE_PER_CPU_HV_SHARED(struct kvm_steal_time, steal_time) __aligned(64);
static int has_steal_clock = 0;
/*
@@ -303,7 +303,7 @@ static void kvm_register_steal_time(void)
cpu, (unsigned long long) slow_virt_to_phys(st));
}
-static DEFINE_PER_CPU(unsigned long, kvm_apic_eoi) = KVM_PV_EOI_DISABLED;
+static DEFINE_PER_CPU_HV_SHARED(unsigned long, kvm_apic_eoi) = KVM_PV_EOI_DISABLED;
static notrace void kvm_guest_apic_eoi_write(u32 reg, u32 val)
{
@@ -319,11 +319,51 @@ static notrace void kvm_guest_apic_eoi_write(u32 reg, u32 val)
apic->native_eoi_write(APIC_EOI, APIC_EOI_ACK);
}
+/* NOTE: function is marked as __ref because it is used by __init functions */
+static int __ref kvm_map_hv_shared_decrypted(void)
+{
+ static int once, ret;
+ int cpu;
+
+ if (once)
+ return ret;
+
+ /*
+ * Iterate through all possible CPU's and clear the C-bit from
+ * percpu variables.
+ */
+ for_each_possible_cpu(cpu) {
+ struct kvm_vcpu_pv_apf_data *apf;
+ unsigned long pa;
+
+ apf = &per_cpu(apf_reason, cpu);
+ pa = slow_virt_to_phys(apf);
+ sme_early_decrypt(pa & PAGE_MASK, PAGE_SIZE);
+ ret = early_set_memory_decrypted(pa, PAGE_SIZE);
+ if (ret)
+ break;
+ }
+
+ once = 1;
+ return ret;
+}
+
static void kvm_guest_cpu_init(void)
{
if (!kvm_para_available())
return;
+ /*
+ * When SEV is active, map the shared percpu as unencrypted so that
+ * both guest and hypervsior can access the data.
+ */
+ if (sev_active()) {
+ if (kvm_map_hv_shared_decrypted()) {
+ printk(KERN_ERR "Failed to map percpu as unencrypted\n");
+ return;
+ }
+ }
+
if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF) && kvmapf) {
u64 pa = slow_virt_to_phys(this_cpu_ptr(&apf_reason));
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index da0be9a..52854cf 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -783,6 +783,9 @@
. = ALIGN(cacheline); \
*(.data..percpu) \
*(.data..percpu..shared_aligned) \
+ . = ALIGN(PAGE_SIZE); \
+ *(.data..percpu..hv_shared) \
+ . = ALIGN(PAGE_SIZE); \
VMLINUX_SYMBOL(__per_cpu_end) = .;
/**
diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h
index 8f16299..f74b0c3 100644
--- a/include/linux/percpu-defs.h
+++ b/include/linux/percpu-defs.h
@@ -173,6 +173,18 @@
DEFINE_PER_CPU_SECTION(type, name, "..read_mostly")
/*
+ * Declaration/definition used for per-CPU variables that must be shared
+ * between hypervisor and guest OS.
+ */
+#ifdef CONFIG_VIRTUALIZATION
+#define DECLARE_PER_CPU_HV_SHARED(type, name) \
+ DECLARE_PER_CPU_SECTION(type, name, "..hv_shared")
+
+#define DEFINE_PER_CPU_HV_SHARED(type, name) \
+ DEFINE_PER_CPU_SECTION(type, name, "..hv_shared")
+#endif
+
+/*
* Intermodule exports for per-CPU variables. sparse forgets about
* address space across EXPORT_SYMBOL(), change EXPORT_SYMBOL() to
* noop if __CHECKER__.
--
2.9.4
^ permalink raw reply related [flat|nested] 79+ messages in thread
* Re: [RFC Part1 PATCH v3 16/17] X86/KVM: Provide support to create Guest and HV shared per-CPU variables
2017-07-24 19:07 ` [RFC Part1 PATCH v3 16/17] X86/KVM: Provide support to create Guest and HV shared per-CPU variables Brijesh Singh
@ 2017-08-29 10:22 ` Borislav Petkov
2017-08-30 16:18 ` Brijesh Singh
0 siblings, 1 reply; 79+ messages in thread
From: Borislav Petkov @ 2017-08-29 10:22 UTC (permalink / raw)
To: Brijesh Singh
Cc: linux-kernel, x86, linux-efi, linuxppc-dev, kvm, Thomas Gleixner,
Ingo Molnar, H . Peter Anvin, Andy Lutomirski, Tony Luck,
Piotr Luc, Tom Lendacky, Fenghua Yu, Lu Baolu, Reza Arbab,
David Howells, Matt Fleming, Kirill A . Shutemov, Laura Abbott,
Ard Biesheuvel, Andrew Morton, Eric
On Mon, Jul 24, 2017 at 02:07:56PM -0500, Brijesh Singh wrote:
> Some KVM specific MSR's (steal-time, asyncpf, avic_eio) allocates per-CPU
MSRs
> variable at compile time and share its physical address with hypervisor.
That sentence needs changing - the MSRs don't allocate - for them gets
allocated.
> It presents a challege when SEV is active in guest OS, when SEV is active,
> the guest memory is encrypted with guest key hence hypervisor will not
> able to modify the guest memory. When SEV is active, we need to clear the
> encryption attribute (aka C-bit) of shared physical addresses so that both
> guest and hypervisor can access the data.
This whole paragraph needs rewriting.
> To solve this problem, I have tried these three options:
>
> 1) Convert the static per-CPU to dynamic per-CPU allocation and when SEV
> is detected clear the C-bit from the page table. But while doing so I
> found that per-CPU dynamic allocator was not ready when kvm_guest_cpu_init
> was called.
>
> 2) Since the C-bit works on PAGE_SIZE hence add some extra padding to
> 'struct kvm-steal-time' to make it PAGE_SIZE and then at runtime
"to make it PAGE_SIZE"?
I know what it means but it reads strange and needs more restraint when
rewriting it. :)
> clear the encryption attribute of the full PAGE. The downside of this -
> we need to modify structure which may break the compatibility.
>
> 3) Define a new per-CPU section (.data..percpu.hv_shared) which will be
> used to hold the compile time shared per-CPU variables. When SEV is
> detected we map this section without C-bit.
>
> This patch implements #3.
>From Documentation/process/submitting-patches.rst:
"Describe your changes in imperative mood, e.g. "make xyzzy do frotz"
instead of "[This patch] makes xyzzy do frotz" or "[I] changed xyzzy
to do frotz", as if you are giving orders to the codebase to change
its behaviour."
Also, never say "This patch" in a commit message of a patch. It is
tautologically useless.
> It introduces a new DEFINE_PER_CPU_HV_SHAHRED
There's no DEFINE_PER_CPU_HV_SHAHRED. Typo.
> macro to create a compile time per-CPU variable. When SEV is detected we
> clear the C-bit from the shared per-CPU variable.
>
> Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
> ---
> arch/x86/kernel/kvm.c | 46 ++++++++++++++++++++++++++++++++++++---
> include/asm-generic/vmlinux.lds.h | 3 +++
> include/linux/percpu-defs.h | 12 ++++++++++
> 3 files changed, 58 insertions(+), 3 deletions(-)
>
> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
> index 71c17a5..1f6fec8 100644
> --- a/arch/x86/kernel/kvm.c
> +++ b/arch/x86/kernel/kvm.c
> @@ -75,8 +75,8 @@ static int parse_no_kvmclock_vsyscall(char *arg)
>
> early_param("no-kvmclock-vsyscall", parse_no_kvmclock_vsyscall);
>
> -static DEFINE_PER_CPU(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64);
> -static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64);
> +static DEFINE_PER_CPU_HV_SHARED(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64);
> +static DEFINE_PER_CPU_HV_SHARED(struct kvm_steal_time, steal_time) __aligned(64);
> static int has_steal_clock = 0;
>
> /*
> @@ -303,7 +303,7 @@ static void kvm_register_steal_time(void)
> cpu, (unsigned long long) slow_virt_to_phys(st));
> }
>
> -static DEFINE_PER_CPU(unsigned long, kvm_apic_eoi) = KVM_PV_EOI_DISABLED;
> +static DEFINE_PER_CPU_HV_SHARED(unsigned long, kvm_apic_eoi) = KVM_PV_EOI_DISABLED;
>
> static notrace void kvm_guest_apic_eoi_write(u32 reg, u32 val)
> {
> @@ -319,11 +319,51 @@ static notrace void kvm_guest_apic_eoi_write(u32 reg, u32 val)
> apic->native_eoi_write(APIC_EOI, APIC_EOI_ACK);
> }
>
> +/* NOTE: function is marked as __ref because it is used by __init functions */
No need for that comment.
What should you look into is why do you need to call the early versions:
" * producing a warning (of course, no warning does not mean code is
* correct, so optimally document why the __ref is needed and why it's OK)."
And we do have the normal set_memory_decrypted() etc helpers so why
aren't we using those?
If you need to use the early ones too, then you probably need to
differentiate this in the callers by passing a "bool early", which calls
the proper flavor.
> +static int __ref kvm_map_hv_shared_decrypted(void)
> +{
> + static int once, ret;
> + int cpu;
> +
> + if (once)
> + return ret;
So this function gets called per-CPU but you need to do this ugly "once"
thing - i.e., global function called in a per-CPU context.
Why can't you do that mapping only on the current CPU and then
when that function is called on the next CPU, it will do the same thing
on that next CPU?
> + /*
> + * Iterate through all possible CPU's and clear the C-bit from
> + * percpu variables.
> + */
> + for_each_possible_cpu(cpu) {
> + struct kvm_vcpu_pv_apf_data *apf;
> + unsigned long pa;
> +
> + apf = &per_cpu(apf_reason, cpu);
> + pa = slow_virt_to_phys(apf);
> + sme_early_decrypt(pa & PAGE_MASK, PAGE_SIZE);
> + ret = early_set_memory_decrypted(pa, PAGE_SIZE);
> + if (ret)
> + break;
> + }
> +
> + once = 1;
> + return ret;
> +}
> +
> static void kvm_guest_cpu_init(void)
> {
> if (!kvm_para_available())
> return;
>
> + /*
> + * When SEV is active, map the shared percpu as unencrypted so that
... map the shared percpu area unencrypted...
> + * both guest and hypervsior can access the data.
> + */
> + if (sev_active()) {
> + if (kvm_map_hv_shared_decrypted()) {
> + printk(KERN_ERR "Failed to map percpu as unencrypted\n");
> + return;
> + }
> + }
> +
> if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF) && kvmapf) {
> u64 pa = slow_virt_to_phys(this_cpu_ptr(&apf_reason));
>
> diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
> index da0be9a..52854cf 100644
> --- a/include/asm-generic/vmlinux.lds.h
> +++ b/include/asm-generic/vmlinux.lds.h
> @@ -783,6 +783,9 @@
> . = ALIGN(cacheline); \
> *(.data..percpu) \
> *(.data..percpu..shared_aligned) \
> + . = ALIGN(PAGE_SIZE); \
> + *(.data..percpu..hv_shared) \
> + . = ALIGN(PAGE_SIZE); \
> VMLINUX_SYMBOL(__per_cpu_end) = .;
Yeah, no, you can't do that. That's adding this section unconditionally
on *every* arch. You need to do some ifdeffery like it is done at the
beginning of that file and have this only on the arch which supports SEV.
--
Regards/Gruss,
Boris.
SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nürnberg)
--
^ permalink raw reply [flat|nested] 79+ messages in thread
* Re: [RFC Part1 PATCH v3 16/17] X86/KVM: Provide support to create Guest and HV shared per-CPU variables
2017-08-29 10:22 ` Borislav Petkov
@ 2017-08-30 16:18 ` Brijesh Singh
2017-08-30 17:46 ` Borislav Petkov
0 siblings, 1 reply; 79+ messages in thread
From: Brijesh Singh @ 2017-08-30 16:18 UTC (permalink / raw)
To: Borislav Petkov
Cc: brijesh.singh, linux-kernel, x86, linux-efi, linuxppc-dev, kvm,
Thomas Gleixner, Ingo Molnar, H . Peter Anvin, Andy Lutomirski,
Tony Luck, Piotr Luc, Tom Lendacky, Fenghua Yu, Lu Baolu,
Reza Arbab, David Howells, Matt Fleming, Kirill A . Shutemov,
Laura Abbott, Ard Biesheuvel, Andrew Morton
Hi Boris,
On 08/29/2017 05:22 AM, Borislav Petkov wrote:
[...]
> On Mon, Jul 24, 2017 at 02:07:56PM -0500, Brijesh Singh wrote:
>> Some KVM specific MSR's (steal-time, asyncpf, avic_eio) allocates per-CPU
>
> MSRs
>
>> variable at compile time and share its physical address with hypervisor.
>
> That sentence needs changing - the MSRs don't allocate - for them gets
> allocated.
>
>> It presents a challenge when SEV is active in guest OS, when SEV is active,
>> the guest memory is encrypted with guest key hence hypervisor will not
>> able to modify the guest memory. When SEV is active, we need to clear the
>> encryption attribute (aka C-bit) of shared physical addresses so that both
>> guest and hypervisor can access the data.
>
> This whole paragraph needs rewriting.
>
I will improve the commit message in next rev.
[...]
>> +/* NOTE: function is marked as __ref because it is used by __init functions */
>
> No need for that comment.
>
> What should you look into is why do you need to call the early versions:
>
> " * producing a warning (of course, no warning does not mean code is
> * correct, so optimally document why the __ref is needed and why it's OK)."
>
> And we do have the normal set_memory_decrypted() etc helpers so why
> aren't we using those?
>
Since kvm_guest_init() is called early in the boot process, we will not be
able to use the set_memory_decrypted() function. IIRC, if we try calling
set_memory_decrypted() early then we will hit a BUG_ON [1] -- mainly when it
tries to flush the caches.
[1] http://elixir.free-electrons.com/linux/latest/source/arch/x86/mm/pageattr.c#L167
> If you need to use the early ones too, then you probably need to
> differentiate this in the callers by passing a "bool early", which calls
> the proper flavor.
>
Sure I can rearrange code to make it more readable and use "bool early"
parameter to differentiate it.
>> +static int __ref kvm_map_hv_shared_decrypted(void)
>> +{
>> + static int once, ret;
>> + int cpu;
>> +
>> + if (once)
>> + return ret;
>
> So this function gets called per-CPU but you need to do this ugly "once"
> thing - i.e., global function called in a per-CPU context.
>
> Why can't you do that mapping only on the current CPU and then
> when that function is called on the next CPU, it will do the same thing
> on that next CPU?
>
Yes, it can be done but I remember running into issues during the CPU hot plug.
The patch uses early_set_memory_decrypted() -- which calls
kernel_physical_mapping_init() to split the large pages into smaller. IIRC, the
API did not work after the system is successfully booted. After the system is
booted we must use the set_memory_decrypted().
I was trying to avoid mixing early and no-early set_memory_decrypted() but if
feedback is: use early_set_memory_decrypted() only if it's required, otherwise
use set_memory_decrypted() then I can improve the logic in next rev. thanks
[...]
>> diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
>> index da0be9a..52854cf 100644
>> --- a/include/asm-generic/vmlinux.lds.h
>> +++ b/include/asm-generic/vmlinux.lds.h
>> @@ -783,6 +783,9 @@
>> . = ALIGN(cacheline); \
>> *(.data..percpu) \
>> *(.data..percpu..shared_aligned) \
>> + . = ALIGN(PAGE_SIZE); \
>> + *(.data..percpu..hv_shared) \
>> + . = ALIGN(PAGE_SIZE); \
>> VMLINUX_SYMBOL(__per_cpu_end) = .;
>
> Yeah, no, you can't do that. That's adding this section unconditionally
> on *every* arch. You need to do some ifdeffery like it is done at the
> beginning of that file and have this only on the arch which supports SEV.
>
Will do . thanks
-Brijesh
^ permalink raw reply [flat|nested] 79+ messages in thread
* Re: [RFC Part1 PATCH v3 16/17] X86/KVM: Provide support to create Guest and HV shared per-CPU variables
2017-08-30 16:18 ` Brijesh Singh
@ 2017-08-30 17:46 ` Borislav Petkov
2017-09-01 22:52 ` Brijesh Singh
0 siblings, 1 reply; 79+ messages in thread
From: Borislav Petkov @ 2017-08-30 17:46 UTC (permalink / raw)
To: Brijesh Singh
Cc: linux-kernel, x86, linux-efi, linuxppc-dev, kvm, Thomas Gleixner,
Ingo Molnar, H . Peter Anvin, Andy Lutomirski, Tony Luck,
Piotr Luc, Tom Lendacky, Fenghua Yu, Lu Baolu, Reza Arbab,
David Howells, Matt Fleming, Kirill A . Shutemov, Laura Abbott,
Ard Biesheuvel, Andrew Morton, Eric
On Wed, Aug 30, 2017 at 11:18:42AM -0500, Brijesh Singh wrote:
> I was trying to avoid mixing early and no-early set_memory_decrypted() but if
> feedback is: use early_set_memory_decrypted() only if its required otherwise
> use set_memory_decrypted() then I can improve the logic in next rev. thanks
Yes, I think you should use the early versions when you're, well,
*early* :-) But get rid of that for_each_possible_cpu() and do it only
on the current CPU, as this is a per-CPU path anyway. If you need to
do it on *every* CPU and very early, then you need a separate function
which is called in kvm_smp_prepare_boot_cpu() as there you're pre-SMP.
Thx.
--
Regards/Gruss,
Boris.
SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nürnberg)
--
^ permalink raw reply [flat|nested] 79+ messages in thread
* Re: [RFC Part1 PATCH v3 16/17] X86/KVM: Provide support to create Guest and HV shared per-CPU variables
2017-08-30 17:46 ` Borislav Petkov
@ 2017-09-01 22:52 ` Brijesh Singh
2017-09-02 3:21 ` Andy Lutomirski
[not found] ` <8155b5b2-b2b3-bc8f-33ae-b81b661a2e38-5C7GfCeVMHo@public.gmane.org>
0 siblings, 2 replies; 79+ messages in thread
From: Brijesh Singh @ 2017-09-01 22:52 UTC (permalink / raw)
To: Borislav Petkov
Cc: brijesh.singh, linux-kernel, x86, linux-efi, linuxppc-dev, kvm,
Thomas Gleixner, Ingo Molnar, H . Peter Anvin, Andy Lutomirski,
Tony Luck, Piotr Luc, Tom Lendacky, Fenghua Yu, Lu Baolu,
Reza Arbab, David Howells, Matt Fleming, Kirill A . Shutemov,
Laura Abbott, Ard Biesheuvel, Andrew Morton
Hi Boris,
On 08/30/2017 12:46 PM, Borislav Petkov wrote:
> On Wed, Aug 30, 2017 at 11:18:42AM -0500, Brijesh Singh wrote:
>> I was trying to avoid mixing early and no-early set_memory_decrypted() but if
>> feedback is: use early_set_memory_decrypted() only if its required otherwise
>> use set_memory_decrypted() then I can improve the logic in next rev. thanks
>
> Yes, I think you should use the early versions when you're, well,
> *early* :-) But get rid of that for_each_possible_cpu() and do it only
> on the current CPU, as this is a per-CPU path anyway. If you need to
> do it on *every* CPU and very early, then you need a separate function
> which is called in kvm_smp_prepare_boot_cpu() as there you're pre-SMP.
>
I am trying to implement your feedback and now remember why I choose to
use early_set_memory_decrypted() and for_each_possible_cpu loop. These
percpu variables are static. Hence before clearing the C-bit we must
perform the in-place decryption so that original assignment is preserved
after we change the C-bit. Tom's SME patch [1] added sme_early_decrypt()
-- which can be used to perform the in-place decryption but we do not have
similar routine for non-early cases. In order to address your feedback,
we have to add similar functions. So far, we have not seen the need for
having such functions except this case. The approach we have right now
works just fine and not sure if its worth adding new functions.
Thoughts ?
[1] Commit :7f8b7e7 x86/mm: Add support for early encryption/decryption of memory
-Brijesh
^ permalink raw reply [flat|nested] 79+ messages in thread
* Re: [RFC Part1 PATCH v3 16/17] X86/KVM: Provide support to create Guest and HV shared per-CPU variables
2017-09-01 22:52 ` Brijesh Singh
@ 2017-09-02 3:21 ` Andy Lutomirski
[not found] ` <CALCETrV+rv=9Rg5V1z8vHtVDW64eCNtZHQMW8DipRADvm+qP5A-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
[not found] ` <8155b5b2-b2b3-bc8f-33ae-b81b661a2e38-5C7GfCeVMHo@public.gmane.org>
1 sibling, 1 reply; 79+ messages in thread
From: Andy Lutomirski @ 2017-09-02 3:21 UTC (permalink / raw)
To: Brijesh Singh
Cc: Borislav Petkov, linux-kernel, X86 ML, linux-efi, linuxppc-dev,
kvm list, Thomas Gleixner, Ingo Molnar, H . Peter Anvin,
Andy Lutomirski, Tony Luck, Piotr Luc, Tom Lendacky, Fenghua Yu,
Lu Baolu, Reza Arbab, David Howells, Matt Fleming,
Kirill A . Shutemov
On Fri, Sep 1, 2017 at 3:52 PM, Brijesh Singh <brijesh.singh@amd.com> wrote:
> Hi Boris,
>
> On 08/30/2017 12:46 PM, Borislav Petkov wrote:
>>
>> On Wed, Aug 30, 2017 at 11:18:42AM -0500, Brijesh Singh wrote:
>>>
>>> I was trying to avoid mixing early and no-early set_memory_decrypted()
>>> but if
>>> feedback is: use early_set_memory_decrypted() only if its required
>>> otherwise
>>> use set_memory_decrypted() then I can improve the logic in next rev.
>>> thanks
>>
>>
>> Yes, I think you should use the early versions when you're, well,
>> *early* :-) But get rid of that for_each_possible_cpu() and do it only
>> on the current CPU, as this is a per-CPU path anyway. If you need to
>> do it on *every* CPU and very early, then you need a separate function
>> which is called in kvm_smp_prepare_boot_cpu() as there you're pre-SMP.
>>
>
> I am trying to implement your feedback and now remember why I choose to
> use early_set_memory_decrypted() and for_each_possible_cpu loop. These
> percpu variables are static. Hence before clearing the C-bit we must
> perform the in-place decryption so that original assignment is preserved
> after we change the C-bit. Tom's SME patch [1] added sme_early_decrypt()
> -- which can be used to perform the in-place decryption but we do not have
> similar routine for non-early cases. In order to address your feedback,
> we have to add similar functions. So far, we have not seen the need for
> having such functions except this cases. The approach we have right now
> works just fine and not sure if its worth adding new functions.
>
> Thoughts ?
>
> [1] Commit :7f8b7e7 x86/mm: Add support for early encryption/decryption of
> memory
Shouldn't this be called DEFINE_PER_CPU_UNENCRYPTED? ISTM the "HV
shared" bit is incidental.
^ permalink raw reply [flat|nested] 79+ messages in thread
[parent not found: <8155b5b2-b2b3-bc8f-33ae-b81b661a2e38-5C7GfCeVMHo@public.gmane.org>]
* Re: [RFC Part1 PATCH v3 16/17] X86/KVM: Provide support to create Guest and HV shared per-CPU variables
[not found] ` <8155b5b2-b2b3-bc8f-33ae-b81b661a2e38-5C7GfCeVMHo@public.gmane.org>
@ 2017-09-04 17:05 ` Borislav Petkov
2017-09-04 17:47 ` Brijesh Singh
0 siblings, 1 reply; 79+ messages in thread
From: Borislav Petkov @ 2017-09-04 17:05 UTC (permalink / raw)
To: Brijesh Singh
Cc: linux-kernel-u79uwXL29TY76Z2rM5mHXA, x86-DgEjT+Ai2ygdnm+yROfE0A,
linux-efi-u79uwXL29TY76Z2rM5mHXA,
linuxppc-dev-uLR06cmDAlY/bJ5BZ2RsiQ, kvm-u79uwXL29TY76Z2rM5mHXA,
Thomas Gleixner, Ingo Molnar, H . Peter Anvin, Andy Lutomirski,
Tony Luck, Piotr Luc, Tom Lendacky, Fenghua Yu, Reza Arbab,
David Howells, Matt Fleming, Kirill A . Shutemov, Laura Abbott,
Ard Biesheuvel, Andrew Morton, Eric Biederman, Benjamin
On Fri, Sep 01, 2017 at 05:52:13PM -0500, Brijesh Singh wrote:
> So far, we have not seen the need for having such functions except
> this cases. The approach we have right now works just fine and not
> sure if its worth adding new functions.
Then put the call to kvm_map_hv_shared_decrypted() into
kvm_smp_prepare_boot_cpu() to denote that you're executing this whole
stuff only once during guest init.
Now you're doing additional jumping-through-hoops with that once static
var just so you can force something which needs to execute only once but
gets called in a per-CPU path.
See what I mean?
> Thoughts ?
>
> [1] Commit :7f8b7e7 x86/mm: Add support for early encryption/decryption of memory
Add
[core]
abbrev = 12
to the core section of your .gitconfig.
--
Regards/Gruss,
Boris.
SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nürnberg)
--
^ permalink raw reply [flat|nested] 79+ messages in thread
* Re: [RFC Part1 PATCH v3 16/17] X86/KVM: Provide support to create Guest and HV shared per-CPU variables
2017-09-04 17:05 ` Borislav Petkov
@ 2017-09-04 17:47 ` Brijesh Singh
0 siblings, 0 replies; 79+ messages in thread
From: Brijesh Singh @ 2017-09-04 17:47 UTC (permalink / raw)
To: Borislav Petkov
Cc: brijesh.singh, linux-kernel, x86, linux-efi, linuxppc-dev, kvm,
Thomas Gleixner, Ingo Molnar, H . Peter Anvin, Andy Lutomirski,
Tony Luck, Piotr Luc, Tom Lendacky, Fenghua Yu, Reza Arbab,
David Howells, Matt Fleming, Kirill A . Shutemov, Laura Abbott,
Ard Biesheuvel, Andrew Morton, Eric Biederman
On 9/4/17 12:05 PM, Borislav Petkov wrote:
> On Fri, Sep 01, 2017 at 05:52:13PM -0500, Brijesh Singh wrote:
>> So far, we have not seen the need for having such functions except
>> this cases. The approach we have right now works just fine and not
>> sure if its worth adding new functions.
> Then put the call to kvm_map_hv_shared_decrypted() into
> kvm_smp_prepare_boot_cpu() to denote that you're executing this whole
> stuff only once during guest init.
>
> Now you're doing additional jumping-through-hoops with that once static
> var just so you can force something which needs to execute only once but
> gets called in a per-CPU path.
>
> See what I mean?
Yes, I see your point. I will address this issue in next rev.
-Brijesh
^ permalink raw reply [flat|nested] 79+ messages in thread
[parent not found: <20170724190757.11278-1-brijesh.singh-5C7GfCeVMHo@public.gmane.org>]
* [RFC Part1 PATCH v3 01/17] Documentation/x86: Add AMD Secure Encrypted Virtualization (SEV) descrption
[not found] ` <20170724190757.11278-1-brijesh.singh-5C7GfCeVMHo@public.gmane.org>
@ 2017-07-24 19:07 ` Brijesh Singh
[not found] ` <20170724190757.11278-2-brijesh.singh-5C7GfCeVMHo@public.gmane.org>
2017-07-24 19:07 ` [RFC Part1 PATCH v3 04/17] x86/mm: Don't attempt to encrypt initrd under SEV Brijesh Singh
` (6 subsequent siblings)
7 siblings, 1 reply; 79+ messages in thread
From: Brijesh Singh @ 2017-07-24 19:07 UTC (permalink / raw)
To: linux-kernel-u79uwXL29TY76Z2rM5mHXA, x86-DgEjT+Ai2ygdnm+yROfE0A,
linux-efi-u79uwXL29TY76Z2rM5mHXA,
linuxppc-dev-uLR06cmDAlY/bJ5BZ2RsiQ, kvm-u79uwXL29TY76Z2rM5mHXA
Cc: Thomas Gleixner, Ingo Molnar, H . Peter Anvin, Borislav Petkov,
Andy Lutomirski, Tony Luck, Piotr Luc, Tom Lendacky, Fenghua Yu,
Lu Baolu, Reza Arbab, David Howells, Matt Fleming,
Kirill A . Shutemov, Laura Abbott, Ard Biesheuvel, Andrew Morton,
Eric Biederman, Benjamin Herrenschmidt, Paul Mackerras
Update amd-memory-encryption document describing the AMD Secure Encrypted
Virtualization (SEV) feature.
Signed-off-by: Brijesh Singh <brijesh.singh-5C7GfCeVMHo@public.gmane.org>
---
Documentation/x86/amd-memory-encryption.txt | 29 ++++++++++++++++++++++++++---
1 file changed, 26 insertions(+), 3 deletions(-)
diff --git a/Documentation/x86/amd-memory-encryption.txt b/Documentation/x86/amd-memory-encryption.txt
index f512ab7..747df07 100644
--- a/Documentation/x86/amd-memory-encryption.txt
+++ b/Documentation/x86/amd-memory-encryption.txt
@@ -1,4 +1,5 @@
-Secure Memory Encryption (SME) is a feature found on AMD processors.
+Secure Memory Encryption (SME) and Secure Encrypted Virtualization (SEV) are
+features found on AMD processors.
SME provides the ability to mark individual pages of memory as encrypted using
the standard x86 page tables. A page that is marked encrypted will be
@@ -6,6 +7,12 @@ automatically decrypted when read from DRAM and encrypted when written to
DRAM. SME can therefore be used to protect the contents of DRAM from physical
attacks on the system.
+SEV enables running encrypted virtual machine (VMs) in which the code and data
+of the virtual machine are secured so that decrypted version is available only
+within the VM itself. SEV guest VMs have the concept of private and shared memory.
+Private memory is encrypted with the guest-specific key, while shared memory
+may be encrypted with hypervisor key.
+
A page is encrypted when a page table entry has the encryption bit set (see
below on how to determine its position). The encryption bit can also be
specified in the cr3 register, allowing the PGD table to be encrypted. Each
@@ -19,11 +26,20 @@ so that the PGD is encrypted, but not set the encryption bit in the PGD entry
for a PUD which results in the PUD pointed to by that entry to not be
encrypted.
-Support for SME can be determined through the CPUID instruction. The CPUID
-function 0x8000001f reports information related to SME:
+When SEV is enabled, certain types of memory (namely instruction pages and guest
+page tables) are always treated as private. Due to security reasons all DMA
+operations inside the guest must be performed on shared memory. Since the
+memory encryption bit is only controllable by the guest OS when it is operating
+in 64-bit or 32-bit PAE mode, in all other modes the SEV hardware forces memory
+encryption bit to 1.
+
+Support for SME and SEV can be determined through the CPUID instruction. The
+CPUID function 0x8000001f reports information related to SME:
0x8000001f[eax]:
Bit[0] indicates support for SME
+ 0x8000001f[eax]:
+ Bit[1] indicates support for SEV
0x8000001f[ebx]:
Bits[5:0] pagetable bit number used to activate memory
encryption
@@ -39,6 +55,13 @@ determine if SME is enabled and/or to enable memory encryption:
Bit[23] 0 = memory encryption features are disabled
1 = memory encryption features are enabled
+If SEV is supported, MSR 0xc0010131 (MSR_F17H_SEV) can be used to determine if
+SEV is active:
+
+ 0xc0010131:
+ Bit[0] 0 = memory encryption is not active
+ 1 = memory encryption is active
+
Linux relies on BIOS to set this bit if BIOS has determined that the reduction
in the physical address space as a result of enabling memory encryption (see
CPUID information above) will not conflict with the address space resource
--
2.9.4
^ permalink raw reply related [flat|nested] 79+ messages in thread
* [RFC Part1 PATCH v3 04/17] x86/mm: Don't attempt to encrypt initrd under SEV
[not found] ` <20170724190757.11278-1-brijesh.singh-5C7GfCeVMHo@public.gmane.org>
2017-07-24 19:07 ` [RFC Part1 PATCH v3 01/17] Documentation/x86: Add AMD Secure Encrypted Virtualization (SEV) descrption Brijesh Singh
@ 2017-07-24 19:07 ` Brijesh Singh
2017-07-26 14:44 ` Borislav Petkov
2017-07-24 19:07 ` [RFC Part1 PATCH v3 05/17] x86, realmode: Don't decrypt trampoline area " Brijesh Singh
` (5 subsequent siblings)
7 siblings, 1 reply; 79+ messages in thread
From: Brijesh Singh @ 2017-07-24 19:07 UTC (permalink / raw)
To: linux-kernel-u79uwXL29TY76Z2rM5mHXA, x86-DgEjT+Ai2ygdnm+yROfE0A,
linux-efi-u79uwXL29TY76Z2rM5mHXA,
linuxppc-dev-uLR06cmDAlY/bJ5BZ2RsiQ, kvm-u79uwXL29TY76Z2rM5mHXA
Cc: Thomas Gleixner, Ingo Molnar, H . Peter Anvin, Borislav Petkov,
Andy Lutomirski, Tony Luck, Piotr Luc, Tom Lendacky, Fenghua Yu,
Lu Baolu, Reza Arbab, David Howells, Matt Fleming,
Kirill A . Shutemov, Laura Abbott, Ard Biesheuvel, Andrew Morton,
Eric Biederman, Benjamin Herrenschmidt, Paul Mackerras
From: Tom Lendacky <thomas.lendacky-5C7GfCeVMHo@public.gmane.org>
When SEV is active the initrd/initramfs will already have been placed in
memory encrypted so do not try to encrypt it.
Signed-off-by: Tom Lendacky <thomas.lendacky-5C7GfCeVMHo@public.gmane.org>
Signed-off-by: Brijesh Singh <brijesh.singh-5C7GfCeVMHo@public.gmane.org>
---
arch/x86/kernel/setup.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 0bfe0c1..01d56a1 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -379,9 +379,11 @@ static void __init reserve_initrd(void)
* If SME is active, this memory will be marked encrypted by the
* kernel when it is accessed (including relocation). However, the
* ramdisk image was loaded decrypted by the bootloader, so make
- * sure that it is encrypted before accessing it.
+ * sure that it is encrypted before accessing it. For SEV the
+ * ramdisk will already be encyrpted, so only do this for SME.
*/
- sme_early_encrypt(ramdisk_image, ramdisk_end - ramdisk_image);
+ if (sme_active())
+ sme_early_encrypt(ramdisk_image, ramdisk_end - ramdisk_image);
initrd_start = 0;
--
2.9.4
^ permalink raw reply related [flat|nested] 79+ messages in thread
* Re: [RFC Part1 PATCH v3 04/17] x86/mm: Don't attempt to encrypt initrd under SEV
2017-07-24 19:07 ` [RFC Part1 PATCH v3 04/17] x86/mm: Don't attempt to encrypt initrd under SEV Brijesh Singh
@ 2017-07-26 14:44 ` Borislav Petkov
0 siblings, 0 replies; 79+ messages in thread
From: Borislav Petkov @ 2017-07-26 14:44 UTC (permalink / raw)
To: Brijesh Singh
Cc: linux-kernel, x86, linux-efi, linuxppc-dev, kvm, Thomas Gleixner,
Ingo Molnar, H . Peter Anvin, Andy Lutomirski, Tony Luck,
Piotr Luc, Tom Lendacky, Fenghua Yu, Lu Baolu, Reza Arbab,
David Howells, Matt Fleming, Kirill A . Shutemov, Laura Abbott,
Ard Biesheuvel, Andrew Morton, Eric
On Mon, Jul 24, 2017 at 02:07:44PM -0500, Brijesh Singh wrote:
> From: Tom Lendacky <thomas.lendacky@amd.com>
>
> When SEV is active the initrd/initramfs will already have already been
> placed in memory encyrpted so do not try to encrypt it.
>
> Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
> Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
> ---
> arch/x86/kernel/setup.c | 6 ++++--
> 1 file changed, 4 insertions(+), 2 deletions(-)
>
> diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
> index 0bfe0c1..01d56a1 100644
> --- a/arch/x86/kernel/setup.c
> +++ b/arch/x86/kernel/setup.c
> @@ -379,9 +379,11 @@ static void __init reserve_initrd(void)
> * If SME is active, this memory will be marked encrypted by the
> * kernel when it is accessed (including relocation). However, the
> * ramdisk image was loaded decrypted by the bootloader, so make
> - * sure that it is encrypted before accessing it.
> + * sure that it is encrypted before accessing it. For SEV the
> + * ramdisk will already be encyrpted, so only do this for SME.
^^^^^^^^^
Please introduce a spellchecker into your patch creation workflow.
--
Regards/Gruss,
Boris.
SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nürnberg)
--
^ permalink raw reply [flat|nested] 79+ messages in thread
* [RFC Part1 PATCH v3 05/17] x86, realmode: Don't decrypt trampoline area under SEV
[not found] ` <20170724190757.11278-1-brijesh.singh-5C7GfCeVMHo@public.gmane.org>
2017-07-24 19:07 ` [RFC Part1 PATCH v3 01/17] Documentation/x86: Add AMD Secure Encrypted Virtualization (SEV) descrption Brijesh Singh
2017-07-24 19:07 ` [RFC Part1 PATCH v3 04/17] x86/mm: Don't attempt to encrypt initrd under SEV Brijesh Singh
@ 2017-07-24 19:07 ` Brijesh Singh
[not found] ` <20170724190757.11278-6-brijesh.singh-5C7GfCeVMHo@public.gmane.org>
2017-07-24 19:07 ` [RFC Part1 PATCH v3 06/17] x86/mm: Use encrypted access of boot related data with SEV Brijesh Singh
` (4 subsequent siblings)
7 siblings, 1 reply; 79+ messages in thread
From: Brijesh Singh @ 2017-07-24 19:07 UTC (permalink / raw)
To: linux-kernel-u79uwXL29TY76Z2rM5mHXA, x86-DgEjT+Ai2ygdnm+yROfE0A,
linux-efi-u79uwXL29TY76Z2rM5mHXA,
linuxppc-dev-uLR06cmDAlY/bJ5BZ2RsiQ, kvm-u79uwXL29TY76Z2rM5mHXA
Cc: Thomas Gleixner, Ingo Molnar, H . Peter Anvin, Borislav Petkov,
Andy Lutomirski, Tony Luck, Piotr Luc, Tom Lendacky, Fenghua Yu,
Lu Baolu, Reza Arbab, David Howells, Matt Fleming,
Kirill A . Shutemov, Laura Abbott, Ard Biesheuvel, Andrew Morton,
Eric Biederman, Benjamin Herrenschmidt, Paul Mackerras
From: Tom Lendacky <thomas.lendacky-5C7GfCeVMHo@public.gmane.org>
When SEV is active the trampoline area will need to be in encrypted
memory so only mark the area decrypted if SME is active.
Signed-off-by: Tom Lendacky <thomas.lendacky-5C7GfCeVMHo@public.gmane.org>
Signed-off-by: Brijesh Singh <brijesh.singh-5C7GfCeVMHo@public.gmane.org>
---
arch/x86/realmode/init.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c
index 1f71980..c7eeca7 100644
--- a/arch/x86/realmode/init.c
+++ b/arch/x86/realmode/init.c
@@ -63,9 +63,11 @@ static void __init setup_real_mode(void)
/*
* If SME is active, the trampoline area will need to be in
* decrypted memory in order to bring up other processors
- * successfully.
+ * successfully. For SEV the trampoline area needs to be in
+ * encrypted memory, so only do this for SME.
*/
- set_memory_decrypted((unsigned long)base, size >> PAGE_SHIFT);
+ if (sme_active())
+ set_memory_decrypted((unsigned long)base, size >> PAGE_SHIFT);
memcpy(base, real_mode_blob, size);
--
2.9.4
^ permalink raw reply related [flat|nested] 79+ messages in thread
* [RFC Part1 PATCH v3 06/17] x86/mm: Use encrypted access of boot related data with SEV
[not found] ` <20170724190757.11278-1-brijesh.singh-5C7GfCeVMHo@public.gmane.org>
` (2 preceding siblings ...)
2017-07-24 19:07 ` [RFC Part1 PATCH v3 05/17] x86, realmode: Don't decrypt trampoline area " Brijesh Singh
@ 2017-07-24 19:07 ` Brijesh Singh
2017-07-27 13:31 ` Borislav Petkov
2017-07-24 19:07 ` [RFC Part1 PATCH v3 09/17] resource: Consolidate resource walking code Brijesh Singh
` (3 subsequent siblings)
7 siblings, 1 reply; 79+ messages in thread
From: Brijesh Singh @ 2017-07-24 19:07 UTC (permalink / raw)
To: linux-kernel-u79uwXL29TY76Z2rM5mHXA, x86-DgEjT+Ai2ygdnm+yROfE0A,
linux-efi-u79uwXL29TY76Z2rM5mHXA,
linuxppc-dev-uLR06cmDAlY/bJ5BZ2RsiQ, kvm-u79uwXL29TY76Z2rM5mHXA
Cc: Thomas Gleixner, Ingo Molnar, H . Peter Anvin, Borislav Petkov,
Andy Lutomirski, Tony Luck, Piotr Luc, Tom Lendacky, Fenghua Yu,
Lu Baolu, Reza Arbab, David Howells, Matt Fleming,
Kirill A . Shutemov, Laura Abbott, Ard Biesheuvel, Andrew Morton,
Eric Biederman, Benjamin Herrenschmidt, Paul Mackerras
From: Tom Lendacky <thomas.lendacky-5C7GfCeVMHo@public.gmane.org>
When Secure Encrypted Virtualization (SEV) is active, boot data (such as
EFI related data, setup data) is encrypted and needs to be accessed as
such when mapped. Update the architecture override in early_memremap to
keep the encryption attribute when mapping this data.
Signed-off-by: Tom Lendacky <thomas.lendacky-5C7GfCeVMHo@public.gmane.org>
Signed-off-by: Brijesh Singh <brijesh.singh-5C7GfCeVMHo@public.gmane.org>
---
arch/x86/mm/ioremap.c | 44 ++++++++++++++++++++++++++++++++------------
1 file changed, 32 insertions(+), 12 deletions(-)
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 34f0e18..c0be7cf 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -422,6 +422,9 @@ void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr)
* areas should be mapped decrypted. And since the encryption key can
* change across reboots, persistent memory should also be mapped
* decrypted.
+ *
+ * If SEV is active, that implies that BIOS/UEFI also ran encrypted so
+ * only persistent memory should be mapped decrypted.
*/
static bool memremap_should_map_decrypted(resource_size_t phys_addr,
unsigned long size)
@@ -458,6 +461,11 @@ static bool memremap_should_map_decrypted(resource_size_t phys_addr,
case E820_TYPE_ACPI:
case E820_TYPE_NVS:
case E820_TYPE_UNUSABLE:
+ /* For SEV, these areas are encrypted */
+ if (sev_active())
+ break;
+ /* Fallthrough */
+
case E820_TYPE_PRAM:
return true;
default:
@@ -581,7 +589,7 @@ static bool __init early_memremap_is_setup_data(resource_size_t phys_addr,
bool arch_memremap_can_ram_remap(resource_size_t phys_addr, unsigned long size,
unsigned long flags)
{
- if (!sme_active())
+ if (!sme_active() && !sev_active())
return true;
if (flags & MEMREMAP_ENC)
@@ -590,10 +598,15 @@ bool arch_memremap_can_ram_remap(resource_size_t phys_addr, unsigned long size,
if (flags & MEMREMAP_DEC)
return false;
- if (memremap_is_setup_data(phys_addr, size) ||
- memremap_is_efi_data(phys_addr, size) ||
- memremap_should_map_decrypted(phys_addr, size))
- return false;
+ if (sme_active()) {
+ if (memremap_is_setup_data(phys_addr, size) ||
+ memremap_is_efi_data(phys_addr, size) ||
+ memremap_should_map_decrypted(phys_addr, size))
+ return false;
+ } else if (sev_active()) {
+ if (memremap_should_map_decrypted(phys_addr, size))
+ return false;
+ }
return true;
}
@@ -608,15 +621,22 @@ pgprot_t __init early_memremap_pgprot_adjust(resource_size_t phys_addr,
unsigned long size,
pgprot_t prot)
{
- if (!sme_active())
+ if (!sme_active() && !sev_active())
return prot;
- if (early_memremap_is_setup_data(phys_addr, size) ||
- memremap_is_efi_data(phys_addr, size) ||
- memremap_should_map_decrypted(phys_addr, size))
- prot = pgprot_decrypted(prot);
- else
- prot = pgprot_encrypted(prot);
+ if (sme_active()) {
+ if (early_memremap_is_setup_data(phys_addr, size) ||
+ memremap_is_efi_data(phys_addr, size) ||
+ memremap_should_map_decrypted(phys_addr, size))
+ prot = pgprot_decrypted(prot);
+ else
+ prot = pgprot_encrypted(prot);
+ } else if (sev_active()) {
+ if (memremap_should_map_decrypted(phys_addr, size))
+ prot = pgprot_decrypted(prot);
+ else
+ prot = pgprot_encrypted(prot);
+ }
return prot;
}
--
2.9.4
^ permalink raw reply related [flat|nested] 79+ messages in thread
* Re: [RFC Part1 PATCH v3 06/17] x86/mm: Use encrypted access of boot related data with SEV
2017-07-24 19:07 ` [RFC Part1 PATCH v3 06/17] x86/mm: Use encrypted access of boot related data with SEV Brijesh Singh
@ 2017-07-27 13:31 ` Borislav Petkov
2017-08-17 18:05 ` Tom Lendacky
0 siblings, 1 reply; 79+ messages in thread
From: Borislav Petkov @ 2017-07-27 13:31 UTC (permalink / raw)
To: Brijesh Singh
Cc: linux-kernel, x86, linux-efi, linuxppc-dev, kvm, Thomas Gleixner,
Ingo Molnar, H . Peter Anvin, Andy Lutomirski, Tony Luck,
Piotr Luc, Tom Lendacky, Fenghua Yu, Lu Baolu, Reza Arbab,
David Howells, Matt Fleming, Kirill A . Shutemov, Laura Abbott,
Ard Biesheuvel, Andrew Morton, Eric
On Mon, Jul 24, 2017 at 02:07:46PM -0500, Brijesh Singh wrote:
> From: Tom Lendacky <thomas.lendacky@amd.com>
>
> When Secure Encrypted Virtualization (SEV) is active, boot data (such as
> EFI related data, setup data) is encrypted and needs to be accessed as
> such when mapped. Update the architecture override in early_memremap to
> keep the encryption attribute when mapping this data.
>
> Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
> Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
> ---
> arch/x86/mm/ioremap.c | 44 ++++++++++++++++++++++++++++++++------------
> 1 file changed, 32 insertions(+), 12 deletions(-)
...
> @@ -590,10 +598,15 @@ bool arch_memremap_can_ram_remap(resource_size_t phys_addr, unsigned long size,
> if (flags & MEMREMAP_DEC)
> return false;
>
> - if (memremap_is_setup_data(phys_addr, size) ||
> - memremap_is_efi_data(phys_addr, size) ||
> - memremap_should_map_decrypted(phys_addr, size))
> - return false;
> + if (sme_active()) {
> + if (memremap_is_setup_data(phys_addr, size) ||
> + memremap_is_efi_data(phys_addr, size) ||
> + memremap_should_map_decrypted(phys_addr, size))
> + return false;
> + } else if (sev_active()) {
> + if (memremap_should_map_decrypted(phys_addr, size))
> + return false;
> + }
>
> return true;
> }
I guess this function's hind part can be simplified to:
if (sme_active()) {
if (memremap_is_setup_data(phys_addr, size) ||
memremap_is_efi_data(phys_addr, size))
return false;
}
return ! memremap_should_map_decrypted(phys_addr, size);
}
> @@ -608,15 +621,22 @@ pgprot_t __init early_memremap_pgprot_adjust(resource_size_t phys_addr,
> unsigned long size,
> pgprot_t prot)
And this one in a similar manner...
> {
> - if (!sme_active())
> + if (!sme_active() && !sev_active())
> return prot;
... and you don't need that check...
> - if (early_memremap_is_setup_data(phys_addr, size) ||
> - memremap_is_efi_data(phys_addr, size) ||
> - memremap_should_map_decrypted(phys_addr, size))
> - prot = pgprot_decrypted(prot);
> - else
> - prot = pgprot_encrypted(prot);
> + if (sme_active()) {
... if you're going to do it here too.
> + if (early_memremap_is_setup_data(phys_addr, size) ||
> + memremap_is_efi_data(phys_addr, size) ||
> + memremap_should_map_decrypted(phys_addr, size))
> + prot = pgprot_decrypted(prot);
> + else
> + prot = pgprot_encrypted(prot);
> + } else if (sev_active()) {
And here.
> + if (memremap_should_map_decrypted(phys_addr, size))
> + prot = pgprot_decrypted(prot);
> + else
> + prot = pgprot_encrypted(prot);
> + }
--
Regards/Gruss,
Boris.
SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nürnberg)
--
^ permalink raw reply [flat|nested] 79+ messages in thread
* Re: [RFC Part1 PATCH v3 06/17] x86/mm: Use encrypted access of boot related data with SEV
2017-07-27 13:31 ` Borislav Petkov
@ 2017-08-17 18:05 ` Tom Lendacky
0 siblings, 0 replies; 79+ messages in thread
From: Tom Lendacky @ 2017-08-17 18:05 UTC (permalink / raw)
To: Borislav Petkov, Brijesh Singh
Cc: linux-kernel, x86, linux-efi, linuxppc-dev, kvm, Thomas Gleixner,
Ingo Molnar, H . Peter Anvin, Andy Lutomirski, Tony Luck,
Piotr Luc, Fenghua Yu, Lu Baolu, Reza Arbab, David Howells,
Matt Fleming, Kirill A . Shutemov, Laura Abbott, Ard Biesheuvel,
Andrew Morton, Eric Biederman, Benjamin
On 7/27/2017 8:31 AM, Borislav Petkov wrote:
> On Mon, Jul 24, 2017 at 02:07:46PM -0500, Brijesh Singh wrote:
>> From: Tom Lendacky <thomas.lendacky@amd.com>
>>
>> When Secure Encrypted Virtualization (SEV) is active, boot data (such as
>> EFI related data, setup data) is encrypted and needs to be accessed as
>> such when mapped. Update the architecture override in early_memremap to
>> keep the encryption attribute when mapping this data.
>>
>> Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
>> Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
>> ---
>> arch/x86/mm/ioremap.c | 44 ++++++++++++++++++++++++++++++++------------
>> 1 file changed, 32 insertions(+), 12 deletions(-)
>
> ...
>
>> @@ -590,10 +598,15 @@ bool arch_memremap_can_ram_remap(resource_size_t phys_addr, unsigned long size,
>> if (flags & MEMREMAP_DEC)
>> return false;
>>
>> - if (memremap_is_setup_data(phys_addr, size) ||
>> - memremap_is_efi_data(phys_addr, size) ||
>> - memremap_should_map_decrypted(phys_addr, size))
>> - return false;
>> + if (sme_active()) {
>> + if (memremap_is_setup_data(phys_addr, size) ||
>> + memremap_is_efi_data(phys_addr, size) ||
>> + memremap_should_map_decrypted(phys_addr, size))
>> + return false;
>> + } else if (sev_active()) {
>> + if (memremap_should_map_decrypted(phys_addr, size))
>> + return false;
>> + }
>>
>> return true;
>> }
>
> I guess this function's hind part can be simplified to:
>
> if (sme_active()) {
> if (memremap_is_setup_data(phys_addr, size) ||
> memremap_is_efi_data(phys_addr, size))
> return false;
> }
>
> return ! memremap_should_map_decrypted(phys_addr, size);
> }
>
Ok, definitely cleaner.
>> @@ -608,15 +621,22 @@ pgprot_t __init early_memremap_pgprot_adjust(resource_size_t phys_addr,
>> unsigned long size,
>> pgprot_t prot)
>
> And this one in a similar manner...
>
>> {
>> - if (!sme_active())
>> + if (!sme_active() && !sev_active())
>> return prot;
>
> ... and you don't need that check...
>
>> - if (early_memremap_is_setup_data(phys_addr, size) ||
>> - memremap_is_efi_data(phys_addr, size) ||
>> - memremap_should_map_decrypted(phys_addr, size))
>> - prot = pgprot_decrypted(prot);
>> - else
>> - prot = pgprot_encrypted(prot);
>> + if (sme_active()) {
>
> ... if you're going to do it here too.
>
>> + if (early_memremap_is_setup_data(phys_addr, size) ||
>> + memremap_is_efi_data(phys_addr, size) ||
>> + memremap_should_map_decrypted(phys_addr, size))
>> + prot = pgprot_decrypted(prot);
>> + else
>> + prot = pgprot_encrypted(prot);
>> + } else if (sev_active()) {
>
> And here.
Will do.
Thanks,
Tom
>
>> + if (memremap_should_map_decrypted(phys_addr, size))
>> + prot = pgprot_decrypted(prot);
>> + else
>> + prot = pgprot_encrypted(prot);
>> + }
>
^ permalink raw reply [flat|nested] 79+ messages in thread
* [RFC Part1 PATCH v3 09/17] resource: Consolidate resource walking code
[not found] ` <20170724190757.11278-1-brijesh.singh-5C7GfCeVMHo@public.gmane.org>
` (3 preceding siblings ...)
2017-07-24 19:07 ` [RFC Part1 PATCH v3 06/17] x86/mm: Use encrypted access of boot related data with SEV Brijesh Singh
@ 2017-07-24 19:07 ` Brijesh Singh
2017-07-28 15:23 ` Borislav Petkov
2017-07-24 19:07 ` [RFC Part1 PATCH v3 12/17] x86/mm: DMA support for SEV memory encryption Brijesh Singh
` (2 subsequent siblings)
7 siblings, 1 reply; 79+ messages in thread
From: Brijesh Singh @ 2017-07-24 19:07 UTC (permalink / raw)
To: linux-kernel-u79uwXL29TY76Z2rM5mHXA, x86-DgEjT+Ai2ygdnm+yROfE0A,
linux-efi-u79uwXL29TY76Z2rM5mHXA,
linuxppc-dev-uLR06cmDAlY/bJ5BZ2RsiQ, kvm-u79uwXL29TY76Z2rM5mHXA
Cc: Thomas Gleixner, Ingo Molnar, H . Peter Anvin, Borislav Petkov,
Andy Lutomirski, Tony Luck, Piotr Luc, Tom Lendacky, Fenghua Yu,
Lu Baolu, Reza Arbab, David Howells, Matt Fleming,
Kirill A . Shutemov, Laura Abbott, Ard Biesheuvel, Andrew Morton,
Eric Biederman, Benjamin Herrenschmidt, Paul Mackerras
From: Tom Lendacky <thomas.lendacky-5C7GfCeVMHo@public.gmane.org>
The walk_iomem_res_desc(), walk_system_ram_res() and walk_system_ram_range()
functions each have much of the same code. Create a new function that
consolidates the common code from these functions in one place to reduce
the amount of duplicated code.
Signed-off-by: Tom Lendacky <thomas.lendacky-5C7GfCeVMHo@public.gmane.org>
Signed-off-by: Brijesh Singh <brijesh.singh-5C7GfCeVMHo@public.gmane.org>
---
kernel/resource.c | 53 ++++++++++++++++++++++++++---------------------------
1 file changed, 26 insertions(+), 27 deletions(-)
diff --git a/kernel/resource.c b/kernel/resource.c
index 9b5f044..7b20b3e 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -397,9 +397,30 @@ static int find_next_iomem_res(struct resource *res, unsigned long desc,
res->start = p->start;
if (res->end > p->end)
res->end = p->end;
+ res->desc = p->desc;
return 0;
}
+static int __walk_iomem_res_desc(struct resource *res, unsigned long desc,
+ bool first_level_children_only,
+ void *arg, int (*func)(u64, u64, void *))
+{
+ u64 orig_end = res->end;
+ int ret = -1;
+
+ while ((res->start < res->end) &&
+ !find_next_iomem_res(res, desc, first_level_children_only)) {
+ ret = (*func)(res->start, res->end, arg);
+ if (ret)
+ break;
+
+ res->start = res->end + 1;
+ res->end = orig_end;
+ }
+
+ return ret;
+}
+
/*
* Walks through iomem resources and calls func() with matching resource
* ranges. This walks through whole tree and not just first level children.
@@ -418,26 +439,12 @@ int walk_iomem_res_desc(unsigned long desc, unsigned long flags, u64 start,
u64 end, void *arg, int (*func)(u64, u64, void *))
{
struct resource res;
- u64 orig_end;
- int ret = -1;
res.start = start;
res.end = end;
res.flags = flags;
- orig_end = res.end;
-
- while ((res.start < res.end) &&
- (!find_next_iomem_res(&res, desc, false))) {
-
- ret = (*func)(res.start, res.end, arg);
- if (ret)
- break;
-
- res.start = res.end + 1;
- res.end = orig_end;
- }
- return ret;
+ return __walk_iomem_res_desc(&res, desc, false, arg, func);
}
/*
@@ -451,22 +458,13 @@ int walk_system_ram_res(u64 start, u64 end, void *arg,
int (*func)(u64, u64, void *))
{
struct resource res;
- u64 orig_end;
- int ret = -1;
res.start = start;
res.end = end;
res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
- orig_end = res.end;
- while ((res.start < res.end) &&
- (!find_next_iomem_res(&res, IORES_DESC_NONE, true))) {
- ret = (*func)(res.start, res.end, arg);
- if (ret)
- break;
- res.start = res.end + 1;
- res.end = orig_end;
- }
- return ret;
+
+ return __walk_iomem_res_desc(&res, IORES_DESC_NONE, true,
+ arg, func);
}
#if !defined(CONFIG_ARCH_HAS_WALK_MEMORY)
@@ -508,6 +506,7 @@ static int __is_ram(unsigned long pfn, unsigned long nr_pages, void *arg)
{
return 1;
}
+
/*
* This generic page_is_ram() returns true if specified address is
* registered as System RAM in iomem_resource list.
--
2.9.4
^ permalink raw reply related [flat|nested] 79+ messages in thread
* Re: [RFC Part1 PATCH v3 09/17] resource: Consolidate resource walking code
2017-07-24 19:07 ` [RFC Part1 PATCH v3 09/17] resource: Consolidate resource walking code Brijesh Singh
@ 2017-07-28 15:23 ` Borislav Petkov
[not found] ` <20170728152342.GB11564-K5JNixvcfoxupOikMc4+xw@public.gmane.org>
0 siblings, 1 reply; 79+ messages in thread
From: Borislav Petkov @ 2017-07-28 15:23 UTC (permalink / raw)
To: Brijesh Singh
Cc: linux-kernel, x86, linux-efi, linuxppc-dev, kvm, Thomas Gleixner,
Ingo Molnar, H . Peter Anvin, Andy Lutomirski, Tony Luck,
Piotr Luc, Tom Lendacky, Fenghua Yu, Lu Baolu, Reza Arbab,
David Howells, Matt Fleming, Kirill A . Shutemov, Laura Abbott,
Ard Biesheuvel, Andrew Morton, Eric
On Mon, Jul 24, 2017 at 02:07:49PM -0500, Brijesh Singh wrote:
> From: Tom Lendacky <thomas.lendacky@amd.com>
>
> The walk_iomem_res_desc(), walk_system_ram_res() and walk_system_ram_range()
> functions each have much of the same code. Create a new function that
> consolidates the common code from these functions in one place to reduce
> the amount of duplicated code.
>
> Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
> Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
> ---
> kernel/resource.c | 53 ++++++++++++++++++++++++++---------------------------
> 1 file changed, 26 insertions(+), 27 deletions(-)
>
> diff --git a/kernel/resource.c b/kernel/resource.c
> index 9b5f044..7b20b3e 100644
> --- a/kernel/resource.c
> +++ b/kernel/resource.c
> @@ -397,9 +397,30 @@ static int find_next_iomem_res(struct resource *res, unsigned long desc,
> res->start = p->start;
> if (res->end > p->end)
> res->end = p->end;
> + res->desc = p->desc;
> return 0;
I must be going blind: where are we using that res->desc?
> +static int __walk_iomem_res_desc(struct resource *res, unsigned long desc,
> + bool first_level_children_only,
Btw, that variable name is insanely long.
The rest looks ok to me, thanks for the cleanup!
--
Regards/Gruss,
Boris.
SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nürnberg)
--
^ permalink raw reply [flat|nested] 79+ messages in thread
* [RFC Part1 PATCH v3 12/17] x86/mm: DMA support for SEV memory encryption
[not found] ` <20170724190757.11278-1-brijesh.singh-5C7GfCeVMHo@public.gmane.org>
` (4 preceding siblings ...)
2017-07-24 19:07 ` [RFC Part1 PATCH v3 09/17] resource: Consolidate resource walking code Brijesh Singh
@ 2017-07-24 19:07 ` Brijesh Singh
2017-08-07 3:48 ` Borislav Petkov
2017-07-24 19:07 ` [RFC Part1 PATCH v3 14/17] x86/boot: Add early boot support when running with SEV active Brijesh Singh
2017-07-24 19:07 ` [RFC Part1 PATCH v3 17/17] X86/KVM: Clear encryption attribute when SEV is active Brijesh Singh
7 siblings, 1 reply; 79+ messages in thread
From: Brijesh Singh @ 2017-07-24 19:07 UTC (permalink / raw)
To: linux-kernel-u79uwXL29TY76Z2rM5mHXA, x86-DgEjT+Ai2ygdnm+yROfE0A,
linux-efi-u79uwXL29TY76Z2rM5mHXA,
linuxppc-dev-uLR06cmDAlY/bJ5BZ2RsiQ, kvm-u79uwXL29TY76Z2rM5mHXA
Cc: Thomas Gleixner, Ingo Molnar, H . Peter Anvin, Borislav Petkov,
Andy Lutomirski, Tony Luck, Piotr Luc, Tom Lendacky, Fenghua Yu,
Lu Baolu, Reza Arbab, David Howells, Matt Fleming,
Kirill A . Shutemov, Laura Abbott, Ard Biesheuvel, Andrew Morton,
Eric Biederman, Benjamin Herrenschmidt, Paul Mackerras
From: Tom Lendacky <thomas.lendacky-5C7GfCeVMHo@public.gmane.org>
DMA access to memory mapped as encrypted while SEV is active can not be
encrypted during device write or decrypted during device read. In order
for DMA to properly work when SEV is active, the SWIOTLB bounce buffers
must be used.
Signed-off-by: Tom Lendacky <thomas.lendacky-5C7GfCeVMHo@public.gmane.org>
Signed-off-by: Brijesh Singh <brijesh.singh-5C7GfCeVMHo@public.gmane.org>
---
arch/x86/mm/mem_encrypt.c | 86 +++++++++++++++++++++++++++++++++++++++++++++++
lib/swiotlb.c | 5 +--
2 files changed, 89 insertions(+), 2 deletions(-)
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 1e4643e..5e5d460 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -191,8 +191,86 @@ void __init sme_early_init(void)
/* Update the protection map with memory encryption mask */
for (i = 0; i < ARRAY_SIZE(protection_map); i++)
protection_map[i] = pgprot_encrypted(protection_map[i]);
+
+ if (sev_active())
+ swiotlb_force = SWIOTLB_FORCE;
+}
+
+static void *sme_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
+ gfp_t gfp, unsigned long attrs)
+{
+ unsigned long dma_mask;
+ unsigned int order;
+ struct page *page;
+ void *vaddr = NULL;
+
+ dma_mask = dma_alloc_coherent_mask(dev, gfp);
+ order = get_order(size);
+
+ /*
+ * Memory will be memset to zero after marking decrypted, so don't
+ * bother clearing it before.
+ */
+ gfp &= ~__GFP_ZERO;
+
+ page = alloc_pages_node(dev_to_node(dev), gfp, order);
+ if (page) {
+ dma_addr_t addr;
+
+ /*
+ * Since we will be clearing the encryption bit, check the
+ * mask with it already cleared.
+ */
+ addr = __sme_clr(phys_to_dma(dev, page_to_phys(page)));
+ if ((addr + size) > dma_mask) {
+ __free_pages(page, get_order(size));
+ } else {
+ vaddr = page_address(page);
+ *dma_handle = addr;
+ }
+ }
+
+ if (!vaddr)
+ vaddr = swiotlb_alloc_coherent(dev, size, dma_handle, gfp);
+
+ if (!vaddr)
+ return NULL;
+
+ /* Clear the SME encryption bit for DMA use if not swiotlb area */
+ if (!is_swiotlb_buffer(dma_to_phys(dev, *dma_handle))) {
+ set_memory_decrypted((unsigned long)vaddr, 1 << order);
+ memset(vaddr, 0, PAGE_SIZE << order);
+ *dma_handle = __sme_clr(*dma_handle);
+ }
+
+ return vaddr;
+}
+
+static void sme_free(struct device *dev, size_t size, void *vaddr,
+ dma_addr_t dma_handle, unsigned long attrs)
+{
+ /* Set the SME encryption bit for re-use if not swiotlb area */
+ if (!is_swiotlb_buffer(dma_to_phys(dev, dma_handle)))
+ set_memory_encrypted((unsigned long)vaddr,
+ 1 << get_order(size));
+
+ swiotlb_free_coherent(dev, size, vaddr, dma_handle);
}
+static const struct dma_map_ops sme_dma_ops = {
+ .alloc = sme_alloc,
+ .free = sme_free,
+ .map_page = swiotlb_map_page,
+ .unmap_page = swiotlb_unmap_page,
+ .map_sg = swiotlb_map_sg_attrs,
+ .unmap_sg = swiotlb_unmap_sg_attrs,
+ .sync_single_for_cpu = swiotlb_sync_single_for_cpu,
+ .sync_single_for_device = swiotlb_sync_single_for_device,
+ .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
+ .sync_sg_for_device = swiotlb_sync_sg_for_device,
+ .mapping_error = swiotlb_dma_mapping_error,
+};
+
/* Architecture __weak replacement functions */
void __init mem_encrypt_init(void)
{
@@ -202,6 +280,14 @@ void __init mem_encrypt_init(void)
/* Call into SWIOTLB to update the SWIOTLB DMA buffers */
swiotlb_update_mem_attributes();
+ /*
+ * With SEV, DMA operations cannot use encryption. New DMA ops
+ * are required in order to mark the DMA areas as decrypted or
+ * to use bounce buffers.
+ */
+ if (sev_active())
+ dma_ops = &sme_dma_ops;
+
pr_info("AMD Secure Memory Encryption (SME) active\n");
}
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 8c6c83e..85fed2f 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -507,8 +507,9 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
if (no_iotlb_memory)
panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer");
- if (sme_active())
- pr_warn_once("SME is active and system is using DMA bounce buffers\n");
+ if (sme_active() || sev_active())
+ pr_warn_once("%s is active and system is using DMA bounce buffers\n",
+ sme_active() ? "SME" : "SEV");
mask = dma_get_seg_boundary(hwdev);
--
2.9.4
^ permalink raw reply related [flat|nested] 79+ messages in thread
* Re: [RFC Part1 PATCH v3 12/17] x86/mm: DMA support for SEV memory encryption
2017-07-24 19:07 ` [RFC Part1 PATCH v3 12/17] x86/mm: DMA support for SEV memory encryption Brijesh Singh
@ 2017-08-07 3:48 ` Borislav Petkov
2017-08-17 19:35 ` Tom Lendacky
0 siblings, 1 reply; 79+ messages in thread
From: Borislav Petkov @ 2017-08-07 3:48 UTC (permalink / raw)
To: Brijesh Singh
Cc: linux-kernel, x86, linux-efi, linuxppc-dev, kvm, Thomas Gleixner,
Ingo Molnar, H . Peter Anvin, Andy Lutomirski, Tony Luck,
Piotr Luc, Tom Lendacky, Fenghua Yu, Lu Baolu, Reza Arbab,
David Howells, Matt Fleming, Kirill A . Shutemov, Laura Abbott,
Ard Biesheuvel, Andrew Morton, Eric
On Mon, Jul 24, 2017 at 02:07:52PM -0500, Brijesh Singh wrote:
> From: Tom Lendacky <thomas.lendacky@amd.com>
>
> DMA access to memory mapped as encrypted while SEV is active can not be
> encrypted during device write or decrypted during device read.
Yeah, definitely rewrite that sentence.
> In order
> for DMA to properly work when SEV is active, the SWIOTLB bounce buffers
> must be used.
>
> Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
> Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
> ---
> arch/x86/mm/mem_encrypt.c | 86 +++++++++++++++++++++++++++++++++++++++++++++++
> lib/swiotlb.c | 5 +--
> 2 files changed, 89 insertions(+), 2 deletions(-)
...
> @@ -202,6 +280,14 @@ void __init mem_encrypt_init(void)
> /* Call into SWIOTLB to update the SWIOTLB DMA buffers */
> swiotlb_update_mem_attributes();
>
> + /*
> + * With SEV, DMA operations cannot use encryption. New DMA ops
> + * are required in order to mark the DMA areas as decrypted or
> + * to use bounce buffers.
> + */
> + if (sev_active())
> + dma_ops = &sme_dma_ops;
Well, we do differentiate between SME and SEV and the check is
sev_active but the ops are called sme_dma_ops. Call them sev_dma_ops
instead for less confusion.
--
Regards/Gruss,
Boris.
SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nürnberg)
--
^ permalink raw reply [flat|nested] 79+ messages in thread
* Re: [RFC Part1 PATCH v3 12/17] x86/mm: DMA support for SEV memory encryption
2017-08-07 3:48 ` Borislav Petkov
@ 2017-08-17 19:35 ` Tom Lendacky
0 siblings, 0 replies; 79+ messages in thread
From: Tom Lendacky @ 2017-08-17 19:35 UTC (permalink / raw)
To: Borislav Petkov, Brijesh Singh
Cc: linux-kernel, x86, linux-efi, linuxppc-dev, kvm, Thomas Gleixner,
Ingo Molnar, H . Peter Anvin, Andy Lutomirski, Tony Luck,
Piotr Luc, Fenghua Yu, Lu Baolu, Reza Arbab, David Howells,
Matt Fleming, Kirill A . Shutemov, Laura Abbott, Ard Biesheuvel,
Andrew Morton, Eric Biederman, Benjamin
On 8/6/2017 10:48 PM, Borislav Petkov wrote:
> On Mon, Jul 24, 2017 at 02:07:52PM -0500, Brijesh Singh wrote:
>> From: Tom Lendacky <thomas.lendacky@amd.com>
>>
>> DMA access to memory mapped as encrypted while SEV is active can not be
>> encrypted during device write or decrypted during device read.
>
> Yeah, definitely rewrite that sentence.
Heh, yup.
>
>> In order
>> for DMA to properly work when SEV is active, the SWIOTLB bounce buffers
>> must be used.
>>
>> Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
>> Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
>> ---
>> arch/x86/mm/mem_encrypt.c | 86 +++++++++++++++++++++++++++++++++++++++++++++++
>> lib/swiotlb.c | 5 +--
>> 2 files changed, 89 insertions(+), 2 deletions(-)
>
> ...
>
>> @@ -202,6 +280,14 @@ void __init mem_encrypt_init(void)
>> /* Call into SWIOTLB to update the SWIOTLB DMA buffers */
>> swiotlb_update_mem_attributes();
>>
>> + /*
>> + * With SEV, DMA operations cannot use encryption. New DMA ops
>> + * are required in order to mark the DMA areas as decrypted or
>> + * to use bounce buffers.
>> + */
>> + if (sev_active())
>> + dma_ops = &sme_dma_ops;
>
> Well, we do differentiate between SME and SEV and the check is
> sev_active but the ops are called sme_dma_ops. Call them sev_dma_ops
> instead for less confusion.
Yup, will do.
Thanks,
Tom
>
^ permalink raw reply [flat|nested] 79+ messages in thread
* [RFC Part1 PATCH v3 14/17] x86/boot: Add early boot support when running with SEV active
[not found] ` <20170724190757.11278-1-brijesh.singh-5C7GfCeVMHo@public.gmane.org>
` (5 preceding siblings ...)
2017-07-24 19:07 ` [RFC Part1 PATCH v3 12/17] x86/mm: DMA support for SEV memory encryption Brijesh Singh
@ 2017-07-24 19:07 ` Brijesh Singh
2017-08-23 15:30 ` Borislav Petkov
2017-07-24 19:07 ` [RFC Part1 PATCH v3 17/17] X86/KVM: Clear encryption attribute when SEV is active Brijesh Singh
7 siblings, 1 reply; 79+ messages in thread
From: Brijesh Singh @ 2017-07-24 19:07 UTC (permalink / raw)
To: linux-kernel-u79uwXL29TY76Z2rM5mHXA, x86-DgEjT+Ai2ygdnm+yROfE0A,
linux-efi-u79uwXL29TY76Z2rM5mHXA,
linuxppc-dev-uLR06cmDAlY/bJ5BZ2RsiQ, kvm-u79uwXL29TY76Z2rM5mHXA
Cc: Thomas Gleixner, Ingo Molnar, H . Peter Anvin, Borislav Petkov,
Andy Lutomirski, Tony Luck, Piotr Luc, Tom Lendacky, Fenghua Yu,
Lu Baolu, Reza Arbab, David Howells, Matt Fleming,
Kirill A . Shutemov, Laura Abbott, Ard Biesheuvel, Andrew Morton,
Eric Biederman, Benjamin Herrenschmidt, Paul Mackerras
From: Tom Lendacky <thomas.lendacky-5C7GfCeVMHo@public.gmane.org>
Early in the boot process, add checks to determine if the kernel is
running with Secure Encrypted Virtualization (SEV) active.
Checking for SEV requires checking that the kernel is running under a
hypervisor (CPUID 0x00000001, bit 31), that the SEV feature is available
(CPUID 0x8000001f, bit 1) and then check a non-interceptable SEV MSR
(0xc0010131, bit 0).
This check is required so that during early compressed kernel booting the
pagetables (both the boot pagetables and KASLR pagetables (if enabled) are
updated to include the encryption mask so that when the kernel is
decompressed into encrypted memory.
After the kernel is decompressed and continues booting the same logic is
used to check if SEV is active and set a flag indicating so. This allows
us to distinguish between SME and SEV, each of which have unique
differences in how certain things are handled: e.g. DMA (always bounce
buffered with SEV) or EFI tables (always access decrypted with SME).
Signed-off-by: Tom Lendacky <thomas.lendacky-5C7GfCeVMHo@public.gmane.org>
Signed-off-by: Brijesh Singh <brijesh.singh-5C7GfCeVMHo@public.gmane.org>
---
arch/x86/boot/compressed/Makefile | 2 +
arch/x86/boot/compressed/head_64.S | 16 +++++
arch/x86/boot/compressed/mem_encrypt.S | 103 +++++++++++++++++++++++++++++++++
arch/x86/boot/compressed/misc.h | 2 +
arch/x86/boot/compressed/pagetable.c | 8 ++-
arch/x86/include/asm/mem_encrypt.h | 3 +
arch/x86/include/asm/msr-index.h | 3 +
arch/x86/include/uapi/asm/kvm_para.h | 1 -
arch/x86/mm/mem_encrypt.c | 42 +++++++++++---
9 files changed, 169 insertions(+), 11 deletions(-)
create mode 100644 arch/x86/boot/compressed/mem_encrypt.S
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 2c860ad..d2fe901 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -72,6 +72,8 @@ vmlinux-objs-y := $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \
$(obj)/string.o $(obj)/cmdline.o $(obj)/error.o \
$(obj)/piggy.o $(obj)/cpuflags.o
+vmlinux-objs-$(CONFIG_X86_64) += $(obj)/mem_encrypt.o
+
vmlinux-objs-$(CONFIG_EARLY_PRINTK) += $(obj)/early_serial_console.o
vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/kaslr.o
ifdef CONFIG_X86_64
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index fbf4c32..6179d43 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -130,6 +130,19 @@ ENTRY(startup_32)
/*
* Build early 4G boot pagetable
*/
+ /*
+ * If SEV is active then set the encryption mask in the page tables.
+ * This will ensure that when the kernel is copied and decompressed
+ * it will be done so encrypted.
+ */
+ call get_sev_encryption_bit
+ xorl %edx, %edx
+ testl %eax, %eax
+ jz 1f
+ subl $32, %eax /* Encryption bit is always above bit 31 */
+ bts %eax, %edx /* Set encryption mask for page tables */
+1:
+
/* Initialize Page tables to 0 */
leal pgtable(%ebx), %edi
xorl %eax, %eax
@@ -140,12 +153,14 @@ ENTRY(startup_32)
leal pgtable + 0(%ebx), %edi
leal 0x1007 (%edi), %eax
movl %eax, 0(%edi)
+ addl %edx, 4(%edi)
/* Build Level 3 */
leal pgtable + 0x1000(%ebx), %edi
leal 0x1007(%edi), %eax
movl $4, %ecx
1: movl %eax, 0x00(%edi)
+ addl %edx, 0x04(%edi)
addl $0x00001000, %eax
addl $8, %edi
decl %ecx
@@ -156,6 +171,7 @@ ENTRY(startup_32)
movl $0x00000183, %eax
movl $2048, %ecx
1: movl %eax, 0(%edi)
+ addl %edx, 4(%edi)
addl $0x00200000, %eax
addl $8, %edi
decl %ecx
diff --git a/arch/x86/boot/compressed/mem_encrypt.S b/arch/x86/boot/compressed/mem_encrypt.S
new file mode 100644
index 0000000..696716e
--- /dev/null
+++ b/arch/x86/boot/compressed/mem_encrypt.S
@@ -0,0 +1,103 @@
+/*
+ * AMD Memory Encryption Support
+ *
+ * Copyright (C) 2017 Advanced Micro Devices, Inc.
+ *
+ * Author: Tom Lendacky <thomas.lendacky-5C7GfCeVMHo@public.gmane.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+
+#include <asm/processor-flags.h>
+#include <asm/msr.h>
+#include <asm/asm-offsets.h>
+
+ .text
+ .code32
+ENTRY(get_sev_encryption_bit)
+ xor %eax, %eax
+
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+ push %ebx
+ push %ecx
+ push %edx
+
+ /* Check if running under a hypervisor */
+ movl $1, %eax
+ cpuid
+ bt $31, %ecx /* Check the hypervisor bit */
+ jnc .Lno_sev
+
+ movl $0x80000000, %eax /* CPUID to check the highest leaf */
+ cpuid
+ cmpl $0x8000001f, %eax /* See if 0x8000001f is available */
+ jb .Lno_sev
+
+ /*
+ * Check for the SEV feature:
+ * CPUID Fn8000_001F[EAX] - Bit 1
+ * CPUID Fn8000_001F[EBX] - Bits 5:0
+ * Pagetable bit position used to indicate encryption
+ */
+ movl $0x8000001f, %eax
+ cpuid
+ bt $1, %eax /* Check if SEV is available */
+ jnc .Lno_sev
+
+ movl $MSR_F17H_SEV, %ecx /* Read the SEV MSR */
+ rdmsr
+ bt $MSR_F17H_SEV_ENABLED_BIT, %eax /* Check if SEV is active */
+ jnc .Lno_sev
+
+ /*
+ * Get memory encryption information:
+ */
+ movl %ebx, %eax
+ andl $0x3f, %eax /* Return the encryption bit location */
+ jmp .Lsev_exit
+
+.Lno_sev:
+ xor %eax, %eax
+
+.Lsev_exit:
+ pop %edx
+ pop %ecx
+ pop %ebx
+
+#endif /* CONFIG_AMD_MEM_ENCRYPT */
+
+ ret
+ENDPROC(get_sev_encryption_bit)
+
+ .code64
+ENTRY(get_sev_encryption_mask)
+ xor %rax, %rax
+
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+ push %rbp
+ push %rdx
+
+ movq %rsp, %rbp /* Save current stack pointer */
+
+ call get_sev_encryption_bit /* Get the encryption bit position */
+ testl %eax, %eax
+ jz .Lno_sev_mask
+
+ xor %rdx, %rdx
+ bts %rax, %rdx /* Create the encryption mask */
+ mov %rdx, %rax /* ... and return it */
+
+.Lno_sev_mask:
+ movq %rbp, %rsp /* Restore original stack pointer */
+
+ pop %rdx
+ pop %rbp
+
+#endif
+
+ ret
+ENDPROC(get_sev_encryption_mask)
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index 766a521..38c5f0e 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -108,4 +108,6 @@ static inline void console_init(void)
{ }
#endif
+unsigned long get_sev_encryption_mask(void);
+
#endif
diff --git a/arch/x86/boot/compressed/pagetable.c b/arch/x86/boot/compressed/pagetable.c
index f1aa438..a577329 100644
--- a/arch/x86/boot/compressed/pagetable.c
+++ b/arch/x86/boot/compressed/pagetable.c
@@ -76,16 +76,18 @@ static unsigned long top_level_pgt;
* Mapping information structure passed to kernel_ident_mapping_init().
* Due to relocation, pointers must be assigned at run time not build time.
*/
-static struct x86_mapping_info mapping_info = {
- .page_flag = __PAGE_KERNEL_LARGE_EXEC,
-};
+static struct x86_mapping_info mapping_info;
/* Locates and clears a region for a new top level page table. */
void initialize_identity_maps(void)
{
+ unsigned long sev_me_mask = get_sev_encryption_mask();
+
/* Init mapping_info with run-time function/buffer pointers. */
mapping_info.alloc_pgt_page = alloc_pgt_page;
mapping_info.context = &pgt_data;
+ mapping_info.page_flag = __PAGE_KERNEL_LARGE_EXEC | sev_me_mask;
+ mapping_info.kernpg_flag = _KERNPG_TABLE | sev_me_mask;
/*
* It should be impossible for this not to already be true,
diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
index 9274ec7..9cb6472 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -19,6 +19,9 @@
#include <asm/bootparam.h>
+#define AMD_SME_FEATURE_BIT BIT(0)
+#define AMD_SEV_FEATURE_BIT BIT(1)
+
#ifdef CONFIG_AMD_MEM_ENCRYPT
extern unsigned long sme_me_mask;
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index e399d68..530020f 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -326,6 +326,9 @@
/* Fam 17h MSRs */
#define MSR_F17H_IRPERF 0xc00000e9
+#define MSR_F17H_SEV 0xc0010131
+#define MSR_F17H_SEV_ENABLED_BIT 0
+#define MSR_F17H_SEV_ENABLED BIT_ULL(MSR_F17H_SEV_ENABLED_BIT)
/* Fam 16h MSRs */
#define MSR_F16H_L2I_PERF_CTL 0xc0010230
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
index a965e5b..c202ba3 100644
--- a/arch/x86/include/uapi/asm/kvm_para.h
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -109,5 +109,4 @@ struct kvm_vcpu_pv_apf_data {
#define KVM_PV_EOI_ENABLED KVM_PV_EOI_MASK
#define KVM_PV_EOI_DISABLED 0x0
-
#endif /* _UAPI_ASM_X86_KVM_PARA_H */
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 5e5d460..ed8780e 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -288,7 +288,9 @@ void __init mem_encrypt_init(void)
if (sev_active())
dma_ops = &sme_dma_ops;
- pr_info("AMD Secure Memory Encryption (SME) active\n");
+ pr_info("AMD %s active\n",
+ sev_active() ? "Secure Encrypted Virtualization (SEV)"
+ : "Secure Memory Encryption (SME)");
}
void swiotlb_set_mem_attributes(void *vaddr, unsigned long size)
@@ -616,12 +618,23 @@ void __init __nostackprotector sme_enable(struct boot_params *bp)
{
const char *cmdline_ptr, *cmdline_arg, *cmdline_on, *cmdline_off;
unsigned int eax, ebx, ecx, edx;
+ unsigned long feature_mask;
bool active_by_default;
unsigned long me_mask;
char buffer[16];
u64 msr;
- /* Check for the SME support leaf */
+ /*
+ * Set the feature mask (SME or SEV) based on whether we are
+ * running under a hypervisor.
+ */
+ eax = 1;
+ ecx = 0;
+ native_cpuid(&eax, &ebx, &ecx, &edx);
+ feature_mask = (ecx & BIT(31)) ? AMD_SEV_FEATURE_BIT
+ : AMD_SME_FEATURE_BIT;
+
+ /* Check for the SME/SEV support leaf */
eax = 0x80000000;
ecx = 0;
native_cpuid(&eax, &ebx, &ecx, &edx);
@@ -629,24 +642,39 @@ void __init __nostackprotector sme_enable(struct boot_params *bp)
return;
/*
- * Check for the SME feature:
+ * Check for the SME/SEV feature:
* CPUID Fn8000_001F[EAX] - Bit 0
* Secure Memory Encryption support
+ * CPUID Fn8000_001F[EAX] - Bit 1
+ * Secure Encrypted Virtualization support
* CPUID Fn8000_001F[EBX] - Bits 5:0
* Pagetable bit position used to indicate encryption
*/
eax = 0x8000001f;
ecx = 0;
native_cpuid(&eax, &ebx, &ecx, &edx);
- if (!(eax & 1))
+ if (!(eax & feature_mask))
return;
me_mask = 1UL << (ebx & 0x3f);
- /* Check if SME is enabled */
- msr = __rdmsr(MSR_K8_SYSCFG);
- if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT))
+ /* Check if memory encryption is enabled */
+ if (feature_mask == AMD_SME_FEATURE_BIT) {
+ /* For SME, check the SYSCFG MSR */
+ msr = __rdmsr(MSR_K8_SYSCFG);
+ if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT))
+ return;
+ } else {
+ /* For SEV, check the SEV MSR */
+ msr = __rdmsr(MSR_F17H_SEV);
+ if (!(msr & MSR_F17H_SEV_ENABLED))
+ return;
+
+ /* SEV state cannot be controlled by a command line option */
+ sme_me_mask = me_mask;
+ sev_enabled = 1;
return;
+ }
/*
* Fixups have not been applied to phys_base yet and we're running
--
2.9.4
^ permalink raw reply related [flat|nested] 79+ messages in thread
* Re: [RFC Part1 PATCH v3 14/17] x86/boot: Add early boot support when running with SEV active
2017-07-24 19:07 ` [RFC Part1 PATCH v3 14/17] x86/boot: Add early boot support when running with SEV active Brijesh Singh
@ 2017-08-23 15:30 ` Borislav Petkov
2017-08-24 18:54 ` Tom Lendacky
0 siblings, 1 reply; 79+ messages in thread
From: Borislav Petkov @ 2017-08-23 15:30 UTC (permalink / raw)
To: Brijesh Singh, Tom Lendacky
Cc: linux-kernel, x86, linux-efi, linuxppc-dev, kvm, Thomas Gleixner,
Ingo Molnar, H . Peter Anvin, Andy Lutomirski, Tony Luck,
Piotr Luc, Fenghua Yu, Lu Baolu, Reza Arbab, David Howells,
Matt Fleming, Kirill A . Shutemov, Laura Abbott, Ard Biesheuvel,
Andrew Morton, Eric Biederman, Benjamin
On Mon, Jul 24, 2017 at 02:07:54PM -0500, Brijesh Singh wrote:
> From: Tom Lendacky <thomas.lendacky@amd.com>
>
> Early in the boot process, add checks to determine if the kernel is
> running with Secure Encrypted Virtualization (SEV) active.
>
> Checking for SEV requires checking that the kernel is running under a
> hypervisor (CPUID 0x00000001, bit 31), that the SEV feature is available
> (CPUID 0x8000001f, bit 1) and then check a non-interceptable SEV MSR
> (0xc0010131, bit 0).
>
> This check is required so that during early compressed kernel booting the
> pagetables (both the boot pagetables and KASLR pagetables (if enabled) are
> updated to include the encryption mask so that when the kernel is
> decompressed into encrypted memory.
, it can boot properly.
:)
> After the kernel is decompressed and continues booting the same logic is
> used to check if SEV is active and set a flag indicating so. This allows
> us to distinguish between SME and SEV, each of which have unique
> differences in how certain things are handled: e.g. DMA (always bounce
> buffered with SEV) or EFI tables (always access decrypted with SME).
>
> Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
> Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
> ---
> arch/x86/boot/compressed/Makefile | 2 +
> arch/x86/boot/compressed/head_64.S | 16 +++++
> arch/x86/boot/compressed/mem_encrypt.S | 103 +++++++++++++++++++++++++++++++++
> arch/x86/boot/compressed/misc.h | 2 +
> arch/x86/boot/compressed/pagetable.c | 8 ++-
> arch/x86/include/asm/mem_encrypt.h | 3 +
> arch/x86/include/asm/msr-index.h | 3 +
> arch/x86/include/uapi/asm/kvm_para.h | 1 -
> arch/x86/mm/mem_encrypt.c | 42 +++++++++++---
> 9 files changed, 169 insertions(+), 11 deletions(-)
> create mode 100644 arch/x86/boot/compressed/mem_encrypt.S
>
> diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
> index 2c860ad..d2fe901 100644
> --- a/arch/x86/boot/compressed/Makefile
> +++ b/arch/x86/boot/compressed/Makefile
> @@ -72,6 +72,8 @@ vmlinux-objs-y := $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \
> $(obj)/string.o $(obj)/cmdline.o $(obj)/error.o \
> $(obj)/piggy.o $(obj)/cpuflags.o
>
> +vmlinux-objs-$(CONFIG_X86_64) += $(obj)/mem_encrypt.o
There's a
ifdef CONFIG_X86_64
a couple of lines below. Just put it there.
...
> +++ b/arch/x86/boot/compressed/mem_encrypt.S
> @@ -0,0 +1,103 @@
> +/*
> + * AMD Memory Encryption Support
> + *
> + * Copyright (C) 2017 Advanced Micro Devices, Inc.
> + *
> + * Author: Tom Lendacky <thomas.lendacky@amd.com>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#include <linux/linkage.h>
> +
> +#include <asm/processor-flags.h>
> +#include <asm/msr.h>
> +#include <asm/asm-offsets.h>
> +
> + .text
> + .code32
> +ENTRY(get_sev_encryption_bit)
> + xor %eax, %eax
> +
> +#ifdef CONFIG_AMD_MEM_ENCRYPT
> + push %ebx
> + push %ecx
> + push %edx
> +
> + /* Check if running under a hypervisor */
> + movl $1, %eax
> + cpuid
> + bt $31, %ecx /* Check the hypervisor bit */
> + jnc .Lno_sev
> +
> + movl $0x80000000, %eax /* CPUID to check the highest leaf */
> + cpuid
> + cmpl $0x8000001f, %eax /* See if 0x8000001f is available */
> + jb .Lno_sev
> +
> + /*
> + * Check for the SEV feature:
> + * CPUID Fn8000_001F[EAX] - Bit 1
> + * CPUID Fn8000_001F[EBX] - Bits 5:0
> + * Pagetable bit position used to indicate encryption
> + */
> + movl $0x8000001f, %eax
> + cpuid
> + bt $1, %eax /* Check if SEV is available */
> + jnc .Lno_sev
> +
> + movl $MSR_F17H_SEV, %ecx /* Read the SEV MSR */
> + rdmsr
> + bt $MSR_F17H_SEV_ENABLED_BIT, %eax /* Check if SEV is active */
> + jnc .Lno_sev
> +
> + /*
> + * Get memory encryption information:
> + */
The side-comment is enough. This one above can go.
> + movl %ebx, %eax
> + andl $0x3f, %eax /* Return the encryption bit location */
> + jmp .Lsev_exit
> +
> +.Lno_sev:
> + xor %eax, %eax
> +
> +.Lsev_exit:
> + pop %edx
> + pop %ecx
> + pop %ebx
> +
> +#endif /* CONFIG_AMD_MEM_ENCRYPT */
> +
> + ret
> +ENDPROC(get_sev_encryption_bit)
> +
> + .code64
> +ENTRY(get_sev_encryption_mask)
> + xor %rax, %rax
> +
> +#ifdef CONFIG_AMD_MEM_ENCRYPT
> + push %rbp
> + push %rdx
> +
> + movq %rsp, %rbp /* Save current stack pointer */
> +
> + call get_sev_encryption_bit /* Get the encryption bit position */
So we get to call get_sev_encryption_bit() here again and noodle through
the CPUID discovery and MSR access. We should instead cache that info
and return the encryption bit directly on a second call. (And initialize
it to -1 to denote that get_sev_encryption_bit() hasn't run yet).
...
> diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
> index 9274ec7..9cb6472 100644
> --- a/arch/x86/include/asm/mem_encrypt.h
> +++ b/arch/x86/include/asm/mem_encrypt.h
> @@ -19,6 +19,9 @@
>
> #include <asm/bootparam.h>
>
> +#define AMD_SME_FEATURE_BIT BIT(0)
> +#define AMD_SEV_FEATURE_BIT BIT(1)
s/_FEATURE//
AMD_SME_BIT and AMD_SEV_BIT is good enough :)
And frankly, if you're going to use them only below in sme_enable() - I
need to check more thoroughly the remaining patches - but if you only
are going to use them there, just define them inside the function so
that they're close.
> +
> #ifdef CONFIG_AMD_MEM_ENCRYPT
>
> extern unsigned long sme_me_mask;
> diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
> index e399d68..530020f 100644
> --- a/arch/x86/include/asm/msr-index.h
> +++ b/arch/x86/include/asm/msr-index.h
> @@ -326,6 +326,9 @@
>
> /* Fam 17h MSRs */
> #define MSR_F17H_IRPERF 0xc00000e9
> +#define MSR_F17H_SEV 0xc0010131
If that MSR is going to be used later on - and I don't see why not - you
probably should make it an arch one: MSR_AMD64_SEV. Even if it isn't yet
officially. :-)
> +#define MSR_F17H_SEV_ENABLED_BIT 0
> +#define MSR_F17H_SEV_ENABLED BIT_ULL(MSR_F17H_SEV_ENABLED_BIT)
>
> /* Fam 16h MSRs */
> #define MSR_F16H_L2I_PERF_CTL 0xc0010230
> diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
> index a965e5b..c202ba3 100644
> --- a/arch/x86/include/uapi/asm/kvm_para.h
> +++ b/arch/x86/include/uapi/asm/kvm_para.h
> @@ -109,5 +109,4 @@ struct kvm_vcpu_pv_apf_data {
> #define KVM_PV_EOI_ENABLED KVM_PV_EOI_MASK
> #define KVM_PV_EOI_DISABLED 0x0
>
> -
> #endif /* _UAPI_ASM_X86_KVM_PARA_H */
> diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
> index 5e5d460..ed8780e 100644
> --- a/arch/x86/mm/mem_encrypt.c
> +++ b/arch/x86/mm/mem_encrypt.c
> @@ -288,7 +288,9 @@ void __init mem_encrypt_init(void)
> if (sev_active())
> dma_ops = &sme_dma_ops;
>
> - pr_info("AMD Secure Memory Encryption (SME) active\n");
> + pr_info("AMD %s active\n",
> + sev_active() ? "Secure Encrypted Virtualization (SEV)"
> + : "Secure Memory Encryption (SME)");
> }
>
> void swiotlb_set_mem_attributes(void *vaddr, unsigned long size)
> @@ -616,12 +618,23 @@ void __init __nostackprotector sme_enable(struct boot_params *bp)
> {
> const char *cmdline_ptr, *cmdline_arg, *cmdline_on, *cmdline_off;
> unsigned int eax, ebx, ecx, edx;
> + unsigned long feature_mask;
> bool active_by_default;
> unsigned long me_mask;
> char buffer[16];
> u64 msr;
>
> - /* Check for the SME support leaf */
> + /*
> + * Set the feature mask (SME or SEV) based on whether we are
> + * running under a hypervisor.
> + */
> + eax = 1;
> + ecx = 0;
> + native_cpuid(&eax, &ebx, &ecx, &edx);
> + feature_mask = (ecx & BIT(31)) ? AMD_SEV_FEATURE_BIT
> + : AMD_SME_FEATURE_BIT;
Set that feature mask before using it...
> +
> + /* Check for the SME/SEV support leaf */
... because if that check exits due to no SME leaf, you're uselessly
doing all the above.
> eax = 0x80000000;
> ecx = 0;
> native_cpuid(&eax, &ebx, &ecx, &edx);
> @@ -629,24 +642,39 @@ void __init __nostackprotector sme_enable(struct boot_params *bp)
> return;
>
> /*
> - * Check for the SME feature:
> + * Check for the SME/SEV feature:
> * CPUID Fn8000_001F[EAX] - Bit 0
> * Secure Memory Encryption support
> + * CPUID Fn8000_001F[EAX] - Bit 1
No need to repeat the CPUID leaf here - only Bit 1:
* CPUID Fn8000_001F[EAX]
* - Bit 0: Secure Memory Encryption support
* - Bit 1: Secure Encrypted Virtualization support
> + * Secure Encrypted Virtualization support
> * CPUID Fn8000_001F[EBX] - Bits 5:0
> * Pagetable bit position used to indicate encryption
> */
> eax = 0x8000001f;
> ecx = 0;
> native_cpuid(&eax, &ebx, &ecx, &edx);
> - if (!(eax & 1))
> + if (!(eax & feature_mask))
> return;
>
> me_mask = 1UL << (ebx & 0x3f);
>
> - /* Check if SME is enabled */
> - msr = __rdmsr(MSR_K8_SYSCFG);
> - if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT))
> + /* Check if memory encryption is enabled */
> + if (feature_mask == AMD_SME_FEATURE_BIT) {
> + /* For SME, check the SYSCFG MSR */
> + msr = __rdmsr(MSR_K8_SYSCFG);
> + if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT))
> + return;
> + } else {
> + /* For SEV, check the SEV MSR */
> + msr = __rdmsr(MSR_F17H_SEV);
> + if (!(msr & MSR_F17H_SEV_ENABLED))
> + return;
> +
> + /* SEV state cannot be controlled by a command line option */
> + sme_me_mask = me_mask;
> + sev_enabled = 1;
> return;
> + }
Nice. Two birds with one stone is always better. :)
--
Regards/Gruss,
Boris.
SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nürnberg)
--
^ permalink raw reply [flat|nested] 79+ messages in thread
* Re: [RFC Part1 PATCH v3 14/17] x86/boot: Add early boot support when running with SEV active
2017-08-23 15:30 ` Borislav Petkov
@ 2017-08-24 18:54 ` Tom Lendacky
[not found] ` <42cf82fc-03be-c4c7-eaab-b2306a049d20-5C7GfCeVMHo@public.gmane.org>
0 siblings, 1 reply; 79+ messages in thread
From: Tom Lendacky @ 2017-08-24 18:54 UTC (permalink / raw)
To: Borislav Petkov, Brijesh Singh
Cc: linux-kernel, x86, linux-efi, linuxppc-dev, kvm, Thomas Gleixner,
Ingo Molnar, H . Peter Anvin, Andy Lutomirski, Tony Luck,
Piotr Luc, Fenghua Yu, Lu Baolu, Reza Arbab, David Howells,
Matt Fleming, Kirill A . Shutemov, Laura Abbott, Ard Biesheuvel,
Andrew Morton, Eric Biederman, Benjamin
On 8/23/2017 10:30 AM, Borislav Petkov wrote:
> On Mon, Jul 24, 2017 at 02:07:54PM -0500, Brijesh Singh wrote:
>> From: Tom Lendacky <thomas.lendacky@amd.com>
>>
>> Early in the boot process, add checks to determine if the kernel is
>> running with Secure Encrypted Virtualization (SEV) active.
>>
>> Checking for SEV requires checking that the kernel is running under a
>> hypervisor (CPUID 0x00000001, bit 31), that the SEV feature is available
>> (CPUID 0x8000001f, bit 1) and then check a non-interceptable SEV MSR
>> (0xc0010131, bit 0).
>>
>> This check is required so that during early compressed kernel booting the
>> pagetables (both the boot pagetables and KASLR pagetables (if enabled) are
>> updated to include the encryption mask so that when the kernel is
>> decompressed into encrypted memory.
>
> , it can boot properly.
>
> :)
>
Yup, kinda didn't complete that sentence.
>> After the kernel is decompressed and continues booting the same logic is
>> used to check if SEV is active and set a flag indicating so. This allows
>> us to distinguish between SME and SEV, each of which have unique
>> differences in how certain things are handled: e.g. DMA (always bounce
>> buffered with SEV) or EFI tables (always access decrypted with SME).
>>
>> Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
>> Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
>> ---
>> arch/x86/boot/compressed/Makefile | 2 +
>> arch/x86/boot/compressed/head_64.S | 16 +++++
>> arch/x86/boot/compressed/mem_encrypt.S | 103 +++++++++++++++++++++++++++++++++
>> arch/x86/boot/compressed/misc.h | 2 +
>> arch/x86/boot/compressed/pagetable.c | 8 ++-
>> arch/x86/include/asm/mem_encrypt.h | 3 +
>> arch/x86/include/asm/msr-index.h | 3 +
>> arch/x86/include/uapi/asm/kvm_para.h | 1 -
>> arch/x86/mm/mem_encrypt.c | 42 +++++++++++---
>> 9 files changed, 169 insertions(+), 11 deletions(-)
>> create mode 100644 arch/x86/boot/compressed/mem_encrypt.S
>>
>> diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
>> index 2c860ad..d2fe901 100644
>> --- a/arch/x86/boot/compressed/Makefile
>> +++ b/arch/x86/boot/compressed/Makefile
>> @@ -72,6 +72,8 @@ vmlinux-objs-y := $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \
>> $(obj)/string.o $(obj)/cmdline.o $(obj)/error.o \
>> $(obj)/piggy.o $(obj)/cpuflags.o
>>
>> +vmlinux-objs-$(CONFIG_X86_64) += $(obj)/mem_encrypt.o
>
> There's a
>
> ifdef CONFIG_X86_64
>
> a couple of lines below. Just put it there.
Will do.
>
> ...
>
>> +++ b/arch/x86/boot/compressed/mem_encrypt.S
>> @@ -0,0 +1,103 @@
>> +/*
>> + * AMD Memory Encryption Support
>> + *
>> + * Copyright (C) 2017 Advanced Micro Devices, Inc.
>> + *
>> + * Author: Tom Lendacky <thomas.lendacky@amd.com>
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License version 2 as
>> + * published by the Free Software Foundation.
>> + */
>> +
>> +#include <linux/linkage.h>
>> +
>> +#include <asm/processor-flags.h>
>> +#include <asm/msr.h>
>> +#include <asm/asm-offsets.h>
>> +
>> + .text
>> + .code32
>> +ENTRY(get_sev_encryption_bit)
>> + xor %eax, %eax
>> +
>> +#ifdef CONFIG_AMD_MEM_ENCRYPT
>> + push %ebx
>> + push %ecx
>> + push %edx
>> +
>> + /* Check if running under a hypervisor */
>> + movl $1, %eax
>> + cpuid
>> + bt $31, %ecx /* Check the hypervisor bit */
>> + jnc .Lno_sev
>> +
>> + movl $0x80000000, %eax /* CPUID to check the highest leaf */
>> + cpuid
>> + cmpl $0x8000001f, %eax /* See if 0x8000001f is available */
>> + jb .Lno_sev
>> +
>> + /*
>> + * Check for the SEV feature:
>> + * CPUID Fn8000_001F[EAX] - Bit 1
>> + * CPUID Fn8000_001F[EBX] - Bits 5:0
>> + * Pagetable bit position used to indicate encryption
>> + */
>> + movl $0x8000001f, %eax
>> + cpuid
>> + bt $1, %eax /* Check if SEV is available */
>> + jnc .Lno_sev
>> +
>> + movl $MSR_F17H_SEV, %ecx /* Read the SEV MSR */
>> + rdmsr
>> + bt $MSR_F17H_SEV_ENABLED_BIT, %eax /* Check if SEV is active */
>> + jnc .Lno_sev
>> +
>> + /*
>> + * Get memory encryption information:
>> + */
>
> The side-comment is enough. This one above can go.
Done.
>
>> + movl %ebx, %eax
>> + andl $0x3f, %eax /* Return the encryption bit location */
>> + jmp .Lsev_exit
>> +
>> +.Lno_sev:
>> + xor %eax, %eax
>> +
>> +.Lsev_exit:
>> + pop %edx
>> + pop %ecx
>> + pop %ebx
>> +
>> +#endif /* CONFIG_AMD_MEM_ENCRYPT */
>> +
>> + ret
>> +ENDPROC(get_sev_encryption_bit)
>> +
>> + .code64
>> +ENTRY(get_sev_encryption_mask)
>> + xor %rax, %rax
>> +
>> +#ifdef CONFIG_AMD_MEM_ENCRYPT
>> + push %rbp
>> + push %rdx
>> +
>> + movq %rsp, %rbp /* Save current stack pointer */
>> +
>> + call get_sev_encryption_bit /* Get the encryption bit position */
>
> So we get to call get_sev_encryption_bit() here again and noodle through
> the CPUID discovery and MSR access. We should instead cache that info
> and return the encryption bit directly on a second call. (And initialize
> it to -1 to denote that get_sev_encryption_bit() hasn't run yet).
Ok, I'll look into that optimization.
>
> ...
>
>> diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
>> index 9274ec7..9cb6472 100644
>> --- a/arch/x86/include/asm/mem_encrypt.h
>> +++ b/arch/x86/include/asm/mem_encrypt.h
>> @@ -19,6 +19,9 @@
>>
>> #include <asm/bootparam.h>
>>
>> +#define AMD_SME_FEATURE_BIT BIT(0)
>> +#define AMD_SEV_FEATURE_BIT BIT(1)
>
> s/_FEATURE//
>
> AMD_SME_BIT and AMD_SEV_BIT is good enough :)
>
> And frankly, if you're going to use them only below in sme_enable() - I
> need to check more thoroughly the remaining patches - but if you only
> are going to use them there, just define them inside the function so
> that they're close.
Sounds good. I believe that is the only place they are/will be used
so I'll make that change.
>
>> +
>> #ifdef CONFIG_AMD_MEM_ENCRYPT
>>
>> extern unsigned long sme_me_mask;
>> diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
>> index e399d68..530020f 100644
>> --- a/arch/x86/include/asm/msr-index.h
>> +++ b/arch/x86/include/asm/msr-index.h
>> @@ -326,6 +326,9 @@
>>
>> /* Fam 17h MSRs */
>> #define MSR_F17H_IRPERF 0xc00000e9
>> +#define MSR_F17H_SEV 0xc0010131
>
> If that MSR is going to be used later on - and I don't see why not - you
> probably should make it an arch one: MSR_AMD64_SEV. Even if it isn't yet
> officially. :-)
>
Will do.
>> +#define MSR_F17H_SEV_ENABLED_BIT 0
>> +#define MSR_F17H_SEV_ENABLED BIT_ULL(MSR_F17H_SEV_ENABLED_BIT)
>>
>> /* Fam 16h MSRs */
>> #define MSR_F16H_L2I_PERF_CTL 0xc0010230
>> diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
>> index a965e5b..c202ba3 100644
>> --- a/arch/x86/include/uapi/asm/kvm_para.h
>> +++ b/arch/x86/include/uapi/asm/kvm_para.h
>> @@ -109,5 +109,4 @@ struct kvm_vcpu_pv_apf_data {
>> #define KVM_PV_EOI_ENABLED KVM_PV_EOI_MASK
>> #define KVM_PV_EOI_DISABLED 0x0
>>
>> -
>> #endif /* _UAPI_ASM_X86_KVM_PARA_H */
>> diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
>> index 5e5d460..ed8780e 100644
>> --- a/arch/x86/mm/mem_encrypt.c
>> +++ b/arch/x86/mm/mem_encrypt.c
>> @@ -288,7 +288,9 @@ void __init mem_encrypt_init(void)
>> if (sev_active())
>> dma_ops = &sme_dma_ops;
>>
>> - pr_info("AMD Secure Memory Encryption (SME) active\n");
>> + pr_info("AMD %s active\n",
>> + sev_active() ? "Secure Encrypted Virtualization (SEV)"
>> + : "Secure Memory Encryption (SME)");
>> }
>>
>> void swiotlb_set_mem_attributes(void *vaddr, unsigned long size)
>> @@ -616,12 +618,23 @@ void __init __nostackprotector sme_enable(struct boot_params *bp)
>> {
>> const char *cmdline_ptr, *cmdline_arg, *cmdline_on, *cmdline_off;
>> unsigned int eax, ebx, ecx, edx;
>> + unsigned long feature_mask;
>> bool active_by_default;
>> unsigned long me_mask;
>> char buffer[16];
>> u64 msr;
>>
>> - /* Check for the SME support leaf */
>> + /*
>> + * Set the feature mask (SME or SEV) based on whether we are
>> + * running under a hypervisor.
>> + */
>> + eax = 1;
>> + ecx = 0;
>> + native_cpuid(&eax, &ebx, &ecx, &edx);
>> + feature_mask = (ecx & BIT(31)) ? AMD_SEV_FEATURE_BIT
>> + : AMD_SME_FEATURE_BIT;
>
> Set that feature mask before using it...
>
>> +
>> + /* Check for the SME/SEV support leaf */
>
> ... because if that check exits due to no SME leaf, you're uselessly
> doing all the above.
Ok, I'll move that down after the leaf check.
>
>> eax = 0x80000000;
>> ecx = 0;
>> native_cpuid(&eax, &ebx, &ecx, &edx);
>> @@ -629,24 +642,39 @@ void __init __nostackprotector sme_enable(struct boot_params *bp)
>> return;
>>
>> /*
>> - * Check for the SME feature:
>> + * Check for the SME/SEV feature:
>> * CPUID Fn8000_001F[EAX] - Bit 0
>> * Secure Memory Encryption support
>> + * CPUID Fn8000_001F[EAX] - Bit 1
>
> No need to repeat the CPUID leaf here - only Bit 1:
>
> * CPUID Fn8000_001F[EAX]
> * - Bit 0: Secure Memory Encryption support
> * - Bit 1: Secure Encrypted Virtualization support
>
Ok, I'll clean that up.
Thanks,
Tom
>
>> + * Secure Encrypted Virtualization support
>> * CPUID Fn8000_001F[EBX] - Bits 5:0
>> * Pagetable bit position used to indicate encryption
>> */
>> eax = 0x8000001f;
>> ecx = 0;
>> native_cpuid(&eax, &ebx, &ecx, &edx);
>> - if (!(eax & 1))
>> + if (!(eax & feature_mask))
>> return;
>>
>> me_mask = 1UL << (ebx & 0x3f);
>>
>> - /* Check if SME is enabled */
>> - msr = __rdmsr(MSR_K8_SYSCFG);
>> - if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT))
>> + /* Check if memory encryption is enabled */
>> + if (feature_mask == AMD_SME_FEATURE_BIT) {
>> + /* For SME, check the SYSCFG MSR */
>> + msr = __rdmsr(MSR_K8_SYSCFG);
>> + if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT))
>> + return;
>> + } else {
>> + /* For SEV, check the SEV MSR */
>> + msr = __rdmsr(MSR_F17H_SEV);
>> + if (!(msr & MSR_F17H_SEV_ENABLED))
>> + return;
>> +
>> + /* SEV state cannot be controlled by a command line option */
>> + sme_me_mask = me_mask;
>> + sev_enabled = 1;
>> return;
>> + }
>
> Nice. Two birds with one stone is always better. :)
>
>
^ permalink raw reply [flat|nested] 79+ messages in thread
* [RFC Part1 PATCH v3 17/17] X86/KVM: Clear encryption attribute when SEV is active
[not found] ` <20170724190757.11278-1-brijesh.singh-5C7GfCeVMHo@public.gmane.org>
` (6 preceding siblings ...)
2017-07-24 19:07 ` [RFC Part1 PATCH v3 14/17] x86/boot: Add early boot support when running with SEV active Brijesh Singh
@ 2017-07-24 19:07 ` Brijesh Singh
[not found] ` <20170724190757.11278-18-brijesh.singh-5C7GfCeVMHo@public.gmane.org>
7 siblings, 1 reply; 79+ messages in thread
From: Brijesh Singh @ 2017-07-24 19:07 UTC (permalink / raw)
To: linux-kernel-u79uwXL29TY76Z2rM5mHXA, x86-DgEjT+Ai2ygdnm+yROfE0A,
linux-efi-u79uwXL29TY76Z2rM5mHXA,
linuxppc-dev-uLR06cmDAlY/bJ5BZ2RsiQ, kvm-u79uwXL29TY76Z2rM5mHXA
Cc: Thomas Gleixner, Ingo Molnar, H . Peter Anvin, Borislav Petkov,
Andy Lutomirski, Tony Luck, Piotr Luc, Tom Lendacky, Fenghua Yu,
Lu Baolu, Reza Arbab, David Howells, Matt Fleming,
Kirill A . Shutemov, Laura Abbott, Ard Biesheuvel, Andrew Morton,
Eric Biederman, Benjamin Herrenschmidt, Paul Mackerras
The guest physical memory area holding the struct pvclock_wall_clock and
struct pvclock_vcpu_time_info are shared with the hypervisor. Hypervisor
periodically updates the contents of the memory. When SEV is active, we
must clear the encryption attributes from the shared memory pages so that
both hypervisor and guest can access the data.
Signed-off-by: Brijesh Singh <brijesh.singh-5C7GfCeVMHo@public.gmane.org>
---
arch/x86/entry/vdso/vma.c | 5 ++--
arch/x86/kernel/kvmclock.c | 64 +++++++++++++++++++++++++++++++++++++++-------
2 files changed, 58 insertions(+), 11 deletions(-)
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 726355c..ff50251 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -114,10 +114,11 @@ static int vvar_fault(const struct vm_special_mapping *sm,
struct pvclock_vsyscall_time_info *pvti =
pvclock_pvti_cpu0_va();
if (pvti && vclock_was_used(VCLOCK_PVCLOCK)) {
- ret = vm_insert_pfn(
+ ret = vm_insert_pfn_prot(
vma,
vmf->address,
- __pa(pvti) >> PAGE_SHIFT);
+ __pa(pvti) >> PAGE_SHIFT,
+ pgprot_decrypted(vma->vm_page_prot));
}
} else if (sym_offset == image->sym_hvclock_page) {
struct ms_hyperv_tsc_page *tsc_pg = hv_get_tsc_page();
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index d889676..f3a8101 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -27,6 +27,7 @@
#include <linux/sched.h>
#include <linux/sched/clock.h>
+#include <asm/mem_encrypt.h>
#include <asm/x86_init.h>
#include <asm/reboot.h>
#include <asm/kvmclock.h>
@@ -45,7 +46,7 @@ early_param("no-kvmclock", parse_no_kvmclock);
/* The hypervisor will put information about time periodically here */
static struct pvclock_vsyscall_time_info *hv_clock;
-static struct pvclock_wall_clock wall_clock;
+static struct pvclock_wall_clock *wall_clock;
struct pvclock_vsyscall_time_info *pvclock_pvti_cpu0_va(void)
{
@@ -64,15 +65,18 @@ static void kvm_get_wallclock(struct timespec *now)
int low, high;
int cpu;
- low = (int)__pa_symbol(&wall_clock);
- high = ((u64)__pa_symbol(&wall_clock) >> 32);
+ if (!wall_clock)
+ return;
+
+ low = (int)slow_virt_to_phys(wall_clock);
+ high = ((u64)slow_virt_to_phys(wall_clock) >> 32);
native_write_msr(msr_kvm_wall_clock, low, high);
cpu = get_cpu();
vcpu_time = &hv_clock[cpu].pvti;
- pvclock_read_wallclock(&wall_clock, vcpu_time, now);
+ pvclock_read_wallclock(wall_clock, vcpu_time, now);
put_cpu();
}
@@ -249,11 +253,39 @@ static void kvm_shutdown(void)
native_machine_shutdown();
}
+static phys_addr_t __init kvm_memblock_alloc(phys_addr_t size,
+ phys_addr_t align)
+{
+ phys_addr_t mem;
+
+ mem = memblock_alloc(size, align);
+ if (!mem)
+ return 0;
+
+ if (sev_active()) {
+ if (early_set_memory_decrypted(mem, size))
+ goto e_free;
+ }
+
+ return mem;
+e_free:
+ memblock_free(mem, size);
+ return 0;
+}
+
+static void __init kvm_memblock_free(phys_addr_t addr, phys_addr_t size)
+{
+ if (sev_active())
+ early_set_memory_encrypted(addr, size);
+
+ memblock_free(addr, size);
+}
+
void __init kvmclock_init(void)
{
struct pvclock_vcpu_time_info *vcpu_time;
- unsigned long mem;
- int size, cpu;
+ unsigned long mem, mem_wall_clock;
+ int size, cpu, wall_clock_size;
u8 flags;
size = PAGE_ALIGN(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS);
@@ -270,15 +302,29 @@ void __init kvmclock_init(void)
printk(KERN_INFO "kvm-clock: Using msrs %x and %x",
msr_kvm_system_time, msr_kvm_wall_clock);
- mem = memblock_alloc(size, PAGE_SIZE);
- if (!mem)
+ wall_clock_size = PAGE_ALIGN(sizeof(struct pvclock_wall_clock));
+ mem_wall_clock = kvm_memblock_alloc(wall_clock_size, PAGE_SIZE);
+ if (!mem_wall_clock)
return;
+
+ wall_clock = __va(mem_wall_clock);
+ memset(wall_clock, 0, wall_clock_size);
+
+ mem = kvm_memblock_alloc(size, PAGE_SIZE);
+ if (!mem) {
+ kvm_memblock_free(mem_wall_clock, wall_clock_size);
+ wall_clock = NULL;
+ return;
+ }
+
hv_clock = __va(mem);
memset(hv_clock, 0, size);
if (kvm_register_clock("primary cpu clock")) {
hv_clock = NULL;
- memblock_free(mem, size);
+ kvm_memblock_free(mem, size);
+ kvm_memblock_free(mem_wall_clock, wall_clock_size);
+ wall_clock = NULL;
return;
}
--
2.9.4
^ permalink raw reply related [flat|nested] 79+ messages in thread