All of lore.kernel.org
 help / color / mirror / Atom feed
From: Brijesh Singh <brijesh.singh@amd.com>
To: x86@kernel.org, linux-kernel@vger.kernel.org, kvm@vger.kernel.org
Cc: "Brijesh Singh" <brijesh.singh@amd.com>,
	"Tom Lendacky" <thomas.lendacky@amd.com>,
	"Thomas Gleixner" <tglx@linutronix.de>,
	"Borislav Petkov" <bp@suse.de>, "H. Peter Anvin" <hpa@zytor.com>,
	"Paolo Bonzini" <pbonzini@redhat.com>,
	"Sean Christopherson" <sean.j.christopherson@intel.com>,
	"Radim Krčmář" <rkrcmar@redhat.com>
Subject: [PATCH v6 5/5] x86/kvm: Avoid dynamic allocation of pvclock data when SEV is active
Date: Fri,  7 Sep 2018 12:57:30 -0500	[thread overview]
Message-ID: <1536343050-18532-6-git-send-email-brijesh.singh@amd.com> (raw)
In-Reply-To: <1536343050-18532-1-git-send-email-brijesh.singh@amd.com>

Currently, the per-cpu pvclock data is allocated dynamically when
cpu > HVC_BOOT_ARRAY_SIZE. The physical address of this variable is
shared between the guest and the hypervisor hence it must be mapped as
unencrypted (ie. C=0) when SEV is active.

The C-bit works on a page, hence we will be required to perform a
full 4k page allocation to store a single 32-byte pvclock variable. It
will waste fairly sizeable amount of memory since each CPU will be doing
a separate 4k allocation. Let's define a second array for the SEV case to
statically allocate for NR_CPUS and put this array in .data..decrypted
section so that its mapped with C=0 during boot. The .data..decrypted
section has a big chunk of memory that is currently unused. And since
second array will be used only when memory encryption is active hence
free it when encryption is not active.

Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
Suggested-by: Sean Christopherson <sean.j.christopherson@intel.com>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: kvm@vger.kernel.org
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Borislav Petkov <bp@suse.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: linux-kernel@vger.kernel.org
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Sean Christopherson <sean.j.christopherson@intel.com>
Cc: kvm@vger.kernel.org
Cc: "Radim Krčmář" <rkrcmar@redhat.com>
---
 arch/x86/include/asm/mem_encrypt.h |  4 ++++
 arch/x86/kernel/kvmclock.c         | 14 ++++++++++++++
 arch/x86/kernel/vmlinux.lds.S      |  3 +++
 arch/x86/mm/init.c                 |  3 +++
 arch/x86/mm/mem_encrypt.c          | 10 ++++++++++
 5 files changed, 34 insertions(+)

diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
index 802b2eb..cc46584 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -48,11 +48,13 @@ int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size);
 
 /* Architecture __weak replacement functions */
 void __init mem_encrypt_init(void);
+void __init free_decrypted_mem(void);
 
 bool sme_active(void);
 bool sev_active(void);
 
 #define __decrypted __attribute__((__section__(".data..decrypted")))
+#define __decrypted_aux __attribute__((__section__(".data..decrypted.aux")))
 
 #else	/* !CONFIG_AMD_MEM_ENCRYPT */
 
@@ -80,6 +82,7 @@ static inline int __init
 early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0; }
 
 #define __decrypted
+#define __decrypted_aux
 
 #endif	/* CONFIG_AMD_MEM_ENCRYPT */
 
@@ -93,6 +96,7 @@ early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0;
 #define __sme_pa_nodebug(x)	(__pa_nodebug(x) | sme_me_mask)
 
 extern char __start_data_decrypted[], __end_data_decrypted[];
+extern char __start_data_decrypted_aux[];
 
 #endif	/* __ASSEMBLY__ */
 
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 376fd3a..6086b56 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -65,6 +65,15 @@ static struct pvclock_vsyscall_time_info
 static struct pvclock_wall_clock wall_clock __decrypted;
 static DEFINE_PER_CPU(struct pvclock_vsyscall_time_info *, hv_clock_per_cpu);
 
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+/*
+ * The auxiliary array will be used when SEV is active. In non-SEV case,
+ * it will be freed by free_decrypted_mem().
+ */
+static struct pvclock_vsyscall_time_info
+			hv_clock_aux[NR_CPUS] __decrypted_aux;
+#endif
+
 static inline struct pvclock_vcpu_time_info *this_cpu_pvti(void)
 {
 	return &this_cpu_read(hv_clock_per_cpu)->pvti;
@@ -269,6 +278,11 @@ static int kvmclock_setup_percpu(unsigned int cpu)
 	/* Use the static page for the first CPUs, allocate otherwise */
 	if (cpu < HVC_BOOT_ARRAY_SIZE)
 		p = &hv_clock_boot[cpu];
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+	/* Use the static page from auxiliary array instead of allocating it. */
+	else if (sev_active())
+		p = &hv_clock_aux[cpu - HVC_BOOT_ARRAY_SIZE];
+#endif
 	else
 		p = kzalloc(sizeof(*p), GFP_KERNEL);
 
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 4cb1064..bde287a 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -77,6 +77,9 @@ jiffies_64 = jiffies;
 	. = ALIGN(PMD_SIZE);					\
 	__start_data_decrypted = .;				\
 	*(.data..decrypted);					\
+	. = ALIGN(PAGE_SIZE);					\
+	__start_data_decrypted_aux = .;				\
+	*(.data..decrypted.aux);				\
 	. = ALIGN(PMD_SIZE);					\
 	__end_data_decrypted = .;				\
 
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 7a8fc26..052b279 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -815,9 +815,12 @@ void free_kernel_image_pages(void *begin, void *end)
 		set_memory_np_noalias(begin_ul, len_pages);
 }
 
+void __weak free_decrypted_mem(void) { }
+
 void __ref free_initmem(void)
 {
 	e820__reallocate_tables();
+	free_decrypted_mem();
 
 	free_kernel_image_pages(&__init_begin, &__init_end);
 }
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index b2de398..9a08c52 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -348,6 +348,16 @@ bool sev_active(void)
 EXPORT_SYMBOL(sev_active);
 
 /* Architecture __weak replacement functions */
+void __init free_decrypted_mem(void)
+{
+	if (mem_encrypt_active())
+		return;
+
+	free_init_pages("unused decrypted",
+			(unsigned long)__start_data_decrypted_aux,
+			(unsigned long)__end_data_decrypted);
+}
+
 void __init mem_encrypt_init(void)
 {
 	if (!sme_me_mask)
-- 
2.7.4


  parent reply	other threads:[~2018-09-07 17:57 UTC|newest]

Thread overview: 37+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-09-07 17:57 [PATCH v6 0/5] x86: Fix SEV guest regression Brijesh Singh
2018-09-07 17:57 ` [PATCH v6 1/5] x86/mm: Restructure sme_encrypt_kernel() Brijesh Singh
2018-09-10 11:32   ` Borislav Petkov
2018-09-07 17:57 ` [PATCH v6 2/5] x86/mm: fix sme_populate_pgd() to update page flags Brijesh Singh
2018-09-10 11:36   ` Borislav Petkov
2018-09-10 12:28     ` Brijesh Singh
2018-09-10 12:32       ` Borislav Petkov
2018-09-07 17:57 ` [PATCH v6 3/5] x86/mm: add .data..decrypted section to hold shared variables Brijesh Singh
2018-09-10 11:54   ` Borislav Petkov
2018-09-10 12:33     ` Brijesh Singh
2018-09-07 17:57 ` [PATCH v6 4/5] x86/kvm: use __decrypted attribute in " Brijesh Singh
2018-09-10 12:04   ` Borislav Petkov
2018-09-10 13:15     ` Sean Christopherson
2018-09-10 13:29       ` Thomas Gleixner
2018-09-10 15:34       ` Borislav Petkov
2018-09-10 12:29   ` Paolo Bonzini
2018-09-10 12:33     ` Borislav Petkov
2018-09-10 12:46       ` Paolo Bonzini
2018-09-07 17:57 ` Brijesh Singh [this message]
2018-09-10 12:27   ` [PATCH v6 5/5] x86/kvm: Avoid dynamic allocation of pvclock data when SEV is active Borislav Petkov
2018-09-10 13:15     ` Brijesh Singh
2018-09-10 13:29       ` Sean Christopherson
2018-09-10 15:10         ` Brijesh Singh
2018-09-10 15:28           ` Sean Christopherson
2018-09-10 15:30             ` Brijesh Singh
2018-09-10 16:48               ` Borislav Petkov
2018-09-11  9:26                 ` Paolo Bonzini
2018-09-11 10:01                   ` Borislav Petkov
2018-09-11 10:19                     ` Paolo Bonzini
2018-09-11 10:25                       ` Borislav Petkov
2018-09-11 11:07                         ` Paolo Bonzini
2018-09-11 13:55                           ` Borislav Petkov
2018-09-11 14:00                             ` Paolo Bonzini
2018-09-10 15:53       ` Borislav Petkov
2018-09-10 16:13         ` Sean Christopherson
2018-09-10 16:14         ` Brijesh Singh
2018-09-10 12:28   ` Paolo Bonzini

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1536343050-18532-6-git-send-email-brijesh.singh@amd.com \
    --to=brijesh.singh@amd.com \
    --cc=bp@suse.de \
    --cc=hpa@zytor.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=pbonzini@redhat.com \
    --cc=rkrcmar@redhat.com \
    --cc=sean.j.christopherson@intel.com \
    --cc=tglx@linutronix.de \
    --cc=thomas.lendacky@amd.com \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.