All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 1/3] x86/mm: Adapt MODULES_END based on Fixmap section size
@ 2017-01-26 16:59 ` Thomas Garnier
  0 siblings, 0 replies; 38+ messages in thread
From: Thomas Garnier @ 2017-01-26 16:59 UTC (permalink / raw)
  To: Thomas Gleixner, Ingo Molnar, H . Peter Anvin, Andrey Ryabinin,
	Alexander Potapenko, Dmitry Vyukov, Thomas Garnier, Kees Cook,
	Andy Lutomirski, Arjan van de Ven, Paul Gortmaker,
	Borislav Petkov, Andy Lutomirski, Rafael J . Wysocki, Len Brown,
	Pavel Machek, Jiri Kosina, Matt Fleming, Ard Biesheuvel,
	Boris Ostrovsky, Juergen Gross, Rusty Russell,
	Christian Borntraeger, Fenghua Yu, He Chen, Brian Gerst,
	Luis R . Rodriguez, Adam Buchbinder, Stanislaw Gruszka,
	Arnd Bergmann, Dave Hansen, Chen Yucong, Vitaly Kuznetsov,
	David Vrabel, Josh Poimboeuf, Tim Chen, Rik van Riel, Andi Kleen,
	Jiri Olsa, Prarit Bhargava, Michael Ellerman, Joerg Roedel,
	Paolo Bonzini, Radim Krčmář
  Cc: x86, linux-kernel, kasan-dev, linux-pm, linux-efi, xen-devel,
	lguest, kvm, kernel-hardening

This patch aligns MODULES_END to the beginning of the Fixmap section.
It optimizes the space available for both sections. The address is
pre-computed based on the number of pages required by the Fixmap
section.

It will allow GDT remapping in the Fixmap section. The current
MODULES_END static address does not provide enough space for the kernel
to support a large number of processors.

Signed-off-by: Thomas Garnier <thgarnie@google.com>
---
Based on next-20170125
---
 arch/x86/include/asm/fixmap.h           | 8 ++++++++
 arch/x86/include/asm/pgtable_64_types.h | 3 ---
 arch/x86/kernel/module.c                | 1 +
 arch/x86/mm/dump_pagetables.c           | 1 +
 arch/x86/mm/kasan_init_64.c             | 1 +
 5 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 8554f960e21b..c46289799b02 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -132,6 +132,14 @@ enum fixed_addresses {
 
 extern void reserve_top_address(unsigned long reserve);
 
+/* On 64bit, the module sections ends with the start of the fixmap */
+#ifdef CONFIG_X86_64
+#define MODULES_VADDR    (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
+#define MODULES_END   __fix_to_virt(__end_of_fixed_addresses + 1)
+#define MODULES_LEN   (MODULES_END - MODULES_VADDR)
+#endif /* CONFIG_X86_64 */
+
+
 #define FIXADDR_SIZE	(__end_of_permanent_fixed_addresses << PAGE_SHIFT)
 #define FIXADDR_START		(FIXADDR_TOP - FIXADDR_SIZE)
 
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 3a264200c62f..de8bace10200 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -66,9 +66,6 @@ typedef struct { pteval_t pte; } pte_t;
 #define VMEMMAP_START	__VMEMMAP_BASE
 #endif /* CONFIG_RANDOMIZE_MEMORY */
 #define VMALLOC_END	(VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL))
-#define MODULES_VADDR    (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
-#define MODULES_END      _AC(0xffffffffff000000, UL)
-#define MODULES_LEN   (MODULES_END - MODULES_VADDR)
 #define ESPFIX_PGD_ENTRY _AC(-2, UL)
 #define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << PGDIR_SHIFT)
 #define EFI_VA_START	 ( -4 * (_AC(1, UL) << 30))
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index 477ae806c2fa..fad61caac75e 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -35,6 +35,7 @@
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include <asm/setup.h>
+#include <asm/fixmap.h>
 
 #if 0
 #define DEBUGP(fmt, ...)				\
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index ea9c49adaa1f..eefd6d015d02 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -18,6 +18,7 @@
 #include <linux/seq_file.h>
 
 #include <asm/pgtable.h>
+#include <asm/fixmap.h>
 
 /*
  * The dumper groups pagetable entries of the same type into one, and for
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index 0493c17b8a51..34f167cf3316 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -8,6 +8,7 @@
 
 #include <asm/tlbflush.h>
 #include <asm/sections.h>
+#include <asm/fixmap.h>
 
 extern pgd_t early_level4_pgt[PTRS_PER_PGD];
 extern struct range pfn_mapped[E820_X_MAX];
-- 
2.11.0.483.g087da7b7c-goog

^ permalink raw reply related	[flat|nested] 38+ messages in thread

* [PATCH v2 1/3] x86/mm: Adapt MODULES_END based on Fixmap section size
@ 2017-01-26 16:59 ` Thomas Garnier
  0 siblings, 0 replies; 38+ messages in thread
From: Thomas Garnier @ 2017-01-26 16:59 UTC (permalink / raw)
  To: Thomas Gleixner, Ingo Molnar, H . Peter Anvin, Andrey Ryabinin,
	Alexander Potapenko, Dmitry Vyukov, Thomas Garnier, Kees Cook,
	Andy Lutomirski, Arjan van de Ven, Paul Gortmaker,
	Borislav Petkov, Andy Lutomirski, Rafael J . Wysocki, Len Brown,
	Pavel Machek, Jiri Kosina, Matt Fleming, Ard Biesheuvel,
	Boris Ostrovsky, Juergen Gross, Rusty Russell,
	Christian Borntraeger
  Cc: x86, linux-kernel, kasan-dev, linux-pm, linux-efi, xen-devel,
	lguest, kvm, kernel-hardening

This patch aligns MODULES_END to the beginning of the Fixmap section.
It optimizes the space available for both sections. The address is
pre-computed based on the number of pages required by the Fixmap
section.

It will allow GDT remapping in the Fixmap section. The current
MODULES_END static address does not provide enough space for the kernel
to support a large number of processors.

Signed-off-by: Thomas Garnier <thgarnie@google.com>
---
Based on next-20170125
---
 arch/x86/include/asm/fixmap.h           | 8 ++++++++
 arch/x86/include/asm/pgtable_64_types.h | 3 ---
 arch/x86/kernel/module.c                | 1 +
 arch/x86/mm/dump_pagetables.c           | 1 +
 arch/x86/mm/kasan_init_64.c             | 1 +
 5 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 8554f960e21b..c46289799b02 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -132,6 +132,14 @@ enum fixed_addresses {
 
 extern void reserve_top_address(unsigned long reserve);
 
+/* On 64bit, the module sections ends with the start of the fixmap */
+#ifdef CONFIG_X86_64
+#define MODULES_VADDR    (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
+#define MODULES_END   __fix_to_virt(__end_of_fixed_addresses + 1)
+#define MODULES_LEN   (MODULES_END - MODULES_VADDR)
+#endif /* CONFIG_X86_64 */
+
+
 #define FIXADDR_SIZE	(__end_of_permanent_fixed_addresses << PAGE_SHIFT)
 #define FIXADDR_START		(FIXADDR_TOP - FIXADDR_SIZE)
 
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 3a264200c62f..de8bace10200 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -66,9 +66,6 @@ typedef struct { pteval_t pte; } pte_t;
 #define VMEMMAP_START	__VMEMMAP_BASE
 #endif /* CONFIG_RANDOMIZE_MEMORY */
 #define VMALLOC_END	(VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL))
-#define MODULES_VADDR    (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
-#define MODULES_END      _AC(0xffffffffff000000, UL)
-#define MODULES_LEN   (MODULES_END - MODULES_VADDR)
 #define ESPFIX_PGD_ENTRY _AC(-2, UL)
 #define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << PGDIR_SHIFT)
 #define EFI_VA_START	 ( -4 * (_AC(1, UL) << 30))
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index 477ae806c2fa..fad61caac75e 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -35,6 +35,7 @@
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include <asm/setup.h>
+#include <asm/fixmap.h>
 
 #if 0
 #define DEBUGP(fmt, ...)				\
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index ea9c49adaa1f..eefd6d015d02 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -18,6 +18,7 @@
 #include <linux/seq_file.h>
 
 #include <asm/pgtable.h>
+#include <asm/fixmap.h>
 
 /*
  * The dumper groups pagetable entries of the same type into one, and for
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index 0493c17b8a51..34f167cf3316 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -8,6 +8,7 @@
 
 #include <asm/tlbflush.h>
 #include <asm/sections.h>
+#include <asm/fixmap.h>
 
 extern pgd_t early_level4_pgt[PTRS_PER_PGD];
 extern struct range pfn_mapped[E820_X_MAX];
-- 
2.11.0.483.g087da7b7c-goog

^ permalink raw reply related	[flat|nested] 38+ messages in thread

* [PATCH v2 1/3] x86/mm: Adapt MODULES_END based on Fixmap section size
@ 2017-01-26 16:59 ` Thomas Garnier
  0 siblings, 0 replies; 38+ messages in thread
From: Thomas Garnier @ 2017-01-26 16:59 UTC (permalink / raw)
  To: Thomas Gleixner, Ingo Molnar, H . Peter Anvin, Andrey Ryabinin,
	Alexander Potapenko, Dmitry Vyukov, Thomas Garnier, Kees Cook,
	Andy Lutomirski, Arjan van de Ven, Paul Gortmaker,
	Borislav Petkov, Andy Lutomirski, Rafael J . Wysocki, Len Brown,
	Pavel Machek, Jiri Kosina, Matt Fleming, Ard Biesheuvel,
	Boris Ostrovsky, Juergen Gross, Rusty Russell,
	Christian Borntraeger
  Cc: x86, linux-kernel, kasan-dev, linux-pm, linux-efi, xen-devel,
	lguest, kvm, kernel-hardening

This patch aligns MODULES_END to the beginning of the Fixmap section.
It optimizes the space available for both sections. The address is
pre-computed based on the number of pages required by the Fixmap
section.

It will allow GDT remapping in the Fixmap section. The current
MODULES_END static address does not provide enough space for the kernel
to support a large number of processors.

Signed-off-by: Thomas Garnier <thgarnie@google.com>
---
Based on next-20170125
---
 arch/x86/include/asm/fixmap.h           | 8 ++++++++
 arch/x86/include/asm/pgtable_64_types.h | 3 ---
 arch/x86/kernel/module.c                | 1 +
 arch/x86/mm/dump_pagetables.c           | 1 +
 arch/x86/mm/kasan_init_64.c             | 1 +
 5 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 8554f960e21b..c46289799b02 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -132,6 +132,14 @@ enum fixed_addresses {
 
 extern void reserve_top_address(unsigned long reserve);
 
+/* On 64bit, the module sections ends with the start of the fixmap */
+#ifdef CONFIG_X86_64
+#define MODULES_VADDR    (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
+#define MODULES_END   __fix_to_virt(__end_of_fixed_addresses + 1)
+#define MODULES_LEN   (MODULES_END - MODULES_VADDR)
+#endif /* CONFIG_X86_64 */
+
+
 #define FIXADDR_SIZE	(__end_of_permanent_fixed_addresses << PAGE_SHIFT)
 #define FIXADDR_START		(FIXADDR_TOP - FIXADDR_SIZE)
 
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 3a264200c62f..de8bace10200 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -66,9 +66,6 @@ typedef struct { pteval_t pte; } pte_t;
 #define VMEMMAP_START	__VMEMMAP_BASE
 #endif /* CONFIG_RANDOMIZE_MEMORY */
 #define VMALLOC_END	(VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL))
-#define MODULES_VADDR    (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
-#define MODULES_END      _AC(0xffffffffff000000, UL)
-#define MODULES_LEN   (MODULES_END - MODULES_VADDR)
 #define ESPFIX_PGD_ENTRY _AC(-2, UL)
 #define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << PGDIR_SHIFT)
 #define EFI_VA_START	 ( -4 * (_AC(1, UL) << 30))
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index 477ae806c2fa..fad61caac75e 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -35,6 +35,7 @@
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include <asm/setup.h>
+#include <asm/fixmap.h>
 
 #if 0
 #define DEBUGP(fmt, ...)				\
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index ea9c49adaa1f..eefd6d015d02 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -18,6 +18,7 @@
 #include <linux/seq_file.h>
 
 #include <asm/pgtable.h>
+#include <asm/fixmap.h>
 
 /*
  * The dumper groups pagetable entries of the same type into one, and for
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index 0493c17b8a51..34f167cf3316 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -8,6 +8,7 @@
 
 #include <asm/tlbflush.h>
 #include <asm/sections.h>
+#include <asm/fixmap.h>
 
 extern pgd_t early_level4_pgt[PTRS_PER_PGD];
 extern struct range pfn_mapped[E820_X_MAX];
-- 
2.11.0.483.g087da7b7c-goog

^ permalink raw reply related	[flat|nested] 38+ messages in thread

* [kernel-hardening] [PATCH v2 1/3] x86/mm: Adapt MODULES_END based on Fixmap section size
@ 2017-01-26 16:59 ` Thomas Garnier
  0 siblings, 0 replies; 38+ messages in thread
From: Thomas Garnier @ 2017-01-26 16:59 UTC (permalink / raw)
  To: Thomas Gleixner, Ingo Molnar, H . Peter Anvin, Andrey Ryabinin,
	Alexander Potapenko, Dmitry Vyukov, Thomas Garnier, Kees Cook,
	Andy Lutomirski, Arjan van de Ven, Paul Gortmaker,
	Borislav Petkov, Andy Lutomirski, Rafael J . Wysocki, Len Brown,
	Pavel Machek, Jiri Kosina, Matt Fleming, Ard Biesheuvel,
	Boris Ostrovsky, Juergen Gross, Rusty Russell,
	Christian Borntraeger, Fenghua Yu, He Chen, Brian Gerst,
	Luis R . Rodriguez, Adam Buchbinder, Stanislaw Gruszka,
	Arnd Bergmann, Dave Hansen, Chen Yucong, Vitaly Kuznetsov,
	David Vrabel, Josh Poimboeuf, Tim Chen, Rik van Riel, Andi Kleen,
	Jiri Olsa, Prarit Bhargava, Michael Ellerman, Joerg Roedel,
	Paolo Bonzini, Radim Krčmář
  Cc: x86, linux-kernel, kasan-dev, linux-pm, linux-efi, xen-devel,
	lguest, kvm, kernel-hardening

This patch aligns MODULES_END to the beginning of the Fixmap section.
It optimizes the space available for both sections. The address is
pre-computed based on the number of pages required by the Fixmap
section.

It will allow GDT remapping in the Fixmap section. The current
MODULES_END static address does not provide enough space for the kernel
to support a large number of processors.

Signed-off-by: Thomas Garnier <thgarnie@google.com>
---
Based on next-20170125
---
 arch/x86/include/asm/fixmap.h           | 8 ++++++++
 arch/x86/include/asm/pgtable_64_types.h | 3 ---
 arch/x86/kernel/module.c                | 1 +
 arch/x86/mm/dump_pagetables.c           | 1 +
 arch/x86/mm/kasan_init_64.c             | 1 +
 5 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 8554f960e21b..c46289799b02 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -132,6 +132,14 @@ enum fixed_addresses {
 
 extern void reserve_top_address(unsigned long reserve);
 
+/* On 64bit, the module sections ends with the start of the fixmap */
+#ifdef CONFIG_X86_64
+#define MODULES_VADDR    (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
+#define MODULES_END   __fix_to_virt(__end_of_fixed_addresses + 1)
+#define MODULES_LEN   (MODULES_END - MODULES_VADDR)
+#endif /* CONFIG_X86_64 */
+
+
 #define FIXADDR_SIZE	(__end_of_permanent_fixed_addresses << PAGE_SHIFT)
 #define FIXADDR_START		(FIXADDR_TOP - FIXADDR_SIZE)
 
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 3a264200c62f..de8bace10200 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -66,9 +66,6 @@ typedef struct { pteval_t pte; } pte_t;
 #define VMEMMAP_START	__VMEMMAP_BASE
 #endif /* CONFIG_RANDOMIZE_MEMORY */
 #define VMALLOC_END	(VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL))
-#define MODULES_VADDR    (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
-#define MODULES_END      _AC(0xffffffffff000000, UL)
-#define MODULES_LEN   (MODULES_END - MODULES_VADDR)
 #define ESPFIX_PGD_ENTRY _AC(-2, UL)
 #define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << PGDIR_SHIFT)
 #define EFI_VA_START	 ( -4 * (_AC(1, UL) << 30))
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index 477ae806c2fa..fad61caac75e 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -35,6 +35,7 @@
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include <asm/setup.h>
+#include <asm/fixmap.h>
 
 #if 0
 #define DEBUGP(fmt, ...)				\
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index ea9c49adaa1f..eefd6d015d02 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -18,6 +18,7 @@
 #include <linux/seq_file.h>
 
 #include <asm/pgtable.h>
+#include <asm/fixmap.h>
 
 /*
  * The dumper groups pagetable entries of the same type into one, and for
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index 0493c17b8a51..34f167cf3316 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -8,6 +8,7 @@
 
 #include <asm/tlbflush.h>
 #include <asm/sections.h>
+#include <asm/fixmap.h>
 
 extern pgd_t early_level4_pgt[PTRS_PER_PGD];
 extern struct range pfn_mapped[E820_X_MAX];
-- 
2.11.0.483.g087da7b7c-goog

^ permalink raw reply related	[flat|nested] 38+ messages in thread

* [PATCH v2 2/3] x86: Remap GDT tables in the Fixmap section
  2017-01-26 16:59 ` Thomas Garnier
  (?)
  (?)
@ 2017-01-26 16:59   ` Thomas Garnier
  -1 siblings, 0 replies; 38+ messages in thread
From: Thomas Garnier @ 2017-01-26 16:59 UTC (permalink / raw)
  To: Thomas Gleixner, Ingo Molnar, H . Peter Anvin, Andrey Ryabinin,
	Alexander Potapenko, Dmitry Vyukov, Thomas Garnier, Kees Cook,
	Andy Lutomirski, Arjan van de Ven, Paul Gortmaker,
	Borislav Petkov, Andy Lutomirski, Rafael J . Wysocki, Len Brown,
	Pavel Machek, Jiri Kosina, Matt Fleming, Ard Biesheuvel,
	Boris Ostrovsky, Juergen Gross, Rusty Russell,
	Christian Borntraeger, Fenghua Yu, He Chen, Brian Gerst,
	Luis R . Rodriguez, Adam Buchbinder, Stanislaw Gruszka,
	Arnd Bergmann, Dave Hansen, Chen Yucong, Vitaly Kuznetsov,
	David Vrabel, Josh Poimboeuf, Tim Chen, Rik van Riel, Andi Kleen,
	Jiri Olsa, Prarit Bhargava, Michael Ellerman, Joerg Roedel,
	Paolo Bonzini, Radim Krčmář
  Cc: x86, linux-kernel, kasan-dev, linux-pm, linux-efi, xen-devel,
	lguest, kvm, kernel-hardening

Each processor holds a GDT in its per-cpu structure. The sgdt
instruction gives the base address of the current GDT. This address can
be used to bypass KASLR memory randomization. With another bug, an
attacker could target other per-cpu structures or deduce the base of
the main memory section (PAGE_OFFSET).

This patch relocates the GDT table for each processor inside the
Fixmap section. The space is reserved based on number of supported
processors.

For consistency, the remapping is done by default on 32 and 64 bit.

Each processor switches to its remapped GDT at the end of
initialization. For hibernation, the main processor returns with the
original GDT and switches back to the remapping at completion.

This patch was tested on both architectures. Hibernation and KVM were
both tested specially for their usage of the GDT.

Signed-off-by: Thomas Garnier <thgarnie@google.com>
---
Based on next-20170125
---
 arch/x86/entry/vdso/vma.c             |  2 +-
 arch/x86/include/asm/desc.h           | 23 +++++++++++++++++++----
 arch/x86/include/asm/fixmap.h         |  4 ++++
 arch/x86/include/asm/processor.h      |  1 +
 arch/x86/include/asm/stackprotector.h |  2 +-
 arch/x86/kernel/acpi/sleep.c          |  2 +-
 arch/x86/kernel/apm_32.c              |  6 +++---
 arch/x86/kernel/cpu/common.c          | 26 ++++++++++++++++++++++++--
 arch/x86/kernel/setup_percpu.c        |  2 +-
 arch/x86/kernel/smpboot.c             |  2 +-
 arch/x86/platform/efi/efi_32.c        |  4 ++--
 arch/x86/power/cpu.c                  |  7 +++++--
 arch/x86/xen/enlighten.c              |  2 +-
 arch/x86/xen/smp.c                    |  2 +-
 drivers/lguest/x86/core.c             |  6 +++---
 drivers/pnp/pnpbios/bioscalls.c       | 10 +++++-----
 16 files changed, 73 insertions(+), 28 deletions(-)

diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 10820f6cefbf..acae4ef04cdf 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -353,7 +353,7 @@ static void vgetcpu_cpu_init(void *arg)
 	d.p = 1;		/* Present */
 	d.d = 1;		/* 32-bit */
 
-	write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
+	write_gdt_entry(get_cpu_direct_gdt(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
 }
 
 static int vgetcpu_online(unsigned int cpu)
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 12080d87da3b..4cc176f57b78 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -4,6 +4,7 @@
 #include <asm/desc_defs.h>
 #include <asm/ldt.h>
 #include <asm/mmu.h>
+#include <asm/fixmap.h>
 
 #include <linux/smp.h>
 #include <linux/percpu.h>
@@ -45,11 +46,25 @@ struct gdt_page {
 
 DECLARE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page);
 
-static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu)
+/* Provide the original GDT */
+static inline struct desc_struct *get_cpu_direct_gdt(unsigned int cpu)
 {
 	return per_cpu(gdt_page, cpu).gdt;
 }
 
+/* Get the fixmap index for a specific processor */
+static inline unsigned int get_cpu_fixmap_gdt_index(int cpu)
+{
+	return FIX_GDT_REMAP_BEGIN + cpu;
+}
+
+/* Provide the fixmap address of the remapped GDT */
+static inline struct desc_struct *get_cpu_fixmap_gdt(int cpu)
+{
+	unsigned int idx = get_cpu_fixmap_gdt_index(cpu);
+	return (struct desc_struct *)__fix_to_virt(idx);
+}
+
 #ifdef CONFIG_X86_64
 
 static inline void pack_gate(gate_desc *gate, unsigned type, unsigned long func,
@@ -174,7 +189,7 @@ static inline void set_tssldt_descriptor(void *d, unsigned long addr, unsigned t
 
 static inline void __set_tss_desc(unsigned cpu, unsigned int entry, void *addr)
 {
-	struct desc_struct *d = get_cpu_gdt_table(cpu);
+	struct desc_struct *d = get_cpu_direct_gdt(cpu);
 	tss_desc tss;
 
 	/*
@@ -202,7 +217,7 @@ static inline void native_set_ldt(const void *addr, unsigned int entries)
 
 		set_tssldt_descriptor(&ldt, (unsigned long)addr, DESC_LDT,
 				      entries * LDT_ENTRY_SIZE - 1);
-		write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT,
+		write_gdt_entry(get_cpu_direct_gdt(cpu), GDT_ENTRY_LDT,
 				&ldt, DESC_LDT);
 		asm volatile("lldt %w0"::"q" (GDT_ENTRY_LDT*8));
 	}
@@ -244,7 +259,7 @@ static inline unsigned long native_store_tr(void)
 
 static inline void native_load_tls(struct thread_struct *t, unsigned int cpu)
 {
-	struct desc_struct *gdt = get_cpu_gdt_table(cpu);
+	struct desc_struct *gdt = get_cpu_direct_gdt(cpu);
 	unsigned int i;
 
 	for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++)
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index c46289799b02..8b913b5e9383 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -100,6 +100,10 @@ enum fixed_addresses {
 #ifdef	CONFIG_X86_INTEL_MID
 	FIX_LNW_VRTC,
 #endif
+	/* Fixmap entries to remap the GDTs, one per processor. */
+	FIX_GDT_REMAP_BEGIN,
+	FIX_GDT_REMAP_END = FIX_GDT_REMAP_BEGIN + NR_CPUS - 1,
+
 	__end_of_permanent_fixed_addresses,
 
 	/*
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 1be64da0384e..22801fd345dc 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -705,6 +705,7 @@ extern struct desc_ptr		early_gdt_descr;
 
 extern void cpu_set_gdt(int);
 extern void switch_to_new_gdt(int);
+extern void load_fixmap_gdt(int);
 extern void load_percpu_segment(int);
 extern void cpu_init(void);
 
diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
index 58505f01962f..422426b8aba7 100644
--- a/arch/x86/include/asm/stackprotector.h
+++ b/arch/x86/include/asm/stackprotector.h
@@ -87,7 +87,7 @@ static inline void setup_stack_canary_segment(int cpu)
 {
 #ifdef CONFIG_X86_32
 	unsigned long canary = (unsigned long)&per_cpu(stack_canary, cpu);
-	struct desc_struct *gdt_table = get_cpu_gdt_table(cpu);
+	struct desc_struct *gdt_table = get_cpu_direct_gdt(cpu);
 	struct desc_struct desc;
 
 	desc = gdt_table[GDT_ENTRY_STACK_CANARY];
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 48587335ede8..d13e6d156c22 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -101,7 +101,7 @@ int x86_acpi_suspend_lowlevel(void)
 #ifdef CONFIG_SMP
 	initial_stack = (unsigned long)temp_stack + sizeof(temp_stack);
 	early_gdt_descr.address =
-			(unsigned long)get_cpu_gdt_table(smp_processor_id());
+			(unsigned long)get_cpu_direct_gdt(smp_processor_id());
 	initial_gs = per_cpu_offset(smp_processor_id());
 #endif
 	initial_code = (unsigned long)wakeup_long64;
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 45d44c173cf9..458dbe18a860 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -608,7 +608,7 @@ static long __apm_bios_call(void *_call)
 
 	cpu = get_cpu();
 	BUG_ON(cpu != 0);
-	gdt = get_cpu_gdt_table(cpu);
+	gdt = get_cpu_direct_gdt(cpu);
 	save_desc_40 = gdt[0x40 / 8];
 	gdt[0x40 / 8] = bad_bios_desc;
 
@@ -684,7 +684,7 @@ static long __apm_bios_call_simple(void *_call)
 
 	cpu = get_cpu();
 	BUG_ON(cpu != 0);
-	gdt = get_cpu_gdt_table(cpu);
+	gdt = get_cpu_direct_gdt(cpu);
 	save_desc_40 = gdt[0x40 / 8];
 	gdt[0x40 / 8] = bad_bios_desc;
 
@@ -2351,7 +2351,7 @@ static int __init apm_init(void)
 	 * Note we only set APM segments on CPU zero, since we pin the APM
 	 * code to that CPU.
 	 */
-	gdt = get_cpu_gdt_table(0);
+	gdt = get_cpu_direct_gdt(0);
 	set_desc_base(&gdt[APM_CS >> 3],
 		 (unsigned long)__va((unsigned long)apm_info.bios.cseg << 4));
 	set_desc_base(&gdt[APM_CS_16 >> 3],
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 5ba74f280448..15f06cf3e3d4 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -444,6 +444,23 @@ void load_percpu_segment(int cpu)
 	load_stack_canary_segment();
 }
 
+/* Setup the fixmap mapping only once per-processor */
+static inline void setup_fixmap_gdt(int cpu)
+{
+	__set_fixmap(get_cpu_fixmap_gdt_index(cpu),
+		     __pa(get_cpu_direct_gdt(cpu)), PAGE_KERNEL);
+}
+
+/* Load a fixmap remapping of the per-cpu GDT */
+void load_fixmap_gdt(int cpu)
+{
+	struct desc_ptr gdt_descr;
+
+	gdt_descr.address = (long)get_cpu_fixmap_gdt(cpu);
+	gdt_descr.size = GDT_SIZE - 1;
+	load_gdt(&gdt_descr);
+}
+
 /*
  * Current gdt points %fs at the "master" per-cpu area: after this,
  * it's on the real one.
@@ -452,11 +469,10 @@ void switch_to_new_gdt(int cpu)
 {
 	struct desc_ptr gdt_descr;
 
-	gdt_descr.address = (long)get_cpu_gdt_table(cpu);
+	gdt_descr.address = (long)get_cpu_direct_gdt(cpu);
 	gdt_descr.size = GDT_SIZE - 1;
 	load_gdt(&gdt_descr);
 	/* Reload the per-cpu base */
-
 	load_percpu_segment(cpu);
 }
 
@@ -1511,6 +1527,9 @@ void cpu_init(void)
 
 	if (is_uv_system())
 		uv_cpu_init();
+
+	setup_fixmap_gdt(cpu);
+	load_fixmap_gdt(cpu);
 }
 
 #else
@@ -1566,6 +1585,9 @@ void cpu_init(void)
 	dbg_restore_debug_regs();
 
 	fpu__init_cpu();
+
+	setup_fixmap_gdt(cpu);
+	load_fixmap_gdt(cpu);
 }
 #endif
 
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 9820d6d977c6..e413c7607316 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -160,7 +160,7 @@ static inline void setup_percpu_segment(int cpu)
 	pack_descriptor(&gdt, per_cpu_offset(cpu), 0xFFFFF,
 			0x2 | DESCTYPE_S, 0x8);
 	gdt.s = 1;
-	write_gdt_entry(get_cpu_gdt_table(cpu),
+	write_gdt_entry(get_cpu_direct_gdt(cpu),
 			GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S);
 #endif
 }
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 46732dc3b73c..0b1096685cda 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -974,7 +974,7 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
 	unsigned long timeout;
 
 	idle->thread.sp = (unsigned long)task_pt_regs(idle);
-	early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
+	early_gdt_descr.address = (unsigned long)get_cpu_direct_gdt(cpu);
 	initial_code = (unsigned long)start_secondary;
 	initial_stack  = idle->thread.sp;
 
diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c
index cef39b097649..72d41dddbaf5 100644
--- a/arch/x86/platform/efi/efi_32.c
+++ b/arch/x86/platform/efi/efi_32.c
@@ -68,7 +68,7 @@ pgd_t * __init efi_call_phys_prolog(void)
 	load_cr3(initial_page_table);
 	__flush_tlb_all();
 
-	gdt_descr.address = __pa(get_cpu_gdt_table(0));
+	gdt_descr.address = __pa(get_cpu_direct_gdt(0));
 	gdt_descr.size = GDT_SIZE - 1;
 	load_gdt(&gdt_descr);
 
@@ -79,7 +79,7 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd)
 {
 	struct desc_ptr gdt_descr;
 
-	gdt_descr.address = (unsigned long)get_cpu_gdt_table(0);
+	gdt_descr.address = (unsigned long)get_cpu_direct_gdt(0);
 	gdt_descr.size = GDT_SIZE - 1;
 	load_gdt(&gdt_descr);
 
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 66ade16c7693..644f80341f12 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -95,7 +95,7 @@ static void __save_processor_state(struct saved_context *ctxt)
 	 * 'pmode_gdt' in wakeup_start.
 	 */
 	ctxt->gdt_desc.size = GDT_SIZE - 1;
-	ctxt->gdt_desc.address = (unsigned long)get_cpu_gdt_table(smp_processor_id());
+	ctxt->gdt_desc.address = (unsigned long)get_cpu_direct_gdt(smp_processor_id());
 
 	store_tr(ctxt->tr);
 
@@ -162,7 +162,7 @@ static void fix_processor_context(void)
 	int cpu = smp_processor_id();
 	struct tss_struct *t = &per_cpu(cpu_tss, cpu);
 #ifdef CONFIG_X86_64
-	struct desc_struct *desc = get_cpu_gdt_table(cpu);
+	struct desc_struct *desc = get_cpu_direct_gdt(cpu);
 	tss_desc tss;
 #endif
 	set_tss_desc(cpu, t);	/*
@@ -183,6 +183,9 @@ static void fix_processor_context(void)
 	load_mm_ldt(current->active_mm);	/* This does lldt */
 
 	fpu__resume_cpu();
+
+	/* The processor is back on the direct GDT, load back the fixmap */
+	load_fixmap_gdt(cpu);
 }
 
 /**
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 51ef95232725..b1a9baad2f0d 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -695,7 +695,7 @@ static void load_TLS_descriptor(struct thread_struct *t,
 
 	*shadow = t->tls_array[i];
 
-	gdt = get_cpu_gdt_table(cpu);
+	gdt = get_cpu_direct_gdt(cpu);
 	maddr = arbitrary_virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]);
 	mc = __xen_mc_entry(0);
 
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 311acad7dad2..3de63fb4d833 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -401,7 +401,7 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
 	if (ctxt == NULL)
 		return -ENOMEM;
 
-	gdt = get_cpu_gdt_table(cpu);
+	gdt = get_cpu_direct_gdt(cpu);
 
 #ifdef CONFIG_X86_32
 	/* Note: PVH is not yet supported on x86_32. */
diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c
index d71f6323ac00..aa9d420913c5 100644
--- a/drivers/lguest/x86/core.c
+++ b/drivers/lguest/x86/core.c
@@ -504,7 +504,7 @@ void __init lguest_arch_host_init(void)
 		 * byte, not the size, hence the "-1").
 		 */
 		state->host_gdt_desc.size = GDT_SIZE-1;
-		state->host_gdt_desc.address = (long)get_cpu_gdt_table(i);
+		state->host_gdt_desc.address = (long)get_cpu_direct_gdt(i);
 
 		/*
 		 * All CPUs on the Host use the same Interrupt Descriptor
@@ -554,8 +554,8 @@ void __init lguest_arch_host_init(void)
 		 * The Host needs to be able to use the LGUEST segments on this
 		 * CPU, too, so put them in the Host GDT.
 		 */
-		get_cpu_gdt_table(i)[GDT_ENTRY_LGUEST_CS] = FULL_EXEC_SEGMENT;
-		get_cpu_gdt_table(i)[GDT_ENTRY_LGUEST_DS] = FULL_SEGMENT;
+		get_cpu_direct_gdt(i)[GDT_ENTRY_LGUEST_CS] = FULL_EXEC_SEGMENT;
+		get_cpu_direct_gdt(i)[GDT_ENTRY_LGUEST_DS] = FULL_SEGMENT;
 	}
 
 	/*
diff --git a/drivers/pnp/pnpbios/bioscalls.c b/drivers/pnp/pnpbios/bioscalls.c
index 438d4c72c7b3..ce7288c5864c 100644
--- a/drivers/pnp/pnpbios/bioscalls.c
+++ b/drivers/pnp/pnpbios/bioscalls.c
@@ -54,7 +54,7 @@ __asm__(".text			\n"
 
 #define Q2_SET_SEL(cpu, selname, address, size) \
 do { \
-	struct desc_struct *gdt = get_cpu_gdt_table((cpu)); \
+	struct desc_struct *gdt = get_cpu_direct_gdt((cpu)); \
 	set_desc_base(&gdt[(selname) >> 3], (u32)(address)); \
 	set_desc_limit(&gdt[(selname) >> 3], (size) - 1); \
 } while(0)
@@ -95,8 +95,8 @@ static inline u16 call_pnp_bios(u16 func, u16 arg1, u16 arg2, u16 arg3,
 		return PNP_FUNCTION_NOT_SUPPORTED;
 
 	cpu = get_cpu();
-	save_desc_40 = get_cpu_gdt_table(cpu)[0x40 / 8];
-	get_cpu_gdt_table(cpu)[0x40 / 8] = bad_bios_desc;
+	save_desc_40 = get_cpu_direct_gdt(cpu)[0x40 / 8];
+	get_cpu_direct_gdt(cpu)[0x40 / 8] = bad_bios_desc;
 
 	/* On some boxes IRQ's during PnP BIOS calls are deadly.  */
 	spin_lock_irqsave(&pnp_bios_lock, flags);
@@ -134,7 +134,7 @@ static inline u16 call_pnp_bios(u16 func, u16 arg1, u16 arg2, u16 arg3,
 			     :"memory");
 	spin_unlock_irqrestore(&pnp_bios_lock, flags);
 
-	get_cpu_gdt_table(cpu)[0x40 / 8] = save_desc_40;
+	get_cpu_direct_gdt(cpu)[0x40 / 8] = save_desc_40;
 	put_cpu();
 
 	/* If we get here and this is set then the PnP BIOS faulted on us. */
@@ -477,7 +477,7 @@ void pnpbios_calls_init(union pnp_bios_install_struct *header)
 	pnp_bios_callpoint.segment = PNP_CS16;
 
 	for_each_possible_cpu(i) {
-		struct desc_struct *gdt = get_cpu_gdt_table(i);
+		struct desc_struct *gdt = get_cpu_direct_gdt(i);
 		if (!gdt)
 			continue;
 		set_desc_base(&gdt[GDT_ENTRY_PNPBIOS_CS32],
-- 
2.11.0.483.g087da7b7c-goog

^ permalink raw reply related	[flat|nested] 38+ messages in thread

* [PATCH v2 2/3] x86: Remap GDT tables in the Fixmap section
@ 2017-01-26 16:59   ` Thomas Garnier
  0 siblings, 0 replies; 38+ messages in thread
From: Thomas Garnier @ 2017-01-26 16:59 UTC (permalink / raw)
  To: Thomas Gleixner, Ingo Molnar, H . Peter Anvin, Andrey Ryabinin,
	Alexander Potapenko, Dmitry Vyukov, Thomas Garnier, Kees Cook,
	Andy Lutomirski, Arjan van de Ven, Paul Gortmaker,
	Borislav Petkov, Andy Lutomirski, Rafael J . Wysocki, Len Brown,
	Pavel Machek, Jiri Kosina, Matt Fleming, Ard Biesheuvel,
	Boris Ostrovsky, Juergen Gross, Rusty Russell,
	Christian Borntraeger
  Cc: x86, linux-kernel, kasan-dev, linux-pm, linux-efi, xen-devel,
	lguest, kvm, kernel-hardening

Each processor holds a GDT in its per-cpu structure. The sgdt
instruction gives the base address of the current GDT. This address can
be used to bypass KASLR memory randomization. With another bug, an
attacker could target other per-cpu structures or deduce the base of
the main memory section (PAGE_OFFSET).

This patch relocates the GDT table for each processor inside the
Fixmap section. The space is reserved based on number of supported
processors.

For consistency, the remapping is done by default on 32 and 64 bit.

Each processor switches to its remapped GDT at the end of
initialization. For hibernation, the main processor returns with the
original GDT and switches back to the remapping at completion.

This patch was tested on both architectures. Hibernation and KVM were
both tested specially for their usage of the GDT.

Signed-off-by: Thomas Garnier <thgarnie@google.com>
---
Based on next-20170125
---
 arch/x86/entry/vdso/vma.c             |  2 +-
 arch/x86/include/asm/desc.h           | 23 +++++++++++++++++++----
 arch/x86/include/asm/fixmap.h         |  4 ++++
 arch/x86/include/asm/processor.h      |  1 +
 arch/x86/include/asm/stackprotector.h |  2 +-
 arch/x86/kernel/acpi/sleep.c          |  2 +-
 arch/x86/kernel/apm_32.c              |  6 +++---
 arch/x86/kernel/cpu/common.c          | 26 ++++++++++++++++++++++++--
 arch/x86/kernel/setup_percpu.c        |  2 +-
 arch/x86/kernel/smpboot.c             |  2 +-
 arch/x86/platform/efi/efi_32.c        |  4 ++--
 arch/x86/power/cpu.c                  |  7 +++++--
 arch/x86/xen/enlighten.c              |  2 +-
 arch/x86/xen/smp.c                    |  2 +-
 drivers/lguest/x86/core.c             |  6 +++---
 drivers/pnp/pnpbios/bioscalls.c       | 10 +++++-----
 16 files changed, 73 insertions(+), 28 deletions(-)

diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 10820f6cefbf..acae4ef04cdf 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -353,7 +353,7 @@ static void vgetcpu_cpu_init(void *arg)
 	d.p = 1;		/* Present */
 	d.d = 1;		/* 32-bit */
 
-	write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
+	write_gdt_entry(get_cpu_direct_gdt(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
 }
 
 static int vgetcpu_online(unsigned int cpu)
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 12080d87da3b..4cc176f57b78 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -4,6 +4,7 @@
 #include <asm/desc_defs.h>
 #include <asm/ldt.h>
 #include <asm/mmu.h>
+#include <asm/fixmap.h>
 
 #include <linux/smp.h>
 #include <linux/percpu.h>
@@ -45,11 +46,25 @@ struct gdt_page {
 
 DECLARE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page);
 
-static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu)
+/* Provide the original GDT */
+static inline struct desc_struct *get_cpu_direct_gdt(unsigned int cpu)
 {
 	return per_cpu(gdt_page, cpu).gdt;
 }
 
+/* Get the fixmap index for a specific processor */
+static inline unsigned int get_cpu_fixmap_gdt_index(int cpu)
+{
+	return FIX_GDT_REMAP_BEGIN + cpu;
+}
+
+/* Provide the fixmap address of the remapped GDT */
+static inline struct desc_struct *get_cpu_fixmap_gdt(int cpu)
+{
+	unsigned int idx = get_cpu_fixmap_gdt_index(cpu);
+	return (struct desc_struct *)__fix_to_virt(idx);
+}
+
 #ifdef CONFIG_X86_64
 
 static inline void pack_gate(gate_desc *gate, unsigned type, unsigned long func,
@@ -174,7 +189,7 @@ static inline void set_tssldt_descriptor(void *d, unsigned long addr, unsigned t
 
 static inline void __set_tss_desc(unsigned cpu, unsigned int entry, void *addr)
 {
-	struct desc_struct *d = get_cpu_gdt_table(cpu);
+	struct desc_struct *d = get_cpu_direct_gdt(cpu);
 	tss_desc tss;
 
 	/*
@@ -202,7 +217,7 @@ static inline void native_set_ldt(const void *addr, unsigned int entries)
 
 		set_tssldt_descriptor(&ldt, (unsigned long)addr, DESC_LDT,
 				      entries * LDT_ENTRY_SIZE - 1);
-		write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT,
+		write_gdt_entry(get_cpu_direct_gdt(cpu), GDT_ENTRY_LDT,
 				&ldt, DESC_LDT);
 		asm volatile("lldt %w0"::"q" (GDT_ENTRY_LDT*8));
 	}
@@ -244,7 +259,7 @@ static inline unsigned long native_store_tr(void)
 
 static inline void native_load_tls(struct thread_struct *t, unsigned int cpu)
 {
-	struct desc_struct *gdt = get_cpu_gdt_table(cpu);
+	struct desc_struct *gdt = get_cpu_direct_gdt(cpu);
 	unsigned int i;
 
 	for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++)
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index c46289799b02..8b913b5e9383 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -100,6 +100,10 @@ enum fixed_addresses {
 #ifdef	CONFIG_X86_INTEL_MID
 	FIX_LNW_VRTC,
 #endif
+	/* Fixmap entries to remap the GDTs, one per processor. */
+	FIX_GDT_REMAP_BEGIN,
+	FIX_GDT_REMAP_END = FIX_GDT_REMAP_BEGIN + NR_CPUS - 1,
+
 	__end_of_permanent_fixed_addresses,
 
 	/*
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 1be64da0384e..22801fd345dc 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -705,6 +705,7 @@ extern struct desc_ptr		early_gdt_descr;
 
 extern void cpu_set_gdt(int);
 extern void switch_to_new_gdt(int);
+extern void load_fixmap_gdt(int);
 extern void load_percpu_segment(int);
 extern void cpu_init(void);
 
diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
index 58505f01962f..422426b8aba7 100644
--- a/arch/x86/include/asm/stackprotector.h
+++ b/arch/x86/include/asm/stackprotector.h
@@ -87,7 +87,7 @@ static inline void setup_stack_canary_segment(int cpu)
 {
 #ifdef CONFIG_X86_32
 	unsigned long canary = (unsigned long)&per_cpu(stack_canary, cpu);
-	struct desc_struct *gdt_table = get_cpu_gdt_table(cpu);
+	struct desc_struct *gdt_table = get_cpu_direct_gdt(cpu);
 	struct desc_struct desc;
 
 	desc = gdt_table[GDT_ENTRY_STACK_CANARY];
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 48587335ede8..d13e6d156c22 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -101,7 +101,7 @@ int x86_acpi_suspend_lowlevel(void)
 #ifdef CONFIG_SMP
 	initial_stack = (unsigned long)temp_stack + sizeof(temp_stack);
 	early_gdt_descr.address =
-			(unsigned long)get_cpu_gdt_table(smp_processor_id());
+			(unsigned long)get_cpu_direct_gdt(smp_processor_id());
 	initial_gs = per_cpu_offset(smp_processor_id());
 #endif
 	initial_code = (unsigned long)wakeup_long64;
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 45d44c173cf9..458dbe18a860 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -608,7 +608,7 @@ static long __apm_bios_call(void *_call)
 
 	cpu = get_cpu();
 	BUG_ON(cpu != 0);
-	gdt = get_cpu_gdt_table(cpu);
+	gdt = get_cpu_direct_gdt(cpu);
 	save_desc_40 = gdt[0x40 / 8];
 	gdt[0x40 / 8] = bad_bios_desc;
 
@@ -684,7 +684,7 @@ static long __apm_bios_call_simple(void *_call)
 
 	cpu = get_cpu();
 	BUG_ON(cpu != 0);
-	gdt = get_cpu_gdt_table(cpu);
+	gdt = get_cpu_direct_gdt(cpu);
 	save_desc_40 = gdt[0x40 / 8];
 	gdt[0x40 / 8] = bad_bios_desc;
 
@@ -2351,7 +2351,7 @@ static int __init apm_init(void)
 	 * Note we only set APM segments on CPU zero, since we pin the APM
 	 * code to that CPU.
 	 */
-	gdt = get_cpu_gdt_table(0);
+	gdt = get_cpu_direct_gdt(0);
 	set_desc_base(&gdt[APM_CS >> 3],
 		 (unsigned long)__va((unsigned long)apm_info.bios.cseg << 4));
 	set_desc_base(&gdt[APM_CS_16 >> 3],
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 5ba74f280448..15f06cf3e3d4 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -444,6 +444,23 @@ void load_percpu_segment(int cpu)
 	load_stack_canary_segment();
 }
 
+/* Setup the fixmap mapping only once per-processor */
+static inline void setup_fixmap_gdt(int cpu)
+{
+	__set_fixmap(get_cpu_fixmap_gdt_index(cpu),
+		     __pa(get_cpu_direct_gdt(cpu)), PAGE_KERNEL);
+}
+
+/* Load a fixmap remapping of the per-cpu GDT */
+void load_fixmap_gdt(int cpu)
+{
+	struct desc_ptr gdt_descr;
+
+	gdt_descr.address = (long)get_cpu_fixmap_gdt(cpu);
+	gdt_descr.size = GDT_SIZE - 1;
+	load_gdt(&gdt_descr);
+}
+
 /*
  * Current gdt points %fs at the "master" per-cpu area: after this,
  * it's on the real one.
@@ -452,11 +469,10 @@ void switch_to_new_gdt(int cpu)
 {
 	struct desc_ptr gdt_descr;
 
-	gdt_descr.address = (long)get_cpu_gdt_table(cpu);
+	gdt_descr.address = (long)get_cpu_direct_gdt(cpu);
 	gdt_descr.size = GDT_SIZE - 1;
 	load_gdt(&gdt_descr);
 	/* Reload the per-cpu base */
-
 	load_percpu_segment(cpu);
 }
 
@@ -1511,6 +1527,9 @@ void cpu_init(void)
 
 	if (is_uv_system())
 		uv_cpu_init();
+
+	setup_fixmap_gdt(cpu);
+	load_fixmap_gdt(cpu);
 }
 
 #else
@@ -1566,6 +1585,9 @@ void cpu_init(void)
 	dbg_restore_debug_regs();
 
 	fpu__init_cpu();
+
+	setup_fixmap_gdt(cpu);
+	load_fixmap_gdt(cpu);
 }
 #endif
 
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 9820d6d977c6..e413c7607316 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -160,7 +160,7 @@ static inline void setup_percpu_segment(int cpu)
 	pack_descriptor(&gdt, per_cpu_offset(cpu), 0xFFFFF,
 			0x2 | DESCTYPE_S, 0x8);
 	gdt.s = 1;
-	write_gdt_entry(get_cpu_gdt_table(cpu),
+	write_gdt_entry(get_cpu_direct_gdt(cpu),
 			GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S);
 #endif
 }
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 46732dc3b73c..0b1096685cda 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -974,7 +974,7 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
 	unsigned long timeout;
 
 	idle->thread.sp = (unsigned long)task_pt_regs(idle);
-	early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
+	early_gdt_descr.address = (unsigned long)get_cpu_direct_gdt(cpu);
 	initial_code = (unsigned long)start_secondary;
 	initial_stack  = idle->thread.sp;
 
diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c
index cef39b097649..72d41dddbaf5 100644
--- a/arch/x86/platform/efi/efi_32.c
+++ b/arch/x86/platform/efi/efi_32.c
@@ -68,7 +68,7 @@ pgd_t * __init efi_call_phys_prolog(void)
 	load_cr3(initial_page_table);
 	__flush_tlb_all();
 
-	gdt_descr.address = __pa(get_cpu_gdt_table(0));
+	gdt_descr.address = __pa(get_cpu_direct_gdt(0));
 	gdt_descr.size = GDT_SIZE - 1;
 	load_gdt(&gdt_descr);
 
@@ -79,7 +79,7 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd)
 {
 	struct desc_ptr gdt_descr;
 
-	gdt_descr.address = (unsigned long)get_cpu_gdt_table(0);
+	gdt_descr.address = (unsigned long)get_cpu_direct_gdt(0);
 	gdt_descr.size = GDT_SIZE - 1;
 	load_gdt(&gdt_descr);
 
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 66ade16c7693..644f80341f12 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -95,7 +95,7 @@ static void __save_processor_state(struct saved_context *ctxt)
 	 * 'pmode_gdt' in wakeup_start.
 	 */
 	ctxt->gdt_desc.size = GDT_SIZE - 1;
-	ctxt->gdt_desc.address = (unsigned long)get_cpu_gdt_table(smp_processor_id());
+	ctxt->gdt_desc.address = (unsigned long)get_cpu_direct_gdt(smp_processor_id());
 
 	store_tr(ctxt->tr);
 
@@ -162,7 +162,7 @@ static void fix_processor_context(void)
 	int cpu = smp_processor_id();
 	struct tss_struct *t = &per_cpu(cpu_tss, cpu);
 #ifdef CONFIG_X86_64
-	struct desc_struct *desc = get_cpu_gdt_table(cpu);
+	struct desc_struct *desc = get_cpu_direct_gdt(cpu);
 	tss_desc tss;
 #endif
 	set_tss_desc(cpu, t);	/*
@@ -183,6 +183,9 @@ static void fix_processor_context(void)
 	load_mm_ldt(current->active_mm);	/* This does lldt */
 
 	fpu__resume_cpu();
+
+	/* The processor is back on the direct GDT, load back the fixmap */
+	load_fixmap_gdt(cpu);
 }
 
 /**
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 51ef95232725..b1a9baad2f0d 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -695,7 +695,7 @@ static void load_TLS_descriptor(struct thread_struct *t,
 
 	*shadow = t->tls_array[i];
 
-	gdt = get_cpu_gdt_table(cpu);
+	gdt = get_cpu_direct_gdt(cpu);
 	maddr = arbitrary_virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]);
 	mc = __xen_mc_entry(0);
 
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 311acad7dad2..3de63fb4d833 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -401,7 +401,7 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
 	if (ctxt == NULL)
 		return -ENOMEM;
 
-	gdt = get_cpu_gdt_table(cpu);
+	gdt = get_cpu_direct_gdt(cpu);
 
 #ifdef CONFIG_X86_32
 	/* Note: PVH is not yet supported on x86_32. */
diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c
index d71f6323ac00..aa9d420913c5 100644
--- a/drivers/lguest/x86/core.c
+++ b/drivers/lguest/x86/core.c
@@ -504,7 +504,7 @@ void __init lguest_arch_host_init(void)
 		 * byte, not the size, hence the "-1").
 		 */
 		state->host_gdt_desc.size = GDT_SIZE-1;
-		state->host_gdt_desc.address = (long)get_cpu_gdt_table(i);
+		state->host_gdt_desc.address = (long)get_cpu_direct_gdt(i);
 
 		/*
 		 * All CPUs on the Host use the same Interrupt Descriptor
@@ -554,8 +554,8 @@ void __init lguest_arch_host_init(void)
 		 * The Host needs to be able to use the LGUEST segments on this
 		 * CPU, too, so put them in the Host GDT.
 		 */
-		get_cpu_gdt_table(i)[GDT_ENTRY_LGUEST_CS] = FULL_EXEC_SEGMENT;
-		get_cpu_gdt_table(i)[GDT_ENTRY_LGUEST_DS] = FULL_SEGMENT;
+		get_cpu_direct_gdt(i)[GDT_ENTRY_LGUEST_CS] = FULL_EXEC_SEGMENT;
+		get_cpu_direct_gdt(i)[GDT_ENTRY_LGUEST_DS] = FULL_SEGMENT;
 	}
 
 	/*
diff --git a/drivers/pnp/pnpbios/bioscalls.c b/drivers/pnp/pnpbios/bioscalls.c
index 438d4c72c7b3..ce7288c5864c 100644
--- a/drivers/pnp/pnpbios/bioscalls.c
+++ b/drivers/pnp/pnpbios/bioscalls.c
@@ -54,7 +54,7 @@ __asm__(".text			\n"
 
 #define Q2_SET_SEL(cpu, selname, address, size) \
 do { \
-	struct desc_struct *gdt = get_cpu_gdt_table((cpu)); \
+	struct desc_struct *gdt = get_cpu_direct_gdt((cpu)); \
 	set_desc_base(&gdt[(selname) >> 3], (u32)(address)); \
 	set_desc_limit(&gdt[(selname) >> 3], (size) - 1); \
 } while(0)
@@ -95,8 +95,8 @@ static inline u16 call_pnp_bios(u16 func, u16 arg1, u16 arg2, u16 arg3,
 		return PNP_FUNCTION_NOT_SUPPORTED;
 
 	cpu = get_cpu();
-	save_desc_40 = get_cpu_gdt_table(cpu)[0x40 / 8];
-	get_cpu_gdt_table(cpu)[0x40 / 8] = bad_bios_desc;
+	save_desc_40 = get_cpu_direct_gdt(cpu)[0x40 / 8];
+	get_cpu_direct_gdt(cpu)[0x40 / 8] = bad_bios_desc;
 
 	/* On some boxes IRQ's during PnP BIOS calls are deadly.  */
 	spin_lock_irqsave(&pnp_bios_lock, flags);
@@ -134,7 +134,7 @@ static inline u16 call_pnp_bios(u16 func, u16 arg1, u16 arg2, u16 arg3,
 			     :"memory");
 	spin_unlock_irqrestore(&pnp_bios_lock, flags);
 
-	get_cpu_gdt_table(cpu)[0x40 / 8] = save_desc_40;
+	get_cpu_direct_gdt(cpu)[0x40 / 8] = save_desc_40;
 	put_cpu();
 
 	/* If we get here and this is set then the PnP BIOS faulted on us. */
@@ -477,7 +477,7 @@ void pnpbios_calls_init(union pnp_bios_install_struct *header)
 	pnp_bios_callpoint.segment = PNP_CS16;
 
 	for_each_possible_cpu(i) {
-		struct desc_struct *gdt = get_cpu_gdt_table(i);
+		struct desc_struct *gdt = get_cpu_direct_gdt(i);
 		if (!gdt)
 			continue;
 		set_desc_base(&gdt[GDT_ENTRY_PNPBIOS_CS32],
-- 
2.11.0.483.g087da7b7c-goog

^ permalink raw reply related	[flat|nested] 38+ messages in thread

* [PATCH v2 2/3] x86: Remap GDT tables in the Fixmap section
@ 2017-01-26 16:59   ` Thomas Garnier
  0 siblings, 0 replies; 38+ messages in thread
From: Thomas Garnier @ 2017-01-26 16:59 UTC (permalink / raw)
  To: Thomas Gleixner, Ingo Molnar, H . Peter Anvin, Andrey Ryabinin,
	Alexander Potapenko, Dmitry Vyukov, Thomas Garnier, Kees Cook,
	Andy Lutomirski, Arjan van de Ven, Paul Gortmaker,
	Borislav Petkov, Andy Lutomirski, Rafael J . Wysocki, Len Brown,
	Pavel Machek, Jiri Kosina, Matt Fleming, Ard Biesheuvel,
	Boris Ostrovsky, Juergen Gross, Rusty Russell,
	Christian Borntraeger
  Cc: x86, linux-kernel, kasan-dev, linux-pm, linux-efi, xen-devel,
	lguest, kvm, kernel-hardening

Each processor holds a GDT in its per-cpu structure. The sgdt
instruction gives the base address of the current GDT. This address can
be used to bypass KASLR memory randomization. With another bug, an
attacker could target other per-cpu structures or deduce the base of
the main memory section (PAGE_OFFSET).

This patch relocates the GDT table for each processor inside the
Fixmap section. The space is reserved based on number of supported
processors.

For consistency, the remapping is done by default on 32 and 64 bit.

Each processor switches to its remapped GDT at the end of
initialization. For hibernation, the main processor returns with the
original GDT and switches back to the remapping at completion.

This patch was tested on both architectures. Hibernation and KVM were
both tested specially for their usage of the GDT.

Signed-off-by: Thomas Garnier <thgarnie@google.com>
---
Based on next-20170125
---
 arch/x86/entry/vdso/vma.c             |  2 +-
 arch/x86/include/asm/desc.h           | 23 +++++++++++++++++++----
 arch/x86/include/asm/fixmap.h         |  4 ++++
 arch/x86/include/asm/processor.h      |  1 +
 arch/x86/include/asm/stackprotector.h |  2 +-
 arch/x86/kernel/acpi/sleep.c          |  2 +-
 arch/x86/kernel/apm_32.c              |  6 +++---
 arch/x86/kernel/cpu/common.c          | 26 ++++++++++++++++++++++++--
 arch/x86/kernel/setup_percpu.c        |  2 +-
 arch/x86/kernel/smpboot.c             |  2 +-
 arch/x86/platform/efi/efi_32.c        |  4 ++--
 arch/x86/power/cpu.c                  |  7 +++++--
 arch/x86/xen/enlighten.c              |  2 +-
 arch/x86/xen/smp.c                    |  2 +-
 drivers/lguest/x86/core.c             |  6 +++---
 drivers/pnp/pnpbios/bioscalls.c       | 10 +++++-----
 16 files changed, 73 insertions(+), 28 deletions(-)

diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 10820f6cefbf..acae4ef04cdf 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -353,7 +353,7 @@ static void vgetcpu_cpu_init(void *arg)
 	d.p = 1;		/* Present */
 	d.d = 1;		/* 32-bit */
 
-	write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
+	write_gdt_entry(get_cpu_direct_gdt(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
 }
 
 static int vgetcpu_online(unsigned int cpu)
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 12080d87da3b..4cc176f57b78 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -4,6 +4,7 @@
 #include <asm/desc_defs.h>
 #include <asm/ldt.h>
 #include <asm/mmu.h>
+#include <asm/fixmap.h>
 
 #include <linux/smp.h>
 #include <linux/percpu.h>
@@ -45,11 +46,25 @@ struct gdt_page {
 
 DECLARE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page);
 
-static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu)
+/* Provide the original GDT */
+static inline struct desc_struct *get_cpu_direct_gdt(unsigned int cpu)
 {
 	return per_cpu(gdt_page, cpu).gdt;
 }
 
+/* Get the fixmap index for a specific processor */
+static inline unsigned int get_cpu_fixmap_gdt_index(int cpu)
+{
+	return FIX_GDT_REMAP_BEGIN + cpu;
+}
+
+/* Provide the fixmap address of the remapped GDT */
+static inline struct desc_struct *get_cpu_fixmap_gdt(int cpu)
+{
+	unsigned int idx = get_cpu_fixmap_gdt_index(cpu);
+	return (struct desc_struct *)__fix_to_virt(idx);
+}
+
 #ifdef CONFIG_X86_64
 
 static inline void pack_gate(gate_desc *gate, unsigned type, unsigned long func,
@@ -174,7 +189,7 @@ static inline void set_tssldt_descriptor(void *d, unsigned long addr, unsigned t
 
 static inline void __set_tss_desc(unsigned cpu, unsigned int entry, void *addr)
 {
-	struct desc_struct *d = get_cpu_gdt_table(cpu);
+	struct desc_struct *d = get_cpu_direct_gdt(cpu);
 	tss_desc tss;
 
 	/*
@@ -202,7 +217,7 @@ static inline void native_set_ldt(const void *addr, unsigned int entries)
 
 		set_tssldt_descriptor(&ldt, (unsigned long)addr, DESC_LDT,
 				      entries * LDT_ENTRY_SIZE - 1);
-		write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT,
+		write_gdt_entry(get_cpu_direct_gdt(cpu), GDT_ENTRY_LDT,
 				&ldt, DESC_LDT);
 		asm volatile("lldt %w0"::"q" (GDT_ENTRY_LDT*8));
 	}
@@ -244,7 +259,7 @@ static inline unsigned long native_store_tr(void)
 
 static inline void native_load_tls(struct thread_struct *t, unsigned int cpu)
 {
-	struct desc_struct *gdt = get_cpu_gdt_table(cpu);
+	struct desc_struct *gdt = get_cpu_direct_gdt(cpu);
 	unsigned int i;
 
 	for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++)
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index c46289799b02..8b913b5e9383 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -100,6 +100,10 @@ enum fixed_addresses {
 #ifdef	CONFIG_X86_INTEL_MID
 	FIX_LNW_VRTC,
 #endif
+	/* Fixmap entries to remap the GDTs, one per processor. */
+	FIX_GDT_REMAP_BEGIN,
+	FIX_GDT_REMAP_END = FIX_GDT_REMAP_BEGIN + NR_CPUS - 1,
+
 	__end_of_permanent_fixed_addresses,
 
 	/*
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 1be64da0384e..22801fd345dc 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -705,6 +705,7 @@ extern struct desc_ptr		early_gdt_descr;
 
 extern void cpu_set_gdt(int);
 extern void switch_to_new_gdt(int);
+extern void load_fixmap_gdt(int);
 extern void load_percpu_segment(int);
 extern void cpu_init(void);
 
diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
index 58505f01962f..422426b8aba7 100644
--- a/arch/x86/include/asm/stackprotector.h
+++ b/arch/x86/include/asm/stackprotector.h
@@ -87,7 +87,7 @@ static inline void setup_stack_canary_segment(int cpu)
 {
 #ifdef CONFIG_X86_32
 	unsigned long canary = (unsigned long)&per_cpu(stack_canary, cpu);
-	struct desc_struct *gdt_table = get_cpu_gdt_table(cpu);
+	struct desc_struct *gdt_table = get_cpu_direct_gdt(cpu);
 	struct desc_struct desc;
 
 	desc = gdt_table[GDT_ENTRY_STACK_CANARY];
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 48587335ede8..d13e6d156c22 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -101,7 +101,7 @@ int x86_acpi_suspend_lowlevel(void)
 #ifdef CONFIG_SMP
 	initial_stack = (unsigned long)temp_stack + sizeof(temp_stack);
 	early_gdt_descr.address =
-			(unsigned long)get_cpu_gdt_table(smp_processor_id());
+			(unsigned long)get_cpu_direct_gdt(smp_processor_id());
 	initial_gs = per_cpu_offset(smp_processor_id());
 #endif
 	initial_code = (unsigned long)wakeup_long64;
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 45d44c173cf9..458dbe18a860 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -608,7 +608,7 @@ static long __apm_bios_call(void *_call)
 
 	cpu = get_cpu();
 	BUG_ON(cpu != 0);
-	gdt = get_cpu_gdt_table(cpu);
+	gdt = get_cpu_direct_gdt(cpu);
 	save_desc_40 = gdt[0x40 / 8];
 	gdt[0x40 / 8] = bad_bios_desc;
 
@@ -684,7 +684,7 @@ static long __apm_bios_call_simple(void *_call)
 
 	cpu = get_cpu();
 	BUG_ON(cpu != 0);
-	gdt = get_cpu_gdt_table(cpu);
+	gdt = get_cpu_direct_gdt(cpu);
 	save_desc_40 = gdt[0x40 / 8];
 	gdt[0x40 / 8] = bad_bios_desc;
 
@@ -2351,7 +2351,7 @@ static int __init apm_init(void)
 	 * Note we only set APM segments on CPU zero, since we pin the APM
 	 * code to that CPU.
 	 */
-	gdt = get_cpu_gdt_table(0);
+	gdt = get_cpu_direct_gdt(0);
 	set_desc_base(&gdt[APM_CS >> 3],
 		 (unsigned long)__va((unsigned long)apm_info.bios.cseg << 4));
 	set_desc_base(&gdt[APM_CS_16 >> 3],
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 5ba74f280448..15f06cf3e3d4 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -444,6 +444,23 @@ void load_percpu_segment(int cpu)
 	load_stack_canary_segment();
 }
 
+/* Setup the fixmap mapping only once per-processor */
+static inline void setup_fixmap_gdt(int cpu)
+{
+	__set_fixmap(get_cpu_fixmap_gdt_index(cpu),
+		     __pa(get_cpu_direct_gdt(cpu)), PAGE_KERNEL);
+}
+
+/* Load a fixmap remapping of the per-cpu GDT */
+void load_fixmap_gdt(int cpu)
+{
+	struct desc_ptr gdt_descr;
+
+	gdt_descr.address = (long)get_cpu_fixmap_gdt(cpu);
+	gdt_descr.size = GDT_SIZE - 1;
+	load_gdt(&gdt_descr);
+}
+
 /*
  * Current gdt points %fs at the "master" per-cpu area: after this,
  * it's on the real one.
@@ -452,11 +469,10 @@ void switch_to_new_gdt(int cpu)
 {
 	struct desc_ptr gdt_descr;
 
-	gdt_descr.address = (long)get_cpu_gdt_table(cpu);
+	gdt_descr.address = (long)get_cpu_direct_gdt(cpu);
 	gdt_descr.size = GDT_SIZE - 1;
 	load_gdt(&gdt_descr);
 	/* Reload the per-cpu base */
-
 	load_percpu_segment(cpu);
 }
 
@@ -1511,6 +1527,9 @@ void cpu_init(void)
 
 	if (is_uv_system())
 		uv_cpu_init();
+
+	setup_fixmap_gdt(cpu);
+	load_fixmap_gdt(cpu);
 }
 
 #else
@@ -1566,6 +1585,9 @@ void cpu_init(void)
 	dbg_restore_debug_regs();
 
 	fpu__init_cpu();
+
+	setup_fixmap_gdt(cpu);
+	load_fixmap_gdt(cpu);
 }
 #endif
 
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 9820d6d977c6..e413c7607316 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -160,7 +160,7 @@ static inline void setup_percpu_segment(int cpu)
 	pack_descriptor(&gdt, per_cpu_offset(cpu), 0xFFFFF,
 			0x2 | DESCTYPE_S, 0x8);
 	gdt.s = 1;
-	write_gdt_entry(get_cpu_gdt_table(cpu),
+	write_gdt_entry(get_cpu_direct_gdt(cpu),
 			GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S);
 #endif
 }
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 46732dc3b73c..0b1096685cda 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -974,7 +974,7 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
 	unsigned long timeout;
 
 	idle->thread.sp = (unsigned long)task_pt_regs(idle);
-	early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
+	early_gdt_descr.address = (unsigned long)get_cpu_direct_gdt(cpu);
 	initial_code = (unsigned long)start_secondary;
 	initial_stack  = idle->thread.sp;
 
diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c
index cef39b097649..72d41dddbaf5 100644
--- a/arch/x86/platform/efi/efi_32.c
+++ b/arch/x86/platform/efi/efi_32.c
@@ -68,7 +68,7 @@ pgd_t * __init efi_call_phys_prolog(void)
 	load_cr3(initial_page_table);
 	__flush_tlb_all();
 
-	gdt_descr.address = __pa(get_cpu_gdt_table(0));
+	gdt_descr.address = __pa(get_cpu_direct_gdt(0));
 	gdt_descr.size = GDT_SIZE - 1;
 	load_gdt(&gdt_descr);
 
@@ -79,7 +79,7 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd)
 {
 	struct desc_ptr gdt_descr;
 
-	gdt_descr.address = (unsigned long)get_cpu_gdt_table(0);
+	gdt_descr.address = (unsigned long)get_cpu_direct_gdt(0);
 	gdt_descr.size = GDT_SIZE - 1;
 	load_gdt(&gdt_descr);
 
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 66ade16c7693..644f80341f12 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -95,7 +95,7 @@ static void __save_processor_state(struct saved_context *ctxt)
 	 * 'pmode_gdt' in wakeup_start.
 	 */
 	ctxt->gdt_desc.size = GDT_SIZE - 1;
-	ctxt->gdt_desc.address = (unsigned long)get_cpu_gdt_table(smp_processor_id());
+	ctxt->gdt_desc.address = (unsigned long)get_cpu_direct_gdt(smp_processor_id());
 
 	store_tr(ctxt->tr);
 
@@ -162,7 +162,7 @@ static void fix_processor_context(void)
 	int cpu = smp_processor_id();
 	struct tss_struct *t = &per_cpu(cpu_tss, cpu);
 #ifdef CONFIG_X86_64
-	struct desc_struct *desc = get_cpu_gdt_table(cpu);
+	struct desc_struct *desc = get_cpu_direct_gdt(cpu);
 	tss_desc tss;
 #endif
 	set_tss_desc(cpu, t);	/*
@@ -183,6 +183,9 @@ static void fix_processor_context(void)
 	load_mm_ldt(current->active_mm);	/* This does lldt */
 
 	fpu__resume_cpu();
+
+	/* The processor is back on the direct GDT, load back the fixmap */
+	load_fixmap_gdt(cpu);
 }
 
 /**
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 51ef95232725..b1a9baad2f0d 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -695,7 +695,7 @@ static void load_TLS_descriptor(struct thread_struct *t,
 
 	*shadow = t->tls_array[i];
 
-	gdt = get_cpu_gdt_table(cpu);
+	gdt = get_cpu_direct_gdt(cpu);
 	maddr = arbitrary_virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]);
 	mc = __xen_mc_entry(0);
 
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 311acad7dad2..3de63fb4d833 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -401,7 +401,7 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
 	if (ctxt == NULL)
 		return -ENOMEM;
 
-	gdt = get_cpu_gdt_table(cpu);
+	gdt = get_cpu_direct_gdt(cpu);
 
 #ifdef CONFIG_X86_32
 	/* Note: PVH is not yet supported on x86_32. */
diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c
index d71f6323ac00..aa9d420913c5 100644
--- a/drivers/lguest/x86/core.c
+++ b/drivers/lguest/x86/core.c
@@ -504,7 +504,7 @@ void __init lguest_arch_host_init(void)
 		 * byte, not the size, hence the "-1").
 		 */
 		state->host_gdt_desc.size = GDT_SIZE-1;
-		state->host_gdt_desc.address = (long)get_cpu_gdt_table(i);
+		state->host_gdt_desc.address = (long)get_cpu_direct_gdt(i);
 
 		/*
 		 * All CPUs on the Host use the same Interrupt Descriptor
@@ -554,8 +554,8 @@ void __init lguest_arch_host_init(void)
 		 * The Host needs to be able to use the LGUEST segments on this
 		 * CPU, too, so put them in the Host GDT.
 		 */
-		get_cpu_gdt_table(i)[GDT_ENTRY_LGUEST_CS] = FULL_EXEC_SEGMENT;
-		get_cpu_gdt_table(i)[GDT_ENTRY_LGUEST_DS] = FULL_SEGMENT;
+		get_cpu_direct_gdt(i)[GDT_ENTRY_LGUEST_CS] = FULL_EXEC_SEGMENT;
+		get_cpu_direct_gdt(i)[GDT_ENTRY_LGUEST_DS] = FULL_SEGMENT;
 	}
 
 	/*
diff --git a/drivers/pnp/pnpbios/bioscalls.c b/drivers/pnp/pnpbios/bioscalls.c
index 438d4c72c7b3..ce7288c5864c 100644
--- a/drivers/pnp/pnpbios/bioscalls.c
+++ b/drivers/pnp/pnpbios/bioscalls.c
@@ -54,7 +54,7 @@ __asm__(".text			\n"
 
 #define Q2_SET_SEL(cpu, selname, address, size) \
 do { \
-	struct desc_struct *gdt = get_cpu_gdt_table((cpu)); \
+	struct desc_struct *gdt = get_cpu_direct_gdt((cpu)); \
 	set_desc_base(&gdt[(selname) >> 3], (u32)(address)); \
 	set_desc_limit(&gdt[(selname) >> 3], (size) - 1); \
 } while(0)
@@ -95,8 +95,8 @@ static inline u16 call_pnp_bios(u16 func, u16 arg1, u16 arg2, u16 arg3,
 		return PNP_FUNCTION_NOT_SUPPORTED;
 
 	cpu = get_cpu();
-	save_desc_40 = get_cpu_gdt_table(cpu)[0x40 / 8];
-	get_cpu_gdt_table(cpu)[0x40 / 8] = bad_bios_desc;
+	save_desc_40 = get_cpu_direct_gdt(cpu)[0x40 / 8];
+	get_cpu_direct_gdt(cpu)[0x40 / 8] = bad_bios_desc;
 
 	/* On some boxes IRQ's during PnP BIOS calls are deadly.  */
 	spin_lock_irqsave(&pnp_bios_lock, flags);
@@ -134,7 +134,7 @@ static inline u16 call_pnp_bios(u16 func, u16 arg1, u16 arg2, u16 arg3,
 			     :"memory");
 	spin_unlock_irqrestore(&pnp_bios_lock, flags);
 
-	get_cpu_gdt_table(cpu)[0x40 / 8] = save_desc_40;
+	get_cpu_direct_gdt(cpu)[0x40 / 8] = save_desc_40;
 	put_cpu();
 
 	/* If we get here and this is set then the PnP BIOS faulted on us. */
@@ -477,7 +477,7 @@ void pnpbios_calls_init(union pnp_bios_install_struct *header)
 	pnp_bios_callpoint.segment = PNP_CS16;
 
 	for_each_possible_cpu(i) {
-		struct desc_struct *gdt = get_cpu_gdt_table(i);
+		struct desc_struct *gdt = get_cpu_direct_gdt(i);
 		if (!gdt)
 			continue;
 		set_desc_base(&gdt[GDT_ENTRY_PNPBIOS_CS32],
-- 
2.11.0.483.g087da7b7c-goog

^ permalink raw reply related	[flat|nested] 38+ messages in thread

* [kernel-hardening] [PATCH v2 2/3] x86: Remap GDT tables in the Fixmap section
@ 2017-01-26 16:59   ` Thomas Garnier
  0 siblings, 0 replies; 38+ messages in thread
From: Thomas Garnier @ 2017-01-26 16:59 UTC (permalink / raw)
  To: Thomas Gleixner, Ingo Molnar, H . Peter Anvin, Andrey Ryabinin,
	Alexander Potapenko, Dmitry Vyukov, Thomas Garnier, Kees Cook,
	Andy Lutomirski, Arjan van de Ven, Paul Gortmaker,
	Borislav Petkov, Andy Lutomirski, Rafael J . Wysocki, Len Brown,
	Pavel Machek, Jiri Kosina, Matt Fleming, Ard Biesheuvel,
	Boris Ostrovsky, Juergen Gross, Rusty Russell,
	Christian Borntraeger, Fenghua Yu, He Chen, Brian Gerst,
	Luis R . Rodriguez, Adam Buchbinder, Stanislaw Gruszka,
	Arnd Bergmann, Dave Hansen, Chen Yucong, Vitaly Kuznetsov,
	David Vrabel, Josh Poimboeuf, Tim Chen, Rik van Riel, Andi Kleen,
	Jiri Olsa, Prarit Bhargava, Michael Ellerman, Joerg Roedel,
	Paolo Bonzini, Radim Krčmář
  Cc: x86, linux-kernel, kasan-dev, linux-pm, linux-efi, xen-devel,
	lguest, kvm, kernel-hardening

Each processor holds a GDT in its per-cpu structure. The sgdt
instruction gives the base address of the current GDT. This address can
be used to bypass KASLR memory randomization. With another bug, an
attacker could target other per-cpu structures or deduce the base of
the main memory section (PAGE_OFFSET).

This patch relocates the GDT table for each processor inside the
Fixmap section. The space is reserved based on number of supported
processors.

For consistency, the remapping is done by default on 32 and 64 bit.

Each processor switches to its remapped GDT at the end of
initialization. For hibernation, the main processor returns with the
original GDT and switches back to the remapping at completion.

This patch was tested on both architectures. Hibernation and KVM were
both tested specially for their usage of the GDT.

Signed-off-by: Thomas Garnier <thgarnie@google.com>
---
Based on next-20170125
---
 arch/x86/entry/vdso/vma.c             |  2 +-
 arch/x86/include/asm/desc.h           | 23 +++++++++++++++++++----
 arch/x86/include/asm/fixmap.h         |  4 ++++
 arch/x86/include/asm/processor.h      |  1 +
 arch/x86/include/asm/stackprotector.h |  2 +-
 arch/x86/kernel/acpi/sleep.c          |  2 +-
 arch/x86/kernel/apm_32.c              |  6 +++---
 arch/x86/kernel/cpu/common.c          | 26 ++++++++++++++++++++++++--
 arch/x86/kernel/setup_percpu.c        |  2 +-
 arch/x86/kernel/smpboot.c             |  2 +-
 arch/x86/platform/efi/efi_32.c        |  4 ++--
 arch/x86/power/cpu.c                  |  7 +++++--
 arch/x86/xen/enlighten.c              |  2 +-
 arch/x86/xen/smp.c                    |  2 +-
 drivers/lguest/x86/core.c             |  6 +++---
 drivers/pnp/pnpbios/bioscalls.c       | 10 +++++-----
 16 files changed, 73 insertions(+), 28 deletions(-)

diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 10820f6cefbf..acae4ef04cdf 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -353,7 +353,7 @@ static void vgetcpu_cpu_init(void *arg)
 	d.p = 1;		/* Present */
 	d.d = 1;		/* 32-bit */
 
-	write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
+	write_gdt_entry(get_cpu_direct_gdt(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
 }
 
 static int vgetcpu_online(unsigned int cpu)
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 12080d87da3b..4cc176f57b78 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -4,6 +4,7 @@
 #include <asm/desc_defs.h>
 #include <asm/ldt.h>
 #include <asm/mmu.h>
+#include <asm/fixmap.h>
 
 #include <linux/smp.h>
 #include <linux/percpu.h>
@@ -45,11 +46,25 @@ struct gdt_page {
 
 DECLARE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page);
 
-static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu)
+/* Provide the original GDT */
+static inline struct desc_struct *get_cpu_direct_gdt(unsigned int cpu)
 {
 	return per_cpu(gdt_page, cpu).gdt;
 }
 
+/* Get the fixmap index for a specific processor */
+static inline unsigned int get_cpu_fixmap_gdt_index(int cpu)
+{
+	return FIX_GDT_REMAP_BEGIN + cpu;
+}
+
+/* Provide the fixmap address of the remapped GDT */
+static inline struct desc_struct *get_cpu_fixmap_gdt(int cpu)
+{
+	unsigned int idx = get_cpu_fixmap_gdt_index(cpu);
+	return (struct desc_struct *)__fix_to_virt(idx);
+}
+
 #ifdef CONFIG_X86_64
 
 static inline void pack_gate(gate_desc *gate, unsigned type, unsigned long func,
@@ -174,7 +189,7 @@ static inline void set_tssldt_descriptor(void *d, unsigned long addr, unsigned t
 
 static inline void __set_tss_desc(unsigned cpu, unsigned int entry, void *addr)
 {
-	struct desc_struct *d = get_cpu_gdt_table(cpu);
+	struct desc_struct *d = get_cpu_direct_gdt(cpu);
 	tss_desc tss;
 
 	/*
@@ -202,7 +217,7 @@ static inline void native_set_ldt(const void *addr, unsigned int entries)
 
 		set_tssldt_descriptor(&ldt, (unsigned long)addr, DESC_LDT,
 				      entries * LDT_ENTRY_SIZE - 1);
-		write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT,
+		write_gdt_entry(get_cpu_direct_gdt(cpu), GDT_ENTRY_LDT,
 				&ldt, DESC_LDT);
 		asm volatile("lldt %w0"::"q" (GDT_ENTRY_LDT*8));
 	}
@@ -244,7 +259,7 @@ static inline unsigned long native_store_tr(void)
 
 static inline void native_load_tls(struct thread_struct *t, unsigned int cpu)
 {
-	struct desc_struct *gdt = get_cpu_gdt_table(cpu);
+	struct desc_struct *gdt = get_cpu_direct_gdt(cpu);
 	unsigned int i;
 
 	for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++)
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index c46289799b02..8b913b5e9383 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -100,6 +100,10 @@ enum fixed_addresses {
 #ifdef	CONFIG_X86_INTEL_MID
 	FIX_LNW_VRTC,
 #endif
+	/* Fixmap entries to remap the GDTs, one per processor. */
+	FIX_GDT_REMAP_BEGIN,
+	FIX_GDT_REMAP_END = FIX_GDT_REMAP_BEGIN + NR_CPUS - 1,
+
 	__end_of_permanent_fixed_addresses,
 
 	/*
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 1be64da0384e..22801fd345dc 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -705,6 +705,7 @@ extern struct desc_ptr		early_gdt_descr;
 
 extern void cpu_set_gdt(int);
 extern void switch_to_new_gdt(int);
+extern void load_fixmap_gdt(int);
 extern void load_percpu_segment(int);
 extern void cpu_init(void);
 
diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
index 58505f01962f..422426b8aba7 100644
--- a/arch/x86/include/asm/stackprotector.h
+++ b/arch/x86/include/asm/stackprotector.h
@@ -87,7 +87,7 @@ static inline void setup_stack_canary_segment(int cpu)
 {
 #ifdef CONFIG_X86_32
 	unsigned long canary = (unsigned long)&per_cpu(stack_canary, cpu);
-	struct desc_struct *gdt_table = get_cpu_gdt_table(cpu);
+	struct desc_struct *gdt_table = get_cpu_direct_gdt(cpu);
 	struct desc_struct desc;
 
 	desc = gdt_table[GDT_ENTRY_STACK_CANARY];
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 48587335ede8..d13e6d156c22 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -101,7 +101,7 @@ int x86_acpi_suspend_lowlevel(void)
 #ifdef CONFIG_SMP
 	initial_stack = (unsigned long)temp_stack + sizeof(temp_stack);
 	early_gdt_descr.address =
-			(unsigned long)get_cpu_gdt_table(smp_processor_id());
+			(unsigned long)get_cpu_direct_gdt(smp_processor_id());
 	initial_gs = per_cpu_offset(smp_processor_id());
 #endif
 	initial_code = (unsigned long)wakeup_long64;
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 45d44c173cf9..458dbe18a860 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -608,7 +608,7 @@ static long __apm_bios_call(void *_call)
 
 	cpu = get_cpu();
 	BUG_ON(cpu != 0);
-	gdt = get_cpu_gdt_table(cpu);
+	gdt = get_cpu_direct_gdt(cpu);
 	save_desc_40 = gdt[0x40 / 8];
 	gdt[0x40 / 8] = bad_bios_desc;
 
@@ -684,7 +684,7 @@ static long __apm_bios_call_simple(void *_call)
 
 	cpu = get_cpu();
 	BUG_ON(cpu != 0);
-	gdt = get_cpu_gdt_table(cpu);
+	gdt = get_cpu_direct_gdt(cpu);
 	save_desc_40 = gdt[0x40 / 8];
 	gdt[0x40 / 8] = bad_bios_desc;
 
@@ -2351,7 +2351,7 @@ static int __init apm_init(void)
 	 * Note we only set APM segments on CPU zero, since we pin the APM
 	 * code to that CPU.
 	 */
-	gdt = get_cpu_gdt_table(0);
+	gdt = get_cpu_direct_gdt(0);
 	set_desc_base(&gdt[APM_CS >> 3],
 		 (unsigned long)__va((unsigned long)apm_info.bios.cseg << 4));
 	set_desc_base(&gdt[APM_CS_16 >> 3],
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 5ba74f280448..15f06cf3e3d4 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -444,6 +444,23 @@ void load_percpu_segment(int cpu)
 	load_stack_canary_segment();
 }
 
+/* Setup the fixmap mapping only once per-processor */
+static inline void setup_fixmap_gdt(int cpu)
+{
+	__set_fixmap(get_cpu_fixmap_gdt_index(cpu),
+		     __pa(get_cpu_direct_gdt(cpu)), PAGE_KERNEL);
+}
+
+/* Load a fixmap remapping of the per-cpu GDT */
+void load_fixmap_gdt(int cpu)
+{
+	struct desc_ptr gdt_descr;
+
+	gdt_descr.address = (long)get_cpu_fixmap_gdt(cpu);
+	gdt_descr.size = GDT_SIZE - 1;
+	load_gdt(&gdt_descr);
+}
+
 /*
  * Current gdt points %fs at the "master" per-cpu area: after this,
  * it's on the real one.
@@ -452,11 +469,10 @@ void switch_to_new_gdt(int cpu)
 {
 	struct desc_ptr gdt_descr;
 
-	gdt_descr.address = (long)get_cpu_gdt_table(cpu);
+	gdt_descr.address = (long)get_cpu_direct_gdt(cpu);
 	gdt_descr.size = GDT_SIZE - 1;
 	load_gdt(&gdt_descr);
 	/* Reload the per-cpu base */
-
 	load_percpu_segment(cpu);
 }
 
@@ -1511,6 +1527,9 @@ void cpu_init(void)
 
 	if (is_uv_system())
 		uv_cpu_init();
+
+	setup_fixmap_gdt(cpu);
+	load_fixmap_gdt(cpu);
 }
 
 #else
@@ -1566,6 +1585,9 @@ void cpu_init(void)
 	dbg_restore_debug_regs();
 
 	fpu__init_cpu();
+
+	setup_fixmap_gdt(cpu);
+	load_fixmap_gdt(cpu);
 }
 #endif
 
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 9820d6d977c6..e413c7607316 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -160,7 +160,7 @@ static inline void setup_percpu_segment(int cpu)
 	pack_descriptor(&gdt, per_cpu_offset(cpu), 0xFFFFF,
 			0x2 | DESCTYPE_S, 0x8);
 	gdt.s = 1;
-	write_gdt_entry(get_cpu_gdt_table(cpu),
+	write_gdt_entry(get_cpu_direct_gdt(cpu),
 			GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S);
 #endif
 }
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 46732dc3b73c..0b1096685cda 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -974,7 +974,7 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
 	unsigned long timeout;
 
 	idle->thread.sp = (unsigned long)task_pt_regs(idle);
-	early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
+	early_gdt_descr.address = (unsigned long)get_cpu_direct_gdt(cpu);
 	initial_code = (unsigned long)start_secondary;
 	initial_stack  = idle->thread.sp;
 
diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c
index cef39b097649..72d41dddbaf5 100644
--- a/arch/x86/platform/efi/efi_32.c
+++ b/arch/x86/platform/efi/efi_32.c
@@ -68,7 +68,7 @@ pgd_t * __init efi_call_phys_prolog(void)
 	load_cr3(initial_page_table);
 	__flush_tlb_all();
 
-	gdt_descr.address = __pa(get_cpu_gdt_table(0));
+	gdt_descr.address = __pa(get_cpu_direct_gdt(0));
 	gdt_descr.size = GDT_SIZE - 1;
 	load_gdt(&gdt_descr);
 
@@ -79,7 +79,7 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd)
 {
 	struct desc_ptr gdt_descr;
 
-	gdt_descr.address = (unsigned long)get_cpu_gdt_table(0);
+	gdt_descr.address = (unsigned long)get_cpu_direct_gdt(0);
 	gdt_descr.size = GDT_SIZE - 1;
 	load_gdt(&gdt_descr);
 
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 66ade16c7693..644f80341f12 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -95,7 +95,7 @@ static void __save_processor_state(struct saved_context *ctxt)
 	 * 'pmode_gdt' in wakeup_start.
 	 */
 	ctxt->gdt_desc.size = GDT_SIZE - 1;
-	ctxt->gdt_desc.address = (unsigned long)get_cpu_gdt_table(smp_processor_id());
+	ctxt->gdt_desc.address = (unsigned long)get_cpu_direct_gdt(smp_processor_id());
 
 	store_tr(ctxt->tr);
 
@@ -162,7 +162,7 @@ static void fix_processor_context(void)
 	int cpu = smp_processor_id();
 	struct tss_struct *t = &per_cpu(cpu_tss, cpu);
 #ifdef CONFIG_X86_64
-	struct desc_struct *desc = get_cpu_gdt_table(cpu);
+	struct desc_struct *desc = get_cpu_direct_gdt(cpu);
 	tss_desc tss;
 #endif
 	set_tss_desc(cpu, t);	/*
@@ -183,6 +183,9 @@ static void fix_processor_context(void)
 	load_mm_ldt(current->active_mm);	/* This does lldt */
 
 	fpu__resume_cpu();
+
+	/* The processor is back on the direct GDT, load back the fixmap */
+	load_fixmap_gdt(cpu);
 }
 
 /**
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 51ef95232725..b1a9baad2f0d 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -695,7 +695,7 @@ static void load_TLS_descriptor(struct thread_struct *t,
 
 	*shadow = t->tls_array[i];
 
-	gdt = get_cpu_gdt_table(cpu);
+	gdt = get_cpu_direct_gdt(cpu);
 	maddr = arbitrary_virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]);
 	mc = __xen_mc_entry(0);
 
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 311acad7dad2..3de63fb4d833 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -401,7 +401,7 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
 	if (ctxt == NULL)
 		return -ENOMEM;
 
-	gdt = get_cpu_gdt_table(cpu);
+	gdt = get_cpu_direct_gdt(cpu);
 
 #ifdef CONFIG_X86_32
 	/* Note: PVH is not yet supported on x86_32. */
diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c
index d71f6323ac00..aa9d420913c5 100644
--- a/drivers/lguest/x86/core.c
+++ b/drivers/lguest/x86/core.c
@@ -504,7 +504,7 @@ void __init lguest_arch_host_init(void)
 		 * byte, not the size, hence the "-1").
 		 */
 		state->host_gdt_desc.size = GDT_SIZE-1;
-		state->host_gdt_desc.address = (long)get_cpu_gdt_table(i);
+		state->host_gdt_desc.address = (long)get_cpu_direct_gdt(i);
 
 		/*
 		 * All CPUs on the Host use the same Interrupt Descriptor
@@ -554,8 +554,8 @@ void __init lguest_arch_host_init(void)
 		 * The Host needs to be able to use the LGUEST segments on this
 		 * CPU, too, so put them in the Host GDT.
 		 */
-		get_cpu_gdt_table(i)[GDT_ENTRY_LGUEST_CS] = FULL_EXEC_SEGMENT;
-		get_cpu_gdt_table(i)[GDT_ENTRY_LGUEST_DS] = FULL_SEGMENT;
+		get_cpu_direct_gdt(i)[GDT_ENTRY_LGUEST_CS] = FULL_EXEC_SEGMENT;
+		get_cpu_direct_gdt(i)[GDT_ENTRY_LGUEST_DS] = FULL_SEGMENT;
 	}
 
 	/*
diff --git a/drivers/pnp/pnpbios/bioscalls.c b/drivers/pnp/pnpbios/bioscalls.c
index 438d4c72c7b3..ce7288c5864c 100644
--- a/drivers/pnp/pnpbios/bioscalls.c
+++ b/drivers/pnp/pnpbios/bioscalls.c
@@ -54,7 +54,7 @@ __asm__(".text			\n"
 
 #define Q2_SET_SEL(cpu, selname, address, size) \
 do { \
-	struct desc_struct *gdt = get_cpu_gdt_table((cpu)); \
+	struct desc_struct *gdt = get_cpu_direct_gdt((cpu)); \
 	set_desc_base(&gdt[(selname) >> 3], (u32)(address)); \
 	set_desc_limit(&gdt[(selname) >> 3], (size) - 1); \
 } while(0)
@@ -95,8 +95,8 @@ static inline u16 call_pnp_bios(u16 func, u16 arg1, u16 arg2, u16 arg3,
 		return PNP_FUNCTION_NOT_SUPPORTED;
 
 	cpu = get_cpu();
-	save_desc_40 = get_cpu_gdt_table(cpu)[0x40 / 8];
-	get_cpu_gdt_table(cpu)[0x40 / 8] = bad_bios_desc;
+	save_desc_40 = get_cpu_direct_gdt(cpu)[0x40 / 8];
+	get_cpu_direct_gdt(cpu)[0x40 / 8] = bad_bios_desc;
 
 	/* On some boxes IRQ's during PnP BIOS calls are deadly.  */
 	spin_lock_irqsave(&pnp_bios_lock, flags);
@@ -134,7 +134,7 @@ static inline u16 call_pnp_bios(u16 func, u16 arg1, u16 arg2, u16 arg3,
 			     :"memory");
 	spin_unlock_irqrestore(&pnp_bios_lock, flags);
 
-	get_cpu_gdt_table(cpu)[0x40 / 8] = save_desc_40;
+	get_cpu_direct_gdt(cpu)[0x40 / 8] = save_desc_40;
 	put_cpu();
 
 	/* If we get here and this is set then the PnP BIOS faulted on us. */
@@ -477,7 +477,7 @@ void pnpbios_calls_init(union pnp_bios_install_struct *header)
 	pnp_bios_callpoint.segment = PNP_CS16;
 
 	for_each_possible_cpu(i) {
-		struct desc_struct *gdt = get_cpu_gdt_table(i);
+		struct desc_struct *gdt = get_cpu_direct_gdt(i);
 		if (!gdt)
 			continue;
 		set_desc_base(&gdt[GDT_ENTRY_PNPBIOS_CS32],
-- 
2.11.0.483.g087da7b7c-goog

^ permalink raw reply related	[flat|nested] 38+ messages in thread

* [PATCH v2 2/3] x86: Remap GDT tables in the Fixmap section
  2017-01-26 16:59 ` Thomas Garnier
                   ` (2 preceding siblings ...)
  (?)
@ 2017-01-26 16:59 ` Thomas Garnier
  -1 siblings, 0 replies; 38+ messages in thread
From: Thomas Garnier @ 2017-01-26 16:59 UTC (permalink / raw)
  To: Thomas Gleixner, Ingo Molnar, H . Peter Anvin, Andrey Ryabinin,
	Alexander Potapenko, Dmitry Vyukov, Thomas Garnier, Kees Cook,
	Andy Lutomirski, Arjan van de Ven, Paul Gortmaker,
	Borislav Petkov, Andy Lutomirski, Rafael J . Wysocki, Len Brown,
	Pavel Machek, Jiri Kosina, Matt Fleming, Ard Biesheuvel,
	Boris Ostrovsky, Juergen Gross, Rusty Russell,
	Christian Borntraeger, Fe
  Cc: linux-efi, kvm, linux-pm, x86, linux-kernel, kasan-dev, lguest,
	kernel-hardening, xen-devel

Each processor holds a GDT in its per-cpu structure. The sgdt
instruction gives the base address of the current GDT. This address can
be used to bypass KASLR memory randomization. With another bug, an
attacker could target other per-cpu structures or deduce the base of
the main memory section (PAGE_OFFSET).

This patch relocates the GDT table for each processor inside the
Fixmap section. The space is reserved based on number of supported
processors.

For consistency, the remapping is done by default on 32 and 64 bit.

Each processor switches to its remapped GDT at the end of
initialization. For hibernation, the main processor returns with the
original GDT and switches back to the remapping at completion.

This patch was tested on both architectures. Hibernation and KVM were
both tested specially for their usage of the GDT.

Signed-off-by: Thomas Garnier <thgarnie@google.com>
---
Based on next-20170125
---
 arch/x86/entry/vdso/vma.c             |  2 +-
 arch/x86/include/asm/desc.h           | 23 +++++++++++++++++++----
 arch/x86/include/asm/fixmap.h         |  4 ++++
 arch/x86/include/asm/processor.h      |  1 +
 arch/x86/include/asm/stackprotector.h |  2 +-
 arch/x86/kernel/acpi/sleep.c          |  2 +-
 arch/x86/kernel/apm_32.c              |  6 +++---
 arch/x86/kernel/cpu/common.c          | 26 ++++++++++++++++++++++++--
 arch/x86/kernel/setup_percpu.c        |  2 +-
 arch/x86/kernel/smpboot.c             |  2 +-
 arch/x86/platform/efi/efi_32.c        |  4 ++--
 arch/x86/power/cpu.c                  |  7 +++++--
 arch/x86/xen/enlighten.c              |  2 +-
 arch/x86/xen/smp.c                    |  2 +-
 drivers/lguest/x86/core.c             |  6 +++---
 drivers/pnp/pnpbios/bioscalls.c       | 10 +++++-----
 16 files changed, 73 insertions(+), 28 deletions(-)

diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 10820f6cefbf..acae4ef04cdf 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -353,7 +353,7 @@ static void vgetcpu_cpu_init(void *arg)
 	d.p = 1;		/* Present */
 	d.d = 1;		/* 32-bit */
 
-	write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
+	write_gdt_entry(get_cpu_direct_gdt(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
 }
 
 static int vgetcpu_online(unsigned int cpu)
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 12080d87da3b..4cc176f57b78 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -4,6 +4,7 @@
 #include <asm/desc_defs.h>
 #include <asm/ldt.h>
 #include <asm/mmu.h>
+#include <asm/fixmap.h>
 
 #include <linux/smp.h>
 #include <linux/percpu.h>
@@ -45,11 +46,25 @@ struct gdt_page {
 
 DECLARE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page);
 
-static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu)
+/* Provide the original GDT */
+static inline struct desc_struct *get_cpu_direct_gdt(unsigned int cpu)
 {
 	return per_cpu(gdt_page, cpu).gdt;
 }
 
+/* Get the fixmap index for a specific processor */
+static inline unsigned int get_cpu_fixmap_gdt_index(int cpu)
+{
+	return FIX_GDT_REMAP_BEGIN + cpu;
+}
+
+/* Provide the fixmap address of the remapped GDT */
+static inline struct desc_struct *get_cpu_fixmap_gdt(int cpu)
+{
+	unsigned int idx = get_cpu_fixmap_gdt_index(cpu);
+	return (struct desc_struct *)__fix_to_virt(idx);
+}
+
 #ifdef CONFIG_X86_64
 
 static inline void pack_gate(gate_desc *gate, unsigned type, unsigned long func,
@@ -174,7 +189,7 @@ static inline void set_tssldt_descriptor(void *d, unsigned long addr, unsigned t
 
 static inline void __set_tss_desc(unsigned cpu, unsigned int entry, void *addr)
 {
-	struct desc_struct *d = get_cpu_gdt_table(cpu);
+	struct desc_struct *d = get_cpu_direct_gdt(cpu);
 	tss_desc tss;
 
 	/*
@@ -202,7 +217,7 @@ static inline void native_set_ldt(const void *addr, unsigned int entries)
 
 		set_tssldt_descriptor(&ldt, (unsigned long)addr, DESC_LDT,
 				      entries * LDT_ENTRY_SIZE - 1);
-		write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT,
+		write_gdt_entry(get_cpu_direct_gdt(cpu), GDT_ENTRY_LDT,
 				&ldt, DESC_LDT);
 		asm volatile("lldt %w0"::"q" (GDT_ENTRY_LDT*8));
 	}
@@ -244,7 +259,7 @@ static inline unsigned long native_store_tr(void)
 
 static inline void native_load_tls(struct thread_struct *t, unsigned int cpu)
 {
-	struct desc_struct *gdt = get_cpu_gdt_table(cpu);
+	struct desc_struct *gdt = get_cpu_direct_gdt(cpu);
 	unsigned int i;
 
 	for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++)
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index c46289799b02..8b913b5e9383 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -100,6 +100,10 @@ enum fixed_addresses {
 #ifdef	CONFIG_X86_INTEL_MID
 	FIX_LNW_VRTC,
 #endif
+	/* Fixmap entries to remap the GDTs, one per processor. */
+	FIX_GDT_REMAP_BEGIN,
+	FIX_GDT_REMAP_END = FIX_GDT_REMAP_BEGIN + NR_CPUS - 1,
+
 	__end_of_permanent_fixed_addresses,
 
 	/*
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 1be64da0384e..22801fd345dc 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -705,6 +705,7 @@ extern struct desc_ptr		early_gdt_descr;
 
 extern void cpu_set_gdt(int);
 extern void switch_to_new_gdt(int);
+extern void load_fixmap_gdt(int);
 extern void load_percpu_segment(int);
 extern void cpu_init(void);
 
diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
index 58505f01962f..422426b8aba7 100644
--- a/arch/x86/include/asm/stackprotector.h
+++ b/arch/x86/include/asm/stackprotector.h
@@ -87,7 +87,7 @@ static inline void setup_stack_canary_segment(int cpu)
 {
 #ifdef CONFIG_X86_32
 	unsigned long canary = (unsigned long)&per_cpu(stack_canary, cpu);
-	struct desc_struct *gdt_table = get_cpu_gdt_table(cpu);
+	struct desc_struct *gdt_table = get_cpu_direct_gdt(cpu);
 	struct desc_struct desc;
 
 	desc = gdt_table[GDT_ENTRY_STACK_CANARY];
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 48587335ede8..d13e6d156c22 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -101,7 +101,7 @@ int x86_acpi_suspend_lowlevel(void)
 #ifdef CONFIG_SMP
 	initial_stack = (unsigned long)temp_stack + sizeof(temp_stack);
 	early_gdt_descr.address =
-			(unsigned long)get_cpu_gdt_table(smp_processor_id());
+			(unsigned long)get_cpu_direct_gdt(smp_processor_id());
 	initial_gs = per_cpu_offset(smp_processor_id());
 #endif
 	initial_code = (unsigned long)wakeup_long64;
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 45d44c173cf9..458dbe18a860 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -608,7 +608,7 @@ static long __apm_bios_call(void *_call)
 
 	cpu = get_cpu();
 	BUG_ON(cpu != 0);
-	gdt = get_cpu_gdt_table(cpu);
+	gdt = get_cpu_direct_gdt(cpu);
 	save_desc_40 = gdt[0x40 / 8];
 	gdt[0x40 / 8] = bad_bios_desc;
 
@@ -684,7 +684,7 @@ static long __apm_bios_call_simple(void *_call)
 
 	cpu = get_cpu();
 	BUG_ON(cpu != 0);
-	gdt = get_cpu_gdt_table(cpu);
+	gdt = get_cpu_direct_gdt(cpu);
 	save_desc_40 = gdt[0x40 / 8];
 	gdt[0x40 / 8] = bad_bios_desc;
 
@@ -2351,7 +2351,7 @@ static int __init apm_init(void)
 	 * Note we only set APM segments on CPU zero, since we pin the APM
 	 * code to that CPU.
 	 */
-	gdt = get_cpu_gdt_table(0);
+	gdt = get_cpu_direct_gdt(0);
 	set_desc_base(&gdt[APM_CS >> 3],
 		 (unsigned long)__va((unsigned long)apm_info.bios.cseg << 4));
 	set_desc_base(&gdt[APM_CS_16 >> 3],
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 5ba74f280448..15f06cf3e3d4 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -444,6 +444,23 @@ void load_percpu_segment(int cpu)
 	load_stack_canary_segment();
 }
 
+/* Setup the fixmap mapping only once per-processor */
+static inline void setup_fixmap_gdt(int cpu)
+{
+	__set_fixmap(get_cpu_fixmap_gdt_index(cpu),
+		     __pa(get_cpu_direct_gdt(cpu)), PAGE_KERNEL);
+}
+
+/* Load a fixmap remapping of the per-cpu GDT */
+void load_fixmap_gdt(int cpu)
+{
+	struct desc_ptr gdt_descr;
+
+	gdt_descr.address = (long)get_cpu_fixmap_gdt(cpu);
+	gdt_descr.size = GDT_SIZE - 1;
+	load_gdt(&gdt_descr);
+}
+
 /*
  * Current gdt points %fs at the "master" per-cpu area: after this,
  * it's on the real one.
@@ -452,11 +469,10 @@ void switch_to_new_gdt(int cpu)
 {
 	struct desc_ptr gdt_descr;
 
-	gdt_descr.address = (long)get_cpu_gdt_table(cpu);
+	gdt_descr.address = (long)get_cpu_direct_gdt(cpu);
 	gdt_descr.size = GDT_SIZE - 1;
 	load_gdt(&gdt_descr);
 	/* Reload the per-cpu base */
-
 	load_percpu_segment(cpu);
 }
 
@@ -1511,6 +1527,9 @@ void cpu_init(void)
 
 	if (is_uv_system())
 		uv_cpu_init();
+
+	setup_fixmap_gdt(cpu);
+	load_fixmap_gdt(cpu);
 }
 
 #else
@@ -1566,6 +1585,9 @@ void cpu_init(void)
 	dbg_restore_debug_regs();
 
 	fpu__init_cpu();
+
+	setup_fixmap_gdt(cpu);
+	load_fixmap_gdt(cpu);
 }
 #endif
 
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 9820d6d977c6..e413c7607316 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -160,7 +160,7 @@ static inline void setup_percpu_segment(int cpu)
 	pack_descriptor(&gdt, per_cpu_offset(cpu), 0xFFFFF,
 			0x2 | DESCTYPE_S, 0x8);
 	gdt.s = 1;
-	write_gdt_entry(get_cpu_gdt_table(cpu),
+	write_gdt_entry(get_cpu_direct_gdt(cpu),
 			GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S);
 #endif
 }
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 46732dc3b73c..0b1096685cda 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -974,7 +974,7 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
 	unsigned long timeout;
 
 	idle->thread.sp = (unsigned long)task_pt_regs(idle);
-	early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
+	early_gdt_descr.address = (unsigned long)get_cpu_direct_gdt(cpu);
 	initial_code = (unsigned long)start_secondary;
 	initial_stack  = idle->thread.sp;
 
diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c
index cef39b097649..72d41dddbaf5 100644
--- a/arch/x86/platform/efi/efi_32.c
+++ b/arch/x86/platform/efi/efi_32.c
@@ -68,7 +68,7 @@ pgd_t * __init efi_call_phys_prolog(void)
 	load_cr3(initial_page_table);
 	__flush_tlb_all();
 
-	gdt_descr.address = __pa(get_cpu_gdt_table(0));
+	gdt_descr.address = __pa(get_cpu_direct_gdt(0));
 	gdt_descr.size = GDT_SIZE - 1;
 	load_gdt(&gdt_descr);
 
@@ -79,7 +79,7 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd)
 {
 	struct desc_ptr gdt_descr;
 
-	gdt_descr.address = (unsigned long)get_cpu_gdt_table(0);
+	gdt_descr.address = (unsigned long)get_cpu_direct_gdt(0);
 	gdt_descr.size = GDT_SIZE - 1;
 	load_gdt(&gdt_descr);
 
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 66ade16c7693..644f80341f12 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -95,7 +95,7 @@ static void __save_processor_state(struct saved_context *ctxt)
 	 * 'pmode_gdt' in wakeup_start.
 	 */
 	ctxt->gdt_desc.size = GDT_SIZE - 1;
-	ctxt->gdt_desc.address = (unsigned long)get_cpu_gdt_table(smp_processor_id());
+	ctxt->gdt_desc.address = (unsigned long)get_cpu_direct_gdt(smp_processor_id());
 
 	store_tr(ctxt->tr);
 
@@ -162,7 +162,7 @@ static void fix_processor_context(void)
 	int cpu = smp_processor_id();
 	struct tss_struct *t = &per_cpu(cpu_tss, cpu);
 #ifdef CONFIG_X86_64
-	struct desc_struct *desc = get_cpu_gdt_table(cpu);
+	struct desc_struct *desc = get_cpu_direct_gdt(cpu);
 	tss_desc tss;
 #endif
 	set_tss_desc(cpu, t);	/*
@@ -183,6 +183,9 @@ static void fix_processor_context(void)
 	load_mm_ldt(current->active_mm);	/* This does lldt */
 
 	fpu__resume_cpu();
+
+	/* The processor is back on the direct GDT, load back the fixmap */
+	load_fixmap_gdt(cpu);
 }
 
 /**
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 51ef95232725..b1a9baad2f0d 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -695,7 +695,7 @@ static void load_TLS_descriptor(struct thread_struct *t,
 
 	*shadow = t->tls_array[i];
 
-	gdt = get_cpu_gdt_table(cpu);
+	gdt = get_cpu_direct_gdt(cpu);
 	maddr = arbitrary_virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]);
 	mc = __xen_mc_entry(0);
 
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 311acad7dad2..3de63fb4d833 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -401,7 +401,7 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
 	if (ctxt == NULL)
 		return -ENOMEM;
 
-	gdt = get_cpu_gdt_table(cpu);
+	gdt = get_cpu_direct_gdt(cpu);
 
 #ifdef CONFIG_X86_32
 	/* Note: PVH is not yet supported on x86_32. */
diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c
index d71f6323ac00..aa9d420913c5 100644
--- a/drivers/lguest/x86/core.c
+++ b/drivers/lguest/x86/core.c
@@ -504,7 +504,7 @@ void __init lguest_arch_host_init(void)
 		 * byte, not the size, hence the "-1").
 		 */
 		state->host_gdt_desc.size = GDT_SIZE-1;
-		state->host_gdt_desc.address = (long)get_cpu_gdt_table(i);
+		state->host_gdt_desc.address = (long)get_cpu_direct_gdt(i);
 
 		/*
 		 * All CPUs on the Host use the same Interrupt Descriptor
@@ -554,8 +554,8 @@ void __init lguest_arch_host_init(void)
 		 * The Host needs to be able to use the LGUEST segments on this
 		 * CPU, too, so put them in the Host GDT.
 		 */
-		get_cpu_gdt_table(i)[GDT_ENTRY_LGUEST_CS] = FULL_EXEC_SEGMENT;
-		get_cpu_gdt_table(i)[GDT_ENTRY_LGUEST_DS] = FULL_SEGMENT;
+		get_cpu_direct_gdt(i)[GDT_ENTRY_LGUEST_CS] = FULL_EXEC_SEGMENT;
+		get_cpu_direct_gdt(i)[GDT_ENTRY_LGUEST_DS] = FULL_SEGMENT;
 	}
 
 	/*
diff --git a/drivers/pnp/pnpbios/bioscalls.c b/drivers/pnp/pnpbios/bioscalls.c
index 438d4c72c7b3..ce7288c5864c 100644
--- a/drivers/pnp/pnpbios/bioscalls.c
+++ b/drivers/pnp/pnpbios/bioscalls.c
@@ -54,7 +54,7 @@ __asm__(".text			\n"
 
 #define Q2_SET_SEL(cpu, selname, address, size) \
 do { \
-	struct desc_struct *gdt = get_cpu_gdt_table((cpu)); \
+	struct desc_struct *gdt = get_cpu_direct_gdt((cpu)); \
 	set_desc_base(&gdt[(selname) >> 3], (u32)(address)); \
 	set_desc_limit(&gdt[(selname) >> 3], (size) - 1); \
 } while(0)
@@ -95,8 +95,8 @@ static inline u16 call_pnp_bios(u16 func, u16 arg1, u16 arg2, u16 arg3,
 		return PNP_FUNCTION_NOT_SUPPORTED;
 
 	cpu = get_cpu();
-	save_desc_40 = get_cpu_gdt_table(cpu)[0x40 / 8];
-	get_cpu_gdt_table(cpu)[0x40 / 8] = bad_bios_desc;
+	save_desc_40 = get_cpu_direct_gdt(cpu)[0x40 / 8];
+	get_cpu_direct_gdt(cpu)[0x40 / 8] = bad_bios_desc;
 
 	/* On some boxes IRQ's during PnP BIOS calls are deadly.  */
 	spin_lock_irqsave(&pnp_bios_lock, flags);
@@ -134,7 +134,7 @@ static inline u16 call_pnp_bios(u16 func, u16 arg1, u16 arg2, u16 arg3,
 			     :"memory");
 	spin_unlock_irqrestore(&pnp_bios_lock, flags);
 
-	get_cpu_gdt_table(cpu)[0x40 / 8] = save_desc_40;
+	get_cpu_direct_gdt(cpu)[0x40 / 8] = save_desc_40;
 	put_cpu();
 
 	/* If we get here and this is set then the PnP BIOS faulted on us. */
@@ -477,7 +477,7 @@ void pnpbios_calls_init(union pnp_bios_install_struct *header)
 	pnp_bios_callpoint.segment = PNP_CS16;
 
 	for_each_possible_cpu(i) {
-		struct desc_struct *gdt = get_cpu_gdt_table(i);
+		struct desc_struct *gdt = get_cpu_direct_gdt(i);
 		if (!gdt)
 			continue;
 		set_desc_base(&gdt[GDT_ENTRY_PNPBIOS_CS32],
-- 
2.11.0.483.g087da7b7c-goog


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply related	[flat|nested] 38+ messages in thread

* [PATCH v2 3/3] x86: Make the GDT remapping read-only on 64 bit
  2017-01-26 16:59 ` Thomas Garnier
  (?)
  (?)
@ 2017-01-26 16:59   ` Thomas Garnier
  -1 siblings, 0 replies; 38+ messages in thread
From: Thomas Garnier @ 2017-01-26 16:59 UTC (permalink / raw)
  To: Thomas Gleixner, Ingo Molnar, H . Peter Anvin, Andrey Ryabinin,
	Alexander Potapenko, Dmitry Vyukov, Thomas Garnier, Kees Cook,
	Andy Lutomirski, Arjan van de Ven, Paul Gortmaker,
	Borislav Petkov, Andy Lutomirski, Rafael J . Wysocki, Len Brown,
	Pavel Machek, Jiri Kosina, Matt Fleming, Ard Biesheuvel,
	Boris Ostrovsky, Juergen Gross, Rusty Russell,
	Christian Borntraeger, Fenghua Yu, He Chen, Brian Gerst,
	Luis R . Rodriguez, Adam Buchbinder, Stanislaw Gruszka,
	Arnd Bergmann, Dave Hansen, Chen Yucong, Vitaly Kuznetsov,
	David Vrabel, Josh Poimboeuf, Tim Chen, Rik van Riel, Andi Kleen,
	Jiri Olsa, Prarit Bhargava, Michael Ellerman, Joerg Roedel,
	Paolo Bonzini, Radim Krčmář
  Cc: x86, linux-kernel, kasan-dev, linux-pm, linux-efi, xen-devel,
	lguest, kvm, kernel-hardening

This patch makes the GDT remapped pages read-only to prevent corruption.
This change is done only on 64 bit.

The native_load_tr_desc function was adapted to correctly handle a
read-only GDT. The LTR instruction always writes to the GDT TSS entry.
This generates a page fault if the GDT is read-only. This change checks
if the current GDT is a remap and swap GDTs as needed. This function was
tested by booting multiple machines and checking hibernation works
properly.

KVM SVM and VMX were adapted to use the writeable GDT. On VMX, the
per-cpu variable was removed for functions to fetch the original GDT.
Instead of reloading the previous GDT, VMX will reload the fixmap GDT as
expected. For testing, VMs were started and restored on multiple
configurations.

Signed-off-by: Thomas Garnier <thgarnie@google.com>
---
Based on next-20170125
---
 arch/x86/include/asm/desc.h      | 46 +++++++++++++++++++++++++++++++++++-----
 arch/x86/include/asm/processor.h |  1 +
 arch/x86/kernel/cpu/common.c     | 28 ++++++++++++++++++------
 arch/x86/kvm/svm.c               |  4 +---
 arch/x86/kvm/vmx.c               | 15 +++++--------
 5 files changed, 70 insertions(+), 24 deletions(-)

diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 4cc176f57b78..ca7b2224fcb4 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -52,6 +52,12 @@ static inline struct desc_struct *get_cpu_direct_gdt(unsigned int cpu)
 	return per_cpu(gdt_page, cpu).gdt;
 }
 
+/* Provide the current original GDT */
+static inline struct desc_struct *get_current_direct_gdt(void)
+{
+	return this_cpu_ptr(&gdt_page)->gdt;
+}
+
 /* Get the fixmap index for a specific processor */
 static inline unsigned int get_cpu_fixmap_gdt_index(int cpu)
 {
@@ -223,11 +229,6 @@ static inline void native_set_ldt(const void *addr, unsigned int entries)
 	}
 }
 
-static inline void native_load_tr_desc(void)
-{
-	asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
-}
-
 static inline void native_load_gdt(const struct desc_ptr *dtr)
 {
 	asm volatile("lgdt %0"::"m" (*dtr));
@@ -248,6 +249,41 @@ static inline void native_store_idt(struct desc_ptr *dtr)
 	asm volatile("sidt %0":"=m" (*dtr));
 }
 
+/*
+ * The LTR instruction marks the TSS GDT entry as busy. In 64bit, the GDT is
+ * a read-only remapping. To prevent a page fault, the GDT is switched to the
+ * original writeable version when needed.
+ */
+#ifdef CONFIG_X86_64
+static inline void native_load_tr_desc(void)
+{
+	struct desc_ptr gdt;
+	int cpu = raw_smp_processor_id();
+	bool restore = false;
+	struct desc_struct *fixmap_gdt;
+
+	native_store_gdt(&gdt);
+	fixmap_gdt = get_cpu_fixmap_gdt(cpu);
+
+	/*
+	 * If the current GDT is the read-only fixmap, swap to the original
+	 * writeable version. Swap back at the end.
+	 */
+	if (gdt.address == (unsigned long)fixmap_gdt) {
+		load_direct_gdt(cpu);
+		restore = true;
+	}
+	asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
+	if (restore)
+		load_fixmap_gdt(cpu);
+}
+#else
+static inline void native_load_tr_desc(void)
+{
+	asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
+}
+#endif
+
 static inline unsigned long native_store_tr(void)
 {
 	unsigned long tr;
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 22801fd345dc..e8e68b00a2ec 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -705,6 +705,7 @@ extern struct desc_ptr		early_gdt_descr;
 
 extern void cpu_set_gdt(int);
 extern void switch_to_new_gdt(int);
+extern void load_direct_gdt(int);
 extern void load_fixmap_gdt(int);
 extern void load_percpu_segment(int);
 extern void cpu_init(void);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 15f06cf3e3d4..a7a54f57b68a 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -444,13 +444,31 @@ void load_percpu_segment(int cpu)
 	load_stack_canary_segment();
 }
 
+/* On 64bit the GDT remapping is read-only */
+#ifdef CONFIG_X86_64
+#define PAGE_FIXMAP_GDT PAGE_KERNEL_RO
+#else
+#define PAGE_FIXMAP_GDT PAGE_KERNEL
+#endif
+
 /* Setup the fixmap mapping only once per-processor */
 static inline void setup_fixmap_gdt(int cpu)
 {
 	__set_fixmap(get_cpu_fixmap_gdt_index(cpu),
-		     __pa(get_cpu_direct_gdt(cpu)), PAGE_KERNEL);
+		     __pa(get_cpu_direct_gdt(cpu)), PAGE_FIXMAP_GDT);
 }
 
+/* Load the original GDT from the per-cpu structure */
+void load_direct_gdt(int cpu)
+{
+	struct desc_ptr gdt_descr;
+
+	gdt_descr.address = (long)get_cpu_direct_gdt(cpu);
+	gdt_descr.size = GDT_SIZE - 1;
+	load_gdt(&gdt_descr);
+}
+EXPORT_SYMBOL(load_direct_gdt);
+
 /* Load a fixmap remapping of the per-cpu GDT */
 void load_fixmap_gdt(int cpu)
 {
@@ -460,6 +478,7 @@ void load_fixmap_gdt(int cpu)
 	gdt_descr.size = GDT_SIZE - 1;
 	load_gdt(&gdt_descr);
 }
+EXPORT_SYMBOL(load_fixmap_gdt);
 
 /*
  * Current gdt points %fs at the "master" per-cpu area: after this,
@@ -467,11 +486,8 @@ void load_fixmap_gdt(int cpu)
  */
 void switch_to_new_gdt(int cpu)
 {
-	struct desc_ptr gdt_descr;
-
-	gdt_descr.address = (long)get_cpu_direct_gdt(cpu);
-	gdt_descr.size = GDT_SIZE - 1;
-	load_gdt(&gdt_descr);
+	/* Load the original GDT */
+	load_direct_gdt(cpu);
 	/* Reload the per-cpu base */
 	load_percpu_segment(cpu);
 }
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index d0414f054bdf..7864f4c813a1 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -741,7 +741,6 @@ static int svm_hardware_enable(void)
 
 	struct svm_cpu_data *sd;
 	uint64_t efer;
-	struct desc_ptr gdt_descr;
 	struct desc_struct *gdt;
 	int me = raw_smp_processor_id();
 
@@ -763,8 +762,7 @@ static int svm_hardware_enable(void)
 	sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1;
 	sd->next_asid = sd->max_asid + 1;
 
-	native_store_gdt(&gdt_descr);
-	gdt = (struct desc_struct *)gdt_descr.address;
+	gdt = get_current_direct_gdt();
 	sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
 
 	wrmsrl(MSR_EFER, efer | EFER_SVME);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 4e691035a32d..c64fa6e0417c 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -935,7 +935,6 @@ static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
  * when a CPU is brought down, and we need to VMCLEAR all VMCSs loaded on it.
  */
 static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);
-static DEFINE_PER_CPU(struct desc_ptr, host_gdt);
 
 /*
  * We maintian a per-CPU linked-list of vCPU, so in wakeup_handler() we
@@ -1997,10 +1996,9 @@ static void reload_tss(void)
 	/*
 	 * VT restores TR but not its size.  Useless.
 	 */
-	struct desc_ptr *gdt = this_cpu_ptr(&host_gdt);
 	struct desc_struct *descs;
 
-	descs = (void *)gdt->address;
+	descs = (void *)get_current_direct_gdt();
 	descs[GDT_ENTRY_TSS].type = 9; /* available TSS */
 	load_TR_desc();
 }
@@ -2061,7 +2059,6 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
 
 static unsigned long segment_base(u16 selector)
 {
-	struct desc_ptr *gdt = this_cpu_ptr(&host_gdt);
 	struct desc_struct *d;
 	unsigned long table_base;
 	unsigned long v;
@@ -2069,7 +2066,7 @@ static unsigned long segment_base(u16 selector)
 	if (!(selector & ~3))
 		return 0;
 
-	table_base = gdt->address;
+	table_base = (unsigned long)get_current_direct_gdt();
 
 	if (selector & 4) {           /* from ldt */
 		u16 ldt_selector = kvm_read_ldt();
@@ -2185,7 +2182,7 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
 #endif
 	if (vmx->host_state.msr_host_bndcfgs)
 		wrmsrl(MSR_IA32_BNDCFGS, vmx->host_state.msr_host_bndcfgs);
-	load_gdt(this_cpu_ptr(&host_gdt));
+	load_fixmap_gdt(raw_smp_processor_id());
 }
 
 static void vmx_load_host_state(struct vcpu_vmx *vmx)
@@ -2287,7 +2284,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	}
 
 	if (!already_loaded) {
-		struct desc_ptr *gdt = this_cpu_ptr(&host_gdt);
+		unsigned long gdt = (unsigned long)get_current_direct_gdt();
 		unsigned long sysenter_esp;
 
 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
@@ -2297,7 +2294,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 		 * processors.
 		 */
 		vmcs_writel(HOST_TR_BASE, kvm_read_tr_base()); /* 22.2.4 */
-		vmcs_writel(HOST_GDTR_BASE, gdt->address);   /* 22.2.4 */
+		vmcs_writel(HOST_GDTR_BASE, gdt);   /* 22.2.4 */
 
 		rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp);
 		vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */
@@ -3523,8 +3520,6 @@ static int hardware_enable(void)
 		ept_sync_global();
 	}
 
-	native_store_gdt(this_cpu_ptr(&host_gdt));
-
 	return 0;
 }
 
-- 
2.11.0.483.g087da7b7c-goog

^ permalink raw reply related	[flat|nested] 38+ messages in thread

* [PATCH v2 3/3] x86: Make the GDT remapping read-only on 64 bit
@ 2017-01-26 16:59   ` Thomas Garnier
  0 siblings, 0 replies; 38+ messages in thread
From: Thomas Garnier @ 2017-01-26 16:59 UTC (permalink / raw)
  To: Thomas Gleixner, Ingo Molnar, H . Peter Anvin, Andrey Ryabinin,
	Alexander Potapenko, Dmitry Vyukov, Thomas Garnier, Kees Cook,
	Andy Lutomirski, Arjan van de Ven, Paul Gortmaker,
	Borislav Petkov, Andy Lutomirski, Rafael J . Wysocki, Len Brown,
	Pavel Machek, Jiri Kosina, Matt Fleming, Ard Biesheuvel,
	Boris Ostrovsky, Juergen Gross, Rusty Russell,
	Christian Borntraeger
  Cc: x86, linux-kernel, kasan-dev, linux-pm, linux-efi, xen-devel,
	lguest, kvm, kernel-hardening

This patch makes the GDT remapped pages read-only to prevent corruption.
This change is done only on 64 bit.

The native_load_tr_desc function was adapted to correctly handle a
read-only GDT. The LTR instruction always writes to the GDT TSS entry.
This generates a page fault if the GDT is read-only. This change checks
if the current GDT is a remap and swap GDTs as needed. This function was
tested by booting multiple machines and checking hibernation works
properly.

KVM SVM and VMX were adapted to use the writeable GDT. On VMX, the
per-cpu variable was removed for functions to fetch the original GDT.
Instead of reloading the previous GDT, VMX will reload the fixmap GDT as
expected. For testing, VMs were started and restored on multiple
configurations.

Signed-off-by: Thomas Garnier <thgarnie@google.com>
---
Based on next-20170125
---
 arch/x86/include/asm/desc.h      | 46 +++++++++++++++++++++++++++++++++++-----
 arch/x86/include/asm/processor.h |  1 +
 arch/x86/kernel/cpu/common.c     | 28 ++++++++++++++++++------
 arch/x86/kvm/svm.c               |  4 +---
 arch/x86/kvm/vmx.c               | 15 +++++--------
 5 files changed, 70 insertions(+), 24 deletions(-)

diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 4cc176f57b78..ca7b2224fcb4 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -52,6 +52,12 @@ static inline struct desc_struct *get_cpu_direct_gdt(unsigned int cpu)
 	return per_cpu(gdt_page, cpu).gdt;
 }
 
+/* Provide the current original GDT */
+static inline struct desc_struct *get_current_direct_gdt(void)
+{
+	return this_cpu_ptr(&gdt_page)->gdt;
+}
+
 /* Get the fixmap index for a specific processor */
 static inline unsigned int get_cpu_fixmap_gdt_index(int cpu)
 {
@@ -223,11 +229,6 @@ static inline void native_set_ldt(const void *addr, unsigned int entries)
 	}
 }
 
-static inline void native_load_tr_desc(void)
-{
-	asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
-}
-
 static inline void native_load_gdt(const struct desc_ptr *dtr)
 {
 	asm volatile("lgdt %0"::"m" (*dtr));
@@ -248,6 +249,41 @@ static inline void native_store_idt(struct desc_ptr *dtr)
 	asm volatile("sidt %0":"=m" (*dtr));
 }
 
+/*
+ * The LTR instruction marks the TSS GDT entry as busy. In 64bit, the GDT is
+ * a read-only remapping. To prevent a page fault, the GDT is switched to the
+ * original writeable version when needed.
+ */
+#ifdef CONFIG_X86_64
+static inline void native_load_tr_desc(void)
+{
+	struct desc_ptr gdt;
+	int cpu = raw_smp_processor_id();
+	bool restore = false;
+	struct desc_struct *fixmap_gdt;
+
+	native_store_gdt(&gdt);
+	fixmap_gdt = get_cpu_fixmap_gdt(cpu);
+
+	/*
+	 * If the current GDT is the read-only fixmap, swap to the original
+	 * writeable version. Swap back at the end.
+	 */
+	if (gdt.address == (unsigned long)fixmap_gdt) {
+		load_direct_gdt(cpu);
+		restore = true;
+	}
+	asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
+	if (restore)
+		load_fixmap_gdt(cpu);
+}
+#else
+static inline void native_load_tr_desc(void)
+{
+	asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
+}
+#endif
+
 static inline unsigned long native_store_tr(void)
 {
 	unsigned long tr;
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 22801fd345dc..e8e68b00a2ec 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -705,6 +705,7 @@ extern struct desc_ptr		early_gdt_descr;
 
 extern void cpu_set_gdt(int);
 extern void switch_to_new_gdt(int);
+extern void load_direct_gdt(int);
 extern void load_fixmap_gdt(int);
 extern void load_percpu_segment(int);
 extern void cpu_init(void);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 15f06cf3e3d4..a7a54f57b68a 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -444,13 +444,31 @@ void load_percpu_segment(int cpu)
 	load_stack_canary_segment();
 }
 
+/* On 64bit the GDT remapping is read-only */
+#ifdef CONFIG_X86_64
+#define PAGE_FIXMAP_GDT PAGE_KERNEL_RO
+#else
+#define PAGE_FIXMAP_GDT PAGE_KERNEL
+#endif
+
 /* Setup the fixmap mapping only once per-processor */
 static inline void setup_fixmap_gdt(int cpu)
 {
 	__set_fixmap(get_cpu_fixmap_gdt_index(cpu),
-		     __pa(get_cpu_direct_gdt(cpu)), PAGE_KERNEL);
+		     __pa(get_cpu_direct_gdt(cpu)), PAGE_FIXMAP_GDT);
 }
 
+/* Load the original GDT from the per-cpu structure */
+void load_direct_gdt(int cpu)
+{
+	struct desc_ptr gdt_descr;
+
+	gdt_descr.address = (long)get_cpu_direct_gdt(cpu);
+	gdt_descr.size = GDT_SIZE - 1;
+	load_gdt(&gdt_descr);
+}
+EXPORT_SYMBOL(load_direct_gdt);
+
 /* Load a fixmap remapping of the per-cpu GDT */
 void load_fixmap_gdt(int cpu)
 {
@@ -460,6 +478,7 @@ void load_fixmap_gdt(int cpu)
 	gdt_descr.size = GDT_SIZE - 1;
 	load_gdt(&gdt_descr);
 }
+EXPORT_SYMBOL(load_fixmap_gdt);
 
 /*
  * Current gdt points %fs at the "master" per-cpu area: after this,
@@ -467,11 +486,8 @@ void load_fixmap_gdt(int cpu)
  */
 void switch_to_new_gdt(int cpu)
 {
-	struct desc_ptr gdt_descr;
-
-	gdt_descr.address = (long)get_cpu_direct_gdt(cpu);
-	gdt_descr.size = GDT_SIZE - 1;
-	load_gdt(&gdt_descr);
+	/* Load the original GDT */
+	load_direct_gdt(cpu);
 	/* Reload the per-cpu base */
 	load_percpu_segment(cpu);
 }
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index d0414f054bdf..7864f4c813a1 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -741,7 +741,6 @@ static int svm_hardware_enable(void)
 
 	struct svm_cpu_data *sd;
 	uint64_t efer;
-	struct desc_ptr gdt_descr;
 	struct desc_struct *gdt;
 	int me = raw_smp_processor_id();
 
@@ -763,8 +762,7 @@ static int svm_hardware_enable(void)
 	sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1;
 	sd->next_asid = sd->max_asid + 1;
 
-	native_store_gdt(&gdt_descr);
-	gdt = (struct desc_struct *)gdt_descr.address;
+	gdt = get_current_direct_gdt();
 	sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
 
 	wrmsrl(MSR_EFER, efer | EFER_SVME);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 4e691035a32d..c64fa6e0417c 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -935,7 +935,6 @@ static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
  * when a CPU is brought down, and we need to VMCLEAR all VMCSs loaded on it.
  */
 static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);
-static DEFINE_PER_CPU(struct desc_ptr, host_gdt);
 
 /*
  * We maintian a per-CPU linked-list of vCPU, so in wakeup_handler() we
@@ -1997,10 +1996,9 @@ static void reload_tss(void)
 	/*
 	 * VT restores TR but not its size.  Useless.
 	 */
-	struct desc_ptr *gdt = this_cpu_ptr(&host_gdt);
 	struct desc_struct *descs;
 
-	descs = (void *)gdt->address;
+	descs = (void *)get_current_direct_gdt();
 	descs[GDT_ENTRY_TSS].type = 9; /* available TSS */
 	load_TR_desc();
 }
@@ -2061,7 +2059,6 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
 
 static unsigned long segment_base(u16 selector)
 {
-	struct desc_ptr *gdt = this_cpu_ptr(&host_gdt);
 	struct desc_struct *d;
 	unsigned long table_base;
 	unsigned long v;
@@ -2069,7 +2066,7 @@ static unsigned long segment_base(u16 selector)
 	if (!(selector & ~3))
 		return 0;
 
-	table_base = gdt->address;
+	table_base = (unsigned long)get_current_direct_gdt();
 
 	if (selector & 4) {           /* from ldt */
 		u16 ldt_selector = kvm_read_ldt();
@@ -2185,7 +2182,7 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
 #endif
 	if (vmx->host_state.msr_host_bndcfgs)
 		wrmsrl(MSR_IA32_BNDCFGS, vmx->host_state.msr_host_bndcfgs);
-	load_gdt(this_cpu_ptr(&host_gdt));
+	load_fixmap_gdt(raw_smp_processor_id());
 }
 
 static void vmx_load_host_state(struct vcpu_vmx *vmx)
@@ -2287,7 +2284,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	}
 
 	if (!already_loaded) {
-		struct desc_ptr *gdt = this_cpu_ptr(&host_gdt);
+		unsigned long gdt = (unsigned long)get_current_direct_gdt();
 		unsigned long sysenter_esp;
 
 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
@@ -2297,7 +2294,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 		 * processors.
 		 */
 		vmcs_writel(HOST_TR_BASE, kvm_read_tr_base()); /* 22.2.4 */
-		vmcs_writel(HOST_GDTR_BASE, gdt->address);   /* 22.2.4 */
+		vmcs_writel(HOST_GDTR_BASE, gdt);   /* 22.2.4 */
 
 		rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp);
 		vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */
@@ -3523,8 +3520,6 @@ static int hardware_enable(void)
 		ept_sync_global();
 	}
 
-	native_store_gdt(this_cpu_ptr(&host_gdt));
-
 	return 0;
 }
 
-- 
2.11.0.483.g087da7b7c-goog

^ permalink raw reply related	[flat|nested] 38+ messages in thread

* [PATCH v2 3/3] x86: Make the GDT remapping read-only on 64 bit
@ 2017-01-26 16:59   ` Thomas Garnier
  0 siblings, 0 replies; 38+ messages in thread
From: Thomas Garnier @ 2017-01-26 16:59 UTC (permalink / raw)
  To: Thomas Gleixner, Ingo Molnar, H . Peter Anvin, Andrey Ryabinin,
	Alexander Potapenko, Dmitry Vyukov, Thomas Garnier, Kees Cook,
	Andy Lutomirski, Arjan van de Ven, Paul Gortmaker,
	Borislav Petkov, Andy Lutomirski, Rafael J . Wysocki, Len Brown,
	Pavel Machek, Jiri Kosina, Matt Fleming, Ard Biesheuvel,
	Boris Ostrovsky, Juergen Gross, Rusty Russell,
	Christian Borntraeger
  Cc: x86, linux-kernel, kasan-dev, linux-pm, linux-efi, xen-devel,
	lguest, kvm, kernel-hardening

This patch makes the GDT remapped pages read-only to prevent corruption.
This change is done only on 64 bit.

The native_load_tr_desc function was adapted to correctly handle a
read-only GDT. The LTR instruction always writes to the GDT TSS entry.
This generates a page fault if the GDT is read-only. This change checks
if the current GDT is a remap and swap GDTs as needed. This function was
tested by booting multiple machines and checking hibernation works
properly.

KVM SVM and VMX were adapted to use the writeable GDT. On VMX, the
per-cpu variable was removed for functions to fetch the original GDT.
Instead of reloading the previous GDT, VMX will reload the fixmap GDT as
expected. For testing, VMs were started and restored on multiple
configurations.

Signed-off-by: Thomas Garnier <thgarnie@google.com>
---
Based on next-20170125
---
 arch/x86/include/asm/desc.h      | 46 +++++++++++++++++++++++++++++++++++-----
 arch/x86/include/asm/processor.h |  1 +
 arch/x86/kernel/cpu/common.c     | 28 ++++++++++++++++++------
 arch/x86/kvm/svm.c               |  4 +---
 arch/x86/kvm/vmx.c               | 15 +++++--------
 5 files changed, 70 insertions(+), 24 deletions(-)

diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 4cc176f57b78..ca7b2224fcb4 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -52,6 +52,12 @@ static inline struct desc_struct *get_cpu_direct_gdt(unsigned int cpu)
 	return per_cpu(gdt_page, cpu).gdt;
 }
 
+/* Provide the current original GDT */
+static inline struct desc_struct *get_current_direct_gdt(void)
+{
+	return this_cpu_ptr(&gdt_page)->gdt;
+}
+
 /* Get the fixmap index for a specific processor */
 static inline unsigned int get_cpu_fixmap_gdt_index(int cpu)
 {
@@ -223,11 +229,6 @@ static inline void native_set_ldt(const void *addr, unsigned int entries)
 	}
 }
 
-static inline void native_load_tr_desc(void)
-{
-	asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
-}
-
 static inline void native_load_gdt(const struct desc_ptr *dtr)
 {
 	asm volatile("lgdt %0"::"m" (*dtr));
@@ -248,6 +249,41 @@ static inline void native_store_idt(struct desc_ptr *dtr)
 	asm volatile("sidt %0":"=m" (*dtr));
 }
 
+/*
+ * The LTR instruction marks the TSS GDT entry as busy. In 64bit, the GDT is
+ * a read-only remapping. To prevent a page fault, the GDT is switched to the
+ * original writeable version when needed.
+ */
+#ifdef CONFIG_X86_64
+static inline void native_load_tr_desc(void)
+{
+	struct desc_ptr gdt;
+	int cpu = raw_smp_processor_id();
+	bool restore = false;
+	struct desc_struct *fixmap_gdt;
+
+	native_store_gdt(&gdt);
+	fixmap_gdt = get_cpu_fixmap_gdt(cpu);
+
+	/*
+	 * If the current GDT is the read-only fixmap, swap to the original
+	 * writeable version. Swap back at the end.
+	 */
+	if (gdt.address == (unsigned long)fixmap_gdt) {
+		load_direct_gdt(cpu);
+		restore = true;
+	}
+	asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
+	if (restore)
+		load_fixmap_gdt(cpu);
+}
+#else
+static inline void native_load_tr_desc(void)
+{
+	asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
+}
+#endif
+
 static inline unsigned long native_store_tr(void)
 {
 	unsigned long tr;
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 22801fd345dc..e8e68b00a2ec 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -705,6 +705,7 @@ extern struct desc_ptr		early_gdt_descr;
 
 extern void cpu_set_gdt(int);
 extern void switch_to_new_gdt(int);
+extern void load_direct_gdt(int);
 extern void load_fixmap_gdt(int);
 extern void load_percpu_segment(int);
 extern void cpu_init(void);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 15f06cf3e3d4..a7a54f57b68a 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -444,13 +444,31 @@ void load_percpu_segment(int cpu)
 	load_stack_canary_segment();
 }
 
+/* On 64bit the GDT remapping is read-only */
+#ifdef CONFIG_X86_64
+#define PAGE_FIXMAP_GDT PAGE_KERNEL_RO
+#else
+#define PAGE_FIXMAP_GDT PAGE_KERNEL
+#endif
+
 /* Setup the fixmap mapping only once per-processor */
 static inline void setup_fixmap_gdt(int cpu)
 {
 	__set_fixmap(get_cpu_fixmap_gdt_index(cpu),
-		     __pa(get_cpu_direct_gdt(cpu)), PAGE_KERNEL);
+		     __pa(get_cpu_direct_gdt(cpu)), PAGE_FIXMAP_GDT);
 }
 
+/* Load the original GDT from the per-cpu structure */
+void load_direct_gdt(int cpu)
+{
+	struct desc_ptr gdt_descr;
+
+	gdt_descr.address = (long)get_cpu_direct_gdt(cpu);
+	gdt_descr.size = GDT_SIZE - 1;
+	load_gdt(&gdt_descr);
+}
+EXPORT_SYMBOL(load_direct_gdt);
+
 /* Load a fixmap remapping of the per-cpu GDT */
 void load_fixmap_gdt(int cpu)
 {
@@ -460,6 +478,7 @@ void load_fixmap_gdt(int cpu)
 	gdt_descr.size = GDT_SIZE - 1;
 	load_gdt(&gdt_descr);
 }
+EXPORT_SYMBOL(load_fixmap_gdt);
 
 /*
  * Current gdt points %fs at the "master" per-cpu area: after this,
@@ -467,11 +486,8 @@ void load_fixmap_gdt(int cpu)
  */
 void switch_to_new_gdt(int cpu)
 {
-	struct desc_ptr gdt_descr;
-
-	gdt_descr.address = (long)get_cpu_direct_gdt(cpu);
-	gdt_descr.size = GDT_SIZE - 1;
-	load_gdt(&gdt_descr);
+	/* Load the original GDT */
+	load_direct_gdt(cpu);
 	/* Reload the per-cpu base */
 	load_percpu_segment(cpu);
 }
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index d0414f054bdf..7864f4c813a1 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -741,7 +741,6 @@ static int svm_hardware_enable(void)
 
 	struct svm_cpu_data *sd;
 	uint64_t efer;
-	struct desc_ptr gdt_descr;
 	struct desc_struct *gdt;
 	int me = raw_smp_processor_id();
 
@@ -763,8 +762,7 @@ static int svm_hardware_enable(void)
 	sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1;
 	sd->next_asid = sd->max_asid + 1;
 
-	native_store_gdt(&gdt_descr);
-	gdt = (struct desc_struct *)gdt_descr.address;
+	gdt = get_current_direct_gdt();
 	sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
 
 	wrmsrl(MSR_EFER, efer | EFER_SVME);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 4e691035a32d..c64fa6e0417c 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -935,7 +935,6 @@ static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
  * when a CPU is brought down, and we need to VMCLEAR all VMCSs loaded on it.
  */
 static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);
-static DEFINE_PER_CPU(struct desc_ptr, host_gdt);
 
 /*
  * We maintian a per-CPU linked-list of vCPU, so in wakeup_handler() we
@@ -1997,10 +1996,9 @@ static void reload_tss(void)
 	/*
 	 * VT restores TR but not its size.  Useless.
 	 */
-	struct desc_ptr *gdt = this_cpu_ptr(&host_gdt);
 	struct desc_struct *descs;
 
-	descs = (void *)gdt->address;
+	descs = (void *)get_current_direct_gdt();
 	descs[GDT_ENTRY_TSS].type = 9; /* available TSS */
 	load_TR_desc();
 }
@@ -2061,7 +2059,6 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
 
 static unsigned long segment_base(u16 selector)
 {
-	struct desc_ptr *gdt = this_cpu_ptr(&host_gdt);
 	struct desc_struct *d;
 	unsigned long table_base;
 	unsigned long v;
@@ -2069,7 +2066,7 @@ static unsigned long segment_base(u16 selector)
 	if (!(selector & ~3))
 		return 0;
 
-	table_base = gdt->address;
+	table_base = (unsigned long)get_current_direct_gdt();
 
 	if (selector & 4) {           /* from ldt */
 		u16 ldt_selector = kvm_read_ldt();
@@ -2185,7 +2182,7 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
 #endif
 	if (vmx->host_state.msr_host_bndcfgs)
 		wrmsrl(MSR_IA32_BNDCFGS, vmx->host_state.msr_host_bndcfgs);
-	load_gdt(this_cpu_ptr(&host_gdt));
+	load_fixmap_gdt(raw_smp_processor_id());
 }
 
 static void vmx_load_host_state(struct vcpu_vmx *vmx)
@@ -2287,7 +2284,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	}
 
 	if (!already_loaded) {
-		struct desc_ptr *gdt = this_cpu_ptr(&host_gdt);
+		unsigned long gdt = (unsigned long)get_current_direct_gdt();
 		unsigned long sysenter_esp;
 
 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
@@ -2297,7 +2294,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 		 * processors.
 		 */
 		vmcs_writel(HOST_TR_BASE, kvm_read_tr_base()); /* 22.2.4 */
-		vmcs_writel(HOST_GDTR_BASE, gdt->address);   /* 22.2.4 */
+		vmcs_writel(HOST_GDTR_BASE, gdt);   /* 22.2.4 */
 
 		rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp);
 		vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */
@@ -3523,8 +3520,6 @@ static int hardware_enable(void)
 		ept_sync_global();
 	}
 
-	native_store_gdt(this_cpu_ptr(&host_gdt));
-
 	return 0;
 }
 
-- 
2.11.0.483.g087da7b7c-goog

^ permalink raw reply related	[flat|nested] 38+ messages in thread

* [kernel-hardening] [PATCH v2 3/3] x86: Make the GDT remapping read-only on 64 bit
@ 2017-01-26 16:59   ` Thomas Garnier
  0 siblings, 0 replies; 38+ messages in thread
From: Thomas Garnier @ 2017-01-26 16:59 UTC (permalink / raw)
  To: Thomas Gleixner, Ingo Molnar, H . Peter Anvin, Andrey Ryabinin,
	Alexander Potapenko, Dmitry Vyukov, Thomas Garnier, Kees Cook,
	Andy Lutomirski, Arjan van de Ven, Paul Gortmaker,
	Borislav Petkov, Andy Lutomirski, Rafael J . Wysocki, Len Brown,
	Pavel Machek, Jiri Kosina, Matt Fleming, Ard Biesheuvel,
	Boris Ostrovsky, Juergen Gross, Rusty Russell,
	Christian Borntraeger, Fenghua Yu, He Chen, Brian Gerst,
	Luis R . Rodriguez, Adam Buchbinder, Stanislaw Gruszka,
	Arnd Bergmann, Dave Hansen, Chen Yucong, Vitaly Kuznetsov,
	David Vrabel, Josh Poimboeuf, Tim Chen, Rik van Riel, Andi Kleen,
	Jiri Olsa, Prarit Bhargava, Michael Ellerman, Joerg Roedel,
	Paolo Bonzini, Radim Krčmář
  Cc: x86, linux-kernel, kasan-dev, linux-pm, linux-efi, xen-devel,
	lguest, kvm, kernel-hardening

This patch makes the GDT remapped pages read-only to prevent corruption.
This change is done only on 64 bit.

The native_load_tr_desc function was adapted to correctly handle a
read-only GDT. The LTR instruction always writes to the GDT TSS entry.
This generates a page fault if the GDT is read-only. This change checks
if the current GDT is a remap and swap GDTs as needed. This function was
tested by booting multiple machines and checking hibernation works
properly.

KVM SVM and VMX were adapted to use the writeable GDT. On VMX, the
per-cpu variable was removed for functions to fetch the original GDT.
Instead of reloading the previous GDT, VMX will reload the fixmap GDT as
expected. For testing, VMs were started and restored on multiple
configurations.

Signed-off-by: Thomas Garnier <thgarnie@google.com>
---
Based on next-20170125
---
 arch/x86/include/asm/desc.h      | 46 +++++++++++++++++++++++++++++++++++-----
 arch/x86/include/asm/processor.h |  1 +
 arch/x86/kernel/cpu/common.c     | 28 ++++++++++++++++++------
 arch/x86/kvm/svm.c               |  4 +---
 arch/x86/kvm/vmx.c               | 15 +++++--------
 5 files changed, 70 insertions(+), 24 deletions(-)

diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 4cc176f57b78..ca7b2224fcb4 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -52,6 +52,12 @@ static inline struct desc_struct *get_cpu_direct_gdt(unsigned int cpu)
 	return per_cpu(gdt_page, cpu).gdt;
 }
 
+/* Provide the current original GDT */
+static inline struct desc_struct *get_current_direct_gdt(void)
+{
+	return this_cpu_ptr(&gdt_page)->gdt;
+}
+
 /* Get the fixmap index for a specific processor */
 static inline unsigned int get_cpu_fixmap_gdt_index(int cpu)
 {
@@ -223,11 +229,6 @@ static inline void native_set_ldt(const void *addr, unsigned int entries)
 	}
 }
 
-static inline void native_load_tr_desc(void)
-{
-	asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
-}
-
 static inline void native_load_gdt(const struct desc_ptr *dtr)
 {
 	asm volatile("lgdt %0"::"m" (*dtr));
@@ -248,6 +249,41 @@ static inline void native_store_idt(struct desc_ptr *dtr)
 	asm volatile("sidt %0":"=m" (*dtr));
 }
 
+/*
+ * The LTR instruction marks the TSS GDT entry as busy. In 64bit, the GDT is
+ * a read-only remapping. To prevent a page fault, the GDT is switched to the
+ * original writeable version when needed.
+ */
+#ifdef CONFIG_X86_64
+static inline void native_load_tr_desc(void)
+{
+	struct desc_ptr gdt;
+	int cpu = raw_smp_processor_id();
+	bool restore = false;
+	struct desc_struct *fixmap_gdt;
+
+	native_store_gdt(&gdt);
+	fixmap_gdt = get_cpu_fixmap_gdt(cpu);
+
+	/*
+	 * If the current GDT is the read-only fixmap, swap to the original
+	 * writeable version. Swap back at the end.
+	 */
+	if (gdt.address == (unsigned long)fixmap_gdt) {
+		load_direct_gdt(cpu);
+		restore = true;
+	}
+	asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
+	if (restore)
+		load_fixmap_gdt(cpu);
+}
+#else
+static inline void native_load_tr_desc(void)
+{
+	asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
+}
+#endif
+
 static inline unsigned long native_store_tr(void)
 {
 	unsigned long tr;
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 22801fd345dc..e8e68b00a2ec 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -705,6 +705,7 @@ extern struct desc_ptr		early_gdt_descr;
 
 extern void cpu_set_gdt(int);
 extern void switch_to_new_gdt(int);
+extern void load_direct_gdt(int);
 extern void load_fixmap_gdt(int);
 extern void load_percpu_segment(int);
 extern void cpu_init(void);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 15f06cf3e3d4..a7a54f57b68a 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -444,13 +444,31 @@ void load_percpu_segment(int cpu)
 	load_stack_canary_segment();
 }
 
+/* On 64bit the GDT remapping is read-only */
+#ifdef CONFIG_X86_64
+#define PAGE_FIXMAP_GDT PAGE_KERNEL_RO
+#else
+#define PAGE_FIXMAP_GDT PAGE_KERNEL
+#endif
+
 /* Setup the fixmap mapping only once per-processor */
 static inline void setup_fixmap_gdt(int cpu)
 {
 	__set_fixmap(get_cpu_fixmap_gdt_index(cpu),
-		     __pa(get_cpu_direct_gdt(cpu)), PAGE_KERNEL);
+		     __pa(get_cpu_direct_gdt(cpu)), PAGE_FIXMAP_GDT);
 }
 
+/* Load the original GDT from the per-cpu structure */
+void load_direct_gdt(int cpu)
+{
+	struct desc_ptr gdt_descr;
+
+	gdt_descr.address = (long)get_cpu_direct_gdt(cpu);
+	gdt_descr.size = GDT_SIZE - 1;
+	load_gdt(&gdt_descr);
+}
+EXPORT_SYMBOL(load_direct_gdt);
+
 /* Load a fixmap remapping of the per-cpu GDT */
 void load_fixmap_gdt(int cpu)
 {
@@ -460,6 +478,7 @@ void load_fixmap_gdt(int cpu)
 	gdt_descr.size = GDT_SIZE - 1;
 	load_gdt(&gdt_descr);
 }
+EXPORT_SYMBOL(load_fixmap_gdt);
 
 /*
  * Current gdt points %fs at the "master" per-cpu area: after this,
@@ -467,11 +486,8 @@ void load_fixmap_gdt(int cpu)
  */
 void switch_to_new_gdt(int cpu)
 {
-	struct desc_ptr gdt_descr;
-
-	gdt_descr.address = (long)get_cpu_direct_gdt(cpu);
-	gdt_descr.size = GDT_SIZE - 1;
-	load_gdt(&gdt_descr);
+	/* Load the original GDT */
+	load_direct_gdt(cpu);
 	/* Reload the per-cpu base */
 	load_percpu_segment(cpu);
 }
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index d0414f054bdf..7864f4c813a1 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -741,7 +741,6 @@ static int svm_hardware_enable(void)
 
 	struct svm_cpu_data *sd;
 	uint64_t efer;
-	struct desc_ptr gdt_descr;
 	struct desc_struct *gdt;
 	int me = raw_smp_processor_id();
 
@@ -763,8 +762,7 @@ static int svm_hardware_enable(void)
 	sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1;
 	sd->next_asid = sd->max_asid + 1;
 
-	native_store_gdt(&gdt_descr);
-	gdt = (struct desc_struct *)gdt_descr.address;
+	gdt = get_current_direct_gdt();
 	sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
 
 	wrmsrl(MSR_EFER, efer | EFER_SVME);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 4e691035a32d..c64fa6e0417c 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -935,7 +935,6 @@ static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
  * when a CPU is brought down, and we need to VMCLEAR all VMCSs loaded on it.
  */
 static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);
-static DEFINE_PER_CPU(struct desc_ptr, host_gdt);
 
 /*
  * We maintian a per-CPU linked-list of vCPU, so in wakeup_handler() we
@@ -1997,10 +1996,9 @@ static void reload_tss(void)
 	/*
 	 * VT restores TR but not its size.  Useless.
 	 */
-	struct desc_ptr *gdt = this_cpu_ptr(&host_gdt);
 	struct desc_struct *descs;
 
-	descs = (void *)gdt->address;
+	descs = (void *)get_current_direct_gdt();
 	descs[GDT_ENTRY_TSS].type = 9; /* available TSS */
 	load_TR_desc();
 }
@@ -2061,7 +2059,6 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
 
 static unsigned long segment_base(u16 selector)
 {
-	struct desc_ptr *gdt = this_cpu_ptr(&host_gdt);
 	struct desc_struct *d;
 	unsigned long table_base;
 	unsigned long v;
@@ -2069,7 +2066,7 @@ static unsigned long segment_base(u16 selector)
 	if (!(selector & ~3))
 		return 0;
 
-	table_base = gdt->address;
+	table_base = (unsigned long)get_current_direct_gdt();
 
 	if (selector & 4) {           /* from ldt */
 		u16 ldt_selector = kvm_read_ldt();
@@ -2185,7 +2182,7 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
 #endif
 	if (vmx->host_state.msr_host_bndcfgs)
 		wrmsrl(MSR_IA32_BNDCFGS, vmx->host_state.msr_host_bndcfgs);
-	load_gdt(this_cpu_ptr(&host_gdt));
+	load_fixmap_gdt(raw_smp_processor_id());
 }
 
 static void vmx_load_host_state(struct vcpu_vmx *vmx)
@@ -2287,7 +2284,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	}
 
 	if (!already_loaded) {
-		struct desc_ptr *gdt = this_cpu_ptr(&host_gdt);
+		unsigned long gdt = (unsigned long)get_current_direct_gdt();
 		unsigned long sysenter_esp;
 
 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
@@ -2297,7 +2294,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 		 * processors.
 		 */
 		vmcs_writel(HOST_TR_BASE, kvm_read_tr_base()); /* 22.2.4 */
-		vmcs_writel(HOST_GDTR_BASE, gdt->address);   /* 22.2.4 */
+		vmcs_writel(HOST_GDTR_BASE, gdt);   /* 22.2.4 */
 
 		rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp);
 		vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */
@@ -3523,8 +3520,6 @@ static int hardware_enable(void)
 		ept_sync_global();
 	}
 
-	native_store_gdt(this_cpu_ptr(&host_gdt));
-
 	return 0;
 }
 
-- 
2.11.0.483.g087da7b7c-goog

^ permalink raw reply related	[flat|nested] 38+ messages in thread

* [PATCH v2 3/3] x86: Make the GDT remapping read-only on 64 bit
  2017-01-26 16:59 ` Thomas Garnier
                   ` (4 preceding siblings ...)
  (?)
@ 2017-01-26 16:59 ` Thomas Garnier
  -1 siblings, 0 replies; 38+ messages in thread
From: Thomas Garnier @ 2017-01-26 16:59 UTC (permalink / raw)
  To: Thomas Gleixner, Ingo Molnar, H . Peter Anvin, Andrey Ryabinin,
	Alexander Potapenko, Dmitry Vyukov, Thomas Garnier, Kees Cook,
	Andy Lutomirski, Arjan van de Ven, Paul Gortmaker,
	Borislav Petkov, Andy Lutomirski, Rafael J . Wysocki, Len Brown,
	Pavel Machek, Jiri Kosina, Matt Fleming, Ard Biesheuvel,
	Boris Ostrovsky, Juergen Gross, Rusty Russell,
	Christian Borntraeger, Fe
  Cc: linux-efi, kvm, linux-pm, x86, linux-kernel, kasan-dev, lguest,
	kernel-hardening, xen-devel

This patch makes the GDT remapped pages read-only to prevent corruption.
This change is done only on 64 bit.

The native_load_tr_desc function was adapted to correctly handle a
read-only GDT. The LTR instruction always writes to the GDT TSS entry.
This generates a page fault if the GDT is read-only. This change checks
if the current GDT is a remap and swap GDTs as needed. This function was
tested by booting multiple machines and checking hibernation works
properly.

KVM SVM and VMX were adapted to use the writeable GDT. On VMX, the
per-cpu variable was removed for functions to fetch the original GDT.
Instead of reloading the previous GDT, VMX will reload the fixmap GDT as
expected. For testing, VMs were started and restored on multiple
configurations.

Signed-off-by: Thomas Garnier <thgarnie@google.com>
---
Based on next-20170125
---
 arch/x86/include/asm/desc.h      | 46 +++++++++++++++++++++++++++++++++++-----
 arch/x86/include/asm/processor.h |  1 +
 arch/x86/kernel/cpu/common.c     | 28 ++++++++++++++++++------
 arch/x86/kvm/svm.c               |  4 +---
 arch/x86/kvm/vmx.c               | 15 +++++--------
 5 files changed, 70 insertions(+), 24 deletions(-)

diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 4cc176f57b78..ca7b2224fcb4 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -52,6 +52,12 @@ static inline struct desc_struct *get_cpu_direct_gdt(unsigned int cpu)
 	return per_cpu(gdt_page, cpu).gdt;
 }
 
+/* Provide the current original GDT */
+static inline struct desc_struct *get_current_direct_gdt(void)
+{
+	return this_cpu_ptr(&gdt_page)->gdt;
+}
+
 /* Get the fixmap index for a specific processor */
 static inline unsigned int get_cpu_fixmap_gdt_index(int cpu)
 {
@@ -223,11 +229,6 @@ static inline void native_set_ldt(const void *addr, unsigned int entries)
 	}
 }
 
-static inline void native_load_tr_desc(void)
-{
-	asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
-}
-
 static inline void native_load_gdt(const struct desc_ptr *dtr)
 {
 	asm volatile("lgdt %0"::"m" (*dtr));
@@ -248,6 +249,41 @@ static inline void native_store_idt(struct desc_ptr *dtr)
 	asm volatile("sidt %0":"=m" (*dtr));
 }
 
+/*
+ * The LTR instruction marks the TSS GDT entry as busy. In 64bit, the GDT is
+ * a read-only remapping. To prevent a page fault, the GDT is switched to the
+ * original writeable version when needed.
+ */
+#ifdef CONFIG_X86_64
+static inline void native_load_tr_desc(void)
+{
+	struct desc_ptr gdt;
+	int cpu = raw_smp_processor_id();
+	bool restore = false;
+	struct desc_struct *fixmap_gdt;
+
+	native_store_gdt(&gdt);
+	fixmap_gdt = get_cpu_fixmap_gdt(cpu);
+
+	/*
+	 * If the current GDT is the read-only fixmap, swap to the original
+	 * writeable version. Swap back at the end.
+	 */
+	if (gdt.address == (unsigned long)fixmap_gdt) {
+		load_direct_gdt(cpu);
+		restore = true;
+	}
+	asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
+	if (restore)
+		load_fixmap_gdt(cpu);
+}
+#else
+static inline void native_load_tr_desc(void)
+{
+	asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
+}
+#endif
+
 static inline unsigned long native_store_tr(void)
 {
 	unsigned long tr;
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 22801fd345dc..e8e68b00a2ec 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -705,6 +705,7 @@ extern struct desc_ptr		early_gdt_descr;
 
 extern void cpu_set_gdt(int);
 extern void switch_to_new_gdt(int);
+extern void load_direct_gdt(int);
 extern void load_fixmap_gdt(int);
 extern void load_percpu_segment(int);
 extern void cpu_init(void);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 15f06cf3e3d4..a7a54f57b68a 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -444,13 +444,31 @@ void load_percpu_segment(int cpu)
 	load_stack_canary_segment();
 }
 
+/* On 64bit the GDT remapping is read-only */
+#ifdef CONFIG_X86_64
+#define PAGE_FIXMAP_GDT PAGE_KERNEL_RO
+#else
+#define PAGE_FIXMAP_GDT PAGE_KERNEL
+#endif
+
 /* Setup the fixmap mapping only once per-processor */
 static inline void setup_fixmap_gdt(int cpu)
 {
 	__set_fixmap(get_cpu_fixmap_gdt_index(cpu),
-		     __pa(get_cpu_direct_gdt(cpu)), PAGE_KERNEL);
+		     __pa(get_cpu_direct_gdt(cpu)), PAGE_FIXMAP_GDT);
 }
 
+/* Load the original GDT from the per-cpu structure */
+void load_direct_gdt(int cpu)
+{
+	struct desc_ptr gdt_descr;
+
+	gdt_descr.address = (long)get_cpu_direct_gdt(cpu);
+	gdt_descr.size = GDT_SIZE - 1;
+	load_gdt(&gdt_descr);
+}
+EXPORT_SYMBOL(load_direct_gdt);
+
 /* Load a fixmap remapping of the per-cpu GDT */
 void load_fixmap_gdt(int cpu)
 {
@@ -460,6 +478,7 @@ void load_fixmap_gdt(int cpu)
 	gdt_descr.size = GDT_SIZE - 1;
 	load_gdt(&gdt_descr);
 }
+EXPORT_SYMBOL(load_fixmap_gdt);
 
 /*
  * Current gdt points %fs at the "master" per-cpu area: after this,
@@ -467,11 +486,8 @@ void load_fixmap_gdt(int cpu)
  */
 void switch_to_new_gdt(int cpu)
 {
-	struct desc_ptr gdt_descr;
-
-	gdt_descr.address = (long)get_cpu_direct_gdt(cpu);
-	gdt_descr.size = GDT_SIZE - 1;
-	load_gdt(&gdt_descr);
+	/* Load the original GDT */
+	load_direct_gdt(cpu);
 	/* Reload the per-cpu base */
 	load_percpu_segment(cpu);
 }
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index d0414f054bdf..7864f4c813a1 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -741,7 +741,6 @@ static int svm_hardware_enable(void)
 
 	struct svm_cpu_data *sd;
 	uint64_t efer;
-	struct desc_ptr gdt_descr;
 	struct desc_struct *gdt;
 	int me = raw_smp_processor_id();
 
@@ -763,8 +762,7 @@ static int svm_hardware_enable(void)
 	sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1;
 	sd->next_asid = sd->max_asid + 1;
 
-	native_store_gdt(&gdt_descr);
-	gdt = (struct desc_struct *)gdt_descr.address;
+	gdt = get_current_direct_gdt();
 	sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
 
 	wrmsrl(MSR_EFER, efer | EFER_SVME);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 4e691035a32d..c64fa6e0417c 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -935,7 +935,6 @@ static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
  * when a CPU is brought down, and we need to VMCLEAR all VMCSs loaded on it.
  */
 static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);
-static DEFINE_PER_CPU(struct desc_ptr, host_gdt);
 
 /*
  * We maintian a per-CPU linked-list of vCPU, so in wakeup_handler() we
@@ -1997,10 +1996,9 @@ static void reload_tss(void)
 	/*
 	 * VT restores TR but not its size.  Useless.
 	 */
-	struct desc_ptr *gdt = this_cpu_ptr(&host_gdt);
 	struct desc_struct *descs;
 
-	descs = (void *)gdt->address;
+	descs = (void *)get_current_direct_gdt();
 	descs[GDT_ENTRY_TSS].type = 9; /* available TSS */
 	load_TR_desc();
 }
@@ -2061,7 +2059,6 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
 
 static unsigned long segment_base(u16 selector)
 {
-	struct desc_ptr *gdt = this_cpu_ptr(&host_gdt);
 	struct desc_struct *d;
 	unsigned long table_base;
 	unsigned long v;
@@ -2069,7 +2066,7 @@ static unsigned long segment_base(u16 selector)
 	if (!(selector & ~3))
 		return 0;
 
-	table_base = gdt->address;
+	table_base = (unsigned long)get_current_direct_gdt();
 
 	if (selector & 4) {           /* from ldt */
 		u16 ldt_selector = kvm_read_ldt();
@@ -2185,7 +2182,7 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
 #endif
 	if (vmx->host_state.msr_host_bndcfgs)
 		wrmsrl(MSR_IA32_BNDCFGS, vmx->host_state.msr_host_bndcfgs);
-	load_gdt(this_cpu_ptr(&host_gdt));
+	load_fixmap_gdt(raw_smp_processor_id());
 }
 
 static void vmx_load_host_state(struct vcpu_vmx *vmx)
@@ -2287,7 +2284,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	}
 
 	if (!already_loaded) {
-		struct desc_ptr *gdt = this_cpu_ptr(&host_gdt);
+		unsigned long gdt = (unsigned long)get_current_direct_gdt();
 		unsigned long sysenter_esp;
 
 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
@@ -2297,7 +2294,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 		 * processors.
 		 */
 		vmcs_writel(HOST_TR_BASE, kvm_read_tr_base()); /* 22.2.4 */
-		vmcs_writel(HOST_GDTR_BASE, gdt->address);   /* 22.2.4 */
+		vmcs_writel(HOST_GDTR_BASE, gdt);   /* 22.2.4 */
 
 		rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp);
 		vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */
@@ -3523,8 +3520,6 @@ static int hardware_enable(void)
 		ept_sync_global();
 	}
 
-	native_store_gdt(this_cpu_ptr(&host_gdt));
-
 	return 0;
 }
 
-- 
2.11.0.483.g087da7b7c-goog


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply related	[flat|nested] 38+ messages in thread

* Re: [PATCH v2 2/3] x86: Remap GDT tables in the Fixmap section
  2017-01-26 16:59   ` Thomas Garnier
@ 2017-01-26 18:52     ` Andy Lutomirski
  -1 siblings, 0 replies; 38+ messages in thread
From: Andy Lutomirski @ 2017-01-26 18:52 UTC (permalink / raw)
  To: Thomas Garnier
  Cc: Thomas Gleixner, Ingo Molnar, H . Peter Anvin, Andrey Ryabinin,
	Alexander Potapenko, Dmitry Vyukov, Kees Cook, Andy Lutomirski,
	Arjan van de Ven, Paul Gortmaker, Borislav Petkov,
	Rafael J . Wysocki, Len Brown, Pavel Machek, Jiri Kosina,
	Matt Fleming, Ard Biesheuvel, Boris Ostrovsky, Juergen Gross,
	Rusty Russell, Christian Borntraeger, Fenghua Yu, He Chen, Brian

On Thu, Jan 26, 2017 at 8:59 AM, Thomas Garnier <thgarnie@google.com> wrote:
> Each processor holds a GDT in its per-cpu structure. The sgdt
> instruction gives the base address of the current GDT. This address can
> be used to bypass KASLR memory randomization. With another bug, an
> attacker could target other per-cpu structures or deduce the base of
> the main memory section (PAGE_OFFSET).
>
> This patch relocates the GDT table for each processor inside the
> Fixmap section. The space is reserved based on number of supported
> processors.
>
> For consistency, the remapping is done by default on 32 and 64 bit.
>
> Each processor switches to its remapped GDT at the end of
> initialization. For hibernation, the main processor returns with the
> original GDT and switches back to the remapping at completion.
>
> This patch was tested on both architectures. Hibernation and KVM were
> both tested specially for their usage of the GDT.

I like this version much better.  Thanks!

--Andy

^ permalink raw reply	[flat|nested] 38+ messages in thread

* [kernel-hardening] Re: [PATCH v2 2/3] x86: Remap GDT tables in the Fixmap section
@ 2017-01-26 18:52     ` Andy Lutomirski
  0 siblings, 0 replies; 38+ messages in thread
From: Andy Lutomirski @ 2017-01-26 18:52 UTC (permalink / raw)
  To: Thomas Garnier
  Cc: Thomas Gleixner, Ingo Molnar, H . Peter Anvin, Andrey Ryabinin,
	Alexander Potapenko, Dmitry Vyukov, Kees Cook, Andy Lutomirski,
	Arjan van de Ven, Paul Gortmaker, Borislav Petkov,
	Rafael J . Wysocki, Len Brown, Pavel Machek, Jiri Kosina,
	Matt Fleming, Ard Biesheuvel, Boris Ostrovsky, Juergen Gross,
	Rusty Russell, Christian Borntraeger, Fenghua Yu, He Chen,
	Brian Gerst, Luis R . Rodriguez, Adam Buchbinder,
	Stanislaw Gruszka, Arnd Bergmann, Dave Hansen, Chen Yucong,
	Vitaly Kuznetsov, David Vrabel, Josh Poimboeuf, Tim Chen,
	Rik van Riel, Andi Kleen, Jiri Olsa, Prarit Bhargava,
	Michael Ellerman, Joerg Roedel, Paolo Bonzini,
	Radim Krčmář,
	X86 ML, linux-kernel, kasan-dev, linux-pm, linux-efi, xen-devel,
	lguest, kvm list, kernel-hardening

On Thu, Jan 26, 2017 at 8:59 AM, Thomas Garnier <thgarnie@google.com> wrote:
> Each processor holds a GDT in its per-cpu structure. The sgdt
> instruction gives the base address of the current GDT. This address can
> be used to bypass KASLR memory randomization. With another bug, an
> attacker could target other per-cpu structures or deduce the base of
> the main memory section (PAGE_OFFSET).
>
> This patch relocates the GDT table for each processor inside the
> Fixmap section. The space is reserved based on number of supported
> processors.
>
> For consistency, the remapping is done by default on 32 and 64 bit.
>
> Each processor switches to its remapped GDT at the end of
> initialization. For hibernation, the main processor returns with the
> original GDT and switches back to the remapping at completion.
>
> This patch was tested on both architectures. Hibernation and KVM were
> both tested specially for their usage of the GDT.

I like this version much better.  Thanks!

--Andy

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH v2 2/3] x86: Remap GDT tables in the Fixmap section
  2017-01-26 16:59   ` Thomas Garnier
                     ` (3 preceding siblings ...)
  (?)
@ 2017-01-26 18:52   ` Andy Lutomirski
  -1 siblings, 0 replies; 38+ messages in thread
From: Andy Lutomirski @ 2017-01-26 18:52 UTC (permalink / raw)
  To: Thomas Garnier
  Cc: X86 ML, Stanislaw Gruszka, kvm list, Fenghua Yu, Matt Fleming,
	Paul Gortmaker, Radim Krčmář,
	linux-efi, Alexander Potapenko, Pavel Machek, H . Peter Anvin,
	kernel-hardening, Jiri Olsa, Boris Ostrovsky, Dave Hansen,
	Andi Kleen, xen-devel, Michael Ellerman, Joerg Roedel,
	Prarit Bhargava, kasan-dev

On Thu, Jan 26, 2017 at 8:59 AM, Thomas Garnier <thgarnie@google.com> wrote:
> Each processor holds a GDT in its per-cpu structure. The sgdt
> instruction gives the base address of the current GDT. This address can
> be used to bypass KASLR memory randomization. With another bug, an
> attacker could target other per-cpu structures or deduce the base of
> the main memory section (PAGE_OFFSET).
>
> This patch relocates the GDT table for each processor inside the
> Fixmap section. The space is reserved based on number of supported
> processors.
>
> For consistency, the remapping is done by default on 32 and 64 bit.
>
> Each processor switches to its remapped GDT at the end of
> initialization. For hibernation, the main processor returns with the
> original GDT and switches back to the remapping at completion.
>
> This patch was tested on both architectures. Hibernation and KVM were
> both tested specially for their usage of the GDT.

I like this version much better.  Thanks!

--Andy

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH v2 2/3] x86: Remap GDT tables in the Fixmap section
  2017-01-26 18:52     ` [kernel-hardening] " Andy Lutomirski
@ 2017-01-26 19:10       ` Thomas Garnier
  -1 siblings, 0 replies; 38+ messages in thread
From: Thomas Garnier @ 2017-01-26 19:10 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Thomas Gleixner, Ingo Molnar, H . Peter Anvin, Andrey Ryabinin,
	Alexander Potapenko, Dmitry Vyukov, Kees Cook, Andy Lutomirski,
	Arjan van de Ven, Paul Gortmaker, Borislav Petkov,
	Rafael J . Wysocki, Len Brown, Pavel Machek, Jiri Kosina,
	Matt Fleming, Ard Biesheuvel, Boris Ostrovsky, Juergen Gross,
	Rusty Russell, Christian Borntraeger, Fenghua Yu, He Chen, Brian

On Thu, Jan 26, 2017 at 10:52 AM, Andy Lutomirski <luto@amacapital.net> wrote:
> On Thu, Jan 26, 2017 at 8:59 AM, Thomas Garnier <thgarnie@google.com> wrote:
>> Each processor holds a GDT in its per-cpu structure. The sgdt
>> instruction gives the base address of the current GDT. This address can
>> be used to bypass KASLR memory randomization. With another bug, an
>> attacker could target other per-cpu structures or deduce the base of
>> the main memory section (PAGE_OFFSET).
>>
>> This patch relocates the GDT table for each processor inside the
>> Fixmap section. The space is reserved based on number of supported
>> processors.
>>
>> For consistency, the remapping is done by default on 32 and 64 bit.
>>
>> Each processor switches to its remapped GDT at the end of
>> initialization. For hibernation, the main processor returns with the
>> original GDT and switches back to the remapping at completion.
>>
>> This patch was tested on both architectures. Hibernation and KVM were
>> both tested specially for their usage of the GDT.
>
> I like this version much better.  Thanks!

Thanks for the feedback! I like this version better too.

>
> --Andy



-- 
Thomas

^ permalink raw reply	[flat|nested] 38+ messages in thread

* [kernel-hardening] Re: [PATCH v2 2/3] x86: Remap GDT tables in the Fixmap section
@ 2017-01-26 19:10       ` Thomas Garnier
  0 siblings, 0 replies; 38+ messages in thread
From: Thomas Garnier @ 2017-01-26 19:10 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Thomas Gleixner, Ingo Molnar, H . Peter Anvin, Andrey Ryabinin,
	Alexander Potapenko, Dmitry Vyukov, Kees Cook, Andy Lutomirski,
	Arjan van de Ven, Paul Gortmaker, Borislav Petkov,
	Rafael J . Wysocki, Len Brown, Pavel Machek, Jiri Kosina,
	Matt Fleming, Ard Biesheuvel, Boris Ostrovsky, Juergen Gross,
	Rusty Russell, Christian Borntraeger, Fenghua Yu, He Chen,
	Brian Gerst, Luis R . Rodriguez, Adam Buchbinder,
	Stanislaw Gruszka, Arnd Bergmann, Dave Hansen, Chen Yucong,
	Vitaly Kuznetsov, David Vrabel, Josh Poimboeuf, Tim Chen,
	Rik van Riel, Andi Kleen, Jiri Olsa, Prarit Bhargava,
	Michael Ellerman, Joerg Roedel, Paolo Bonzini,
	Radim Krčmář,
	X86 ML, linux-kernel, kasan-dev, linux-pm, linux-efi, xen-devel,
	lguest, kvm list, kernel-hardening

On Thu, Jan 26, 2017 at 10:52 AM, Andy Lutomirski <luto@amacapital.net> wrote:
> On Thu, Jan 26, 2017 at 8:59 AM, Thomas Garnier <thgarnie@google.com> wrote:
>> Each processor holds a GDT in its per-cpu structure. The sgdt
>> instruction gives the base address of the current GDT. This address can
>> be used to bypass KASLR memory randomization. With another bug, an
>> attacker could target other per-cpu structures or deduce the base of
>> the main memory section (PAGE_OFFSET).
>>
>> This patch relocates the GDT table for each processor inside the
>> Fixmap section. The space is reserved based on number of supported
>> processors.
>>
>> For consistency, the remapping is done by default on 32 and 64 bit.
>>
>> Each processor switches to its remapped GDT at the end of
>> initialization. For hibernation, the main processor returns with the
>> original GDT and switches back to the remapping at completion.
>>
>> This patch was tested on both architectures. Hibernation and KVM were
>> both tested specially for their usage of the GDT.
>
> I like this version much better.  Thanks!

Thanks for the feedback! I like this version better too.

>
> --Andy



-- 
Thomas

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH v2 2/3] x86: Remap GDT tables in the Fixmap section
  2017-01-26 18:52     ` [kernel-hardening] " Andy Lutomirski
  (?)
  (?)
@ 2017-01-26 19:10     ` Thomas Garnier
  -1 siblings, 0 replies; 38+ messages in thread
From: Thomas Garnier @ 2017-01-26 19:10 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: X86 ML, Stanislaw Gruszka, kvm list, Fenghua Yu, Matt Fleming,
	Paul Gortmaker, Radim Krčmář,
	linux-efi, Alexander Potapenko, Pavel Machek, H . Peter Anvin,
	kernel-hardening, Jiri Olsa, Boris Ostrovsky, Dave Hansen,
	Andi Kleen, xen-devel, Michael Ellerman, Joerg Roedel,
	Prarit Bhargava, kasan-dev

On Thu, Jan 26, 2017 at 10:52 AM, Andy Lutomirski <luto@amacapital.net> wrote:
> On Thu, Jan 26, 2017 at 8:59 AM, Thomas Garnier <thgarnie@google.com> wrote:
>> Each processor holds a GDT in its per-cpu structure. The sgdt
>> instruction gives the base address of the current GDT. This address can
>> be used to bypass KASLR memory randomization. With another bug, an
>> attacker could target other per-cpu structures or deduce the base of
>> the main memory section (PAGE_OFFSET).
>>
>> This patch relocates the GDT table for each processor inside the
>> Fixmap section. The space is reserved based on number of supported
>> processors.
>>
>> For consistency, the remapping is done by default on 32 and 64 bit.
>>
>> Each processor switches to its remapped GDT at the end of
>> initialization. For hibernation, the main processor returns with the
>> original GDT and switches back to the remapping at completion.
>>
>> This patch was tested on both architectures. Hibernation and KVM were
>> both tested specially for their usage of the GDT.
>
> I like this version much better.  Thanks!

Thanks for the feedback! I like this version better too.

>
> --Andy



-- 
Thomas

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH v2 3/3] x86: Make the GDT remapping read-only on 64 bit
  2017-01-26 16:59   ` Thomas Garnier
  (?)
@ 2017-02-01  9:15     ` Ingo Molnar
  -1 siblings, 0 replies; 38+ messages in thread
From: Ingo Molnar @ 2017-02-01  9:15 UTC (permalink / raw)
  To: Thomas Garnier
  Cc: Thomas Gleixner, Ingo Molnar, H . Peter Anvin, Andrey Ryabinin,
	Alexander Potapenko, Dmitry Vyukov, Kees Cook, Andy Lutomirski,
	Arjan van de Ven, Paul Gortmaker, Borislav Petkov,
	Andy Lutomirski, Rafael J . Wysocki, Len Brown, Pavel Machek,
	Jiri Kosina, Matt Fleming, Ard Biesheuvel, Boris Ostrovsky,
	Juergen Gross, Rusty Russell, Christian Borntraeger, Fenghua Yu


* Thomas Garnier <thgarnie@google.com> wrote:

> This patch makes the GDT remapped pages read-only to prevent corruption.
> This change is done only on 64 bit.

Please spell '64-bit' consistently through the series. I've seen two variants:

  64 bit
  64bit

> +/*
> + * The LTR instruction marks the TSS GDT entry as busy. In 64bit, the GDT is
> + * a read-only remapping. To prevent a page fault, the GDT is switched to the
> + * original writeable version when needed.

s/In 64bit,
 /On 64-bit kernels,

> + */
> +#ifdef CONFIG_X86_64
> +static inline void native_load_tr_desc(void)
> +{
> +	struct desc_ptr gdt;
> +	int cpu = raw_smp_processor_id();
> +	bool restore = false;
> +	struct desc_struct *fixmap_gdt;
> +
> +	native_store_gdt(&gdt);
> +	fixmap_gdt = get_cpu_fixmap_gdt(cpu);
> +
> +	/*
> +	 * If the current GDT is the read-only fixmap, swap to the original
> +	 * writeable version. Swap back at the end.
> +	 */
> +	if (gdt.address == (unsigned long)fixmap_gdt) {
> +		load_direct_gdt(cpu);
> +		restore = true;
> +	}
> +	asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
> +	if (restore)
> +		load_fixmap_gdt(cpu);

Please use bool plus 0/1, it's more readable (to me) than the true/false notation.

>  extern void switch_to_new_gdt(int);
> +extern void load_direct_gdt(int);
>  extern void load_fixmap_gdt(int);

> +/* Load the original GDT from the per-cpu structure */
> +void load_direct_gdt(int cpu)
> +{
> +	struct desc_ptr gdt_descr;
> +
> +	gdt_descr.address = (long)get_cpu_direct_gdt(cpu);

Please name the functions in an easier to understand way, such as:

	get_cpu_gdt_rw()
	get_cpu_gdt_ro()

that the GDT is in the direct mappings is less important than the fact the the 
address is writable ...

> +}
> +EXPORT_SYMBOL(load_direct_gdt);

EXPORT_SYMBOL_GPL(), or no export at all.

> +EXPORT_SYMBOL(load_fixmap_gdt);

ditto.

>  	 * VT restores TR but not its size.  Useless.
>  	 */
> -	struct desc_ptr *gdt = this_cpu_ptr(&host_gdt);
>  	struct desc_struct *descs;
>  
> -	descs = (void *)gdt->address;
> +	descs = (void *)get_current_direct_gdt();

Couldn't the type cast be dropped?

>  
> -	table_base = gdt->address;
> +	table_base = (unsigned long)get_current_direct_gdt();

Instead of spreading these type casts far and wide please introduce another 
accessor the returns 'unsigned long':

	get_cpu_gdt_rw_vaddr()

or such.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH v2 3/3] x86: Make the GDT remapping read-only on 64 bit
@ 2017-02-01  9:15     ` Ingo Molnar
  0 siblings, 0 replies; 38+ messages in thread
From: Ingo Molnar @ 2017-02-01  9:15 UTC (permalink / raw)
  To: Thomas Garnier
  Cc: Thomas Gleixner, Ingo Molnar, H . Peter Anvin, Andrey Ryabinin,
	Alexander Potapenko, Dmitry Vyukov, Kees Cook, Andy Lutomirski,
	Arjan van de Ven, Paul Gortmaker, Borislav Petkov,
	Andy Lutomirski, Rafael J . Wysocki, Len Brown, Pavel Machek,
	Jiri Kosina, Matt Fleming, Ard Biesheuvel, Boris Ostrovsky,
	Juergen Gross, Rusty Russell, Christian Borntraeger, Fenghua Yu,
	H


* Thomas Garnier <thgarnie@google.com> wrote:

> This patch makes the GDT remapped pages read-only to prevent corruption.
> This change is done only on 64 bit.

Please spell '64-bit' consistently through the series. I've seen two variants:

  64 bit
  64bit

> +/*
> + * The LTR instruction marks the TSS GDT entry as busy. In 64bit, the GDT is
> + * a read-only remapping. To prevent a page fault, the GDT is switched to the
> + * original writeable version when needed.

s/In 64bit,
 /On 64-bit kernels,

> + */
> +#ifdef CONFIG_X86_64
> +static inline void native_load_tr_desc(void)
> +{
> +	struct desc_ptr gdt;
> +	int cpu = raw_smp_processor_id();
> +	bool restore = false;
> +	struct desc_struct *fixmap_gdt;
> +
> +	native_store_gdt(&gdt);
> +	fixmap_gdt = get_cpu_fixmap_gdt(cpu);
> +
> +	/*
> +	 * If the current GDT is the read-only fixmap, swap to the original
> +	 * writeable version. Swap back at the end.
> +	 */
> +	if (gdt.address == (unsigned long)fixmap_gdt) {
> +		load_direct_gdt(cpu);
> +		restore = true;
> +	}
> +	asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
> +	if (restore)
> +		load_fixmap_gdt(cpu);

Please use bool plus 0/1, it's more readable (to me) than the true/false notation.

>  extern void switch_to_new_gdt(int);
> +extern void load_direct_gdt(int);
>  extern void load_fixmap_gdt(int);

> +/* Load the original GDT from the per-cpu structure */
> +void load_direct_gdt(int cpu)
> +{
> +	struct desc_ptr gdt_descr;
> +
> +	gdt_descr.address = (long)get_cpu_direct_gdt(cpu);

Please name the functions in an easier to understand way, such as:

	get_cpu_gdt_rw()
	get_cpu_gdt_ro()

that the GDT is in the direct mappings is less important than the fact the the 
address is writable ...

> +}
> +EXPORT_SYMBOL(load_direct_gdt);

EXPORT_SYMBOL_GPL(), or no export at all.

> +EXPORT_SYMBOL(load_fixmap_gdt);

ditto.

>  	 * VT restores TR but not its size.  Useless.
>  	 */
> -	struct desc_ptr *gdt = this_cpu_ptr(&host_gdt);
>  	struct desc_struct *descs;
>  
> -	descs = (void *)gdt->address;
> +	descs = (void *)get_current_direct_gdt();

Couldn't the type cast be dropped?

>  
> -	table_base = gdt->address;
> +	table_base = (unsigned long)get_current_direct_gdt();

Instead of spreading these type casts far and wide please introduce another 
accessor the returns 'unsigned long':

	get_cpu_gdt_rw_vaddr()

or such.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 38+ messages in thread

* [kernel-hardening] Re: [PATCH v2 3/3] x86: Make the GDT remapping read-only on 64 bit
@ 2017-02-01  9:15     ` Ingo Molnar
  0 siblings, 0 replies; 38+ messages in thread
From: Ingo Molnar @ 2017-02-01  9:15 UTC (permalink / raw)
  To: Thomas Garnier
  Cc: Thomas Gleixner, Ingo Molnar, H . Peter Anvin, Andrey Ryabinin,
	Alexander Potapenko, Dmitry Vyukov, Kees Cook, Andy Lutomirski,
	Arjan van de Ven, Paul Gortmaker, Borislav Petkov,
	Andy Lutomirski, Rafael J . Wysocki, Len Brown, Pavel Machek,
	Jiri Kosina, Matt Fleming, Ard Biesheuvel, Boris Ostrovsky,
	Juergen Gross, Rusty Russell, Christian Borntraeger, Fenghua Yu,
	He Chen, Brian Gerst, Luis R . Rodriguez, Adam Buchbinder,
	Stanislaw Gruszka, Arnd Bergmann, Dave Hansen, Chen Yucong,
	Vitaly Kuznetsov, David Vrabel, Josh Poimboeuf, Tim Chen,
	Rik van Riel, Andi Kleen, Jiri Olsa, Prarit Bhargava,
	Michael Ellerman, Joerg Roedel, Paolo Bonzini,
	Radim Krčmář,
	x86, linux-kernel, kasan-dev, linux-pm, linux-efi, xen-devel,
	lguest, kvm, kernel-hardening


* Thomas Garnier <thgarnie@google.com> wrote:

> This patch makes the GDT remapped pages read-only to prevent corruption.
> This change is done only on 64 bit.

Please spell '64-bit' consistently through the series. I've seen two variants:

  64 bit
  64bit

> +/*
> + * The LTR instruction marks the TSS GDT entry as busy. In 64bit, the GDT is
> + * a read-only remapping. To prevent a page fault, the GDT is switched to the
> + * original writeable version when needed.

s/In 64bit,
 /On 64-bit kernels,

> + */
> +#ifdef CONFIG_X86_64
> +static inline void native_load_tr_desc(void)
> +{
> +	struct desc_ptr gdt;
> +	int cpu = raw_smp_processor_id();
> +	bool restore = false;
> +	struct desc_struct *fixmap_gdt;
> +
> +	native_store_gdt(&gdt);
> +	fixmap_gdt = get_cpu_fixmap_gdt(cpu);
> +
> +	/*
> +	 * If the current GDT is the read-only fixmap, swap to the original
> +	 * writeable version. Swap back at the end.
> +	 */
> +	if (gdt.address == (unsigned long)fixmap_gdt) {
> +		load_direct_gdt(cpu);
> +		restore = true;
> +	}
> +	asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
> +	if (restore)
> +		load_fixmap_gdt(cpu);

Please use bool plus 0/1, it's more readable (to me) than the true/false notation.

>  extern void switch_to_new_gdt(int);
> +extern void load_direct_gdt(int);
>  extern void load_fixmap_gdt(int);

> +/* Load the original GDT from the per-cpu structure */
> +void load_direct_gdt(int cpu)
> +{
> +	struct desc_ptr gdt_descr;
> +
> +	gdt_descr.address = (long)get_cpu_direct_gdt(cpu);

Please name the functions in an easier to understand way, such as:

	get_cpu_gdt_rw()
	get_cpu_gdt_ro()

that the GDT is in the direct mappings is less important than the fact the the 
address is writable ...

> +}
> +EXPORT_SYMBOL(load_direct_gdt);

EXPORT_SYMBOL_GPL(), or no export at all.

> +EXPORT_SYMBOL(load_fixmap_gdt);

ditto.

>  	 * VT restores TR but not its size.  Useless.
>  	 */
> -	struct desc_ptr *gdt = this_cpu_ptr(&host_gdt);
>  	struct desc_struct *descs;
>  
> -	descs = (void *)gdt->address;
> +	descs = (void *)get_current_direct_gdt();

Couldn't the type cast be dropped?

>  
> -	table_base = gdt->address;
> +	table_base = (unsigned long)get_current_direct_gdt();

Instead of spreading these type casts far and wide please introduce another 
accessor the returns 'unsigned long':

	get_cpu_gdt_rw_vaddr()

or such.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH v2 3/3] x86: Make the GDT remapping read-only on 64 bit
  2017-01-26 16:59   ` Thomas Garnier
                     ` (3 preceding siblings ...)
  (?)
@ 2017-02-01  9:15   ` Ingo Molnar
  -1 siblings, 0 replies; 38+ messages in thread
From: Ingo Molnar @ 2017-02-01  9:15 UTC (permalink / raw)
  To: Thomas Garnier
  Cc: x86, Stanislaw Gruszka, kvm, Fenghua Yu, Matt Fleming,
	Paul Gortmaker, Radim Krčmář,
	linux-efi, Alexander Potapenko, Pavel Machek, H . Peter Anvin,
	kernel-hardening, Jiri Olsa, Boris Ostrovsky, Dave Hansen,
	Andi Kleen, xen-devel, Michael Ellerman, Joerg Roedel,
	Prarit Bhargava, kasan-dev, Christian Borntraeger, Ingo Molnar,
	Andrey Ryabinin, Borislav Petkov


* Thomas Garnier <thgarnie@google.com> wrote:

> This patch makes the GDT remapped pages read-only to prevent corruption.
> This change is done only on 64 bit.

Please spell '64-bit' consistently through the series. I've seen two variants:

  64 bit
  64bit

> +/*
> + * The LTR instruction marks the TSS GDT entry as busy. In 64bit, the GDT is
> + * a read-only remapping. To prevent a page fault, the GDT is switched to the
> + * original writeable version when needed.

s/In 64bit,
 /On 64-bit kernels,

> + */
> +#ifdef CONFIG_X86_64
> +static inline void native_load_tr_desc(void)
> +{
> +	struct desc_ptr gdt;
> +	int cpu = raw_smp_processor_id();
> +	bool restore = false;
> +	struct desc_struct *fixmap_gdt;
> +
> +	native_store_gdt(&gdt);
> +	fixmap_gdt = get_cpu_fixmap_gdt(cpu);
> +
> +	/*
> +	 * If the current GDT is the read-only fixmap, swap to the original
> +	 * writeable version. Swap back at the end.
> +	 */
> +	if (gdt.address == (unsigned long)fixmap_gdt) {
> +		load_direct_gdt(cpu);
> +		restore = true;
> +	}
> +	asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
> +	if (restore)
> +		load_fixmap_gdt(cpu);

Please use bool plus 0/1, it's more readable (to me) than the true/false notation.

>  extern void switch_to_new_gdt(int);
> +extern void load_direct_gdt(int);
>  extern void load_fixmap_gdt(int);

> +/* Load the original GDT from the per-cpu structure */
> +void load_direct_gdt(int cpu)
> +{
> +	struct desc_ptr gdt_descr;
> +
> +	gdt_descr.address = (long)get_cpu_direct_gdt(cpu);

Please name the functions in an easier to understand way, such as:

	get_cpu_gdt_rw()
	get_cpu_gdt_ro()

that the GDT is in the direct mappings is less important than the fact the the 
address is writable ...

> +}
> +EXPORT_SYMBOL(load_direct_gdt);

EXPORT_SYMBOL_GPL(), or no export at all.

> +EXPORT_SYMBOL(load_fixmap_gdt);

ditto.

>  	 * VT restores TR but not its size.  Useless.
>  	 */
> -	struct desc_ptr *gdt = this_cpu_ptr(&host_gdt);
>  	struct desc_struct *descs;
>  
> -	descs = (void *)gdt->address;
> +	descs = (void *)get_current_direct_gdt();

Couldn't the type cast be dropped?

>  
> -	table_base = gdt->address;
> +	table_base = (unsigned long)get_current_direct_gdt();

Instead of spreading these type casts far and wide please introduce another 
accessor the returns 'unsigned long':

	get_cpu_gdt_rw_vaddr()

or such.

Thanks,

	Ingo

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH v2 3/3] x86: Make the GDT remapping read-only on 64 bit
  2017-02-01  9:15     ` Ingo Molnar
  (?)
@ 2017-02-02  5:13       ` Andy Lutomirski
  -1 siblings, 0 replies; 38+ messages in thread
From: Andy Lutomirski @ 2017-02-02  5:13 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Thomas Garnier, Thomas Gleixner, Ingo Molnar, H . Peter Anvin,
	Andrey Ryabinin, Alexander Potapenko, Dmitry Vyukov, Kees Cook,
	Andy Lutomirski, Arjan van de Ven, Paul Gortmaker,
	Borislav Petkov, Rafael J . Wysocki, Len Brown, Pavel Machek,
	Jiri Kosina, Matt Fleming, Ard Biesheuvel, Boris Ostrovsky,
	Juergen Gross, Rusty Russell, Christian Borntraeger, Fenghua Yu

On Wed, Feb 1, 2017 at 1:15 AM, Ingo Molnar <mingo@kernel.org> wrote:
>
> * Thomas Garnier <thgarnie@google.com> wrote:
>
>> This patch makes the GDT remapped pages read-only to prevent corruption.
>> This change is done only on 64 bit.
>


>>
>> -     table_base = gdt->address;
>> +     table_base = (unsigned long)get_current_direct_gdt();
>
> Instead of spreading these type casts far and wide please introduce another
> accessor the returns 'unsigned long':
>
>         get_cpu_gdt_rw_vaddr()
>

That whole function is an abomination.  How about replacing 'unsigned
long table_base' with 'struct desc_struct *table'?  If you're feeling
really adventurous, *delete* that function and replace all of its
users with something sane.

--Andy

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH v2 3/3] x86: Make the GDT remapping read-only on 64 bit
@ 2017-02-02  5:13       ` Andy Lutomirski
  0 siblings, 0 replies; 38+ messages in thread
From: Andy Lutomirski @ 2017-02-02  5:13 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Thomas Garnier, Thomas Gleixner, Ingo Molnar, H . Peter Anvin,
	Andrey Ryabinin, Alexander Potapenko, Dmitry Vyukov, Kees Cook,
	Andy Lutomirski, Arjan van de Ven, Paul Gortmaker,
	Borislav Petkov, Rafael J . Wysocki, Len Brown, Pavel Machek,
	Jiri Kosina, Matt Fleming, Ard Biesheuvel, Boris Ostrovsky,
	Juergen Gross, Rusty Russell, Christian Borntraeger, Fenghua Yu

On Wed, Feb 1, 2017 at 1:15 AM, Ingo Molnar <mingo@kernel.org> wrote:
>
> * Thomas Garnier <thgarnie@google.com> wrote:
>
>> This patch makes the GDT remapped pages read-only to prevent corruption.
>> This change is done only on 64 bit.
>


>>
>> -     table_base = gdt->address;
>> +     table_base = (unsigned long)get_current_direct_gdt();
>
> Instead of spreading these type casts far and wide please introduce another
> accessor the returns 'unsigned long':
>
>         get_cpu_gdt_rw_vaddr()
>

That whole function is an abomination.  How about replacing 'unsigned
long table_base' with 'struct desc_struct *table'?  If you're feeling
really adventurous, *delete* that function and replace all of its
users with something sane.

--Andy

^ permalink raw reply	[flat|nested] 38+ messages in thread

* [kernel-hardening] Re: [PATCH v2 3/3] x86: Make the GDT remapping read-only on 64 bit
@ 2017-02-02  5:13       ` Andy Lutomirski
  0 siblings, 0 replies; 38+ messages in thread
From: Andy Lutomirski @ 2017-02-02  5:13 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Thomas Garnier, Thomas Gleixner, Ingo Molnar, H . Peter Anvin,
	Andrey Ryabinin, Alexander Potapenko, Dmitry Vyukov, Kees Cook,
	Andy Lutomirski, Arjan van de Ven, Paul Gortmaker,
	Borislav Petkov, Rafael J . Wysocki, Len Brown, Pavel Machek,
	Jiri Kosina, Matt Fleming, Ard Biesheuvel, Boris Ostrovsky,
	Juergen Gross, Rusty Russell, Christian Borntraeger, Fenghua Yu,
	He Chen, Brian Gerst, Luis R . Rodriguez, Adam Buchbinder,
	Stanislaw Gruszka, Arnd Bergmann, Dave Hansen, Chen Yucong,
	Vitaly Kuznetsov, David Vrabel, Josh Poimboeuf, Tim Chen,
	Rik van Riel, Andi Kleen, Jiri Olsa, Prarit Bhargava,
	Michael Ellerman, Joerg Roedel, Paolo Bonzini,
	Radim Krčmář,
	X86 ML, linux-kernel, kasan-dev, linux-pm, linux-efi, xen-devel,
	lguest, kvm list, kernel-hardening

On Wed, Feb 1, 2017 at 1:15 AM, Ingo Molnar <mingo@kernel.org> wrote:
>
> * Thomas Garnier <thgarnie@google.com> wrote:
>
>> This patch makes the GDT remapped pages read-only to prevent corruption.
>> This change is done only on 64 bit.
>


>>
>> -     table_base = gdt->address;
>> +     table_base = (unsigned long)get_current_direct_gdt();
>
> Instead of spreading these type casts far and wide please introduce another
> accessor the returns 'unsigned long':
>
>         get_cpu_gdt_rw_vaddr()
>

That whole function is an abomination.  How about replacing 'unsigned
long table_base' with 'struct desc_struct *table'?  If you're feeling
really adventurous, *delete* that function and replace all of its
users with something sane.

--Andy

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH v2 3/3] x86: Make the GDT remapping read-only on 64 bit
  2017-02-01  9:15     ` Ingo Molnar
  (?)
  (?)
@ 2017-02-02  5:13     ` Andy Lutomirski
  -1 siblings, 0 replies; 38+ messages in thread
From: Andy Lutomirski @ 2017-02-02  5:13 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: X86 ML, Stanislaw Gruszka, kvm list, Fenghua Yu, Matt Fleming,
	Paul Gortmaker, Radim Krčmář,
	linux-efi, Alexander Potapenko, Pavel Machek, H . Peter Anvin,
	kernel-hardening, Jiri Olsa, Thomas Gleixner, Dave Hansen,
	Andi Kleen, xen-devel, Michael Ellerman, Joerg Roedel,
	Prarit Bhargava, kasan-dev

On Wed, Feb 1, 2017 at 1:15 AM, Ingo Molnar <mingo@kernel.org> wrote:
>
> * Thomas Garnier <thgarnie@google.com> wrote:
>
>> This patch makes the GDT remapped pages read-only to prevent corruption.
>> This change is done only on 64 bit.
>


>>
>> -     table_base = gdt->address;
>> +     table_base = (unsigned long)get_current_direct_gdt();
>
> Instead of spreading these type casts far and wide please introduce another
> accessor the returns 'unsigned long':
>
>         get_cpu_gdt_rw_vaddr()
>

That whole function is an abomination.  How about replacing 'unsigned
long table_base' with 'struct desc_struct *table'?  If you're feeling
really adventurous, *delete* that function and replace all of its
users with something sane.

--Andy

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH v2 3/3] x86: Make the GDT remapping read-only on 64 bit
  2017-01-26 16:59   ` Thomas Garnier
@ 2017-02-02  5:14     ` Andy Lutomirski
  -1 siblings, 0 replies; 38+ messages in thread
From: Andy Lutomirski @ 2017-02-02  5:14 UTC (permalink / raw)
  To: Thomas Garnier
  Cc: Thomas Gleixner, Ingo Molnar, H . Peter Anvin, Andrey Ryabinin,
	Alexander Potapenko, Dmitry Vyukov, Kees Cook, Andy Lutomirski,
	Arjan van de Ven, Paul Gortmaker, Borislav Petkov,
	Rafael J . Wysocki, Len Brown, Pavel Machek, Jiri Kosina,
	Matt Fleming, Ard Biesheuvel, Boris Ostrovsky, Juergen Gross,
	Rusty Russell, Christian Borntraeger, Fenghua Yu, He Chen, Brian

On Thu, Jan 26, 2017 at 8:59 AM, Thomas Garnier <thgarnie@google.com> wrote:
> This patch makes the GDT remapped pages read-only to prevent corruption.
> This change is done only on 64 bit.
>
> The native_load_tr_desc function was adapted to correctly handle a
> read-only GDT. The LTR instruction always writes to the GDT TSS entry.
> This generates a page fault if the GDT is read-only. This change checks
> if the current GDT is a remap and swap GDTs as needed. This function was
> tested by booting multiple machines and checking hibernation works
> properly.
>
> KVM SVM and VMX were adapted to use the writeable GDT. On VMX, the
> per-cpu variable was removed for functions to fetch the original GDT.
> Instead of reloading the previous GDT, VMX will reload the fixmap GDT as
> expected. For testing, VMs were started and restored on multiple
> configurations.
>
> Signed-off-by: Thomas Garnier <thgarnie@google.com>
> ---
> Based on next-20170125
> ---
>  arch/x86/include/asm/desc.h      | 46 +++++++++++++++++++++++++++++++++++-----
>  arch/x86/include/asm/processor.h |  1 +
>  arch/x86/kernel/cpu/common.c     | 28 ++++++++++++++++++------
>  arch/x86/kvm/svm.c               |  4 +---
>  arch/x86/kvm/vmx.c               | 15 +++++--------
>  5 files changed, 70 insertions(+), 24 deletions(-)
>
> diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
> index 4cc176f57b78..ca7b2224fcb4 100644
> --- a/arch/x86/include/asm/desc.h
> +++ b/arch/x86/include/asm/desc.h
> @@ -52,6 +52,12 @@ static inline struct desc_struct *get_cpu_direct_gdt(unsigned int cpu)
>         return per_cpu(gdt_page, cpu).gdt;
>  }
>
> +/* Provide the current original GDT */
> +static inline struct desc_struct *get_current_direct_gdt(void)
> +{
> +       return this_cpu_ptr(&gdt_page)->gdt;
> +}

I'm assuming that the reason that this isn't part of patch 2 and used
instead of the version that takes cpu as a parameter is that TLS
doesn't work until the GDT is set up.  If so, perhaps that's worthy of
a comment in patch 2.

But give this_cpu_read(gdt_page.gdt) a try, please.

> +/*
> + * The LTR instruction marks the TSS GDT entry as busy. In 64bit, the GDT is
> + * a read-only remapping. To prevent a page fault, the GDT is switched to the
> + * original writeable version when needed.
> + */
> +#ifdef CONFIG_X86_64
> +static inline void native_load_tr_desc(void)
> +{
> +       struct desc_ptr gdt;
> +       int cpu = raw_smp_processor_id();
> +       bool restore = false;
> +       struct desc_struct *fixmap_gdt;
> +
> +       native_store_gdt(&gdt);

Off the top of my head, this is something like 10 cycles.  IMO that's
fast enough not to worry about the regression this will cause to KVM
exits.  In any event, we'll get that back and *much* more when we do
the optimizations that this series enables.

^ permalink raw reply	[flat|nested] 38+ messages in thread

* [kernel-hardening] Re: [PATCH v2 3/3] x86: Make the GDT remapping read-only on 64 bit
@ 2017-02-02  5:14     ` Andy Lutomirski
  0 siblings, 0 replies; 38+ messages in thread
From: Andy Lutomirski @ 2017-02-02  5:14 UTC (permalink / raw)
  To: Thomas Garnier
  Cc: Thomas Gleixner, Ingo Molnar, H . Peter Anvin, Andrey Ryabinin,
	Alexander Potapenko, Dmitry Vyukov, Kees Cook, Andy Lutomirski,
	Arjan van de Ven, Paul Gortmaker, Borislav Petkov,
	Rafael J . Wysocki, Len Brown, Pavel Machek, Jiri Kosina,
	Matt Fleming, Ard Biesheuvel, Boris Ostrovsky, Juergen Gross,
	Rusty Russell, Christian Borntraeger, Fenghua Yu, He Chen,
	Brian Gerst, Luis R . Rodriguez, Adam Buchbinder,
	Stanislaw Gruszka, Arnd Bergmann, Dave Hansen, Chen Yucong,
	Vitaly Kuznetsov, David Vrabel, Josh Poimboeuf, Tim Chen,
	Rik van Riel, Andi Kleen, Jiri Olsa, Prarit Bhargava,
	Michael Ellerman, Joerg Roedel, Paolo Bonzini,
	Radim Krčmář,
	X86 ML, linux-kernel, kasan-dev, linux-pm, linux-efi, xen-devel,
	lguest, kvm list, kernel-hardening

On Thu, Jan 26, 2017 at 8:59 AM, Thomas Garnier <thgarnie@google.com> wrote:
> This patch makes the GDT remapped pages read-only to prevent corruption.
> This change is done only on 64 bit.
>
> The native_load_tr_desc function was adapted to correctly handle a
> read-only GDT. The LTR instruction always writes to the GDT TSS entry.
> This generates a page fault if the GDT is read-only. This change checks
> if the current GDT is a remap and swap GDTs as needed. This function was
> tested by booting multiple machines and checking hibernation works
> properly.
>
> KVM SVM and VMX were adapted to use the writeable GDT. On VMX, the
> per-cpu variable was removed for functions to fetch the original GDT.
> Instead of reloading the previous GDT, VMX will reload the fixmap GDT as
> expected. For testing, VMs were started and restored on multiple
> configurations.
>
> Signed-off-by: Thomas Garnier <thgarnie@google.com>
> ---
> Based on next-20170125
> ---
>  arch/x86/include/asm/desc.h      | 46 +++++++++++++++++++++++++++++++++++-----
>  arch/x86/include/asm/processor.h |  1 +
>  arch/x86/kernel/cpu/common.c     | 28 ++++++++++++++++++------
>  arch/x86/kvm/svm.c               |  4 +---
>  arch/x86/kvm/vmx.c               | 15 +++++--------
>  5 files changed, 70 insertions(+), 24 deletions(-)
>
> diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
> index 4cc176f57b78..ca7b2224fcb4 100644
> --- a/arch/x86/include/asm/desc.h
> +++ b/arch/x86/include/asm/desc.h
> @@ -52,6 +52,12 @@ static inline struct desc_struct *get_cpu_direct_gdt(unsigned int cpu)
>         return per_cpu(gdt_page, cpu).gdt;
>  }
>
> +/* Provide the current original GDT */
> +static inline struct desc_struct *get_current_direct_gdt(void)
> +{
> +       return this_cpu_ptr(&gdt_page)->gdt;
> +}

I'm assuming that the reason that this isn't part of patch 2 and used
instead of the version that takes cpu as a parameter is that TLS
doesn't work until the GDT is set up.  If so, perhaps that's worthy of
a comment in patch 2.

But give this_cpu_read(gdt_page.gdt) a try, please.

> +/*
> + * The LTR instruction marks the TSS GDT entry as busy. In 64bit, the GDT is
> + * a read-only remapping. To prevent a page fault, the GDT is switched to the
> + * original writeable version when needed.
> + */
> +#ifdef CONFIG_X86_64
> +static inline void native_load_tr_desc(void)
> +{
> +       struct desc_ptr gdt;
> +       int cpu = raw_smp_processor_id();
> +       bool restore = false;
> +       struct desc_struct *fixmap_gdt;
> +
> +       native_store_gdt(&gdt);

Off the top of my head, this is something like 10 cycles.  IMO that's
fast enough not to worry about the regression this will cause to KVM
exits.  In any event, we'll get that back and *much* more when we do
the optimizations that this series enables.

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH v2 3/3] x86: Make the GDT remapping read-only on 64 bit
  2017-01-26 16:59   ` Thomas Garnier
                     ` (4 preceding siblings ...)
  (?)
@ 2017-02-02  5:14   ` Andy Lutomirski
  -1 siblings, 0 replies; 38+ messages in thread
From: Andy Lutomirski @ 2017-02-02  5:14 UTC (permalink / raw)
  To: Thomas Garnier
  Cc: X86 ML, Stanislaw Gruszka, kvm list, Fenghua Yu, Matt Fleming,
	Paul Gortmaker, Radim Krčmář,
	linux-efi, Alexander Potapenko, Pavel Machek, H . Peter Anvin,
	kernel-hardening, Jiri Olsa, Boris Ostrovsky, Dave Hansen,
	Andi Kleen, xen-devel, Michael Ellerman, Joerg Roedel,
	Prarit Bhargava, kasan-dev

On Thu, Jan 26, 2017 at 8:59 AM, Thomas Garnier <thgarnie@google.com> wrote:
> This patch makes the GDT remapped pages read-only to prevent corruption.
> This change is done only on 64 bit.
>
> The native_load_tr_desc function was adapted to correctly handle a
> read-only GDT. The LTR instruction always writes to the GDT TSS entry.
> This generates a page fault if the GDT is read-only. This change checks
> if the current GDT is a remap and swap GDTs as needed. This function was
> tested by booting multiple machines and checking hibernation works
> properly.
>
> KVM SVM and VMX were adapted to use the writeable GDT. On VMX, the
> per-cpu variable was removed for functions to fetch the original GDT.
> Instead of reloading the previous GDT, VMX will reload the fixmap GDT as
> expected. For testing, VMs were started and restored on multiple
> configurations.
>
> Signed-off-by: Thomas Garnier <thgarnie@google.com>
> ---
> Based on next-20170125
> ---
>  arch/x86/include/asm/desc.h      | 46 +++++++++++++++++++++++++++++++++++-----
>  arch/x86/include/asm/processor.h |  1 +
>  arch/x86/kernel/cpu/common.c     | 28 ++++++++++++++++++------
>  arch/x86/kvm/svm.c               |  4 +---
>  arch/x86/kvm/vmx.c               | 15 +++++--------
>  5 files changed, 70 insertions(+), 24 deletions(-)
>
> diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
> index 4cc176f57b78..ca7b2224fcb4 100644
> --- a/arch/x86/include/asm/desc.h
> +++ b/arch/x86/include/asm/desc.h
> @@ -52,6 +52,12 @@ static inline struct desc_struct *get_cpu_direct_gdt(unsigned int cpu)
>         return per_cpu(gdt_page, cpu).gdt;
>  }
>
> +/* Provide the current original GDT */
> +static inline struct desc_struct *get_current_direct_gdt(void)
> +{
> +       return this_cpu_ptr(&gdt_page)->gdt;
> +}

I'm assuming that the reason that this isn't part of patch 2 and used
instead of the version that takes cpu as a parameter is that TLS
doesn't work until the GDT is set up.  If so, perhaps that's worthy of
a comment in patch 2.

But give this_cpu_read(gdt_page.gdt) a try, please.

> +/*
> + * The LTR instruction marks the TSS GDT entry as busy. In 64bit, the GDT is
> + * a read-only remapping. To prevent a page fault, the GDT is switched to the
> + * original writeable version when needed.
> + */
> +#ifdef CONFIG_X86_64
> +static inline void native_load_tr_desc(void)
> +{
> +       struct desc_ptr gdt;
> +       int cpu = raw_smp_processor_id();
> +       bool restore = false;
> +       struct desc_struct *fixmap_gdt;
> +
> +       native_store_gdt(&gdt);

Off the top of my head, this is something like 10 cycles.  IMO that's
fast enough not to worry about the regression this will cause to KVM
exits.  In any event, we'll get that back and *much* more when we do
the optimizations that this series enables.

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH v2 3/3] x86: Make the GDT remapping read-only on 64 bit
  2017-02-02  5:13       ` Andy Lutomirski
  (?)
@ 2017-02-02  7:12         ` Ingo Molnar
  -1 siblings, 0 replies; 38+ messages in thread
From: Ingo Molnar @ 2017-02-02  7:12 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Thomas Garnier, Thomas Gleixner, Ingo Molnar, H . Peter Anvin,
	Andrey Ryabinin, Alexander Potapenko, Dmitry Vyukov, Kees Cook,
	Arjan van de Ven, Paul Gortmaker, Borislav Petkov,
	Rafael J . Wysocki, Len Brown, Pavel Machek, Jiri Kosina,
	Matt Fleming, Ard Biesheuvel, Boris Ostrovsky, Juergen Gross,
	Rusty Russell, Christian Borntraeger, Fenghua Yu, He Chen


* Andy Lutomirski <luto@kernel.org> wrote:

> On Wed, Feb 1, 2017 at 1:15 AM, Ingo Molnar <mingo@kernel.org> wrote:
> >
> > * Thomas Garnier <thgarnie@google.com> wrote:
> >
> >> This patch makes the GDT remapped pages read-only to prevent corruption.
> >> This change is done only on 64 bit.
> >
> 
> 
> >>
> >> -     table_base = gdt->address;
> >> +     table_base = (unsigned long)get_current_direct_gdt();
> >
> > Instead of spreading these type casts far and wide please introduce another
> > accessor the returns 'unsigned long':
> >
> >         get_cpu_gdt_rw_vaddr()
> >
> 
> That whole function is an abomination.  How about replacing 'unsigned
> long table_base' with 'struct desc_struct *table'?  If you're feeling
> really adventurous, *delete* that function and replace all of its
> users with something sane.

Yeah, even better!

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH v2 3/3] x86: Make the GDT remapping read-only on 64 bit
@ 2017-02-02  7:12         ` Ingo Molnar
  0 siblings, 0 replies; 38+ messages in thread
From: Ingo Molnar @ 2017-02-02  7:12 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Thomas Garnier, Thomas Gleixner, Ingo Molnar, H . Peter Anvin,
	Andrey Ryabinin, Alexander Potapenko, Dmitry Vyukov, Kees Cook,
	Arjan van de Ven, Paul Gortmaker, Borislav Petkov,
	Rafael J . Wysocki, Len Brown, Pavel Machek, Jiri Kosina,
	Matt Fleming, Ard Biesheuvel, Boris Ostrovsky, Juergen Gross,
	Rusty Russell, Christian Borntraeger, Fenghua Yu, He Chen, Br


* Andy Lutomirski <luto@kernel.org> wrote:

> On Wed, Feb 1, 2017 at 1:15 AM, Ingo Molnar <mingo@kernel.org> wrote:
> >
> > * Thomas Garnier <thgarnie@google.com> wrote:
> >
> >> This patch makes the GDT remapped pages read-only to prevent corruption.
> >> This change is done only on 64 bit.
> >
> 
> 
> >>
> >> -     table_base = gdt->address;
> >> +     table_base = (unsigned long)get_current_direct_gdt();
> >
> > Instead of spreading these type casts far and wide please introduce another
> > accessor the returns 'unsigned long':
> >
> >         get_cpu_gdt_rw_vaddr()
> >
> 
> That whole function is an abomination.  How about replacing 'unsigned
> long table_base' with 'struct desc_struct *table'?  If you're feeling
> really adventurous, *delete* that function and replace all of its
> users with something sane.

Yeah, even better!

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 38+ messages in thread

* [kernel-hardening] Re: [PATCH v2 3/3] x86: Make the GDT remapping read-only on 64 bit
@ 2017-02-02  7:12         ` Ingo Molnar
  0 siblings, 0 replies; 38+ messages in thread
From: Ingo Molnar @ 2017-02-02  7:12 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Thomas Garnier, Thomas Gleixner, Ingo Molnar, H . Peter Anvin,
	Andrey Ryabinin, Alexander Potapenko, Dmitry Vyukov, Kees Cook,
	Arjan van de Ven, Paul Gortmaker, Borislav Petkov,
	Rafael J . Wysocki, Len Brown, Pavel Machek, Jiri Kosina,
	Matt Fleming, Ard Biesheuvel, Boris Ostrovsky, Juergen Gross,
	Rusty Russell, Christian Borntraeger, Fenghua Yu, He Chen,
	Brian Gerst, Luis R . Rodriguez, Adam Buchbinder,
	Stanislaw Gruszka, Arnd Bergmann, Dave Hansen, Chen Yucong,
	Vitaly Kuznetsov, David Vrabel, Josh Poimboeuf, Tim Chen,
	Rik van Riel, Andi Kleen, Jiri Olsa, Prarit Bhargava,
	Michael Ellerman, Joerg Roedel, Paolo Bonzini,
	Radim Krčmář,
	X86 ML, linux-kernel, kasan-dev, linux-pm, linux-efi, xen-devel,
	lguest, kvm list, kernel-hardening


* Andy Lutomirski <luto@kernel.org> wrote:

> On Wed, Feb 1, 2017 at 1:15 AM, Ingo Molnar <mingo@kernel.org> wrote:
> >
> > * Thomas Garnier <thgarnie@google.com> wrote:
> >
> >> This patch makes the GDT remapped pages read-only to prevent corruption.
> >> This change is done only on 64 bit.
> >
> 
> 
> >>
> >> -     table_base = gdt->address;
> >> +     table_base = (unsigned long)get_current_direct_gdt();
> >
> > Instead of spreading these type casts far and wide please introduce another
> > accessor the returns 'unsigned long':
> >
> >         get_cpu_gdt_rw_vaddr()
> >
> 
> That whole function is an abomination.  How about replacing 'unsigned
> long table_base' with 'struct desc_struct *table'?  If you're feeling
> really adventurous, *delete* that function and replace all of its
> users with something sane.

Yeah, even better!

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH v2 3/3] x86: Make the GDT remapping read-only on 64 bit
  2017-02-02  5:13       ` Andy Lutomirski
                         ` (2 preceding siblings ...)
  (?)
@ 2017-02-02  7:12       ` Ingo Molnar
  -1 siblings, 0 replies; 38+ messages in thread
From: Ingo Molnar @ 2017-02-02  7:12 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: X86 ML, Stanislaw Gruszka, kvm list, Fenghua Yu, Matt Fleming,
	Paul Gortmaker, Radim Krčmář,
	linux-efi, Alexander Potapenko, Pavel Machek, H . Peter Anvin,
	kernel-hardening, Jiri Olsa, Thomas Gleixner, Dave Hansen,
	Andi Kleen, xen-devel, Michael Ellerman, Joerg Roedel,
	Prarit Bhargava, kasan-dev


* Andy Lutomirski <luto@kernel.org> wrote:

> On Wed, Feb 1, 2017 at 1:15 AM, Ingo Molnar <mingo@kernel.org> wrote:
> >
> > * Thomas Garnier <thgarnie@google.com> wrote:
> >
> >> This patch makes the GDT remapped pages read-only to prevent corruption.
> >> This change is done only on 64 bit.
> >
> 
> 
> >>
> >> -     table_base = gdt->address;
> >> +     table_base = (unsigned long)get_current_direct_gdt();
> >
> > Instead of spreading these type casts far and wide please introduce another
> > accessor the returns 'unsigned long':
> >
> >         get_cpu_gdt_rw_vaddr()
> >
> 
> That whole function is an abomination.  How about replacing 'unsigned
> long table_base' with 'struct desc_struct *table'?  If you're feeling
> really adventurous, *delete* that function and replace all of its
> users with something sane.

Yeah, even better!

Thanks,

	Ingo

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH v2 3/3] x86: Make the GDT remapping read-only on 64 bit
  2017-02-02  5:14     ` [kernel-hardening] " Andy Lutomirski
@ 2017-02-06 22:10       ` Thomas Garnier
  -1 siblings, 0 replies; 38+ messages in thread
From: Thomas Garnier @ 2017-02-06 22:10 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Thomas Gleixner, Ingo Molnar, H . Peter Anvin, Andrey Ryabinin,
	Alexander Potapenko, Dmitry Vyukov, Kees Cook, Arjan van de Ven,
	Paul Gortmaker, Borislav Petkov, Rafael J . Wysocki, Len Brown,
	Pavel Machek, Jiri Kosina, Matt Fleming, Ard Biesheuvel,
	Boris Ostrovsky, Juergen Gross, Rusty Russell,
	Christian Borntraeger, Fenghua Yu, He Chen, Brian Gerst, Luis

On Wed, Feb 1, 2017 at 9:14 PM, Andy Lutomirski <luto@kernel.org> wrote:
> On Thu, Jan 26, 2017 at 8:59 AM, Thomas Garnier <thgarnie@google.com> wrote:
>> This patch makes the GDT remapped pages read-only to prevent corruption.
>> This change is done only on 64 bit.
>>
>> The native_load_tr_desc function was adapted to correctly handle a
>> read-only GDT. The LTR instruction always writes to the GDT TSS entry.
>> This generates a page fault if the GDT is read-only. This change checks
>> if the current GDT is a remap and swap GDTs as needed. This function was
>> tested by booting multiple machines and checking hibernation works
>> properly.
>>
>> KVM SVM and VMX were adapted to use the writeable GDT. On VMX, the
>> per-cpu variable was removed for functions to fetch the original GDT.
>> Instead of reloading the previous GDT, VMX will reload the fixmap GDT as
>> expected. For testing, VMs were started and restored on multiple
>> configurations.
>>
>> Signed-off-by: Thomas Garnier <thgarnie@google.com>
>> ---
>> Based on next-20170125
>> ---
>>  arch/x86/include/asm/desc.h      | 46 +++++++++++++++++++++++++++++++++++-----
>>  arch/x86/include/asm/processor.h |  1 +
>>  arch/x86/kernel/cpu/common.c     | 28 ++++++++++++++++++------
>>  arch/x86/kvm/svm.c               |  4 +---
>>  arch/x86/kvm/vmx.c               | 15 +++++--------
>>  5 files changed, 70 insertions(+), 24 deletions(-)
>>
>> diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
>> index 4cc176f57b78..ca7b2224fcb4 100644
>> --- a/arch/x86/include/asm/desc.h
>> +++ b/arch/x86/include/asm/desc.h
>> @@ -52,6 +52,12 @@ static inline struct desc_struct *get_cpu_direct_gdt(unsigned int cpu)
>>         return per_cpu(gdt_page, cpu).gdt;
>>  }
>>
>> +/* Provide the current original GDT */
>> +static inline struct desc_struct *get_current_direct_gdt(void)
>> +{
>> +       return this_cpu_ptr(&gdt_page)->gdt;
>> +}
>
> I'm assuming that the reason that this isn't part of patch 2 and used
> instead of the version that takes cpu as a parameter is that TLS
> doesn't work until the GDT is set up.  If so, perhaps that's worthy of
> a comment in patch 2.
>
> But give this_cpu_read(gdt_page.gdt) a try, please.
>

I tried but I can't get it working properly because the gdt field is
an array, not a pointer. For example with this_cpu_read(gdt_page.gdt),
I get:

./arch/x86/include/asm/desc.h: In function ‘get_current_gdt_rw’:
./include/linux/percpu-defs.h:308:21: error: incompatible types when
assigning to type ‘struct desc_struct[16]’ from type ‘struct
desc_struct *’
  case 1: pscr_ret__ = stem##1(variable); break;   \
                     ^
I tried different variants without success. What do you think?

>> +/*
>> + * The LTR instruction marks the TSS GDT entry as busy. In 64bit, the GDT is
>> + * a read-only remapping. To prevent a page fault, the GDT is switched to the
>> + * original writeable version when needed.
>> + */
>> +#ifdef CONFIG_X86_64
>> +static inline void native_load_tr_desc(void)
>> +{
>> +       struct desc_ptr gdt;
>> +       int cpu = raw_smp_processor_id();
>> +       bool restore = false;
>> +       struct desc_struct *fixmap_gdt;
>> +
>> +       native_store_gdt(&gdt);
>
> Off the top of my head, this is something like 10 cycles.  IMO that's
> fast enough not to worry about the regression this will cause to KVM
> exits.  In any event, we'll get that back and *much* more when we do
> the optimizations that this series enables.



-- 
Thomas

^ permalink raw reply	[flat|nested] 38+ messages in thread

* [kernel-hardening] Re: [PATCH v2 3/3] x86: Make the GDT remapping read-only on 64 bit
@ 2017-02-06 22:10       ` Thomas Garnier
  0 siblings, 0 replies; 38+ messages in thread
From: Thomas Garnier @ 2017-02-06 22:10 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Thomas Gleixner, Ingo Molnar, H . Peter Anvin, Andrey Ryabinin,
	Alexander Potapenko, Dmitry Vyukov, Kees Cook, Arjan van de Ven,
	Paul Gortmaker, Borislav Petkov, Rafael J . Wysocki, Len Brown,
	Pavel Machek, Jiri Kosina, Matt Fleming, Ard Biesheuvel,
	Boris Ostrovsky, Juergen Gross, Rusty Russell,
	Christian Borntraeger, Fenghua Yu, He Chen, Brian Gerst,
	Luis R . Rodriguez, Adam Buchbinder, Stanislaw Gruszka,
	Arnd Bergmann, Dave Hansen, Chen Yucong, Vitaly Kuznetsov,
	David Vrabel, Josh Poimboeuf, Tim Chen, Rik van Riel, Andi Kleen,
	Jiri Olsa, Prarit Bhargava, Michael Ellerman, Joerg Roedel,
	Paolo Bonzini, Radim Krčmář,
	X86 ML, linux-kernel, kasan-dev, linux-pm, linux-efi, xen-devel,
	lguest, kvm list, kernel-hardening

On Wed, Feb 1, 2017 at 9:14 PM, Andy Lutomirski <luto@kernel.org> wrote:
> On Thu, Jan 26, 2017 at 8:59 AM, Thomas Garnier <thgarnie@google.com> wrote:
>> This patch makes the GDT remapped pages read-only to prevent corruption.
>> This change is done only on 64 bit.
>>
>> The native_load_tr_desc function was adapted to correctly handle a
>> read-only GDT. The LTR instruction always writes to the GDT TSS entry.
>> This generates a page fault if the GDT is read-only. This change checks
>> if the current GDT is a remap and swap GDTs as needed. This function was
>> tested by booting multiple machines and checking hibernation works
>> properly.
>>
>> KVM SVM and VMX were adapted to use the writeable GDT. On VMX, the
>> per-cpu variable was removed for functions to fetch the original GDT.
>> Instead of reloading the previous GDT, VMX will reload the fixmap GDT as
>> expected. For testing, VMs were started and restored on multiple
>> configurations.
>>
>> Signed-off-by: Thomas Garnier <thgarnie@google.com>
>> ---
>> Based on next-20170125
>> ---
>>  arch/x86/include/asm/desc.h      | 46 +++++++++++++++++++++++++++++++++++-----
>>  arch/x86/include/asm/processor.h |  1 +
>>  arch/x86/kernel/cpu/common.c     | 28 ++++++++++++++++++------
>>  arch/x86/kvm/svm.c               |  4 +---
>>  arch/x86/kvm/vmx.c               | 15 +++++--------
>>  5 files changed, 70 insertions(+), 24 deletions(-)
>>
>> diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
>> index 4cc176f57b78..ca7b2224fcb4 100644
>> --- a/arch/x86/include/asm/desc.h
>> +++ b/arch/x86/include/asm/desc.h
>> @@ -52,6 +52,12 @@ static inline struct desc_struct *get_cpu_direct_gdt(unsigned int cpu)
>>         return per_cpu(gdt_page, cpu).gdt;
>>  }
>>
>> +/* Provide the current original GDT */
>> +static inline struct desc_struct *get_current_direct_gdt(void)
>> +{
>> +       return this_cpu_ptr(&gdt_page)->gdt;
>> +}
>
> I'm assuming that the reason that this isn't part of patch 2 and used
> instead of the version that takes cpu as a parameter is that TLS
> doesn't work until the GDT is set up.  If so, perhaps that's worthy of
> a comment in patch 2.
>
> But give this_cpu_read(gdt_page.gdt) a try, please.
>

I tried but I can't get it working properly because the gdt field is
an array, not a pointer. For example with this_cpu_read(gdt_page.gdt),
I get:

./arch/x86/include/asm/desc.h: In function ‘get_current_gdt_rw’:
./include/linux/percpu-defs.h:308:21: error: incompatible types when
assigning to type ‘struct desc_struct[16]’ from type ‘struct
desc_struct *’
  case 1: pscr_ret__ = stem##1(variable); break;   \
                     ^
I tried different variants without success. What do you think?

>> +/*
>> + * The LTR instruction marks the TSS GDT entry as busy. In 64bit, the GDT is
>> + * a read-only remapping. To prevent a page fault, the GDT is switched to the
>> + * original writeable version when needed.
>> + */
>> +#ifdef CONFIG_X86_64
>> +static inline void native_load_tr_desc(void)
>> +{
>> +       struct desc_ptr gdt;
>> +       int cpu = raw_smp_processor_id();
>> +       bool restore = false;
>> +       struct desc_struct *fixmap_gdt;
>> +
>> +       native_store_gdt(&gdt);
>
> Off the top of my head, this is something like 10 cycles.  IMO that's
> fast enough not to worry about the regression this will cause to KVM
> exits.  In any event, we'll get that back and *much* more when we do
> the optimizations that this series enables.



-- 
Thomas

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH v2 3/3] x86: Make the GDT remapping read-only on 64 bit
  2017-02-02  5:14     ` [kernel-hardening] " Andy Lutomirski
  (?)
@ 2017-02-06 22:10     ` Thomas Garnier
  -1 siblings, 0 replies; 38+ messages in thread
From: Thomas Garnier @ 2017-02-06 22:10 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: X86 ML, Stanislaw Gruszka, kvm list, Fenghua Yu, Matt Fleming,
	Paul Gortmaker, Radim Krčmář,
	linux-efi, Alexander Potapenko, Pavel Machek, H . Peter Anvin,
	kernel-hardening, Jiri Olsa, Boris Ostrovsky, Dave Hansen,
	Andi Kleen, xen-devel, Michael Ellerman, Joerg Roedel,
	Prarit Bhargava, kasan-dev

On Wed, Feb 1, 2017 at 9:14 PM, Andy Lutomirski <luto@kernel.org> wrote:
> On Thu, Jan 26, 2017 at 8:59 AM, Thomas Garnier <thgarnie@google.com> wrote:
>> This patch makes the GDT remapped pages read-only to prevent corruption.
>> This change is done only on 64 bit.
>>
>> The native_load_tr_desc function was adapted to correctly handle a
>> read-only GDT. The LTR instruction always writes to the GDT TSS entry.
>> This generates a page fault if the GDT is read-only. This change checks
>> if the current GDT is a remap and swap GDTs as needed. This function was
>> tested by booting multiple machines and checking hibernation works
>> properly.
>>
>> KVM SVM and VMX were adapted to use the writeable GDT. On VMX, the
>> per-cpu variable was removed for functions to fetch the original GDT.
>> Instead of reloading the previous GDT, VMX will reload the fixmap GDT as
>> expected. For testing, VMs were started and restored on multiple
>> configurations.
>>
>> Signed-off-by: Thomas Garnier <thgarnie@google.com>
>> ---
>> Based on next-20170125
>> ---
>>  arch/x86/include/asm/desc.h      | 46 +++++++++++++++++++++++++++++++++++-----
>>  arch/x86/include/asm/processor.h |  1 +
>>  arch/x86/kernel/cpu/common.c     | 28 ++++++++++++++++++------
>>  arch/x86/kvm/svm.c               |  4 +---
>>  arch/x86/kvm/vmx.c               | 15 +++++--------
>>  5 files changed, 70 insertions(+), 24 deletions(-)
>>
>> diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
>> index 4cc176f57b78..ca7b2224fcb4 100644
>> --- a/arch/x86/include/asm/desc.h
>> +++ b/arch/x86/include/asm/desc.h
>> @@ -52,6 +52,12 @@ static inline struct desc_struct *get_cpu_direct_gdt(unsigned int cpu)
>>         return per_cpu(gdt_page, cpu).gdt;
>>  }
>>
>> +/* Provide the current original GDT */
>> +static inline struct desc_struct *get_current_direct_gdt(void)
>> +{
>> +       return this_cpu_ptr(&gdt_page)->gdt;
>> +}
>
> I'm assuming that the reason that this isn't part of patch 2 and used
> instead of the version that takes cpu as a parameter is that TLS
> doesn't work until the GDT is set up.  If so, perhaps that's worthy of
> a comment in patch 2.
>
> But give this_cpu_read(gdt_page.gdt) a try, please.
>

I tried but I can't get it working properly because the gdt field is
an array, not a pointer. For example with this_cpu_read(gdt_page.gdt),
I get:

./arch/x86/include/asm/desc.h: In function ‘get_current_gdt_rw’:
./include/linux/percpu-defs.h:308:21: error: incompatible types when
assigning to type ‘struct desc_struct[16]’ from type ‘struct
desc_struct *’
  case 1: pscr_ret__ = stem##1(variable); break;   \
                     ^
I tried different variants without success. What do you think?

>> +/*
>> + * The LTR instruction marks the TSS GDT entry as busy. In 64bit, the GDT is
>> + * a read-only remapping. To prevent a page fault, the GDT is switched to the
>> + * original writeable version when needed.
>> + */
>> +#ifdef CONFIG_X86_64
>> +static inline void native_load_tr_desc(void)
>> +{
>> +       struct desc_ptr gdt;
>> +       int cpu = raw_smp_processor_id();
>> +       bool restore = false;
>> +       struct desc_struct *fixmap_gdt;
>> +
>> +       native_store_gdt(&gdt);
>
> Off the top of my head, this is something like 10 cycles.  IMO that's
> fast enough not to worry about the regression this will cause to KVM
> exits.  In any event, we'll get that back and *much* more when we do
> the optimizations that this series enables.



-- 
Thomas

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 38+ messages in thread

end of thread, other threads:[~2017-02-06 22:10 UTC | newest]

Thread overview: 38+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-01-26 16:59 [PATCH v2 1/3] x86/mm: Adapt MODULES_END based on Fixmap section size Thomas Garnier
2017-01-26 16:59 ` [kernel-hardening] " Thomas Garnier
2017-01-26 16:59 ` Thomas Garnier
2017-01-26 16:59 ` Thomas Garnier
2017-01-26 16:59 ` [PATCH v2 2/3] x86: Remap GDT tables in the Fixmap section Thomas Garnier
2017-01-26 16:59 ` Thomas Garnier
2017-01-26 16:59   ` [kernel-hardening] " Thomas Garnier
2017-01-26 16:59   ` Thomas Garnier
2017-01-26 16:59   ` Thomas Garnier
2017-01-26 18:52   ` Andy Lutomirski
2017-01-26 18:52     ` [kernel-hardening] " Andy Lutomirski
2017-01-26 19:10     ` Thomas Garnier
2017-01-26 19:10       ` [kernel-hardening] " Thomas Garnier
2017-01-26 19:10     ` Thomas Garnier
2017-01-26 18:52   ` Andy Lutomirski
2017-01-26 16:59 ` [PATCH v2 3/3] x86: Make the GDT remapping read-only on 64 bit Thomas Garnier
2017-01-26 16:59 ` Thomas Garnier
2017-01-26 16:59   ` [kernel-hardening] " Thomas Garnier
2017-01-26 16:59   ` Thomas Garnier
2017-01-26 16:59   ` Thomas Garnier
2017-02-01  9:15   ` Ingo Molnar
2017-02-01  9:15     ` [kernel-hardening] " Ingo Molnar
2017-02-01  9:15     ` Ingo Molnar
2017-02-02  5:13     ` Andy Lutomirski
2017-02-02  5:13     ` Andy Lutomirski
2017-02-02  5:13       ` [kernel-hardening] " Andy Lutomirski
2017-02-02  5:13       ` Andy Lutomirski
2017-02-02  7:12       ` Ingo Molnar
2017-02-02  7:12         ` [kernel-hardening] " Ingo Molnar
2017-02-02  7:12         ` Ingo Molnar
2017-02-02  7:12       ` Ingo Molnar
2017-02-01  9:15   ` Ingo Molnar
2017-02-02  5:14   ` Andy Lutomirski
2017-02-02  5:14   ` Andy Lutomirski
2017-02-02  5:14     ` [kernel-hardening] " Andy Lutomirski
2017-02-06 22:10     ` Thomas Garnier
2017-02-06 22:10     ` Thomas Garnier
2017-02-06 22:10       ` [kernel-hardening] " Thomas Garnier

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.