All of lore.kernel.org
 help / color / mirror / Atom feed
From: Pavel Tatashin <pasha.tatashin@oracle.com>
To: steven.sistare@oracle.com, daniel.m.jordan@oracle.com,
	linux-kernel@vger.kernel.org, Alexander.Levin@microsoft.com,
	dan.j.williams@intel.com, sathyanarayanan.kuppuswamy@intel.com,
	pankaj.laxminarayan.bharadiya@intel.com, akuster@mvista.com,
	cminyard@mvista.com, pasha.tatashin@oracle.com,
	gregkh@linuxfoundation.org, stable@vger.kernel.org
Subject: [PATCH 4.1 44/65] kaiser: PCID 0 for kernel and 128 for user
Date: Mon,  5 Mar 2018 19:25:17 -0500	[thread overview]
Message-ID: <20180306002538.1761-45-pasha.tatashin@oracle.com> (raw)
In-Reply-To: <20180306002538.1761-1-pasha.tatashin@oracle.com>

From: Hugh Dickins <hughd@google.com>

Why was 4 chosen for kernel PCID and 6 for user PCID?
No good reason in a backport where PCIDs are only used for Kaiser.

If we continue with those, then we shall need to add Andy Lutomirski's
4.13 commit 6c690ee1039b ("x86/mm: Split read_cr3() into read_cr3_pa()
and __read_cr3()"), which deals with the problem of read_cr3() callers
finding stray bits in the cr3 that they expected to be page-aligned;
and for hibernation, his 4.14 commit f34902c5c6c0 ("x86/hibernate/64:
Mask off CR3's PCID bits in the saved CR3").

But if 0 is used for kernel PCID, then there's no need to add in those
commits - whenever the kernel looks, it sees 0 in the lower bits; and
0 for kernel seems an obvious choice.

And I naughtily propose 128 for user PCID.  Because there's a place
in _SWITCH_TO_USER_CR3 where it takes note of the need for TLB FLUSH,
but needs to reset that to NOFLUSH for the next occasion.  Currently
it does so with a "movb $(0x80)" into the high byte of the per-cpu
quadword, but that will cause a machine without PCID support to crash.
Now, if %al just happened to have 0x80 in it at that point, on a
machine with PCID support, but 0 on a machine without PCID support...

(That will go badly wrong once the pgd can be at a physical address
above 2^56, but even with 5-level paging, physical goes up to 2^52.)

Signed-off-by: Hugh Dickins <hughd@google.com>
Acked-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
(cherry picked from commit 3b4ce0e1a17228eec71815d7997e49e403ebf2a7)
Signed-off-by: Pavel Tatashin <pasha.tatashin@oracle.com>
---
 arch/x86/include/asm/kaiser.h        | 19 ++++++++++++-------
 arch/x86/include/asm/pgtable_types.h |  7 ++++---
 arch/x86/mm/tlb.c                    |  3 +++
 3 files changed, 19 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/kaiser.h b/arch/x86/include/asm/kaiser.h
index 009bca514c20..110a73e0572d 100644
--- a/arch/x86/include/asm/kaiser.h
+++ b/arch/x86/include/asm/kaiser.h
@@ -29,14 +29,19 @@ orq  X86_CR3_PCID_KERN_VAR, \reg
 movq \reg, %cr3
 .endm
 
-.macro _SWITCH_TO_USER_CR3 reg
+.macro _SWITCH_TO_USER_CR3 reg regb
+/*
+ * regb must be the low byte portion of reg: because we have arranged
+ * for the low byte of the user PCID to serve as the high byte of NOFLUSH
+ * (0x80 for each when PCID is enabled, or 0x00 when PCID and NOFLUSH are
+ * not enabled): so that the one register can update both memory and cr3.
+ */
 movq %cr3, \reg
 andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), \reg
 orq  PER_CPU_VAR(X86_CR3_PCID_USER_VAR), \reg
 js   9f
-// FLUSH this time, reset to NOFLUSH for next time
-// But if nopcid?  Consider using 0x80 for user pcid?
-movb $(0x80), PER_CPU_VAR(X86_CR3_PCID_USER_VAR+7)
+/* FLUSH this time, reset to NOFLUSH for next time (if PCID enabled) */
+movb \regb, PER_CPU_VAR(X86_CR3_PCID_USER_VAR+7)
 9:
 movq \reg, %cr3
 .endm
@@ -49,7 +54,7 @@ popq %rax
 
 .macro SWITCH_USER_CR3
 pushq %rax
-_SWITCH_TO_USER_CR3 %rax
+_SWITCH_TO_USER_CR3 %rax %al
 popq %rax
 .endm
 
@@ -61,7 +66,7 @@ movq PER_CPU_VAR(unsafe_stack_register_backup), %rax
 
 .macro SWITCH_USER_CR3_NO_STACK
 movq %rax, PER_CPU_VAR(unsafe_stack_register_backup)
-_SWITCH_TO_USER_CR3 %rax
+_SWITCH_TO_USER_CR3 %rax %al
 movq PER_CPU_VAR(unsafe_stack_register_backup), %rax
 .endm
 
@@ -69,7 +74,7 @@ movq PER_CPU_VAR(unsafe_stack_register_backup), %rax
 
 .macro SWITCH_KERNEL_CR3 reg
 .endm
-.macro SWITCH_USER_CR3 reg
+.macro SWITCH_USER_CR3 reg regb
 .endm
 .macro SWITCH_USER_CR3_NO_STACK
 .endm
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index b3c77e6529c2..93554f37df99 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -111,16 +111,17 @@
 
 /* Mask for all the PCID-related bits in CR3: */
 #define X86_CR3_PCID_MASK       (X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_MASK)
+#define X86_CR3_PCID_ASID_KERN  (_AC(0x0,UL))
+
 #if defined(CONFIG_KAISER) && defined(CONFIG_X86_64)
-#define X86_CR3_PCID_ASID_KERN  (_AC(0x4,UL))
-#define X86_CR3_PCID_ASID_USER  (_AC(0x6,UL))
+/* Let X86_CR3_PCID_ASID_USER be usable for the X86_CR3_PCID_NOFLUSH bit */
+#define X86_CR3_PCID_ASID_USER	(_AC(0x80,UL))
 
 #define X86_CR3_PCID_KERN_FLUSH		(X86_CR3_PCID_ASID_KERN)
 #define X86_CR3_PCID_USER_FLUSH		(X86_CR3_PCID_ASID_USER)
 #define X86_CR3_PCID_KERN_NOFLUSH	(X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_KERN)
 #define X86_CR3_PCID_USER_NOFLUSH	(X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_USER)
 #else
-#define X86_CR3_PCID_ASID_KERN  (_AC(0x0,UL))
 #define X86_CR3_PCID_ASID_USER  (_AC(0x0,UL))
 /*
  * PCIDs are unsupported on 32-bit and none of these bits can be
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 9c9657f139cd..749e2134bd7a 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -50,6 +50,9 @@ static void load_new_mm_cr3(pgd_t *pgdir)
 		 * invpcid_flush_single_context(X86_CR3_PCID_ASID_USER) could
 		 * do it here, but can only be used if X86_FEATURE_INVPCID is
 		 * available - and many machines support pcid without invpcid.
+		 *
+		 * The line below is a no-op: X86_CR3_PCID_KERN_FLUSH is now 0;
+		 * but keep that line in there in case something changes.
 		 */
 		new_mm_cr3 |= X86_CR3_PCID_KERN_FLUSH;
 		kaiser_flush_tlb_on_return_to_user();
-- 
2.16.2

  parent reply	other threads:[~2018-03-06  0:25 UTC|newest]

Thread overview: 67+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-03-06  0:24 [PATCH 4.1 00/65] page table isolation for stable 4.1 Pavel Tatashin
2018-03-06  0:24 ` [PATCH 4.1 01/65] x86/mm: Add INVPCID helpers Pavel Tatashin
2018-03-06  0:24 ` [PATCH 4.1 02/65] x86/mm: Fix INVPCID asm constraint Pavel Tatashin
2018-03-06  0:24 ` [PATCH 4.1 03/65] x86/mm: Add a 'noinvpcid' boot option to turn off INVPCID Pavel Tatashin
2018-03-06  0:24 ` [PATCH 4.1 04/65] x86/mm: If INVPCID is available, use it to flush global mappings Pavel Tatashin
2018-03-06  0:24 ` [PATCH 4.1 05/65] mm/mmu_context, sched/core: Fix mmu_context.h assumption Pavel Tatashin
2018-03-06  0:24 ` [PATCH 4.1 06/65] sched/core: Add switch_mm_irqs_off() and use it in the scheduler Pavel Tatashin
2018-03-06  0:24 ` [PATCH 4.1 07/65] x86/mm: Build arch/x86/mm/tlb.c even on !SMP Pavel Tatashin
2018-03-06  0:24 ` [PATCH 4.1 08/65] x86/mm, sched/core: Uninline switch_mm() Pavel Tatashin
2018-03-06  0:24 ` [PATCH 4.1 09/65] x86/mm, sched/core: Turn off IRQs in switch_mm() Pavel Tatashin
2018-03-06  0:24 ` [PATCH 4.1 10/65] ARM: Hide finish_arch_post_lock_switch() from modules Pavel Tatashin
2018-03-06  0:24 ` [PATCH 4.1 11/65] sched/core: Idle_task_exit() shouldn't use switch_mm_irqs_off() Pavel Tatashin
2018-03-06  0:24 ` [PATCH 4.1 12/65] x86/irq: Do not substract irq_tlb_count from irq_call_count Pavel Tatashin
2018-03-06  0:24 ` [PATCH 4.1 13/65] x86/vm86/32: Switch to flush_tlb_mm_range() in mark_screen_rdonly() Pavel Tatashin
2018-03-06  0:24 ` [PATCH 4.1 14/65] x86/mm: Remove flush_tlb() and flush_tlb_current_task() Pavel Tatashin
2018-03-06  0:24 ` [PATCH 4.1 15/65] x86/mm: Make flush_tlb_mm_range() more predictable Pavel Tatashin
2018-03-06  0:24 ` [PATCH 4.1 16/65] x86/mm: Reimplement flush_tlb_page() using flush_tlb_mm_range() Pavel Tatashin
2018-03-06  0:24 ` [PATCH 4.1 17/65] x86/mm: Remove the UP asm/tlbflush.h code, always use the (formerly) SMP code Pavel Tatashin
2018-03-06  0:24 ` [PATCH 4.1 18/65] x86/mm: Disable PCID on 32-bit kernels Pavel Tatashin
2018-03-06  0:24 ` [PATCH 4.1 19/65] x86/mm: Add the 'nopcid' boot option to turn off PCID Pavel Tatashin
2018-03-06  0:24 ` [PATCH 4.1 20/65] x86/mm: Enable CR4.PCIDE on supported systems Pavel Tatashin
2018-03-06  0:24 ` [PATCH 4.1 21/65] x86/mm/64: Fix reboot interaction with CR4.PCIDE Pavel Tatashin
2018-03-06  0:24 ` [PATCH 4.1 22/65] x86/boot: Add early cmdline parsing for options with arguments Pavel Tatashin
2018-03-06  0:24 ` [PATCH 4.1 23/65] x86/entry: Stop using PER_CPU_VAR(kernel_stack) Pavel Tatashin
2018-03-06  0:24 ` [PATCH 4.1 24/65] x86/entry: Remove unused 'kernel_stack' per-cpu variable Pavel Tatashin
2018-03-06  0:24 ` [PATCH 4.1 25/65] x86/entry: Define 'cpu_current_top_of_stack' for 64-bit code Pavel Tatashin
2018-03-06  0:24 ` [PATCH 4.1 26/65] KAISER: Kernel Address Isolation Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 27/65] kaiser: merged update Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 28/65] kaiser: do not set _PAGE_NX on pgd_none Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 29/65] kaiser: stack map PAGE_SIZE at THREAD_SIZE-PAGE_SIZE Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 30/65] kaiser: fix build and FIXME in alloc_ldt_struct() Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 31/65] kaiser: KAISER depends on SMP Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 32/65] kaiser: fix regs to do_nmi() ifndef CONFIG_KAISER Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 33/65] kaiser: fix perf crashes Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 34/65] kaiser: ENOMEM if kaiser_pagetable_walk() NULL Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 35/65] kaiser: tidied up asm/kaiser.h somewhat Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 36/65] kaiser: tidied up kaiser_add/remove_mapping slightly Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 37/65] kaiser: kaiser_remove_mapping() move along the pgd Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 38/65] kaiser: cleanups while trying for gold link Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 39/65] kaiser: name that 0x1000 KAISER_SHADOW_PGD_OFFSET Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 40/65] kaiser: delete KAISER_REAL_SWITCH option Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 41/65] kaiser: vmstat show NR_KAISERTABLE as nr_overhead Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 42/65] kaiser: enhanced by kernel and user PCIDs Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 43/65] kaiser: load_new_mm_cr3() let SWITCH_USER_CR3 flush user Pavel Tatashin
2018-03-06  0:25 ` Pavel Tatashin [this message]
2018-03-06  0:25 ` [PATCH 4.1 45/65] kaiser: x86_cr3_pcid_noflush and x86_cr3_pcid_user Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 46/65] kaiser: paranoid_entry pass cr3 need to paranoid_exit Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 47/65] kaiser: _pgd_alloc() without __GFP_REPEAT to avoid stalls Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 48/65] kaiser: fix unlikely error in alloc_ldt_struct() Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 49/65] kaiser: add "nokaiser" boot option, using ALTERNATIVE Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 50/65] " Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 51/65] x86/kaiser: Rename and simplify X86_FEATURE_KAISER handling Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 52/65] x86/kaiser: Check boottime cmdline params Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 53/65] kaiser: use ALTERNATIVE instead of x86_cr3_pcid_noflush Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 54/65] kaiser: drop is_atomic arg to kaiser_pagetable_walk() Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 55/65] kaiser: asm/tlbflush.h handle noPGE at lower level Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 56/65] kaiser: kaiser_flush_tlb_on_return_to_user() check PCID Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 57/65] x86/paravirt: Dont patch flush_tlb_single Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 58/65] x86/kaiser: Reenable PARAVIRT Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 59/65] x86/kaiser: Move feature detection up Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 60/65] KPTI: Rename to PAGE_TABLE_ISOLATION Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 61/65] x86/ldt: fix crash in ldt freeing Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 62/65] PTI: unbreak EFI old_memmap Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 63/65] kpti: Disable when running under Xen PV Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 64/65] pti: Rename X86_FEATURE_KAISER to X86_FEATURE_PTI Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 65/65] x86/pti/efi: broken conversion from efi to kernel page table Pavel Tatashin
2018-03-07 12:55 ` [PATCH 4.1 00/65] page table isolation for stable 4.1 Jiri Kosina

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180306002538.1761-45-pasha.tatashin@oracle.com \
    --to=pasha.tatashin@oracle.com \
    --cc=Alexander.Levin@microsoft.com \
    --cc=akuster@mvista.com \
    --cc=cminyard@mvista.com \
    --cc=dan.j.williams@intel.com \
    --cc=daniel.m.jordan@oracle.com \
    --cc=gregkh@linuxfoundation.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=pankaj.laxminarayan.bharadiya@intel.com \
    --cc=sathyanarayanan.kuppuswamy@intel.com \
    --cc=stable@vger.kernel.org \
    --cc=steven.sistare@oracle.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.