linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [patch 0/3] x86/paravirt: Rework paravirt patching
@ 2019-04-24 13:41 Thomas Gleixner
  2019-04-24 13:41 ` [patch 1/3] x86/paravirt: Remove bogus extern declarations Thomas Gleixner
                   ` (2 more replies)
  0 siblings, 3 replies; 28+ messages in thread
From: Thomas Gleixner @ 2019-04-24 13:41 UTC (permalink / raw)
  To: LKML; +Cc: x86, Juergen Gross, Andi Kleen

Andi tried to make the paravirt patch magic work nicely with LTO. It turned
out that the DEF_NATIVE() macro is clever but fragile. Even without LTO it
works by chance and not by design.

As the paravirt patch tables are not changed frequently it's reasonable to
convert them to regular data which is initialized with byte sequences.

Looking at Andi's attempt to change the existing code made me realize
that there is no real reason to have two variants for 32 and 64 bit
which just differ slightly.

The following series unifies the 32/64bit implementation and converts
it to use regular data at the end.

Thanks,

	tglx

8<--------------
 a/arch/x86/kernel/paravirt_patch_32.c |   67 ------------------
 a/arch/x86/kernel/paravirt_patch_64.c |   75 --------------------
 arch/x86/include/asm/paravirt_types.h |    4 -
 arch/x86/kernel/Makefile              |    4 -
 b/arch/x86/kernel/paravirt_patch.c    |  126 ++++++++++++++++++++++++++++++++++
 5 files changed, 128 insertions(+), 148 deletions(-)



^ permalink raw reply	[flat|nested] 28+ messages in thread

* [patch 1/3] x86/paravirt: Remove bogus extern declarations
  2019-04-24 13:41 [patch 0/3] x86/paravirt: Rework paravirt patching Thomas Gleixner
@ 2019-04-24 13:41 ` Thomas Gleixner
  2019-04-25  7:31   ` [tip:x86/paravirt] " tip-bot for Thomas Gleixner
  2019-05-24  7:58   ` tip-bot for Thomas Gleixner
  2019-04-24 13:41 ` [patch 2/3] x86/paravirt: Unify 32/64 bit patch code Thomas Gleixner
  2019-04-24 13:41 ` [patch 3/3] x86/paravirt: Replace paravirt patch asm magic Thomas Gleixner
  2 siblings, 2 replies; 28+ messages in thread
From: Thomas Gleixner @ 2019-04-24 13:41 UTC (permalink / raw)
  To: LKML; +Cc: x86, Juergen Gross, Andi Kleen

These functions are already declared in asm/paravirt.h

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/paravirt_patch_32.c |    3 ---
 arch/x86/kernel/paravirt_patch_64.c |    3 ---
 2 files changed, 6 deletions(-)

--- a/arch/x86/kernel/paravirt_patch_32.c
+++ b/arch/x86/kernel/paravirt_patch_32.c
@@ -23,9 +23,6 @@ DEF_NATIVE(lock, queued_spin_unlock, "mo
 DEF_NATIVE(lock, vcpu_is_preempted, "xor %eax, %eax");
 #endif
 
-extern bool pv_is_native_spin_unlock(void);
-extern bool pv_is_native_vcpu_is_preempted(void);
-
 unsigned native_patch(u8 type, void *ibuf, unsigned long addr, unsigned len)
 {
 #define PATCH_SITE(ops, x)					\
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -29,9 +29,6 @@ DEF_NATIVE(lock, queued_spin_unlock, "mo
 DEF_NATIVE(lock, vcpu_is_preempted, "xor %eax, %eax");
 #endif
 
-extern bool pv_is_native_spin_unlock(void);
-extern bool pv_is_native_vcpu_is_preempted(void);
-
 unsigned native_patch(u8 type, void *ibuf, unsigned long addr, unsigned len)
 {
 #define PATCH_SITE(ops, x)					\



^ permalink raw reply	[flat|nested] 28+ messages in thread

* [patch 2/3] x86/paravirt: Unify 32/64 bit patch code
  2019-04-24 13:41 [patch 0/3] x86/paravirt: Rework paravirt patching Thomas Gleixner
  2019-04-24 13:41 ` [patch 1/3] x86/paravirt: Remove bogus extern declarations Thomas Gleixner
@ 2019-04-24 13:41 ` Thomas Gleixner
  2019-04-25  7:32   ` [tip:x86/paravirt] " tip-bot for Thomas Gleixner
  2019-05-24  8:00   ` [tip:x86/paravirt] x86/paravirt: Unify the 32/64 bit paravirt patching code tip-bot for Thomas Gleixner
  2019-04-24 13:41 ` [patch 3/3] x86/paravirt: Replace paravirt patch asm magic Thomas Gleixner
  2 siblings, 2 replies; 28+ messages in thread
From: Thomas Gleixner @ 2019-04-24 13:41 UTC (permalink / raw)
  To: LKML; +Cc: x86, Juergen Gross, Andi Kleen

Large parts of these two files are identical. Merge them together.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/Makefile            |    4 -
 arch/x86/kernel/paravirt_patch.c    |  106 ++++++++++++++++++++++++++++++++++++
 arch/x86/kernel/paravirt_patch_32.c |   64 ---------------------
 arch/x86/kernel/paravirt_patch_64.c |   72 ------------------------
 4 files changed, 108 insertions(+), 138 deletions(-)

--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -30,7 +30,7 @@ KASAN_SANITIZE_paravirt.o				:= n
 
 OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o	:= y
 OBJECT_FILES_NON_STANDARD_test_nx.o			:= y
-OBJECT_FILES_NON_STANDARD_paravirt_patch_$(BITS).o	:= y
+OBJECT_FILES_NON_STANDARD_paravirt_patch.o		:= y
 
 ifdef CONFIG_FRAME_POINTER
 OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o		:= y
@@ -112,7 +112,7 @@ obj-$(CONFIG_AMD_NB)		+= amd_nb.o
 obj-$(CONFIG_DEBUG_NMI_SELFTEST) += nmi_selftest.o
 
 obj-$(CONFIG_KVM_GUEST)		+= kvm.o kvmclock.o
-obj-$(CONFIG_PARAVIRT)		+= paravirt.o paravirt_patch_$(BITS).o
+obj-$(CONFIG_PARAVIRT)		+= paravirt.o paravirt_patch.o
 obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= paravirt-spinlocks.o
 obj-$(CONFIG_PARAVIRT_CLOCK)	+= pvclock.o
 obj-$(CONFIG_X86_PMEM_LEGACY_DEVICE) += pmem.o
--- /dev/null
+++ b/arch/x86/kernel/paravirt_patch.c
@@ -0,0 +1,106 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/stringify.h>
+
+#include <asm/paravirt.h>
+#include <asm/asm-offsets.h>
+
+#ifdef CONFIG_X86_64
+# ifdef CONFIG_PARAVIRT_XXL
+DEF_NATIVE(irq, irq_disable, "cli");
+DEF_NATIVE(irq, irq_enable, "sti");
+DEF_NATIVE(irq, restore_fl, "pushq %rdi; popfq");
+DEF_NATIVE(irq, save_fl, "pushfq; popq %rax");
+DEF_NATIVE(mmu, read_cr2, "movq %cr2, %rax");
+DEF_NATIVE(mmu, read_cr3, "movq %cr3, %rax");
+DEF_NATIVE(mmu, write_cr3, "movq %rdi, %cr3");
+DEF_NATIVE(cpu, wbinvd, "wbinvd");
+DEF_NATIVE(cpu, usergs_sysret64, "swapgs; sysretq");
+DEF_NATIVE(cpu, swapgs, "swapgs");
+DEF_NATIVE(, mov64, "mov %rdi, %rax");
+
+unsigned int paravirt_patch_ident_64(void *insnbuf, unsigned int len)
+{
+	return paravirt_patch_insns(insnbuf, len, start__mov64, end__mov64);
+}
+# endif /* CONFIG_PARAVIRT_XXL */
+
+# ifdef CONFIG_PARAVIRT_SPINLOCKS
+DEF_NATIVE(lock, queued_spin_unlock, "movb $0, (%rdi)");
+DEF_NATIVE(lock, vcpu_is_preempted, "xor %eax, %eax");
+# endif
+
+#else /* CONFIG_X86_64 */
+
+# ifdef CONFIG_PARAVIRT_XXL
+DEF_NATIVE(irq, irq_disable, "cli");
+DEF_NATIVE(irq, irq_enable, "sti");
+DEF_NATIVE(irq, restore_fl, "push %eax; popf");
+DEF_NATIVE(irq, save_fl, "pushf; pop %eax");
+DEF_NATIVE(cpu, iret, "iret");
+DEF_NATIVE(mmu, read_cr2, "mov %cr2, %eax");
+DEF_NATIVE(mmu, write_cr3, "mov %eax, %cr3");
+DEF_NATIVE(mmu, read_cr3, "mov %cr3, %eax");
+
+unsigned int paravirt_patch_ident_64(void *insnbuf, unsigned int len)
+{
+	/* arg in %edx:%eax, return in %edx:%eax */
+	return 0;
+}
+# endif /* CONFIG_PARAVIRT_XXL */
+
+# ifdef CONFIG_PARAVIRT_SPINLOCKS
+DEF_NATIVE(lock, queued_spin_unlock, "movb $0, (%eax)");
+DEF_NATIVE(lock, vcpu_is_preempted, "xor %eax, %eax");
+# endif
+
+#endif /* !CONFIG_X86_64 */
+
+unsigned int native_patch(u8 type, void *ibuf, unsigned long addr,
+			  unsigned int len)
+{
+#define PATCH_SITE(ops, x)					\
+	case PARAVIRT_PATCH(ops.x):				\
+		return paravirt_patch_insns(ibuf, len, start_##ops##_##x, end_##ops##_##x)
+
+	switch (type) {
+#ifdef CONFIG_PARAVIRT_XXL
+		PATCH_SITE(irq, restore_fl);
+		PATCH_SITE(irq, save_fl);
+		PATCH_SITE(irq, irq_enable);
+		PATCH_SITE(irq, irq_disable);
+
+		PATCH_SITE(mmu, read_cr2);
+		PATCH_SITE(mmu, read_cr3);
+		PATCH_SITE(mmu, write_cr3);
+
+# ifdef CONFIG_X86_64
+		PATCH_SITE(cpu, usergs_sysret64);
+		PATCH_SITE(cpu, swapgs);
+		PATCH_SITE(cpu, wbinvd);
+# else
+		PATCH_SITE(cpu, iret);
+# endif
+#endif
+
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+	case PARAVIRT_PATCH(lock.queued_spin_unlock):
+		if (pv_is_native_spin_unlock())
+			return paravirt_patch_insns(ibuf, len,
+						    start_lock_queued_spin_unlock,
+						    end_lock_queued_spin_unlock);
+		break;
+
+	case PARAVIRT_PATCH(lock.vcpu_is_preempted):
+		if (pv_is_native_vcpu_is_preempted())
+			return paravirt_patch_insns(ibuf, len,
+						    start_lock_vcpu_is_preempted,
+						    end_lock_vcpu_is_preempted);
+		break;
+#endif
+
+	default:
+		break;
+	}
+#undef PATCH_SITE
+	return paravirt_patch_default(type, ibuf, addr, len);
+}
--- a/arch/x86/kernel/paravirt_patch_32.c
+++ /dev/null
@@ -1,64 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <asm/paravirt.h>
-
-#ifdef CONFIG_PARAVIRT_XXL
-DEF_NATIVE(irq, irq_disable, "cli");
-DEF_NATIVE(irq, irq_enable, "sti");
-DEF_NATIVE(irq, restore_fl, "push %eax; popf");
-DEF_NATIVE(irq, save_fl, "pushf; pop %eax");
-DEF_NATIVE(cpu, iret, "iret");
-DEF_NATIVE(mmu, read_cr2, "mov %cr2, %eax");
-DEF_NATIVE(mmu, write_cr3, "mov %eax, %cr3");
-DEF_NATIVE(mmu, read_cr3, "mov %cr3, %eax");
-
-unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len)
-{
-	/* arg in %edx:%eax, return in %edx:%eax */
-	return 0;
-}
-#endif
-
-#if defined(CONFIG_PARAVIRT_SPINLOCKS)
-DEF_NATIVE(lock, queued_spin_unlock, "movb $0, (%eax)");
-DEF_NATIVE(lock, vcpu_is_preempted, "xor %eax, %eax");
-#endif
-
-unsigned native_patch(u8 type, void *ibuf, unsigned long addr, unsigned len)
-{
-#define PATCH_SITE(ops, x)					\
-	case PARAVIRT_PATCH(ops.x):				\
-		return paravirt_patch_insns(ibuf, len, start_##ops##_##x, end_##ops##_##x)
-
-	switch (type) {
-#ifdef CONFIG_PARAVIRT_XXL
-		PATCH_SITE(irq, irq_disable);
-		PATCH_SITE(irq, irq_enable);
-		PATCH_SITE(irq, restore_fl);
-		PATCH_SITE(irq, save_fl);
-		PATCH_SITE(cpu, iret);
-		PATCH_SITE(mmu, read_cr2);
-		PATCH_SITE(mmu, read_cr3);
-		PATCH_SITE(mmu, write_cr3);
-#endif
-#if defined(CONFIG_PARAVIRT_SPINLOCKS)
-	case PARAVIRT_PATCH(lock.queued_spin_unlock):
-		if (pv_is_native_spin_unlock())
-			return paravirt_patch_insns(ibuf, len,
-						    start_lock_queued_spin_unlock,
-						    end_lock_queued_spin_unlock);
-		break;
-
-	case PARAVIRT_PATCH(lock.vcpu_is_preempted):
-		if (pv_is_native_vcpu_is_preempted())
-			return paravirt_patch_insns(ibuf, len,
-						    start_lock_vcpu_is_preempted,
-						    end_lock_vcpu_is_preempted);
-		break;
-#endif
-
-	default:
-		break;
-	}
-#undef PATCH_SITE
-	return paravirt_patch_default(type, ibuf, addr, len);
-}
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ /dev/null
@@ -1,72 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <asm/paravirt.h>
-#include <asm/asm-offsets.h>
-#include <linux/stringify.h>
-
-#ifdef CONFIG_PARAVIRT_XXL
-DEF_NATIVE(irq, irq_disable, "cli");
-DEF_NATIVE(irq, irq_enable, "sti");
-DEF_NATIVE(irq, restore_fl, "pushq %rdi; popfq");
-DEF_NATIVE(irq, save_fl, "pushfq; popq %rax");
-DEF_NATIVE(mmu, read_cr2, "movq %cr2, %rax");
-DEF_NATIVE(mmu, read_cr3, "movq %cr3, %rax");
-DEF_NATIVE(mmu, write_cr3, "movq %rdi, %cr3");
-DEF_NATIVE(cpu, wbinvd, "wbinvd");
-
-DEF_NATIVE(cpu, usergs_sysret64, "swapgs; sysretq");
-DEF_NATIVE(cpu, swapgs, "swapgs");
-DEF_NATIVE(, mov64, "mov %rdi, %rax");
-
-unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len)
-{
-	return paravirt_patch_insns(insnbuf, len,
-				    start__mov64, end__mov64);
-}
-#endif
-
-#if defined(CONFIG_PARAVIRT_SPINLOCKS)
-DEF_NATIVE(lock, queued_spin_unlock, "movb $0, (%rdi)");
-DEF_NATIVE(lock, vcpu_is_preempted, "xor %eax, %eax");
-#endif
-
-unsigned native_patch(u8 type, void *ibuf, unsigned long addr, unsigned len)
-{
-#define PATCH_SITE(ops, x)					\
-	case PARAVIRT_PATCH(ops.x):				\
-		return paravirt_patch_insns(ibuf, len, start_##ops##_##x, end_##ops##_##x)
-
-	switch (type) {
-#ifdef CONFIG_PARAVIRT_XXL
-		PATCH_SITE(irq, restore_fl);
-		PATCH_SITE(irq, save_fl);
-		PATCH_SITE(irq, irq_enable);
-		PATCH_SITE(irq, irq_disable);
-		PATCH_SITE(cpu, usergs_sysret64);
-		PATCH_SITE(cpu, swapgs);
-		PATCH_SITE(cpu, wbinvd);
-		PATCH_SITE(mmu, read_cr2);
-		PATCH_SITE(mmu, read_cr3);
-		PATCH_SITE(mmu, write_cr3);
-#endif
-#if defined(CONFIG_PARAVIRT_SPINLOCKS)
-	case PARAVIRT_PATCH(lock.queued_spin_unlock):
-		if (pv_is_native_spin_unlock())
-			return paravirt_patch_insns(ibuf, len,
-						    start_lock_queued_spin_unlock,
-						    end_lock_queued_spin_unlock);
-		break;
-
-	case PARAVIRT_PATCH(lock.vcpu_is_preempted):
-		if (pv_is_native_vcpu_is_preempted())
-			return paravirt_patch_insns(ibuf, len,
-						    start_lock_vcpu_is_preempted,
-						    end_lock_vcpu_is_preempted);
-		break;
-#endif
-
-	default:
-		break;
-	}
-#undef PATCH_SITE
-	return paravirt_patch_default(type, ibuf, addr, len);
-}



^ permalink raw reply	[flat|nested] 28+ messages in thread

* [patch 3/3] x86/paravirt: Replace paravirt patch asm magic
  2019-04-24 13:41 [patch 0/3] x86/paravirt: Rework paravirt patching Thomas Gleixner
  2019-04-24 13:41 ` [patch 1/3] x86/paravirt: Remove bogus extern declarations Thomas Gleixner
  2019-04-24 13:41 ` [patch 2/3] x86/paravirt: Unify 32/64 bit patch code Thomas Gleixner
@ 2019-04-24 13:41 ` Thomas Gleixner
  2019-04-25  6:52   ` Ingo Molnar
  2019-05-24  8:00   ` [tip:x86/paravirt] x86/paravirt: Replace the " tip-bot for Thomas Gleixner
  2 siblings, 2 replies; 28+ messages in thread
From: Thomas Gleixner @ 2019-04-24 13:41 UTC (permalink / raw)
  To: LKML; +Cc: x86, Juergen Gross, Andi Kleen

The magic macro DEF_NATIVE() in the paravirt patching code uses inline
assembly to generate a data table for patching in the native instructions.

While clever, this is falling apart with LTO, and even aside from LTO the
construct is just working by chance according to GCC folks.

Aside from that, the tables are constant data and not some form of magic
text.

As these constructs are not subject to frequent changes it is not a
maintenance issue to convert them to regular data tables which are
initialized with hex bytes.

Create a new set of macros and data structures to store the instruction
sequences and convert the code over.

Reported-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/include/asm/paravirt_types.h |    4 
 arch/x86/kernel/paravirt_patch.c      |  144 +++++++++++++++++++---------------
 2 files changed, 82 insertions(+), 66 deletions(-)

--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -370,10 +370,6 @@ extern struct paravirt_patch_template pv
 /* Simple instruction patching code. */
 #define NATIVE_LABEL(a,x,b) "\n\t.globl " a #x "_" #b "\n" a #x "_" #b ":\n\t"
 
-#define DEF_NATIVE(ops, name, code)					\
-	__visible extern const char start_##ops##_##name[], end_##ops##_##name[];	\
-	asm(NATIVE_LABEL("start_", ops, name) code NATIVE_LABEL("end_", ops, name))
-
 unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len);
 unsigned paravirt_patch_default(u8 type, void *insnbuf,
 				unsigned long addr, unsigned len);
--- a/arch/x86/kernel/paravirt_patch.c
+++ b/arch/x86/kernel/paravirt_patch.c
@@ -4,103 +4,123 @@
 #include <asm/paravirt.h>
 #include <asm/asm-offsets.h>
 
-#ifdef CONFIG_X86_64
-# ifdef CONFIG_PARAVIRT_XXL
-DEF_NATIVE(irq, irq_disable, "cli");
-DEF_NATIVE(irq, irq_enable, "sti");
-DEF_NATIVE(irq, restore_fl, "pushq %rdi; popfq");
-DEF_NATIVE(irq, save_fl, "pushfq; popq %rax");
-DEF_NATIVE(mmu, read_cr2, "movq %cr2, %rax");
-DEF_NATIVE(mmu, read_cr3, "movq %cr3, %rax");
-DEF_NATIVE(mmu, write_cr3, "movq %rdi, %cr3");
-DEF_NATIVE(cpu, wbinvd, "wbinvd");
-DEF_NATIVE(cpu, usergs_sysret64, "swapgs; sysretq");
-DEF_NATIVE(cpu, swapgs, "swapgs");
-DEF_NATIVE(, mov64, "mov %rdi, %rax");
+#define PSTART(d, m)							\
+	patch_data_##d.m
 
-unsigned int paravirt_patch_ident_64(void *insnbuf, unsigned int len)
-{
-	return paravirt_patch_insns(insnbuf, len, start__mov64, end__mov64);
-}
-# endif /* CONFIG_PARAVIRT_XXL */
+#define PEND(d, m)							\
+	(PSTART(d, m) + sizeof(patch_data_##d.m))
 
-# ifdef CONFIG_PARAVIRT_SPINLOCKS
-DEF_NATIVE(lock, queued_spin_unlock, "movb $0, (%rdi)");
-DEF_NATIVE(lock, vcpu_is_preempted, "xor %eax, %eax");
-# endif
+#define PATCH(d, m, ibuf, len)						\
+	paravirt_patch_insns(ibuf, len, PSTART(d, m), PEND(d, m))
 
-#else /* CONFIG_X86_64 */
+#define PATCH_CASE(ops, m, data, ibuf, len)				\
+	case PARAVIRT_PATCH(ops.m):					\
+		return PATCH(data, ops##_##m, ibuf, len)
 
-# ifdef CONFIG_PARAVIRT_XXL
-DEF_NATIVE(irq, irq_disable, "cli");
-DEF_NATIVE(irq, irq_enable, "sti");
-DEF_NATIVE(irq, restore_fl, "push %eax; popf");
-DEF_NATIVE(irq, save_fl, "pushf; pop %eax");
-DEF_NATIVE(cpu, iret, "iret");
-DEF_NATIVE(mmu, read_cr2, "mov %cr2, %eax");
-DEF_NATIVE(mmu, write_cr3, "mov %eax, %cr3");
-DEF_NATIVE(mmu, read_cr3, "mov %cr3, %eax");
+#ifdef CONFIG_PARAVIRT_XXL
+struct patch_xxl {
+	const unsigned char	irq_irq_disable[1];
+	const unsigned char	irq_irq_enable[1];
+	const unsigned char	irq_restore_fl[2];
+	const unsigned char	irq_save_fl[2];
+	const unsigned char	mmu_read_cr2[3];
+	const unsigned char	mmu_read_cr3[3];
+	const unsigned char	mmu_write_cr3[3];
+# ifdef CONFIG_X86_64
+	const unsigned char	cpu_wbinvd[2];
+	const unsigned char	cpu_usergs_sysret64[6];
+	const unsigned char	cpu_swapgs[3];
+	const unsigned char	mov64[3];
+# else
+	const unsigned char	cpu_iret[1];
+# endif
+};
+
+static const struct patch_xxl patch_data_xxl = {
+	.irq_irq_disable	= { 0xfa },		// cli
+	.irq_irq_enable		= { 0xfb },		// sti
+	.irq_save_fl		= { 0x9c, 0x58 },	// pushf; pop %[re]ax
+	.mmu_read_cr2		= { 0x0f, 0x20, 0xd0 },	// mov %cr2, %[re]ax
+	.mmu_read_cr3		= { 0x0f, 0x20, 0xd8 },	// mov %cr3, %[re]ax
+# ifdef CONFIG_X86_64
+	.irq_restore_fl		= { 0x57, 0x9d },	// push %rdi; popfq
+	.mmu_write_cr3		= { 0x0f, 0x22, 0xdf },	// mov %rdi, %cr3
+	.cpu_wbinvd		= { 0x0f, 0x09 },	// wbinvd
+	.cpu_usergs_sysret64	= { 0x0f, 0x01, 0xf8,
+				    0x48, 0x0f, 0x07 },	// swapgs; sysretq
+	.cpu_swapgs		= { 0x0f, 0x01, 0xf8 },	// swapgs
+	.mov64			= { 0x48, 0x89, 0xf8 },	// mov %rdi, %rax
+# else
+	.irq_restore_fl		= { 0x50, 0x9d },	// push %eax; popf
+	.mmu_write_cr3		= { 0x0f, 0x22, 0xd8 },	// mov %eax, %cr3
+	.cpu_iret		= { 0xcf },		// iret
+# endif
+};
 
 unsigned int paravirt_patch_ident_64(void *insnbuf, unsigned int len)
 {
-	/* arg in %edx:%eax, return in %edx:%eax */
+#ifdef CONFIG_X86_64
+	return PATCH(xxl, mov64, insnbuf, len);
+#endif
 	return 0;
 }
 # endif /* CONFIG_PARAVIRT_XXL */
 
-# ifdef CONFIG_PARAVIRT_SPINLOCKS
-DEF_NATIVE(lock, queued_spin_unlock, "movb $0, (%eax)");
-DEF_NATIVE(lock, vcpu_is_preempted, "xor %eax, %eax");
-# endif
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+struct patch_lock {
+	unsigned char queued_spin_unlock[3];
+	unsigned char vcpu_is_preempted[2];
+};
 
-#endif /* !CONFIG_X86_64 */
+static const struct patch_lock patch_data_lock = {
+	.vcpu_is_preempted	= { 0x31, 0xc0 },	// xor %eax, %eax
+
+# ifdef CONFIG_X86_64
+	.queued_spin_unlock	= { 0xc6, 0x07, 0x00 },	// movb $0, (%rdi)
+# else
+	.queued_spin_unlock	= { 0xc6, 0x00, 0x00 },	// movb $0, (%eax)
+# endif
+};
+#endif /* CONFIG_PARAVIRT_SPINLOCKS */
 
 unsigned int native_patch(u8 type, void *ibuf, unsigned long addr,
 			  unsigned int len)
 {
-#define PATCH_SITE(ops, x)					\
-	case PARAVIRT_PATCH(ops.x):				\
-		return paravirt_patch_insns(ibuf, len, start_##ops##_##x, end_##ops##_##x)
-
 	switch (type) {
+
 #ifdef CONFIG_PARAVIRT_XXL
-		PATCH_SITE(irq, restore_fl);
-		PATCH_SITE(irq, save_fl);
-		PATCH_SITE(irq, irq_enable);
-		PATCH_SITE(irq, irq_disable);
-
-		PATCH_SITE(mmu, read_cr2);
-		PATCH_SITE(mmu, read_cr3);
-		PATCH_SITE(mmu, write_cr3);
+	PATCH_CASE(irq, restore_fl, xxl, ibuf, len);
+	PATCH_CASE(irq, save_fl, xxl, ibuf, len);
+	PATCH_CASE(irq, irq_enable, xxl, ibuf, len);
+	PATCH_CASE(irq, irq_disable, xxl, ibuf, len);
+
+	PATCH_CASE(mmu, read_cr2, xxl, ibuf, len);
+	PATCH_CASE(mmu, read_cr3, xxl, ibuf, len);
+	PATCH_CASE(mmu, write_cr3, xxl, ibuf, len);
 
 # ifdef CONFIG_X86_64
-		PATCH_SITE(cpu, usergs_sysret64);
-		PATCH_SITE(cpu, swapgs);
-		PATCH_SITE(cpu, wbinvd);
+	PATCH_CASE(cpu, usergs_sysret64, xxl, ibuf, len);
+	PATCH_CASE(cpu, swapgs, xxl, ibuf, len);
+	PATCH_CASE(cpu, wbinvd, xxl, ibuf, len);
 # else
-		PATCH_SITE(cpu, iret);
+	PATCH_CASE(cpu, iret, xxl, ibuf, len);
 # endif
 #endif
 
 #ifdef CONFIG_PARAVIRT_SPINLOCKS
 	case PARAVIRT_PATCH(lock.queued_spin_unlock):
 		if (pv_is_native_spin_unlock())
-			return paravirt_patch_insns(ibuf, len,
-						    start_lock_queued_spin_unlock,
-						    end_lock_queued_spin_unlock);
+			return PATCH(lock, queued_spin_unlock, ibuf, len);
 		break;
 
 	case PARAVIRT_PATCH(lock.vcpu_is_preempted):
 		if (pv_is_native_vcpu_is_preempted())
-			return paravirt_patch_insns(ibuf, len,
-						    start_lock_vcpu_is_preempted,
-						    end_lock_vcpu_is_preempted);
+			return PATCH(lock, vcpu_is_preempted, ibuf, len);
 		break;
 #endif
-
 	default:
 		break;
 	}
-#undef PATCH_SITE
+
 	return paravirt_patch_default(type, ibuf, addr, len);
 }



^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [patch 3/3] x86/paravirt: Replace paravirt patch asm magic
  2019-04-24 13:41 ` [patch 3/3] x86/paravirt: Replace paravirt patch asm magic Thomas Gleixner
@ 2019-04-25  6:52   ` Ingo Molnar
  2019-04-25  7:22     ` Thomas Gleixner
  2019-04-25  8:08     ` [patch 3/3] x86/paravirt: Replace paravirt patch asm magic Peter Zijlstra
  2019-05-24  8:00   ` [tip:x86/paravirt] x86/paravirt: Replace the " tip-bot for Thomas Gleixner
  1 sibling, 2 replies; 28+ messages in thread
From: Ingo Molnar @ 2019-04-25  6:52 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: LKML, x86, Juergen Gross, Andi Kleen


* Thomas Gleixner <tglx@linutronix.de> wrote:

> The magic macro DEF_NATIVE() in the paravirt patching code uses inline
> assembly to generate a data table for patching in the native instructions.
> 
> While clever this is falling apart with LTO and even aside of LTO the
> construct is just working by chance according to GCC folks.
> 
> Aside of that the tables are constant data and not some form of magic
> text.
> 
> As these constructs are not subject to frequent changes it is not a
> maintenance issue to convert them to regular data tables which are
> initialized with hex bytes.
> 
> Create a new set of macros and data structures to store the instruction
> sequences and convert the code over.
> 
> Reported-by: Andi Kleen <ak@linux.intel.com>
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

> -# ifdef CONFIG_PARAVIRT_XXL
> -DEF_NATIVE(irq, irq_disable, "cli");
> -DEF_NATIVE(irq, irq_enable, "sti");
> -DEF_NATIVE(irq, restore_fl, "push %eax; popf");
> -DEF_NATIVE(irq, save_fl, "pushf; pop %eax");
> -DEF_NATIVE(cpu, iret, "iret");
> -DEF_NATIVE(mmu, read_cr2, "mov %cr2, %eax");
> -DEF_NATIVE(mmu, write_cr3, "mov %eax, %cr3");
> -DEF_NATIVE(mmu, read_cr3, "mov %cr3, %eax");

> +static const struct patch_xxl patch_data_xxl = {
> +	.irq_irq_disable	= { 0xfa },		// cli
> +	.irq_irq_enable		= { 0xfb },		// sti
> +	.irq_save_fl		= { 0x9c, 0x58 },	// pushf; pop %[re]ax
> +	.mmu_read_cr2		= { 0x0f, 0x20, 0xd0 },	// mov %cr2, %[re]ax
> +	.mmu_read_cr3		= { 0x0f, 0x20, 0xd8 },	// mov %cr3, %[re]ax
> +# ifdef CONFIG_X86_64
> +	.irq_restore_fl		= { 0x57, 0x9d },	// push %rdi; popfq
> +	.mmu_write_cr3		= { 0x0f, 0x22, 0xdf },	// mov %rdi, %cr3
> +	.cpu_wbinvd		= { 0x0f, 0x09 },	// wbinvd
> +	.cpu_usergs_sysret64	= { 0x0f, 0x01, 0xf8,
> +				    0x48, 0x0f, 0x07 },	// swapgs; sysretq
> +	.cpu_swapgs		= { 0x0f, 0x01, 0xf8 },	// swapgs
> +	.mov64			= { 0x48, 0x89, 0xf8 },	// mov %rdi, %rax
> +# else
> +	.irq_restore_fl		= { 0x50, 0x9d },	// push %eax; popf
> +	.mmu_write_cr3		= { 0x0f, 0x22, 0xd8 },	// mov %eax, %cr3
> +	.cpu_iret		= { 0xcf },		// iret
> +# endif

I think these open-coded hex versions are somewhat fragile as well — how
about putting these into a .S file and controlling the sections in an
LTO-safe manner there?

That will also allow us to write proper asm, and global labels can be 
used to extract the patchlets and their length?

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [patch 3/3] x86/paravirt: Replace paravirt patch asm magic
  2019-04-25  6:52   ` Ingo Molnar
@ 2019-04-25  7:22     ` Thomas Gleixner
  2019-04-25  7:46       ` Juergen Gross
  2019-04-25  8:10       ` [PATCH] x86/paravirt: Match paravirt patchlet field definition ordering to initialization ordering Ingo Molnar
  2019-04-25  8:08     ` [patch 3/3] x86/paravirt: Replace paravirt patch asm magic Peter Zijlstra
  1 sibling, 2 replies; 28+ messages in thread
From: Thomas Gleixner @ 2019-04-25  7:22 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: LKML, x86, Juergen Gross, Andi Kleen

On Thu, 25 Apr 2019, Ingo Molnar wrote:
> > +# else
> > +	.irq_restore_fl		= { 0x50, 0x9d },	// push %eax; popf
> > +	.mmu_write_cr3		= { 0x0f, 0x22, 0xd8 },	// mov %eax, %cr3
> > +	.cpu_iret		= { 0xcf },		// iret
> > +# endif
> 
> I think these open-coded hexa versions are somewhat fragile as well, how 
> about putting these into a .S file and controlling the sections in an LTO 
> safe manner there?
> 
> That will also allow us to write proper asm, and global labels can be 
> used to extract the patchlets and their length?

We are not changing these any other day and I really don't see a reason to
have these things global just because.

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* [tip:x86/paravirt] x86/paravirt: Remove bogus extern declarations
  2019-04-24 13:41 ` [patch 1/3] x86/paravirt: Remove bogus extern declarations Thomas Gleixner
@ 2019-04-25  7:31   ` tip-bot for Thomas Gleixner
  2019-05-24  7:58   ` tip-bot for Thomas Gleixner
  1 sibling, 0 replies; 28+ messages in thread
From: tip-bot for Thomas Gleixner @ 2019-04-25  7:31 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: peterz, linux-kernel, mingo, jgross, tglx, torvalds, bp, hpa

Commit-ID:  465f81857c16ae17f461e4738ceb1f4f8cce2077
Gitweb:     https://git.kernel.org/tip/465f81857c16ae17f461e4738ceb1f4f8cce2077
Author:     Thomas Gleixner <tglx@linutronix.de>
AuthorDate: Wed, 24 Apr 2019 15:41:16 +0200
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Thu, 25 Apr 2019 08:45:01 +0200

x86/paravirt: Remove bogus extern declarations

These functions are already declared in asm/paravirt.h

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20190424134223.501598258@linutronix.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/paravirt_patch_32.c | 3 ---
 arch/x86/kernel/paravirt_patch_64.c | 3 ---
 2 files changed, 6 deletions(-)

diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c
index de138d3912e4..05d771f81e74 100644
--- a/arch/x86/kernel/paravirt_patch_32.c
+++ b/arch/x86/kernel/paravirt_patch_32.c
@@ -23,9 +23,6 @@ DEF_NATIVE(lock, queued_spin_unlock, "movb $0, (%eax)");
 DEF_NATIVE(lock, vcpu_is_preempted, "xor %eax, %eax");
 #endif
 
-extern bool pv_is_native_spin_unlock(void);
-extern bool pv_is_native_vcpu_is_preempted(void);
-
 unsigned native_patch(u8 type, void *ibuf, unsigned long addr, unsigned len)
 {
 #define PATCH_SITE(ops, x)					\
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
index 9d9e04b31077..bd1558f90cfb 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -29,9 +29,6 @@ DEF_NATIVE(lock, queued_spin_unlock, "movb $0, (%rdi)");
 DEF_NATIVE(lock, vcpu_is_preempted, "xor %eax, %eax");
 #endif
 
-extern bool pv_is_native_spin_unlock(void);
-extern bool pv_is_native_vcpu_is_preempted(void);
-
 unsigned native_patch(u8 type, void *ibuf, unsigned long addr, unsigned len)
 {
 #define PATCH_SITE(ops, x)					\

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [tip:x86/paravirt] x86/paravirt: Unify 32/64 bit patch code
  2019-04-24 13:41 ` [patch 2/3] x86/paravirt: Unify 32/64 bit patch code Thomas Gleixner
@ 2019-04-25  7:32   ` tip-bot for Thomas Gleixner
  2019-05-24  8:00   ` [tip:x86/paravirt] x86/paravirt: Unify the 32/64 bit paravirt patching code tip-bot for Thomas Gleixner
  1 sibling, 0 replies; 28+ messages in thread
From: tip-bot for Thomas Gleixner @ 2019-04-25  7:32 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: peterz, linux-kernel, hpa, mingo, jgross, tglx, bp, torvalds

Commit-ID:  92c814ed5f39d0f03d8cc432449a07f82e2c8a73
Gitweb:     https://git.kernel.org/tip/92c814ed5f39d0f03d8cc432449a07f82e2c8a73
Author:     Thomas Gleixner <tglx@linutronix.de>
AuthorDate: Wed, 24 Apr 2019 15:41:17 +0200
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Thu, 25 Apr 2019 08:45:02 +0200

x86/paravirt: Unify 32/64 bit patch code

Large parts of these two files are identical. Merge them together.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20190424134223.603491680@linutronix.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/Makefile                           |  4 +-
 .../{paravirt_patch_64.c => paravirt_patch.c}      | 62 ++++++++++++++++-----
 arch/x86/kernel/paravirt_patch_32.c                | 64 ----------------------
 3 files changed, 50 insertions(+), 80 deletions(-)

diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 00b7e27bc2b7..62e78a3fd31e 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -30,7 +30,7 @@ KASAN_SANITIZE_paravirt.o				:= n
 
 OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o	:= y
 OBJECT_FILES_NON_STANDARD_test_nx.o			:= y
-OBJECT_FILES_NON_STANDARD_paravirt_patch_$(BITS).o	:= y
+OBJECT_FILES_NON_STANDARD_paravirt_patch.o		:= y
 
 ifdef CONFIG_FRAME_POINTER
 OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o		:= y
@@ -112,7 +112,7 @@ obj-$(CONFIG_AMD_NB)		+= amd_nb.o
 obj-$(CONFIG_DEBUG_NMI_SELFTEST) += nmi_selftest.o
 
 obj-$(CONFIG_KVM_GUEST)		+= kvm.o kvmclock.o
-obj-$(CONFIG_PARAVIRT)		+= paravirt.o paravirt_patch_$(BITS).o
+obj-$(CONFIG_PARAVIRT)		+= paravirt.o paravirt_patch.o
 obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= paravirt-spinlocks.o
 obj-$(CONFIG_PARAVIRT_CLOCK)	+= pvclock.o
 obj-$(CONFIG_X86_PMEM_LEGACY_DEVICE) += pmem.o
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch.c
similarity index 59%
rename from arch/x86/kernel/paravirt_patch_64.c
rename to arch/x86/kernel/paravirt_patch.c
index bd1558f90cfb..a47899db9932 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch.c
@@ -1,9 +1,11 @@
 // SPDX-License-Identifier: GPL-2.0
+#include <linux/stringify.h>
+
 #include <asm/paravirt.h>
 #include <asm/asm-offsets.h>
-#include <linux/stringify.h>
 
-#ifdef CONFIG_PARAVIRT_XXL
+#ifdef CONFIG_X86_64
+# ifdef CONFIG_PARAVIRT_XXL
 DEF_NATIVE(irq, irq_disable, "cli");
 DEF_NATIVE(irq, irq_enable, "sti");
 DEF_NATIVE(irq, restore_fl, "pushq %rdi; popfq");
@@ -12,24 +14,49 @@ DEF_NATIVE(mmu, read_cr2, "movq %cr2, %rax");
 DEF_NATIVE(mmu, read_cr3, "movq %cr3, %rax");
 DEF_NATIVE(mmu, write_cr3, "movq %rdi, %cr3");
 DEF_NATIVE(cpu, wbinvd, "wbinvd");
-
 DEF_NATIVE(cpu, usergs_sysret64, "swapgs; sysretq");
 DEF_NATIVE(cpu, swapgs, "swapgs");
 DEF_NATIVE(, mov64, "mov %rdi, %rax");
 
-unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len)
+unsigned int paravirt_patch_ident_64(void *insnbuf, unsigned int len)
 {
-	return paravirt_patch_insns(insnbuf, len,
-				    start__mov64, end__mov64);
+	return paravirt_patch_insns(insnbuf, len, start__mov64, end__mov64);
 }
-#endif
+# endif /* CONFIG_PARAVIRT_XXL */
 
-#if defined(CONFIG_PARAVIRT_SPINLOCKS)
+# ifdef CONFIG_PARAVIRT_SPINLOCKS
 DEF_NATIVE(lock, queued_spin_unlock, "movb $0, (%rdi)");
 DEF_NATIVE(lock, vcpu_is_preempted, "xor %eax, %eax");
-#endif
+# endif
+
+#else /* CONFIG_X86_64 */
+
+# ifdef CONFIG_PARAVIRT_XXL
+DEF_NATIVE(irq, irq_disable, "cli");
+DEF_NATIVE(irq, irq_enable, "sti");
+DEF_NATIVE(irq, restore_fl, "push %eax; popf");
+DEF_NATIVE(irq, save_fl, "pushf; pop %eax");
+DEF_NATIVE(cpu, iret, "iret");
+DEF_NATIVE(mmu, read_cr2, "mov %cr2, %eax");
+DEF_NATIVE(mmu, write_cr3, "mov %eax, %cr3");
+DEF_NATIVE(mmu, read_cr3, "mov %cr3, %eax");
+
+unsigned int paravirt_patch_ident_64(void *insnbuf, unsigned int len)
+{
+	/* arg in %edx:%eax, return in %edx:%eax */
+	return 0;
+}
+# endif /* CONFIG_PARAVIRT_XXL */
+
+# ifdef CONFIG_PARAVIRT_SPINLOCKS
+DEF_NATIVE(lock, queued_spin_unlock, "movb $0, (%eax)");
+DEF_NATIVE(lock, vcpu_is_preempted, "xor %eax, %eax");
+# endif
+
+#endif /* !CONFIG_X86_64 */
 
-unsigned native_patch(u8 type, void *ibuf, unsigned long addr, unsigned len)
+unsigned int native_patch(u8 type, void *ibuf, unsigned long addr,
+			  unsigned int len)
 {
 #define PATCH_SITE(ops, x)					\
 	case PARAVIRT_PATCH(ops.x):				\
@@ -41,14 +68,21 @@ unsigned native_patch(u8 type, void *ibuf, unsigned long addr, unsigned len)
 		PATCH_SITE(irq, save_fl);
 		PATCH_SITE(irq, irq_enable);
 		PATCH_SITE(irq, irq_disable);
-		PATCH_SITE(cpu, usergs_sysret64);
-		PATCH_SITE(cpu, swapgs);
-		PATCH_SITE(cpu, wbinvd);
+
 		PATCH_SITE(mmu, read_cr2);
 		PATCH_SITE(mmu, read_cr3);
 		PATCH_SITE(mmu, write_cr3);
+
+# ifdef CONFIG_X86_64
+		PATCH_SITE(cpu, usergs_sysret64);
+		PATCH_SITE(cpu, swapgs);
+		PATCH_SITE(cpu, wbinvd);
+# else
+		PATCH_SITE(cpu, iret);
+# endif
 #endif
-#if defined(CONFIG_PARAVIRT_SPINLOCKS)
+
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
 	case PARAVIRT_PATCH(lock.queued_spin_unlock):
 		if (pv_is_native_spin_unlock())
 			return paravirt_patch_insns(ibuf, len,
diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c
deleted file mode 100644
index 05d771f81e74..000000000000
--- a/arch/x86/kernel/paravirt_patch_32.c
+++ /dev/null
@@ -1,64 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <asm/paravirt.h>
-
-#ifdef CONFIG_PARAVIRT_XXL
-DEF_NATIVE(irq, irq_disable, "cli");
-DEF_NATIVE(irq, irq_enable, "sti");
-DEF_NATIVE(irq, restore_fl, "push %eax; popf");
-DEF_NATIVE(irq, save_fl, "pushf; pop %eax");
-DEF_NATIVE(cpu, iret, "iret");
-DEF_NATIVE(mmu, read_cr2, "mov %cr2, %eax");
-DEF_NATIVE(mmu, write_cr3, "mov %eax, %cr3");
-DEF_NATIVE(mmu, read_cr3, "mov %cr3, %eax");
-
-unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len)
-{
-	/* arg in %edx:%eax, return in %edx:%eax */
-	return 0;
-}
-#endif
-
-#if defined(CONFIG_PARAVIRT_SPINLOCKS)
-DEF_NATIVE(lock, queued_spin_unlock, "movb $0, (%eax)");
-DEF_NATIVE(lock, vcpu_is_preempted, "xor %eax, %eax");
-#endif
-
-unsigned native_patch(u8 type, void *ibuf, unsigned long addr, unsigned len)
-{
-#define PATCH_SITE(ops, x)					\
-	case PARAVIRT_PATCH(ops.x):				\
-		return paravirt_patch_insns(ibuf, len, start_##ops##_##x, end_##ops##_##x)
-
-	switch (type) {
-#ifdef CONFIG_PARAVIRT_XXL
-		PATCH_SITE(irq, irq_disable);
-		PATCH_SITE(irq, irq_enable);
-		PATCH_SITE(irq, restore_fl);
-		PATCH_SITE(irq, save_fl);
-		PATCH_SITE(cpu, iret);
-		PATCH_SITE(mmu, read_cr2);
-		PATCH_SITE(mmu, read_cr3);
-		PATCH_SITE(mmu, write_cr3);
-#endif
-#if defined(CONFIG_PARAVIRT_SPINLOCKS)
-	case PARAVIRT_PATCH(lock.queued_spin_unlock):
-		if (pv_is_native_spin_unlock())
-			return paravirt_patch_insns(ibuf, len,
-						    start_lock_queued_spin_unlock,
-						    end_lock_queued_spin_unlock);
-		break;
-
-	case PARAVIRT_PATCH(lock.vcpu_is_preempted):
-		if (pv_is_native_vcpu_is_preempted())
-			return paravirt_patch_insns(ibuf, len,
-						    start_lock_vcpu_is_preempted,
-						    end_lock_vcpu_is_preempted);
-		break;
-#endif
-
-	default:
-		break;
-	}
-#undef PATCH_SITE
-	return paravirt_patch_default(type, ibuf, addr, len);
-}

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* Re: [patch 3/3] x86/paravirt: Replace paravirt patch asm magic
  2019-04-25  7:22     ` Thomas Gleixner
@ 2019-04-25  7:46       ` Juergen Gross
  2019-04-25  8:10       ` [PATCH] x86/paravirt: Match paravirt patchlet field definition ordering to initialization ordering Ingo Molnar
  1 sibling, 0 replies; 28+ messages in thread
From: Juergen Gross @ 2019-04-25  7:46 UTC (permalink / raw)
  To: Thomas Gleixner, Ingo Molnar; +Cc: LKML, x86, Andi Kleen

On 25/04/2019 09:22, Thomas Gleixner wrote:
> On Thu, 25 Apr 2019, Ingo Molnar wrote:
>>> +# else
>>> +	.irq_restore_fl		= { 0x50, 0x9d },	// push %eax; popf
>>> +	.mmu_write_cr3		= { 0x0f, 0x22, 0xd8 },	// mov %eax, %cr3
>>> +	.cpu_iret		= { 0xcf },		// iret
>>> +# endif
>>
>> I think these open-coded hexa versions are somewhat fragile as well, how 
>> about putting these into a .S file and controlling the sections in an LTO 
>> safe manner there?
>>
>> That will also allow us to write proper asm, and global labels can be 
>> used to extract the patchlets and their length?
> 
> We are not changing these any other day and I really don't see a reason to
> have these things global just because.

What about generating the hex values from the Makefile using a temporary
.S file? This would make the result easy verifiable and I don't think
setting this up is too complicated.


Juergen

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [patch 3/3] x86/paravirt: Replace paravirt patch asm magic
  2019-04-25  6:52   ` Ingo Molnar
  2019-04-25  7:22     ` Thomas Gleixner
@ 2019-04-25  8:08     ` Peter Zijlstra
  2019-04-25  8:19       ` Peter Zijlstra
  2019-04-25  9:20       ` Ingo Molnar
  1 sibling, 2 replies; 28+ messages in thread
From: Peter Zijlstra @ 2019-04-25  8:08 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Thomas Gleixner, LKML, x86, Juergen Gross, Andi Kleen

On Thu, Apr 25, 2019 at 08:52:09AM +0200, Ingo Molnar wrote:

> > -# ifdef CONFIG_PARAVIRT_XXL
> > -DEF_NATIVE(irq, irq_disable, "cli");
> > -DEF_NATIVE(irq, irq_enable, "sti");
> > -DEF_NATIVE(irq, restore_fl, "push %eax; popf");
> > -DEF_NATIVE(irq, save_fl, "pushf; pop %eax");
> > -DEF_NATIVE(cpu, iret, "iret");
> > -DEF_NATIVE(mmu, read_cr2, "mov %cr2, %eax");
> > -DEF_NATIVE(mmu, write_cr3, "mov %eax, %cr3");
> > -DEF_NATIVE(mmu, read_cr3, "mov %cr3, %eax");
> 
> > +static const struct patch_xxl patch_data_xxl = {
> > +	.irq_irq_disable	= { 0xfa },		// cli
> > +	.irq_irq_enable		= { 0xfb },		// sti
> > +	.irq_save_fl		= { 0x9c, 0x58 },	// pushf; pop %[re]ax
> > +	.mmu_read_cr2		= { 0x0f, 0x20, 0xd0 },	// mov %cr2, %[re]ax
> > +	.mmu_read_cr3		= { 0x0f, 0x20, 0xd8 },	// mov %cr3, %[re]ax
> > +# ifdef CONFIG_X86_64
> > +	.irq_restore_fl		= { 0x57, 0x9d },	// push %rdi; popfq
> > +	.mmu_write_cr3		= { 0x0f, 0x22, 0xdf },	// mov %rdi, %cr3
> > +	.cpu_wbinvd		= { 0x0f, 0x09 },	// wbinvd
> > +	.cpu_usergs_sysret64	= { 0x0f, 0x01, 0xf8,
> > +				    0x48, 0x0f, 0x07 },	// swapgs; sysretq
> > +	.cpu_swapgs		= { 0x0f, 0x01, 0xf8 },	// swapgs
> > +	.mov64			= { 0x48, 0x89, 0xf8 },	// mov %rdi, %rax
> > +# else
> > +	.irq_restore_fl		= { 0x50, 0x9d },	// push %eax; popf
> > +	.mmu_write_cr3		= { 0x0f, 0x22, 0xd8 },	// mov %eax, %cr3
> > +	.cpu_iret		= { 0xcf },		// iret
> > +# endif
> 
> I think these open-coded hexa versions are somewhat fragile as well, how 
> about putting these into a .S file and controlling the sections in an LTO 
> safe manner there?
> 
> That will also allow us to write proper asm, and global labels can be 
> used to extract the patchlets and their length?

While I'm not fan either; I think that will be worse still, because it
splits the information over multiple files.

The advantage of this form is that it is clear how long the instructions
are, which is important for the patching. These immediates have to be
shorter than 5 bytes because they overwrite the CALL/JMP to the paravirt
function.

/me eyes .cpu_usergs_sysret64 and goes wtf..

^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH] x86/paravirt: Match paravirt patchlet field definition ordering to initialization ordering
  2019-04-25  7:22     ` Thomas Gleixner
  2019-04-25  7:46       ` Juergen Gross
@ 2019-04-25  8:10       ` Ingo Molnar
  2019-04-25  9:17         ` [PATCH] x86/paravirt: Detect oversized patching bugs as they happen and BUG_ON() to avoid later crashes Ingo Molnar
  2019-05-24  8:01         ` [tip:x86/paravirt] x86/paravirt: Match paravirt patchlet field definition ordering to initialization ordering tip-bot for Ingo Molnar
  1 sibling, 2 replies; 28+ messages in thread
From: Ingo Molnar @ 2019-04-25  8:10 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: LKML, x86, Juergen Gross, Andi Kleen


* Thomas Gleixner <tglx@linutronix.de> wrote:

> On Thu, 25 Apr 2019, Ingo Molnar wrote:
> > > +# else
> > > +	.irq_restore_fl		= { 0x50, 0x9d },	// push %eax; popf
> > > +	.mmu_write_cr3		= { 0x0f, 0x22, 0xd8 },	// mov %eax, %cr3
> > > +	.cpu_iret		= { 0xcf },		// iret
> > > +# endif
> > 
> > I think these open-coded hexa versions are somewhat fragile as well, how 
> > about putting these into a .S file and controlling the sections in an LTO 
> > safe manner there?
> > 
> > That will also allow us to write proper asm, and global labels can be 
> > used to extract the patchlets and their length?
> 
> We are not changing these any other day and I really don't see a reason to
> have these things global just because.

Firstly, I'm not sure I understand the objection to using symbols: in an 
average distro kernel we have more than 75,000 symbols - as long as the 
namespace is clear they are there to be used, there's nothing wrong with 
them per se.

Secondly, my main point is that putting these code patching sequences 
into the .S it's easy to verify that the instructions and patchlets are 
what we claim them to be.

Right now they are randomly ordered data tables that don't match to the 
source code.

I know, because I tried. :-)

Here's the objdump -D output of the PATCH_XXL data table:

0000000000000010 <patch_data_xxl>:
  10:   fa                      cli    
  11:   fb                      sti    
  12:   57                      push   %rdi
  13:   9d                      popfq  
  14:   9c                      pushfq 
  15:   58                      pop    %rax
  16:   0f 20 d0                mov    %cr2,%rax
  19:   0f 20 d8                mov    %cr3,%rax
  1c:   0f 22 df                mov    %rdi,%cr3
  1f:   0f 09                   wbinvd 
  21:   0f 01 f8                swapgs 
  24:   48 0f 07                sysretq 
  27:   0f 01 f8                swapgs 
  2a:   48 89 f8                mov    %rdi,%rax

Note how this doesn't match up to the source code:

static const struct patch_xxl patch_data_xxl = {
        .irq_irq_disable        = { 0xfa },             // cli
        .irq_irq_enable         = { 0xfb },             // sti
        .irq_save_fl            = { 0x9c, 0x58 },       // pushf; pop %[re]ax
        .mmu_read_cr2           = { 0x0f, 0x20, 0xd0 }, // mov %cr2, %[re]ax
        .mmu_read_cr3           = { 0x0f, 0x20, 0xd8 }, // mov %cr3, %[re]ax
# ifdef CONFIG_X86_64
        .irq_restore_fl         = { 0x57, 0x9d },       // push %rdi; popfq
        .mmu_write_cr3          = { 0x0f, 0x22, 0xdf }, // mov %rdi, %cr3
        .cpu_wbinvd             = { 0x0f, 0x09 },       // wbinvd
        .cpu_usergs_sysret64    = { 0x0f, 0x01, 0xf8,
                                    0x48, 0x0f, 0x07 }, // swapgs; sysretq
        .cpu_swapgs             = { 0x0f, 0x01, 0xf8 }, // swapgs
        .mov64                  = { 0x48, 0x89, 0xf8 }, // mov %rdi, %rax
# else
        .irq_restore_fl         = { 0x50, 0x9d },       // push %eax; popf
        .mmu_write_cr3          = { 0x0f, 0x22, 0xd8 }, // mov %eax, %cr3
        .cpu_iret               = { 0xcf },             // iret
# endif
};

Note how they are reordered: in the generated code .irq_restore_fl comes 
before .irq_save_fl, etc. This is because the field ordering in struct 
patch_xxl does not match the initialization ordering of patch_data_xxl.

Third, beyond readability there's another advantage of my suggested 
approach as well: for example that way we could verify the passed in 
length with the patchlet length. Right now it's completely unverified:

        case PARAVIRT_PATCH(ops.m):                                     \
                return PATCH(data, ops##_##m, ibuf, len)

right now we don't check whether the 'len' passed in by the usage site 
matches the actual structure field length.

Although maybe we could do that with your C space structure as well.

Anyway, no strong feelings and I didn't want to create extra work for you 
- but I think at minimum we should do the patch below, which matches up 
the initialization order with the definition order - this at least makes 
the disassembly reviewable:

0000000000000010 <patch_data_xxl>:
  10:   fa                      cli    
  11:   fb                      sti    
  12:   9c                      pushfq 
  13:   58                      pop    %rax
  14:   0f 20 d0                mov    %cr2,%rax
  17:   0f 20 d8                mov    %cr3,%rax
  1a:   0f 22 df                mov    %rdi,%cr3
  1d:   57                      push   %rdi
  1e:   9d                      popfq  
  1f:   0f 09                   wbinvd 
  21:   0f 01 f8                swapgs 
  24:   48 0f 07                sysretq 
  27:   0f 01 f8                swapgs 
  2a:   48 89 f8                mov    %rdi,%rax

And yes, with that applied it verifies 100%. :-)

Thanks,

	Ingo

Signed-off-by: Ingo Molnar <mingo@kernel.org>

--- tip.orig/arch/x86/kernel/paravirt_patch.c
+++ tip/arch/x86/kernel/paravirt_patch.c
@@ -21,11 +21,11 @@
 struct patch_xxl {
 	const unsigned char	irq_irq_disable[1];
 	const unsigned char	irq_irq_enable[1];
-	const unsigned char	irq_restore_fl[2];
 	const unsigned char	irq_save_fl[2];
 	const unsigned char	mmu_read_cr2[3];
 	const unsigned char	mmu_read_cr3[3];
 	const unsigned char	mmu_write_cr3[3];
+	const unsigned char	irq_restore_fl[2];
 # ifdef CONFIG_X86_64
 	const unsigned char	cpu_wbinvd[2];
 	const unsigned char	cpu_usergs_sysret64[6];
@@ -43,16 +43,16 @@ static const struct patch_xxl patch_data
 	.mmu_read_cr2		= { 0x0f, 0x20, 0xd0 },	// mov %cr2, %[re]ax
 	.mmu_read_cr3		= { 0x0f, 0x20, 0xd8 },	// mov %cr3, %[re]ax
 # ifdef CONFIG_X86_64
-	.irq_restore_fl		= { 0x57, 0x9d },	// push %rdi; popfq
 	.mmu_write_cr3		= { 0x0f, 0x22, 0xdf },	// mov %rdi, %cr3
+	.irq_restore_fl		= { 0x57, 0x9d },	// push %rdi; popfq
 	.cpu_wbinvd		= { 0x0f, 0x09 },	// wbinvd
 	.cpu_usergs_sysret64	= { 0x0f, 0x01, 0xf8,
 				    0x48, 0x0f, 0x07 },	// swapgs; sysretq
 	.cpu_swapgs		= { 0x0f, 0x01, 0xf8 },	// swapgs
 	.mov64			= { 0x48, 0x89, 0xf8 },	// mov %rdi, %rax
 # else
-	.irq_restore_fl		= { 0x50, 0x9d },	// push %eax; popf
 	.mmu_write_cr3		= { 0x0f, 0x22, 0xd8 },	// mov %eax, %cr3
+	.irq_restore_fl		= { 0x50, 0x9d },	// push %eax; popf
 	.cpu_iret		= { 0xcf },		// iret
 # endif
 };

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [patch 3/3] x86/paravirt: Replace paravirt patch asm magic
  2019-04-25  8:08     ` [patch 3/3] x86/paravirt: Replace paravirt patch asm magic Peter Zijlstra
@ 2019-04-25  8:19       ` Peter Zijlstra
  2019-04-25  9:20       ` Ingo Molnar
  1 sibling, 0 replies; 28+ messages in thread
From: Peter Zijlstra @ 2019-04-25  8:19 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Thomas Gleixner, LKML, x86, Juergen Gross, Andi Kleen

On Thu, Apr 25, 2019 at 10:08:10AM +0200, Peter Zijlstra wrote:
> On Thu, Apr 25, 2019 at 08:52:09AM +0200, Ingo Molnar wrote:
> 
> > > -# ifdef CONFIG_PARAVIRT_XXL
> > > -DEF_NATIVE(irq, irq_disable, "cli");
> > > -DEF_NATIVE(irq, irq_enable, "sti");
> > > -DEF_NATIVE(irq, restore_fl, "push %eax; popf");
> > > -DEF_NATIVE(irq, save_fl, "pushf; pop %eax");
> > > -DEF_NATIVE(cpu, iret, "iret");
> > > -DEF_NATIVE(mmu, read_cr2, "mov %cr2, %eax");
> > > -DEF_NATIVE(mmu, write_cr3, "mov %eax, %cr3");
> > > -DEF_NATIVE(mmu, read_cr3, "mov %cr3, %eax");
> > 
> > > +static const struct patch_xxl patch_data_xxl = {
> > > +	.irq_irq_disable	= { 0xfa },		// cli
> > > +	.irq_irq_enable		= { 0xfb },		// sti
> > > +	.irq_save_fl		= { 0x9c, 0x58 },	// pushf; pop %[re]ax
> > > +	.mmu_read_cr2		= { 0x0f, 0x20, 0xd0 },	// mov %cr2, %[re]ax
> > > +	.mmu_read_cr3		= { 0x0f, 0x20, 0xd8 },	// mov %cr3, %[re]ax
> > > +# ifdef CONFIG_X86_64
> > > +	.irq_restore_fl		= { 0x57, 0x9d },	// push %rdi; popfq
> > > +	.mmu_write_cr3		= { 0x0f, 0x22, 0xdf },	// mov %rdi, %cr3
> > > +	.cpu_wbinvd		= { 0x0f, 0x09 },	// wbinvd
> > > +	.cpu_usergs_sysret64	= { 0x0f, 0x01, 0xf8,
> > > +				    0x48, 0x0f, 0x07 },	// swapgs; sysretq
> > > +	.cpu_swapgs		= { 0x0f, 0x01, 0xf8 },	// swapgs
> > > +	.mov64			= { 0x48, 0x89, 0xf8 },	// mov %rdi, %rax
> > > +# else
> > > +	.irq_restore_fl		= { 0x50, 0x9d },	// push %eax; popf
> > > +	.mmu_write_cr3		= { 0x0f, 0x22, 0xd8 },	// mov %eax, %cr3
> > > +	.cpu_iret		= { 0xcf },		// iret
> > > +# endif
> > 
> > I think these open-coded hexa versions are somewhat fragile as well, how 
> > about putting these into a .S file and controlling the sections in an LTO 
> > safe manner there?
> > 
> > That will also allow us to write proper asm, and global labels can be 
> > used to extract the patchlets and their length?
> 
> While I'm not fan either; I think that will be worse still, because it
> splits the information over multiple files.
> 
> The advantage of this form is that it is clear how long the instructions
> are, which is important for the patching. These immediates have to be
> shorter than 5 bytes because they overwrite the CALL/JMP to the paravirt
> function.
> 
> /me eyes .cpu_usergs_sysret64 and goes wtf..

OK, my bad, the indirect CALL/JMP is 6 bytes.. phew.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH] x86/paravirt: Detect oversized patching bugs as they happen and BUG_ON() to avoid later crashes
  2019-04-25  8:10       ` [PATCH] x86/paravirt: Match paravirt patchlet field definition ordering to initialization ordering Ingo Molnar
@ 2019-04-25  9:17         ` Ingo Molnar
  2019-04-25  9:21           ` Peter Zijlstra
  2019-05-24  7:58           ` [tip:x86/paravirt] x86/paravirt: Detect over-sized patching bugs in paravirt_patch_insns() tip-bot for Ingo Molnar
  2019-05-24  8:01         ` [tip:x86/paravirt] x86/paravirt: Match paravirt patchlet field definition ordering to initialization ordering tip-bot for Ingo Molnar
  1 sibling, 2 replies; 28+ messages in thread
From: Ingo Molnar @ 2019-04-25  9:17 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: LKML, x86, Juergen Gross, Andi Kleen, Peter Zijlstra


* Ingo Molnar <mingo@kernel.org> wrote:

> Third, beyond readability there's another advantage of my suggested 
> approach as well: for example that way we could verify the passed in 
> length with the patchlet length. Right now it's completely unverified:
> 
>         case PARAVIRT_PATCH(ops.m):                                     \
>                 return PATCH(data, ops##_##m, ibuf, len)
> 
> right now we don't check whether the 'len' passed in by the usage site 
> matches the actual structure field length.
> 
> Although maybe we could do that with your C space structure as well.

So I was wrong here, got confused by the 'len' name which doesn't mean 
what it suggests: it's not the length of the patchlet, but the 
maximum/original length of the patch site - which we trim down in 
paravirt_patch_insns():

unsigned paravirt_patch_insns(void *insnbuf, unsigned len,
                              const char *start, const char *end)
{
        unsigned insn_len = end - start;

        if (insn_len > len || start == NULL)
                insn_len = len;
        else
                memcpy(insnbuf, start, insn_len);

        return insn_len;
}

What is the logic behind silently returning 'len' here and not copying 
anything?

It basically means that we silently won't do any patching and the kernel 
will crash later on in mysterious ways, because paravirt patching is 
usually relied on.

Instead I think we should BUG_ON() that condition with the patch below - 
there's no way to continue successfully at that point.

I've tested this patch, with the vanilla kernel check never triggers, and 
if I intentionally increase the size of one of the patch templates to a 
too high value the assert triggers:

[    0.164385] kernel BUG at arch/x86/kernel/paravirt.c:167!

Without this patch a broken kernel randomly crashes in later places, 
after the silent patching failure.

Thanks,

	Ingo

Signed-off-by: Ingo Molnar <mingo@kernel.org>

--- tip.orig/arch/x86/kernel/paravirt.c
+++ tip/arch/x86/kernel/paravirt.c
@@ -163,10 +163,10 @@ unsigned paravirt_patch_insns(void *insn
 {
 	unsigned insn_len = end - start;
 
-	if (insn_len > len || start == NULL)
-		insn_len = len;
-	else
-		memcpy(insnbuf, start, insn_len);
+	/* Alternative instruction is too large for the patch site and we cannot continue: */
+	BUG_ON(insn_len > len || start == NULL);
+
+	memcpy(insnbuf, start, insn_len);
 
 	return insn_len;
 }

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [patch 3/3] x86/paravirt: Replace paravirt patch asm magic
  2019-04-25  8:08     ` [patch 3/3] x86/paravirt: Replace paravirt patch asm magic Peter Zijlstra
  2019-04-25  8:19       ` Peter Zijlstra
@ 2019-04-25  9:20       ` Ingo Molnar
  1 sibling, 0 replies; 28+ messages in thread
From: Ingo Molnar @ 2019-04-25  9:20 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Thomas Gleixner, LKML, x86, Juergen Gross, Andi Kleen


* Peter Zijlstra <peterz@infradead.org> wrote:

> > I think these open-coded hexa versions are somewhat fragile as well, how 
> > about putting these into a .S file and controlling the sections in an LTO 
> > safe manner there?
> > 
> > That will also allow us to write proper asm, and global labels can be 
> > used to extract the patchlets and their length?
> 
> While I'm not fan either; I think that will be worse still, because it
> splits the information over multiple files.

Yeah, so that's a drawback of the .S files.

> The advantage of this form is that it is clear how long the instructions
> are, which is important for the patching. These immediates have to be
> shorter than 5 bytes because they overwrite the CALL/JMP to the paravirt
> function.
> 
> /me eyes .cpu_usergs_sysret64 and goes wtf..

I just posted a patch that adds an assert to detect too large patching 
attempt: we'd silently ignore them before, which isn't healthy.

With the two patches I now like the .c version better.

Thomas, want me to organize all these changes, or do you want to?
Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] x86/paravirt: Detect oversized patching bugs as they happen and BUG_ON() to avoid later crashes
  2019-04-25  9:17         ` [PATCH] x86/paravirt: Detect oversized patching bugs as they happen and BUG_ON() to avoid later crashes Ingo Molnar
@ 2019-04-25  9:21           ` Peter Zijlstra
  2019-04-25  9:50             ` x86/paravirt: Detect over-sized patching bugs in paravirt_patch_call() Ingo Molnar
  2019-05-24  7:58           ` [tip:x86/paravirt] x86/paravirt: Detect over-sized patching bugs in paravirt_patch_insns() tip-bot for Ingo Molnar
  1 sibling, 1 reply; 28+ messages in thread
From: Peter Zijlstra @ 2019-04-25  9:21 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Thomas Gleixner, LKML, x86, Juergen Gross, Andi Kleen

On Thu, Apr 25, 2019 at 11:17:17AM +0200, Ingo Molnar wrote:
> It basically means that we silently won't do any patching and the kernel 
> will crash later on in mysterious ways, because paravirt patching is 
> usually relied on.

That's OK. The compiler emits an indirect CALL/JMP to the pv_ops
structure contents. That _should_ stay valid and function correctly at
all times.

Not patching should at the very least cause a WARN with RETPOLINE
kernels though, we hard rely on the patching actually working and
writing at least a direct call.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* x86/paravirt: Detect over-sized patching bugs in paravirt_patch_call()
  2019-04-25  9:21           ` Peter Zijlstra
@ 2019-04-25  9:50             ` Ingo Molnar
  2019-04-25 10:22               ` Peter Zijlstra
  2019-05-24  7:59               ` [tip:x86/paravirt] " tip-bot for Ingo Molnar
  0 siblings, 2 replies; 28+ messages in thread
From: Ingo Molnar @ 2019-04-25  9:50 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Thomas Gleixner, LKML, x86, Juergen Gross, Andi Kleen


* Peter Zijlstra <peterz@infradead.org> wrote:

> On Thu, Apr 25, 2019 at 11:17:17AM +0200, Ingo Molnar wrote:
> > It basically means that we silently won't do any patching and the kernel 
> > will crash later on in mysterious ways, because paravirt patching is 
> > usually relied on.
> 
> That's OK. The compiler emits an indirect CALL/JMP to the pv_ops
> structure contents. That _should_ stay valid and function correctly at
> all times.

It might result in a correctly executing kernel in terms of code 
generation, but it doesn't result in a viable kernel: some of the places 
rely on the patching going through and don't know what to do when it 
doesn't and misbehave or crash in interesting ways.

Guess how I know this. ;-)

> Not patching should at the very least cause a WARN with RETPOLINE 
> kernels though, we hard rely on the patching actually working and 
> writing at least a direct call.

We hard rely in other places too.

How about just BUG_ON()-ing in paravirt_patch_call() as well? It's not 
like these are *supposed* to fail, and if they do we want to know it, 
even if the outcome might be more benign in some cases?

I.e. how about the patch below? This not only makes it more apparent when 
patching fails, it also makes the kernel smaller and removes an ugly 
#ifdef.

I tried it with a richly paravirt-enabled kernel and no patching bugs 
were detected.

Thanks,

	Ingo

Signed-off-by: Ingo Molnar <mingo@kernel.org>

---
 arch/x86/kernel/paravirt.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 7f9121f2fdac..544d386ded45 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -73,21 +73,21 @@ struct branch {
 static unsigned paravirt_patch_call(void *insnbuf, const void *target,
 				    unsigned long addr, unsigned len)
 {
+	const int call_len = 5;
 	struct branch *b = insnbuf;
-	unsigned long delta = (unsigned long)target - (addr+5);
+	unsigned long delta = (unsigned long)target - (addr+call_len);
 
-	if (len < 5) {
-#ifdef CONFIG_RETPOLINE
-		WARN_ONCE(1, "Failing to patch indirect CALL in %ps\n", (void *)addr);
-#endif
-		return len;	/* call too long for patch site */
+	if (len < call_len) {
+		pr_warn("paravirt: Failed to patch indirect CALL at %ps\n", (void *)addr);
+		/* Kernel might not be viable if patching fails, bail out: */
+		BUG_ON(1);
 	}
 
 	b->opcode = 0xe8; /* call */
 	b->delta = delta;
-	BUILD_BUG_ON(sizeof(*b) != 5);
+	BUILD_BUG_ON(sizeof(*b) != call_len);
 
-	return 5;
+	return call_len;
 }
 
 #ifdef CONFIG_PARAVIRT_XXL

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* Re: x86/paravirt: Detect over-sized patching bugs in paravirt_patch_call()
  2019-04-25  9:50             ` x86/paravirt: Detect over-sized patching bugs in paravirt_patch_call() Ingo Molnar
@ 2019-04-25 10:22               ` Peter Zijlstra
  2019-04-25 10:57                 ` Ingo Molnar
  2019-05-24  7:59               ` [tip:x86/paravirt] " tip-bot for Ingo Molnar
  1 sibling, 1 reply; 28+ messages in thread
From: Peter Zijlstra @ 2019-04-25 10:22 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Thomas Gleixner, LKML, x86, Juergen Gross, Andi Kleen

On Thu, Apr 25, 2019 at 11:50:39AM +0200, Ingo Molnar wrote:
> 
> * Peter Zijlstra <peterz@infradead.org> wrote:
> 
> > On Thu, Apr 25, 2019 at 11:17:17AM +0200, Ingo Molnar wrote:
> > > It basically means that we silently won't do any patching and the kernel 
> > > will crash later on in mysterious ways, because paravirt patching is 
> > > usually relied on.
> > 
> > That's OK. The compiler emits an indirect CALL/JMP to the pv_ops
> > structure contents. That _should_ stay valid and function correctly at
> > all times.
> 
> It might result in a correctly executing kernel in terms of code 
> generation, but it doesn't result in a viable kernel: some of the places 
> rely on the patching going through and don't know what to do when it 
> doesn't and misbehave or crash in interesting ways.
> 
> Guess how I know this. ;-)

What sites would that be? It really should work AFAIK.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: x86/paravirt: Detect over-sized patching bugs in paravirt_patch_call()
  2019-04-25 10:22               ` Peter Zijlstra
@ 2019-04-25 10:57                 ` Ingo Molnar
  2019-04-25 11:30                   ` Juergen Gross
  2019-04-25 11:40                   ` Peter Zijlstra
  0 siblings, 2 replies; 28+ messages in thread
From: Ingo Molnar @ 2019-04-25 10:57 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Thomas Gleixner, LKML, x86, Juergen Gross, Andi Kleen


* Peter Zijlstra <peterz@infradead.org> wrote:

> On Thu, Apr 25, 2019 at 11:50:39AM +0200, Ingo Molnar wrote:
> > 
> > * Peter Zijlstra <peterz@infradead.org> wrote:
> > 
> > > On Thu, Apr 25, 2019 at 11:17:17AM +0200, Ingo Molnar wrote:
> > > > It basically means that we silently won't do any patching and the kernel 
> > > > will crash later on in mysterious ways, because paravirt patching is 
> > > > usually relied on.
> > > 
> > > That's OK. The compiler emits an indirect CALL/JMP to the pv_ops
> > > structure contents. That _should_ stay valid and function correctly at
> > > all times.
> > 
> > It might result in a correctly executing kernel in terms of code 
> > generation, but it doesn't result in a viable kernel: some of the places 
> > rely on the patching going through and don't know what to do when it 
> > doesn't and misbehave or crash in interesting ways.
> > 
> > Guess how I know this. ;-)
> 
> What sites would that be? It really should work AFAIK.

So for example I tried to increasing the size of one of the struct 
patch_xxl members:

--- a/arch/x86/kernel/paravirt_patch.c
+++ b/arch/x86/kernel/paravirt_patch.c
@@ -28,7 +28,7 @@ struct patch_xxl {
 	const unsigned char	irq_restore_fl[2];
 # ifdef CONFIG_X86_64
 	const unsigned char	cpu_wbinvd[2];
-	const unsigned char	cpu_usergs_sysret64[6];
+	const unsigned char	cpu_usergs_sysret64[60];
 	const unsigned char	cpu_swapgs[3];
 	const unsigned char	mov64[3];
 # else

Which with the vanilla kernel crashes on boot much, much later:

[    2.478026] PANIC: double fault, error_code: 0x0

But in any case, even if many of the others will work if the patching 
fails silently, is there any case where we'd treat patching failure as an 
acceptable case?

BUG_ON() in paravirt kernels is an easily debuggable condition and beats 
the above kinds of symptoms. But I can turn it into a WARN_ON_ONCE() if 
you think that's better?

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: x86/paravirt: Detect over-sized patching bugs in paravirt_patch_call()
  2019-04-25 10:57                 ` Ingo Molnar
@ 2019-04-25 11:30                   ` Juergen Gross
  2019-04-25 12:30                     ` Juergen Gross
  2019-04-25 11:40                   ` Peter Zijlstra
  1 sibling, 1 reply; 28+ messages in thread
From: Juergen Gross @ 2019-04-25 11:30 UTC (permalink / raw)
  To: Ingo Molnar, Peter Zijlstra; +Cc: Thomas Gleixner, LKML, x86, Andi Kleen

On 25/04/2019 12:57, Ingo Molnar wrote:
> 
> * Peter Zijlstra <peterz@infradead.org> wrote:
> 
>> On Thu, Apr 25, 2019 at 11:50:39AM +0200, Ingo Molnar wrote:
>>>
>>> * Peter Zijlstra <peterz@infradead.org> wrote:
>>>
>>>> On Thu, Apr 25, 2019 at 11:17:17AM +0200, Ingo Molnar wrote:
>>>>> It basically means that we silently won't do any patching and the kernel 
>>>>> will crash later on in mysterious ways, because paravirt patching is 
>>>>> usually relied on.
>>>>
>>>> That's OK. The compiler emits an indirect CALL/JMP to the pv_ops
>>>> structure contents. That _should_ stay valid and function correctly at
>>>> all times.
>>>
>>> It might result in a correctly executing kernel in terms of code 
>>> generation, but it doesn't result in a viable kernel: some of the places 
>>> rely on the patching going through and don't know what to do when it 
>>> doesn't and misbehave or crash in interesting ways.
>>>
>>> Guess how I know this. ;-)
>>
>> What sites would that be? It really should work AFAIK.
> 
> So for example I tried to increasing the size of one of the struct 
> patch_xxl members:
> 
> --- a/arch/x86/kernel/paravirt_patch.c
> +++ b/arch/x86/kernel/paravirt_patch.c
> @@ -28,7 +28,7 @@ struct patch_xxl {
>  	const unsigned char	irq_restore_fl[2];
>  # ifdef CONFIG_X86_64
>  	const unsigned char	cpu_wbinvd[2];
> -	const unsigned char	cpu_usergs_sysret64[6];
> +	const unsigned char	cpu_usergs_sysret64[60];
>  	const unsigned char	cpu_swapgs[3];
>  	const unsigned char	mov64[3];
>  # else
> 
> Which with the vanilla kernel crashes on boot much, much later:
> 
> [    2.478026] PANIC: double fault, error_code: 0x0

Sure, there is no NOP padding applied. Pre-populating the area with
1 byte NOPs would avoid the crash.

> But in any case, even if many of the others will work if the patching 
> fails silently, is there any case where we'd treat patching failure as an 
> acceptable case?
> 
> BUG_ON() in paravirt kernels is an easily debuggable condition and beats 
> the above kinds of symptoms. But I can turn it into a WARN_ON_ONCE() if 
> you think that's better?

I'd prefer the BUG_ON(). It's not as if those conditions will occur on
very few machines only. In case some patching isn't working we should
catch those issues early.


Juergen

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: x86/paravirt: Detect over-sized patching bugs in paravirt_patch_call()
  2019-04-25 10:57                 ` Ingo Molnar
  2019-04-25 11:30                   ` Juergen Gross
@ 2019-04-25 11:40                   ` Peter Zijlstra
  2019-04-25 12:30                     ` Peter Zijlstra
  1 sibling, 1 reply; 28+ messages in thread
From: Peter Zijlstra @ 2019-04-25 11:40 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Thomas Gleixner, LKML, x86, Juergen Gross, Andi Kleen

On Thu, Apr 25, 2019 at 12:57:45PM +0200, Ingo Molnar wrote:
> * Peter Zijlstra <peterz@infradead.org> wrote:
> > On Thu, Apr 25, 2019 at 11:50:39AM +0200, Ingo Molnar wrote:
> > > * Peter Zijlstra <peterz@infradead.org> wrote:
> > > > On Thu, Apr 25, 2019 at 11:17:17AM +0200, Ingo Molnar wrote:
> > > > > It basically means that we silently won't do any patching and the kernel 
> > > > > will crash later on in mysterious ways, because paravirt patching is 
> > > > > usually relied on.
> > > > 
> > > > That's OK. The compiler emits an indirect CALL/JMP to the pv_ops
> > > > structure contents. That _should_ stay valid and function correctly at
> > > > all times.
> > > 
> > > It might result in a correctly executing kernel in terms of code 
> > > generation, but it doesn't result in a viable kernel: some of the places 
> > > rely on the patching going through and don't know what to do when it 
> > > doesn't and misbehave or crash in interesting ways.
> > > 
> > > Guess how I know this. ;-)
> > 
> > What sites would that be? It really should work AFAIK.
> 
> So for example I tried increasing the size of one of the struct 
> patch_xxl members:
> 
> --- a/arch/x86/kernel/paravirt_patch.c
> +++ b/arch/x86/kernel/paravirt_patch.c
> @@ -28,7 +28,7 @@ struct patch_xxl {
>  	const unsigned char	irq_restore_fl[2];
>  # ifdef CONFIG_X86_64
>  	const unsigned char	cpu_wbinvd[2];
> -	const unsigned char	cpu_usergs_sysret64[6];
> +	const unsigned char	cpu_usergs_sysret64[60];
>  	const unsigned char	cpu_swapgs[3];
>  	const unsigned char	mov64[3];
>  # else

So this then fails to patch the immediate; but the compiler emitted:

175:       ff 25 00 00 00 00       jmpq   *0x0(%rip)        # 17b <syscall_return_via_sysret+0x75>
		177: R_X86_64_PC32      pv_ops+0xfc

and pv_ops+0xfc is (+4 because of reloc magic):

void               (*usergs_sysret64)(void);                             /* 0x100   0x8 */

which defaults to:

arch/x86/kernel/paravirt.c:     .cpu.usergs_sysret64    = native_usergs_sysret64,

which in turn reads like:

0000000000000000 <native_usergs_sysret64>:
0:       0f 01 f8                swapgs
3:       48 0f 07                sysretq

So I _really_ don't understand how:

> Which with the vanilla kernel crashes on boot much, much later:
> 
> [    2.478026] PANIC: double fault, error_code: 0x0

happens.

> But in any case, even if many of the others will work if the patching 
> fails silently, is there any case where we'd treat patching failure as an 
> acceptable case?

It really should just work. And we need to figure out why it comes
unstuck. Can you print the code when it fails patching?

> BUG_ON() in paravirt kernels is an easily debuggable condition and beats 
> the above kinds of symptoms. But I can turn it into a WARN_ON_ONCE() if 
> you think that's better?

Not patching should be a performance issue; not a correctness issue, as
per the above. So WARN is the right thing.


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: x86/paravirt: Detect over-sized patching bugs in paravirt_patch_call()
  2019-04-25 11:30                   ` Juergen Gross
@ 2019-04-25 12:30                     ` Juergen Gross
  0 siblings, 0 replies; 28+ messages in thread
From: Juergen Gross @ 2019-04-25 12:30 UTC (permalink / raw)
  To: Ingo Molnar, Peter Zijlstra; +Cc: Thomas Gleixner, LKML, x86, Andi Kleen

On 25/04/2019 13:30, Juergen Gross wrote:
> On 25/04/2019 12:57, Ingo Molnar wrote:
>>
>> * Peter Zijlstra <peterz@infradead.org> wrote:
>>
>>> On Thu, Apr 25, 2019 at 11:50:39AM +0200, Ingo Molnar wrote:
>>>>
>>>> * Peter Zijlstra <peterz@infradead.org> wrote:
>>>>
>>>>> On Thu, Apr 25, 2019 at 11:17:17AM +0200, Ingo Molnar wrote:
>>>>>> It basically means that we silently won't do any patching and the kernel 
>>>>>> will crash later on in mysterious ways, because paravirt patching is 
>>>>>> usually relied on.
>>>>>
>>>>> That's OK. The compiler emits an indirect CALL/JMP to the pv_ops
>>>>> structure contents. That _should_ stay valid and function correctly at
>>>>> all times.
>>>>
>>>> It might result in a correctly executing kernel in terms of code 
>>>> generation, but it doesn't result in a viable kernel: some of the places 
>>>> rely on the patching going through and don't know what to do when it 
>>>> doesn't and misbehave or crash in interesting ways.
>>>>
>>>> Guess how I know this. ;-)
>>>
>>> What sites would that be? It really should work AFAIK.
>>
>> So for example I tried increasing the size of one of the struct 
>> patch_xxl members:
>>
>> --- a/arch/x86/kernel/paravirt_patch.c
>> +++ b/arch/x86/kernel/paravirt_patch.c
>> @@ -28,7 +28,7 @@ struct patch_xxl {
>>  	const unsigned char	irq_restore_fl[2];
>>  # ifdef CONFIG_X86_64
>>  	const unsigned char	cpu_wbinvd[2];
>> -	const unsigned char	cpu_usergs_sysret64[6];
>> +	const unsigned char	cpu_usergs_sysret64[60];
>>  	const unsigned char	cpu_swapgs[3];
>>  	const unsigned char	mov64[3];
>>  # else
>>
>> Which with the vanilla kernel crashes on boot much, much later:
>>
>> [    2.478026] PANIC: double fault, error_code: 0x0
> 
> Sure, there is no NOP padding applied. Pre-populating the area with
> 1 byte NOPs would avoid the crash.

This is wrong, of course.

But the indirect jmp is failing as struct pv_ops isn't mapped by the
user page tables.


Juergen

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: x86/paravirt: Detect over-sized patching bugs in paravirt_patch_call()
  2019-04-25 11:40                   ` Peter Zijlstra
@ 2019-04-25 12:30                     ` Peter Zijlstra
  0 siblings, 0 replies; 28+ messages in thread
From: Peter Zijlstra @ 2019-04-25 12:30 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Thomas Gleixner, LKML, x86, Juergen Gross, Andi Kleen

On Thu, Apr 25, 2019 at 01:40:04PM +0200, Peter Zijlstra wrote:
> So this then fails to patch the immediate; but the compiler emitted:
> 
> 175:       ff 25 00 00 00 00       jmpq   *0x0(%rip)        # 17b <syscall_return_via_sysret+0x75>
> 		177: R_X86_64_PC32      pv_ops+0xfc
> 
> and pv_ops+0xfc is (+4 because of reloc magic):
> 
> void               (*usergs_sysret64)(void);                             /* 0x100   0x8 */
> 
> which defaults to:
> 
> arch/x86/kernel/paravirt.c:     .cpu.usergs_sysret64    = native_usergs_sysret64,
> 
> which in turn reads like:
> 
> 0000000000000000 <native_usergs_sysret64>:
> 0:       0f 01 f8                swapgs
> 3:       48 0f 07                sysretq
> 
> So I _really_ don't understand how:
> 
> > Which with the vanilla kernel crashes on boot much, much later:
> > 
> > [    2.478026] PANIC: double fault, error_code: 0x0
> 
> happens.

Just spoke to Juergen on IRC; the most likely explanation is that
(because of PTI) the pv_ops table isn't mapped in the user page-tables
and therefore the indirect jump comes unstuck.



^ permalink raw reply	[flat|nested] 28+ messages in thread

* [tip:x86/paravirt] x86/paravirt: Remove bogus extern declarations
  2019-04-24 13:41 ` [patch 1/3] x86/paravirt: Remove bogus extern declarations Thomas Gleixner
  2019-04-25  7:31   ` [tip:x86/paravirt] " tip-bot for Thomas Gleixner
@ 2019-05-24  7:58   ` tip-bot for Thomas Gleixner
  1 sibling, 0 replies; 28+ messages in thread
From: tip-bot for Thomas Gleixner @ 2019-05-24  7:58 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: bp, jgross, brgerst, peterz, riel, tglx, luto, hpa, mingo,
	torvalds, dvlasenk, linux-kernel, dave.hansen

Commit-ID:  e05196401657cff3178dc392b739e520b26d4aef
Gitweb:     https://git.kernel.org/tip/e05196401657cff3178dc392b739e520b26d4aef
Author:     Thomas Gleixner <tglx@linutronix.de>
AuthorDate: Wed, 24 Apr 2019 15:41:16 +0200
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Thu, 25 Apr 2019 11:35:55 +0200

x86/paravirt: Remove bogus extern declarations

These functions are already declared in asm/paravirt.h

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@surriel.com>
Link: http://lkml.kernel.org/r/20190424134223.501598258@linutronix.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/paravirt_patch_32.c | 3 ---
 arch/x86/kernel/paravirt_patch_64.c | 3 ---
 2 files changed, 6 deletions(-)

diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c
index de138d3912e4..05d771f81e74 100644
--- a/arch/x86/kernel/paravirt_patch_32.c
+++ b/arch/x86/kernel/paravirt_patch_32.c
@@ -23,9 +23,6 @@ DEF_NATIVE(lock, queued_spin_unlock, "movb $0, (%eax)");
 DEF_NATIVE(lock, vcpu_is_preempted, "xor %eax, %eax");
 #endif
 
-extern bool pv_is_native_spin_unlock(void);
-extern bool pv_is_native_vcpu_is_preempted(void);
-
 unsigned native_patch(u8 type, void *ibuf, unsigned long addr, unsigned len)
 {
 #define PATCH_SITE(ops, x)					\
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
index 9d9e04b31077..bd1558f90cfb 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -29,9 +29,6 @@ DEF_NATIVE(lock, queued_spin_unlock, "movb $0, (%rdi)");
 DEF_NATIVE(lock, vcpu_is_preempted, "xor %eax, %eax");
 #endif
 
-extern bool pv_is_native_spin_unlock(void);
-extern bool pv_is_native_vcpu_is_preempted(void);
-
 unsigned native_patch(u8 type, void *ibuf, unsigned long addr, unsigned len)
 {
 #define PATCH_SITE(ops, x)					\

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [tip:x86/paravirt] x86/paravirt: Detect over-sized patching bugs in paravirt_patch_insns()
  2019-04-25  9:17         ` [PATCH] x86/paravirt: Detect oversized patching bugs as they happen and BUG_ON() to avoid later crashes Ingo Molnar
  2019-04-25  9:21           ` Peter Zijlstra
@ 2019-05-24  7:58           ` tip-bot for Ingo Molnar
  1 sibling, 0 replies; 28+ messages in thread
From: tip-bot for Ingo Molnar @ 2019-05-24  7:58 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: linux-kernel, dave.hansen, a.p.zijlstra, mingo, riel, hpa,
	brgerst, torvalds, jgross, luto, tglx, bp, dvlasenk, peterz

Commit-ID:  2777cae2b19d4a08ad233b3504c19c6f7a6a2ef3
Gitweb:     https://git.kernel.org/tip/2777cae2b19d4a08ad233b3504c19c6f7a6a2ef3
Author:     Ingo Molnar <mingo@kernel.org>
AuthorDate: Thu, 25 Apr 2019 11:17:17 +0200
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Thu, 25 Apr 2019 12:00:31 +0200

x86/paravirt: Detect over-sized patching bugs in paravirt_patch_insns()

So paravirt_patch_insns() contains this gem of logic:

unsigned paravirt_patch_insns(void *insnbuf, unsigned len,
                              const char *start, const char *end)
{
        unsigned insn_len = end - start;

        if (insn_len > len || start == NULL)
                insn_len = len;
        else
                memcpy(insnbuf, start, insn_len);

        return insn_len;
}

Note how 'len' (size of the original instruction) is checked against the new
instruction, and silently discarded with no warning printed whatsoever.

This crashes the kernel in funny ways if the patching template is buggy,
and usually in much later places.

Instead do a direct BUG_ON(), there's no way to continue successfully at that point.

I've tested this patch, with the vanilla kernel check never triggers, and
if I intentionally increase the size of one of the patch templates to a
too high value the assert triggers:

[    0.164385] kernel BUG at arch/x86/kernel/paravirt.c:167!

Without this patch a broken kernel randomly crashes in later places,
after the silent patching failure.

Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@surriel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20190425091717.GA72229@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/paravirt.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index c0e0101133f3..7f9121f2fdac 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -163,10 +163,10 @@ unsigned paravirt_patch_insns(void *insnbuf, unsigned len,
 {
 	unsigned insn_len = end - start;
 
-	if (insn_len > len || start == NULL)
-		insn_len = len;
-	else
-		memcpy(insnbuf, start, insn_len);
+	/* Alternative instruction is too large for the patch site and we cannot continue: */
+	BUG_ON(insn_len > len || start == NULL);
+
+	memcpy(insnbuf, start, insn_len);
 
 	return insn_len;
 }

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [tip:x86/paravirt] x86/paravirt: Detect over-sized patching bugs in paravirt_patch_call()
  2019-04-25  9:50             ` x86/paravirt: Detect over-sized patching bugs in paravirt_patch_call() Ingo Molnar
  2019-04-25 10:22               ` Peter Zijlstra
@ 2019-05-24  7:59               ` tip-bot for Ingo Molnar
  1 sibling, 0 replies; 28+ messages in thread
From: tip-bot for Ingo Molnar @ 2019-05-24  7:59 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: mingo, brgerst, jgross, hpa, peterz, bp, tglx, dvlasenk,
	linux-kernel, luto, torvalds

Commit-ID:  11e86dc7f2746210f9c7dc10deaa7658f8dc8350
Gitweb:     https://git.kernel.org/tip/11e86dc7f2746210f9c7dc10deaa7658f8dc8350
Author:     Ingo Molnar <mingo@kernel.org>
AuthorDate: Thu, 25 Apr 2019 11:50:39 +0200
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Thu, 25 Apr 2019 12:00:44 +0200

x86/paravirt: Detect over-sized patching bugs in paravirt_patch_call()

paravirt_patch_call() currently handles patching failures inconsistently:
we generate a warning in the retpoline case, but don't in other cases where
we might end up with a non-working kernel as well.

So just convert it all to a BUG_ON(), these patching calls are *not* supposed
to fail, and if they do we want to know it immediately.

This also makes the kernel smaller and removes an ugly #ifdef.

I tried it with a richly paravirt-enabled kernel and no patching bugs
were detected.

Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20190425095039.GC115378@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/paravirt.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 7f9121f2fdac..544d386ded45 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -73,21 +73,21 @@ struct branch {
 static unsigned paravirt_patch_call(void *insnbuf, const void *target,
 				    unsigned long addr, unsigned len)
 {
+	const int call_len = 5;
 	struct branch *b = insnbuf;
-	unsigned long delta = (unsigned long)target - (addr+5);
+	unsigned long delta = (unsigned long)target - (addr+call_len);
 
-	if (len < 5) {
-#ifdef CONFIG_RETPOLINE
-		WARN_ONCE(1, "Failing to patch indirect CALL in %ps\n", (void *)addr);
-#endif
-		return len;	/* call too long for patch site */
+	if (len < call_len) {
+		pr_warn("paravirt: Failed to patch indirect CALL at %ps\n", (void *)addr);
+		/* Kernel might not be viable if patching fails, bail out: */
+		BUG_ON(1);
 	}
 
 	b->opcode = 0xe8; /* call */
 	b->delta = delta;
-	BUILD_BUG_ON(sizeof(*b) != 5);
+	BUILD_BUG_ON(sizeof(*b) != call_len);
 
-	return 5;
+	return call_len;
 }
 
 #ifdef CONFIG_PARAVIRT_XXL

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [tip:x86/paravirt] x86/paravirt: Unify the 32/64 bit paravirt patching code
  2019-04-24 13:41 ` [patch 2/3] x86/paravirt: Unify 32/64 bit patch code Thomas Gleixner
  2019-04-25  7:32   ` [tip:x86/paravirt] " tip-bot for Thomas Gleixner
@ 2019-05-24  8:00   ` tip-bot for Thomas Gleixner
  1 sibling, 0 replies; 28+ messages in thread
From: tip-bot for Thomas Gleixner @ 2019-05-24  8:00 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: dave.hansen, linux-kernel, bp, dvlasenk, luto, brgerst, riel,
	jgross, peterz, hpa, tglx, mingo, torvalds

Commit-ID:  fb2af0712fe8831dc152b0b5dd8bc516970da336
Gitweb:     https://git.kernel.org/tip/fb2af0712fe8831dc152b0b5dd8bc516970da336
Author:     Thomas Gleixner <tglx@linutronix.de>
AuthorDate: Wed, 24 Apr 2019 15:41:17 +0200
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Thu, 25 Apr 2019 12:00:44 +0200

x86/paravirt: Unify the 32/64 bit paravirt patching code

Large parts of these two files are identical. Merge them together.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@surriel.com>
Link: http://lkml.kernel.org/r/20190424134223.603491680@linutronix.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/Makefile                           |  4 +-
 .../{paravirt_patch_64.c => paravirt_patch.c}      | 62 ++++++++++++++++-----
 arch/x86/kernel/paravirt_patch_32.c                | 64 ----------------------
 3 files changed, 50 insertions(+), 80 deletions(-)

diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 00b7e27bc2b7..62e78a3fd31e 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -30,7 +30,7 @@ KASAN_SANITIZE_paravirt.o				:= n
 
 OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o	:= y
 OBJECT_FILES_NON_STANDARD_test_nx.o			:= y
-OBJECT_FILES_NON_STANDARD_paravirt_patch_$(BITS).o	:= y
+OBJECT_FILES_NON_STANDARD_paravirt_patch.o		:= y
 
 ifdef CONFIG_FRAME_POINTER
 OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o		:= y
@@ -112,7 +112,7 @@ obj-$(CONFIG_AMD_NB)		+= amd_nb.o
 obj-$(CONFIG_DEBUG_NMI_SELFTEST) += nmi_selftest.o
 
 obj-$(CONFIG_KVM_GUEST)		+= kvm.o kvmclock.o
-obj-$(CONFIG_PARAVIRT)		+= paravirt.o paravirt_patch_$(BITS).o
+obj-$(CONFIG_PARAVIRT)		+= paravirt.o paravirt_patch.o
 obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= paravirt-spinlocks.o
 obj-$(CONFIG_PARAVIRT_CLOCK)	+= pvclock.o
 obj-$(CONFIG_X86_PMEM_LEGACY_DEVICE) += pmem.o
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch.c
similarity index 59%
rename from arch/x86/kernel/paravirt_patch_64.c
rename to arch/x86/kernel/paravirt_patch.c
index bd1558f90cfb..a47899db9932 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch.c
@@ -1,9 +1,11 @@
 // SPDX-License-Identifier: GPL-2.0
+#include <linux/stringify.h>
+
 #include <asm/paravirt.h>
 #include <asm/asm-offsets.h>
-#include <linux/stringify.h>
 
-#ifdef CONFIG_PARAVIRT_XXL
+#ifdef CONFIG_X86_64
+# ifdef CONFIG_PARAVIRT_XXL
 DEF_NATIVE(irq, irq_disable, "cli");
 DEF_NATIVE(irq, irq_enable, "sti");
 DEF_NATIVE(irq, restore_fl, "pushq %rdi; popfq");
@@ -12,24 +14,49 @@ DEF_NATIVE(mmu, read_cr2, "movq %cr2, %rax");
 DEF_NATIVE(mmu, read_cr3, "movq %cr3, %rax");
 DEF_NATIVE(mmu, write_cr3, "movq %rdi, %cr3");
 DEF_NATIVE(cpu, wbinvd, "wbinvd");
-
 DEF_NATIVE(cpu, usergs_sysret64, "swapgs; sysretq");
 DEF_NATIVE(cpu, swapgs, "swapgs");
 DEF_NATIVE(, mov64, "mov %rdi, %rax");
 
-unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len)
+unsigned int paravirt_patch_ident_64(void *insnbuf, unsigned int len)
 {
-	return paravirt_patch_insns(insnbuf, len,
-				    start__mov64, end__mov64);
+	return paravirt_patch_insns(insnbuf, len, start__mov64, end__mov64);
 }
-#endif
+# endif /* CONFIG_PARAVIRT_XXL */
 
-#if defined(CONFIG_PARAVIRT_SPINLOCKS)
+# ifdef CONFIG_PARAVIRT_SPINLOCKS
 DEF_NATIVE(lock, queued_spin_unlock, "movb $0, (%rdi)");
 DEF_NATIVE(lock, vcpu_is_preempted, "xor %eax, %eax");
-#endif
+# endif
+
+#else /* CONFIG_X86_64 */
+
+# ifdef CONFIG_PARAVIRT_XXL
+DEF_NATIVE(irq, irq_disable, "cli");
+DEF_NATIVE(irq, irq_enable, "sti");
+DEF_NATIVE(irq, restore_fl, "push %eax; popf");
+DEF_NATIVE(irq, save_fl, "pushf; pop %eax");
+DEF_NATIVE(cpu, iret, "iret");
+DEF_NATIVE(mmu, read_cr2, "mov %cr2, %eax");
+DEF_NATIVE(mmu, write_cr3, "mov %eax, %cr3");
+DEF_NATIVE(mmu, read_cr3, "mov %cr3, %eax");
+
+unsigned int paravirt_patch_ident_64(void *insnbuf, unsigned int len)
+{
+	/* arg in %edx:%eax, return in %edx:%eax */
+	return 0;
+}
+# endif /* CONFIG_PARAVIRT_XXL */
+
+# ifdef CONFIG_PARAVIRT_SPINLOCKS
+DEF_NATIVE(lock, queued_spin_unlock, "movb $0, (%eax)");
+DEF_NATIVE(lock, vcpu_is_preempted, "xor %eax, %eax");
+# endif
+
+#endif /* !CONFIG_X86_64 */
 
-unsigned native_patch(u8 type, void *ibuf, unsigned long addr, unsigned len)
+unsigned int native_patch(u8 type, void *ibuf, unsigned long addr,
+			  unsigned int len)
 {
 #define PATCH_SITE(ops, x)					\
 	case PARAVIRT_PATCH(ops.x):				\
@@ -41,14 +68,21 @@ unsigned native_patch(u8 type, void *ibuf, unsigned long addr, unsigned len)
 		PATCH_SITE(irq, save_fl);
 		PATCH_SITE(irq, irq_enable);
 		PATCH_SITE(irq, irq_disable);
-		PATCH_SITE(cpu, usergs_sysret64);
-		PATCH_SITE(cpu, swapgs);
-		PATCH_SITE(cpu, wbinvd);
+
 		PATCH_SITE(mmu, read_cr2);
 		PATCH_SITE(mmu, read_cr3);
 		PATCH_SITE(mmu, write_cr3);
+
+# ifdef CONFIG_X86_64
+		PATCH_SITE(cpu, usergs_sysret64);
+		PATCH_SITE(cpu, swapgs);
+		PATCH_SITE(cpu, wbinvd);
+# else
+		PATCH_SITE(cpu, iret);
+# endif
 #endif
-#if defined(CONFIG_PARAVIRT_SPINLOCKS)
+
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
 	case PARAVIRT_PATCH(lock.queued_spin_unlock):
 		if (pv_is_native_spin_unlock())
 			return paravirt_patch_insns(ibuf, len,
diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c
deleted file mode 100644
index 05d771f81e74..000000000000
--- a/arch/x86/kernel/paravirt_patch_32.c
+++ /dev/null
@@ -1,64 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <asm/paravirt.h>
-
-#ifdef CONFIG_PARAVIRT_XXL
-DEF_NATIVE(irq, irq_disable, "cli");
-DEF_NATIVE(irq, irq_enable, "sti");
-DEF_NATIVE(irq, restore_fl, "push %eax; popf");
-DEF_NATIVE(irq, save_fl, "pushf; pop %eax");
-DEF_NATIVE(cpu, iret, "iret");
-DEF_NATIVE(mmu, read_cr2, "mov %cr2, %eax");
-DEF_NATIVE(mmu, write_cr3, "mov %eax, %cr3");
-DEF_NATIVE(mmu, read_cr3, "mov %cr3, %eax");
-
-unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len)
-{
-	/* arg in %edx:%eax, return in %edx:%eax */
-	return 0;
-}
-#endif
-
-#if defined(CONFIG_PARAVIRT_SPINLOCKS)
-DEF_NATIVE(lock, queued_spin_unlock, "movb $0, (%eax)");
-DEF_NATIVE(lock, vcpu_is_preempted, "xor %eax, %eax");
-#endif
-
-unsigned native_patch(u8 type, void *ibuf, unsigned long addr, unsigned len)
-{
-#define PATCH_SITE(ops, x)					\
-	case PARAVIRT_PATCH(ops.x):				\
-		return paravirt_patch_insns(ibuf, len, start_##ops##_##x, end_##ops##_##x)
-
-	switch (type) {
-#ifdef CONFIG_PARAVIRT_XXL
-		PATCH_SITE(irq, irq_disable);
-		PATCH_SITE(irq, irq_enable);
-		PATCH_SITE(irq, restore_fl);
-		PATCH_SITE(irq, save_fl);
-		PATCH_SITE(cpu, iret);
-		PATCH_SITE(mmu, read_cr2);
-		PATCH_SITE(mmu, read_cr3);
-		PATCH_SITE(mmu, write_cr3);
-#endif
-#if defined(CONFIG_PARAVIRT_SPINLOCKS)
-	case PARAVIRT_PATCH(lock.queued_spin_unlock):
-		if (pv_is_native_spin_unlock())
-			return paravirt_patch_insns(ibuf, len,
-						    start_lock_queued_spin_unlock,
-						    end_lock_queued_spin_unlock);
-		break;
-
-	case PARAVIRT_PATCH(lock.vcpu_is_preempted):
-		if (pv_is_native_vcpu_is_preempted())
-			return paravirt_patch_insns(ibuf, len,
-						    start_lock_vcpu_is_preempted,
-						    end_lock_vcpu_is_preempted);
-		break;
-#endif
-
-	default:
-		break;
-	}
-#undef PATCH_SITE
-	return paravirt_patch_default(type, ibuf, addr, len);
-}

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [tip:x86/paravirt] x86/paravirt: Replace the paravirt patch asm magic
  2019-04-24 13:41 ` [patch 3/3] x86/paravirt: Replace paravirt patch asm magic Thomas Gleixner
  2019-04-25  6:52   ` Ingo Molnar
@ 2019-05-24  8:00   ` tip-bot for Thomas Gleixner
  1 sibling, 0 replies; 28+ messages in thread
From: tip-bot for Thomas Gleixner @ 2019-05-24  8:00 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: tglx, mingo, dvlasenk, luto, dave.hansen, riel, linux-kernel, ak,
	peterz, hpa, bp, torvalds, jgross, brgerst

Commit-ID:  0b9d2fc1d0d628c94c6866a2ed3005c6730db512
Gitweb:     https://git.kernel.org/tip/0b9d2fc1d0d628c94c6866a2ed3005c6730db512
Author:     Thomas Gleixner <tglx@linutronix.de>
AuthorDate: Wed, 24 Apr 2019 15:41:18 +0200
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Thu, 25 Apr 2019 12:00:44 +0200

x86/paravirt: Replace the paravirt patch asm magic

The magic macro DEF_NATIVE() in the paravirt patching code uses inline
assembly to generate a data table for patching in the native instructions.

While clever this is falling apart with LTO and even aside of LTO the
construct is just working by chance according to GCC folks.

Aside of that the tables are constant data and not some form of magic
text.

As these constructs are not subject to frequent changes it is not a
maintenance issue to convert them to regular data tables which are
initialized with hex bytes.

Create a new set of macros and data structures to store the instruction
sequences and convert the code over.

Reported-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@surriel.com>
Link: http://lkml.kernel.org/r/20190424134223.690835713@linutronix.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/paravirt_types.h |   4 -
 arch/x86/kernel/paravirt_patch.c      | 142 +++++++++++++++++++---------------
 2 files changed, 81 insertions(+), 65 deletions(-)

diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 2474e434a6f7..ae8d6ddfe39a 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -370,10 +370,6 @@ extern struct paravirt_patch_template pv_ops;
 /* Simple instruction patching code. */
 #define NATIVE_LABEL(a,x,b) "\n\t.globl " a #x "_" #b "\n" a #x "_" #b ":\n\t"
 
-#define DEF_NATIVE(ops, name, code)					\
-	__visible extern const char start_##ops##_##name[], end_##ops##_##name[];	\
-	asm(NATIVE_LABEL("start_", ops, name) code NATIVE_LABEL("end_", ops, name))
-
 unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len);
 unsigned paravirt_patch_default(u8 type, void *insnbuf,
 				unsigned long addr, unsigned len);
diff --git a/arch/x86/kernel/paravirt_patch.c b/arch/x86/kernel/paravirt_patch.c
index a47899db9932..60e7a5e236c0 100644
--- a/arch/x86/kernel/paravirt_patch.c
+++ b/arch/x86/kernel/paravirt_patch.c
@@ -4,103 +4,123 @@
 #include <asm/paravirt.h>
 #include <asm/asm-offsets.h>
 
-#ifdef CONFIG_X86_64
-# ifdef CONFIG_PARAVIRT_XXL
-DEF_NATIVE(irq, irq_disable, "cli");
-DEF_NATIVE(irq, irq_enable, "sti");
-DEF_NATIVE(irq, restore_fl, "pushq %rdi; popfq");
-DEF_NATIVE(irq, save_fl, "pushfq; popq %rax");
-DEF_NATIVE(mmu, read_cr2, "movq %cr2, %rax");
-DEF_NATIVE(mmu, read_cr3, "movq %cr3, %rax");
-DEF_NATIVE(mmu, write_cr3, "movq %rdi, %cr3");
-DEF_NATIVE(cpu, wbinvd, "wbinvd");
-DEF_NATIVE(cpu, usergs_sysret64, "swapgs; sysretq");
-DEF_NATIVE(cpu, swapgs, "swapgs");
-DEF_NATIVE(, mov64, "mov %rdi, %rax");
+#define PSTART(d, m)							\
+	patch_data_##d.m
 
-unsigned int paravirt_patch_ident_64(void *insnbuf, unsigned int len)
-{
-	return paravirt_patch_insns(insnbuf, len, start__mov64, end__mov64);
-}
-# endif /* CONFIG_PARAVIRT_XXL */
+#define PEND(d, m)							\
+	(PSTART(d, m) + sizeof(patch_data_##d.m))
 
-# ifdef CONFIG_PARAVIRT_SPINLOCKS
-DEF_NATIVE(lock, queued_spin_unlock, "movb $0, (%rdi)");
-DEF_NATIVE(lock, vcpu_is_preempted, "xor %eax, %eax");
-# endif
+#define PATCH(d, m, ibuf, len)						\
+	paravirt_patch_insns(ibuf, len, PSTART(d, m), PEND(d, m))
 
-#else /* CONFIG_X86_64 */
+#define PATCH_CASE(ops, m, data, ibuf, len)				\
+	case PARAVIRT_PATCH(ops.m):					\
+		return PATCH(data, ops##_##m, ibuf, len)
 
-# ifdef CONFIG_PARAVIRT_XXL
-DEF_NATIVE(irq, irq_disable, "cli");
-DEF_NATIVE(irq, irq_enable, "sti");
-DEF_NATIVE(irq, restore_fl, "push %eax; popf");
-DEF_NATIVE(irq, save_fl, "pushf; pop %eax");
-DEF_NATIVE(cpu, iret, "iret");
-DEF_NATIVE(mmu, read_cr2, "mov %cr2, %eax");
-DEF_NATIVE(mmu, write_cr3, "mov %eax, %cr3");
-DEF_NATIVE(mmu, read_cr3, "mov %cr3, %eax");
+#ifdef CONFIG_PARAVIRT_XXL
+struct patch_xxl {
+	const unsigned char	irq_irq_disable[1];
+	const unsigned char	irq_irq_enable[1];
+	const unsigned char	irq_restore_fl[2];
+	const unsigned char	irq_save_fl[2];
+	const unsigned char	mmu_read_cr2[3];
+	const unsigned char	mmu_read_cr3[3];
+	const unsigned char	mmu_write_cr3[3];
+# ifdef CONFIG_X86_64
+	const unsigned char	cpu_wbinvd[2];
+	const unsigned char	cpu_usergs_sysret64[6];
+	const unsigned char	cpu_swapgs[3];
+	const unsigned char	mov64[3];
+# else
+	const unsigned char	cpu_iret[1];
+# endif
+};
+
+static const struct patch_xxl patch_data_xxl = {
+	.irq_irq_disable	= { 0xfa },		// cli
+	.irq_irq_enable		= { 0xfb },		// sti
+	.irq_save_fl		= { 0x9c, 0x58 },	// pushf; pop %[re]ax
+	.mmu_read_cr2		= { 0x0f, 0x20, 0xd0 },	// mov %cr2, %[re]ax
+	.mmu_read_cr3		= { 0x0f, 0x20, 0xd8 },	// mov %cr3, %[re]ax
+# ifdef CONFIG_X86_64
+	.irq_restore_fl		= { 0x57, 0x9d },	// push %rdi; popfq
+	.mmu_write_cr3		= { 0x0f, 0x22, 0xdf },	// mov %rdi, %cr3
+	.cpu_wbinvd		= { 0x0f, 0x09 },	// wbinvd
+	.cpu_usergs_sysret64	= { 0x0f, 0x01, 0xf8,
+				    0x48, 0x0f, 0x07 },	// swapgs; sysretq
+	.cpu_swapgs		= { 0x0f, 0x01, 0xf8 },	// swapgs
+	.mov64			= { 0x48, 0x89, 0xf8 },	// mov %rdi, %rax
+# else
+	.irq_restore_fl		= { 0x50, 0x9d },	// push %eax; popf
+	.mmu_write_cr3		= { 0x0f, 0x22, 0xd8 },	// mov %eax, %cr3
+	.cpu_iret		= { 0xcf },		// iret
+# endif
+};
 
 unsigned int paravirt_patch_ident_64(void *insnbuf, unsigned int len)
 {
-	/* arg in %edx:%eax, return in %edx:%eax */
+#ifdef CONFIG_X86_64
+	return PATCH(xxl, mov64, insnbuf, len);
+#endif
 	return 0;
 }
 # endif /* CONFIG_PARAVIRT_XXL */
 
-# ifdef CONFIG_PARAVIRT_SPINLOCKS
-DEF_NATIVE(lock, queued_spin_unlock, "movb $0, (%eax)");
-DEF_NATIVE(lock, vcpu_is_preempted, "xor %eax, %eax");
-# endif
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+struct patch_lock {
+	unsigned char queued_spin_unlock[3];
+	unsigned char vcpu_is_preempted[2];
+};
+
+static const struct patch_lock patch_data_lock = {
+	.vcpu_is_preempted	= { 0x31, 0xc0 },	// xor %eax, %eax
 
-#endif /* !CONFIG_X86_64 */
+# ifdef CONFIG_X86_64
+	.queued_spin_unlock	= { 0xc6, 0x07, 0x00 },	// movb $0, (%rdi)
+# else
+	.queued_spin_unlock	= { 0xc6, 0x00, 0x00 },	// movb $0, (%eax)
+# endif
+};
+#endif /* CONFIG_PARAVIRT_SPINLOCKS */
 
 unsigned int native_patch(u8 type, void *ibuf, unsigned long addr,
 			  unsigned int len)
 {
-#define PATCH_SITE(ops, x)					\
-	case PARAVIRT_PATCH(ops.x):				\
-		return paravirt_patch_insns(ibuf, len, start_##ops##_##x, end_##ops##_##x)
-
 	switch (type) {
+
 #ifdef CONFIG_PARAVIRT_XXL
-		PATCH_SITE(irq, restore_fl);
-		PATCH_SITE(irq, save_fl);
-		PATCH_SITE(irq, irq_enable);
-		PATCH_SITE(irq, irq_disable);
+	PATCH_CASE(irq, restore_fl, xxl, ibuf, len);
+	PATCH_CASE(irq, save_fl, xxl, ibuf, len);
+	PATCH_CASE(irq, irq_enable, xxl, ibuf, len);
+	PATCH_CASE(irq, irq_disable, xxl, ibuf, len);
 
-		PATCH_SITE(mmu, read_cr2);
-		PATCH_SITE(mmu, read_cr3);
-		PATCH_SITE(mmu, write_cr3);
+	PATCH_CASE(mmu, read_cr2, xxl, ibuf, len);
+	PATCH_CASE(mmu, read_cr3, xxl, ibuf, len);
+	PATCH_CASE(mmu, write_cr3, xxl, ibuf, len);
 
 # ifdef CONFIG_X86_64
-		PATCH_SITE(cpu, usergs_sysret64);
-		PATCH_SITE(cpu, swapgs);
-		PATCH_SITE(cpu, wbinvd);
+	PATCH_CASE(cpu, usergs_sysret64, xxl, ibuf, len);
+	PATCH_CASE(cpu, swapgs, xxl, ibuf, len);
+	PATCH_CASE(cpu, wbinvd, xxl, ibuf, len);
 # else
-		PATCH_SITE(cpu, iret);
+	PATCH_CASE(cpu, iret, xxl, ibuf, len);
 # endif
 #endif
 
 #ifdef CONFIG_PARAVIRT_SPINLOCKS
 	case PARAVIRT_PATCH(lock.queued_spin_unlock):
 		if (pv_is_native_spin_unlock())
-			return paravirt_patch_insns(ibuf, len,
-						    start_lock_queued_spin_unlock,
-						    end_lock_queued_spin_unlock);
+			return PATCH(lock, queued_spin_unlock, ibuf, len);
 		break;
 
 	case PARAVIRT_PATCH(lock.vcpu_is_preempted):
 		if (pv_is_native_vcpu_is_preempted())
-			return paravirt_patch_insns(ibuf, len,
-						    start_lock_vcpu_is_preempted,
-						    end_lock_vcpu_is_preempted);
+			return PATCH(lock, vcpu_is_preempted, ibuf, len);
 		break;
 #endif
-
 	default:
 		break;
 	}
-#undef PATCH_SITE
+
 	return paravirt_patch_default(type, ibuf, addr, len);
 }

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [tip:x86/paravirt] x86/paravirt: Match paravirt patchlet field definition ordering to initialization ordering
  2019-04-25  8:10       ` [PATCH] x86/paravirt: Match paravirt patchlet field definition ordering to initialization ordering Ingo Molnar
  2019-04-25  9:17         ` [PATCH] x86/paravirt: Detect oversized patching bugs as they happen and BUG_ON() to avoid later crashes Ingo Molnar
@ 2019-05-24  8:01         ` tip-bot for Ingo Molnar
  1 sibling, 0 replies; 28+ messages in thread
From: tip-bot for Ingo Molnar @ 2019-05-24  8:01 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: brgerst, torvalds, jgross, peterz, dvlasenk, linux-kernel, bp,
	hpa, luto, dave.hansen, mingo, riel, tglx

Commit-ID:  fc93dfd9345bb8b29a62b21cb0447dd1a3815f91
Gitweb:     https://git.kernel.org/tip/fc93dfd9345bb8b29a62b21cb0447dd1a3815f91
Author:     Ingo Molnar <mingo@kernel.org>
AuthorDate: Thu, 25 Apr 2019 10:10:12 +0200
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Thu, 25 Apr 2019 12:00:44 +0200

x86/paravirt: Match paravirt patchlet field definition ordering to initialization ordering

Here's the objdump -D output of the PATCH_XXL data table:

0000000000000010 <patch_data_xxl>:
  10:   fa                      cli
  11:   fb                      sti
  12:   57                      push   %rdi
  13:   9d                      popfq
  14:   9c                      pushfq
  15:   58                      pop    %rax
  16:   0f 20 d0                mov    %cr2,%rax
  19:   0f 20 d8                mov    %cr3,%rax
  1c:   0f 22 df                mov    %rdi,%cr3
  1f:   0f 09                   wbinvd
  21:   0f 01 f8                swapgs
  24:   48 0f 07                sysretq
  27:   0f 01 f8                swapgs
  2a:   48 89 f8                mov    %rdi,%rax

Note how this doesn't match up to the source code:

static const struct patch_xxl patch_data_xxl = {
        .irq_irq_disable        = { 0xfa },             // cli
        .irq_irq_enable         = { 0xfb },             // sti
        .irq_save_fl            = { 0x9c, 0x58 },       // pushf; pop %[re]ax
        .mmu_read_cr2           = { 0x0f, 0x20, 0xd0 }, // mov %cr2, %[re]ax
        .mmu_read_cr3           = { 0x0f, 0x20, 0xd8 }, // mov %cr3, %[re]ax
        .irq_restore_fl         = { 0x57, 0x9d },       // push %rdi; popfq
        .mmu_write_cr3          = { 0x0f, 0x22, 0xdf }, // mov %rdi, %cr3
        .cpu_wbinvd             = { 0x0f, 0x09 },       // wbinvd
        .cpu_usergs_sysret64    = { 0x0f, 0x01, 0xf8,
                                    0x48, 0x0f, 0x07 }, // swapgs; sysretq
        .cpu_swapgs             = { 0x0f, 0x01, 0xf8 }, // swapgs
        .mov64                  = { 0x48, 0x89, 0xf8 }, // mov %rdi, %rax
        .irq_restore_fl         = { 0x50, 0x9d },       // push %eax; popf
        .mmu_write_cr3          = { 0x0f, 0x22, 0xd8 }, // mov %eax, %cr3
        .cpu_iret               = { 0xcf },             // iret
};

Note how they are reordered: in the generated code .irq_restore_fl comes
before .irq_save_fl, etc. This is because the field ordering in struct
patch_xxl does not match the initialization ordering of patch_data_xxl.

Match up the initialization order with the definition order - this makes
the disassembly easily reviewable:

0000000000000010 <patch_data_xxl>:
  10:   fa                      cli
  11:   fb                      sti
  12:   9c                      pushfq
  13:   58                      pop    %rax
  14:   0f 20 d0                mov    %cr2,%rax
  17:   0f 20 d8                mov    %cr3,%rax
  1a:   0f 22 df                mov    %rdi,%cr3
  1d:   57                      push   %rdi
  1e:   9d                      popfq
  1f:   0f 09                   wbinvd
  21:   0f 01 f8                swapgs
  24:   48 0f 07                sysretq
  27:   0f 01 f8                swapgs
  2a:   48 89 f8                mov    %rdi,%rax

Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@surriel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20190425081012.GA115378@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/paravirt_patch.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/paravirt_patch.c b/arch/x86/kernel/paravirt_patch.c
index 60e7a5e236c0..37b1d43d1e17 100644
--- a/arch/x86/kernel/paravirt_patch.c
+++ b/arch/x86/kernel/paravirt_patch.c
@@ -21,11 +21,11 @@
 struct patch_xxl {
 	const unsigned char	irq_irq_disable[1];
 	const unsigned char	irq_irq_enable[1];
-	const unsigned char	irq_restore_fl[2];
 	const unsigned char	irq_save_fl[2];
 	const unsigned char	mmu_read_cr2[3];
 	const unsigned char	mmu_read_cr3[3];
 	const unsigned char	mmu_write_cr3[3];
+	const unsigned char	irq_restore_fl[2];
 # ifdef CONFIG_X86_64
 	const unsigned char	cpu_wbinvd[2];
 	const unsigned char	cpu_usergs_sysret64[6];
@@ -43,16 +43,16 @@ static const struct patch_xxl patch_data_xxl = {
 	.mmu_read_cr2		= { 0x0f, 0x20, 0xd0 },	// mov %cr2, %[re]ax
 	.mmu_read_cr3		= { 0x0f, 0x20, 0xd8 },	// mov %cr3, %[re]ax
 # ifdef CONFIG_X86_64
-	.irq_restore_fl		= { 0x57, 0x9d },	// push %rdi; popfq
 	.mmu_write_cr3		= { 0x0f, 0x22, 0xdf },	// mov %rdi, %cr3
+	.irq_restore_fl		= { 0x57, 0x9d },	// push %rdi; popfq
 	.cpu_wbinvd		= { 0x0f, 0x09 },	// wbinvd
 	.cpu_usergs_sysret64	= { 0x0f, 0x01, 0xf8,
 				    0x48, 0x0f, 0x07 },	// swapgs; sysretq
 	.cpu_swapgs		= { 0x0f, 0x01, 0xf8 },	// swapgs
 	.mov64			= { 0x48, 0x89, 0xf8 },	// mov %rdi, %rax
 # else
-	.irq_restore_fl		= { 0x50, 0x9d },	// push %eax; popf
 	.mmu_write_cr3		= { 0x0f, 0x22, 0xd8 },	// mov %eax, %cr3
+	.irq_restore_fl		= { 0x50, 0x9d },	// push %eax; popf
 	.cpu_iret		= { 0xcf },		// iret
 # endif
 };

^ permalink raw reply related	[flat|nested] 28+ messages in thread

end of thread, other threads:[~2019-05-24  8:01 UTC | newest]

Thread overview: 28+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-04-24 13:41 [patch 0/3] x86/paravirt: Rework paravirt patching Thomas Gleixner
2019-04-24 13:41 ` [patch 1/3] x86/paravirt: Remove bogus extern declarations Thomas Gleixner
2019-04-25  7:31   ` [tip:x86/paravirt] " tip-bot for Thomas Gleixner
2019-05-24  7:58   ` tip-bot for Thomas Gleixner
2019-04-24 13:41 ` [patch 2/3] x86/paravirt: Unify 32/64 bit patch code Thomas Gleixner
2019-04-25  7:32   ` [tip:x86/paravirt] " tip-bot for Thomas Gleixner
2019-05-24  8:00   ` [tip:x86/paravirt] x86/paravirt: Unify the 32/64 bit paravirt patching code tip-bot for Thomas Gleixner
2019-04-24 13:41 ` [patch 3/3] x86/paravirt: Replace paravirt patch asm magic Thomas Gleixner
2019-04-25  6:52   ` Ingo Molnar
2019-04-25  7:22     ` Thomas Gleixner
2019-04-25  7:46       ` Juergen Gross
2019-04-25  8:10       ` [PATCH] x86/paravirt: Match paravirt patchlet field definition ordering to initialization ordering Ingo Molnar
2019-04-25  9:17         ` [PATCH] x86/paravirt: Detect oversized patching bugs as they happen and BUG_ON() to avoid later crashes Ingo Molnar
2019-04-25  9:21           ` Peter Zijlstra
2019-04-25  9:50             ` x86/paravirt: Detect over-sized patching bugs in paravirt_patch_call() Ingo Molnar
2019-04-25 10:22               ` Peter Zijlstra
2019-04-25 10:57                 ` Ingo Molnar
2019-04-25 11:30                   ` Juergen Gross
2019-04-25 12:30                     ` Juergen Gross
2019-04-25 11:40                   ` Peter Zijlstra
2019-04-25 12:30                     ` Peter Zijlstra
2019-05-24  7:59               ` [tip:x86/paravirt] " tip-bot for Ingo Molnar
2019-05-24  7:58           ` [tip:x86/paravirt] x86/paravirt: Detect over-sized patching bugs in paravirt_patch_insns() tip-bot for Ingo Molnar
2019-05-24  8:01         ` [tip:x86/paravirt] x86/paravirt: Match paravirt patchlet field definition ordering to initialization ordering tip-bot for Ingo Molnar
2019-04-25  8:08     ` [patch 3/3] x86/paravirt: Replace paravirt patch asm magic Peter Zijlstra
2019-04-25  8:19       ` Peter Zijlstra
2019-04-25  9:20       ` Ingo Molnar
2019-05-24  8:00   ` [tip:x86/paravirt] x86/paravirt: Replace the " tip-bot for Thomas Gleixner

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).