On 16.11.20 14:04, Peter Zijlstra wrote:
> On Mon, Nov 16, 2020 at 12:56:32PM +0100, Jürgen Groß wrote:
>>>>>>>> static inline notrace unsigned long arch_local_save_flags(void)
>>>>>>>> {
>>>>>>>> 	PVOP_CALL_ARGS;
>>>>>>>> 	PVOP_TEST_NULL(irq.save_fl);
>>>>>>>> 	asm_inline volatile(ALTERNATIVE(paravirt_alt(PARAVIRT_CALL),
>>>>>>>> 					"PUSHF; POP _ASM_AX",
>>>>>>>> 					X86_FEATURE_NATIVE)
>>
>> I am wondering whether we really want a new feature (basically "not
>> XENPV"). We could use ~X86_FEATURE_XENPV and teach apply_alternatives()
>> to understand negated features (yes, this limits the number of features
>> to 32767, but I don't think this is a real problem for quite some time).
>>
>> Thoughts?
>
> I went with the simple thing for now... If we ever want/need another
> negative alternative I suppose we can do as you suggest...
>
> I was still poking at objtool to actually dtrt though..

I'd like to include this part in my series (with a different solution
for the restore_fl part, as suggested by Andy before).

Peter, are you fine with me taking your patch and adding your SoB?


Juergen

>
> ---
> diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
> index dad350d42ecf..cc88f358bac5 100644
> --- a/arch/x86/include/asm/cpufeatures.h
> +++ b/arch/x86/include/asm/cpufeatures.h
> @@ -237,6 +237,7 @@
>  #define X86_FEATURE_VMCALL		( 8*32+18) /* "" Hypervisor supports the VMCALL instruction */
>  #define X86_FEATURE_VMW_VMMCALL	( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */
>  #define X86_FEATURE_SEV_ES		( 8*32+20) /* AMD Secure Encrypted Virtualization - Encrypted State */
> +#define X86_FEATURE_NOT_XENPV		( 8*32+21) /* "" inverse of X86_FEATURE_XENPV */
>
>  /* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */
>  #define X86_FEATURE_FSGSBASE		( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/
> diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
> index d25cc6830e89..e2a9d3e6b7ad 100644
> --- a/arch/x86/include/asm/paravirt.h
> +++ b/arch/x86/include/asm/paravirt.h
> @@ -645,22 +645,56 @@ bool __raw_callee_save___native_vcpu_is_preempted(long cpu);
>  #ifdef CONFIG_PARAVIRT_XXL
>  static inline notrace unsigned long arch_local_save_flags(void)
>  {
> -	return PVOP_CALLEE0(unsigned long, irq.save_fl);
> +	PVOP_CALL_ARGS;
> +	PVOP_TEST_NULL(irq.save_fl);
> +	asm_inline volatile(ALTERNATIVE(paravirt_alt(PARAVIRT_CALL),
> +					"pushf; pop %%" _ASM_AX ";",
> +					X86_FEATURE_NOT_XENPV)
> +			    : PVOP_CALLEE_CLOBBERS, ASM_CALL_CONSTRAINT
> +			    : paravirt_type(irq.save_fl.func),
> +			      paravirt_clobber(CLBR_RET_REG)
> +			    : "memory", "cc");
> +	return __eax;
>  }
>
>  static inline notrace void arch_local_irq_restore(unsigned long f)
>  {
> -	PVOP_VCALLEE1(irq.restore_fl, f);
> +	PVOP_CALL_ARGS;
> +	PVOP_TEST_NULL(irq.restore_fl);
> +	asm_inline volatile(ALTERNATIVE(paravirt_alt(PARAVIRT_CALL),
> +					"push %%" _ASM_ARG1 "; popf;",
> +					X86_FEATURE_NOT_XENPV)
> +			    : PVOP_VCALLEE_CLOBBERS, ASM_CALL_CONSTRAINT
> +			    : paravirt_type(irq.restore_fl.func),
> +			      paravirt_clobber(CLBR_RET_REG),
> +			      PVOP_CALL_ARG1(f)
> +			    : "memory", "cc");
>  }
>
>  static inline notrace void arch_local_irq_disable(void)
>  {
> -	PVOP_VCALLEE0(irq.irq_disable);
> +	PVOP_CALL_ARGS;
> +	PVOP_TEST_NULL(irq.irq_disable);
> +	asm_inline volatile(ALTERNATIVE(paravirt_alt(PARAVIRT_CALL),
> +					"cli;",
> +					X86_FEATURE_NOT_XENPV)
> +			    : PVOP_VCALLEE_CLOBBERS, ASM_CALL_CONSTRAINT
> +			    : paravirt_type(irq.irq_disable.func),
> +			      paravirt_clobber(CLBR_RET_REG)
> +			    : "memory", "cc");
>  }
>
>  static inline notrace void arch_local_irq_enable(void)
>  {
> -	PVOP_VCALLEE0(irq.irq_enable);
> +	PVOP_CALL_ARGS;
> +	PVOP_TEST_NULL(irq.irq_enable);
> +	asm_inline volatile(ALTERNATIVE(paravirt_alt(PARAVIRT_CALL),
> +					"sti;",
> +					X86_FEATURE_NOT_XENPV)
> +			    : PVOP_VCALLEE_CLOBBERS, ASM_CALL_CONSTRAINT
> +			    : paravirt_type(irq.irq_enable.func),
> +			      paravirt_clobber(CLBR_RET_REG)
> +			    : "memory", "cc");
>  }
>
>  static inline notrace unsigned long arch_local_irq_save(void)
> diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
> index 2400ad62f330..5bd8f5503652 100644
> --- a/arch/x86/kernel/alternative.c
> +++ b/arch/x86/kernel/alternative.c
> @@ -723,6 +723,19 @@ void __init alternative_instructions(void)
>  	 * patching.
>  	 */
>
> +	if (!boot_cpu_has(X86_FEATURE_XENPV))
> +		setup_force_cpu_cap(X86_FEATURE_NOT_XENPV);
> +
> +	/*
> +	 * First patch paravirt functions, such that we overwrite the indirect
> +	 * call with the direct call.
> +	 */
> +	apply_paravirt(__parainstructions, __parainstructions_end);
> +
> +	/*
> +	 * Then patch alternatives, such that those paravirt calls that are in
> +	 * alternatives can be overwritten by their immediate fragments.
> +	 */
>  	apply_alternatives(__alt_instructions, __alt_instructions_end);
>
>  #ifdef CONFIG_SMP
> @@ -741,8 +754,6 @@ void __init alternative_instructions(void)
>  	}
>  #endif
>
> -	apply_paravirt(__parainstructions, __parainstructions_end);
> -
>  	restart_nmi();
>  	alternatives_patched = 1;
>  }
> diff --git a/arch/x86/kernel/paravirt_patch.c b/arch/x86/kernel/paravirt_patch.c
> index ace6e334cb39..867498db53ad 100644
> --- a/arch/x86/kernel/paravirt_patch.c
> +++ b/arch/x86/kernel/paravirt_patch.c
> @@ -33,13 +33,9 @@ struct patch_xxl {
>  };
>
>  static const struct patch_xxl patch_data_xxl = {
> -	.irq_irq_disable	= { 0xfa },			// cli
> -	.irq_irq_enable		= { 0xfb },			// sti
> -	.irq_save_fl		= { 0x9c, 0x58 },		// pushf; pop %[re]ax
>  	.mmu_read_cr2		= { 0x0f, 0x20, 0xd0 },		// mov %cr2, %[re]ax
>  	.mmu_read_cr3		= { 0x0f, 0x20, 0xd8 },		// mov %cr3, %[re]ax
>  	.mmu_write_cr3		= { 0x0f, 0x22, 0xdf },		// mov %rdi, %cr3
> -	.irq_restore_fl		= { 0x57, 0x9d },		// push %rdi; popfq
>  	.cpu_wbinvd		= { 0x0f, 0x09 },		// wbinvd
>  	.cpu_usergs_sysret64	= { 0x0f, 0x01, 0xf8,
>  				    0x48, 0x0f, 0x07 },		// swapgs; sysretq
> @@ -76,11 +72,6 @@ unsigned int native_patch(u8 type, void *insn_buff, unsigned long addr,
>  	switch (type) {
>
>  #ifdef CONFIG_PARAVIRT_XXL
> -	PATCH_CASE(irq, restore_fl, xxl, insn_buff, len);
> -	PATCH_CASE(irq, save_fl, xxl, insn_buff, len);
> -	PATCH_CASE(irq, irq_enable, xxl, insn_buff, len);
> -	PATCH_CASE(irq, irq_disable, xxl, insn_buff, len);
> -
>  	PATCH_CASE(mmu, read_cr2, xxl, insn_buff, len);
>  	PATCH_CASE(mmu, read_cr3, xxl, insn_buff, len);
>  	PATCH_CASE(mmu, write_cr3, xxl, insn_buff, len);
>
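
For reference, a minimal sketch of the "negated feature" idea raised in
the quoted discussion above, assuming the top bit of the 16-bit feature
number stored in struct alt_instr were reserved as a "not" flag. The
names ALT_NOT_BIT, ALT_NOT() and alt_feature_enabled() are purely
illustrative and not part of the patch above:

  /* Reserve bit 15 of the feature number as a "negate the test" flag. */
  #define ALT_NOT_BIT		15
  #define ALT_NOT(feature)	((feature) | (1 << ALT_NOT_BIT))

  /* Test apply_alternatives() could use instead of a plain boot_cpu_has(). */
  static bool alt_feature_enabled(u16 feature)
  {
  	bool have = boot_cpu_has(feature & ~(1 << ALT_NOT_BIT));

  	/* Invert the result when the "not" bit is set. */
  	return (feature & (1 << ALT_NOT_BIT)) ? !have : have;
  }

With something along these lines the ALTERNATIVE()s above could use
ALT_NOT(X86_FEATURE_XENPV) instead of a dedicated X86_FEATURE_NOT_XENPV
bit, at the cost of limiting the feature space to 32767 entries.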