From mboxrd@z Thu Jan 1 00:00:00 1970 From: Boris Ostrovsky Subject: Re: [PATCH 1/2] x86/xen: Set EFER.NX and EFER.SCE in PVH guests Date: Fri, 05 Sep 2014 11:15:00 -0400 Message-ID: <5409D374.3000009@oracle.com> References: <1409926309-8345-1-git-send-email-david.vrabel@citrix.com> <1409926309-8345-2-git-send-email-david.vrabel@citrix.com> Mime-Version: 1.0 Content-Type: text/plain; charset="us-ascii"; Format="flowed" Content-Transfer-Encoding: 7bit Return-path: Received: from mail6.bemta14.messagelabs.com ([193.109.254.103]) by lists.xen.org with esmtp (Exim 4.72) (envelope-from ) id 1XPvDf-00061B-BD for xen-devel@lists.xenproject.org; Fri, 05 Sep 2014 15:14:35 +0000 In-Reply-To: <1409926309-8345-2-git-send-email-david.vrabel@citrix.com> List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xen.org Errors-To: xen-devel-bounces@lists.xen.org To: David Vrabel , xen-devel@lists.xenproject.org List-Id: xen-devel@lists.xenproject.org On 09/05/2014 10:11 AM, David Vrabel wrote: > From: Mukesh Rathor > > This fixes two bugs in PVH guests: > > - Not setting EFER.NX means the NX bit in page table entries is > ignored on Intel processors and causes reserved bit page faults on > AMD processors. > > - After the Xen commit 7645640d6ff1 (x86/PVH: don't set EFER_SCE for > pvh guest) PVH guests are required to set EFER.SCE to enable the > SYSCALL instruction. > > Secondary VCPUs are started with pagetables with the NX bit set so > EFER.NX must be set before using any stack or data segment. > xen_pvh_cpu_early_init() is the new secondary VCPU entry point that > sets EFER before jumping to cpu_bringup_and_idle(). > > Signed-off-by: Mukesh Rathor > Signed-off-by: David Vrabel > --- > arch/x86/xen/enlighten.c | 2 ++ > arch/x86/xen/smp.c | 28 +++++++++++++++++----------- > arch/x86/xen/smp.h | 8 ++++++++ > arch/x86/xen/xen-head.S | 30 ++++++++++++++++++++++++++++++ > 4 files changed, 57 insertions(+), 11 deletions(-) > > diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c > index c0cb11f..134d6a0 100644 > --- a/arch/x86/xen/enlighten.c > +++ b/arch/x86/xen/enlighten.c > @@ -1508,6 +1508,8 @@ static void __init xen_pvh_early_guest_init(void) > return; > > xen_have_vector_callback = 1; > + > + xen_pvh_cpu_early_init(0); Nit: for consistency may be better to call this xen_pvh_early_cpu_init(). > xen_pvh_set_cr_flags(0); > > #ifdef CONFIG_X86_32 > diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c > index 7005974..1b668dc 100644 > --- a/arch/x86/xen/smp.c > +++ b/arch/x86/xen/smp.c > @@ -37,6 +37,7 @@ > #include > #include "xen-ops.h" > #include "mmu.h" > +#include "smp.h" > > cpumask_var_t xen_cpu_initialized_map; > > @@ -99,8 +100,12 @@ static void cpu_bringup(void) > wmb(); /* make sure everything is out */ > } > > -/* Note: cpu parameter is only relevant for PVH */ > -static void cpu_bringup_and_idle(int cpu) > +/* > + * Note: cpu parameter is only relevant for PVH. The reason for passing it > + * is we can't do smp_processor_id until the percpu segments are loaded, for > + * which we need the cpu number! So we pass it in rdi as first parameter. > + */ > +asmlinkage __visible void cpu_bringup_and_idle(int cpu) > { > #ifdef CONFIG_X86_64 > if (xen_feature(XENFEAT_auto_translated_physmap) && > @@ -374,11 +379,10 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) > ctxt->user_regs.fs = __KERNEL_PERCPU; > ctxt->user_regs.gs = __KERNEL_STACK_CANARY; > #endif > - ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle; > - > memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt)); > > if (!xen_feature(XENFEAT_auto_translated_physmap)) { > + ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle; > ctxt->flags = VGCF_IN_KERNEL; > ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */ > ctxt->user_regs.ds = __USER_DS; > @@ -413,15 +417,17 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) > (unsigned long)xen_failsafe_callback; > ctxt->user_regs.cs = __KERNEL_CS; > per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir); > -#ifdef CONFIG_X86_32 > - } > -#else > - } else > - /* N.B. The user_regs.eip (cpu_bringup_and_idle) is called with > - * %rdi having the cpu number - which means are passing in > - * as the first parameter the cpu. Subtle! > + } > +#ifdef CONFIG_XEN_PVH Since you are wrapping this in ifdef here we should probably also do the same for xen_pvh_early_guest_init() xen_cpu_secondary_cpu_init(). -boris > + else { > + /* > + * The vcpu comes on kernel page tables which have the NX pte > + * bit set. This means before DS/SS is touched, NX in > + * EFER must be set. Hence the following assembly glue code. > */ > + ctxt->user_regs.eip = (unsigned long)xen_pvh_cpu_early_init; > ctxt->user_regs.rdi = cpu; > + } > #endif > ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs); > ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir)); > diff --git a/arch/x86/xen/smp.h b/arch/x86/xen/smp.h > index c7c2d89..d5dbc65 100644 > --- a/arch/x86/xen/smp.h > +++ b/arch/x86/xen/smp.h > @@ -8,4 +8,12 @@ extern void xen_send_IPI_allbutself(int vector); > extern void xen_send_IPI_all(int vector); > extern void xen_send_IPI_self(int vector); > > +#ifdef CONFIG_XEN_PVH > +extern void xen_pvh_cpu_early_init(int cpu); > +#else > +static inline void xen_pvh_cpu_early_init(int cpu) > +{ > +} > +#endif > + > #endif > diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S > index 485b695..62b3f10 100644 > --- a/arch/x86/xen/xen-head.S > +++ b/arch/x86/xen/xen-head.S > @@ -47,6 +47,36 @@ ENTRY(startup_xen) > > __FINIT > > +#ifdef CONFIG_XEN_PVH > + > +/** > + * xen_pvh_cpu_early_init() - early PVH VCPU initialization > + * @cpu: this cpu number (%rdi) > + * > + * Note: This is called as a function on the boot CPU and as the secondary > + * CPU entry point. > + */ > +ENTRY(xen_pvh_cpu_early_init) > + /* Gather features to see if NX implemented. */ > + mov $0x80000001, %eax > + cpuid > + mov %edx,%esi > + > + mov $MSR_EFER, %ecx > + rdmsr > + bts $_EFER_SCE, %eax > + > + bt $20,%esi > + jnc 1f /* No NX, skip setting it */ > + bts $_EFER_NX, %eax > +1: wrmsr > + > + cmp $0,%rdi /* non-zero => secondary cpu */ > + jne cpu_bringup_and_idle > + ret > + > +#endif /* CONFIG_XEN_PVH */ > + > .pushsection .text > .balign PAGE_SIZE > ENTRY(hypercall_page)