From mboxrd@z Thu Jan  1 00:00:00 1970
From: Andrew Cooper
Subject: [PATCH 8/8] xen/x86: Additional SMAP modes to work around buggy
 32bit PV guests
Date: Wed, 24 Jun 2015 17:31:40 +0100
Message-ID: <1435163500-10589-9-git-send-email-andrew.cooper3@citrix.com>
References: <1435163500-10589-1-git-send-email-andrew.cooper3@citrix.com>
Mime-Version: 1.0
Content-Type: text/plain; charset="us-ascii"
Content-Transfer-Encoding: 7bit
In-Reply-To: <1435163500-10589-1-git-send-email-andrew.cooper3@citrix.com>
Sender: xen-devel-bounces@lists.xen.org
Errors-To: xen-devel-bounces@lists.xen.org
To: Xen-devel
Cc: Andrew Cooper, Jan Beulich
List-Id: xen-devel@lists.xenproject.org

Experimentally, older Linux guests perform construction of `init` with user
pagetable mappings.  This is fine for native systems, as such a guest would
not set CR4.SMAP itself.  However, if Xen uses SMAP itself, 32bit PV guests
(whose kernels run in ring1) are also affected.  Older Linux guests end up
spinning in a loop assuming that the SMAP violation pagefaults are spurious,
and make no further progress.

One option is to disable SMAP completely, but this is unreasonable.

A better alternative is to disable SMAP only in the context of 32bit PV
guests, but this reduces the effectiveness of SMAP security.

A third option is for Xen to fix up behind a 32bit guest which is not
SMAP-aware.  It is a heuristic, and does result in a guest-visible state
change, but allows Xen to keep CR4.SMAP unconditionally enabled.

Signed-off-by: Andrew Cooper
CC: Jan Beulich
---
 docs/misc/xen-command-line.markdown |   25 ++++++++++++++++++--
 xen/arch/x86/domain.c               |   16 +++++++++++--
 xen/arch/x86/setup.c                |   31 +++++++++++++++++++++----
 xen/arch/x86/traps.c                |   43 ++++++++++++++++++++-----------
 xen/include/asm-x86/processor.h     |   10 ++++++++
 5 files changed, 103 insertions(+), 22 deletions(-)

diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown
index aa684c0..a63d2b2 100644
--- a/docs/misc/xen-command-line.markdown
+++ b/docs/misc/xen-command-line.markdown
@@ -1261,11 +1261,32 @@ Set the serial transmit buffer size.
 Flag to enable Supervisor Mode Execution Protection
 
 ### smap
-> `= <boolean>`
+> `= <boolean> | compat | fixup`
 
 > Default: `true`
 
-Flag to enable Supervisor Mode Access Prevention
+Handling of Supervisor Mode Access Prevention.
+
+32bit PV guest kernels qualify as supervisor code, as they execute in ring 1.
+If Xen uses SMAP protection itself, a PV guest which is not SMAP-aware may
+suffer unexpected pagefaults which it cannot handle.  (Experimentally, there
+are 32bit PV guests which fall foul of SMAP enforcement and spin in an
+infinite loop taking pagefaults early on boot.)
+
+Two further SMAP modes are introduced to work around buggy 32bit PV guests and
+prevent functional regressions of VMs on newer hardware.  If at any point the
+guest sets `CR4.SMAP` itself, it is deemed aware, and **compat/fixup** cease
+to apply.
+
+A SMAP mode of **compat** causes Xen to disable `CR4.SMAP` in the context of
+an unaware 32bit PV guest.  This prevents the guest from being subject to SMAP
+enforcement, but also prevents Xen from benefiting from the added security
+checks.
+
+A SMAP mode of **fixup** causes Xen to set `EFLAGS.AC` when discovering a SMAP
+pagefault in the context of an unaware 32bit PV guest.  This allows Xen to
+retain the added security from SMAP checks, but results in a guest-visible
+state change which it might object to.
 
 ### snb\_igd\_quirk
 > `= <boolean> | cap | <integer>`

diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 18c3c62..045a2b9 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -687,9 +687,10 @@ void arch_domain_unpause(struct domain *d)
  * - TSD for vtsc
  * - DE for %%dr4/5 emulation
  * - OSXSAVE for xsetbv emulation
+ * - SMAP for smap_mode_{compat,fixup} handling
  */
 #define PV_CR4_SHADOW                                \
-    (X86_CR4_TSD | X86_CR4_DE | X86_CR4_OSXSAVE)
+    (X86_CR4_TSD | X86_CR4_DE | X86_CR4_OSXSAVE | X86_CR4_SMAP)
 
 /* CR4 bits a guest controls. */
 #define PV_CR4_GUEST (X86_CR4_TSD)
 
@@ -700,7 +701,7 @@ void arch_domain_unpause(struct domain *d)
  */
 #define PV_CR4_READ                                                      \
     (X86_CR4_PAE | X86_CR4_PGE | X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT |   \
-     X86_CR4_FSGSBASE | X86_CR4_SMEP | X86_CR4_SMAP)
+     X86_CR4_FSGSBASE | X86_CR4_SMEP)
 
 /*
  * These are the masks of CR4 bits (subject to hardware availability) which a
@@ -730,6 +731,12 @@ static int __init init_pv_cr4_masks(void)
     if ( cpu_has_fsgsbase )
         pv_cr4_mask &= ~X86_CR4_FSGSBASE;
 
+    /*
+     * 32bit PV guests may attempt to modify SMAP.
+     */
+    if ( cpu_has_smap )
+        compat_pv_cr4_mask &= ~X86_CR4_SMAP;
+
     BUILD_BUG_ON(PV_CR4_SHADOW & PV_CR4_READ);
 
     return 0;
@@ -784,6 +791,11 @@ unsigned long pv_guest_cr4_to_real_cr4(const struct vcpu *v)
     if ( v->domain->arch.vtsc )
         cr4 |= X86_CR4_TSD;
 
+    /* Disable SMAP behind unaware 32bit PV guests. */
+    if ( (smap_mode == smap_mode_compat) && is_pv_32bit_vcpu(v) &&
+         ((v->arch.pv_vcpu.ctrlreg[4] & X86_CR4_SMAP) == 0) )
+        cr4 &= ~X86_CR4_SMAP;
+
     return cr4;
 }
 
diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
index ff34670..36fce42 100644
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -63,10 +63,6 @@
 static bool_t __initdata disable_smep;
 invbool_param("smep", disable_smep);
 
-/* smap: Enable/disable Supervisor Mode Access Prevention (default on). */
-static bool_t __initdata disable_smap;
-invbool_param("smap", disable_smap);
-
 /* Boot dom0 in pvh mode */
 static bool_t __initdata opt_dom0pvh;
 boolean_param("dom0pvh", opt_dom0pvh);
@@ -138,6 +134,29 @@ static void __init parse_acpi_param(char *s)
     }
 }
 
+enum xen_smap_mode smap_mode __read_mostly = smap_mode_enable;
+static void __init parse_smap(char *s)
+{
+    switch ( parse_bool(s) )
+    {
+    case 1:
+        smap_mode = smap_mode_enable;
+        break;
+
+    case 0:
+        smap_mode = smap_mode_disable;
+        break;
+
+    default:
+        if ( !strcmp(s, "compat") )
+            smap_mode = smap_mode_compat;
+        else if ( !strcmp(s, "fixup") )
+            smap_mode = smap_mode_fixup;
+        break;
+    }
+}
+custom_param("smap", parse_smap);
+
 static const module_t *__initdata initial_images;
 static unsigned int __initdata nr_initial_images;
 
@@ -1296,10 +1315,12 @@ void __init noreturn __start_xen(unsigned long mbi_p)
     if ( cpu_has_smep )
         set_in_cr4(X86_CR4_SMEP);
 
-    if ( disable_smap )
+    if ( smap_mode == smap_mode_disable )
         setup_clear_cpu_cap(X86_FEATURE_SMAP);
     if ( cpu_has_smap )
         set_in_cr4(X86_CR4_SMAP);
+    else
+        smap_mode = smap_mode_disable;
 
     if ( cpu_has_fsgsbase )
         set_in_cr4(X86_CR4_FSGSBASE);
 
diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index 1ab870d..149a255 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -1437,21 +1437,38 @@ static int fixup_page_fault(unsigned long addr, struct cpu_user_regs *regs)
             return 0;
     }
 
-    if ( guest_kernel_mode(v, regs) &&
-         !(regs->error_code & (PFEC_reserved_bit | PFEC_insn_fetch)) &&
-         (regs->error_code & PFEC_write_access) )
-    {
-        if ( VM_ASSIST(d, writable_pagetables) &&
-             /* Do not check if access-protection fault since the page may
-                legitimately be not present in shadow page tables */
-             (paging_mode_enabled(d) ||
-              (regs->error_code & PFEC_page_present)) &&
-             ptwr_do_page_fault(v, addr, regs) )
-            return EXCRET_fault_fixed;
+    if ( guest_kernel_mode(v, regs) )
+    {
+        if ( !(regs->error_code & (PFEC_reserved_bit | PFEC_insn_fetch)) &&
+             (regs->error_code & PFEC_write_access) )
+        {
+            if ( VM_ASSIST(d, writable_pagetables) &&
+                 /* Do not check if access-protection fault since the page may
+                    legitimately be not present in shadow page tables */
+                 (paging_mode_enabled(d) ||
+                  (regs->error_code & PFEC_page_present)) &&
+                 ptwr_do_page_fault(v, addr, regs) )
+                return EXCRET_fault_fixed;
 
-        if ( is_hardware_domain(d) && (regs->error_code & PFEC_page_present) &&
-             mmio_ro_do_page_fault(v, addr, regs) )
+            if ( is_hardware_domain(d) && (regs->error_code & PFEC_page_present) &&
+                 mmio_ro_do_page_fault(v, addr, regs) )
+                return EXCRET_fault_fixed;
+        }
+
+        /*
+         * SMAP violation behind an unaware 32bit PV guest kernel?  Set
+         * EFLAGS.AC behind its back and try again.
+         */
+        if ( (smap_mode == smap_mode_fixup) && is_pv_32bit_domain(d) &&
+             ((regs->error_code &
+               (PFEC_insn_fetch | PFEC_reserved_bit |
+                PFEC_user_mode | PFEC_page_present)) == PFEC_page_present) &&
+             ((v->arch.pv_vcpu.ctrlreg[4] & X86_CR4_SMAP) == 0) &&
+             ((regs->eflags & X86_EFLAGS_AC) == 0) )
+        {
+            regs->eflags |= X86_EFLAGS_AC;
             return EXCRET_fault_fixed;
+        }
     }
 
     /* For non-external shadowed guests, we fix up both their own
diff --git a/xen/include/asm-x86/processor.h b/xen/include/asm-x86/processor.h
index 5fb340f..22d5cfa 100644
--- a/xen/include/asm-x86/processor.h
+++ b/xen/include/asm-x86/processor.h
@@ -581,6 +581,16 @@ enum get_cpu_vendor {
 int get_cpu_vendor(const char vendor_id[], enum get_cpu_vendor);
 void pv_cpuid(struct cpu_user_regs *regs);
 
+enum xen_smap_mode {
+    smap_mode_enable,  /* Use SMAP. */
+    smap_mode_disable, /* Don't use SMAP. */
+    smap_mode_compat,  /* Context switch CR4.SMAP behind an unaware 32bit */
+                       /* PV guest. */
+    smap_mode_fixup,   /* Set EFLAGS.AC on a SMAP fault behind an */
+                       /* unaware 32bit PV guest. */
+};
+extern enum xen_smap_mode smap_mode;
+
 #endif /* !__ASSEMBLY__ */
 
 #endif /* __ASM_X86_PROCESSOR_H */
-- 
1.7.10.4
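
As a usage sketch (not part of the patch itself): the new modes are selected
with the `smap=` option on the Xen command line, as parsed by parse_smap()
above.  Assuming a GRUB2 boot entry (the file paths and other options below
are illustrative examples only, not taken from the patch), keeping SMAP
enforced while fixing up behind unaware guests might look like:

    # Hypothetical GRUB2 menu entry fragment; paths are examples only.
    menuentry 'Xen' {
        # smap=fixup: keep CR4.SMAP set, but have Xen set EFLAGS.AC on a
        # SMAP fault taken behind a 32bit PV kernel which never set CR4.SMAP.
        multiboot2 /boot/xen.gz smap=fixup
        module2 /boot/vmlinuz console=hvc0
        module2 /boot/initrd.img
    }

`smap=compat` would instead drop CR4.SMAP while such a guest runs, and
`smap=true` / `smap=false` retain the previous all-or-nothing behaviour.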