From: "Jan Beulich"
Subject: [PATCH 1/2] x86/shadow: compile most write emulation code just once
Date: Thu, 10 Mar 2016 03:13:06 -0700
Message-ID: <56E156C202000078000DB2BA@prv-mh.provo.novell.com>
In-Reply-To: <56E1555002000078000DB293@prv-mh.provo.novell.com>
References: <56E1555002000078000DB293@prv-mh.provo.novell.com>
To: xen-devel
Cc: Tim Deegan
List-Id: xen-devel@lists.xenproject.org

No need to compile all of this code three times, as most of it really
is guest mode independent. The savings are between 3k and 4k of binary
code in my builds.

No functional change (i.e. only formatting and naming changes) except
for
- sh_emulate_map_dest()'s user mode check corrected for the PV case
  (affecting debugging mode only, this isn't being split out)
- simplifying the vaddr argument to emulate_gva_to_mfn() for the second
  part in the cross page write case (see the note after the patch)

Signed-off-by: Jan Beulich

--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -1679,6 +1679,211 @@ static unsigned int shadow_get_allocatio
 }
 
 /**************************************************************************/
+/* Handling guest writes to pagetables. */
+
+/* Translate a VA to an MFN, injecting a page-fault if we fail. */
+#define BAD_GVA_TO_GFN (~0UL)
+#define BAD_GFN_TO_MFN (~1UL)
+#define READONLY_GFN   (~2UL)
+static mfn_t emulate_gva_to_mfn(struct vcpu *v, unsigned long vaddr,
+                                struct sh_emulate_ctxt *sh_ctxt)
+{
+    unsigned long gfn;
+    struct page_info *page;
+    mfn_t mfn;
+    p2m_type_t p2mt;
+    uint32_t pfec = PFEC_page_present | PFEC_write_access;
+
+    /* Translate the VA to a GFN. */
+    gfn = paging_get_hostmode(v)->gva_to_gfn(v, NULL, vaddr, &pfec);
+    if ( gfn == INVALID_GFN )
+    {
+        if ( is_hvm_vcpu(v) )
+            hvm_inject_page_fault(pfec, vaddr);
+        else
+            propagate_page_fault(vaddr, pfec);
+        return _mfn(BAD_GVA_TO_GFN);
+    }
+
+    /* Translate the GFN to an MFN. */
+    ASSERT(!paging_locked_by_me(v->domain));
+
+    page = get_page_from_gfn(v->domain, gfn, &p2mt, P2M_ALLOC);
+
+    /* Sanity checking. */
+    if ( page == NULL )
+    {
+        return _mfn(BAD_GFN_TO_MFN);
+    }
+    if ( p2m_is_discard_write(p2mt) )
+    {
+        put_page(page);
+        return _mfn(READONLY_GFN);
+    }
+    if ( !p2m_is_ram(p2mt) )
+    {
+        put_page(page);
+        return _mfn(BAD_GFN_TO_MFN);
+    }
+    mfn = page_to_mfn(page);
+    ASSERT(mfn_valid(mfn));
+
+    v->arch.paging.last_write_was_pt = !!sh_mfn_is_a_page_table(mfn);
+    /*
+     * Note shadow cannot page out or unshare this mfn, so the map won't
+     * disappear. Otherwise, caller must hold onto page until done.
+     */
+    put_page(page);
+
+    return mfn;
+}
+
+/* Check that the user is allowed to perform this write. */
+void *sh_emulate_map_dest(struct vcpu *v, unsigned long vaddr,
+                          unsigned int bytes,
+                          struct sh_emulate_ctxt *sh_ctxt)
+{
+    struct domain *d = v->domain;
+    void *map;
+
+    sh_ctxt->mfn1 = emulate_gva_to_mfn(v, vaddr, sh_ctxt);
+    if ( !mfn_valid(sh_ctxt->mfn1) )
+        return ((mfn_x(sh_ctxt->mfn1) == BAD_GVA_TO_GFN) ?
+                MAPPING_EXCEPTION :
+                (mfn_x(sh_ctxt->mfn1) == READONLY_GFN) ?
+                MAPPING_SILENT_FAIL : MAPPING_UNHANDLEABLE);
+
+#ifndef NDEBUG
+    /* We don't emulate user-mode writes to page tables. */
+    if ( has_hvm_container_domain(d)
+         ? hvm_get_seg_reg(x86_seg_ss, sh_ctxt)->attr.fields.dpl == 3
+         : !guest_kernel_mode(v, guest_cpu_user_regs()) )
+    {
+        gdprintk(XENLOG_DEBUG, "User-mode write to pagetable reached "
+                 "emulate_map_dest(). This should never happen!\n");
+        return MAPPING_UNHANDLEABLE;
+    }
+#endif
+
+    /* Unaligned writes mean probably this isn't a pagetable. */
+    if ( vaddr & (bytes - 1) )
+        sh_remove_shadows(d, sh_ctxt->mfn1, 0, 0 /* Slow, can fail. */ );
+
+    if ( likely(((vaddr + bytes - 1) & PAGE_MASK) == (vaddr & PAGE_MASK)) )
+    {
+        /* Whole write fits on a single page. */
+        sh_ctxt->mfn2 = _mfn(INVALID_MFN);
+        map = map_domain_page(sh_ctxt->mfn1) + (vaddr & ~PAGE_MASK);
+    }
+    else
+    {
+        mfn_t mfns[2];
+
+        /*
+         * Cross-page emulated writes are only supported for HVM guests;
+         * PV guests ought to know better.
+         */
+        if ( !is_hvm_domain(d) )
+            return MAPPING_UNHANDLEABLE;
+
+        /* This write crosses a page boundary. Translate the second page. */
+        sh_ctxt->mfn2 = emulate_gva_to_mfn(v, vaddr + bytes, sh_ctxt);
+        if ( !mfn_valid(sh_ctxt->mfn2) )
+            return ((mfn_x(sh_ctxt->mfn2) == BAD_GVA_TO_GFN) ?
+                    MAPPING_EXCEPTION :
+                    (mfn_x(sh_ctxt->mfn2) == READONLY_GFN) ?
+                    MAPPING_SILENT_FAIL : MAPPING_UNHANDLEABLE);
+
+        /* Cross-page writes mean probably not a pagetable. */
+        sh_remove_shadows(d, sh_ctxt->mfn2, 0, 0 /* Slow, can fail. */ );
+
+        mfns[0] = sh_ctxt->mfn1;
+        mfns[1] = sh_ctxt->mfn2;
+        map = vmap(mfns, 2);
+        if ( !map )
+            return MAPPING_UNHANDLEABLE;
+        map += (vaddr & ~PAGE_MASK);
+    }
+
+#if (SHADOW_OPTIMIZATIONS & SHOPT_SKIP_VERIFY)
+    /*
+     * Remember if the bottom bit was clear, so we can choose not to run
+     * the change through the verify code if it's still clear afterwards.
+     */
+    sh_ctxt->low_bit_was_clear = map != NULL && !(*(u8 *)map & _PAGE_PRESENT);
+#endif
+
+    return map;
+}
+
+/*
+ * Tidy up after the emulated write: mark pages dirty, verify the new
+ * contents, and undo the mapping.
+ */
+void sh_emulate_unmap_dest(struct vcpu *v, void *addr, unsigned int bytes,
+                           struct sh_emulate_ctxt *sh_ctxt)
+{
+    u32 b1 = bytes, b2 = 0, shflags;
+
+    /*
+     * We can avoid re-verifying the page contents after the write if:
+     * - it was no larger than the PTE type of this pagetable;
+     * - it was aligned to the PTE boundaries; and
+     * - _PAGE_PRESENT was clear before and after the write.
+     */
+    shflags = mfn_to_page(sh_ctxt->mfn1)->shadow_flags;
+#if (SHADOW_OPTIMIZATIONS & SHOPT_SKIP_VERIFY)
+    if ( sh_ctxt->low_bit_was_clear
+         && !(*(u8 *)addr & _PAGE_PRESENT)
+         && ((!(shflags & SHF_32)
+              /*
+               * Not shadowed 32-bit: aligned 64-bit writes that leave
+               * the present bit unset are safe to ignore.
+               */
+              && ((unsigned long)addr & 7) == 0
+              && bytes <= 8)
+             ||
+             (!(shflags & (SHF_PAE|SHF_64))
+              /*
+               * Not shadowed PAE/64-bit: aligned 32-bit writes that
+               * leave the present bit unset are safe to ignore.
+               */
+              && ((unsigned long)addr & 3) == 0
+              && bytes <= 4)) )
+    {
+        /* Writes with this alignment constraint can't possibly cross pages. */
+        ASSERT(!mfn_valid(sh_ctxt->mfn2));
+    }
+    else
+#endif /* SHADOW_OPTIMIZATIONS & SHOPT_SKIP_VERIFY */
+    {
+        if ( unlikely(mfn_valid(sh_ctxt->mfn2)) )
+        {
+            /* Validate as two writes, one to each page. */
+            b1 = PAGE_SIZE - (((unsigned long)addr) & ~PAGE_MASK);
+            b2 = bytes - b1;
+            ASSERT(b2 < bytes);
+        }
+        if ( likely(b1 > 0) )
+            sh_validate_guest_pt_write(v, sh_ctxt->mfn1, addr, b1);
+        if ( unlikely(b2 > 0) )
+            sh_validate_guest_pt_write(v, sh_ctxt->mfn2, addr + b1, b2);
+    }
+
+    paging_mark_dirty(v->domain, mfn_x(sh_ctxt->mfn1));
+
+    if ( unlikely(mfn_valid(sh_ctxt->mfn2)) )
+    {
+        paging_mark_dirty(v->domain, mfn_x(sh_ctxt->mfn2));
+        vunmap((void *)((unsigned long)addr & PAGE_MASK));
+    }
+    else
+        unmap_domain_page(addr);
+
+    atomic_inc(&v->domain->arch.paging.shadow.gtable_dirty_version);
+}
+
+/**************************************************************************/
 /* Hash table for storing the guest->shadow mappings.
  * The table itself is an array of pointers to shadows; the shadows are then
  * threaded on a singly-linked list of shadows with the same hash value */
--- a/xen/arch/x86/mm/shadow/multi.c
+++ b/xen/arch/x86/mm/shadow/multi.c
@@ -4606,141 +4606,7 @@ static void sh_pagetable_dying(struct vc
 #endif
 
 /**************************************************************************/
-/* Handling HVM guest writes to pagetables */
-
-/* Translate a VA to an MFN, injecting a page-fault if we fail */
-#define BAD_GVA_TO_GFN (~0UL)
-#define BAD_GFN_TO_MFN (~1UL)
-#define READONLY_GFN   (~2UL)
-static mfn_t emulate_gva_to_mfn(struct vcpu *v,
-                                unsigned long vaddr,
-                                struct sh_emulate_ctxt *sh_ctxt)
-{
-    unsigned long gfn;
-    struct page_info *page;
-    mfn_t mfn;
-    p2m_type_t p2mt;
-    uint32_t pfec = PFEC_page_present | PFEC_write_access;
-
-    /* Translate the VA to a GFN */
-    gfn = sh_gva_to_gfn(v, NULL, vaddr, &pfec);
-    if ( gfn == INVALID_GFN )
-    {
-        if ( is_hvm_vcpu(v) )
-            hvm_inject_page_fault(pfec, vaddr);
-        else
-            propagate_page_fault(vaddr, pfec);
-        return _mfn(BAD_GVA_TO_GFN);
-    }
-
-    /* Translate the GFN to an MFN */
-    ASSERT(!paging_locked_by_me(v->domain));
-
-    page = get_page_from_gfn(v->domain, gfn, &p2mt, P2M_ALLOC);
-
-    /* Sanity checking */
-    if ( page == NULL )
-    {
-        return _mfn(BAD_GFN_TO_MFN);
-    }
-    if ( p2m_is_discard_write(p2mt) )
-    {
-        put_page(page);
-        return _mfn(READONLY_GFN);
-    }
-    if ( !p2m_is_ram(p2mt) )
-    {
-        put_page(page);
-        return _mfn(BAD_GFN_TO_MFN);
-    }
-    mfn = page_to_mfn(page);
-    ASSERT(mfn_valid(mfn));
-
-    v->arch.paging.last_write_was_pt = !!sh_mfn_is_a_page_table(mfn);
-    /* Note shadow cannot page out or unshare this mfn, so the map won't
-     * disappear. Otherwise, caller must hold onto page until done. */
-    put_page(page);
-    return mfn;
-}
-
-/* Check that the user is allowed to perform this write.
- * Returns a mapped pointer to write to, or NULL for error. */
-#define MAPPING_UNHANDLEABLE ((void *)(unsigned long)X86EMUL_UNHANDLEABLE)
-#define MAPPING_EXCEPTION    ((void *)(unsigned long)X86EMUL_EXCEPTION)
-#define MAPPING_SILENT_FAIL  ((void *)(unsigned long)X86EMUL_OKAY)
-#define emulate_map_dest_failed(rc) ((unsigned long)(rc) <= 3)
-static void *emulate_map_dest(struct vcpu *v,
-                              unsigned long vaddr,
-                              u32 bytes,
-                              struct sh_emulate_ctxt *sh_ctxt)
-{
-    struct domain *d = v->domain;
-    void *map = NULL;
-
-    sh_ctxt->mfn1 = emulate_gva_to_mfn(v, vaddr, sh_ctxt);
-    if ( !mfn_valid(sh_ctxt->mfn1) )
-        return ((mfn_x(sh_ctxt->mfn1) == BAD_GVA_TO_GFN) ?
-                MAPPING_EXCEPTION :
-                (mfn_x(sh_ctxt->mfn1) == READONLY_GFN) ?
-                MAPPING_SILENT_FAIL : MAPPING_UNHANDLEABLE);
-
-#ifndef NDEBUG
-    /* We don't emulate user-mode writes to page tables */
-    if ( hvm_get_seg_reg(x86_seg_ss, sh_ctxt)->attr.fields.dpl == 3 )
-    {
-        gdprintk(XENLOG_DEBUG, "User-mode write to pagetable reached "
-                 "emulate_map_dest(). This should never happen!\n");
-        return MAPPING_UNHANDLEABLE;
-    }
-#endif
-
-    /* Unaligned writes mean probably this isn't a pagetable */
-    if ( vaddr & (bytes - 1) )
-        sh_remove_shadows(d, sh_ctxt->mfn1, 0, 0 /* Slow, can fail */ );
-
-    if ( likely(((vaddr + bytes - 1) & PAGE_MASK) == (vaddr & PAGE_MASK)) )
-    {
-        /* Whole write fits on a single page */
-        sh_ctxt->mfn2 = _mfn(INVALID_MFN);
-        map = map_domain_page(sh_ctxt->mfn1) + (vaddr & ~PAGE_MASK);
-    }
-    else
-    {
-        mfn_t mfns[2];
-
-        /* Cross-page emulated writes are only supported for HVM guests;
-         * PV guests ought to know better */
-        if ( !is_hvm_domain(d) )
-            return MAPPING_UNHANDLEABLE;
-
-        /* This write crosses a page boundary. Translate the second page */
-        sh_ctxt->mfn2 = emulate_gva_to_mfn(v, (vaddr + bytes - 1) & PAGE_MASK,
-                                           sh_ctxt);
-        if ( !mfn_valid(sh_ctxt->mfn2) )
-            return ((mfn_x(sh_ctxt->mfn2) == BAD_GVA_TO_GFN) ?
-                    MAPPING_EXCEPTION :
-                    (mfn_x(sh_ctxt->mfn2) == READONLY_GFN) ?
-                    MAPPING_SILENT_FAIL : MAPPING_UNHANDLEABLE);
-
-        /* Cross-page writes mean probably not a pagetable */
-        sh_remove_shadows(d, sh_ctxt->mfn2, 0, 0 /* Slow, can fail */ );
-
-        mfns[0] = sh_ctxt->mfn1;
-        mfns[1] = sh_ctxt->mfn2;
-        map = vmap(mfns, 2);
-        if ( !map )
-            return MAPPING_UNHANDLEABLE;
-        map += (vaddr & ~PAGE_MASK);
-    }
-
-#if (SHADOW_OPTIMIZATIONS & SHOPT_SKIP_VERIFY)
-    /* Remember if the bottom bit was clear, so we can choose not to run
-     * the change through the verify code if it's still clear afterwards */
-    sh_ctxt->low_bit_was_clear = map != NULL && !(*(u8 *)map & _PAGE_PRESENT);
-#endif
-
-    return map;
-}
+/* Handling guest writes to pagetables. */
 
 /* Tidy up after the emulated write: mark pages dirty, verify the new
  * contents, and undo the mapping */
@@ -4749,8 +4615,6 @@ static void emulate_unmap_dest(struct vc
                                u32 bytes,
                                struct sh_emulate_ctxt *sh_ctxt)
 {
-    u32 b1 = bytes, b2 = 0, shflags;
-
     ASSERT(mfn_valid(sh_ctxt->mfn1));
 
     /* If we are writing lots of PTE-aligned zeros, might want to unshadow */
@@ -4764,56 +4628,7 @@ static void emulate_unmap_dest(struct vc
     else
         reset_early_unshadow(v);
 
-    /* We can avoid re-verifying the page contents after the write if:
-     * - it was no larger than the PTE type of this pagetable;
-     * - it was aligned to the PTE boundaries; and
-     * - _PAGE_PRESENT was clear before and after the write. */
-    shflags = mfn_to_page(sh_ctxt->mfn1)->shadow_flags;
-#if (SHADOW_OPTIMIZATIONS & SHOPT_SKIP_VERIFY)
-    if ( sh_ctxt->low_bit_was_clear
-         && !(*(u8 *)addr & _PAGE_PRESENT)
-         && ((!(shflags & SHF_32)
-              /* Not shadowed 32-bit: aligned 64-bit writes that leave
-               * the present bit unset are safe to ignore. */
-              && ((unsigned long)addr & 7) == 0
-              && bytes <= 8)
-             ||
-             (!(shflags & (SHF_PAE|SHF_64))
-              /* Not shadowed PAE/64-bit: aligned 32-bit writes that
-               * leave the present bit unset are safe to ignore. */
-              && ((unsigned long)addr & 3) == 0
-              && bytes <= 4)) )
-    {
-        /* Writes with this alignment constraint can't possibly cross pages */
-        ASSERT(!mfn_valid(sh_ctxt->mfn2));
-    }
-    else
-#endif /* SHADOW_OPTIMIZATIONS & SHOPT_SKIP_VERIFY */
-    {
-        if ( unlikely(mfn_valid(sh_ctxt->mfn2)) )
-        {
-            /* Validate as two writes, one to each page */
-            b1 = PAGE_SIZE - (((unsigned long)addr) & ~PAGE_MASK);
-            b2 = bytes - b1;
-            ASSERT(b2 < bytes);
-        }
-        if ( likely(b1 > 0) )
-            sh_validate_guest_pt_write(v, sh_ctxt->mfn1, addr, b1);
-        if ( unlikely(b2 > 0) )
-            sh_validate_guest_pt_write(v, sh_ctxt->mfn2, addr + b1, b2);
-    }
-
-    paging_mark_dirty(v->domain, mfn_x(sh_ctxt->mfn1));
-
-    if ( unlikely(mfn_valid(sh_ctxt->mfn2)) )
-    {
-        paging_mark_dirty(v->domain, mfn_x(sh_ctxt->mfn2));
-        vunmap((void *)((unsigned long)addr & PAGE_MASK));
-    }
-    else
-        unmap_domain_page(addr);
-
-    atomic_inc(&v->domain->arch.paging.shadow.gtable_dirty_version);
+    sh_emulate_unmap_dest(v, addr, bytes, sh_ctxt);
 }
 
 static int
@@ -4826,8 +4641,8 @@ sh_x86_emulate_write(struct vcpu *v, uns
     if ( (vaddr & (bytes - 1)) && !is_hvm_vcpu(v) )
         return X86EMUL_UNHANDLEABLE;
 
-    addr = emulate_map_dest(v, vaddr, bytes, sh_ctxt);
-    if ( emulate_map_dest_failed(addr) )
+    addr = sh_emulate_map_dest(v, vaddr, bytes, sh_ctxt);
+    if ( sh_emulate_map_dest_failed(addr) )
         return (long)addr;
 
     paging_lock(v->domain);
@@ -4868,8 +4683,8 @@ sh_x86_emulate_cmpxchg(struct vcpu *v, u
     if ( (vaddr & (bytes - 1)) && !is_hvm_vcpu(v) )
         return X86EMUL_UNHANDLEABLE;
 
-    addr = emulate_map_dest(v, vaddr, bytes, sh_ctxt);
-    if ( emulate_map_dest_failed(addr) )
+    addr = sh_emulate_map_dest(v, vaddr, bytes, sh_ctxt);
+    if ( sh_emulate_map_dest_failed(addr) )
         return (long)addr;
 
     paging_lock(v->domain);
--- a/xen/arch/x86/mm/shadow/private.h
+++ b/xen/arch/x86/mm/shadow/private.h
@@ -393,6 +393,17 @@ void shadow_update_paging_modes(struct v
  * With user_only == 1, unhooks only the user-mode mappings. */
 void shadow_unhook_mappings(struct domain *d, mfn_t smfn, int user_only);
 
+/* Returns a mapped pointer to write to, or one of the following error
+ * indicators. */
+#define MAPPING_UNHANDLEABLE ((void *)(unsigned long)X86EMUL_UNHANDLEABLE)
+#define MAPPING_EXCEPTION    ((void *)(unsigned long)X86EMUL_EXCEPTION)
+#define MAPPING_SILENT_FAIL  ((void *)(unsigned long)X86EMUL_OKAY)
+#define sh_emulate_map_dest_failed(rc) ((unsigned long)(rc) <= 3)
+void *sh_emulate_map_dest(struct vcpu *v, unsigned long vaddr,
+                          unsigned int bytes, struct sh_emulate_ctxt *sh_ctxt);
+void sh_emulate_unmap_dest(struct vcpu *v, void *addr, unsigned int bytes,
+                           struct sh_emulate_ctxt *sh_ctxt);
+
 #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
 /* Allow a shadowed page to go out of sync */
 int sh_unsync(struct vcpu *v, mfn_t gmfn);
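
A note on the second bullet in the description: in the cross-page case a
short write's last byte lands on the page after the one vaddr is on, and
vaddr + bytes cannot itself be page aligned (that would mean the write did
not cross a boundary at all), so vaddr + bytes resolves to the same page -
and hence the same GFN - as the old (vaddr + bytes - 1) & PAGE_MASK
argument. The stand-alone sketch below (not part of the patch; it assumes
4k pages and a user-space test harness) checks that equivalence
exhaustively for small write sizes:

    /* Hypothetical self-contained check; PAGE_* redefined locally. */
    #include <assert.h>
    #include <stdio.h>

    #define PAGE_SHIFT 12
    #define PAGE_SIZE  (1UL << PAGE_SHIFT)
    #define PAGE_MASK  (~(PAGE_SIZE - 1))

    int main(void)
    {
        unsigned long vaddr, bytes;

        for ( bytes = 2; bytes <= 16; bytes <<= 1 )
            for ( vaddr = 2 * PAGE_SIZE - bytes + 1; vaddr < 2 * PAGE_SIZE; vaddr++ )
            {
                /* Every vaddr in this range makes the write cross a boundary. */
                assert(((vaddr + bytes - 1) & PAGE_MASK) != (vaddr & PAGE_MASK));
                /* Old and new arguments fall within the same (second) page. */
                assert(((vaddr + bytes) & PAGE_MASK) ==
                       ((vaddr + bytes - 1) & PAGE_MASK));
            }

        printf("cross-page argument simplification holds\n");
        return 0;
    }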
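
Similarly, the MAPPING_* values now exposed via private.h rely on encoding
small integer status codes as pointer values that can never be real
mappings, which is what lets sh_emulate_map_dest_failed() test for failure
with a single unsigned comparison. A minimal illustration of that pattern
(the ERR_* values and map_dest() below are hypothetical stand-ins, not the
real X86EMUL_* codes or the shadow code itself):

    #include <assert.h>

    #define ERR_OKAY         ((void *)0UL)            /* hypothetical */
    #define ERR_UNHANDLEABLE ((void *)1UL)            /* hypothetical */
    #define ERR_EXCEPTION    ((void *)2UL)            /* hypothetical */
    #define map_failed(p)    ((unsigned long)(p) <= 3)

    static char buf[16];              /* stands in for a real mapping */

    static void *map_dest(int fail)
    {
        /* Either a genuine pointer or one of the low "error" values. */
        return fail ? ERR_EXCEPTION : buf;
    }

    int main(void)
    {
        assert(!map_failed(map_dest(0)));  /* real mappings sit well above 3 */
        assert(map_failed(map_dest(1)));   /* failures caught by one compare */
        return 0;
    }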