All of lore.kernel.org
 help / color / mirror / Atom feed
From: Andrew Cooper <andrew.cooper3@citrix.com>
To: Huaitong Han <huaitong.han@intel.com>,
	jbeulich@suse.com, jun.nakajima@intel.com, eddie.dong@intel.com,
	kevin.tian@intel.com, george.dunlap@eu.citrix.com,
	ian.jackson@eu.citrix.com, stefano.stabellini@eu.citrix.com,
	ian.campbell@citrix.com, wei.liu2@citrix.com, keir@xen.org
Cc: xen-devel@lists.xen.org
Subject: Re: [PATCH 08/10] x86/hvm: pkeys, add pkeys support for do_page_fault
Date: Mon, 16 Nov 2015 15:25:37 +0000	[thread overview]
Message-ID: <5649F571.607@citrix.com> (raw)
In-Reply-To: <1447669917-17939-9-git-send-email-huaitong.han@intel.com>

On 16/11/15 10:31, Huaitong Han wrote:
> This patch adds pkeys support for do_page_fault.
>
> the protection keys architecture define a new status bit in the PFEC. PFEC.PK
> (bit 5) is set to 1 if an only if protection keys block the access.
>
> Protection keys block an access and induce a page fault if and only if
> 1.Protection keys are enabled (CR4.PKE=1 and EFER.LMA=1), and
> 2.The page has a valid translation (page is present with no reserved bit
>   violations), and
> 3.The access is not an instruction fetch, and
> 4.The access is to a user page, and
> 5.At least one of the following restrictions apply:
>     --The access is a data read or data write and AD=1
>     --The access is a data write and WD=1 and either CR0.WP=1 or (CR0.WP=0 and
>     it is a user access)
>
> Signed-off-by: Huaitong Han <huaitong.han@intel.com>
>
> diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
> index 9f5a6c6..73abb3b 100644
> --- a/xen/arch/x86/traps.c
> +++ b/xen/arch/x86/traps.c
> @@ -1287,7 +1287,7 @@ enum pf_type {
>      spurious_fault
>  };
>  
> -static enum pf_type __page_fault_type(
> +static enum pf_type __page_fault_type(struct vcpu *vcpu,
>      unsigned long addr, const struct cpu_user_regs *regs)
>  {
>      unsigned long mfn, cr3 = read_cr3();
> @@ -1295,7 +1295,7 @@ static enum pf_type __page_fault_type(
>      l3_pgentry_t l3e, *l3t;
>      l2_pgentry_t l2e, *l2t;
>      l1_pgentry_t l1e, *l1t;
> -    unsigned int required_flags, disallowed_flags, page_user;
> +    unsigned int required_flags, disallowed_flags, page_user, pte_pkeys;
>      unsigned int error_code = regs->error_code;
>  
>      /*hvm_wp_enabled
>
> @@ -1340,6 +1340,7 @@ static enum pf_type __page_fault_type(
>      if ( ((l3e_get_flags(l3e) & required_flags) != required_flags) ||
>           (l3e_get_flags(l3e) & disallowed_flags) )
>          return real_fault;
> +    pte_pkeys = l3e_get_pkeys(l3e);
>      page_user &= l3e_get_flags(l3e);
>      if ( l3e_get_flags(l3e) & _PAGE_PSE )
>          goto leaf;
> @@ -1351,6 +1352,7 @@ static enum pf_type __page_fault_type(
>      if ( ((l2e_get_flags(l2e) & required_flags) != required_flags) ||
>           (l2e_get_flags(l2e) & disallowed_flags) )
>          return real_fault;
> +    pte_pkeys = l2e_get_pkeys(l2e);
>      page_user &= l2e_get_flags(l2e);
>      if ( l2e_get_flags(l2e) & _PAGE_PSE )
>          goto leaf;
> @@ -1362,12 +1364,22 @@ static enum pf_type __page_fault_type(
>      if ( ((l1e_get_flags(l1e) & required_flags) != required_flags) ||
>           (l1e_get_flags(l1e) & disallowed_flags) )
>          return real_fault;
> +    pte_pkeys = l1e_get_pkeys(l1e);
>      page_user &= l1e_get_flags(l1e);
>  
>  leaf:
>      if ( page_user )
>      {
>          unsigned long cr4 = read_cr4();
> +        unsigned int ff, wf, uf, rsvdf, pkuf;
> +        unsigned int pkru_ad, pkru_wd;
> +
> +        uf = error_code & PFEC_user_mode;
> +        wf = error_code & PFEC_write_access;
> +        rsvdf = error_code & PFEC_reserved_bit;
> +        ff = error_code & PFEC_insn_fetch;
> +        pkuf = error_code & PFEC_protection_key;

These should be bool_t's rather than unsigned int, but I don't really
see the point of breaking them all out.

> +
>          /*
>           * Supervisor Mode Execution Prevention (SMEP):
>           * Disallow supervisor execution from user-accessible mappings
> @@ -1386,15 +1398,35 @@ leaf:
>           *   - CPL=3 or X86_EFLAGS_AC is clear
>           *   - Page fault in kernel mode
>           */
> -        if ( (cr4 & X86_CR4_SMAP) && !(error_code & PFEC_user_mode) &&
> +        if ( (cr4 & X86_CR4_SMAP) && !uf &&
>               (((regs->cs & 3) == 3) || !(regs->eflags & X86_EFLAGS_AC)) )
>              return smap_fault;
> +         /*
> +         * PKU:  additional mechanism by which the paging controls
> +         * access to user-mode addresses based on the value in the
> +         * PKRU register. A fault is considered as a PKU violation if all
> +         * of the following conditions are ture:
> +         * 1.CR4_PKE=1.
> +         * 2.EFER_LMA=1.
> +         * 3.page is present with no reserved bit violations.
> +         * 4.the access is not an instruction fetch.
> +         * 5.the access is to a user page.
> +         * 6.PKRU.AD=1
> +         *       or The access is a data write and PKRU.WD=1
> +         *           and either CR0.WP=1 or it is a user access.
> +         */
> +         pkru_ad = READ_PKRU_AD(pte_pkeys);
> +         pkru_wd = READ_PKRU_AD(pte_pkeys);
> +         if ( pkuf && (cr4 & X86_CR4_PKE) && hvm_long_mode_enabled(vcpu) &&
> +             !rsvdf && !ff && (pkru_ad ||
> +             (pkru_wd && wf && (hvm_wp_enabled(vcpu) || uf))))
> +             return real_fault;

This has clearly never been tested on a non-Skylake processor.  The
rdpkru instruction must be after the CR4_PKE check, or it will kill Xen
with a #UD fault.

The hvm_long_mode_enabled() and hvm_wp_enabled() are not valid without a
has_hvm_container() check.  However, you on a PV-only codepath so can
guarantee that the vcpu is not an hvm one (All HVM pagefaults are
handled via paging_fault()).  You can also guarantee that CR0.WP is 1,
and don't actually need to pass a vcpu pointer into here at at all.

>      }
>  
>      return spurious_fault;
>  }
>  
> -static enum pf_type spurious_page_fault(
> +static enum pf_type spurious_page_fault(struct vcpu *vcpu,
>      unsigned long addr, const struct cpu_user_regs *regs)
>  {
>      unsigned long flags;
> @@ -1405,7 +1437,7 @@ static enum pf_type spurious_page_fault(
>       * page tables from becoming invalid under our feet during the walk.
>       */
>      local_irq_save(flags);
> -    pf_type = __page_fault_type(addr, regs);
> +    pf_type = __page_fault_type(vcpu, addr, regs);
>      local_irq_restore(flags);
>  
>      return pf_type;
> @@ -1479,6 +1511,7 @@ static int fixup_page_fault(unsigned long addr, struct cpu_user_regs *regs)
>   *  Bit 2: User mode (=1) ; Supervisor mode (=0)
>   *  Bit 3: Reserved bit violation
>   *  Bit 4: Instruction fetch
> + *  Bit 5: Protection-key violations
>   */
>  void do_page_fault(struct cpu_user_regs *regs)
>  {
> @@ -1500,7 +1533,7 @@ void do_page_fault(struct cpu_user_regs *regs)
>  
>      if ( unlikely(!guest_mode(regs)) )
>      {
> -        pf_type = spurious_page_fault(addr, regs);
> +        pf_type = spurious_page_fault(current, addr, regs);
>          if ( (pf_type == smep_fault) || (pf_type == smap_fault) )
>          {
>              console_start_sync();
> @@ -1533,7 +1566,7 @@ void do_page_fault(struct cpu_user_regs *regs)
>  
>      if ( unlikely(current->domain->arch.suppress_spurious_page_faults) )
>      {
> -        pf_type = spurious_page_fault(addr, regs);
> +        pf_type = spurious_page_fault(current, addr, regs);
>          if ( (pf_type == smep_fault) || (pf_type == smap_fault))
>          {
>              printk(XENLOG_G_ERR "%pv fatal SM%cP violation\n",
> diff --git a/xen/include/asm-x86/processor.h b/xen/include/asm-x86/processor.h
> index 427eb84..e8090fb 100644
> --- a/xen/include/asm-x86/processor.h
> +++ b/xen/include/asm-x86/processor.h
> @@ -135,13 +135,14 @@
>  #define TF_kernel_mode         (1<<_TF_kernel_mode)
>  
>  /* #PF error code values. */
> -#define PFEC_page_present   (1U<<0)
> -#define PFEC_write_access   (1U<<1)
> -#define PFEC_user_mode      (1U<<2)
> -#define PFEC_reserved_bit   (1U<<3)
> -#define PFEC_insn_fetch     (1U<<4)
> -#define PFEC_page_paged     (1U<<5)
> -#define PFEC_page_shared    (1U<<6)
> +#define PFEC_page_present       (1U<<0)
> +#define PFEC_write_access       (1U<<1)
> +#define PFEC_user_mode          (1U<<2)
> +#define PFEC_reserved_bit       (1U<<3)
> +#define PFEC_insn_fetch         (1U<<4)
> +#define PFEC_protection_key     (1U<<5)
> +#define PFEC_page_paged         (1U<<6)
> +#define PFEC_page_shared        (1U<<7)

Again, this needs a rebase, and you will find that PFEC_prot_key is
already included.

~Andrew

  reply	other threads:[~2015-11-16 15:25 UTC|newest]

Thread overview: 36+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-11-16 10:31 [PATCH 00/10] x86/hvm: pkeys, add memory protection-key support Huaitong Han
2015-11-16 10:31 ` [PATCH 01/10] x86/hvm: pkeys, add pkeys support for cpuid handling Huaitong Han
2015-11-16 12:00   ` Andrew Cooper
2015-11-19 14:39     ` Wu, Feng
2015-11-16 16:58   ` Wei Liu
2015-11-16 10:31 ` [PATCH 02/10] x86/hvm: pkeys, add pku support for x86_capability Huaitong Han
2015-11-16 13:35   ` Andrew Cooper
2015-11-16 10:31 ` [PATCH 03/10] x86/hvm: pkeys, add the flag to enable Memory Protection Keys Huaitong Han
2015-11-16 13:56   ` Andrew Cooper
2015-11-16 10:31 ` [PATCH 04/10] x86/hvm: pkeys, add pkeys support when setting CR4 Huaitong Han
2015-11-16 14:02   ` Andrew Cooper
2015-11-20  1:16   ` Wu, Feng
2015-11-20 10:41     ` Andrew Cooper
2015-11-16 10:31 ` [PATCH 05/10] x86/hvm: pkeys, disable pkeys for guests in non-paging mode Huaitong Han
2015-11-16 14:03   ` Andrew Cooper
2015-11-16 10:31 ` [PATCH 06/10] x86/hvm: pkeys, add functions to get pkeys value from PTE Huaitong Han
2015-11-16 14:16   ` Andrew Cooper
2015-11-16 14:42     ` Jan Beulich
2015-11-16 10:31 ` [PATCH 07/10] x86/hvm: pkeys, add functions to support PKRU access/write Huaitong Han
2015-11-16 15:09   ` Andrew Cooper
2015-11-16 10:31 ` [PATCH 08/10] x86/hvm: pkeys, add pkeys support for do_page_fault Huaitong Han
2015-11-16 15:25   ` Andrew Cooper [this message]
2015-11-16 10:31 ` [PATCH 09/10] x86/hvm: pkeys, add pkeys support for guest_walk_tables Huaitong Han
2015-11-16 16:52   ` Andrew Cooper
2015-11-16 16:59   ` Andrew Cooper
2015-11-16 10:31 ` [PATCH 10/10] x86/hvm: pkeys, add xstate support for pkeys Huaitong Han
2015-11-16 16:52   ` Andrew Cooper
2015-11-16 17:45 ` [PATCH 00/10] x86/hvm: pkeys, add memory protection-key support Andrew Cooper
2015-11-17 10:26   ` Jan Beulich
2015-11-17 16:24     ` Andrew Cooper
2015-11-17 16:36       ` Jan Beulich
2015-11-18  9:12     ` Wu, Feng
2015-11-18 10:10       ` Andrew Cooper
2015-11-19  7:44         ` Wu, Feng
2015-11-19  8:44           ` Jan Beulich
2015-11-19  8:49             ` Wu, Feng

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=5649F571.607@citrix.com \
    --to=andrew.cooper3@citrix.com \
    --cc=eddie.dong@intel.com \
    --cc=george.dunlap@eu.citrix.com \
    --cc=huaitong.han@intel.com \
    --cc=ian.campbell@citrix.com \
    --cc=ian.jackson@eu.citrix.com \
    --cc=jbeulich@suse.com \
    --cc=jun.nakajima@intel.com \
    --cc=keir@xen.org \
    --cc=kevin.tian@intel.com \
    --cc=stefano.stabellini@eu.citrix.com \
    --cc=wei.liu2@citrix.com \
    --cc=xen-devel@lists.xen.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.