linux-kernel.vger.kernel.org archive mirror
* Re: [PATCH v2 7/8] x86, perf: Only allow rdpmc if a perf_event is mapped
@ 2014-10-27  7:16 张静(长谷)
  2014-10-27 15:45 ` Andy Lutomirski
  0 siblings, 1 reply; 9+ messages in thread
From: 张静(长谷) @ 2014-10-27  7:16 UTC (permalink / raw)
  To: 'Andy Lutomirski', 'Peter Zijlstra'
  Cc: 'Valdis Kletnieks',
	linux-kernel, 'Paul Mackerras',
	'Arnaldo Carvalho de Melo', 'Ingo Molnar',
	'Kees Cook', 'Andrea Arcangeli',
	'Vince Weaver'

> 
> We currently allow any process to use rdpmc.  This significantly
> weakens the protection offered by PR_TSC_DISABLED, and it could be
> helpful to users attempting to exploit timing attacks.
> 
> Since we can't enable access to individual counters, use a very
> coarse heuristic to limit access to rdpmc: allow access only when
> a perf_event is mmapped.  This protects seccomp sandboxes.
> 
> There is plenty of room to further tighten these restrictions.  For
> example, this allows rdpmc for any x86_pmu event, but it's only
> useful for self-monitoring tasks.
> 
> As a side effect, cap_user_rdpmc will now be false for AMD uncore
> events.  This isn't a real regression, since .event_idx is disabled
> for these events anyway for the time being.  Whenever that gets
> re-added, the cap_user_rdpmc code can be adjusted or refactored
> accordingly.
> 
> Signed-off-by: Andy Lutomirski <luto@amacapital.net>
> ---
>  arch/x86/include/asm/mmu.h         |  2 ++
>  arch/x86/include/asm/mmu_context.h | 16 +++++++++++
>  arch/x86/kernel/cpu/perf_event.c   | 57 +++++++++++++++++++++++++-------------
>  arch/x86/kernel/cpu/perf_event.h   |  2 ++
>  4 files changed, 58 insertions(+), 19 deletions(-)
> 
> diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
> index 876e74e8eec7..09b9620a73b4 100644
> --- a/arch/x86/include/asm/mmu.h
> +++ b/arch/x86/include/asm/mmu.h
> @@ -19,6 +19,8 @@ typedef struct {
> 
>  	struct mutex lock;
>  	void __user *vdso;
> +
> +	atomic_t perf_rdpmc_allowed;	/* nonzero if rdpmc is allowed */
>  } mm_context_t;
> 
>  #ifdef CONFIG_SMP
> diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
> index 23697f74b372..ccad8d616038 100644
> --- a/arch/x86/include/asm/mmu_context.h
> +++ b/arch/x86/include/asm/mmu_context.h
> @@ -19,6 +19,18 @@ static inline void paravirt_activate_mm(struct mm_struct *prev,
>  }
>  #endif	/* !CONFIG_PARAVIRT */
> 
> +#ifdef CONFIG_PERF_EVENTS
> +static inline void load_mm_cr4(struct mm_struct *mm)
> +{
> +	if (atomic_read(&mm->context.perf_rdpmc_allowed))
> +		cr4_set_bits(X86_CR4_PCE);
> +	else
> +		cr4_clear_bits(X86_CR4_PCE);
> +}
> +#else
> +static inline void load_mm_cr4(struct mm_struct *mm) {}
> +#endif
> +
>  /*
>   * Used for LDT copy/destruction.
>   */
> @@ -53,6 +65,9 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
>  		/* Stop flush ipis for the previous mm */
>  		cpumask_clear_cpu(cpu, mm_cpumask(prev));
> 
> +		/* Load per-mm CR4 state */
> +		load_mm_cr4(next);
> +
>  		/*
>  		 * Load the LDT, if the LDT is different.
>  		 *
> @@ -88,6 +103,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
>  			 */
>  			load_cr3(next->pgd);
>  			trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
> +			load_mm_cr4(next);
>  			load_LDT_nolock(&next->context);
>  		}
>  	}
> diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
> index 00fbab7aa587..3e875b3b30f2 100644
> --- a/arch/x86/kernel/cpu/perf_event.c
> +++ b/arch/x86/kernel/cpu/perf_event.c
> @@ -31,6 +31,7 @@
>  #include <asm/nmi.h>
>  #include <asm/smp.h>
>  #include <asm/alternative.h>
> +#include <asm/mmu_context.h>
>  #include <asm/tlbflush.h>
>  #include <asm/timer.h>
>  #include <asm/desc.h>
> @@ -1336,8 +1337,6 @@ x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
>  		break;
> 
>  	case CPU_STARTING:
> -		if (x86_pmu.attr_rdpmc)
> -			cr4_set_bits(X86_CR4_PCE);
>  		if (x86_pmu.cpu_starting)
>  			x86_pmu.cpu_starting(cpu);
>  		break;
> @@ -1813,14 +1812,44 @@ static int x86_pmu_event_init(struct perf_event *event)
>  			event->destroy(event);
>  	}
> 
> +	if (ACCESS_ONCE(x86_pmu.attr_rdpmc))
> +		event->hw.flags |= PERF_X86_EVENT_RDPMC_ALLOWED;
> +
>  	return err;
>  }
> 
> +static void refresh_pce(void *ignored)
> +{
> +	if (current->mm)
> +		load_mm_cr4(current->mm);
> +}
> +
> +static void x86_pmu_event_mapped(struct perf_event *event)
> +{
> +	if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
> +		return;
> +
> +	if (atomic_inc_return(&current->mm->context.perf_rdpmc_allowed) == 1)
> +		on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
> +}
> +
> +static void x86_pmu_event_unmapped(struct perf_event *event)
> +{
> +	if (!current->mm)
> +		return;
> +
> +	if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
> +		return;
> +
> +	if (atomic_dec_and_test(&current->mm->context.perf_rdpmc_allowed))
> +		on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);

The current task (T-a on CPU A) is asking CPUs (A, B, C, D) to refresh PCE, and it looks
like the current task (T-d on CPU D) is disturbed if T-d loaded CR4 when switching onto CPU D.

Hillf
> +}
> +
>  static int x86_pmu_event_idx(struct perf_event *event)
>  {
>  	int idx = event->hw.idx;
> 
> -	if (!x86_pmu.attr_rdpmc)
> +	if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
>  		return 0;
> 
>  	if (x86_pmu.num_counters_fixed && idx >= INTEL_PMC_IDX_FIXED) {
> @@ -1838,16 +1867,6 @@ static ssize_t get_attr_rdpmc(struct device *cdev,
>  	return snprintf(buf, 40, "%d\n", x86_pmu.attr_rdpmc);
>  }
> 
> -static void change_rdpmc(void *info)
> -{
> -	bool enable = !!(unsigned long)info;
> -
> -	if (enable)
> -		cr4_set_bits(X86_CR4_PCE);
> -	else
> -		cr4_clear_bits(X86_CR4_PCE);
> -}
> -
>  static ssize_t set_attr_rdpmc(struct device *cdev,
>  			      struct device_attribute *attr,
>  			      const char *buf, size_t count)
> @@ -1862,11 +1881,7 @@ static ssize_t set_attr_rdpmc(struct device *cdev,
>  	if (x86_pmu.attr_rdpmc_broken)
>  		return -ENOTSUPP;
> 
> -	if (!!val != !!x86_pmu.attr_rdpmc) {
> -		x86_pmu.attr_rdpmc = !!val;
> -		on_each_cpu(change_rdpmc, (void *)val, 1);
> -	}
> -
> +	x86_pmu.attr_rdpmc = !!val;
>  	return count;
>  }
> 
> @@ -1909,6 +1924,9 @@ static struct pmu pmu = {
> 
>  	.event_init		= x86_pmu_event_init,
> 
> +	.event_mapped		= x86_pmu_event_mapped,
> +	.event_unmapped		= x86_pmu_event_unmapped,
> +
>  	.add			= x86_pmu_add,
>  	.del			= x86_pmu_del,
>  	.start			= x86_pmu_start,
> @@ -1930,7 +1948,8 @@ void arch_perf_update_userpage(struct perf_event *event,
> 
>  	userpg->cap_user_time = 0;
>  	userpg->cap_user_time_zero = 0;
> -	userpg->cap_user_rdpmc = x86_pmu.attr_rdpmc;
> +	userpg->cap_user_rdpmc =
> +		!!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED);
>  	userpg->pmc_width = x86_pmu.cntval_bits;
> 
>  	if (!sched_clock_stable())
> diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
> index d98a34d435d7..f6868186e67b 100644
> --- a/arch/x86/kernel/cpu/perf_event.h
> +++ b/arch/x86/kernel/cpu/perf_event.h
> @@ -71,6 +71,8 @@ struct event_constraint {
>  #define PERF_X86_EVENT_COMMITTED	0x8 /* event passed commit_txn */
>  #define PERF_X86_EVENT_PEBS_LD_HSW	0x10 /* haswell style datala, load */
>  #define PERF_X86_EVENT_PEBS_NA_HSW	0x20 /* haswell style datala, unknown */
> +#define PERF_X86_EVENT_RDPMC_ALLOWED	0x40 /* grant rdpmc permission */
> +
> 
>  struct amd_nb {
>  	int nb_id;  /* NorthBridge id */
> --
> 1.9.3



* Re: [PATCH v2 7/8] x86, perf: Only allow rdpmc if a perf_event is mapped
  2014-10-27  7:16 [PATCH v2 7/8] x86, perf: Only allow rdpmc if a perf_event is mapped 张静(长谷)
@ 2014-10-27 15:45 ` Andy Lutomirski
  2014-10-28  3:35   ` Hillf Danton
  0 siblings, 1 reply; 9+ messages in thread
From: Andy Lutomirski @ 2014-10-27 15:45 UTC (permalink / raw)
  To: hillf.zj
  Cc: Peter Zijlstra, Ingo Molnar, Vince Weaver, Paul Mackerras,
	Kees Cook, Arnaldo Carvalho de Melo, Andrea Arcangeli,
	linux-kernel, Valdis Kletnieks

On Oct 27, 2014 12:17 AM, "张静(长谷)" <hillf.zj@alibaba-inc.com> wrote:
>
> >
> > We currently allow any process to use rdpmc.  This significantly
> > weakens the protection offered by PR_TSC_DISABLED, and it could be
> > helpful to users attempting to exploit timing attacks.
> >
> > Since we can't enable access to individual counters, use a very
> > coarse heuristic to limit access to rdpmc: allow access only when
> > a perf_event is mmapped.  This protects seccomp sandboxes.
> >
> > There is plenty of room to further tighten these restrictions.  For
> > example, this allows rdpmc for any x86_pmu event, but it's only
> > useful for self-monitoring tasks.
> >
> > As a side effect, cap_user_rdpmc will now be false for AMD uncore
> > events.  This isn't a real regression, since .event_idx is disabled
> > for these events anyway for the time being.  Whenever that gets
> > re-added, the cap_user_rdpmc code can be adjusted or refactored
> > accordingly.
> >
> > Signed-off-by: Andy Lutomirski <luto@amacapital.net>
> > ---
> >  arch/x86/include/asm/mmu.h         |  2 ++
> >  arch/x86/include/asm/mmu_context.h | 16 +++++++++++
> >  arch/x86/kernel/cpu/perf_event.c   | 57 +++++++++++++++++++++++++-------------
> >  arch/x86/kernel/cpu/perf_event.h   |  2 ++
> >  4 files changed, 58 insertions(+), 19 deletions(-)
> >
> > diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
> > index 876e74e8eec7..09b9620a73b4 100644
> > --- a/arch/x86/include/asm/mmu.h
> > +++ b/arch/x86/include/asm/mmu.h
> > @@ -19,6 +19,8 @@ typedef struct {
> >
> >       struct mutex lock;
> >       void __user *vdso;
> > +
> > +     atomic_t perf_rdpmc_allowed;    /* nonzero if rdpmc is allowed */
> >  } mm_context_t;
> >
> >  #ifdef CONFIG_SMP
> > diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
> > index 23697f74b372..ccad8d616038 100644
> > --- a/arch/x86/include/asm/mmu_context.h
> > +++ b/arch/x86/include/asm/mmu_context.h
> > @@ -19,6 +19,18 @@ static inline void paravirt_activate_mm(struct mm_struct *prev,
> >  }
> >  #endif       /* !CONFIG_PARAVIRT */
> >
> > +#ifdef CONFIG_PERF_EVENTS
> > +static inline void load_mm_cr4(struct mm_struct *mm)
> > +{
> > +     if (atomic_read(&mm->context.perf_rdpmc_allowed))
> > +             cr4_set_bits(X86_CR4_PCE);
> > +     else
> > +             cr4_clear_bits(X86_CR4_PCE);
> > +}
> > +#else
> > +static inline void load_mm_cr4(struct mm_struct *mm) {}
> > +#endif
> > +
> >  /*
> >   * Used for LDT copy/destruction.
> >   */
> > @@ -53,6 +65,9 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
> >               /* Stop flush ipis for the previous mm */
> >               cpumask_clear_cpu(cpu, mm_cpumask(prev));
> >
> > +             /* Load per-mm CR4 state */
> > +             load_mm_cr4(next);
> > +
> >               /*
> >                * Load the LDT, if the LDT is different.
> >                *
> > @@ -88,6 +103,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
> >                        */
> >                       load_cr3(next->pgd);
> >                       trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
> > +                     load_mm_cr4(next);
> >                       load_LDT_nolock(&next->context);
> >               }
> >       }
> > diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
> > index 00fbab7aa587..3e875b3b30f2 100644
> > --- a/arch/x86/kernel/cpu/perf_event.c
> > +++ b/arch/x86/kernel/cpu/perf_event.c
> > @@ -31,6 +31,7 @@
> >  #include <asm/nmi.h>
> >  #include <asm/smp.h>
> >  #include <asm/alternative.h>
> > +#include <asm/mmu_context.h>
> >  #include <asm/tlbflush.h>
> >  #include <asm/timer.h>
> >  #include <asm/desc.h>
> > @@ -1336,8 +1337,6 @@ x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
> >               break;
> >
> >       case CPU_STARTING:
> > -             if (x86_pmu.attr_rdpmc)
> > -                     cr4_set_bits(X86_CR4_PCE);
> >               if (x86_pmu.cpu_starting)
> >                       x86_pmu.cpu_starting(cpu);
> >               break;
> > @@ -1813,14 +1812,44 @@ static int x86_pmu_event_init(struct perf_event *event)
> >                       event->destroy(event);
> >       }
> >
> > +     if (ACCESS_ONCE(x86_pmu.attr_rdpmc))
> > +             event->hw.flags |= PERF_X86_EVENT_RDPMC_ALLOWED;
> > +
> >       return err;
> >  }
> >
> > +static void refresh_pce(void *ignored)
> > +{
> > +     if (current->mm)
> > +             load_mm_cr4(current->mm);
> > +}
> > +
> > +static void x86_pmu_event_mapped(struct perf_event *event)
> > +{
> > +     if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
> > +             return;
> > +
> > +     if (atomic_inc_return(&current->mm->context.perf_rdpmc_allowed) == 1)
> > +             on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
> > +}
> > +
> > +static void x86_pmu_event_unmapped(struct perf_event *event)
> > +{
> > +     if (!current->mm)
> > +             return;
> > +
> > +     if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
> > +             return;
> > +
> > +     if (atomic_dec_and_test(&current->mm->context.perf_rdpmc_allowed))
> > +             on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
>
> The current task (T-a on CPU A) is asking CPUs (A, B, C, D) to refresh PCE, and it looks
> like the current task (T-d on CPU D) is disturbed if T-d loaded CR4 when switching onto CPU D.

I don't understand.  This code is intended to interrupt only affected
tasks, except for a race if cpus switch mm while this code is running.
At worst, the race should only result in an unnecessary IPI.

Can you clarify your concern?

--Andy

>
> Hillf
> > +}
> > +
> >  static int x86_pmu_event_idx(struct perf_event *event)
> >  {
> >       int idx = event->hw.idx;
> >
> > -     if (!x86_pmu.attr_rdpmc)
> > +     if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
> >               return 0;
> >
> >       if (x86_pmu.num_counters_fixed && idx >= INTEL_PMC_IDX_FIXED) {
> > @@ -1838,16 +1867,6 @@ static ssize_t get_attr_rdpmc(struct device *cdev,
> >       return snprintf(buf, 40, "%d\n", x86_pmu.attr_rdpmc);
> >  }
> >
> > -static void change_rdpmc(void *info)
> > -{
> > -     bool enable = !!(unsigned long)info;
> > -
> > -     if (enable)
> > -             cr4_set_bits(X86_CR4_PCE);
> > -     else
> > -             cr4_clear_bits(X86_CR4_PCE);
> > -}
> > -
> >  static ssize_t set_attr_rdpmc(struct device *cdev,
> >                             struct device_attribute *attr,
> >                             const char *buf, size_t count)
> > @@ -1862,11 +1881,7 @@ static ssize_t set_attr_rdpmc(struct device *cdev,
> >       if (x86_pmu.attr_rdpmc_broken)
> >               return -ENOTSUPP;
> >
> > -     if (!!val != !!x86_pmu.attr_rdpmc) {
> > -             x86_pmu.attr_rdpmc = !!val;
> > -             on_each_cpu(change_rdpmc, (void *)val, 1);
> > -     }
> > -
> > +     x86_pmu.attr_rdpmc = !!val;
> >       return count;
> >  }
> >
> > @@ -1909,6 +1924,9 @@ static struct pmu pmu = {
> >
> >       .event_init             = x86_pmu_event_init,
> >
> > +     .event_mapped           = x86_pmu_event_mapped,
> > +     .event_unmapped         = x86_pmu_event_unmapped,
> > +
> >       .add                    = x86_pmu_add,
> >       .del                    = x86_pmu_del,
> >       .start                  = x86_pmu_start,
> > @@ -1930,7 +1948,8 @@ void arch_perf_update_userpage(struct perf_event *event,
> >
> >       userpg->cap_user_time = 0;
> >       userpg->cap_user_time_zero = 0;
> > -     userpg->cap_user_rdpmc = x86_pmu.attr_rdpmc;
> > +     userpg->cap_user_rdpmc =
> > +             !!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED);
> >       userpg->pmc_width = x86_pmu.cntval_bits;
> >
> >       if (!sched_clock_stable())
> > diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
> > index d98a34d435d7..f6868186e67b 100644
> > --- a/arch/x86/kernel/cpu/perf_event.h
> > +++ b/arch/x86/kernel/cpu/perf_event.h
> > @@ -71,6 +71,8 @@ struct event_constraint {
> >  #define PERF_X86_EVENT_COMMITTED     0x8 /* event passed commit_txn */
> >  #define PERF_X86_EVENT_PEBS_LD_HSW   0x10 /* haswell style datala, load */
> >  #define PERF_X86_EVENT_PEBS_NA_HSW   0x20 /* haswell style datala, unknown */
> > +#define PERF_X86_EVENT_RDPMC_ALLOWED 0x40 /* grant rdpmc permission */
> > +
> >
> >  struct amd_nb {
> >       int nb_id;  /* NorthBridge id */
> > --
> > 1.9.3
>


* RE: [PATCH v2 7/8] x86, perf: Only allow rdpmc if a perf_event is mapped
  2014-10-27 15:45 ` Andy Lutomirski
@ 2014-10-28  3:35   ` Hillf Danton
  2014-10-28  3:57     ` Andy Lutomirski
  0 siblings, 1 reply; 9+ messages in thread
From: Hillf Danton @ 2014-10-28  3:35 UTC (permalink / raw)
  To: 'Andy Lutomirski'
  Cc: 'Peter Zijlstra', 'Ingo Molnar',
	'Vince Weaver', 'Paul Mackerras',
	'Kees Cook', 'Arnaldo Carvalho de Melo',
	'Andrea Arcangeli',
	linux-kernel, 'Valdis Kletnieks'

> -----Original Message-----
> From: Andy Lutomirski [mailto:luto@amacapital.net]
> Sent: Monday, October 27, 2014 11:45 PM
> To: Hillf Danton
> Cc: Peter Zijlstra; Ingo Molnar; Vince Weaver; Paul Mackerras; Kees Cook; Arnaldo Carvalho de Melo; Andrea Arcangeli; linux-
> kernel@vger.kernel.org; Valdis Kletnieks
> Subject: Re: [PATCH v2 7/8] x86, perf: Only allow rdpmc if a perf_event is mapped
> 
> >
> > >
> > > We currently allow any process to use rdpmc.  This significantly
> > > weakens the protection offered by PR_TSC_DISABLED, and it could be
> > > helpful to users attempting to exploit timing attacks.
> > >
> > > Since we can't enable access to individual counters, use a very
> > > coarse heuristic to limit access to rdpmc: allow access only when
> > > a perf_event is mmapped.  This protects seccomp sandboxes.
> > >
> > > There is plenty of room to further tighten these restrictions.  For
> > > example, this allows rdpmc for any x86_pmu event, but it's only
> > > useful for self-monitoring tasks.
> > >
> > > As a side effect, cap_user_rdpmc will now be false for AMD uncore
> > > events.  This isn't a real regression, since .event_idx is disabled
> > > for these events anyway for the time being.  Whenever that gets
> > > re-added, the cap_user_rdpmc code can be adjusted or refactored
> > > accordingly.
> > >
> > > Signed-off-by: Andy Lutomirski <luto@amacapital.net>
> > > ---
> > >  arch/x86/include/asm/mmu.h         |  2 ++
> > >  arch/x86/include/asm/mmu_context.h | 16 +++++++++++
> > >  arch/x86/kernel/cpu/perf_event.c   | 57 +++++++++++++++++++++++++-------------
> > >  arch/x86/kernel/cpu/perf_event.h   |  2 ++
> > >  4 files changed, 58 insertions(+), 19 deletions(-)
> > >
> > > diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
> > > index 876e74e8eec7..09b9620a73b4 100644
> > > --- a/arch/x86/include/asm/mmu.h
> > > +++ b/arch/x86/include/asm/mmu.h
> > > @@ -19,6 +19,8 @@ typedef struct {
> > >
> > >       struct mutex lock;
> > >       void __user *vdso;
> > > +
> > > +     atomic_t perf_rdpmc_allowed;    /* nonzero if rdpmc is allowed */
> > >  } mm_context_t;
> > >
> > >  #ifdef CONFIG_SMP
> > > diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
> > > index 23697f74b372..ccad8d616038 100644
> > > --- a/arch/x86/include/asm/mmu_context.h
> > > +++ b/arch/x86/include/asm/mmu_context.h
> > > @@ -19,6 +19,18 @@ static inline void paravirt_activate_mm(struct mm_struct *prev,
> > >  }
> > >  #endif       /* !CONFIG_PARAVIRT */
> > >
> > > +#ifdef CONFIG_PERF_EVENTS
> > > +static inline void load_mm_cr4(struct mm_struct *mm)
> > > +{
> > > +     if (atomic_read(&mm->context.perf_rdpmc_allowed))
> > > +             cr4_set_bits(X86_CR4_PCE);
> > > +     else
> > > +             cr4_clear_bits(X86_CR4_PCE);
> > > +}
> > > +#else
> > > +static inline void load_mm_cr4(struct mm_struct *mm) {}
> > > +#endif
> > > +
> > >  /*
> > >   * Used for LDT copy/destruction.
> > >   */
> > > @@ -53,6 +65,9 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
> > >               /* Stop flush ipis for the previous mm */
> > >               cpumask_clear_cpu(cpu, mm_cpumask(prev));
> > >
> > > +             /* Load per-mm CR4 state */
> > > +             load_mm_cr4(next);
> > > +
> > >               /*
> > >                * Load the LDT, if the LDT is different.
> > >                *
> > > @@ -88,6 +103,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
> > >                        */
> > >                       load_cr3(next->pgd);
> > >                       trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
> > > +                     load_mm_cr4(next);
> > >                       load_LDT_nolock(&next->context);
> > >               }
> > >       }
> > > diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
> > > index 00fbab7aa587..3e875b3b30f2 100644
> > > --- a/arch/x86/kernel/cpu/perf_event.c
> > > +++ b/arch/x86/kernel/cpu/perf_event.c
> > > @@ -31,6 +31,7 @@
> > >  #include <asm/nmi.h>
> > >  #include <asm/smp.h>
> > >  #include <asm/alternative.h>
> > > +#include <asm/mmu_context.h>
> > >  #include <asm/tlbflush.h>
> > >  #include <asm/timer.h>
> > >  #include <asm/desc.h>
> > > @@ -1336,8 +1337,6 @@ x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
> > >               break;
> > >
> > >       case CPU_STARTING:
> > > -             if (x86_pmu.attr_rdpmc)
> > > -                     cr4_set_bits(X86_CR4_PCE);
> > >               if (x86_pmu.cpu_starting)
> > >                       x86_pmu.cpu_starting(cpu);
> > >               break;
> > > @@ -1813,14 +1812,44 @@ static int x86_pmu_event_init(struct perf_event *event)
> > >                       event->destroy(event);
> > >       }
> > >
> > > +     if (ACCESS_ONCE(x86_pmu.attr_rdpmc))
> > > +             event->hw.flags |= PERF_X86_EVENT_RDPMC_ALLOWED;
> > > +
> > >       return err;
> > >  }
> > >
> > > +static void refresh_pce(void *ignored)
> > > +{
> > > +     if (current->mm)
> > > +             load_mm_cr4(current->mm);
> > > +}
> > > +
> > > +static void x86_pmu_event_mapped(struct perf_event *event)
> > > +{
> > > +     if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
> > > +             return;
> > > +
> > > +     if (atomic_inc_return(&current->mm->context.perf_rdpmc_allowed) == 1)
> > > +             on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
> > > +}
> > > +
> > > +static void x86_pmu_event_unmapped(struct perf_event *event)
> > > +{
> > > +     if (!current->mm)
> > > +             return;
> > > +
> > > +     if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
> > > +             return;
> > > +
> > > +     if (atomic_dec_and_test(&current->mm->context.perf_rdpmc_allowed))
> > > +             on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
> >
> > The current task (T-a on CPU A) is asking CPUs (A, B, C, D) to refresh PCE, and it looks
> > like the current task (T-d on CPU D) is disturbed if T-d loaded CR4 when switching onto CPU D.
> 
> I don't understand.  This code is intended to interrupt only affected
> tasks, except for a race if cpus switch mm while this code is running.
> At worst, the race should only result in an unnecessary IPI.
> 
> Can you clarify your concern?
>

CPU D			CPU A
switch_mm
load_mm_cr4
			x86_pmu_event_unmapped

I wonder if the X86_CR4_PCE set on CPU D is
cleared by the IPI broadcast from CPU A.

Hillf




* Re: [PATCH v2 7/8] x86, perf: Only allow rdpmc if a perf_event is mapped
  2014-10-28  3:35   ` Hillf Danton
@ 2014-10-28  3:57     ` Andy Lutomirski
  2014-10-28  4:07       ` Hillf Danton
  0 siblings, 1 reply; 9+ messages in thread
From: Andy Lutomirski @ 2014-10-28  3:57 UTC (permalink / raw)
  To: Hillf Danton
  Cc: Peter Zijlstra, Ingo Molnar, Vince Weaver, Paul Mackerras,
	Kees Cook, Arnaldo Carvalho de Melo, Andrea Arcangeli,
	linux-kernel, Valdis Kletnieks

On Mon, Oct 27, 2014 at 8:35 PM, Hillf Danton <hillf.zj@alibaba-inc.com> wrote:
>> -----Original Message-----
>> From: Andy Lutomirski [mailto:luto@amacapital.net]
>> Sent: Monday, October 27, 2014 11:45 PM
>> To: Hillf Danton
>> Cc: Peter Zijlstra; Ingo Molnar; Vince Weaver; Paul Mackerras; Kees Cook; Arnaldo Carvalho de Melo; Andrea Arcangeli; linux-
>> kernel@vger.kernel.org; Valdis Kletnieks
>> Subject: Re: [PATCH v2 7/8] x86, perf: Only allow rdpmc if a perf_event is mapped
>>
> CPU D                   CPU A
> switch_mm
> load_mm_cr4
>                         x86_pmu_event_unmapped
>
> I wonder if the X86_CR4_PCE set on CPU D is
> cleared by the IPI broadcast from CPU A.
>

It should be okay.  The IPI does:

+       if (current->mm)
+               load_mm_cr4(current->mm);

which refers to the current task running on the targeted CPU, not to
the IPI sender's task.  So, if it happens after a context switch, it
will harmlessly reload the new task's cr4.

refresh_pce can't happen in between switch_mm and updating current,
since irqs are off for the entire duration of the context switch.
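
To spell the ordering out (an illustrative sketch only; the lock and
function names are approximate, not the literal scheduler code):

    CPU D                                  CPU A
    irqs off (rq->lock held)
    switch_mm(prev, T-d->mm, T-d)
      -> load_mm_cr4(T-d->mm)
    __switch_to(): current = T-d           x86_pmu_event_unmapped()
    irqs on again                            -> on_each_cpu_mask(refresh_pce)
    <IPI arrives here or before the
     irq-off region, never inside it>
    refresh_pce()
      -> load_mm_cr4(current->mm)          /* current == T-d */

Whichever side of the switch the IPI lands on, refresh_pce() reads CPU
D's own current->mm, so CR4.PCE ends up matching whatever mm is
actually live on CPU D.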

--Andy


* RE: [PATCH v2 7/8] x86, perf: Only allow rdpmc if a perf_event is mapped
  2014-10-28  3:57     ` Andy Lutomirski
@ 2014-10-28  4:07       ` Hillf Danton
  2014-10-28  4:27         ` Andy Lutomirski
  0 siblings, 1 reply; 9+ messages in thread
From: Hillf Danton @ 2014-10-28  4:07 UTC (permalink / raw)
  To: 'Andy Lutomirski'
  Cc: 'Peter Zijlstra', 'Ingo Molnar',
	'Vince Weaver', 'Paul Mackerras',
	'Kees Cook', 'Arnaldo Carvalho de Melo',
	'Andrea Arcangeli',
	linux-kernel, 'Valdis Kletnieks'

> >> Subject: Re: [PATCH v2 7/8] x86, perf: Only allow rdpmc if a perf_event is mapped
> >>
> > CPU D                   CPU A
> > switch_mm
> > load_mm_cr4
> >                         x86_pmu_event_unmapped
> >
> > I wonder if the X86_CR4_PCE set on CPU D is
> > cleared by the IPI broadcast from CPU A.
> >
> 
> It should be okay.  The IPI does:
> 
> +       if (current->mm)
> +               load_mm_cr4(current->mm);
> 
> which refers to the current task running on the targeted CPU, not to
> the IPI sender's task.  So, if it happens after a context switch, it
> will harmlessly reload the new task's cr4.
>
Right, but prev != next is checked in switch_mm.
 
Hillf




* Re: [PATCH v2 7/8] x86, perf: Only allow rdpmc if a perf_event is mapped
  2014-10-28  4:07       ` Hillf Danton
@ 2014-10-28  4:27         ` Andy Lutomirski
  0 siblings, 0 replies; 9+ messages in thread
From: Andy Lutomirski @ 2014-10-28  4:27 UTC (permalink / raw)
  To: Hillf Danton
  Cc: Peter Zijlstra, Ingo Molnar, Vince Weaver, Paul Mackerras,
	Kees Cook, Arnaldo Carvalho de Melo, Andrea Arcangeli,
	linux-kernel, Valdis Kletnieks

On Mon, Oct 27, 2014 at 9:07 PM, Hillf Danton <hillf.zj@alibaba-inc.com> wrote:
>> >> Subject: Re: [PATCH v2 7/8] x86, perf: Only allow rdpmc if a perf_event is mapped
>> >>
>> > CPU D                   CPU A
>> > switch_mm
>> > load_mm_cr4
>> >                         x86_pmu_event_unmapped
>> >
>> > I wonder if the X86_CR4_PCE set on CPU D is
>> > cleared by the IPI broadcast from CPU A.
>> >
>>
>> It should be okay.  The IPI does:
>>
>> +       if (current->mm)
>> +               load_mm_cr4(current->mm);
>>
>> which refers to the current task running on the targeted CPU, not to
>> the IPI sender's task.  So, if it happens after a context switch, it
>> will harmlessly reload the new task's cr4.
>>
> Right, but prev != next is checked in switch_mm.

If that happens and !cpumask_test_cpu(cpu, mm_cpumask(next)), then cr4
will be reloaded.  So, in the case you described, we should still be
okay.

It's worth checking whether a more complicated race could be a
problem.  I think it's okay.  Here's my argument.

In x86_pmu_event_mapped / x86_pmu_event_unmapped, for each cpu, either
that cpu is set in mm_cpumask or it's clear.  If it's set, then we'll
send the IPI and that cpu is guaranteed to be updated.  If it's clear,
then it must become set before any user code in this mm can be
executed.  There are no paths through switch_mm that set the bit in
mm_cpumask without reloading cr4, so we should be safe.
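
For the "clear" case, the property being relied on is just the
ordering in the patched switch_mm() (trimmed sketch for reference, not
a new change):

        cpumask_set_cpu(cpu, mm_cpumask(next));  /* CPU becomes an IPI target */
        load_cr3(next->pgd);
        load_mm_cr4(next);                       /* PCE state picked up here */
        load_LDT_nolock(&next->context);

i.e. the cpumask bit and the CR4 reload both happen before the CPU can
return to user code in the new mm.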

Is that convincing?

--Andy

>
> Hillf
>
>



-- 
Andy Lutomirski
AMA Capital Management, LLC


* Re: [PATCH v2 7/8] x86, perf: Only allow rdpmc if a perf_event is mapped
  2014-10-31 17:54   ` Paolo Bonzini
@ 2014-10-31 18:25     ` Andy Lutomirski
  0 siblings, 0 replies; 9+ messages in thread
From: Andy Lutomirski @ 2014-10-31 18:25 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: Peter Zijlstra, Ingo Molnar, hillf.zj, Vince Weaver,
	linux-kernel, Paul Mackerras, Kees Cook,
	Arnaldo Carvalho de Melo, Andrea Arcangeli, Valdis Kletnieks

On Oct 31, 2014 10:54 AM, "Paolo Bonzini" <pbonzini@redhat.com> wrote:
>
> On 25/10/2014 00:58, Andy Lutomirski wrote:
> > We currently allow any process to use rdpmc.  This significantly
> > weakens the protection offered by PR_TSC_DISABLED, and it could be
> > helpful to users attempting to exploit timing attacks.
> >
> > Since we can't enable access to individual counters, use a very
> > coarse heuristic to limit access to rdpmc: allow access only when
> > a perf_event is mmapped.  This protects seccomp sandboxes.
> >
> > There is plenty of room to further tighten these restrictions.  For
> > example, this allows rdpmc for any x86_pmu event, but it's only
> > useful for self-monitoring tasks.
> >
> > As a side effect, cap_user_rdpmc will now be false for AMD uncore
> > events.  This isn't a real regression, since .event_idx is disabled
> > for these events anyway for the time being.  Whenever that gets
> > re-added, the cap_user_rdpmc code can be adjusted or refactored
> > accordingly.
> >
> > Signed-off-by: Andy Lutomirski <luto@amacapital.net>
>
> What's the impact of this if the host doesn't have "x86,kvm,vmx:
> Preserve CR4 across VM entry"?

Do you mean if these patches are applied in a guest and the host isn't
fixed?  I think it shouldn't make a difference.  As far as I know,
there was never anything wrong with KVM's handling of the guest's cr4
value, at least under VMX.  I haven't looked at SVM much.

If you apply this on the host without the fix, then you'll have some
conflicts, and, if you fix those conflicts, then you'll exacerbate the
VMX bug, because perf_event will trigger it, too.  You'll also have
unprivileged-host-user-triggerable IPIs that change cr4, resulting in
lovely cross-CPU races.  This is why I arranged the fix so that the
entire window during which a host cr4 change would get lost happens
with interrupts off.

This is actually how I found the bug in the first place.  I was trying
to convince myself that these patches weren't racy, so I looked at all
of the cr4-manipulation code I could find, and the VMX code made me
scratch my head. :)

--Andy

>
> Paolo
>
> > ---
> >  arch/x86/include/asm/mmu.h         |  2 ++
> >  arch/x86/include/asm/mmu_context.h | 16 +++++++++++
> >  arch/x86/kernel/cpu/perf_event.c   | 57 +++++++++++++++++++++++++-------------
> >  arch/x86/kernel/cpu/perf_event.h   |  2 ++
> >  4 files changed, 58 insertions(+), 19 deletions(-)
> >
> > diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
> > index 876e74e8eec7..09b9620a73b4 100644
> > --- a/arch/x86/include/asm/mmu.h
> > +++ b/arch/x86/include/asm/mmu.h
> > @@ -19,6 +19,8 @@ typedef struct {
> >
> >       struct mutex lock;
> >       void __user *vdso;
> > +
> > +     atomic_t perf_rdpmc_allowed;    /* nonzero if rdpmc is allowed */
> >  } mm_context_t;
> >
> >  #ifdef CONFIG_SMP
> > diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
> > index 23697f74b372..ccad8d616038 100644
> > --- a/arch/x86/include/asm/mmu_context.h
> > +++ b/arch/x86/include/asm/mmu_context.h
> > @@ -19,6 +19,18 @@ static inline void paravirt_activate_mm(struct mm_struct *prev,
> >  }
> >  #endif       /* !CONFIG_PARAVIRT */
> >
> > +#ifdef CONFIG_PERF_EVENTS
> > +static inline void load_mm_cr4(struct mm_struct *mm)
> > +{
> > +     if (atomic_read(&mm->context.perf_rdpmc_allowed))
> > +             cr4_set_bits(X86_CR4_PCE);
> > +     else
> > +             cr4_clear_bits(X86_CR4_PCE);
> > +}
> > +#else
> > +static inline void load_mm_cr4(struct mm_struct *mm) {}
> > +#endif
> > +
> >  /*
> >   * Used for LDT copy/destruction.
> >   */
> > @@ -53,6 +65,9 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
> >               /* Stop flush ipis for the previous mm */
> >               cpumask_clear_cpu(cpu, mm_cpumask(prev));
> >
> > +             /* Load per-mm CR4 state */
> > +             load_mm_cr4(next);
> > +
> >               /*
> >                * Load the LDT, if the LDT is different.
> >                *
> > @@ -88,6 +103,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
> >                        */
> >                       load_cr3(next->pgd);
> >                       trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
> > +                     load_mm_cr4(next);
> >                       load_LDT_nolock(&next->context);
> >               }
> >       }
> > diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
> > index 00fbab7aa587..3e875b3b30f2 100644
> > --- a/arch/x86/kernel/cpu/perf_event.c
> > +++ b/arch/x86/kernel/cpu/perf_event.c
> > @@ -31,6 +31,7 @@
> >  #include <asm/nmi.h>
> >  #include <asm/smp.h>
> >  #include <asm/alternative.h>
> > +#include <asm/mmu_context.h>
> >  #include <asm/tlbflush.h>
> >  #include <asm/timer.h>
> >  #include <asm/desc.h>
> > @@ -1336,8 +1337,6 @@ x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
> >               break;
> >
> >       case CPU_STARTING:
> > -             if (x86_pmu.attr_rdpmc)
> > -                     cr4_set_bits(X86_CR4_PCE);
> >               if (x86_pmu.cpu_starting)
> >                       x86_pmu.cpu_starting(cpu);
> >               break;
> > @@ -1813,14 +1812,44 @@ static int x86_pmu_event_init(struct perf_event *event)
> >                       event->destroy(event);
> >       }
> >
> > +     if (ACCESS_ONCE(x86_pmu.attr_rdpmc))
> > +             event->hw.flags |= PERF_X86_EVENT_RDPMC_ALLOWED;
> > +
> >       return err;
> >  }
> >
> > +static void refresh_pce(void *ignored)
> > +{
> > +     if (current->mm)
> > +             load_mm_cr4(current->mm);
> > +}
> > +
> > +static void x86_pmu_event_mapped(struct perf_event *event)
> > +{
> > +     if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
> > +             return;
> > +
> > +     if (atomic_inc_return(&current->mm->context.perf_rdpmc_allowed) == 1)
> > +             on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
> > +}
> > +
> > +static void x86_pmu_event_unmapped(struct perf_event *event)
> > +{
> > +     if (!current->mm)
> > +             return;
> > +
> > +     if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
> > +             return;
> > +
> > +     if (atomic_dec_and_test(&current->mm->context.perf_rdpmc_allowed))
> > +             on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
> > +}
> > +
> >  static int x86_pmu_event_idx(struct perf_event *event)
> >  {
> >       int idx = event->hw.idx;
> >
> > -     if (!x86_pmu.attr_rdpmc)
> > +     if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
> >               return 0;
> >
> >       if (x86_pmu.num_counters_fixed && idx >= INTEL_PMC_IDX_FIXED) {
> > @@ -1838,16 +1867,6 @@ static ssize_t get_attr_rdpmc(struct device *cdev,
> >       return snprintf(buf, 40, "%d\n", x86_pmu.attr_rdpmc);
> >  }
> >
> > -static void change_rdpmc(void *info)
> > -{
> > -     bool enable = !!(unsigned long)info;
> > -
> > -     if (enable)
> > -             cr4_set_bits(X86_CR4_PCE);
> > -     else
> > -             cr4_clear_bits(X86_CR4_PCE);
> > -}
> > -
> >  static ssize_t set_attr_rdpmc(struct device *cdev,
> >                             struct device_attribute *attr,
> >                             const char *buf, size_t count)
> > @@ -1862,11 +1881,7 @@ static ssize_t set_attr_rdpmc(struct device *cdev,
> >       if (x86_pmu.attr_rdpmc_broken)
> >               return -ENOTSUPP;
> >
> > -     if (!!val != !!x86_pmu.attr_rdpmc) {
> > -             x86_pmu.attr_rdpmc = !!val;
> > -             on_each_cpu(change_rdpmc, (void *)val, 1);
> > -     }
> > -
> > +     x86_pmu.attr_rdpmc = !!val;
> >       return count;
> >  }
> >
> > @@ -1909,6 +1924,9 @@ static struct pmu pmu = {
> >
> >       .event_init             = x86_pmu_event_init,
> >
> > +     .event_mapped           = x86_pmu_event_mapped,
> > +     .event_unmapped         = x86_pmu_event_unmapped,
> > +
> >       .add                    = x86_pmu_add,
> >       .del                    = x86_pmu_del,
> >       .start                  = x86_pmu_start,
> > @@ -1930,7 +1948,8 @@ void arch_perf_update_userpage(struct perf_event *event,
> >
> >       userpg->cap_user_time = 0;
> >       userpg->cap_user_time_zero = 0;
> > -     userpg->cap_user_rdpmc = x86_pmu.attr_rdpmc;
> > +     userpg->cap_user_rdpmc =
> > +             !!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED);
> >       userpg->pmc_width = x86_pmu.cntval_bits;
> >
> >       if (!sched_clock_stable())
> > diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
> > index d98a34d435d7..f6868186e67b 100644
> > --- a/arch/x86/kernel/cpu/perf_event.h
> > +++ b/arch/x86/kernel/cpu/perf_event.h
> > @@ -71,6 +71,8 @@ struct event_constraint {
> >  #define PERF_X86_EVENT_COMMITTED     0x8 /* event passed commit_txn */
> >  #define PERF_X86_EVENT_PEBS_LD_HSW   0x10 /* haswell style datala, load */
> >  #define PERF_X86_EVENT_PEBS_NA_HSW   0x20 /* haswell style datala, unknown */
> > +#define PERF_X86_EVENT_RDPMC_ALLOWED 0x40 /* grant rdpmc permission */
> > +
> >
> >  struct amd_nb {
> >       int nb_id;  /* NorthBridge id */
> >


* Re: [PATCH v2 7/8] x86, perf: Only allow rdpmc if a perf_event is mapped
  2014-10-24 22:58 ` [PATCH v2 7/8] x86, perf: Only allow rdpmc if a perf_event is mapped Andy Lutomirski
@ 2014-10-31 17:54   ` Paolo Bonzini
  2014-10-31 18:25     ` Andy Lutomirski
  0 siblings, 1 reply; 9+ messages in thread
From: Paolo Bonzini @ 2014-10-31 17:54 UTC (permalink / raw)
  To: Andy Lutomirski, Peter Zijlstra
  Cc: Valdis Kletnieks, linux-kernel, Paul Mackerras,
	Arnaldo Carvalho de Melo, Ingo Molnar, Kees Cook,
	Andrea Arcangeli, Vince Weaver, hillf.zj

On 25/10/2014 00:58, Andy Lutomirski wrote:
> We currently allow any process to use rdpmc.  This significantly
> weakens the protection offered by PR_TSC_DISABLED, and it could be
> helpful to users attempting to exploit timing attacks.
> 
> Since we can't enable access to individual counters, use a very
> coarse heuristic to limit access to rdpmc: allow access only when
> a perf_event is mmapped.  This protects seccomp sandboxes.
> 
> There is plenty of room to further tighten these restrictions.  For
> example, this allows rdpmc for any x86_pmu event, but it's only
> useful for self-monitoring tasks.
> 
> As a side effect, cap_user_rdpmc will now be false for AMD uncore
> events.  This isn't a real regression, since .event_idx is disabled
> for these events anyway for the time being.  Whenever that gets
> re-added, the cap_user_rdpmc code can be adjusted or refactored
> accordingly.
> 
> Signed-off-by: Andy Lutomirski <luto@amacapital.net>

What's the impact of this if the host doesn't have "x86,kvm,vmx:
Preserve CR4 across VM entry"?

Paolo

> ---
>  arch/x86/include/asm/mmu.h         |  2 ++
>  arch/x86/include/asm/mmu_context.h | 16 +++++++++++
>  arch/x86/kernel/cpu/perf_event.c   | 57 +++++++++++++++++++++++++-------------
>  arch/x86/kernel/cpu/perf_event.h   |  2 ++
>  4 files changed, 58 insertions(+), 19 deletions(-)
> 
> diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
> index 876e74e8eec7..09b9620a73b4 100644
> --- a/arch/x86/include/asm/mmu.h
> +++ b/arch/x86/include/asm/mmu.h
> @@ -19,6 +19,8 @@ typedef struct {
>  
>  	struct mutex lock;
>  	void __user *vdso;
> +
> +	atomic_t perf_rdpmc_allowed;	/* nonzero if rdpmc is allowed */
>  } mm_context_t;
>  
>  #ifdef CONFIG_SMP
> diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
> index 23697f74b372..ccad8d616038 100644
> --- a/arch/x86/include/asm/mmu_context.h
> +++ b/arch/x86/include/asm/mmu_context.h
> @@ -19,6 +19,18 @@ static inline void paravirt_activate_mm(struct mm_struct *prev,
>  }
>  #endif	/* !CONFIG_PARAVIRT */
>  
> +#ifdef CONFIG_PERF_EVENTS
> +static inline void load_mm_cr4(struct mm_struct *mm)
> +{
> +	if (atomic_read(&mm->context.perf_rdpmc_allowed))
> +		cr4_set_bits(X86_CR4_PCE);
> +	else
> +		cr4_clear_bits(X86_CR4_PCE);
> +}
> +#else
> +static inline void load_mm_cr4(struct mm_struct *mm) {}
> +#endif
> +
>  /*
>   * Used for LDT copy/destruction.
>   */
> @@ -53,6 +65,9 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
>  		/* Stop flush ipis for the previous mm */
>  		cpumask_clear_cpu(cpu, mm_cpumask(prev));
>  
> +		/* Load per-mm CR4 state */
> +		load_mm_cr4(next);
> +
>  		/*
>  		 * Load the LDT, if the LDT is different.
>  		 *
> @@ -88,6 +103,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
>  			 */
>  			load_cr3(next->pgd);
>  			trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
> +			load_mm_cr4(next);
>  			load_LDT_nolock(&next->context);
>  		}
>  	}
> diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
> index 00fbab7aa587..3e875b3b30f2 100644
> --- a/arch/x86/kernel/cpu/perf_event.c
> +++ b/arch/x86/kernel/cpu/perf_event.c
> @@ -31,6 +31,7 @@
>  #include <asm/nmi.h>
>  #include <asm/smp.h>
>  #include <asm/alternative.h>
> +#include <asm/mmu_context.h>
>  #include <asm/tlbflush.h>
>  #include <asm/timer.h>
>  #include <asm/desc.h>
> @@ -1336,8 +1337,6 @@ x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
>  		break;
>  
>  	case CPU_STARTING:
> -		if (x86_pmu.attr_rdpmc)
> -			cr4_set_bits(X86_CR4_PCE);
>  		if (x86_pmu.cpu_starting)
>  			x86_pmu.cpu_starting(cpu);
>  		break;
> @@ -1813,14 +1812,44 @@ static int x86_pmu_event_init(struct perf_event *event)
>  			event->destroy(event);
>  	}
>  
> +	if (ACCESS_ONCE(x86_pmu.attr_rdpmc))
> +		event->hw.flags |= PERF_X86_EVENT_RDPMC_ALLOWED;
> +
>  	return err;
>  }
>  
> +static void refresh_pce(void *ignored)
> +{
> +	if (current->mm)
> +		load_mm_cr4(current->mm);
> +}
> +
> +static void x86_pmu_event_mapped(struct perf_event *event)
> +{
> +	if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
> +		return;
> +
> +	if (atomic_inc_return(&current->mm->context.perf_rdpmc_allowed) == 1)
> +		on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
> +}
> +
> +static void x86_pmu_event_unmapped(struct perf_event *event)
> +{
> +	if (!current->mm)
> +		return;
> +
> +	if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
> +		return;
> +
> +	if (atomic_dec_and_test(&current->mm->context.perf_rdpmc_allowed))
> +		on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
> +}
> +
>  static int x86_pmu_event_idx(struct perf_event *event)
>  {
>  	int idx = event->hw.idx;
>  
> -	if (!x86_pmu.attr_rdpmc)
> +	if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
>  		return 0;
>  
>  	if (x86_pmu.num_counters_fixed && idx >= INTEL_PMC_IDX_FIXED) {
> @@ -1838,16 +1867,6 @@ static ssize_t get_attr_rdpmc(struct device *cdev,
>  	return snprintf(buf, 40, "%d\n", x86_pmu.attr_rdpmc);
>  }
>  
> -static void change_rdpmc(void *info)
> -{
> -	bool enable = !!(unsigned long)info;
> -
> -	if (enable)
> -		cr4_set_bits(X86_CR4_PCE);
> -	else
> -		cr4_clear_bits(X86_CR4_PCE);
> -}
> -
>  static ssize_t set_attr_rdpmc(struct device *cdev,
>  			      struct device_attribute *attr,
>  			      const char *buf, size_t count)
> @@ -1862,11 +1881,7 @@ static ssize_t set_attr_rdpmc(struct device *cdev,
>  	if (x86_pmu.attr_rdpmc_broken)
>  		return -ENOTSUPP;
>  
> -	if (!!val != !!x86_pmu.attr_rdpmc) {
> -		x86_pmu.attr_rdpmc = !!val;
> -		on_each_cpu(change_rdpmc, (void *)val, 1);
> -	}
> -
> +	x86_pmu.attr_rdpmc = !!val;
>  	return count;
>  }
>  
> @@ -1909,6 +1924,9 @@ static struct pmu pmu = {
>  
>  	.event_init		= x86_pmu_event_init,
>  
> +	.event_mapped		= x86_pmu_event_mapped,
> +	.event_unmapped		= x86_pmu_event_unmapped,
> +
>  	.add			= x86_pmu_add,
>  	.del			= x86_pmu_del,
>  	.start			= x86_pmu_start,
> @@ -1930,7 +1948,8 @@ void arch_perf_update_userpage(struct perf_event *event,
>  
>  	userpg->cap_user_time = 0;
>  	userpg->cap_user_time_zero = 0;
> -	userpg->cap_user_rdpmc = x86_pmu.attr_rdpmc;
> +	userpg->cap_user_rdpmc =
> +		!!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED);
>  	userpg->pmc_width = x86_pmu.cntval_bits;
>  
>  	if (!sched_clock_stable())
> diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
> index d98a34d435d7..f6868186e67b 100644
> --- a/arch/x86/kernel/cpu/perf_event.h
> +++ b/arch/x86/kernel/cpu/perf_event.h
> @@ -71,6 +71,8 @@ struct event_constraint {
>  #define PERF_X86_EVENT_COMMITTED	0x8 /* event passed commit_txn */
>  #define PERF_X86_EVENT_PEBS_LD_HSW	0x10 /* haswell style datala, load */
>  #define PERF_X86_EVENT_PEBS_NA_HSW	0x20 /* haswell style datala, unknown */
> +#define PERF_X86_EVENT_RDPMC_ALLOWED	0x40 /* grant rdpmc permission */
> +
>  
>  struct amd_nb {
>  	int nb_id;  /* NorthBridge id */
> 


* [PATCH v2 7/8] x86, perf: Only allow rdpmc if a perf_event is mapped
  2014-10-24 22:58 [PATCH v2 0/8] CR4 handling improvements Andy Lutomirski
@ 2014-10-24 22:58 ` Andy Lutomirski
  2014-10-31 17:54   ` Paolo Bonzini
  0 siblings, 1 reply; 9+ messages in thread
From: Andy Lutomirski @ 2014-10-24 22:58 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Valdis Kletnieks, linux-kernel, Paul Mackerras,
	Arnaldo Carvalho de Melo, Ingo Molnar, Kees Cook,
	Andrea Arcangeli, Vince Weaver, hillf.zj, Andy Lutomirski

We currently allow any process to use rdpmc.  This significantly
weakens the protection offered by PR_TSC_DISABLED, and it could be
helpful to users attempting to exploit timing attacks.

Since we can't enable access to individual counters, use a very
coarse heuristic to limit access to rdpmc: allow access only when
a perf_event is mmapped.  This protects seccomp sandboxes.

There is plenty of room to further tighten these restrictions.  For
example, this allows rdpmc for any x86_pmu event, but it's only
useful for self-monitoring tasks.

As a side effect, cap_user_rdpmc will now be false for AMD uncore
events.  This isn't a real regression, since .event_idx is disabled
for these events anyway for the time being.  Whenever that gets
re-added, the cap_user_rdpmc code can be adjusted or refactored
accordingly.

Signed-off-by: Andy Lutomirski <luto@amacapital.net>
---
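(For reference, roughly how a self-monitoring task is expected to use
this -- an illustrative sketch only, not part of the patch.  Error
checking and the usual seqcount/offset dance around the mmap page are
omitted, so it just prints the raw counter value.)

#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

/* glibc has no wrapper for perf_event_open(), so call it directly. */
static long perf_event_open(struct perf_event_attr *attr, pid_t pid,
                            int cpu, int group_fd, unsigned long flags)
{
        return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

static inline uint64_t rdpmc(uint32_t ecx)
{
        uint32_t lo, hi;

        asm volatile("rdpmc" : "=a" (lo), "=d" (hi) : "c" (ecx));
        return ((uint64_t)hi << 32) | lo;
}

int main(void)
{
        struct perf_event_attr attr;
        struct perf_event_mmap_page *pc;
        int fd;

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = PERF_TYPE_HARDWARE;
        attr.config = PERF_COUNT_HW_CPU_CYCLES;

        fd = perf_event_open(&attr, 0, -1, -1, 0);      /* monitor self */
        pc = mmap(NULL, sysconf(_SC_PAGESIZE), PROT_READ, MAP_SHARED, fd, 0);

        /*
         * While this mapping exists, mm->context.perf_rdpmc_allowed is
         * nonzero, CR4.PCE is set on every CPU running this mm, and the
         * kernel reports cap_user_rdpmc = 1 in the mmap page.
         */
        if (pc->cap_user_rdpmc && pc->index)
                printf("raw pmc value: %llu\n",
                       (unsigned long long)rdpmc(pc->index - 1));

        munmap(pc, sysconf(_SC_PAGESIZE));
        close(fd);
        /* Once the last mapping is gone, rdpmc faults again. */
        return 0;
}
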
 arch/x86/include/asm/mmu.h         |  2 ++
 arch/x86/include/asm/mmu_context.h | 16 +++++++++++
 arch/x86/kernel/cpu/perf_event.c   | 57 +++++++++++++++++++++++++-------------
 arch/x86/kernel/cpu/perf_event.h   |  2 ++
 4 files changed, 58 insertions(+), 19 deletions(-)

diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index 876e74e8eec7..09b9620a73b4 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -19,6 +19,8 @@ typedef struct {
 
 	struct mutex lock;
 	void __user *vdso;
+
+	atomic_t perf_rdpmc_allowed;	/* nonzero if rdpmc is allowed */
 } mm_context_t;
 
 #ifdef CONFIG_SMP
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 23697f74b372..ccad8d616038 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -19,6 +19,18 @@ static inline void paravirt_activate_mm(struct mm_struct *prev,
 }
 #endif	/* !CONFIG_PARAVIRT */
 
+#ifdef CONFIG_PERF_EVENTS
+static inline void load_mm_cr4(struct mm_struct *mm)
+{
+	if (atomic_read(&mm->context.perf_rdpmc_allowed))
+		cr4_set_bits(X86_CR4_PCE);
+	else
+		cr4_clear_bits(X86_CR4_PCE);
+}
+#else
+static inline void load_mm_cr4(struct mm_struct *mm) {}
+#endif
+
 /*
  * Used for LDT copy/destruction.
  */
@@ -53,6 +65,9 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 		/* Stop flush ipis for the previous mm */
 		cpumask_clear_cpu(cpu, mm_cpumask(prev));
 
+		/* Load per-mm CR4 state */
+		load_mm_cr4(next);
+
 		/*
 		 * Load the LDT, if the LDT is different.
 		 *
@@ -88,6 +103,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 			 */
 			load_cr3(next->pgd);
 			trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
+			load_mm_cr4(next);
 			load_LDT_nolock(&next->context);
 		}
 	}
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 00fbab7aa587..3e875b3b30f2 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -31,6 +31,7 @@
 #include <asm/nmi.h>
 #include <asm/smp.h>
 #include <asm/alternative.h>
+#include <asm/mmu_context.h>
 #include <asm/tlbflush.h>
 #include <asm/timer.h>
 #include <asm/desc.h>
@@ -1336,8 +1337,6 @@ x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
 		break;
 
 	case CPU_STARTING:
-		if (x86_pmu.attr_rdpmc)
-			cr4_set_bits(X86_CR4_PCE);
 		if (x86_pmu.cpu_starting)
 			x86_pmu.cpu_starting(cpu);
 		break;
@@ -1813,14 +1812,44 @@ static int x86_pmu_event_init(struct perf_event *event)
 			event->destroy(event);
 	}
 
+	if (ACCESS_ONCE(x86_pmu.attr_rdpmc))
+		event->hw.flags |= PERF_X86_EVENT_RDPMC_ALLOWED;
+
 	return err;
 }
 
+static void refresh_pce(void *ignored)
+{
+	if (current->mm)
+		load_mm_cr4(current->mm);
+}
+
+static void x86_pmu_event_mapped(struct perf_event *event)
+{
+	if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
+		return;
+
+	if (atomic_inc_return(&current->mm->context.perf_rdpmc_allowed) == 1)
+		on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
+}
+
+static void x86_pmu_event_unmapped(struct perf_event *event)
+{
+	if (!current->mm)
+		return;
+
+	if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
+		return;
+
+	if (atomic_dec_and_test(&current->mm->context.perf_rdpmc_allowed))
+		on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
+}
+
 static int x86_pmu_event_idx(struct perf_event *event)
 {
 	int idx = event->hw.idx;
 
-	if (!x86_pmu.attr_rdpmc)
+	if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
 		return 0;
 
 	if (x86_pmu.num_counters_fixed && idx >= INTEL_PMC_IDX_FIXED) {
@@ -1838,16 +1867,6 @@ static ssize_t get_attr_rdpmc(struct device *cdev,
 	return snprintf(buf, 40, "%d\n", x86_pmu.attr_rdpmc);
 }
 
-static void change_rdpmc(void *info)
-{
-	bool enable = !!(unsigned long)info;
-
-	if (enable)
-		cr4_set_bits(X86_CR4_PCE);
-	else
-		cr4_clear_bits(X86_CR4_PCE);
-}
-
 static ssize_t set_attr_rdpmc(struct device *cdev,
 			      struct device_attribute *attr,
 			      const char *buf, size_t count)
@@ -1862,11 +1881,7 @@ static ssize_t set_attr_rdpmc(struct device *cdev,
 	if (x86_pmu.attr_rdpmc_broken)
 		return -ENOTSUPP;
 
-	if (!!val != !!x86_pmu.attr_rdpmc) {
-		x86_pmu.attr_rdpmc = !!val;
-		on_each_cpu(change_rdpmc, (void *)val, 1);
-	}
-
+	x86_pmu.attr_rdpmc = !!val;
 	return count;
 }
 
@@ -1909,6 +1924,9 @@ static struct pmu pmu = {
 
 	.event_init		= x86_pmu_event_init,
 
+	.event_mapped		= x86_pmu_event_mapped,
+	.event_unmapped		= x86_pmu_event_unmapped,
+
 	.add			= x86_pmu_add,
 	.del			= x86_pmu_del,
 	.start			= x86_pmu_start,
@@ -1930,7 +1948,8 @@ void arch_perf_update_userpage(struct perf_event *event,
 
 	userpg->cap_user_time = 0;
 	userpg->cap_user_time_zero = 0;
-	userpg->cap_user_rdpmc = x86_pmu.attr_rdpmc;
+	userpg->cap_user_rdpmc =
+		!!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED);
 	userpg->pmc_width = x86_pmu.cntval_bits;
 
 	if (!sched_clock_stable())
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index d98a34d435d7..f6868186e67b 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -71,6 +71,8 @@ struct event_constraint {
 #define PERF_X86_EVENT_COMMITTED	0x8 /* event passed commit_txn */
 #define PERF_X86_EVENT_PEBS_LD_HSW	0x10 /* haswell style datala, load */
 #define PERF_X86_EVENT_PEBS_NA_HSW	0x20 /* haswell style datala, unknown */
+#define PERF_X86_EVENT_RDPMC_ALLOWED	0x40 /* grant rdpmc permission */
+
 
 struct amd_nb {
 	int nb_id;  /* NorthBridge id */
-- 
1.9.3




Thread overview: 9+ messages
2014-10-27  7:16 [PATCH v2 7/8] x86, perf: Only allow rdpmc if a perf_event is mapped 张静(长谷)
2014-10-27 15:45 ` Andy Lutomirski
2014-10-28  3:35   ` Hillf Danton
2014-10-28  3:57     ` Andy Lutomirski
2014-10-28  4:07       ` Hillf Danton
2014-10-28  4:27         ` Andy Lutomirski
  -- strict thread matches above, loose matches on Subject: below --
2014-10-24 22:58 [PATCH v2 0/8] CR4 handling improvements Andy Lutomirski
2014-10-24 22:58 ` [PATCH v2 7/8] x86, perf: Only allow rdpmc if a perf_event is mapped Andy Lutomirski
2014-10-31 17:54   ` Paolo Bonzini
2014-10-31 18:25     ` Andy Lutomirski
