* Re: [PATCH v2 7/8] x86, perf: Only allow rdpmc if a perf_event is mapped
@ 2014-10-27 7:16 张静(长谷)
2014-10-27 15:45 ` Andy Lutomirski
0 siblings, 1 reply; 9+ messages in thread
From: 张静(长谷) @ 2014-10-27 7:16 UTC (permalink / raw)
To: 'Andy Lutomirski', 'Peter Zijlstra'
Cc: 'Valdis Kletnieks',
linux-kernel, 'Paul Mackerras',
'Arnaldo Carvalho de Melo', 'Ingo Molnar',
'Kees Cook', 'Andrea Arcangeli',
'Vince Weaver'
>
> We currently allow any process to use rdpmc. This significantly
> weakens the protection offered by PR_TSC_DISABLED, and it could be
> helpful to users attempting to exploit timing attacks.
>
> Since we can't enable access to individual counters, use a very
> coarse heuristic to limit access to rdpmc: allow access only when
> a perf_event is mmapped. This protects seccomp sandboxes.
>
> There is plenty of room to further tighten these restrictions. For
> example, this allows rdpmc for any x86_pmu event, but it's only
> useful for self-monitoring tasks.
>
> As a side effect, cap_user_rdpmc will now be false for AMD uncore
> events. This isn't a real regression, since .event_idx is disabled
> for these events anyway for the time being. Whenever that gets
> re-added, the cap_user_rdpmc code can be adjusted or refactored
> accordingly.
>
> Signed-off-by: Andy Lutomirski <luto@amacapital.net>
> ---
> arch/x86/include/asm/mmu.h | 2 ++
> arch/x86/include/asm/mmu_context.h | 16 +++++++++++
> arch/x86/kernel/cpu/perf_event.c | 57 +++++++++++++++++++++++++-------------
> arch/x86/kernel/cpu/perf_event.h | 2 ++
> 4 files changed, 58 insertions(+), 19 deletions(-)
>
> diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
> index 876e74e8eec7..09b9620a73b4 100644
> --- a/arch/x86/include/asm/mmu.h
> +++ b/arch/x86/include/asm/mmu.h
> @@ -19,6 +19,8 @@ typedef struct {
>
> struct mutex lock;
> void __user *vdso;
> +
> + atomic_t perf_rdpmc_allowed; /* nonzero if rdpmc is allowed */
> } mm_context_t;
>
> #ifdef CONFIG_SMP
> diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
> index 23697f74b372..ccad8d616038 100644
> --- a/arch/x86/include/asm/mmu_context.h
> +++ b/arch/x86/include/asm/mmu_context.h
> @@ -19,6 +19,18 @@ static inline void paravirt_activate_mm(struct mm_struct *prev,
> }
> #endif /* !CONFIG_PARAVIRT */
>
> +#ifdef CONFIG_PERF_EVENTS
> +static inline void load_mm_cr4(struct mm_struct *mm)
> +{
> + if (atomic_read(&mm->context.perf_rdpmc_allowed))
> + cr4_set_bits(X86_CR4_PCE);
> + else
> + cr4_clear_bits(X86_CR4_PCE);
> +}
> +#else
> +static inline void load_mm_cr4(struct mm_struct *mm) {}
> +#endif
> +
> /*
> * Used for LDT copy/destruction.
> */
> @@ -53,6 +65,9 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
> /* Stop flush ipis for the previous mm */
> cpumask_clear_cpu(cpu, mm_cpumask(prev));
>
> + /* Load per-mm CR4 state */
> + load_mm_cr4(next);
> +
> /*
> * Load the LDT, if the LDT is different.
> *
> @@ -88,6 +103,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
> */
> load_cr3(next->pgd);
> trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
> + load_mm_cr4(next);
> load_LDT_nolock(&next->context);
> }
> }
> diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
> index 00fbab7aa587..3e875b3b30f2 100644
> --- a/arch/x86/kernel/cpu/perf_event.c
> +++ b/arch/x86/kernel/cpu/perf_event.c
> @@ -31,6 +31,7 @@
> #include <asm/nmi.h>
> #include <asm/smp.h>
> #include <asm/alternative.h>
> +#include <asm/mmu_context.h>
> #include <asm/tlbflush.h>
> #include <asm/timer.h>
> #include <asm/desc.h>
> @@ -1336,8 +1337,6 @@ x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
> break;
>
> case CPU_STARTING:
> - if (x86_pmu.attr_rdpmc)
> - cr4_set_bits(X86_CR4_PCE);
> if (x86_pmu.cpu_starting)
> x86_pmu.cpu_starting(cpu);
> break;
> @@ -1813,14 +1812,44 @@ static int x86_pmu_event_init(struct perf_event *event)
> event->destroy(event);
> }
>
> + if (ACCESS_ONCE(x86_pmu.attr_rdpmc))
> + event->hw.flags |= PERF_X86_EVENT_RDPMC_ALLOWED;
> +
> return err;
> }
>
> +static void refresh_pce(void *ignored)
> +{
> + if (current->mm)
> + load_mm_cr4(current->mm);
> +}
> +
> +static void x86_pmu_event_mapped(struct perf_event *event)
> +{
> + if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
> + return;
> +
> + if (atomic_inc_return(&current->mm->context.perf_rdpmc_allowed) == 1)
> + on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
> +}
> +
> +static void x86_pmu_event_unmapped(struct perf_event *event)
> +{
> + if (!current->mm)
> + return;
> +
> + if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
> + return;
> +
> + if (atomic_dec_and_test(&current->mm->context.perf_rdpmc_allowed))
> + on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
The current task(T-a on CPU A) is asking CPUs(A, B, C, D) to refresh pce, and looks
the current task(T-d on CPU D) is disturbed if T-d loaded CR4 when going on CPU D.
Hillf
> +}
> +
> static int x86_pmu_event_idx(struct perf_event *event)
> {
> int idx = event->hw.idx;
>
> - if (!x86_pmu.attr_rdpmc)
> + if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
> return 0;
>
> if (x86_pmu.num_counters_fixed && idx >= INTEL_PMC_IDX_FIXED) {
> @@ -1838,16 +1867,6 @@ static ssize_t get_attr_rdpmc(struct device *cdev,
> return snprintf(buf, 40, "%d\n", x86_pmu.attr_rdpmc);
> }
>
> -static void change_rdpmc(void *info)
> -{
> - bool enable = !!(unsigned long)info;
> -
> - if (enable)
> - cr4_set_bits(X86_CR4_PCE);
> - else
> - cr4_clear_bits(X86_CR4_PCE);
> -}
> -
> static ssize_t set_attr_rdpmc(struct device *cdev,
> struct device_attribute *attr,
> const char *buf, size_t count)
> @@ -1862,11 +1881,7 @@ static ssize_t set_attr_rdpmc(struct device *cdev,
> if (x86_pmu.attr_rdpmc_broken)
> return -ENOTSUPP;
>
> - if (!!val != !!x86_pmu.attr_rdpmc) {
> - x86_pmu.attr_rdpmc = !!val;
> - on_each_cpu(change_rdpmc, (void *)val, 1);
> - }
> -
> + x86_pmu.attr_rdpmc = !!val;
> return count;
> }
>
> @@ -1909,6 +1924,9 @@ static struct pmu pmu = {
>
> .event_init = x86_pmu_event_init,
>
> + .event_mapped = x86_pmu_event_mapped,
> + .event_unmapped = x86_pmu_event_unmapped,
> +
> .add = x86_pmu_add,
> .del = x86_pmu_del,
> .start = x86_pmu_start,
> @@ -1930,7 +1948,8 @@ void arch_perf_update_userpage(struct perf_event *event,
>
> userpg->cap_user_time = 0;
> userpg->cap_user_time_zero = 0;
> - userpg->cap_user_rdpmc = x86_pmu.attr_rdpmc;
> + userpg->cap_user_rdpmc =
> + !!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED);
> userpg->pmc_width = x86_pmu.cntval_bits;
>
> if (!sched_clock_stable())
> diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
> index d98a34d435d7..f6868186e67b 100644
> --- a/arch/x86/kernel/cpu/perf_event.h
> +++ b/arch/x86/kernel/cpu/perf_event.h
> @@ -71,6 +71,8 @@ struct event_constraint {
> #define PERF_X86_EVENT_COMMITTED 0x8 /* event passed commit_txn */
> #define PERF_X86_EVENT_PEBS_LD_HSW 0x10 /* haswell style datala, load */
> #define PERF_X86_EVENT_PEBS_NA_HSW 0x20 /* haswell style datala, unknown */
> +#define PERF_X86_EVENT_RDPMC_ALLOWED 0x40 /* grant rdpmc permission */
> +
>
> struct amd_nb {
> int nb_id; /* NorthBridge id */
> --
> 1.9.3
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v2 7/8] x86, perf: Only allow rdpmc if a perf_event is mapped
2014-10-27 7:16 [PATCH v2 7/8] x86, perf: Only allow rdpmc if a perf_event is mapped 张静(长谷)
@ 2014-10-27 15:45 ` Andy Lutomirski
2014-10-28 3:35 ` Hillf Danton
0 siblings, 1 reply; 9+ messages in thread
From: Andy Lutomirski @ 2014-10-27 15:45 UTC (permalink / raw)
To: hillf. zj
Cc: Peter Zijlstra, Ingo Molnar, Vince Weaver, Paul Mackerras,
Kees Cook, Arnaldo Carvalho de Melo, Andrea Arcangeli,
linux-kernel, Valdis Kletnieks
On Oct 27, 2014 12:17 AM, "张静(长谷)" <hillf.zj@alibaba-inc.com> wrote:
>
> >
> > We currently allow any process to use rdpmc. This significantly
> > weakens the protection offered by PR_TSC_DISABLED, and it could be
> > helpful to users attempting to exploit timing attacks.
> >
> > Since we can't enable access to individual counters, use a very
> > coarse heuristic to limit access to rdpmc: allow access only when
> > a perf_event is mmapped. This protects seccomp sandboxes.
> >
> > There is plenty of room to further tighten these restrictions. For
> > example, this allows rdpmc for any x86_pmu event, but it's only
> > useful for self-monitoring tasks.
> >
> > As a side effect, cap_user_rdpmc will now be false for AMD uncore
> > events. This isn't a real regression, since .event_idx is disabled
> > for these events anyway for the time being. Whenever that gets
> > re-added, the cap_user_rdpmc code can be adjusted or refactored
> > accordingly.
> >
> > Signed-off-by: Andy Lutomirski <luto@amacapital.net>
> > ---
> > arch/x86/include/asm/mmu.h | 2 ++
> > arch/x86/include/asm/mmu_context.h | 16 +++++++++++
> > arch/x86/kernel/cpu/perf_event.c | 57 +++++++++++++++++++++++++-------------
> > arch/x86/kernel/cpu/perf_event.h | 2 ++
> > 4 files changed, 58 insertions(+), 19 deletions(-)
> >
> > diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
> > index 876e74e8eec7..09b9620a73b4 100644
> > --- a/arch/x86/include/asm/mmu.h
> > +++ b/arch/x86/include/asm/mmu.h
> > @@ -19,6 +19,8 @@ typedef struct {
> >
> > struct mutex lock;
> > void __user *vdso;
> > +
> > + atomic_t perf_rdpmc_allowed; /* nonzero if rdpmc is allowed */
> > } mm_context_t;
> >
> > #ifdef CONFIG_SMP
> > diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
> > index 23697f74b372..ccad8d616038 100644
> > --- a/arch/x86/include/asm/mmu_context.h
> > +++ b/arch/x86/include/asm/mmu_context.h
> > @@ -19,6 +19,18 @@ static inline void paravirt_activate_mm(struct mm_struct *prev,
> > }
> > #endif /* !CONFIG_PARAVIRT */
> >
> > +#ifdef CONFIG_PERF_EVENTS
> > +static inline void load_mm_cr4(struct mm_struct *mm)
> > +{
> > + if (atomic_read(&mm->context.perf_rdpmc_allowed))
> > + cr4_set_bits(X86_CR4_PCE);
> > + else
> > + cr4_clear_bits(X86_CR4_PCE);
> > +}
> > +#else
> > +static inline void load_mm_cr4(struct mm_struct *mm) {}
> > +#endif
> > +
> > /*
> > * Used for LDT copy/destruction.
> > */
> > @@ -53,6 +65,9 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
> > /* Stop flush ipis for the previous mm */
> > cpumask_clear_cpu(cpu, mm_cpumask(prev));
> >
> > + /* Load per-mm CR4 state */
> > + load_mm_cr4(next);
> > +
> > /*
> > * Load the LDT, if the LDT is different.
> > *
> > @@ -88,6 +103,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
> > */
> > load_cr3(next->pgd);
> > trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
> > + load_mm_cr4(next);
> > load_LDT_nolock(&next->context);
> > }
> > }
> > diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
> > index 00fbab7aa587..3e875b3b30f2 100644
> > --- a/arch/x86/kernel/cpu/perf_event.c
> > +++ b/arch/x86/kernel/cpu/perf_event.c
> > @@ -31,6 +31,7 @@
> > #include <asm/nmi.h>
> > #include <asm/smp.h>
> > #include <asm/alternative.h>
> > +#include <asm/mmu_context.h>
> > #include <asm/tlbflush.h>
> > #include <asm/timer.h>
> > #include <asm/desc.h>
> > @@ -1336,8 +1337,6 @@ x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
> > break;
> >
> > case CPU_STARTING:
> > - if (x86_pmu.attr_rdpmc)
> > - cr4_set_bits(X86_CR4_PCE);
> > if (x86_pmu.cpu_starting)
> > x86_pmu.cpu_starting(cpu);
> > break;
> > @@ -1813,14 +1812,44 @@ static int x86_pmu_event_init(struct perf_event *event)
> > event->destroy(event);
> > }
> >
> > + if (ACCESS_ONCE(x86_pmu.attr_rdpmc))
> > + event->hw.flags |= PERF_X86_EVENT_RDPMC_ALLOWED;
> > +
> > return err;
> > }
> >
> > +static void refresh_pce(void *ignored)
> > +{
> > + if (current->mm)
> > + load_mm_cr4(current->mm);
> > +}
> > +
> > +static void x86_pmu_event_mapped(struct perf_event *event)
> > +{
> > + if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
> > + return;
> > +
> > > + if (atomic_inc_return(&current->mm->context.perf_rdpmc_allowed) == 1)
> > + on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
> > +}
> > +
> > +static void x86_pmu_event_unmapped(struct perf_event *event)
> > +{
> > + if (!current->mm)
> > + return;
> > +
> > + if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
> > + return;
> > +
> > > + if (atomic_dec_and_test(&current->mm->context.perf_rdpmc_allowed))
> > + on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
>
> The current task(T-a on CPU A) is asking CPUs(A, B, C, D) to refresh pce, and looks
> the current task(T-d on CPU D) is disturbed if T-d loaded CR4 when going on CPU D.
I don't understand. This code is intended to interrupt only affected
tasks, except for a race if cpus switch mm while this code is running.
At worst, the race should only result in an unnecessary IPI.
Can you clarify your concern?
--Andy
>
> Hillf
> > +}
> > +
> > static int x86_pmu_event_idx(struct perf_event *event)
> > {
> > int idx = event->hw.idx;
> >
> > - if (!x86_pmu.attr_rdpmc)
> > + if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
> > return 0;
> >
> > if (x86_pmu.num_counters_fixed && idx >= INTEL_PMC_IDX_FIXED) {
> > @@ -1838,16 +1867,6 @@ static ssize_t get_attr_rdpmc(struct device *cdev,
> > return snprintf(buf, 40, "%d\n", x86_pmu.attr_rdpmc);
> > }
> >
> > -static void change_rdpmc(void *info)
> > -{
> > - bool enable = !!(unsigned long)info;
> > -
> > - if (enable)
> > - cr4_set_bits(X86_CR4_PCE);
> > - else
> > - cr4_clear_bits(X86_CR4_PCE);
> > -}
> > -
> > static ssize_t set_attr_rdpmc(struct device *cdev,
> > struct device_attribute *attr,
> > const char *buf, size_t count)
> > @@ -1862,11 +1881,7 @@ static ssize_t set_attr_rdpmc(struct device *cdev,
> > if (x86_pmu.attr_rdpmc_broken)
> > return -ENOTSUPP;
> >
> > - if (!!val != !!x86_pmu.attr_rdpmc) {
> > - x86_pmu.attr_rdpmc = !!val;
> > - on_each_cpu(change_rdpmc, (void *)val, 1);
> > - }
> > -
> > + x86_pmu.attr_rdpmc = !!val;
> > return count;
> > }
> >
> > @@ -1909,6 +1924,9 @@ static struct pmu pmu = {
> >
> > .event_init = x86_pmu_event_init,
> >
> > + .event_mapped = x86_pmu_event_mapped,
> > + .event_unmapped = x86_pmu_event_unmapped,
> > +
> > .add = x86_pmu_add,
> > .del = x86_pmu_del,
> > .start = x86_pmu_start,
> > @@ -1930,7 +1948,8 @@ void arch_perf_update_userpage(struct perf_event *event,
> >
> > userpg->cap_user_time = 0;
> > userpg->cap_user_time_zero = 0;
> > - userpg->cap_user_rdpmc = x86_pmu.attr_rdpmc;
> > + userpg->cap_user_rdpmc =
> > + !!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED);
> > userpg->pmc_width = x86_pmu.cntval_bits;
> >
> > if (!sched_clock_stable())
> > diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
> > index d98a34d435d7..f6868186e67b 100644
> > --- a/arch/x86/kernel/cpu/perf_event.h
> > +++ b/arch/x86/kernel/cpu/perf_event.h
> > @@ -71,6 +71,8 @@ struct event_constraint {
> > #define PERF_X86_EVENT_COMMITTED 0x8 /* event passed commit_txn */
> > #define PERF_X86_EVENT_PEBS_LD_HSW 0x10 /* haswell style datala, load */
> > #define PERF_X86_EVENT_PEBS_NA_HSW 0x20 /* haswell style datala, unknown */
> > +#define PERF_X86_EVENT_RDPMC_ALLOWED 0x40 /* grant rdpmc permission */
> > +
> >
> > struct amd_nb {
> > int nb_id; /* NorthBridge id */
> > --
> > 1.9.3
>
^ permalink raw reply [flat|nested] 9+ messages in thread
* RE: [PATCH v2 7/8] x86, perf: Only allow rdpmc if a perf_event is mapped
2014-10-27 15:45 ` Andy Lutomirski
@ 2014-10-28 3:35 ` Hillf Danton
2014-10-28 3:57 ` Andy Lutomirski
0 siblings, 1 reply; 9+ messages in thread
From: Hillf Danton @ 2014-10-28 3:35 UTC (permalink / raw)
To: 'Andy Lutomirski'
Cc: 'Peter Zijlstra', 'Ingo Molnar',
'Vince Weaver', 'Paul Mackerras',
'Kees Cook', 'Arnaldo Carvalho de Melo',
'Andrea Arcangeli',
linux-kernel, 'Valdis Kletnieks'
> -----Original Message-----
> From: Andy Lutomirski [mailto:luto@amacapital.net]
> Sent: Monday, October 27, 2014 11:45 PM
> To: Hillf Danton
> Cc: Peter Zijlstra; Ingo Molnar; Vince Weaver; Paul Mackerras; Kees Cook; Arnaldo Carvalho de Melo; Andrea Arcangeli; linux-
> kernel@vger.kernel.org; Valdis Kletnieks
> Subject: Re: [PATCH v2 7/8] x86, perf: Only allow rdpmc if a perf_event is mapped
>
> >
> > >
> > > We currently allow any process to use rdpmc. This significantly
> > > weakens the protection offered by PR_TSC_DISABLED, and it could be
> > > helpful to users attempting to exploit timing attacks.
> > >
> > > Since we can't enable access to individual counters, use a very
> > > coarse heuristic to limit access to rdpmc: allow access only when
> > > a perf_event is mmapped. This protects seccomp sandboxes.
> > >
> > > There is plenty of room to further tighten these restrictions. For
> > > example, this allows rdpmc for any x86_pmu event, but it's only
> > > useful for self-monitoring tasks.
> > >
> > > As a side effect, cap_user_rdpmc will now be false for AMD uncore
> > > events. This isn't a real regression, since .event_idx is disabled
> > > for these events anyway for the time being. Whenever that gets
> > > re-added, the cap_user_rdpmc code can be adjusted or refactored
> > > accordingly.
> > >
> > > Signed-off-by: Andy Lutomirski <luto@amacapital.net>
> > > ---
> > > arch/x86/include/asm/mmu.h | 2 ++
> > > arch/x86/include/asm/mmu_context.h | 16 +++++++++++
> > > arch/x86/kernel/cpu/perf_event.c | 57 +++++++++++++++++++++++++-------------
> > > arch/x86/kernel/cpu/perf_event.h | 2 ++
> > > 4 files changed, 58 insertions(+), 19 deletions(-)
> > >
> > > diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
> > > index 876e74e8eec7..09b9620a73b4 100644
> > > --- a/arch/x86/include/asm/mmu.h
> > > +++ b/arch/x86/include/asm/mmu.h
> > > @@ -19,6 +19,8 @@ typedef struct {
> > >
> > > struct mutex lock;
> > > void __user *vdso;
> > > +
> > > + atomic_t perf_rdpmc_allowed; /* nonzero if rdpmc is allowed */
> > > } mm_context_t;
> > >
> > > #ifdef CONFIG_SMP
> > > diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
> > > index 23697f74b372..ccad8d616038 100644
> > > --- a/arch/x86/include/asm/mmu_context.h
> > > +++ b/arch/x86/include/asm/mmu_context.h
> > > @@ -19,6 +19,18 @@ static inline void paravirt_activate_mm(struct mm_struct *prev,
> > > }
> > > #endif /* !CONFIG_PARAVIRT */
> > >
> > > +#ifdef CONFIG_PERF_EVENTS
> > > +static inline void load_mm_cr4(struct mm_struct *mm)
> > > +{
> > > + if (atomic_read(&mm->context.perf_rdpmc_allowed))
> > > + cr4_set_bits(X86_CR4_PCE);
> > > + else
> > > + cr4_clear_bits(X86_CR4_PCE);
> > > +}
> > > +#else
> > > +static inline void load_mm_cr4(struct mm_struct *mm) {}
> > > +#endif
> > > +
> > > /*
> > > * Used for LDT copy/destruction.
> > > */
> > > @@ -53,6 +65,9 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
> > > /* Stop flush ipis for the previous mm */
> > > cpumask_clear_cpu(cpu, mm_cpumask(prev));
> > >
> > > + /* Load per-mm CR4 state */
> > > + load_mm_cr4(next);
> > > +
> > > /*
> > > * Load the LDT, if the LDT is different.
> > > *
> > > @@ -88,6 +103,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
> > > */
> > > load_cr3(next->pgd);
> > > trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
> > > + load_mm_cr4(next);
> > > load_LDT_nolock(&next->context);
> > > }
> > > }
> > > diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
> > > index 00fbab7aa587..3e875b3b30f2 100644
> > > --- a/arch/x86/kernel/cpu/perf_event.c
> > > +++ b/arch/x86/kernel/cpu/perf_event.c
> > > @@ -31,6 +31,7 @@
> > > #include <asm/nmi.h>
> > > #include <asm/smp.h>
> > > #include <asm/alternative.h>
> > > +#include <asm/mmu_context.h>
> > > #include <asm/tlbflush.h>
> > > #include <asm/timer.h>
> > > #include <asm/desc.h>
> > > @@ -1336,8 +1337,6 @@ x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
> > > break;
> > >
> > > case CPU_STARTING:
> > > - if (x86_pmu.attr_rdpmc)
> > > - cr4_set_bits(X86_CR4_PCE);
> > > if (x86_pmu.cpu_starting)
> > > x86_pmu.cpu_starting(cpu);
> > > break;
> > > @@ -1813,14 +1812,44 @@ static int x86_pmu_event_init(struct perf_event *event)
> > > event->destroy(event);
> > > }
> > >
> > > + if (ACCESS_ONCE(x86_pmu.attr_rdpmc))
> > > + event->hw.flags |= PERF_X86_EVENT_RDPMC_ALLOWED;
> > > +
> > > return err;
> > > }
> > >
> > > +static void refresh_pce(void *ignored)
> > > +{
> > > + if (current->mm)
> > > + load_mm_cr4(current->mm);
> > > +}
> > > +
> > > +static void x86_pmu_event_mapped(struct perf_event *event)
> > > +{
> > > + if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
> > > + return;
> > > +
> > > + if (atomic_inc_return(&current->mm->context.perf_rdpmc_allowed) == 1)
> > > + on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
> > > +}
> > > +
> > > +static void x86_pmu_event_unmapped(struct perf_event *event)
> > > +{
> > > + if (!current->mm)
> > > + return;
> > > +
> > > + if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
> > > + return;
> > > +
> > > + if (atomic_dec_and_test(&current->mm->context.perf_rdpmc_allowed))
> > > + on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
> >
> > The current task(T-a on CPU A) is asking CPUs(A, B, C, D) to refresh pce, and looks
> > the current task(T-d on CPU D) is disturbed if T-d loaded CR4 when going on CPU D.
>
> I don't understand. This code is intended to interrupt only affected
> tasks, except for a race if cpus switch mm while this code is running.
> At worst, the race should only result in an unnecessary IPI.
>
> Can you clarify your concern?
>
CPU D CPU A
switch_mm
load_mm_cr4
x86_pmu_event_unmapped
I wonder if the X86_CR4_PCE set on CPU D is
cleared by CPU A by broadcasting IPI.
Hillf
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v2 7/8] x86, perf: Only allow rdpmc if a perf_event is mapped
2014-10-28 3:35 ` Hillf Danton
@ 2014-10-28 3:57 ` Andy Lutomirski
2014-10-28 4:07 ` Hillf Danton
0 siblings, 1 reply; 9+ messages in thread
From: Andy Lutomirski @ 2014-10-28 3:57 UTC (permalink / raw)
To: Hillf Danton
Cc: Peter Zijlstra, Ingo Molnar, Vince Weaver, Paul Mackerras,
Kees Cook, Arnaldo Carvalho de Melo, Andrea Arcangeli,
linux-kernel, Valdis Kletnieks
On Mon, Oct 27, 2014 at 8:35 PM, Hillf Danton <hillf.zj@alibaba-inc.com> wrote:
>> -----Original Message-----
>> From: Andy Lutomirski [mailto:luto@amacapital.net]
>> Sent: Monday, October 27, 2014 11:45 PM
>> To: Hillf Danton
>> Cc: Peter Zijlstra; Ingo Molnar; Vince Weaver; Paul Mackerras; Kees Cook; Arnaldo Carvalho de Melo; Andrea Arcangeli; linux-
>> kernel@vger.kernel.org; Valdis Kletnieks
>> Subject: Re: [PATCH v2 7/8] x86, perf: Only allow rdpmc if a perf_event is mapped
>>
> CPU D CPU A
> switch_mm
> load_mm_cr4
> x86_pmu_event_unmapped
>
> I wonder if the X86_CR4_PCE set on CPU D is
> cleared by CPU A by broadcasting IPI.
>
It should be okay. The IPI does:
+ if (current->mm)
+ load_mm_cr4(current->mm);
which refers to the current task running on the targeted CPU, not to
the IPI sender's task. So, if it happens after a context switch, it
will harmlessly reload the new task's cr4.
refresh_pce can't happen in between switch_mm and updating current,
since irqs are off for the entire duration of the context switch.
--Andy
^ permalink raw reply [flat|nested] 9+ messages in thread
* RE: [PATCH v2 7/8] x86, perf: Only allow rdpmc if a perf_event is mapped
2014-10-28 3:57 ` Andy Lutomirski
@ 2014-10-28 4:07 ` Hillf Danton
2014-10-28 4:27 ` Andy Lutomirski
0 siblings, 1 reply; 9+ messages in thread
From: Hillf Danton @ 2014-10-28 4:07 UTC (permalink / raw)
To: 'Andy Lutomirski'
Cc: 'Peter Zijlstra', 'Ingo Molnar',
'Vince Weaver', 'Paul Mackerras',
'Kees Cook', 'Arnaldo Carvalho de Melo',
'Andrea Arcangeli',
linux-kernel, 'Valdis Kletnieks'
> >> Subject: Re: [PATCH v2 7/8] x86, perf: Only allow rdpmc if a perf_event is mapped
> >>
> > CPU D CPU A
> > switch_mm
> > load_mm_cr4
> > x86_pmu_event_unmapped
> >
> > I wonder if the X86_CR4_PCE set on CPU D is
> > cleared by CPU A by broadcasting IPI.
> >
>
> It should be okay. The IPI does:
>
> + if (current->mm)
> + load_mm_cr4(current->mm);
>
> which refers to the current task running on the targeted CPU, not to
> the IPI sender's task. So, if it happens after a context switch, it
> will harmlessly reload the new task's cr4.
>
Right, but prev != next is checked in switch_mm.
Hillf
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v2 7/8] x86, perf: Only allow rdpmc if a perf_event is mapped
2014-10-28 4:07 ` Hillf Danton
@ 2014-10-28 4:27 ` Andy Lutomirski
0 siblings, 0 replies; 9+ messages in thread
From: Andy Lutomirski @ 2014-10-28 4:27 UTC (permalink / raw)
To: Hillf Danton
Cc: Peter Zijlstra, Ingo Molnar, Vince Weaver, Paul Mackerras,
Kees Cook, Arnaldo Carvalho de Melo, Andrea Arcangeli,
linux-kernel, Valdis Kletnieks
On Mon, Oct 27, 2014 at 9:07 PM, Hillf Danton <hillf.zj@alibaba-inc.com> wrote:
>> >> Subject: Re: [PATCH v2 7/8] x86, perf: Only allow rdpmc if a perf_event is mapped
>> >>
>> > CPU D CPU A
>> > switch_mm
>> > load_mm_cr4
>> > x86_pmu_event_unmapped
>> >
>> > I wonder if the X86_CR4_PCE set on CPU D is
>> > cleared by CPU A by broadcasting IPI.
>> >
>>
>> It should be okay. The IPI does:
>>
>> + if (current->mm)
>> + load_mm_cr4(current->mm);
>>
>> which refers to the current task running on the targetted CPU, not to
>> the IPI sender's task. So, if it happens after a context switch, it
>> will harmlessly reload the new task's cr4.
>>
> Right, but prev != next is checked in switch_mm.
If that happens and !cpumask_test_cpu(cpu, mm_cpumask(next)), then cr4
will be reloaded. So, in the case you described, we should still be
okay.
It's worth checking whether a more complicated race could be a
problem. I think it's okay. Here's my argument.
In x86_pmu_event_mapped / x86_pmu_event_unmapped, for each cpu, either
that cpu is set in mm_cpumask or it's clear. If it's set, then we'll
send the IPI and that cpu is guaranteed to be updated. If it's clear,
then it must become set before any user code in this mm can be
executed. There are no paths through switch_mm that set the bit in
mm_cpumask without reloading cr4, so we should be safe.
Is that convincing?
--Andy
>
> Hillf
>
>
--
Andy Lutomirski
AMA Capital Management, LLC
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH v2 0/8] CR4 handling improvements
@ 2014-10-24 22:58 Andy Lutomirski
2014-10-24 22:58 ` [PATCH v2 7/8] x86, perf: Only allow rdpmc if a perf_event is mapped Andy Lutomirski
0 siblings, 1 reply; 9+ messages in thread
From: Andy Lutomirski @ 2014-10-24 22:58 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Valdis Kletnieks, linux-kernel, Paul Mackerras,
Arnaldo Carvalho de Melo, Ingo Molnar, Kees Cook,
Andrea Arcangeli, Vince Weaver, hillf.zj, Andy Lutomirski
This little series tightens up rdpmc permissions. With it applied,
rdpmc can only be used if a perf_event is actually mmapped. For now,
this is only really useful for seccomp.
At some point this could be further tightened up to only allow rdpmc
if an actual self-monitoring perf event that is compatible with
rdpmc is mapped.
This should add <50ns to context switches between rdpmc-capable and
rdpmc-non-capable mms. I suspect that this is well under 10%
overhead, given that perf already adds some context switch latency.
I think that patches 1-3 are a good idea regardless of any rdpmc changes.
AMD Uncore userspace rdpmc is broken by these patches (cap_user_rdpmc
will be zero), but it was broken anyway.
Changes from v1 (aka RFC):
- Rebased on top of the KVM CR4 fix. This applies to a very recent -linus.
- Renamed the cr4 helpers (Peter, Borislav)
- Fixed buggy cr4 helpers (Hillf)
- Improved lots of comments (everyone)
- Renamed read_cr4 and write_cr4 to make sure I didn't miss anything.
(NB: This will introduce conflicts with Andi's FSGSBASE work. This is
a good thing.)
Andy Lutomirski (7):
x86: Clean up cr4 manipulation
x86: Store a per-cpu shadow copy of CR4
x86: Add a comment clarifying LDT context switching
perf: Add pmu callbacks to track event mapping and unmapping
perf: Pass the event to arch_perf_update_userpage
x86, perf: Only allow rdpmc if a perf_event is mapped
x86, perf: Add /sys/devices/cpu/rdpmc=2 to allow rdpmc for all tasks
Peter Zijlstra (1):
perf: Clean up pmu::event_idx
arch/powerpc/perf/hv-24x7.c | 6 ---
arch/powerpc/perf/hv-gpci.c | 6 ---
arch/s390/kernel/perf_cpum_sf.c | 6 ---
arch/x86/include/asm/mmu.h | 2 +
arch/x86/include/asm/mmu_context.h | 32 ++++++++++++++-
arch/x86/include/asm/paravirt.h | 6 +--
arch/x86/include/asm/processor.h | 33 ----------------
arch/x86/include/asm/special_insns.h | 6 +--
arch/x86/include/asm/tlbflush.h | 77 ++++++++++++++++++++++++++++++++----
arch/x86/include/asm/virtext.h | 5 ++-
arch/x86/kernel/acpi/sleep.c | 2 +-
arch/x86/kernel/cpu/common.c | 17 +++++---
arch/x86/kernel/cpu/mcheck/mce.c | 3 +-
arch/x86/kernel/cpu/mcheck/p5.c | 3 +-
arch/x86/kernel/cpu/mcheck/winchip.c | 3 +-
arch/x86/kernel/cpu/mtrr/cyrix.c | 6 +--
arch/x86/kernel/cpu/mtrr/generic.c | 6 +--
arch/x86/kernel/cpu/perf_event.c | 76 ++++++++++++++++++++++++++---------
arch/x86/kernel/cpu/perf_event.h | 2 +
arch/x86/kernel/head32.c | 1 +
arch/x86/kernel/head64.c | 2 +
arch/x86/kernel/i387.c | 3 +-
arch/x86/kernel/process.c | 5 ++-
arch/x86/kernel/process_32.c | 2 +-
arch/x86/kernel/process_64.c | 2 +-
arch/x86/kernel/setup.c | 2 +-
arch/x86/kernel/xsave.c | 3 +-
arch/x86/kvm/svm.c | 2 +-
arch/x86/kvm/vmx.c | 10 ++---
arch/x86/mm/fault.c | 2 +-
arch/x86/mm/init.c | 12 +++++-
arch/x86/mm/tlb.c | 3 --
arch/x86/power/cpu.c | 11 ++----
arch/x86/realmode/init.c | 2 +-
arch/x86/xen/enlighten.c | 4 +-
drivers/lguest/x86/core.c | 4 +-
include/linux/perf_event.h | 7 ++++
kernel/events/core.c | 29 ++++++--------
kernel/events/hw_breakpoint.c | 7 ----
39 files changed, 256 insertions(+), 154 deletions(-)
--
1.9.3
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH v2 7/8] x86, perf: Only allow rdpmc if a perf_event is mapped
2014-10-24 22:58 [PATCH v2 0/8] CR4 handling improvements Andy Lutomirski
@ 2014-10-24 22:58 ` Andy Lutomirski
2014-10-31 17:54 ` Paolo Bonzini
0 siblings, 1 reply; 9+ messages in thread
From: Andy Lutomirski @ 2014-10-24 22:58 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Valdis Kletnieks, linux-kernel, Paul Mackerras,
Arnaldo Carvalho de Melo, Ingo Molnar, Kees Cook,
Andrea Arcangeli, Vince Weaver, hillf.zj, Andy Lutomirski
We currently allow any process to use rdpmc. This significantly
weakens the protection offered by PR_TSC_DISABLED, and it could be
helpful to users attempting to exploit timing attacks.
Since we can't enable access to individual counters, use a very
coarse heuristic to limit access to rdpmc: allow access only when
a perf_event is mmapped. This protects seccomp sandboxes.
There is plenty of room to further tighten these restrictions. For
example, this allows rdpmc for any x86_pmu event, but it's only
useful for self-monitoring tasks.
As a side effect, cap_user_rdpmc will now be false for AMD uncore
events. This isn't a real regression, since .event_idx is disabled
for these events anyway for the time being. Whenever that gets
re-added, the cap_user_rdpmc code can be adjusted or refactored
accordingly.
Signed-off-by: Andy Lutomirski <luto@amacapital.net>
---
arch/x86/include/asm/mmu.h | 2 ++
arch/x86/include/asm/mmu_context.h | 16 +++++++++++
arch/x86/kernel/cpu/perf_event.c | 57 +++++++++++++++++++++++++-------------
arch/x86/kernel/cpu/perf_event.h | 2 ++
4 files changed, 58 insertions(+), 19 deletions(-)
diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index 876e74e8eec7..09b9620a73b4 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -19,6 +19,8 @@ typedef struct {
struct mutex lock;
void __user *vdso;
+
+ atomic_t perf_rdpmc_allowed; /* nonzero if rdpmc is allowed */
} mm_context_t;
#ifdef CONFIG_SMP
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 23697f74b372..ccad8d616038 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -19,6 +19,18 @@ static inline void paravirt_activate_mm(struct mm_struct *prev,
}
#endif /* !CONFIG_PARAVIRT */
+#ifdef CONFIG_PERF_EVENTS
+static inline void load_mm_cr4(struct mm_struct *mm)
+{
+ if (atomic_read(&mm->context.perf_rdpmc_allowed))
+ cr4_set_bits(X86_CR4_PCE);
+ else
+ cr4_clear_bits(X86_CR4_PCE);
+}
+#else
+static inline void load_mm_cr4(struct mm_struct *mm) {}
+#endif
+
/*
* Used for LDT copy/destruction.
*/
@@ -53,6 +65,9 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
/* Stop flush ipis for the previous mm */
cpumask_clear_cpu(cpu, mm_cpumask(prev));
+ /* Load per-mm CR4 state */
+ load_mm_cr4(next);
+
/*
* Load the LDT, if the LDT is different.
*
@@ -88,6 +103,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
*/
load_cr3(next->pgd);
trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
+ load_mm_cr4(next);
load_LDT_nolock(&next->context);
}
}
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 00fbab7aa587..3e875b3b30f2 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -31,6 +31,7 @@
#include <asm/nmi.h>
#include <asm/smp.h>
#include <asm/alternative.h>
+#include <asm/mmu_context.h>
#include <asm/tlbflush.h>
#include <asm/timer.h>
#include <asm/desc.h>
@@ -1336,8 +1337,6 @@ x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
break;
case CPU_STARTING:
- if (x86_pmu.attr_rdpmc)
- cr4_set_bits(X86_CR4_PCE);
if (x86_pmu.cpu_starting)
x86_pmu.cpu_starting(cpu);
break;
@@ -1813,14 +1812,44 @@ static int x86_pmu_event_init(struct perf_event *event)
event->destroy(event);
}
+ if (ACCESS_ONCE(x86_pmu.attr_rdpmc))
+ event->hw.flags |= PERF_X86_EVENT_RDPMC_ALLOWED;
+
return err;
}
+static void refresh_pce(void *ignored)
+{
+ if (current->mm)
+ load_mm_cr4(current->mm);
+}
+
+static void x86_pmu_event_mapped(struct perf_event *event)
+{
+ if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
+ return;
+
+ if (atomic_inc_return(&current->mm->context.perf_rdpmc_allowed) == 1)
+ on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
+}
+
+static void x86_pmu_event_unmapped(struct perf_event *event)
+{
+ if (!current->mm)
+ return;
+
+ if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
+ return;
+
+ if (atomic_dec_and_test(&current->mm->context.perf_rdpmc_allowed))
+ on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
+}
+
static int x86_pmu_event_idx(struct perf_event *event)
{
int idx = event->hw.idx;
- if (!x86_pmu.attr_rdpmc)
+ if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
return 0;
if (x86_pmu.num_counters_fixed && idx >= INTEL_PMC_IDX_FIXED) {
@@ -1838,16 +1867,6 @@ static ssize_t get_attr_rdpmc(struct device *cdev,
return snprintf(buf, 40, "%d\n", x86_pmu.attr_rdpmc);
}
-static void change_rdpmc(void *info)
-{
- bool enable = !!(unsigned long)info;
-
- if (enable)
- cr4_set_bits(X86_CR4_PCE);
- else
- cr4_clear_bits(X86_CR4_PCE);
-}
-
static ssize_t set_attr_rdpmc(struct device *cdev,
struct device_attribute *attr,
const char *buf, size_t count)
@@ -1862,11 +1881,7 @@ static ssize_t set_attr_rdpmc(struct device *cdev,
if (x86_pmu.attr_rdpmc_broken)
return -ENOTSUPP;
- if (!!val != !!x86_pmu.attr_rdpmc) {
- x86_pmu.attr_rdpmc = !!val;
- on_each_cpu(change_rdpmc, (void *)val, 1);
- }
-
+ x86_pmu.attr_rdpmc = !!val;
return count;
}
@@ -1909,6 +1924,9 @@ static struct pmu pmu = {
.event_init = x86_pmu_event_init,
+ .event_mapped = x86_pmu_event_mapped,
+ .event_unmapped = x86_pmu_event_unmapped,
+
.add = x86_pmu_add,
.del = x86_pmu_del,
.start = x86_pmu_start,
@@ -1930,7 +1948,8 @@ void arch_perf_update_userpage(struct perf_event *event,
userpg->cap_user_time = 0;
userpg->cap_user_time_zero = 0;
- userpg->cap_user_rdpmc = x86_pmu.attr_rdpmc;
+ userpg->cap_user_rdpmc =
+ !!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED);
userpg->pmc_width = x86_pmu.cntval_bits;
if (!sched_clock_stable())
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index d98a34d435d7..f6868186e67b 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -71,6 +71,8 @@ struct event_constraint {
#define PERF_X86_EVENT_COMMITTED 0x8 /* event passed commit_txn */
#define PERF_X86_EVENT_PEBS_LD_HSW 0x10 /* haswell style datala, load */
#define PERF_X86_EVENT_PEBS_NA_HSW 0x20 /* haswell style datala, unknown */
+#define PERF_X86_EVENT_RDPMC_ALLOWED 0x40 /* grant rdpmc permission */
+
struct amd_nb {
int nb_id; /* NorthBridge id */
--
1.9.3
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH v2 7/8] x86, perf: Only allow rdpmc if a perf_event is mapped
2014-10-24 22:58 ` [PATCH v2 7/8] x86, perf: Only allow rdpmc if a perf_event is mapped Andy Lutomirski
@ 2014-10-31 17:54 ` Paolo Bonzini
2014-10-31 18:25 ` Andy Lutomirski
0 siblings, 1 reply; 9+ messages in thread
From: Paolo Bonzini @ 2014-10-31 17:54 UTC (permalink / raw)
To: Andy Lutomirski, Peter Zijlstra
Cc: Valdis Kletnieks, linux-kernel, Paul Mackerras,
Arnaldo Carvalho de Melo, Ingo Molnar, Kees Cook,
Andrea Arcangeli, Vince Weaver, hillf.zj
On 25/10/2014 00:58, Andy Lutomirski wrote:
> We currently allow any process to use rdpmc. This significantly
> weakens the protection offered by PR_TSC_DISABLED, and it could be
> helpful to users attempting to exploit timing attacks.
>
> Since we can't enable access to individual counters, use a very
> coarse heuristic to limit access to rdpmc: allow access only when
> a perf_event is mmapped. This protects seccomp sandboxes.
>
> There is plenty of room to further tighten these restrictions. For
> example, this allows rdpmc for any x86_pmu event, but it's only
> useful for self-monitoring tasks.
>
> As a side effect, cap_user_rdpmc will now be false for AMD uncore
> events. This isn't a real regression, since .event_idx is disabled
> for these events anyway for the time being. Whenever that gets
> re-added, the cap_user_rdpmc code can be adjusted or refactored
> accordingly.
>
> Signed-off-by: Andy Lutomirski <luto@amacapital.net>
What's the impact of this if the host doesn't have "x86,kvm,vmx:
Preserve CR4 across VM entry"?
Paolo
> ---
> arch/x86/include/asm/mmu.h | 2 ++
> arch/x86/include/asm/mmu_context.h | 16 +++++++++++
> arch/x86/kernel/cpu/perf_event.c | 57 +++++++++++++++++++++++++-------------
> arch/x86/kernel/cpu/perf_event.h | 2 ++
> 4 files changed, 58 insertions(+), 19 deletions(-)
>
> diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
> index 876e74e8eec7..09b9620a73b4 100644
> --- a/arch/x86/include/asm/mmu.h
> +++ b/arch/x86/include/asm/mmu.h
> @@ -19,6 +19,8 @@ typedef struct {
>
> struct mutex lock;
> void __user *vdso;
> +
> + atomic_t perf_rdpmc_allowed; /* nonzero if rdpmc is allowed */
> } mm_context_t;
>
> #ifdef CONFIG_SMP
> diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
> index 23697f74b372..ccad8d616038 100644
> --- a/arch/x86/include/asm/mmu_context.h
> +++ b/arch/x86/include/asm/mmu_context.h
> @@ -19,6 +19,18 @@ static inline void paravirt_activate_mm(struct mm_struct *prev,
> }
> #endif /* !CONFIG_PARAVIRT */
>
> +#ifdef CONFIG_PERF_EVENTS
> +static inline void load_mm_cr4(struct mm_struct *mm)
> +{
> + if (atomic_read(&mm->context.perf_rdpmc_allowed))
> + cr4_set_bits(X86_CR4_PCE);
> + else
> + cr4_clear_bits(X86_CR4_PCE);
> +}
> +#else
> +static inline void load_mm_cr4(struct mm_struct *mm) {}
> +#endif
> +
> /*
> * Used for LDT copy/destruction.
> */
> @@ -53,6 +65,9 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
> /* Stop flush ipis for the previous mm */
> cpumask_clear_cpu(cpu, mm_cpumask(prev));
>
> + /* Load per-mm CR4 state */
> + load_mm_cr4(next);
> +
> /*
> * Load the LDT, if the LDT is different.
> *
> @@ -88,6 +103,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
> */
> load_cr3(next->pgd);
> trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
> + load_mm_cr4(next);
> load_LDT_nolock(&next->context);
> }
> }
> diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
> index 00fbab7aa587..3e875b3b30f2 100644
> --- a/arch/x86/kernel/cpu/perf_event.c
> +++ b/arch/x86/kernel/cpu/perf_event.c
> @@ -31,6 +31,7 @@
> #include <asm/nmi.h>
> #include <asm/smp.h>
> #include <asm/alternative.h>
> +#include <asm/mmu_context.h>
> #include <asm/tlbflush.h>
> #include <asm/timer.h>
> #include <asm/desc.h>
> @@ -1336,8 +1337,6 @@ x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
> break;
>
> case CPU_STARTING:
> - if (x86_pmu.attr_rdpmc)
> - cr4_set_bits(X86_CR4_PCE);
> if (x86_pmu.cpu_starting)
> x86_pmu.cpu_starting(cpu);
> break;
> @@ -1813,14 +1812,44 @@ static int x86_pmu_event_init(struct perf_event *event)
> event->destroy(event);
> }
>
> + if (ACCESS_ONCE(x86_pmu.attr_rdpmc))
> + event->hw.flags |= PERF_X86_EVENT_RDPMC_ALLOWED;
> +
> return err;
> }
>
> +static void refresh_pce(void *ignored)
> +{
> + if (current->mm)
> + load_mm_cr4(current->mm);
> +}
> +
> +static void x86_pmu_event_mapped(struct perf_event *event)
> +{
> + if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
> + return;
> +
> > + if (atomic_inc_return(&current->mm->context.perf_rdpmc_allowed) == 1)
> + on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
> +}
> +
> +static void x86_pmu_event_unmapped(struct perf_event *event)
> +{
> + if (!current->mm)
> + return;
> +
> + if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
> + return;
> +
> > + if (atomic_dec_and_test(&current->mm->context.perf_rdpmc_allowed))
> + on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
> +}
> +
> static int x86_pmu_event_idx(struct perf_event *event)
> {
> int idx = event->hw.idx;
>
> - if (!x86_pmu.attr_rdpmc)
> + if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
> return 0;
>
> if (x86_pmu.num_counters_fixed && idx >= INTEL_PMC_IDX_FIXED) {
> @@ -1838,16 +1867,6 @@ static ssize_t get_attr_rdpmc(struct device *cdev,
> return snprintf(buf, 40, "%d\n", x86_pmu.attr_rdpmc);
> }
>
> -static void change_rdpmc(void *info)
> -{
> - bool enable = !!(unsigned long)info;
> -
> - if (enable)
> - cr4_set_bits(X86_CR4_PCE);
> - else
> - cr4_clear_bits(X86_CR4_PCE);
> -}
> -
> static ssize_t set_attr_rdpmc(struct device *cdev,
> struct device_attribute *attr,
> const char *buf, size_t count)
> @@ -1862,11 +1881,7 @@ static ssize_t set_attr_rdpmc(struct device *cdev,
> if (x86_pmu.attr_rdpmc_broken)
> return -ENOTSUPP;
>
> - if (!!val != !!x86_pmu.attr_rdpmc) {
> - x86_pmu.attr_rdpmc = !!val;
> - on_each_cpu(change_rdpmc, (void *)val, 1);
> - }
> -
> + x86_pmu.attr_rdpmc = !!val;
> return count;
> }
>
> @@ -1909,6 +1924,9 @@ static struct pmu pmu = {
>
> .event_init = x86_pmu_event_init,
>
> + .event_mapped = x86_pmu_event_mapped,
> + .event_unmapped = x86_pmu_event_unmapped,
> +
> .add = x86_pmu_add,
> .del = x86_pmu_del,
> .start = x86_pmu_start,
> @@ -1930,7 +1948,8 @@ void arch_perf_update_userpage(struct perf_event *event,
>
> userpg->cap_user_time = 0;
> userpg->cap_user_time_zero = 0;
> - userpg->cap_user_rdpmc = x86_pmu.attr_rdpmc;
> + userpg->cap_user_rdpmc =
> + !!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED);
> userpg->pmc_width = x86_pmu.cntval_bits;
>
> if (!sched_clock_stable())
> diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
> index d98a34d435d7..f6868186e67b 100644
> --- a/arch/x86/kernel/cpu/perf_event.h
> +++ b/arch/x86/kernel/cpu/perf_event.h
> @@ -71,6 +71,8 @@ struct event_constraint {
> #define PERF_X86_EVENT_COMMITTED 0x8 /* event passed commit_txn */
> #define PERF_X86_EVENT_PEBS_LD_HSW 0x10 /* haswell style datala, load */
> #define PERF_X86_EVENT_PEBS_NA_HSW 0x20 /* haswell style datala, unknown */
> +#define PERF_X86_EVENT_RDPMC_ALLOWED 0x40 /* grant rdpmc permission */
> +
>
> struct amd_nb {
> int nb_id; /* NorthBridge id */
>
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v2 7/8] x86, perf: Only allow rdpmc if a perf_event is mapped
2014-10-31 17:54 ` Paolo Bonzini
@ 2014-10-31 18:25 ` Andy Lutomirski
0 siblings, 0 replies; 9+ messages in thread
From: Andy Lutomirski @ 2014-10-31 18:25 UTC (permalink / raw)
To: Paolo Bonzini
Cc: Peter Zijlstra, Ingo Molnar, hillf.zj, Vince Weaver,
linux-kernel, Paul Mackerras, Kees Cook,
Arnaldo Carvalho de Melo, Andrea Arcangeli, Valdis Kletnieks
On Oct 31, 2014 10:54 AM, "Paolo Bonzini" <pbonzini@redhat.com> wrote:
>
> On 25/10/2014 00:58, Andy Lutomirski wrote:
> > We currently allow any process to use rdpmc. This significantly
> > weakens the protection offered by PR_TSC_DISABLED, and it could be
> > helpful to users attempting to exploit timing attacks.
> >
> > Since we can't enable access to individual counters, use a very
> > coarse heuristic to limit access to rdpmc: allow access only when
> > a perf_event is mmapped. This protects seccomp sandboxes.
> >
> > There is plenty of room to further tighten these restrictions. For
> > example, this allows rdpmc for any x86_pmu event, but it's only
> > useful for self-monitoring tasks.
> >
> > As a side effect, cap_user_rdpmc will now be false for AMD uncore
> > events. This isn't a real regression, since .event_idx is disabled
> > for these events anyway for the time being. Whenever that gets
> > re-added, the cap_user_rdpmc code can be adjusted or refactored
> > accordingly.
> >
> > Signed-off-by: Andy Lutomirski <luto@amacapital.net>
>
> What's the impact of this if the host doesn't have "x86,kvm,vmx:
> Preserve CR4 across VM entry"?
Do you mean if these patches are applied in a guest and the host isn't
fixed? I think it shouldn't make a difference. As far as I know,
there was never anything wrong with KVM's handling of the guest's cr4
value, at least under VMX. I haven't looked at SVM much.
If you apply this on the host without the fix, then you'll have some
conflicts, and, if you fix those conflicts, then you'll exacerbate the
VMX bug, because perf_event will trigger it, too. You'll also have
unprivileged-host-user-triggerable IPIs that change cr4, resulting in
lovely cross-CPU races. This is why I arranged the fix so that the
entire window during which a host cr4 change would get lost happens
with interrupts off.
This is actually how I found the bug in the first place. I was trying
to convince myself that these patches weren't racy, so I looked at all
of the cr4-manipulation code I could find, and the VMX code made me
scratch my head. :)
--Andy
>
> Paolo
>
> > ---
> > arch/x86/include/asm/mmu.h | 2 ++
> > arch/x86/include/asm/mmu_context.h | 16 +++++++++++
> > arch/x86/kernel/cpu/perf_event.c | 57 +++++++++++++++++++++++++-------------
> > arch/x86/kernel/cpu/perf_event.h | 2 ++
> > 4 files changed, 58 insertions(+), 19 deletions(-)
> >
> > diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
> > index 876e74e8eec7..09b9620a73b4 100644
> > --- a/arch/x86/include/asm/mmu.h
> > +++ b/arch/x86/include/asm/mmu.h
> > @@ -19,6 +19,8 @@ typedef struct {
> >
> > struct mutex lock;
> > void __user *vdso;
> > +
> > + atomic_t perf_rdpmc_allowed; /* nonzero if rdpmc is allowed */
> > } mm_context_t;
> >
> > #ifdef CONFIG_SMP
> > diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
> > index 23697f74b372..ccad8d616038 100644
> > --- a/arch/x86/include/asm/mmu_context.h
> > +++ b/arch/x86/include/asm/mmu_context.h
> > @@ -19,6 +19,18 @@ static inline void paravirt_activate_mm(struct mm_struct *prev,
> > }
> > #endif /* !CONFIG_PARAVIRT */
> >
> > +#ifdef CONFIG_PERF_EVENTS
> > +static inline void load_mm_cr4(struct mm_struct *mm)
> > +{
> > + if (atomic_read(&mm->context.perf_rdpmc_allowed))
> > + cr4_set_bits(X86_CR4_PCE);
> > + else
> > + cr4_clear_bits(X86_CR4_PCE);
> > +}
> > +#else
> > +static inline void load_mm_cr4(struct mm_struct *mm) {}
> > +#endif
> > +
> > /*
> > * Used for LDT copy/destruction.
> > */
> > @@ -53,6 +65,9 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
> > /* Stop flush ipis for the previous mm */
> > cpumask_clear_cpu(cpu, mm_cpumask(prev));
> >
> > + /* Load per-mm CR4 state */
> > + load_mm_cr4(next);
> > +
> > /*
> > * Load the LDT, if the LDT is different.
> > *
> > @@ -88,6 +103,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
> > */
> > load_cr3(next->pgd);
> > trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
> > + load_mm_cr4(next);
> > load_LDT_nolock(&next->context);
> > }
> > }
> > diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
> > index 00fbab7aa587..3e875b3b30f2 100644
> > --- a/arch/x86/kernel/cpu/perf_event.c
> > +++ b/arch/x86/kernel/cpu/perf_event.c
> > @@ -31,6 +31,7 @@
> > #include <asm/nmi.h>
> > #include <asm/smp.h>
> > #include <asm/alternative.h>
> > +#include <asm/mmu_context.h>
> > #include <asm/tlbflush.h>
> > #include <asm/timer.h>
> > #include <asm/desc.h>
> > @@ -1336,8 +1337,6 @@ x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
> > break;
> >
> > case CPU_STARTING:
> > - if (x86_pmu.attr_rdpmc)
> > - cr4_set_bits(X86_CR4_PCE);
> > if (x86_pmu.cpu_starting)
> > x86_pmu.cpu_starting(cpu);
> > break;
> > @@ -1813,14 +1812,44 @@ static int x86_pmu_event_init(struct perf_event *event)
> > event->destroy(event);
> > }
> >
> > + if (ACCESS_ONCE(x86_pmu.attr_rdpmc))
> > + event->hw.flags |= PERF_X86_EVENT_RDPMC_ALLOWED;
> > +
> > return err;
> > }
> >
> > +static void refresh_pce(void *ignored)
> > +{
> > + if (current->mm)
> > + load_mm_cr4(current->mm);
> > +}
> > +
> > +static void x86_pmu_event_mapped(struct perf_event *event)
> > +{
> > + if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
> > + return;
> > +
> > > + if (atomic_inc_return(&current->mm->context.perf_rdpmc_allowed) == 1)
> > + on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
> > +}
> > +
> > +static void x86_pmu_event_unmapped(struct perf_event *event)
> > +{
> > + if (!current->mm)
> > + return;
> > +
> > + if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
> > + return;
> > +
> > > + if (atomic_dec_and_test(&current->mm->context.perf_rdpmc_allowed))
> > + on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
> > +}
> > +
> > static int x86_pmu_event_idx(struct perf_event *event)
> > {
> > int idx = event->hw.idx;
> >
> > - if (!x86_pmu.attr_rdpmc)
> > + if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
> > return 0;
> >
> > if (x86_pmu.num_counters_fixed && idx >= INTEL_PMC_IDX_FIXED) {
> > @@ -1838,16 +1867,6 @@ static ssize_t get_attr_rdpmc(struct device *cdev,
> > return snprintf(buf, 40, "%d\n", x86_pmu.attr_rdpmc);
> > }
> >
> > -static void change_rdpmc(void *info)
> > -{
> > - bool enable = !!(unsigned long)info;
> > -
> > - if (enable)
> > - cr4_set_bits(X86_CR4_PCE);
> > - else
> > - cr4_clear_bits(X86_CR4_PCE);
> > -}
> > -
> > static ssize_t set_attr_rdpmc(struct device *cdev,
> > struct device_attribute *attr,
> > const char *buf, size_t count)
> > @@ -1862,11 +1881,7 @@ static ssize_t set_attr_rdpmc(struct device *cdev,
> > if (x86_pmu.attr_rdpmc_broken)
> > return -ENOTSUPP;
> >
> > - if (!!val != !!x86_pmu.attr_rdpmc) {
> > - x86_pmu.attr_rdpmc = !!val;
> > - on_each_cpu(change_rdpmc, (void *)val, 1);
> > - }
> > -
> > + x86_pmu.attr_rdpmc = !!val;
> > return count;
> > }
> >
> > @@ -1909,6 +1924,9 @@ static struct pmu pmu = {
> >
> > .event_init = x86_pmu_event_init,
> >
> > + .event_mapped = x86_pmu_event_mapped,
> > + .event_unmapped = x86_pmu_event_unmapped,
> > +
> > .add = x86_pmu_add,
> > .del = x86_pmu_del,
> > .start = x86_pmu_start,
> > @@ -1930,7 +1948,8 @@ void arch_perf_update_userpage(struct perf_event *event,
> >
> > userpg->cap_user_time = 0;
> > userpg->cap_user_time_zero = 0;
> > - userpg->cap_user_rdpmc = x86_pmu.attr_rdpmc;
> > + userpg->cap_user_rdpmc =
> > + !!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED);
> > userpg->pmc_width = x86_pmu.cntval_bits;
> >
> > if (!sched_clock_stable())
> > diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
> > index d98a34d435d7..f6868186e67b 100644
> > --- a/arch/x86/kernel/cpu/perf_event.h
> > +++ b/arch/x86/kernel/cpu/perf_event.h
> > @@ -71,6 +71,8 @@ struct event_constraint {
> > #define PERF_X86_EVENT_COMMITTED 0x8 /* event passed commit_txn */
> > #define PERF_X86_EVENT_PEBS_LD_HSW 0x10 /* haswell style datala, load */
> > #define PERF_X86_EVENT_PEBS_NA_HSW 0x20 /* haswell style datala, unknown */
> > +#define PERF_X86_EVENT_RDPMC_ALLOWED 0x40 /* grant rdpmc permission */
> > +
> >
> > struct amd_nb {
> > int nb_id; /* NorthBridge id */
> >
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2014-10-31 18:26 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-10-27 7:16 [PATCH v2 7/8] x86, perf: Only allow rdpmc if a perf_event is mapped 张静(长谷)
2014-10-27 15:45 ` Andy Lutomirski
2014-10-28 3:35 ` Hillf Danton
2014-10-28 3:57 ` Andy Lutomirski
2014-10-28 4:07 ` Hillf Danton
2014-10-28 4:27 ` Andy Lutomirski
-- strict thread matches above, loose matches on Subject: below --
2014-10-24 22:58 [PATCH v2 0/8] CR4 handling improvements Andy Lutomirski
2014-10-24 22:58 ` [PATCH v2 7/8] x86, perf: Only allow rdpmc if a perf_event is mapped Andy Lutomirski
2014-10-31 17:54 ` Paolo Bonzini
2014-10-31 18:25 ` Andy Lutomirski
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).