Hello so one last bug found by the PAPI testsuite. This one involves the rdpmc auto-disable on last unmap of an event feature. Failing test case: fd=perf_event_open(); addr=mmap(fd); exec() // without closing or unmapping the event fd=perf_event_open(); addr=mmap(fd); rdpmc() // GPFs due to rdpmc being disabled I won't pretend to be able to follow the rdpmc disabling code, but if I add some printks it looks like current->mm->context.perf_rdpmc_allowed isn't properly being reset on exec? In fact, current->mm->context.perf_rdpmc_allowed goes negative which seems like it shouldn't happen? Anyway, a test case for this can be found in the perf_event_tests, tests/rdpmc/rdpmc_exec_papi Vince
On Wed, Jul 26, 2017 at 03:39:01PM -0400, Vince Weaver wrote: > Hello > > so one last bug found by the PAPI testsuite. > > This one involves the rdpmc auto-disable on last unmap of an event > feature. > > Failing test case: > > fd=perf_event_open(); > addr=mmap(fd); > exec() // without closing or unmapping the event > fd=perf_event_open(); > addr=mmap(fd); > rdpmc() // GPFs due to rdpmc being disabled > > I won't pretend to be able to follow the rdpmc disabling code, but if I > add some printks it looks like > current->mm->context.perf_rdpmc_allowed > isn't properly being reset on exec? > > In fact, current->mm->context.perf_rdpmc_allowed goes negative which seems > like it shouldn't happen? > > Anyway, a test case for this can be found in the perf_event_tests, > tests/rdpmc/rdpmc_exec_papi Good find that... The below seems to fix that for me. --- diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 8e3db8f642a7..3186f6e081c4 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -2114,11 +2114,14 @@ static void refresh_pce(void *ignored) load_mm_cr4(this_cpu_read(cpu_tlbstate.loaded_mm)); } -static void x86_pmu_event_mapped(struct perf_event *event) +static void x86_pmu_event_mapped(struct perf_event *event, struct vm_area_struct *vma) { + struct mm_struct *mm; + if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED)) return; + mm = vma->vm_mm; /* * This function relies on not being called concurrently in two * tasks in the same mm. Otherwise one task could observe @@ -2129,22 +2132,21 @@ static void x86_pmu_event_mapped(struct perf_event *event) * For now, this can't happen because all callers hold mmap_sem * for write. If this changes, we'll need a different solution. */ - lockdep_assert_held_exclusive(¤t->mm->mmap_sem); + lockdep_assert_held_exclusive(&mm->mmap_sem); - if (atomic_inc_return(¤t->mm->context.perf_rdpmc_allowed) == 1) - on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1); + if (atomic_inc_return(&mm->context.perf_rdpmc_allowed) == 1) + on_each_cpu_mask(mm_cpumask(mm), refresh_pce, NULL, 1); } -static void x86_pmu_event_unmapped(struct perf_event *event) +static void x86_pmu_event_unmapped(struct perf_event *event, struct vm_area_struct *vma) { - if (!current->mm) - return; + struct mm_struct *mm = vma->vm_mm; if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED)) return; - if (atomic_dec_and_test(¤t->mm->context.perf_rdpmc_allowed)) - on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1); + if (atomic_dec_and_test(&mm->context.perf_rdpmc_allowed)) + on_each_cpu_mask(mm_cpumask(mm), refresh_pce, NULL, 1); } static int x86_pmu_event_idx(struct perf_event *event) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index a3b873fc59e4..271a32a7e3e1 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -310,8 +310,8 @@ struct pmu { * Notification that the event was mapped or unmapped. Called * in the context of the mapping task. */ - void (*event_mapped) (struct perf_event *event); /*optional*/ - void (*event_unmapped) (struct perf_event *event); /*optional*/ + void (*event_mapped) (struct perf_event *event, struct vm_area_struct *vma); /*optional*/ + void (*event_unmapped) (struct perf_event *event, struct vm_area_struct *vma); /*optional*/ /* * Flags for ->add()/->del()/ ->start()/->stop(). There are diff --git a/kernel/events/core.c b/kernel/events/core.c index 426c2ffba16d..f724348bba08 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5090,7 +5105,7 @@ static void perf_mmap_open(struct vm_area_struct *vma) atomic_inc(&event->rb->aux_mmap_count); if (event->pmu->event_mapped) - event->pmu->event_mapped(event); + event->pmu->event_mapped(event, vma); } static void perf_pmu_output_stop(struct perf_event *event); @@ -5113,7 +5128,7 @@ static void perf_mmap_close(struct vm_area_struct *vma) unsigned long size = perf_data_size(rb); if (event->pmu->event_unmapped) - event->pmu->event_unmapped(event); + event->pmu->event_unmapped(event, vma); /* * rb->aux_mmap_count will always drop before rb->mmap_count and @@ -5411,7 +5426,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) vma->vm_ops = &perf_mmap_vmops; if (event->pmu->event_mapped) - event->pmu->event_mapped(event); + event->pmu->event_mapped(event, vma); return ret; }
On Wed, 2 Aug 2017, Peter Zijlstra wrote: > On Wed, Jul 26, 2017 at 03:39:01PM -0400, Vince Weaver wrote: > > In fact, current->mm->context.perf_rdpmc_allowed goes negative which seems > > like it shouldn't happen? > > Good find that... > > The below seems to fix that for me. yes, I can confirm that with this patch all of the various PAPI and perf_event_tests pass on my test system. Thanks, Vince
On Wed, Aug 2, 2017 at 10:39 AM, Peter Zijlstra <peterz@infradead.org> wrote:
> On Wed, Jul 26, 2017 at 03:39:01PM -0400, Vince Weaver wrote:
>> Hello
>>
>> so one last bug found by the PAPI testsuite.
>>
>> This one involves the rdpmc auto-disable on last unmap of an event
>> feature.
>>
>> Failing test case:
>>
>> fd=perf_event_open();
>> addr=mmap(fd);
>> exec() // without closing or unmapping the event
>> fd=perf_event_open();
>> addr=mmap(fd);
>> rdpmc() // GPFs due to rdpmc being disabled
>>
>> I won't pretend to be able to follow the rdpmc disabling code, but if I
>> add some printks it looks like
>> current->mm->context.perf_rdpmc_allowed
>> isn't properly being reset on exec?
>>
>> In fact, current->mm->context.perf_rdpmc_allowed goes negative which seems
>> like it shouldn't happen?
>>
>> Anyway, a test case for this can be found in the perf_event_tests,
>> tests/rdpmc/rdpmc_exec_papi
>
> Good find that...
>
> The below seems to fix that for me.
>
...because execve plays funny games with non-current mms. Whoops.
Reviewed-by: Andy Lutomirski <luto@kernel.org>
Can you send it upstream and tag it for stable?
On Thu, Aug 03, 2017 at 05:37:22PM -0700, Andy Lutomirski wrote: > Reviewed-by: Andy Lutomirski <luto@kernel.org> Thanks! > Can you send it upstream and tag it for stable? Yeah, I'll make it pretty, write a changelog and things like that :-)
On Fri, Aug 04, 2017 at 11:17:02AM +0200, Peter Zijlstra wrote: > Yeah, I'll make it pretty, write a changelog and things like that :-) Slightly different from the earlier one in that I now pass @mm along instead of @vma. Same thing otherwise. --- Subject: perf,x86: Fix RDPMC vs mm_struct tracking From: Peter Zijlstra <peterz@infradead.org> Date: Wed, 2 Aug 2017 19:39:30 +0200 Vince reported: > Failing test case: > > fd=perf_event_open(); > addr=mmap(fd); > exec() // without closing or unmapping the event > fd=perf_event_open(); > addr=mmap(fd); > rdpmc() // GPFs due to rdpmc being disabled The problem is of course that exec() plays tricks with what is current->mm, only destroying the old mappings after having installed the new mm. Fix this confusion by passing along vma->vm_mm instead of relying on current->mm. Cc: Arnaldo Carvalho de Melo <acme@kernel.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Stephane Eranian <eranian@gmail.com> Cc: stable@vger.kernel.org Reported-by: Vince Weaver <vincent.weaver@maine.edu> Tested-by: Vince Weaver <vincent.weaver@maine.edu> Reviewed-by: Andy Lutomirski <luto@kernel.org> Fixes: 1e0fb9ec679c ("perf: Add pmu callbacks to track event mapping and unmapping") Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Link: http://lkml.kernel.org/r/20170802173930.cstykcqefmqt7jau@hirez.programming.kicks-ass.net --- arch/x86/events/core.c | 16 +++++++--------- include/linux/perf_event.h | 4 ++-- kernel/events/core.c | 6 +++--- 3 files changed, 12 insertions(+), 14 deletions(-) --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -2114,7 +2114,7 @@ static void refresh_pce(void *ignored) load_mm_cr4(this_cpu_read(cpu_tlbstate.loaded_mm)); } -static void x86_pmu_event_mapped(struct perf_event *event) +static void x86_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm) { if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED)) return; @@ -2129,22 +2129,20 @@ static void x86_pmu_event_mapped(struct * For now, this can't happen because all callers hold mmap_sem * for write. If this changes, we'll need a different solution. */ - lockdep_assert_held_exclusive(¤t->mm->mmap_sem); + lockdep_assert_held_exclusive(&mm->mmap_sem); - if (atomic_inc_return(¤t->mm->context.perf_rdpmc_allowed) == 1) - on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1); + if (atomic_inc_return(&mm->context.perf_rdpmc_allowed) == 1) + on_each_cpu_mask(mm_cpumask(mm), refresh_pce, NULL, 1); } -static void x86_pmu_event_unmapped(struct perf_event *event) +static void x86_pmu_event_unmapped(struct perf_event *event, struct mm_struct *mm) { - if (!current->mm) - return; if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED)) return; - if (atomic_dec_and_test(¤t->mm->context.perf_rdpmc_allowed)) - on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1); + if (atomic_dec_and_test(&mm->context.perf_rdpmc_allowed)) + on_each_cpu_mask(mm_cpumask(mm), refresh_pce, NULL, 1); } static int x86_pmu_event_idx(struct perf_event *event) --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -310,8 +310,8 @@ struct pmu { * Notification that the event was mapped or unmapped. Called * in the context of the mapping task. */ - void (*event_mapped) (struct perf_event *event); /*optional*/ - void (*event_unmapped) (struct perf_event *event); /*optional*/ + void (*event_mapped) (struct perf_event *event, struct mm_struct *mm); /*optional*/ + void (*event_unmapped) (struct perf_event *event, struct mm_struct *mm); /*optional*/ /* * Flags for ->add()/->del()/ ->start()/->stop(). There are --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5090,7 +5090,7 @@ static void perf_mmap_open(struct vm_are atomic_inc(&event->rb->aux_mmap_count); if (event->pmu->event_mapped) - event->pmu->event_mapped(event); + event->pmu->event_mapped(event, vma->vm_mm); } static void perf_pmu_output_stop(struct perf_event *event); @@ -5113,7 +5113,7 @@ static void perf_mmap_close(struct vm_ar unsigned long size = perf_data_size(rb); if (event->pmu->event_unmapped) - event->pmu->event_unmapped(event); + event->pmu->event_unmapped(event, vma->vm_mm); /* * rb->aux_mmap_count will always drop before rb->mmap_count and @@ -5411,7 +5411,7 @@ static int perf_mmap(struct file *file, vma->vm_ops = &perf_mmap_vmops; if (event->pmu->event_mapped) - event->pmu->event_mapped(event); + event->pmu->event_mapped(event, vma->vm_mm); return ret; }
Commit-ID: bfe334924ccd9f4a53f30240c03cf2f43f5b2df1 Gitweb: http://git.kernel.org/tip/bfe334924ccd9f4a53f30240c03cf2f43f5b2df1 Author: Peter Zijlstra <peterz@infradead.org> AuthorDate: Wed, 2 Aug 2017 19:39:30 +0200 Committer: Ingo Molnar <mingo@kernel.org> CommitDate: Thu, 10 Aug 2017 12:01:08 +0200 perf/x86: Fix RDPMC vs. mm_struct tracking Vince reported the following rdpmc() testcase failure: > Failing test case: > > fd=perf_event_open(); > addr=mmap(fd); > exec() // without closing or unmapping the event > fd=perf_event_open(); > addr=mmap(fd); > rdpmc() // GPFs due to rdpmc being disabled The problem is of course that exec() plays tricks with what is current->mm, only destroying the old mappings after having installed the new mm. Fix this confusion by passing along vma->vm_mm instead of relying on current->mm. Reported-by: Vince Weaver <vincent.weaver@maine.edu> Tested-by: Vince Weaver <vincent.weaver@maine.edu> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Reviewed-by: Andy Lutomirski <luto@kernel.org> Cc: Arnaldo Carvalho de Melo <acme@kernel.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Stephane Eranian <eranian@gmail.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: stable@vger.kernel.org Fixes: 1e0fb9ec679c ("perf: Add pmu callbacks to track event mapping and unmapping") Link: http://lkml.kernel.org/r/20170802173930.cstykcqefmqt7jau@hirez.programming.kicks-ass.net [ Minor cleanups. ] Signed-off-by: Ingo Molnar <mingo@kernel.org> --- arch/x86/events/core.c | 16 +++++++--------- include/linux/perf_event.h | 4 ++-- kernel/events/core.c | 6 +++--- 3 files changed, 12 insertions(+), 14 deletions(-) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 8e3db8f..af12e29 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -2114,7 +2114,7 @@ static void refresh_pce(void *ignored) load_mm_cr4(this_cpu_read(cpu_tlbstate.loaded_mm)); } -static void x86_pmu_event_mapped(struct perf_event *event) +static void x86_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm) { if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED)) return; @@ -2129,22 +2129,20 @@ static void x86_pmu_event_mapped(struct perf_event *event) * For now, this can't happen because all callers hold mmap_sem * for write. If this changes, we'll need a different solution. */ - lockdep_assert_held_exclusive(¤t->mm->mmap_sem); + lockdep_assert_held_exclusive(&mm->mmap_sem); - if (atomic_inc_return(¤t->mm->context.perf_rdpmc_allowed) == 1) - on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1); + if (atomic_inc_return(&mm->context.perf_rdpmc_allowed) == 1) + on_each_cpu_mask(mm_cpumask(mm), refresh_pce, NULL, 1); } -static void x86_pmu_event_unmapped(struct perf_event *event) +static void x86_pmu_event_unmapped(struct perf_event *event, struct mm_struct *mm) { - if (!current->mm) - return; if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED)) return; - if (atomic_dec_and_test(¤t->mm->context.perf_rdpmc_allowed)) - on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1); + if (atomic_dec_and_test(&mm->context.perf_rdpmc_allowed)) + on_each_cpu_mask(mm_cpumask(mm), refresh_pce, NULL, 1); } static int x86_pmu_event_idx(struct perf_event *event) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index a3b873f..b14095b 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -310,8 +310,8 @@ struct pmu { * Notification that the event was mapped or unmapped. Called * in the context of the mapping task. */ - void (*event_mapped) (struct perf_event *event); /*optional*/ - void (*event_unmapped) (struct perf_event *event); /*optional*/ + void (*event_mapped) (struct perf_event *event, struct mm_struct *mm); /* optional */ + void (*event_unmapped) (struct perf_event *event, struct mm_struct *mm); /* optional */ /* * Flags for ->add()/->del()/ ->start()/->stop(). There are diff --git a/kernel/events/core.c b/kernel/events/core.c index 426c2ff..a654b8a 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5090,7 +5090,7 @@ static void perf_mmap_open(struct vm_area_struct *vma) atomic_inc(&event->rb->aux_mmap_count); if (event->pmu->event_mapped) - event->pmu->event_mapped(event); + event->pmu->event_mapped(event, vma->vm_mm); } static void perf_pmu_output_stop(struct perf_event *event); @@ -5113,7 +5113,7 @@ static void perf_mmap_close(struct vm_area_struct *vma) unsigned long size = perf_data_size(rb); if (event->pmu->event_unmapped) - event->pmu->event_unmapped(event); + event->pmu->event_unmapped(event, vma->vm_mm); /* * rb->aux_mmap_count will always drop before rb->mmap_count and @@ -5411,7 +5411,7 @@ aux_unlock: vma->vm_ops = &perf_mmap_vmops; if (event->pmu->event_mapped) - event->pmu->event_mapped(event); + event->pmu->event_mapped(event, vma->vm_mm); return ret; }