All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2] perf/core: Fix data race between perf_event_set_output and perf_mmap_close
@ 2022-07-04 12:00 Yang Jihong
  2022-07-04 15:26 ` Peter Zijlstra
  0 siblings, 1 reply; 7+ messages in thread
From: Yang Jihong @ 2022-07-04 12:00 UTC (permalink / raw)
  To: peterz, mingo, acme, mark.rutland, alexander.shishkin, jolsa,
	namhyung, linux-perf-users, linux-kernel
  Cc: yangjihong1

Data race exists between perf_event_set_output and perf_mmap_close.
The scenario is as follows:

                  CPU1                                                       CPU2
                                                                    perf_mmap_close(event2)
                                                                      if (atomic_dec_and_test(&event2->rb->mmap_count)  // mmap_count 1 -> 0
                                                                        detach_rest = true;
ioctl(event1, PERF_EVENT_IOC_SET_OUTPUT, event2)
  perf_event_set_output(event1, event2)
                                                                      if (!detach_rest)
                                                                        goto out_put;
                                                                      list_for_each_entry_rcu(event, &event2->rb->event_list, rb_entry)
                                                                        ring_buffer_attach(event, NULL)
                                                                      // because event1 has not been added to event2->rb->event_list,
                                                                      // event1->rb is not set to NULL in these loops

    ring_buffer_attach(event1, event2->rb)
      list_add_rcu(&event1->rb_entry, &event2->rb->event_list)

The above data race causes a problem, that is, event1->rb is not NULL, but event1->rb->mmap_count is 0.
If the perf_mmap interface is invoked for the fd of event1, the kernel keeps in the perf_mmap infinite loop:

again:
        mutex_lock(&event->mmap_mutex);
        if (event->rb) {
<SNIP>
                if (!atomic_inc_not_zero(&event->rb->mmap_count)) {
                        /*
                         * Raced against perf_mmap_close() through
                         * perf_event_set_output(). Try again, hope for better
                         * luck.
                         */
                        mutex_unlock(&event->mmap_mutex);
                        goto again;
                }
<SNIP>

This problem has occurred on the Syzkaller, causing the softlockup. The call stack is as follows:
[ 3666.984385][    C2] watchdog: BUG: soft lockup - CPU#2 stuck for 23s! [syz-executor.2:32404]
[ 3666.986137][    C2] Modules linked in:
[ 3666.989581][    C2] CPU: 2 PID: 32404 Comm: syz-executor.2 Not tainted 5.10.0+ #4
[ 3666.990697][    C2] Hardware name: linux,dummy-virt (DT)
[ 3666.992270][    C2] pstate: 80000005 (Nzcv daif -PAN -UAO -TCO BTYPE=--)
[ 3666.993787][    C2] pc : __kasan_check_write+0x0/0x40
[ 3666.994841][    C2] lr : perf_mmap+0x3c8/0xf80
[ 3666.995661][    C2] sp : ffff00001011f8f0
[ 3666.996598][    C2] x29: ffff00001011f8f0 x28: ffff0000cf644868
[ 3666.998488][    C2] x27: ffff000012cad2c0 x26: 0000000000000000
[ 3666.999888][    C2] x25: 0000000000000001 x24: ffff000012cad298
[ 3667.003511][    C2] x23: 0000000000000000 x22: ffff000012cad000
[ 3667.005504][    C2] x21: ffff0000cf644818 x20: ffff0000cf6d2400
[ 3667.006891][    C2] x19: ffff0000cf6d24c0 x18: 0000000000000000
[ 3667.008295][    C2] x17: 0000000000000000 x16: 0000000000000000
[ 3667.009528][    C2] x15: 0000000000000000 x14: 0000000000000000
[ 3667.010658][    C2] x13: 0000000000000000 x12: ffff800002023f17
[ 3667.012169][    C2] x11: 1fffe00002023f16 x10: ffff800002023f16
[ 3667.013780][    C2] x9 : dfffa00000000000 x8 : ffff00001011f8b7
[ 3667.015265][    C2] x7 : 0000000000000001 x6 : ffff800002023f16
[ 3667.016683][    C2] x5 : ffff0000c0f36400 x4 : 0000000000000000
[ 3667.018078][    C2] x3 : ffffa00010000000 x2 : ffffa000119a0000
[ 3667.019343][    C2] x1 : 0000000000000004 x0 : ffff0000cf6d24c0
[ 3667.021276][    C2] Call trace:
[ 3667.022598][    C2]  __kasan_check_write+0x0/0x40
[ 3667.023666][    C2]  __mmap_region+0x7a4/0xc90
[ 3667.024679][    C2]  __do_mmap_mm+0x600/0xa20
[ 3667.025700][    C2]  do_mmap+0x114/0x384
[ 3667.026583][    C2]  vm_mmap_pgoff+0x138/0x230
[ 3667.027532][    C2]  ksys_mmap_pgoff+0x1d8/0x570
[ 3667.028537][    C2]  __arm64_sys_mmap+0xa4/0xd0
[ 3667.029597][    C2]  el0_svc_common.constprop.0+0xf4/0x414
[ 3667.030682][    C2]  do_el0_svc+0x50/0x11c
[ 3667.031545][    C2]  el0_svc+0x20/0x30
[ 3667.032368][    C2]  el0_sync_handler+0xe4/0x1e0
[ 3667.033305][    C2]  el0_sync+0x148/0x180

perf_mmap_close and ring_buffer_attach involve complicated lock recursion.
The solution is to modify ring_buffer_attach function, that is,
after new event is added to rb->event_list, check whether rb->mmap_count is 0,
if the value is 0, another core performs perf_mmap_close operation
on ring buffer during this period. In this case, we set event->rb to NULL.

Signed-off-by: Yang Jihong <yangjihong1@huawei.com>
---

Changes since v1:
 - Modify commit message description.

 kernel/events/core.c | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 80782cddb1da..c67c070f7b39 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5900,6 +5900,7 @@ static void ring_buffer_attach(struct perf_event *event,
 			       struct perf_buffer *rb)
 {
 	struct perf_buffer *old_rb = NULL;
+	struct perf_buffer *new_rb = rb;
 	unsigned long flags;
 
 	WARN_ON_ONCE(event->parent);
@@ -5928,6 +5929,20 @@ static void ring_buffer_attach(struct perf_event *event,
 
 		spin_lock_irqsave(&rb->event_lock, flags);
 		list_add_rcu(&event->rb_entry, &rb->event_list);
+
+		/*
+		 * When perf_mmap_close traverses rb->event_list during
+		 * detach all other events, new event may not be added to
+		 * rb->event_list, let's check again, if rb->mmap_count is 0,
+		 * it indicates that perf_mmap_close is executed.
+		 * Manually delete event from rb->event_list and
+		 * set event->rb to null.
+		 */
+		if (!atomic_read(&rb->mmap_count)) {
+			list_del_rcu(&event->rb_entry);
+			new_rb = NULL;
+		}
+
 		spin_unlock_irqrestore(&rb->event_lock, flags);
 	}
 
@@ -5944,7 +5959,7 @@ static void ring_buffer_attach(struct perf_event *event,
 	if (has_aux(event))
 		perf_event_stop(event, 0);
 
-	rcu_assign_pointer(event->rb, rb);
+	rcu_assign_pointer(event->rb, new_rb);
 
 	if (old_rb) {
 		ring_buffer_put(old_rb);
@@ -11883,6 +11898,13 @@ perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
 			goto unlock;
 	}
 
+	/*
+	 * If rb->mmap_count is 0, perf_mmap_close is being executed,
+	 * the ring buffer is about to be unmapped and cannot be attached.
+	 */
+	if (rb && !atomic_read(&rb->mmap_count))
+		goto unlock;
+
 	ring_buffer_attach(event, rb);
 
 	ret = 0;
-- 
2.30.GIT


^ permalink raw reply related	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2022-07-14 11:35 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-07-04 12:00 [PATCH v2] perf/core: Fix data race between perf_event_set_output and perf_mmap_close Yang Jihong
2022-07-04 15:26 ` Peter Zijlstra
2022-07-05  2:07   ` Yang Jihong
2022-07-05 13:07   ` Peter Zijlstra
2022-07-06 12:29     ` Yang Jihong
2022-07-09  2:00       ` Yang Jihong
2022-07-14 11:35     ` [tip: perf/urgent] perf/core: Fix data race between perf_event_set_output() and perf_mmap_close() tip-bot2 for Peter Zijlstra

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.