All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] perf: Fix sys_perf_event_open() race against self
@ 2022-05-20 18:38 Peter Zijlstra
  2022-05-23 10:33 ` Ravi Bangoria
  2022-05-23 10:48 ` Mark Rutland
  0 siblings, 2 replies; 3+ messages in thread
From: Peter Zijlstra @ 2022-05-20 18:38 UTC (permalink / raw)
  To: x86, linux-kernel; +Cc: nslusarek, Linus Torvalds


Norbert reported that it's possible to race sys_perf_event_open() such
that the loser ends up in another context from the group leader,
triggering many WARNs.

The move_group case checks for races against itself, but the
!move_group case doesn't, seemingly relying on the previous
group_leader->ctx == ctx check. However, that check is racy due to not
holding any locks at that time.

Therefore, re-check the result after acquiring the locks and bail out
if they no longer match.

Additionally, clarify the not_move_group case from the
move_group-vs-move_group race.

Fixes: f63a8daa5812 ("perf: Fix event->ctx locking")
Reported-by: Norbert Slusarek <nslusarek@gmx.net>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 kernel/events/core.c |   14 ++++++++++++++
 1 file changed, 14 insertions(+)

--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -12217,6 +12217,9 @@ SYSCALL_DEFINE5(perf_event_open,
 		 * Do not allow to attach to a group in a different task
 		 * or CPU context. If we're moving SW events, we'll fix
 		 * this up later, so allow that.
+		 *
+		 * Racy, not holding group_leader->ctx->mutex, see comment with
+		 * perf_event_ctx_lock().
 		 */
 		if (!move_group && group_leader->ctx != ctx)
 			goto err_context;
@@ -12282,6 +12285,7 @@ SYSCALL_DEFINE5(perf_event_open,
 			} else {
 				perf_event_ctx_unlock(group_leader, gctx);
 				move_group = 0;
+				goto not_move_group;
 			}
 		}
 
@@ -12298,7 +12302,17 @@ SYSCALL_DEFINE5(perf_event_open,
 		}
 	} else {
 		mutex_lock(&ctx->mutex);
+
+		/*
+		 * Now that we hold ctx->lock, (re)validate group_leader->ctx == ctx,
+		 * see the group_leader && !move_group test earlier.
+		 */
+		if (group_leader && group_leader->ctx != ctx) {
+			err = -EINVAL;
+			goto err_locked;
+		}
 	}
+not_move_group:
 
 	if (ctx->task == TASK_TOMBSTONE) {
 		err = -ESRCH;


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] perf: Fix sys_perf_event_open() race against self
  2022-05-20 18:38 [PATCH] perf: Fix sys_perf_event_open() race against self Peter Zijlstra
@ 2022-05-23 10:33 ` Ravi Bangoria
  2022-05-23 10:48 ` Mark Rutland
  1 sibling, 0 replies; 3+ messages in thread
From: Ravi Bangoria @ 2022-05-23 10:33 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: nslusarek, Linus Torvalds, Ravi Bangoria, x86, linux-kernel

On 21-May-22 12:08 AM, Peter Zijlstra wrote:
> 
> Norbert reported that it's possible to race sys_perf_event_open() such
> that the loser ends up in another context from the group leader,
> triggering many WARNs.
> 
> The move_group case checks for races against itself, but the
> !move_group case doesn't, seemingly relying on the previous
> group_leader->ctx == ctx check. However, that check is racy due to not
> holding any locks at that time.
> 
> Therefore, re-check the result after acquiring locks and bailing
> if they no longer match.
> 
> Additionally, clarify the not_move_group case from the
> move_group-vs-move_group race.

Tested-by: Ravi Bangoria <ravi.bangoria@amd.com>

Below is a quick test to reproduce the issue. It triggers WARN_ON()
as normal user. No warnings with the patch.

  $ cat perf-event-open-race.c
  #define _GNU_SOURCE
  #include <stdio.h>
  #include <stdlib.h>
  #include <unistd.h>
  #include <pthread.h>
  #include <linux/perf_event.h>
  #include <sys/ioctl.h>
  #include <linux/perf_event.h>
  #include <asm/unistd.h>
  #include <string.h>
  #include <errno.h>
  
  /*
   * Per-thread description of the event each racing thread opens:
   * which group to attach to, and what event on which task.
   */
  struct thread_args {
          int group_leader;     /* fd of the group-leader event to attach to */
          int type;             /* perf_event_attr.type */
          unsigned long config; /* perf_event_attr.config */
          pid_t target;         /* pid whose events are measured */
  };
  
  /*
   * Thin wrapper for the perf_event_open(2) system call, which has no
   * glibc stub.  Returns the new event fd on success, or -1 with errno
   * set on failure.
   */
  static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
                              int cpu, int group_fd, unsigned long flags)
  {
          return syscall(__NR_perf_event_open, hw_event, pid, cpu,
                         group_fd, flags);
  }
  
  /*
   * Initialize *pe as a minimal, user-space-only event of the given
   * type/config pair.  Everything not set explicitly is zeroed.
   */
  static void
  perf_attr_prepare(struct perf_event_attr *pe, int type, unsigned long config)
  {
          memset(pe, 0, sizeof(*pe));
          pe->size = sizeof(*pe);
          pe->type = type;
          pe->config = config;
          /* Count user space only; kernel/hypervisor need privileges. */
          pe->exclude_kernel = 1;
          pe->exclude_hv = 1;
  }
  
  /*
   * Racing thread body: open one event attached to args->group_leader
   * and immediately close it.  Two instances of this racing against
   * each other on the same group leader is what triggers the kernel
   * WARN this patch fixes.
   */
  void *thread(void *arg)
  {
          struct thread_args *args = arg;
          int group_leader = args->group_leader;
          unsigned long config = args->config;
          struct perf_event_attr pe;
          int type = args->type;
          pid_t target = args->target;
          int fd;

          perf_attr_prepare(&pe, type, config);
          fd = perf_event_open(&pe, target, -1, group_leader, 0);
          /* 0 is a valid descriptor; only negative returns are errors. */
          if (fd < 0)
                  printf("Failed to open %d type event(err: %d)\n", type, -errno);
          else
                  close(fd);

          pthread_exit(NULL);
  }
  
  /*
   * Open a software group-leader event on the current task, then spawn
   * two threads that concurrently attach a software and a hardware
   * event to that leader.  The concurrent attach is what races inside
   * sys_perf_event_open().  Returns 0; exits non-zero on setup failure.
   */
  int main(int argc, char *argv[])
  {
          struct thread_args thread_1_args;
          struct thread_args thread_2_args;
          pthread_t t[2];
          struct perf_event_attr pe;
          int group_leader;

          perf_attr_prepare(&pe, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CONTEXT_SWITCHES);
          group_leader = perf_event_open(&pe, 0, -1, -1, 0);
          /* 0 is a valid descriptor; only negative returns are errors. */
          if (group_leader < 0) {
                  printf("Failed to open group leader (err: %d)\n", -errno);
                  exit(EXIT_FAILURE);
          }

          /* Thread 1 attaches a SW event, thread 2 a HW event, so one
           * of them goes down the move_group path. */
          thread_1_args.group_leader = group_leader;
          thread_1_args.type = PERF_TYPE_SOFTWARE;
          thread_1_args.config = PERF_COUNT_SW_CONTEXT_SWITCHES;
          thread_1_args.target = getpid();
          thread_2_args.group_leader = group_leader;
          thread_2_args.type = PERF_TYPE_HARDWARE;
          thread_2_args.config = PERF_COUNT_HW_CPU_CYCLES;
          thread_2_args.target = getpid();

          if (pthread_create(&t[0], NULL, thread, (void *)&thread_1_args) != 0) {
                  perror("pthread_create(t1) error");
                  exit(1);
          }

          if (pthread_create(&t[1], NULL, thread, (void *)&thread_2_args) != 0) {
                  perror("pthread_create(t2) error");
                  exit(1);
          }

          if (pthread_join(t[0], NULL) != 0) {
                  perror("pthread_join(t1) error");
                  exit(1);
          }

          if (pthread_join(t[1], NULL) != 0) {
                  perror("pthread_join(t2) error");
                  exit(1);
          }

          close(group_leader);
          return 0;
  }

  $ while true; do ./perf-event-open-race; done
  
  $ dmesg
  WARNING: CPU: 121 PID: 7304 at kernel/events/core.c:1948 perf_group_attach+0xfb/0x110
  [...]
  RIP: 0010:perf_group_attach+0xfb/0x110
  [...]
  RSP: 0018:ffff9fec0fa87dc8 EFLAGS: 00010006
  RAX: ffff8f988b3d0000 RBX: ffff8f98872bc0f8 RCX: 0000000000000002
  RDX: 0000000000000001 RSI: ffffffffb571c83b RDI: ffffffffb5793126
  RBP: ffff8f9887514830 R08: ffff8f988b3d0120 R09: 0000000000000000
  R10: 0000000000000001 R11: 0000000000000001 R12: ffff8f988b3d0000
  R13: ffff8f988b3d0008 R14: ffff8f988b3d0000 R15: ffff8f988b3d0000
  FS:  00007f30c5bff700(0000) GS:ffff8fb7f6800000(0000) knlGS:0000000000000000
  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  CR2: 00007f719bf4f130 CR3: 000000208c010003 CR4: 0000000000770ee0
  PKRU: 55555554
  Call Trace:
   <TASK>
   perf_install_in_context+0x1af/0x210
   __do_sys_perf_event_open+0xcb6/0x12b0
   do_syscall_64+0x3a/0x80
   entry_SYSCALL_64_after_hwframe+0x44/0xae
  RIP: 0033:0x7f30c5c398cd
  [...]

Thanks,
Ravi

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] perf: Fix sys_perf_event_open() race against self
  2022-05-20 18:38 [PATCH] perf: Fix sys_perf_event_open() race against self Peter Zijlstra
  2022-05-23 10:33 ` Ravi Bangoria
@ 2022-05-23 10:48 ` Mark Rutland
  1 sibling, 0 replies; 3+ messages in thread
From: Mark Rutland @ 2022-05-23 10:48 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: x86, linux-kernel, nslusarek, Linus Torvalds

Hi Peter,

On Fri, May 20, 2022 at 08:38:06PM +0200, Peter Zijlstra wrote:
> 
> Norbert reported that it's possible to race sys_perf_event_open() such
> that the loser ends up in another context from the group leader,
> triggering many WARNs.

I'm hitting the same with my local arm64 Syzkaller instance, atop v5.18-rc6.

> The move_group case checks for races against itself, but the
> !move_group case doesn't, seemingly relying on the previous
> group_leader->ctx == ctx check. However, that check is racy due to not
> holding any locks at that time.
> 
> Therefore, re-check the result after acquiring locks and bailing
> if they no longer match.
> 
> Additionally, clarify the not_move_group case from the
> move_group-vs-move_group race.
> 
> Fixes: f63a8daa5812 ("perf: Fix event->ctx locking")
> Reported-by: Norbert Slusarek <nslusarek@gmx.net>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> ---
>  kernel/events/core.c |   14 ++++++++++++++
>  1 file changed, 14 insertions(+)
> 
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -12217,6 +12217,9 @@ SYSCALL_DEFINE5(perf_event_open,
>  		 * Do not allow to attach to a group in a different task
>  		 * or CPU context. If we're moving SW events, we'll fix
>  		 * this up later, so allow that.
> +		 *
> +		 * Racy, not holding group_leader->ctx->mutex, see comment with
> +		 * perf_event_ctx_lock().
>  		 */
>  		if (!move_group && group_leader->ctx != ctx)
>  			goto err_context;

I assume that given we say we're not holding the mutex that this is truly racy,
and a concurrent write can happen at any time. If that's the case, shouldn't we
be using *_ONCE() to manipulate perf_event::ctx?

... or could we remove the racy read entirely, and just rely on the later
check with ctx->mutex held? We can always reach that by chance anyway, so
there's not a functional need to bail out early, and consistently using the
later test removes some potential for introducing similar races in future.

FWIW, with this patch as-is applied atop v5.18-rc6, I no longer see the issue
in testing with the reproducer Syzkaller came up with. That would normally take
a few seconds, but it now survives several minutes.

For posterity, that reproducer was:

----8<----
r0 = perf_event_open$cgroup(&(0x7f0000000100)={0x1, 0x80, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, @perf_config_ext}, 0xffffffffffffffff, 0x0, 0xffffffffffffffff, 0x0)
r1 = dup(r0)
perf_event_open(&(0x7f00000001c0)={0x0, 0x80, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, @perf_bp={0x0}}, 0xffffffffffffffff, 0x0, r1, 0x0) (async)
perf_event_open$cgroup(&(0x7f0000000100)={0x1, 0x80, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x10, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, @perf_bp={0x0, 0x5}, 0x0, 0x0, 0x0, 0x0, 0xfffffffffffffffe, 0x0, 0x0, 0x0, 0x0, 0x0, 0x200000000000000}, 0xffffffffffffffff, 0x0, r1, 0x0)
---->8----

Thanks,
Mark.

> @@ -12282,6 +12285,7 @@ SYSCALL_DEFINE5(perf_event_open,
>  			} else {
>  				perf_event_ctx_unlock(group_leader, gctx);
>  				move_group = 0;
> +				goto not_move_group;
>  			}
>  		}
>  
> @@ -12298,7 +12302,17 @@ SYSCALL_DEFINE5(perf_event_open,
>  		}
>  	} else {
>  		mutex_lock(&ctx->mutex);
> +
> +		/*
> +		 * Now that we hold ctx->lock, (re)validate group_leader->ctx == ctx,
> +		 * see the group_leader && !move_group test earlier.
> +		 */
> +		if (group_leader && group_leader->ctx != ctx) {
> +			err = -EINVAL;
> +			goto err_locked;
> +		}
>  	}
> +not_move_group:
>  
>  	if (ctx->task == TASK_TOMBSTONE) {
>  		err = -ESRCH;
> 

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2022-05-23 10:48 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-05-20 18:38 [PATCH] perf: Fix sys_perf_event_open() race against self Peter Zijlstra
2022-05-23 10:33 ` Ravi Bangoria
2022-05-23 10:48 ` Mark Rutland

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.