All of lore.kernel.org
 help / color / mirror / Atom feed
From: Alexander Shishkin <alexander.shishkin@linux.intel.com>
To: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Ingo Molnar <mingo@redhat.com>,
	linux-kernel@vger.kernel.org, acme@redhat.com,
	kirill.shutemov@linux.intel.com, Borislav Petkov <bp@alien8.de>,
	rric@kernel.org,
	Alexander Shishkin <alexander.shishkin@linux.intel.com>
Subject: [RFC PATCH 12/17] perf: Track pinned events per user
Date: Tue,  5 Sep 2017 16:30:21 +0300	[thread overview]
Message-ID: <20170905133026.13689-13-alexander.shishkin@linux.intel.com> (raw)
In-Reply-To: <20170905133026.13689-1-alexander.shishkin@linux.intel.com>

Maintain a per-user, cpu-indexed array of shmemfs-backed events, in the
same way as is done for mlock accounting.

Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
---
 include/linux/sched/user.h  |  6 ++++
 kernel/events/core.c        | 14 ++++-----
 kernel/events/ring_buffer.c | 69 +++++++++++++++++++++++++++++++++++++--------
 kernel/user.c               |  1 +
 4 files changed, 71 insertions(+), 19 deletions(-)

diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h
index 5d5415e129..bf10f95250 100644
--- a/include/linux/sched/user.h
+++ b/include/linux/sched/user.h
@@ -5,6 +5,7 @@
 #include <linux/atomic.h>
 
 struct key;
+struct perf_event;
 
 /*
  * Some day this will be a full-fledged user tracking system..
@@ -39,6 +40,11 @@ struct user_struct {
 #if defined(CONFIG_PERF_EVENTS) || defined(CONFIG_BPF_SYSCALL)
 	atomic_long_t locked_vm;
 #endif
+#ifdef CONFIG_PERF_EVENTS
+	atomic_long_t nr_pinnable_events;
+	struct mutex pinned_mutex;
+	struct perf_event ** __percpu pinned_events;
+#endif
 };
 
 extern int uids_sysfs_init(void);
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 1fed69d4ba..e00f1f6aaf 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -384,7 +384,6 @@ static atomic_t perf_sched_count;
 static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
 static DEFINE_PER_CPU(int, perf_sched_cb_usages);
 static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events);
-static DEFINE_PER_CPU(struct perf_event *, shmem_events);
 
 static atomic_t nr_mmap_events __read_mostly;
 static atomic_t nr_comm_events __read_mostly;
@@ -2086,7 +2085,8 @@ enum pin_event_t {
 
 static enum pin_event_t pin_event_pages(struct perf_event *event)
 {
-	struct perf_event **pinned_event = this_cpu_ptr(&shmem_events);
+	struct user_struct *user = event->rb->mmap_user;
+	struct perf_event **pinned_event = this_cpu_ptr(user->pinned_events);
 	struct perf_event *old_event = *pinned_event;
 
 	if (old_event == event)
@@ -4281,13 +4281,14 @@ static void _free_event(struct perf_event *event)
 	unaccount_event(event);
 
 	if (event->attach_state & PERF_ATTACH_SHMEM) {
+		struct user_struct *user = event->rb->mmap_user;
 		struct perf_event_context *ctx = event->ctx;
 		int cpu;
 
 		atomic_set(&event->xpinned, 0);
 		for_each_possible_cpu(cpu) {
 			struct perf_event **pinned_event =
-				per_cpu_ptr(&shmem_events, cpu);
+				per_cpu_ptr(user->pinned_events, cpu);
 
 			cmpxchg(pinned_event, event, NULL);
 		}
@@ -9530,7 +9531,7 @@ perf_event_detach(struct perf_event *event, struct perf_event *parent_event,
 {
 	struct ring_buffer *parent_rb = parent_event ? parent_event->rb : NULL;
 	char *filename;
-	int err;
+	int err = -ENOMEM;
 
 	filename = kasprintf(GFP_KERNEL, "%s:%x.event",
 			     task ? "task" : "cpu",
@@ -9550,10 +9551,9 @@ perf_event_detach(struct perf_event *event, struct perf_event *parent_event,
 	if (err) {
 		tracefs_remove(event->dent);
 		event->dent = NULL;
-		return err;
 	}
 
-	return 0;
+	return err;
 }
 /*
  * Allocate and initialize a event structure
@@ -10290,7 +10290,7 @@ SYSCALL_DEFINE5(perf_event_open,
 	}
 
 	if (detached) {
-		err = perf_event_detach(event, task, NULL);
+		err = perf_event_detach(event, NULL, task, NULL);
 		if (err)
 			goto err_context;
 
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 896d441642..8d37e4e591 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -563,6 +563,44 @@ void *perf_get_aux(struct perf_output_handle *handle)
 	return handle->rb->aux_priv;
 }
 
+static struct user_struct *get_users_pinned_events(void)
+{
+	struct user_struct *user = current_user(), *ret = NULL;
+
+	if (atomic_long_inc_not_zero(&user->nr_pinnable_events))
+		return user;
+
+	mutex_lock(&user->pinned_mutex);
+	if (!atomic_long_read(&user->nr_pinnable_events)) {
+		if (WARN_ON_ONCE(!!user->pinned_events))
+			goto unlock;
+
+		user->pinned_events = alloc_percpu(struct perf_event *);
+		if (!user->pinned_events) {
+			goto unlock;
+		} else {
+			atomic_long_inc(&user->nr_pinnable_events);
+			ret = get_current_user();
+		}
+	}
+
+unlock:
+	mutex_unlock(&user->pinned_mutex);
+
+	return ret;
+}
+
+static void put_users_pinned_events(struct user_struct *user)
+{
+	if (!atomic_long_dec_and_test(&user->nr_pinnable_events))
+		return;
+
+	mutex_lock(&user->pinned_mutex);
+	free_percpu(user->pinned_events);
+	user->pinned_events = NULL;
+	mutex_unlock(&user->pinned_mutex);
+}
+
 /*
  * Check if the current user can afford @nr_pages, considering the
  * perf_event_mlock sysctl and their mlock limit. If the former is exceeded,
@@ -574,11 +612,14 @@ static int __ring_buffer_account(struct ring_buffer *rb, struct mm_struct *mm,
                                  unsigned long nr_pages, unsigned long *locked)
 {
 	unsigned long total, limit, pinned;
+	struct user_struct *user;
 
 	if (!mm)
 		mm = rb->mmap_mapping;
 
-	rb->mmap_user = current_user();
+	user = get_users_pinned_events();
+	if (!user)
+		return -ENOMEM;
 
 	limit = sysctl_perf_event_mlock >> (PAGE_SHIFT - 10);
 
@@ -587,10 +628,7 @@ static int __ring_buffer_account(struct ring_buffer *rb, struct mm_struct *mm,
 	 */
 	limit *= num_online_cpus();
 
-	total = atomic_long_read(&rb->mmap_user->locked_vm) + nr_pages;
-
-	free_uid(rb->mmap_user);
-	rb->mmap_user = NULL;
+	total = atomic_long_read(&user->locked_vm) + nr_pages;
 
 	pinned = 0;
 	if (total > limit) {
@@ -599,7 +637,7 @@ static int __ring_buffer_account(struct ring_buffer *rb, struct mm_struct *mm,
 		 * limit needs to be accounted to the consumer's mm.
 		 */
 		if (!mm)
-			return -EPERM;
+			goto err_put_user;
 
 		pinned = total - limit;
 
@@ -608,9 +646,8 @@ static int __ring_buffer_account(struct ring_buffer *rb, struct mm_struct *mm,
 		total = mm->pinned_vm + pinned;
 
 		if ((total > limit) && perf_paranoid_tracepoint_raw() &&
-		    !capable(CAP_IPC_LOCK)) {
-			return -EPERM;
-		}
+		    !capable(CAP_IPC_LOCK))
+			goto err_put_user;
 
 		*locked = pinned;
 		mm->pinned_vm += pinned;
@@ -619,10 +656,15 @@ static int __ring_buffer_account(struct ring_buffer *rb, struct mm_struct *mm,
 	if (!rb->mmap_mapping)
 		rb->mmap_mapping = mm;
 
-	rb->mmap_user = get_current_user();
-	atomic_long_add(nr_pages, &rb->mmap_user->locked_vm);
+	rb->mmap_user = user;
+	atomic_long_add(nr_pages, &user->locked_vm);
 
 	return 0;
+
+err_put_user:
+	put_users_pinned_events(user);
+
+	return -EPERM;
 }
 
 static int ring_buffer_account(struct ring_buffer *rb, struct mm_struct *mm,
@@ -657,7 +699,7 @@ void ring_buffer_unaccount(struct ring_buffer *rb, bool aux)
 	if (rb->mmap_mapping)
 		rb->mmap_mapping->pinned_vm -= pinned;
 
-	free_uid(rb->mmap_user);
+	put_users_pinned_events(rb->mmap_user);
 }
 
 #define PERF_AUX_GFP	(GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY)
@@ -1124,6 +1166,7 @@ rb_shmem_account(struct ring_buffer *rb, struct ring_buffer *parent_rb)
 
 		rb->acct_refcount = parent_rb->acct_refcount;
 		atomic_inc(rb->acct_refcount);
+		rb->mmap_user = get_uid(parent_rb->mmap_user);
 
 		return 0;
 	}
@@ -1146,6 +1189,8 @@ rb_shmem_account(struct ring_buffer *rb, struct ring_buffer *parent_rb)
 
 static void rb_shmem_unaccount(struct ring_buffer *rb)
 {
+	free_uid(rb->mmap_user);
+
 	if (!atomic_dec_and_test(rb->acct_refcount)) {
 		rb->acct_refcount = NULL;
 		return;
diff --git a/kernel/user.c b/kernel/user.c
index 00281add65..e95a82d31d 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -185,6 +185,7 @@ struct user_struct *alloc_uid(kuid_t uid)
 
 		new->uid = uid;
 		atomic_set(&new->__count, 1);
+		mutex_init(&new->pinned_mutex);
 
 		/*
 		 * Before adding this, check whether we raced
-- 
2.14.1

  parent reply	other threads:[~2017-09-05 13:50 UTC|newest]

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-09-05 13:30 [RFC PATCH 00/17] perf: Detached events Alexander Shishkin
2017-09-05 13:30 ` [RFC PATCH 01/17] perf: Allow mmapping only user page Alexander Shishkin
2017-09-06 16:28   ` Borislav Petkov
2017-09-13 11:35     ` Alexander Shishkin
2017-09-13 12:58       ` Borislav Petkov
2017-09-05 13:30 ` [RFC PATCH 02/17] perf: Factor out mlock accounting Alexander Shishkin
2017-09-05 13:30 ` [RFC PATCH 03/17] tracefs: De-globalize instances' callbacks Alexander Shishkin
2018-01-24 18:54   ` Steven Rostedt
2017-09-05 13:30 ` [RFC PATCH 04/17] tracefs: Add ->unlink callback to tracefs_dir_ops Alexander Shishkin
2017-09-05 13:30 ` [RFC PATCH 05/17] perf: Introduce detached events Alexander Shishkin
2017-10-03 14:34   ` Peter Zijlstra
2017-10-06 11:23     ` Alexander Shishkin
2017-09-05 13:30 ` [RFC PATCH 06/17] perf: Add buffers to the " Alexander Shishkin
2017-10-03 14:36   ` Peter Zijlstra
2017-09-05 13:30 ` [RFC PATCH 07/17] perf: Add pmu_info to user page Alexander Shishkin
2017-10-03 14:40   ` Peter Zijlstra
2017-09-05 13:30 ` [RFC PATCH 08/17] perf: Allow inheritance for detached events Alexander Shishkin
2017-10-03 14:42   ` Peter Zijlstra
2017-10-06 11:40     ` Alexander Shishkin
2017-09-05 13:30 ` [RFC PATCH 09/17] perf: Use shmemfs pages for userspace-only per-thread " Alexander Shishkin
2017-10-03 14:43   ` Peter Zijlstra
2017-10-06 11:52     ` Alexander Shishkin
2017-09-05 13:30 ` [RFC PATCH 10/17] perf: Implement pinning and scheduling for SHMEM events Alexander Shishkin
2017-09-05 13:30 ` [RFC PATCH 11/17] perf: Implement mlock accounting for shmem ring buffers Alexander Shishkin
2017-09-05 13:30 ` Alexander Shishkin [this message]
2017-09-05 13:30 ` [RFC PATCH 13/17] perf: Re-inject shmem buffers after exec Alexander Shishkin
2017-09-05 13:30 ` [RFC PATCH 14/17] perf: Add ioctl(REATTACH) for detached events Alexander Shishkin
2017-10-03 14:50   ` Peter Zijlstra
2017-09-05 13:30 ` [RFC PATCH 15/17] perf: Allow controlled non-root access to " Alexander Shishkin
2017-10-03 14:53   ` Peter Zijlstra
2017-09-05 13:30 ` [RFC PATCH 16/17] perf/x86/intel/pt: Add PMU info Alexander Shishkin
2017-09-05 13:30 ` [RFC PATCH 17/17] perf/x86/intel/bts: " Alexander Shishkin
2017-09-06 16:24 ` [RFC PATCH 00/17] perf: Detached events Borislav Petkov
2017-09-13 11:54   ` Alexander Shishkin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170905133026.13689-13-alexander.shishkin@linux.intel.com \
    --to=alexander.shishkin@linux.intel.com \
    --cc=a.p.zijlstra@chello.nl \
    --cc=acme@redhat.com \
    --cc=bp@alien8.de \
    --cc=kirill.shutemov@linux.intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=rric@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.