From: Beau Belgrave <beaub@linux.microsoft.com>
To: rostedt@goodmis.org, mhiramat@kernel.org,
mathieu.desnoyers@efficios.com, dcook@linux.microsoft.com,
alanau@linux.microsoft.com
Cc: linux-trace-devel@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: [RFC PATCH 2/2] tracing/user_events: Fixup enable faults asyncly
Date: Thu, 27 Oct 2022 15:40:11 -0700 [thread overview]
Message-ID: <20221027224011.2075-3-beaub@linux.microsoft.com> (raw)
In-Reply-To: <20221027224011.2075-1-beaub@linux.microsoft.com>
When events are enabled within the various tracing facilities, such as
ftrace/perf, the event_mutex is held. As events are enabled, pages are
accessed. We do not want page faults to occur under this lock. Instead,
queue the fault to a workqueue so that it is handled safely in process
context without the lock held.
The enable address is disabled while the async fault-in occurs. This
ensures that we don't attempt the fault-in more often than necessary.
Once the page has been faulted in, the address write is attempted again.
If the page could not be faulted in, we wait until the next time the
event is enabled to prevent any potential infinite loops.
Signed-off-by: Beau Belgrave <beaub@linux.microsoft.com>
---
kernel/trace/trace_events_user.c | 125 ++++++++++++++++++++++++++++++-
1 file changed, 121 insertions(+), 4 deletions(-)
diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c
index 633f24c2a1ac..f1eb8101e053 100644
--- a/kernel/trace/trace_events_user.c
+++ b/kernel/trace/trace_events_user.c
@@ -81,11 +81,22 @@ struct user_event_enabler {
struct list_head link;
struct mm_struct *mm;
struct file *file;
+ refcount_t refcnt;
unsigned long enable_addr;
unsigned int enable_bit: 5,
- __reserved: 27;
+ __reserved: 26,
+ disabled: 1;
};
+/*
+ * Used for asynchronous faulting in of pages. One of these is allocated
+ * from fault_cache for each queued fault and is freed again once the work
+ * item has run, or immediately if the work could not be scheduled.
+ */
+struct user_event_enabler_fault {
+ /* Work item whose handler is user_event_enabler_fault_fixup() */
+ struct work_struct work;
+ /* Enabler whose enable_addr page is faulted in; a reference is held */
+ struct user_event_enabler *enabler;
+ /* Event the enable write is re-issued for; a reference is held */
+ struct user_event *event;
+};
+
+/* Cache of struct user_event_enabler_fault, set up in trace_events_user_init() */
+static struct kmem_cache *fault_cache;
+
/*
* Stores per-event properties, as users register events
* within a file a user_event might be created if it does not
@@ -236,6 +247,19 @@ static void user_event_enabler_destroy(struct user_event_enabler *enabler)
kfree(enabler);
}
+/*
+ * Takes an additional reference on the enabler and returns the same
+ * pointer, so the call can be used directly as the right-hand side of an
+ * assignment. The reference is dropped with user_event_enabler_put().
+ */
+static __always_inline struct user_event_enabler
+*user_event_enabler_get(struct user_event_enabler *enabler)
+{
+ refcount_inc(&enabler->refcnt);
+ return enabler;
+}
+
+/*
+ * Drops one reference on the enabler. When the reference count reaches
+ * zero the enabler is torn down via user_event_enabler_destroy().
+ */
+static void user_event_enabler_put(struct user_event_enabler *enabler)
+{
+ if (refcount_dec_and_test(&enabler->refcnt))
+ user_event_enabler_destroy(enabler);
+}
+
static void user_event_enabler_remove(struct file *file,
struct user_event *user)
{
@@ -249,13 +273,93 @@ static void user_event_enabler_remove(struct file *file,
if (enabler->file != file)
continue;
+ enabler->disabled = 0;
list_del(&enabler->link);
- user_event_enabler_destroy(enabler);
+ user_event_enabler_put(enabler);
}
mutex_unlock(&event_mutex);
}
+static void user_event_enabler_write(struct user_event_enabler *enabler,
+ struct user_event *user);
+
+/*
+ * Work handler that faults in the page backing an enabler's enable_addr
+ * and, if that succeeded, retries the enable write.
+ *
+ * The fault-in itself runs without event_mutex; the mutex is only taken
+ * afterwards to clear enabler->disabled and re-issue the write. On the way
+ * out the event and enabler references taken by
+ * user_event_enabler_queue_fault() are dropped and the fault is returned
+ * to fault_cache.
+ */
+static void user_event_enabler_fault_fixup(struct work_struct *work)
+{
+ struct user_event_enabler_fault *fault = container_of(
+ work, struct user_event_enabler_fault, work);
+ struct user_event_enabler *enabler = fault->enabler;
+ struct user_event *user = fault->event;
+ struct mm_struct *mm = enabler->mm;
+ unsigned long uaddr = enabler->enable_addr;
+ bool unlocked = false;
+ int ret;
+
+ might_sleep();
+
+ /* fixup_user_fault() is called with the mmap lock held for read */
+ mmap_read_lock(mm);
+
+ /*
+ * Request a write fault on behalf of another mm.
+ * NOTE(review): unlocked is passed in but never inspected afterwards.
+ */
+ ret = fixup_user_fault(mm, uaddr, FAULT_FLAG_WRITE | FAULT_FLAG_REMOTE,
+ &unlocked);
+
+ mmap_read_unlock(mm);
+
+ /* A failed fixup is only logged; no further fault is queued from here */
+ if (ret)
+ pr_warn("user_events: Fixup fault failed with %d "
+ "for mm: 0x%pK offset: 0x%llx event: %s\n", ret, mm,
+ (unsigned long long)uaddr, EVENT_NAME(user));
+
+ /* Prevent state changes from racing */
+ mutex_lock(&event_mutex);
+
+ /*
+ * If we managed to get the page, re-issue the write. We do not
+ * want to get into a possible infinite loop, which is why we only
+ * attempt again directly if the page came in. If we couldn't get
+ * the page here, then we will try again the next time the event is
+ * enabled/disabled.
+ *
+ * The disabled flag is cleared in both cases so that later writes
+ * are no longer skipped by user_event_enabler_write().
+ *
+ * NOTE(review): user_event_enabler_remove() may have unlinked this
+ * enabler (list_del + put) while the work was pending; the write is
+ * still re-issued here in that case -- confirm this is intended.
+ */
+ enabler->disabled = 0;
+
+ if (!ret)
+ user_event_enabler_write(enabler, user);
+
+ mutex_unlock(&event_mutex);
+
+ /* Release everything user_event_enabler_queue_fault() acquired */
+ refcount_dec(&user->refcnt);
+ user_event_enabler_put(enabler);
+ kmem_cache_free(fault_cache, fault);
+}
+
+/*
+ * Queues an asynchronous fault-in of the page backing the enabler's
+ * enable_addr.
+ *
+ * The enabler is marked disabled so that user_event_enabler_write() skips
+ * it until the fixup work has run, and a reference is taken on both the
+ * enabler and the event for as long as the work is outstanding.
+ *
+ * Returns true if the work was queued. Returns false if the fault could
+ * not be allocated or scheduled; in that case the disabled flag and both
+ * references are restored to their previous state.
+ */
+static bool user_event_enabler_queue_fault(struct user_event_enabler *enabler,
+ struct user_event *user)
+{
+ struct user_event_enabler_fault *fault;
+
+ /*
+ * Reached from user_event_enabler_write(), which asserts that
+ * event_mutex is held, hence the GFP_NOWAIT allocation.
+ */
+ fault = kmem_cache_zalloc(fault_cache, GFP_NOWAIT | __GFP_NOWARN);
+
+ if (!fault)
+ return false;
+
+ INIT_WORK(&fault->work, user_event_enabler_fault_fixup);
+ fault->enabler = user_event_enabler_get(enabler);
+ fault->event = user;
+
+ /* Dropped again by user_event_enabler_fault_fixup() */
+ refcount_inc(&user->refcnt);
+ enabler->disabled = 1;
+
+ /* Scheduling failed: undo the flag, both references and the allocation */
+ if (!schedule_work(&fault->work)) {
+ enabler->disabled = 0;
+ refcount_dec(&user->refcnt);
+ user_event_enabler_put(enabler);
+ kmem_cache_free(fault_cache, fault);
+
+ return false;
+ }
+
+ return true;
+}
+
static void user_event_enabler_write(struct user_event_enabler *enabler,
struct user_event *user)
{
@@ -266,6 +370,11 @@ static void user_event_enabler_write(struct user_event_enabler *enabler,
void *kaddr;
int ret;
+ lockdep_assert_held(&event_mutex);
+
+ if (unlikely(enabler->disabled))
+ return;
+
mmap_read_lock(mm);
ret = pin_user_pages_remote(mm, uaddr, 1, FOLL_WRITE | FOLL_NOFAULT,
@@ -273,8 +382,10 @@ static void user_event_enabler_write(struct user_event_enabler *enabler,
mmap_read_unlock(mm);
- if (ret <= 0) {
- pr_warn("user_events: Enable write failed\n");
+ if (unlikely(ret <= 0)) {
+ if (!user_event_enabler_queue_fault(enabler, user))
+ pr_warn("user_events: Unable to queue fault handler\n");
+
return;
}
@@ -321,6 +432,7 @@ static struct user_event_enabler
enabler->file = file;
enabler->enable_addr = (unsigned long)reg->enable_addr;
enabler->enable_bit = reg->enable_bit;
+ refcount_set(&enabler->refcnt, 1);
/* Prevents state changes from racing with new enablers */
mutex_lock(&event_mutex);
@@ -1902,6 +2014,11 @@ static int __init trace_events_user_init(void)
{
int ret;
+ fault_cache = KMEM_CACHE(user_event_enabler_fault, 0);
+
+ if (!fault_cache)
+ return -ENOMEM;
+
init_group = user_event_group_create(&init_user_ns);
if (!init_group)
--
2.25.1
next prev parent reply other threads:[~2022-10-27 22:40 UTC|newest]
Thread overview: 31+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-10-27 22:40 [RFC PATCH 0/2] tracing/user_events: Remote write ABI Beau Belgrave
2022-10-27 22:40 ` [RFC PATCH 1/2] tracing/user_events: Use remote writes for event enablement Beau Belgrave
2022-10-29 14:44 ` Mathieu Desnoyers
2022-10-31 16:38 ` Beau Belgrave
2022-10-31 14:47 ` Masami Hiramatsu
2022-10-31 16:46 ` Beau Belgrave
2022-10-31 23:55 ` Masami Hiramatsu
2022-11-01 16:45 ` Beau Belgrave
2022-10-27 22:40 ` Beau Belgrave [this message]
2022-10-28 22:07 ` [RFC PATCH 2/2] tracing/user_events: Fixup enable faults asyncly Mathieu Desnoyers
2022-10-28 22:35 ` Beau Belgrave
2022-10-29 14:23 ` Mathieu Desnoyers
2022-10-31 16:58 ` Beau Belgrave
2022-10-28 22:19 ` Mathieu Desnoyers
2022-10-28 22:42 ` Beau Belgrave
2022-10-29 14:40 ` Mathieu Desnoyers
2022-10-30 11:45 ` Mathieu Desnoyers
2022-10-31 17:18 ` Beau Belgrave
2022-10-31 17:12 ` Beau Belgrave
2022-10-28 21:50 ` [RFC PATCH 0/2] tracing/user_events: Remote write ABI Mathieu Desnoyers
2022-10-28 22:17 ` Beau Belgrave
2022-10-29 13:58 ` Mathieu Desnoyers
2022-10-31 16:53 ` Beau Belgrave
2022-11-02 13:46 ` Mathieu Desnoyers
2022-11-02 17:18 ` Beau Belgrave
2022-10-31 14:15 ` Masami Hiramatsu
2022-10-31 15:27 ` Mathieu Desnoyers
2022-10-31 17:27 ` Beau Belgrave
2022-10-31 18:25 ` Mathieu Desnoyers
2022-11-01 13:52 ` Masami Hiramatsu
2022-11-01 16:55 ` Beau Belgrave
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20221027224011.2075-3-beaub@linux.microsoft.com \
--to=beaub@linux.microsoft.com \
--cc=alanau@linux.microsoft.com \
--cc=dcook@linux.microsoft.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-trace-devel@vger.kernel.org \
--cc=mathieu.desnoyers@efficios.com \
--cc=mhiramat@kernel.org \
--cc=rostedt@goodmis.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).