From: Jinyu Tang <tangjinyu@tinylab.org>
To: rostedt@goodmis.org, mhiramat@kernel.org, bristot@kernel.org
Cc: ttjjyystupid@163.com, falcon@tinylab.org, wangjiexun@tinylab.org,
	linux-trace-kernel@vger.kernel.org, linux-kernel@vger.kernel.org,
	Jinyu Tang <tangjinyu@tinylab.org>
Subject: [PATCH v1] ftrace: make sched_wakeup focus on the target process
Date: Mon,  9 Oct 2023 23:37:14 +0800
Message-ID: <20231009153714.10743-1-tangjinyu@tinylab.org>

When we want to know what happened in the kernel when our application
has more wakeup latency than expected, the existing wakeup tracers are
of little help if that latency is still lower than the latency of other
processes in the system: after waiting a long time, we only get the
sched_wakeup trace of some other process.

This patch lets us trace only the sched_wakeup latency of the target
processes; wakeups of other processes are dropped and do not update
tracing_max_latency.

The patch is tested by the following commands:

$ mount -t tracefs none /sys/kernel/tracing
$ echo wakeup_rt > /sys/kernel/tracing/current_tracer
# other stress-ng options were also tested
$ stress-ng --cpu 4 &
$ cyclictest --mlockall --smp --priority=99 &
$ cyclictest_pid=$!
# child thread of cyclictest main process
$ thread_pid=$((cyclictest_pid + 1))

$ echo ${thread_pid} > /sys/kernel/tracing/set_wakeup_pid

$ echo 1 > /sys/kernel/tracing/tracing_on
$ echo 0 > /sys/kernel/tracing/tracing_max_latency
$ wait ${cyclictest_pid}
$ echo 0 > /sys/kernel/tracing/tracing_on
$ cat /sys/kernel/tracing/trace

The maximum latency and the backtrace recorded in the trace file are
then generated only by the target process. This eliminates interference
from other programs and makes it easier to identify the cause of the
latency.
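
For reference, the new set_wakeup_pid file is intended to behave like
the existing ftrace pid filters (e.g. set_event_pid): reading it lists
the currently filtered PIDs, and opening it for writing with O_TRUNC
clears the filter before new PIDs are parsed. A minimal sketch of
inspecting and resetting the filter (not part of the test above, based
on the ftrace_set_wakeup_pid_open()/_write() handlers in this patch):

$ cat /sys/kernel/tracing/set_wakeup_pid
# '>' opens with O_TRUNC, so this replaces the old filter
$ echo ${thread_pid} > /sys/kernel/tracing/set_wakeup_pid
# an empty truncating write just clears the filter
$ echo > /sys/kernel/tracing/set_wakeup_pid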

Tested-by: Jiexun Wang <wangjiexun@tinylab.org>
Signed-off-by: Jinyu Tang <tangjinyu@tinylab.org>
---
 kernel/trace/trace.h              |   3 +
 kernel/trace/trace_sched_wakeup.c | 179 ++++++++++++++++++++++++++++++
 2 files changed, 182 insertions(+)

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 77debe53f07c..c6f212e8bfd2 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -403,6 +403,9 @@ struct trace_array {
 #endif
 	/* function tracing enabled */
 	int			function_enabled;
+#endif
+#ifdef CONFIG_SCHED_TRACER
+	struct trace_pid_list	__rcu *wakeup_pids;
 #endif
 	int			no_filter_buffering_ref;
 	struct list_head	hist_vars;
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 0469a04a355f..b6cb9391e120 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -10,6 +10,9 @@
  *  Copyright (C) 2004-2006 Ingo Molnar
  *  Copyright (C) 2004 Nadia Yvette Chambers
  */
+#include <linux/fs.h>
+#include <linux/workqueue.h>
+#include <linux/spinlock.h>
 #include <linux/module.h>
 #include <linux/kallsyms.h>
 #include <linux/uaccess.h>
@@ -17,6 +20,7 @@
 #include <linux/sched/rt.h>
 #include <linux/sched/deadline.h>
 #include <trace/events/sched.h>
+#include <linux/tracefs.h>
 #include "trace.h"
 
 static struct trace_array	*wakeup_trace;
@@ -361,6 +365,169 @@ static bool report_latency(struct trace_array *tr, u64 delta)
 	return true;
 }
 
+static DEFINE_MUTEX(sched_wakeup_mutex);
+static void *
+p_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	struct trace_array *tr = m->private;
+	struct trace_pid_list *pid_list = rcu_dereference_sched(tr->wakeup_pids);
+
+	return trace_pid_next(pid_list, v, pos);
+}
+
+static void *p_start(struct seq_file *m, loff_t *pos)
+	__acquires(RCU)
+{
+	struct trace_pid_list *pid_list;
+	struct trace_array *tr = m->private;
+
+	/*
+	 * Grab the mutex, to keep calls to p_next() having the same
+	 * tr->wakeup_pids as p_start() has.
+	 * If we just passed the tr->wakeup_pids around, then RCU would
+	 * have been enough, but doing that makes things more complex.
+	 */
+	mutex_lock(&sched_wakeup_mutex);
+	rcu_read_lock_sched();
+
+	pid_list = rcu_dereference_sched(tr->wakeup_pids);
+
+	if (!pid_list)
+		return NULL;
+
+	return trace_pid_start(pid_list, pos);
+}
+
+static void p_stop(struct seq_file *m, void *p)
+	__releases(RCU)
+{
+	rcu_read_unlock_sched();
+	mutex_unlock(&sched_wakeup_mutex);
+}
+
+static const struct seq_operations show_set_pid_seq_ops = {
+	.start = p_start,
+	.next = p_next,
+	.show = trace_pid_show,
+	.stop = p_stop,
+};
+
+static int
+ftrace_wakeup_open(struct inode *inode, struct file *file,
+		  const struct seq_operations *seq_ops)
+{
+	struct seq_file *m;
+	int ret;
+
+	ret = seq_open(file, seq_ops);
+	if (ret < 0)
+		return ret;
+	m = file->private_data;
+	/* copy tr over to seq ops */
+	m->private = inode->i_private;
+
+	return ret;
+}
+
+static void __clear_wakeup_pids(struct trace_array *tr)
+{
+	struct trace_pid_list *pid_list;
+
+	pid_list = rcu_dereference_protected(tr->wakeup_pids,
+					     lockdep_is_held(&sched_wakeup_mutex));
+	if (!pid_list)
+		return;
+
+	rcu_assign_pointer(tr->wakeup_pids, NULL);
+
+	synchronize_rcu();
+	trace_pid_list_free(pid_list);
+}
+
+static void clear_wakeup_pids(struct trace_array *tr)
+{
+	mutex_lock(&sched_wakeup_mutex);
+	__clear_wakeup_pids(tr);
+	mutex_unlock(&sched_wakeup_mutex);
+
+}
+
+static int
+ftrace_set_wakeup_pid_open(struct inode *inode, struct file *file)
+{
+	const struct seq_operations *seq_ops = &show_set_pid_seq_ops;
+	struct trace_array *tr = inode->i_private;
+	int ret;
+
+	if (trace_array_get(tr) < 0)
+		return -ENODEV;
+
+	if ((file->f_mode & FMODE_WRITE) &&
+	    (file->f_flags & O_TRUNC))
+		clear_wakeup_pids(tr);
+
+	ret = ftrace_wakeup_open(inode, file, seq_ops);
+
+	if (ret < 0)
+		trace_array_put(tr);
+	return ret;
+}
+
+static ssize_t
+ftrace_set_wakeup_pid_write(struct file *filp, const char __user *ubuf,
+		       size_t cnt, loff_t *ppos)
+{
+	struct seq_file *m = filp->private_data;
+	struct trace_array *tr = m->private;
+	struct trace_pid_list *filtered_pids = NULL;
+	struct trace_pid_list *pid_list;
+	ssize_t ret;
+
+	if (!cnt)
+		return 0;
+
+	mutex_lock(&sched_wakeup_mutex);
+
+	filtered_pids = rcu_dereference_protected(tr->wakeup_pids,
+					     lockdep_is_held(&sched_wakeup_mutex));
+
+	ret = trace_pid_write(filtered_pids, &pid_list, ubuf, cnt);
+	if (ret < 0)
+		goto out;
+
+	rcu_assign_pointer(tr->wakeup_pids, pid_list);
+
+	if (filtered_pids) {
+		synchronize_rcu();
+		trace_pid_list_free(filtered_pids);
+	}
+
+ out:
+	mutex_unlock(&sched_wakeup_mutex);
+
+	if (ret > 0)
+		*ppos += ret;
+
+	return ret;
+}
+
+static int ftrace_set_wakeup_pid_release(struct inode *inode, struct file *file)
+{
+	struct trace_array *tr = inode->i_private;
+
+	trace_array_put(tr);
+
+	return seq_release(inode, file);
+}
+
+static const struct file_operations ftrace_set_wakeup_pid_fops = {
+	.open = ftrace_set_wakeup_pid_open,
+	.read = seq_read,
+	.write = ftrace_set_wakeup_pid_write,
+	.llseek = seq_lseek,
+	.release = ftrace_set_wakeup_pid_release,
+};
+
 static void
 probe_wakeup_migrate_task(void *ignore, struct task_struct *task, int cpu)
 {
@@ -437,6 +604,7 @@ probe_wakeup_sched_switch(void *ignore, bool preempt,
 	long disabled;
 	int cpu;
 	unsigned int trace_ctx;
+	struct trace_pid_list *pid_list;
 
 	tracing_record_cmdline(prev);
 
@@ -466,6 +634,14 @@ probe_wakeup_sched_switch(void *ignore, bool preempt,
 
 	arch_spin_lock(&wakeup_lock);
 
+	rcu_read_lock_sched();
+	pid_list = rcu_dereference_sched(wakeup_trace->wakeup_pids);
+	rcu_read_unlock_sched();
+
+	/* Skip wakeup tracing for tasks not in the set_wakeup_pid filter */
+	if (likely(trace_ignore_this_task(pid_list, NULL, next)))
+		goto out_unlock;
+
 	/* We could race with grabbing wakeup_lock */
 	if (unlikely(!tracer_enabled || next != wakeup_task))
 		goto out_unlock;
@@ -674,6 +850,9 @@ static int __wakeup_tracer_init(struct trace_array *tr)
 	set_tracer_flag(tr, TRACE_ITER_OVERWRITE, 1);
 	set_tracer_flag(tr, TRACE_ITER_LATENCY_FMT, 1);
 
+	tracefs_create_file("set_wakeup_pid", 0644, NULL,
+				    tr, &ftrace_set_wakeup_pid_fops);
+
 	tr->max_latency = 0;
 	wakeup_trace = tr;
 	ftrace_init_array_ops(tr, wakeup_tracer_call);
-- 
2.34.1


Thread overview: 5+ messages
2023-10-09 15:37 Jinyu Tang [this message]
2023-10-09 16:25 ` [PATCH v1] ftrace: make sched_wakeup focus on the target process Steven Rostedt
2023-10-10  6:53 ` Daniel Bristot de Oliveira
2023-10-11  1:13 ` kernel test robot
2023-10-20  7:59 ` kernel test robot
