* proof-of-concept, utrace->ftrace engine
@ 2009-01-27 19:54 Frank Ch. Eigler
2009-02-09 7:22 ` Roland McGrath
0 siblings, 1 reply; 3+ messages in thread
From: Frank Ch. Eigler @ 2009-01-27 19:54 UTC (permalink / raw)
To: utrace-devel; +Cc: systemtap, linux-kernel
Hi -
Here's the start of a little ditty that ties process-related events as
hooked by the Roland McGrath's utrace code into the ftrace
buffer/control widgetry. If nothing else, think of it as one
potential in-tree user of utrace.
Script started on Tue 27 Jan 2009 02:39:06 PM EST
[root@vm-fed10-64 tracing]# cat available_tracers
process wakeup irqsoff sysprof sched_switch nop
[root@vm-fed10-64 tracing]# echo process > current_tracer
[root@vm-fed10-64 tracing]# echo 500 > process_trace_uid_filter
[root@vm-fed10-64 tracing]# cat trace
# tracer: process
#
# TASK-PID CPU# TIMESTAMP FUNCTION
# | | | | |
[root@vm-fed10-64 tracing]# su - fche
%
vm-fed10-64 /home/fche
[14:39:50] % pwd
/home/fche
%
vm-fed10-64 /home/fche
[14:39:52] % ls /tmp
firstbootX.log pulse-PKdhtXMmr18n stapbXg0xB stapUniATd
foo stap6cNJ5M stapl9Ww2f virtual-fche.4SkpzQ
kerneloops.pxnITL stap9MajHI stapT1LKnQ
%
vm-fed10-64 /home/fche
[14:39:59] % df
Filesystem 1K-blocks Used Available Use% Mounted on
/dev/mapper/VolGroup00-LogVol00
13706328 11417980 2149176 85% /
/dev/sda1 194442 34259 150144 19% /boot
tmpfs 382320 0 382320 0% /dev/shm
super:/home 1300999168 496440320 750835712 40% /home
%
vm-fed10-64 /home/fche
[14:40:03] % exit
Tue Jan 27 14:40:05 EST 2009
[root@vm-fed10-64 tracing]# cat trace
# tracer: process
#
# TASK-PID CPU# TIMESTAMP FUNCTION
# | | | | |
zsh 2091 0 2616701.950948 exec
zsh 2091 0 2616701.966410 fork 2092 flags 0x1200011
whoami 2092 1 2616702.005276 exec
whoami 2092 0 2616702.008612 exit 0
zsh 2091 0 2616702.009193 signal 17 errno 0 code 262145
zsh 2091 0 2616702.011385 fork 2093 flags 0x1200011
mkdir 2093 1 2616702.013701 exec
mkdir 2093 0 2616702.017300 exit 0
zsh 2091 0 2616702.018133 signal 17 errno 0 code 262145
zsh 2091 0 2616702.018951 fork 2094 flags 0x1200011
whoami 2094 0 2616702.023867 exec
whoami 2094 0 2616702.026108 exit 0
zsh 2091 0 2616702.026567 signal 17 errno 0 code 262145
zsh 2091 0 2616702.027358 fork 2095 flags 0x1200011
mkdir 2095 1 2616702.029712 exec
mkdir 2095 1 2616702.031703 exit 0
zsh 2091 0 2616702.032275 signal 17 errno 0 code 262145
zsh 2091 0 2616702.035062 fork 2096 flags 0x1200011
zsh 2096 1 2616702.036457 exit 0
zsh 2091 0 2616702.037344 fork 2097 flags 0x1200011
zsh 2091 0 2616702.038959 signal 17 errno 0 code 262145
egrep 2097 1 2616702.039692 exec
egrep 2097 1 2616702.041620 exit 256
zsh 2091 0 2616702.042150 signal 17 errno 0 code 262145
zsh 2091 0 2616702.043095 fork 2098 flags 0x1200011
zsh 2098 1 2616702.044435 exit 0
zsh 2091 0 2616702.045329 fork 2099 flags 0x1200011
zsh 2091 0 2616702.046846 signal 17 errno 0 code 262145
egrep 2099 1 2616702.047646 exec
egrep 2099 1 2616702.049571 exit 0
zsh 2091 0 2616702.050141 signal 17 errno 0 code 262145
zsh 2091 0 2616702.051020 fork 2100 flags 0x1200011
zsh 2100 0 2616702.052046 exit 0
zsh 2091 0 2616702.053346 fork 2101 flags 0x1200011
zsh 2091 0 2616702.054672 signal 17 errno 0 code 262145
egrep 2101 1 2616702.055515 exec
egrep 2101 1 2616702.057346 exit 0
zsh 2091 0 2616702.057907 signal 17 errno 0 code 262145
zsh 2091 0 2616702.058982 fork 2102 flags 0x1200011
id 2102 1 2616702.064822 exec
id 2102 1 2616702.067609 exit 0
zsh 2091 0 2616702.068307 signal 17 errno 0 code 262145
zsh 2091 0 2616702.069246 fork 2103 flags 0x1200011
hostname 2103 0 2616702.072067 exec
hostname 2103 0 2616702.074154 exit 0
zsh 2091 0 2616702.074766 signal 17 errno 0 code 262145
zsh 2091 0 2616702.076529 fork 2104 flags 0x1200011
zsh 2104 1 2616702.077982 exit 0
zsh 2091 0 2616702.079742 fork 2105 flags 0x1200011
zsh 2091 0 2616702.081672 signal 17 errno 0 code 262145
grep 2105 1 2616702.082929 exec
grep 2105 0 2616702.087867 exec
grep 2105 0 2616702.089716 exit 256
zsh 2091 0 2616702.090205 signal 17 errno 0 code 262145
zsh 2091 0 2616702.092925 fork 2106 flags 0x1200011
tput 2106 1 2616702.099077 exec
tput 2106 1 2616702.100918 exit 0
zsh 2091 0 2616702.101588 signal 17 errno 0 code 262145
zsh 2091 0 2616702.102659 fork 2107 flags 0x1200011
dircolors 2107 1 2616702.108917 exec
dircolors 2107 1 2616702.110359 exit 0
zsh 2091 0 2616702.110997 signal 17 errno 0 code 262145
zsh 2091 0 2616702.134110 fork 2108 flags 0x1200011
egrep 2108 0 2616702.136910 exec
egrep 2108 0 2616702.138921 exit 256
zsh 2091 0 2616702.139430 signal 17 errno 0 code 262145
zsh 2091 0 2616702.141230 fork 2109 flags 0x1200011
zsh 2109 1 2616702.142714 exit 0
zsh 2091 0 2616702.143685 fork 2110 flags 0x1200011
zsh 2091 0 2616702.145204 signal 17 errno 0 code 262145
grep 2110 1 2616702.145974 exec
grep 2110 1 2616702.147934 exit 256
zsh 2091 0 2616702.150523 signal 17 errno 0 code 262145
zsh 2091 0 2616702.151842 fork 2111 flags 0x1200011
zsh 2111 1 2616702.153271 exit 0
zsh 2091 0 2616702.154703 fork 2112 flags 0x1200011
zsh 2091 0 2616702.156063 signal 17 errno 0 code 262145
grep 2112 1 2616702.157028 exec
grep 2112 1 2616702.158834 exit 256
zsh 2091 0 2616702.159476 signal 17 errno 0 code 262145
zsh 2091 0 2616702.160319 fork 2113 flags 0x1200011
id 2113 1 2616702.162848 exec
id 2113 1 2616702.165115 exit 0
zsh 2091 0 2616702.165872 signal 17 errno 0 code 262145
zsh 2091 0 2616702.168590 fork 2114 flags 0x1200011
consoletype 2114 1 2616702.171021 exec
consoletype 2114 1 2616702.171988 exit 512
zsh 2091 0 2616702.172443 signal 17 errno 0 code 262145
zsh 2091 0 2616702.181959 fork 2115 flags 0x1200011
whoami 2115 1 2616702.188936 exec
whoami 2115 1 2616702.191366 exit 0
zsh 2091 0 2616702.192051 signal 17 errno 0 code 262145
zsh 2091 0 2616702.194605 fork 2116 flags 0x1200011
mkdir 2116 0 2616702.197377 exec
mkdir 2116 0 2616702.199480 exit 0
zsh 2091 0 2616702.200084 signal 17 errno 0 code 262145
zsh 2091 0 2616702.201017 fork 2117 flags 0x1200011
whoami 2117 0 2616702.206033 exec
whoami 2117 0 2616702.208245 exit 0
zsh 2091 0 2616702.208888 signal 17 errno 0 code 262145
zsh 2091 0 2616702.209836 fork 2118 flags 0x1200011
mkdir 2118 0 2616702.212527 exec
mkdir 2118 0 2616702.214474 exit 0
zsh 2091 0 2616702.215117 signal 17 errno 0 code 262145
zsh 2091 0 2616702.217011 fork 2119 flags 0x1200011
stty 2119 0 2616702.220137 exec
stty 2119 0 2616702.223496 exit 0
zsh 2091 0 2616702.223977 signal 17 errno 0 code 262145
zsh 2091 0 2616702.229063 fork 2120 flags 0x1200011
mesg 2120 0 2616702.232073 exec
mesg 2120 0 2616702.233994 exit 0
zsh 2091 0 2616702.234454 signal 17 errno 0 code 262145
zsh 2091 0 2616711.333172 fork 2121 flags 0x1200011
ls 2121 0 2616711.336055 exec
ls 2121 0 2616711.356496 exit 0
zsh 2091 0 2616711.364547 signal 17 errno 0 code 262145
zsh 2091 0 2616714.474787 fork 2125 flags 0x1200011
df 2125 0 2616714.479280 exec
df 2125 0 2616714.483010 exit 0
zsh 2091 0 2616714.483701 signal 17 errno 0 code 262145
zsh 2091 0 2616716.594615 fork 2126 flags 0x1200011
clear 2126 0 2616716.598083 exec
clear 2126 0 2616716.599856 exit 0
zsh 2091 0 2616716.600439 signal 17 errno 0 code 262145
zsh 2091 0 2616716.601532 fork 2127 flags 0x1200011
date 2127 0 2616716.613852 exec
date 2127 0 2616716.619608 exit 0
zsh 2091 0 2616716.620334 signal 17 errno 0 code 262145
zsh 2091 0 2616716.632090 fork 2128 flags 0x1200011
clear 2128 0 2616716.634284 exec
clear 2128 0 2616716.636012 exit 0
zsh 2091 0 2616716.636775 signal 17 errno 0 code 262145
zsh 2091 0 2616716.637448 exit 0
[root@vm-fed10-64 tracing]# nop > current_tracer
[root@vm-fed10-64 tracing]# cat trace
# tracer: nop
#
# TASK-PID CPU# TIMESTAMP FUNCTION
# | | | | |
[root@vm-fed10-64 tracing]# exit
Script done on Tue 27 Jan 2009 02:40:26 PM EST
diff --git a/include/linux/processtrace.h b/include/linux/processtrace.h
new file mode 100644
index 0000000..f902443
--- /dev/null
+++ b/include/linux/processtrace.h
@@ -0,0 +1,33 @@
+#ifndef PROCESSTRACE_H
+#define PROCESSTRACE_H
+
+#include <linux/types.h>
+#include <linux/list.h>
+
+struct process_trace_entry {
+ unsigned char opcode; /* one of _UTRACE_EVENT_* */
+ char comm[TASK_COMM_LEN]; /* XXX: should be in/via trace_entry */
+ union {
+ struct {
+ pid_t child;
+ unsigned long flags;
+ } trace_clone;
+ struct {
+ long code;
+ } trace_exit;
+ struct {
+ } trace_exec;
+ struct {
+ int si_signo;
+ int si_errno;
+ int si_code;
+ } trace_signal;
+ };
+};
+
+/* in kernel/trace/trace_process.c */
+
+extern void enable_process_trace (void);
+extern void disable_process_trace (void);
+
+#endif /* PROCESSTRACE_H */
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 33dbefd..9276863 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -119,6 +119,15 @@ config CONTEXT_SWITCH_TRACER
This tracer gets called from the context switch and records
all switching of tasks.
+config PROCESS_TRACER
+ bool "Trace process events via utrace"
+ depends on DEBUG_KERNEL
+ select TRACING
+ select UTRACE
+ help
+ This tracer provides trace records from process events
+ accessible to utrace: lifecycle, system calls, and signals.
+
config BOOT_TRACER
bool "Trace boot initcalls"
depends on DEBUG_KERNEL
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index c8228b1..b06a5d6 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -24,5 +24,6 @@ obj-$(CONFIG_NOP_TRACER) += trace_nop.o
obj-$(CONFIG_STACK_TRACER) += trace_stack.o
obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
obj-$(CONFIG_BOOT_TRACER) += trace_boot.o
+obj-$(CONFIG_PROCESS_TRACER) += trace_process.o
libftrace-y := ftrace.o
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 8465ad0..7c0cd57 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -7,6 +7,7 @@
#include <linux/clocksource.h>
#include <linux/ring_buffer.h>
#include <linux/mmiotrace.h>
+#include <linux/processtrace.h>
#include <linux/ftrace.h>
enum trace_type {
@@ -22,6 +23,7 @@ enum trace_type {
TRACE_MMIO_RW,
TRACE_MMIO_MAP,
TRACE_BOOT,
+ TRACE_PROCESS,
__TRACE_LAST_TYPE
};
@@ -117,6 +119,11 @@ struct trace_boot {
struct boot_trace initcall;
};
+struct trace_process {
+ struct trace_entry ent;
+ struct process_trace_entry event;
+};
+
/*
* trace_flag_type is an enumeration that holds different
* states when a trace occurs. These are:
@@ -219,6 +226,7 @@ extern void __ftrace_bad_type(void);
IF_ASSIGN(var, ent, struct trace_mmiotrace_map, \
TRACE_MMIO_MAP); \
IF_ASSIGN(var, ent, struct trace_boot, TRACE_BOOT); \
+ IF_ASSIGN(var, ent, struct trace_process, TRACE_PROCESS); \
__ftrace_bad_type(); \
} while (0)
diff --git a/kernel/trace/trace_process.c b/kernel/trace/trace_process.c
new file mode 100644
index 0000000..10c2c3c
--- /dev/null
+++ b/kernel/trace/trace_process.c
@@ -0,0 +1,440 @@
+/*
+ * utrace-based process event tracing
+ * Copyright (C) 2009 Red Hat Inc.
+ * By Frank Ch. Eigler <fche@redhat.com>
+ */
+
+#define DEBUG 1
+
+#include <linux/kernel.h>
+#include <linux/utrace.h>
+#include <linux/uaccess.h>
+#include <linux/debugfs.h>
+
+#include "trace.h"
+
+/* A process must match these filters in order to be traced. */
+static char trace_taskcomm_filter[TASK_COMM_LEN]; /* \0: unrestricted */
+static u32 trace_taskuid_filter = -1; /* -1: unrestricted */
+
+/* A process must be a direct child of given pid in order to be
+ followed. */
+static u32 process_follow_pid; /* 0: unrestricted/systemwide */
+
+/* XXX: lock the above? */
+
+
+/* trace data collection */
+
+static struct trace_array *process_trace_array;
+
+static void process_reset_data(struct trace_array *tr)
+{
+ int cpu;
+
+ pr_debug("in %s\n", __func__);
+ tr->time_start = ftrace_now(tr->cpu);
+ for_each_online_cpu(cpu)
+ tracing_reset(tr, cpu);
+}
+
+static void process_trace_init(struct trace_array *tr)
+{
+ pr_debug("in %s\n", __func__);
+ process_trace_array = tr;
+ if (tr->ctrl) {
+ process_reset_data(tr);
+ enable_process_trace();
+ }
+}
+
+static void process_trace_reset(struct trace_array *tr)
+{
+ pr_debug("in %s\n", __func__);
+ if (tr->ctrl)
+ disable_process_trace();
+ process_reset_data(tr);
+ process_trace_array = NULL;
+}
+
+static void process_trace_ctrl_update(struct trace_array *tr)
+{
+ pr_debug("in %s\n", __func__);
+ if (tr->ctrl) {
+ process_reset_data(tr);
+ enable_process_trace();
+ } else {
+ disable_process_trace();
+ }
+}
+
+static void __trace_processtrace(struct trace_array *tr,
+ struct trace_array_cpu *data,
+ struct process_trace_entry *ent)
+{
+ struct ring_buffer_event *event;
+ struct trace_process *entry;
+ unsigned long irq_flags;
+
+ event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+ &irq_flags);
+ if (!event)
+ return;
+ entry = ring_buffer_event_data(event);
+ tracing_generic_entry_update(&entry->ent, 0, preempt_count());
+ entry->ent.cpu = raw_smp_processor_id();
+ entry->ent.type = TRACE_PROCESS;
+ strlcpy (ent->comm, current->comm, TASK_COMM_LEN);
+ entry->event = *ent;
+ ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
+
+ trace_wake_up();
+}
+
+void process_trace(struct process_trace_entry *ent)
+{
+ struct trace_array *tr = process_trace_array;
+ struct trace_array_cpu *data = tr->data[smp_processor_id()];
+
+ __trace_processtrace(tr, data, ent);
+}
+
+
+/* trace data rendering */
+
+static void process_pipe_open(struct trace_iterator *iter)
+{
+ struct trace_seq *s = &iter->seq;
+ pr_debug("in %s\n", __func__);
+ trace_seq_printf(s, "VERSION 200901\n");
+}
+
+static void process_close(struct trace_iterator *iter)
+{
+ iter->private = NULL;
+}
+
+static ssize_t process_read(struct trace_iterator *iter, struct file *filp,
+ char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+ ssize_t ret;
+ struct trace_seq *s = &iter->seq;
+ ret = trace_seq_to_user(s, ubuf, cnt);
+ return (ret == -EBUSY) ? 0 : ret;
+}
+
+static enum print_line_t process_print(struct trace_iterator *iter)
+{
+ struct trace_entry *entry = iter->ent;
+ struct trace_process *field;
+ struct trace_seq *s = &iter->seq;
+ unsigned long long t = ns2usecs(iter->ts);
+ unsigned long usec_rem = do_div(t, 1000000ULL);
+ unsigned secs = (unsigned long)t;
+ int ret = 1;
+
+ pr_debug("in %s\n", __func__);
+ trace_assign_type(field, entry);
+
+ /* XXX: If print_lat_fmt() were not static, we wouldn't have
+ to duplicate this. */
+ trace_seq_printf(s, "%16s %5d %3d %9lu.%06ld ",
+ field->event.comm,
+ entry->pid, entry->cpu,
+ secs,
+ usec_rem);
+
+ switch (field->event.opcode) {
+ case _UTRACE_EVENT_CLONE:
+ ret = trace_seq_printf(s, "fork %d flags 0x%lx\n",
+ field->event.trace_clone.child,
+ field->event.trace_clone.flags);
+ break;
+ case _UTRACE_EVENT_EXEC:
+ ret = trace_seq_printf(s, "exec\n");
+ break;
+ case _UTRACE_EVENT_EXIT:
+ ret = trace_seq_printf(s, "exit %ld\n",
+ field->event.trace_exit.code);
+ break;
+ case _UTRACE_EVENT_SIGNAL:
+ ret = trace_seq_printf(s, "signal %d errno %d code %d\n",
+ field->event.trace_signal.si_signo,
+ field->event.trace_signal.si_errno,
+ field->event.trace_signal.si_code);
+ break;
+ default:
+ ret = trace_seq_printf(s, "process code %d?\n", field->event.opcode);
+ break;
+ }
+ if (ret)
+ return TRACE_TYPE_HANDLED;
+ return TRACE_TYPE_PARTIAL_LINE;
+}
+
+
+static enum print_line_t process_print_line(struct trace_iterator *iter)
+{
+ switch (iter->ent->type) {
+ case TRACE_PROCESS:
+ return process_print(iter);
+ default:
+ return TRACE_TYPE_HANDLED; /* ignore unknown entries */
+ }
+}
+
+static struct tracer process_tracer __read_mostly =
+{
+ .name = "process",
+ .init = process_trace_init,
+ .reset = process_trace_reset,
+ .pipe_open = process_pipe_open,
+ .close = process_close,
+ .read = process_read,
+ .ctrl_update = process_trace_ctrl_update,
+ .print_line = process_print_line,
+};
+
+
+
+/* utrace backend */
+
+/* Should tracing apply to given task? Compare against filter
+ values. */
+static int trace_test (struct task_struct *tsk)
+{
+ if (trace_taskcomm_filter[0]
+ && strcmp (trace_taskcomm_filter, tsk->comm))
+ return 0;
+ if (trace_taskuid_filter != (u32)-1
+ && trace_taskuid_filter != task_uid (tsk))
+ return 0;
+
+ return 1;
+}
+
+
+static struct utrace_engine_ops process_trace_ops __read_mostly;
+
+static void process_trace_tryattach (struct task_struct *tsk)
+{
+ struct utrace_attached_engine *engine;
+
+ pr_debug("in %s\n", __func__);
+ engine = utrace_attach_task (tsk, UTRACE_ATTACH_CREATE,
+ & process_trace_ops, NULL);
+ if (IS_ERR(engine) || (engine == NULL)) {
+ pr_warning ("utrace_attach_task %d (rc %p)\n",
+ tsk->pid, engine);
+ } else {
+ int rc;
+
+ /* XXX: Why is this not implicit from the fields
+ set in the process_trace_ops? */
+ rc = utrace_set_events (tsk, engine,
+ UTRACE_EVENT(CLONE) |
+ UTRACE_EVENT(EXEC) |
+ UTRACE_EVENT(SIGNAL) |
+ UTRACE_EVENT(EXIT));
+ if (rc == -EINPROGRESS)
+ rc = utrace_barrier (tsk, engine);
+ if (rc)
+ pr_warning ("utrace_set_events/barrier rc %d\n", rc);
+
+ utrace_engine_put (engine);
+ pr_debug("attached in %s to %s(%d)\n", __func__, tsk->comm, tsk->pid);
+ }
+}
+
+
+u32 process_trace_report_clone (enum utrace_resume_action action,
+ struct utrace_attached_engine *engine,
+ struct task_struct *parent,
+ unsigned long clone_flags,
+ struct task_struct *child)
+{
+ if (trace_test (parent)) {
+ struct process_trace_entry ent;
+ ent.opcode = _UTRACE_EVENT_CLONE;
+ ent.trace_clone.child = child->pid;
+ ent.trace_clone.flags = clone_flags;
+ process_trace(& ent);
+ }
+
+ process_trace_tryattach (child);
+
+ return action;
+}
+
+
+u32 process_trace_report_exec (enum utrace_resume_action action,
+ struct utrace_attached_engine *engine,
+ struct task_struct *task,
+ const struct linux_binfmt *fmt,
+ const struct linux_binprm *bprm,
+ struct pt_regs *regs)
+{
+ if (trace_test (task)) {
+ struct process_trace_entry ent;
+ ent.opcode = _UTRACE_EVENT_EXEC;
+ process_trace(& ent);
+ }
+
+ /* We're already attached; no need for a new tryattach. */
+
+ return action;
+}
+
+
+u32 process_trace_report_signal (u32 action,
+ struct utrace_attached_engine *engine,
+ struct task_struct *task,
+ struct pt_regs *regs,
+ siginfo_t *info,
+ const struct k_sigaction *orig_ka,
+ struct k_sigaction *return_ka)
+{
+ if (trace_test (task)) {
+ struct process_trace_entry ent;
+ ent.opcode = _UTRACE_EVENT_SIGNAL;
+ ent.trace_signal.si_signo = info->si_signo;
+ ent.trace_signal.si_errno = info->si_errno;
+ ent.trace_signal.si_code = info->si_code;
+ process_trace(& ent);
+ }
+
+ /* We're already attached; no need for a new tryattach. */
+
+ return action;
+}
+
+
+u32 process_trace_report_exit (enum utrace_resume_action action,
+ struct utrace_attached_engine *engine,
+ struct task_struct *task,
+ long orig_code, long *code)
+{
+ if (trace_test (task)) {
+ struct process_trace_entry ent;
+ ent.opcode = _UTRACE_EVENT_EXIT;
+ ent.trace_exit.code = orig_code;
+ process_trace(& ent);
+ }
+
+ /* There is no need to explicitly attach or detach here. */
+
+ return action;
+}
+
+
+void enable_process_trace () {
+ struct task_struct *grp, *tsk;
+
+ pr_debug("in %s\n", __func__);
+ rcu_read_lock();
+ do_each_thread(grp, tsk) {
+ struct mm_struct *mm;
+
+ /* Skip over kernel threads. */
+ mm = get_task_mm (tsk);
+ if (!mm)
+ continue;
+
+ if (process_follow_pid) {
+ if (tsk->tgid == process_follow_pid ||
+ tsk->parent->tgid == process_follow_pid)
+ process_trace_tryattach (tsk);
+ } else {
+ process_trace_tryattach (tsk);
+ }
+ } while_each_thread(grp, tsk);
+ rcu_read_unlock();
+}
+
+void disable_process_trace () {
+ struct utrace_attached_engine *engine;
+ struct task_struct *grp, *tsk;
+ int rc;
+
+ pr_debug("in %s\n", __func__);
+ rcu_read_lock();
+ do_each_thread(grp, tsk) {
+ if (tsk->pid <= 1)
+ continue;
+
+ /* Find matching engine, if any. Returns -ENOENT for
+ unattached threads. */
+ engine = utrace_attach_task (tsk, UTRACE_ATTACH_MATCH_OPS,
+ & process_trace_ops, 0);
+ if (IS_ERR(engine)) {
+ if (PTR_ERR(engine) != -ENOENT)
+ pr_warning ("utrace_attach_task %d (rc %ld)\n",
+ tsk->pid, -PTR_ERR(engine));
+ } else if (engine == NULL) {
+ pr_warning ("utrace_attach_task %d (null engine)\n",
+ tsk->pid);
+ } else {
+ /* Found one of our own engines. Detach. */
+ rc = utrace_control (tsk, engine, UTRACE_DETACH);
+ switch (rc) {
+ case 0: /* success */
+ break;
+ case -ESRCH: /* REAP callback already begun */
+ case -EALREADY: /* DEATH callback already begun */
+ break;
+ default:
+ rc = -rc;
+ pr_warning ("utrace_detach %d (rc %d)\n",
+ tsk->pid, rc);
+ break;
+ }
+ utrace_engine_put(engine);
+ pr_debug("detached in %s from %s(%d)\n", __func__, tsk->comm, tsk->pid);
+ }
+ } while_each_thread(grp, tsk);
+ rcu_read_unlock();
+}
+
+
+static struct utrace_engine_ops process_trace_ops __read_mostly = {
+ .report_clone = process_trace_report_clone,
+ .report_exec = process_trace_report_exec,
+ .report_exit = process_trace_report_exit,
+ .report_signal = process_trace_report_signal,
+};
+
+
+
+/* control interfaces */
+
+static struct debugfs_blob_wrapper trace_taskcomm_filter_blob = {
+ .data = trace_taskcomm_filter,
+ .size = sizeof (trace_taskcomm_filter),
+};
+
+static __init int init_process_trace(void)
+{
+ struct dentry *d_tracer;
+ struct dentry *entry;
+
+ d_tracer = tracing_init_dentry();
+
+ entry = debugfs_create_blob("process_trace_taskcomm_filter", 0644, d_tracer,
+ & trace_taskcomm_filter_blob);
+ if (!entry)
+ pr_warning("Could not create debugfs 'process_trace_taskcomm_filter' entry\n");
+
+ entry = debugfs_create_u32("process_trace_uid_filter", 0644, d_tracer,
+ & trace_taskuid_filter);
+ if (!entry)
+ pr_warning("Could not create debugfs 'process_trace_uid_filter' entry\n");
+
+ entry = debugfs_create_u32("process_follow_pid", 0644, d_tracer,
+ & process_follow_pid);
+ if (!entry)
+ pr_warning("Could not create debugfs 'process_follow_pid' entry\n");
+
+ return register_tracer(&process_tracer);
+}
+
+device_initcall(init_process_trace);
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: proof-of-concept, utrace->ftrace engine
2009-01-27 19:54 proof-of-concept, utrace->ftrace engine Frank Ch. Eigler
@ 2009-02-09 7:22 ` Roland McGrath
2009-02-22 22:22 ` utrace-based ftrace "process" engine, v2 Frank Ch. Eigler
0 siblings, 1 reply; 3+ messages in thread
From: Roland McGrath @ 2009-02-09 7:22 UTC (permalink / raw)
To: Frank Ch. Eigler; +Cc: utrace-devel, linux-kernel, systemtap
> Here's the start of a little ditty that ties process-related events as
> hooked by the Roland McGrath's utrace code into the ftrace
> buffer/control widgetry. If nothing else, think of it as one
> potential in-tree user of utrace.
Cool! I won't comment on the use of the tracer or its interface code,
I'll leave that to others. (It's simplistic and kludgey, but I know
that's what it's going for.) I'll just review your use of the utrace API.
> +/* Should tracing apply to given task? Compare against filter
> + values. */
> +static int trace_test (struct task_struct *tsk)
> +{
> + if (trace_taskcomm_filter[0]
> + && strcmp (trace_taskcomm_filter, tsk->comm))
> + return 0;
Note that this is the most simple-minded approach for this. The ->comm
value only changes at exec. So the "normal", slightly more sophisticated,
way to approach this would be to check at attach time if ->comm matches.
If so, enable full tracing. If not, enable only EXEC and CLONE events. In
your report_exec callback, check ->comm to see if the task now should be
filtered in or now should be filtered out, and call utrace_set_events with
more or fewer bits set accordingly. You always need the report_clone
callback to attach the new child so you can see when it execs; give the new
child the thin or fat event mask as its parent has.
This way, you don't go off the fast paths in signals, etc. when you are
never going to care about those events. For a trivial hack like this one,
you might not care. But for more serious use, you want to bother doing it
the fancier way. If you added syscall tracing support, you probably would
care about the overhead of enabling that on all the uninteresting tasks.
> + if (trace_taskuid_filter != (u32)-1
> + && trace_taskuid_filter != task_uid (tsk))
> + return 0;
We don't have a utrace event for uid changes, so this one you do have to do
"eagerly". (Some day in the future, we might well have an event for this
so it can be treated intelligently on transitions as with exec as the
"->comm change event".)
> +static struct utrace_engine_ops process_trace_ops __read_mostly;
This is normally const. utrace never touches it (all const pointers).
You could change it yourself, but that would not be a normal way to do things.
> + engine = utrace_attach_task (tsk, UTRACE_ATTACH_CREATE,
> + & process_trace_ops, NULL);
Given how you use UTRACE_ATTACH_MATCH_OPS to effect detach, you might want
to use UTRACE_ATTACH_MATCH_OPS|UTRACE_ATTACH_EXCLUSIVE here. It's probably
impossible to have another call than yours with the same ops pointer, but
if not then it probably indicates that your later detach could well foul
something up.
> + /* XXX: Why is this not implicit from the fields
> + set in the process_trace_ops? */
> + rc = utrace_set_events (tsk, engine,
The same reason FWRITE on a struct file is not implicit from having a
.write field set in your struct file_operations. Your ops struct says
statically what your code is written to handle. An engine's event mask
says what callbacks you want from that specific thread to that specific
engine at the moment.
> + UTRACE_EVENT(SIGNAL) |
Note this means (exactly as documented):
_UTRACE_EVENT_SIGNAL, /* Signal delivery will run a user handler. */
You might have had UTRACE_EVENT_SIGNAL_ALL in mind.
That is the union of the five different kinds of SIGNAL* event.
> +u32 process_trace_report_clone (enum utrace_resume_action action,
[...]
> + return action;
> +}
This is wrong. If you have nothing special you want to do (just
observing, not perturbing), then "return UTRACE_RESUME;" is what you say.
In report_signal, the non-utrace_resume_action part of the return value
matters, so:
return UTRACE_RESUME | utrace_signal_action(action);
is what doesn't change anything there.
As documented under 'struct utrace_engine_ops', the action argument is
what other engines are causing to be done independent of what your
engine does. The utrace_resume_action part of the return value is
what *your engine* wants done, independent of what other engines say.
Your choices might be informed by what other engines are doing in some
cases, but it is not right to mimic what they said. If some other
engine said UTRACE_STOP, then now you say UTRACE_STOP, but you'll
never call utrace_control to resume, and the thread will be stopped
forever. If he says UTRACE_STOP and you don't care, you say
UTRACE_RESUME, and the thread stops (UTRACE_STOP < UTRACE_RESUME).
When he calls utrace_control in the future, the thread resumes because
there is no engine left whose last command was UTRACE_STOP.
The non-utrace_resume_action part of the return value (only nonempty for
SIGNAL* and SYSCALL* events) is different. Unlike utrace_resume_action,
the different choices of different engines can't be combined into a
"least common denominator". The choice of utrace_signal_action or
utrace_syscall_action setting is what the user-visible disposition
resolving the event will be; all the choices are mutually exclusive and
their effects final. The last callback to run chooses the final answer.
So each callback has to decide something. It gets the incoming choice
in its action argument, either from the preceding callback or from the
original normal default (what prevails in the absence of tracing). The
idiom above passes through the incoming value to leave that choice alone.
> + /* Skip over kernel threads. */
> + mm = get_task_mm (tsk);
> + if (!mm)
> + continue;
This should just check PF_KTHREAD. (As it is, you leak an mm ref here.)
Or just don't bother and handle utrace_attach returning ERR_PTR(-EPERM),
which it will for a kernel thread.
Thanks,
Roland
^ permalink raw reply [flat|nested] 3+ messages in thread
* utrace-based ftrace "process" engine, v2
2009-02-09 7:22 ` Roland McGrath
@ 2009-02-22 22:22 ` Frank Ch. Eigler
0 siblings, 0 replies; 3+ messages in thread
From: Frank Ch. Eigler @ 2009-02-22 22:22 UTC (permalink / raw)
To: Roland McGrath; +Cc: utrace-devel, linux-kernel, systemtap
Hi -
This is v2 of the prototype utrace-ftrace interface. This code is
based on Roland McGrath's utrace API, which provides programmatic
hooks to the in-tree tracehook layer. This new patch interfaces many
of those events to ftrace, as configured by a small number of debugfs
controls. Here's the /debugfs/tracing/process_trace_README:
process event tracer mini-HOWTO
1. Select process hierarchy to monitor. Other processes will be
completely unaffected. Leave at 0 for system-wide tracing.
# echo NNN > process_follow_pid
2. Determine which process event traces are potentially desired.
syscall and signal tracing slow down monitored processes.
# echo 0 > process_trace_{syscalls,signals,lifecycle}
3. Add any final uid- or taskcomm-based filtering. Non-matching
processes will skip trace messages, but will still be slowed.
# echo NNN > process_trace_uid_filter # -1: unrestricted
# echo ls > process_trace_taskcomm_filter # empty: unrestricted
4. Start tracing.
# echo process > current_tracer
5. Examine trace.
# cat trace
6. Stop tracing.
# echo nop > current_tracer
Signed-off-By: Frank Ch. Eigler <fche@redhat.com>
---
include/linux/processtrace.h | 41 +++
kernel/trace/Kconfig | 9 +
kernel/trace/Makefile | 1 +
kernel/trace/trace.h | 30 ++-
kernel/trace/trace_process.c | 591 ++++++++++++++++++++++++++++++++++++++++++
5 files changed, 661 insertions(+), 11 deletions(-)
diff --git a/include/linux/processtrace.h b/include/linux/processtrace.h
new file mode 100644
index 0000000..f2b7d94
--- /dev/null
+++ b/include/linux/processtrace.h
@@ -0,0 +1,41 @@
+#ifndef PROCESSTRACE_H
+#define PROCESSTRACE_H
+
+#include <linux/types.h>
+#include <linux/list.h>
+
+struct process_trace_entry {
+ unsigned char opcode; /* one of _UTRACE_EVENT_* */
+ char comm[TASK_COMM_LEN]; /* XXX: should be in/via trace_entry */
+ union {
+ struct {
+ pid_t child;
+ unsigned long flags;
+ } trace_clone;
+ struct {
+ long code;
+ } trace_exit;
+ struct {
+ } trace_exec;
+ struct {
+ int si_signo;
+ int si_errno;
+ int si_code;
+ } trace_signal;
+ struct {
+ long callno;
+ unsigned long args[6];
+ } trace_syscall_entry;
+ struct {
+ long rc;
+ long error;
+ } trace_syscall_exit;
+ };
+};
+
+/* in kernel/trace/trace_process.c */
+
+extern void enable_process_trace(void);
+extern void disable_process_trace(void);
+
+#endif /* PROCESSTRACE_H */
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index e2a4ff6..3ff727e 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -149,6 +149,15 @@ config CONTEXT_SWITCH_TRACER
This tracer gets called from the context switch and records
all switching of tasks.
+config PROCESS_TRACER
+ bool "Trace process events via utrace"
+ depends on DEBUG_KERNEL
+ select TRACING
+ select UTRACE
+ help
+ This tracer provides trace records from process events
+ accessible to utrace: lifecycle, system calls, and signals.
+
config BOOT_TRACER
bool "Trace boot initcalls"
depends on DEBUG_KERNEL
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 349d5a9..a774db2 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -33,5 +33,6 @@ obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o
obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o
obj-$(CONFIG_HW_BRANCH_TRACER) += trace_hw_branches.o
obj-$(CONFIG_POWER_TRACER) += trace_power.o
+obj-$(CONFIG_PROCESS_TRACER) += trace_process.o
libftrace-y := ftrace.o
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 4d3d381..b4ebccb 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -7,6 +7,7 @@
#include <linux/clocksource.h>
#include <linux/ring_buffer.h>
#include <linux/mmiotrace.h>
+#include <linux/processtrace.h>
#include <linux/ftrace.h>
#include <trace/boot.h>
@@ -30,6 +31,7 @@ enum trace_type {
TRACE_USER_STACK,
TRACE_HW_BRANCHES,
TRACE_POWER,
+ TRACE_PROCESS,
__TRACE_LAST_TYPE
};
@@ -38,7 +40,7 @@ enum trace_type {
* The trace entry - the most basic unit of tracing. This is what
* is printed in the end as a single line in the trace output, such as:
*
- * bash-15816 [01] 235.197585: idle_cpu <- irq_enter
+ * bash-15816 [01] 235.197585: idle_cpu <- irq_enter
*/
struct trace_entry {
unsigned char type;
@@ -153,7 +155,7 @@ struct trace_boot_ret {
#define TRACE_FILE_SIZE 20
struct trace_branch {
struct trace_entry ent;
- unsigned line;
+ unsigned line;
char func[TRACE_FUNC_SIZE+1];
char file[TRACE_FILE_SIZE+1];
char correct;
@@ -170,11 +172,16 @@ struct trace_power {
struct power_trace state_data;
};
+struct trace_process {
+ struct trace_entry ent;
+ struct process_trace_entry event;
+};
+
/*
* trace_flag_type is an enumeration that holds different
* states when a trace occurs. These are:
* IRQS_OFF - interrupts were disabled
- * IRQS_NOSUPPORT - arch does not support irqs_disabled_flags
+ * IRQS_NOSUPPORT - arch does not support irqs_disabled_flags
* NEED_RESCED - reschedule is requested
* HARDIRQ - inside an interrupt handler
* SOFTIRQ - inside a softirq handler
@@ -279,7 +286,8 @@ extern void __ftrace_bad_type(void);
IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry, \
TRACE_GRAPH_RET); \
IF_ASSIGN(var, ent, struct hw_branch_entry, TRACE_HW_BRANCHES);\
- IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \
+ IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \
+ IF_ASSIGN(var, ent, struct trace_process, TRACE_PROCESS); \
__ftrace_bad_type(); \
} while (0)
@@ -297,8 +305,8 @@ enum print_line_t {
* flags value in struct tracer_flags.
*/
struct tracer_opt {
- const char *name; /* Will appear on the trace_options file */
- u32 bit; /* Mask assigned in val field in tracer_flags */
+ const char *name; /* Will appear on the trace_options file */
+ u32 bit; /* Mask assigned in val field in tracer_flags */
};
/*
@@ -307,7 +315,7 @@ struct tracer_opt {
*/
struct tracer_flags {
u32 val;
- struct tracer_opt *opts;
+ struct tracer_opt *opts;
};
/* Makes more easy to define a tracer opt */
@@ -339,7 +347,7 @@ struct tracer {
int (*set_flag)(u32 old_flags, u32 bit, int set);
struct tracer *next;
int print_max;
- struct tracer_flags *flags;
+ struct tracer_flags *flags;
};
struct trace_seq {
@@ -561,7 +569,7 @@ static inline int ftrace_trace_task(struct task_struct *task)
* positions into trace_flags that controls the output.
*
* NOTE: These bits must match the trace_options array in
- * trace.c.
+ * trace.c.
*/
enum trace_iterator_flags {
TRACE_ITER_PRINT_PARENT = 0x01,
@@ -578,8 +586,8 @@ enum trace_iterator_flags {
TRACE_ITER_PREEMPTONLY = 0x800,
TRACE_ITER_BRANCH = 0x1000,
TRACE_ITER_ANNOTATE = 0x2000,
- TRACE_ITER_USERSTACKTRACE = 0x4000,
- TRACE_ITER_SYM_USEROBJ = 0x8000,
+ TRACE_ITER_USERSTACKTRACE = 0x4000,
+ TRACE_ITER_SYM_USEROBJ = 0x8000,
TRACE_ITER_PRINTK_MSGONLY = 0x10000
};
diff --git a/kernel/trace/trace_process.c b/kernel/trace/trace_process.c
new file mode 100644
index 0000000..038ff36
--- /dev/null
+++ b/kernel/trace/trace_process.c
@@ -0,0 +1,591 @@
+/*
+ * utrace-based process event tracing
+ * Copyright (C) 2009 Red Hat Inc.
+ * By Frank Ch. Eigler <fche@redhat.com>
+ *
+ * Based on mmio ftrace engine by Pekka Paalanen
+ * and utrace-syscall-tracing prototype by Ananth Mavinakayanahalli
+ */
+
+/* #define DEBUG 1 */
+
+#include <linux/kernel.h>
+#include <linux/utrace.h>
+#include <linux/uaccess.h>
+#include <linux/debugfs.h>
+#include <asm/syscall.h>
+
+#include "trace.h"
+
+/* A process must match these filters in order to be traced. */
+static char trace_taskcomm_filter[TASK_COMM_LEN]; /* \0: unrestricted */
+static u32 trace_taskuid_filter = -1; /* -1: unrestricted */
+static u32 trace_lifecycle_p = 1;
+static u32 trace_syscalls_p = 1;
+static u32 trace_signals_p = 1;
+
+/* A process must be a direct child of given pid in order to be
+ followed. */
+static u32 process_follow_pid; /* 0: unrestricted/systemwide */
+
+/* XXX: lock the above? */
+
+
+/* trace data collection */
+
+static struct trace_array *process_trace_array;
+
+static void process_reset_data(struct trace_array *tr)
+{
+ pr_debug("in %s\n", __func__);
+ tracing_reset_online_cpus(tr);
+}
+
+static int process_trace_init(struct trace_array *tr)
+{
+ pr_debug("in %s\n", __func__);
+ process_trace_array = tr;
+ process_reset_data(tr);
+ enable_process_trace();
+ return 0;
+}
+
+static void process_trace_reset(struct trace_array *tr)
+{
+ pr_debug("in %s\n", __func__);
+ disable_process_trace();
+ process_reset_data(tr);
+ process_trace_array = NULL;
+}
+
+static void process_trace_start(struct trace_array *tr)
+{
+ pr_debug("in %s\n", __func__);
+ process_reset_data(tr);
+}
+
+static void __trace_processtrace(struct trace_array *tr,
+ struct trace_array_cpu *data,
+ struct process_trace_entry *ent)
+{
+ struct ring_buffer_event *event;
+ struct trace_process *entry;
+ unsigned long irq_flags;
+
+ event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+ &irq_flags);
+ if (!event)
+ return;
+ entry = ring_buffer_event_data(event);
+ tracing_generic_entry_update(&entry->ent, 0, preempt_count());
+ entry->ent.cpu = raw_smp_processor_id();
+ entry->ent.type = TRACE_PROCESS;
+ strlcpy(ent->comm, current->comm, TASK_COMM_LEN);
+ entry->event = *ent;
+ ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
+
+ trace_wake_up();
+}
+
+void process_trace(struct process_trace_entry *ent)
+{
+ struct trace_array *tr = process_trace_array;
+ struct trace_array_cpu *data;
+
+ preempt_disable();
+ data = tr->data[smp_processor_id()];
+ __trace_processtrace(tr, data, ent);
+ preempt_enable();
+}
+
+
+/* trace data rendering */
+
+static void process_pipe_open(struct trace_iterator *iter)
+{
+ struct trace_seq *s = &iter->seq;
+ pr_debug("in %s\n", __func__);
+ trace_seq_printf(s, "VERSION 200901\n");
+}
+
+static void process_close(struct trace_iterator *iter)
+{
+ iter->private = NULL;
+}
+
+static ssize_t process_read(struct trace_iterator *iter, struct file *filp,
+ char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+ ssize_t ret;
+ struct trace_seq *s = &iter->seq;
+ ret = trace_seq_to_user(s, ubuf, cnt);
+ return (ret == -EBUSY) ? 0 : ret;
+}
+
+static enum print_line_t process_print(struct trace_iterator *iter)
+{
+ struct trace_entry *entry = iter->ent;
+ struct trace_process *field;
+ struct trace_seq *s = &iter->seq;
+ unsigned long long t = ns2usecs(iter->ts);
+ unsigned long usec_rem = do_div(t, 1000000ULL);
+ unsigned secs = (unsigned long)t;
+ int ret = 1;
+
+ trace_assign_type(field, entry);
+
+ /* XXX: If print_lat_fmt() were not static, we wouldn't have
+ to duplicate this. */
+ trace_seq_printf(s, "%16s %5d %3d %9lu.%06ld ",
+ field->event.comm,
+ entry->pid, entry->cpu,
+ secs,
+ usec_rem);
+
+ switch (field->event.opcode) {
+ case _UTRACE_EVENT_CLONE:
+ ret = trace_seq_printf(s, "fork %d flags 0x%lx\n",
+ field->event.trace_clone.child,
+ field->event.trace_clone.flags);
+ break;
+ case _UTRACE_EVENT_EXEC:
+ ret = trace_seq_printf(s, "exec\n");
+ break;
+ case _UTRACE_EVENT_EXIT:
+ ret = trace_seq_printf(s, "exit %ld\n",
+ field->event.trace_exit.code);
+ break;
+ case _UTRACE_EVENT_SIGNAL:
+ ret = trace_seq_printf(s, "signal %d errno %d code 0x%x\n",
+ field->event.trace_signal.si_signo,
+ field->event.trace_signal.si_errno,
+ field->event.trace_signal.si_code);
+ break;
+ case _UTRACE_EVENT_SYSCALL_ENTRY:
+ ret = trace_seq_printf(s, "syscall %ld [0x%lx 0x%lx 0x%lx 0x%lx 0x%lx]\n",
+ field->event.trace_syscall_entry.callno,
+ field->event.trace_syscall_entry.args[0],
+ field->event.trace_syscall_entry.args[1],
+ field->event.trace_syscall_entry.args[2],
+ field->event.trace_syscall_entry.args[3],
+ field->event.trace_syscall_entry.args[4],
+ field->event.trace_syscall_entry.args[5]);
+ break;
+ case _UTRACE_EVENT_SYSCALL_EXIT:
+ ret = trace_seq_printf(s, "syscall rc %ld error %ld\n",
+ field->event.trace_syscall_exit.rc,
+ field->event.trace_syscall_exit.error);
+ break;
+ default:
+ ret = trace_seq_printf(s, "process code %d?\n",
+ field->event.opcode);
+ break;
+ }
+ if (ret)
+ return TRACE_TYPE_HANDLED;
+ return TRACE_TYPE_HANDLED;
+}
+
+
+static enum print_line_t process_print_line(struct trace_iterator *iter)
+{
+ switch (iter->ent->type) {
+ case TRACE_PROCESS:
+ return process_print(iter);
+ default:
+ return TRACE_TYPE_HANDLED; /* ignore unknown entries */
+ }
+}
+
+static struct tracer process_tracer = {
+ .name = "process",
+ .init = process_trace_init,
+ .reset = process_trace_reset,
+ .start = process_trace_start,
+ .pipe_open = process_pipe_open,
+ .close = process_close,
+ .read = process_read,
+ .print_line = process_print_line,
+};
+
+
+
+/* utrace backend */
+
+/* Should tracing apply to given task? Compare against filter
+ values. */
+static int trace_test(struct task_struct *tsk)
+{
+ if (trace_taskcomm_filter[0]
+ && strncmp(trace_taskcomm_filter, tsk->comm, TASK_COMM_LEN))
+ return 0;
+
+ if (trace_taskuid_filter != (u32)-1
+ && trace_taskuid_filter != task_uid(tsk))
+ return 0;
+
+ return 1;
+}
+
+
+static const struct utrace_engine_ops process_trace_ops;
+
+static void process_trace_tryattach(struct task_struct *tsk)
+{
+ struct utrace_attached_engine *engine;
+
+ pr_debug("in %s\n", __func__);
+ engine = utrace_attach_task(tsk,
+ UTRACE_ATTACH_CREATE |
+ UTRACE_ATTACH_EXCLUSIVE,
+ &process_trace_ops, NULL);
+ if (IS_ERR(engine) || (engine == NULL)) {
+ pr_warning("utrace_attach_task %d (rc %p)\n",
+ tsk->pid, engine);
+ } else {
+ int rc;
+
+ /* We always hook cost-free events. */
+ unsigned long events =
+ UTRACE_EVENT(CLONE) |
+ UTRACE_EVENT(EXEC) |
+ UTRACE_EVENT(EXIT);
+
+ /* Penalizing events are individually controlled, so that
+ utrace doesn't even take the monitored threads off their
+ fast paths, nor bother call our callbacks. */
+ if (trace_syscalls_p)
+ events |= UTRACE_EVENT_SYSCALL;
+ if (trace_signals_p)
+ events |= UTRACE_EVENT_SIGNAL_ALL;
+
+ rc = utrace_set_events(tsk, engine, events);
+ if (rc == -EINPROGRESS)
+ rc = utrace_barrier(tsk, engine);
+ if (rc)
+ pr_warning("utrace_set_events/barrier rc %d\n", rc);
+
+ utrace_engine_put(engine);
+ pr_debug("attached in %s to %s(%d)\n", __func__,
+ tsk->comm, tsk->pid);
+ }
+}
+
+
+u32 process_trace_report_clone(enum utrace_resume_action action,
+ struct utrace_attached_engine *engine,
+ struct task_struct *parent,
+ unsigned long clone_flags,
+ struct task_struct *child)
+{
+ if (trace_lifecycle_p && trace_test(parent)) {
+ struct process_trace_entry ent;
+ ent.opcode = _UTRACE_EVENT_CLONE;
+ ent.trace_clone.child = child->pid;
+ ent.trace_clone.flags = clone_flags;
+ process_trace(&ent);
+ }
+
+ process_trace_tryattach(child);
+
+ return UTRACE_RESUME;
+}
+
+
+u32 process_trace_report_syscall_entry(u32 action,
+ struct utrace_attached_engine *engine,
+ struct task_struct *task,
+ struct pt_regs *regs)
+{
+ if (trace_syscalls_p && trace_test(task)) {
+ struct process_trace_entry ent;
+ ent.opcode = _UTRACE_EVENT_SYSCALL_ENTRY;
+ ent.trace_syscall_entry.callno = syscall_get_nr(task, regs);
+ syscall_get_arguments(task, regs, 0, 6,
+ ent.trace_syscall_entry.args);
+ process_trace(&ent);
+ }
+
+ return UTRACE_RESUME;
+}
+
+
+u32 process_trace_report_syscall_exit(enum utrace_resume_action action,
+ struct utrace_attached_engine *engine,
+ struct task_struct *task,
+ struct pt_regs *regs)
+{
+ if (trace_syscalls_p && trace_test(task)) {
+ struct process_trace_entry ent;
+ ent.opcode = _UTRACE_EVENT_SYSCALL_EXIT;
+ ent.trace_syscall_exit.rc = syscall_get_return_value(task, regs);
+ ent.trace_syscall_exit.error = syscall_get_error(task, regs);
+ process_trace(&ent);
+ }
+
+ return UTRACE_RESUME;
+}
+
+
+u32 process_trace_report_exec(enum utrace_resume_action action,
+ struct utrace_attached_engine *engine,
+ struct task_struct *task,
+ const struct linux_binfmt *fmt,
+ const struct linux_binprm *bprm,
+ struct pt_regs *regs)
+{
+ if (trace_lifecycle_p && trace_test(task)) {
+ struct process_trace_entry ent;
+ ent.opcode = _UTRACE_EVENT_EXEC;
+ process_trace(&ent);
+ }
+
+ /* We're already attached; no need for a new tryattach. */
+
+ return UTRACE_RESUME;
+}
+
+
+u32 process_trace_report_signal(u32 action,
+ struct utrace_attached_engine *engine,
+ struct task_struct *task,
+ struct pt_regs *regs,
+ siginfo_t *info,
+ const struct k_sigaction *orig_ka,
+ struct k_sigaction *return_ka)
+{
+ if (trace_signals_p && trace_test(task)) {
+ struct process_trace_entry ent;
+ ent.opcode = _UTRACE_EVENT_SIGNAL;
+ ent.trace_signal.si_signo = info->si_signo;
+ ent.trace_signal.si_errno = info->si_errno;
+ ent.trace_signal.si_code = info->si_code;
+ process_trace(&ent);
+ }
+
+ /* We're already attached, so no need for a new tryattach. */
+
+ return UTRACE_RESUME | utrace_signal_action(action);
+}
+
+
+u32 process_trace_report_exit(enum utrace_resume_action action,
+ struct utrace_attached_engine *engine,
+ struct task_struct *task,
+ long orig_code, long *code)
+{
+ if (trace_lifecycle_p && trace_test(task)) {
+ struct process_trace_entry ent;
+ ent.opcode = _UTRACE_EVENT_EXIT;
+ ent.trace_exit.code = orig_code;
+ process_trace(&ent);
+ }
+
+ /* There is no need to explicitly attach or detach here. */
+
+ return UTRACE_RESUME;
+}
+
+
+void enable_process_trace()
+{
+ struct task_struct *grp, *tsk;
+
+ pr_debug("in %s\n", __func__);
+ rcu_read_lock();
+ do_each_thread(grp, tsk) {
+ /* Skip over kernel threads. */
+ if (tsk->flags & PF_KTHREAD)
+ continue;
+
+ if (process_follow_pid) {
+ if (tsk->tgid == process_follow_pid ||
+ tsk->parent->tgid == process_follow_pid)
+ process_trace_tryattach(tsk);
+ } else {
+ process_trace_tryattach(tsk);
+ }
+ } while_each_thread(grp, tsk);
+ rcu_read_unlock();
+}
+
+void disable_process_trace()
+{
+ struct utrace_attached_engine *engine;
+ struct task_struct *grp, *tsk;
+ int rc;
+
+ pr_debug("in %s\n", __func__);
+ rcu_read_lock();
+ do_each_thread(grp, tsk) {
+ /* Find matching engine, if any. Returns -ENOENT for
+ unattached threads. */
+ engine = utrace_attach_task(tsk, UTRACE_ATTACH_MATCH_OPS,
+ &process_trace_ops, 0);
+ if (IS_ERR(engine)) {
+ if (PTR_ERR(engine) != -ENOENT)
+ pr_warning("utrace_attach_task %d (rc %ld)\n",
+ tsk->pid, -PTR_ERR(engine));
+ } else if (engine == NULL) {
+ pr_warning("utrace_attach_task %d (null engine)\n",
+ tsk->pid);
+ } else {
+ /* Found one of our own engines. Detach. */
+ rc = utrace_control(tsk, engine, UTRACE_DETACH);
+ switch (rc) {
+ case 0: /* success */
+ break;
+ case -ESRCH: /* REAP callback already begun */
+ case -EALREADY: /* DEATH callback already begun */
+ break;
+ default:
+ rc = -rc;
+ pr_warning("utrace_detach %d (rc %d)\n",
+ tsk->pid, rc);
+ break;
+ }
+ utrace_engine_put(engine);
+ pr_debug("detached in %s from %s(%d)\n", __func__,
+ tsk->comm, tsk->pid);
+ }
+ } while_each_thread(grp, tsk);
+ rcu_read_unlock();
+}
+
+
+static const struct utrace_engine_ops process_trace_ops = {
+ .report_clone = process_trace_report_clone,
+ .report_exec = process_trace_report_exec,
+ .report_exit = process_trace_report_exit,
+ .report_signal = process_trace_report_signal,
+ .report_syscall_entry = process_trace_report_syscall_entry,
+ .report_syscall_exit = process_trace_report_syscall_exit,
+};
+
+
+
+/* control interfaces */
+
+
+static ssize_t
+trace_taskcomm_filter_read(struct file *filp, char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ return simple_read_from_buffer(ubuf, cnt, ppos,
+ trace_taskcomm_filter, TASK_COMM_LEN);
+}
+
+
+static ssize_t
+trace_taskcomm_filter_write(struct file *filp, const char __user *ubuf,
+ size_t cnt, loff_t *fpos)
+{
+ char *end;
+
+ if (cnt > TASK_COMM_LEN)
+ cnt = TASK_COMM_LEN;
+
+ if (copy_from_user(trace_taskcomm_filter, ubuf, cnt))
+ return -EFAULT;
+
+ /* Cut from the first nil or newline. */
+ trace_taskcomm_filter[cnt] = '\0';
+ end = strchr(trace_taskcomm_filter, '\n');
+ if (end)
+ *end = '\0';
+
+ *fpos += cnt;
+ return cnt;
+}
+
+
+static const struct file_operations trace_taskcomm_filter_fops = {
+ .open = tracing_open_generic,
+ .read = trace_taskcomm_filter_read,
+ .write = trace_taskcomm_filter_write,
+};
+
+
+
+static char README_text[] =
+ "process event tracer mini-HOWTO\n"
+ "\n"
+ "1. Select process hierarchy to monitor. Other processes will be\n"
+ " completely unaffected. Leave at 0 for system-wide tracing.\n"
+ "# echo NNN > process_follow_pid\n"
+ "\n"
+ "2. Determine which process event traces are potentially desired.\n"
+ " syscall and signal tracing slow down monitored processes.\n"
+ "# echo 0 > process_trace_{syscalls,signals,lifecycle}\n"
+ "\n"
+ "3. Add any final uid- or taskcomm-based filtering. Non-matching\n"
+ " processes will skip trace messages, but will still be slowed.\n"
+ "# echo NNN > process_trace_uid_filter # -1: unrestricted \n"
+ "# echo ls > process_trace_taskcomm_filter # empty: unrestricted\n"
+ "\n"
+ "4. Start tracing.\n"
+ "# echo process > current_tracer\n"
+ "\n"
+ "5. Examine trace.\n"
+ "# cat trace\n"
+ "\n"
+ "6. Stop tracing.\n"
+ "# echo nop > current_tracer\n"
+ ;
+
+static struct debugfs_blob_wrapper README_blob = {
+ .data = README_text,
+ .size = sizeof(README_text),
+};
+
+
+static __init int init_process_trace(void)
+{
+ struct dentry *d_tracer;
+ struct dentry *entry;
+
+ d_tracer = tracing_init_dentry();
+
+ entry = debugfs_create_blob("process_trace_README", 0444, d_tracer,
+ &README_blob);
+ if (!entry)
+ pr_warning("Could not create debugfs 'process_trace_README' entry\n");
+
+ /* Control for scoping process following. */
+ entry = debugfs_create_u32("process_follow_pid", 0644, d_tracer,
+ &process_follow_pid);
+ if (!entry)
+ pr_warning("Could not create debugfs 'process_follow_pid' entry\n");
+
+ /* Process-level filters */
+ entry = debugfs_create_file("process_trace_taskcomm_filter", 0644,
+ d_tracer, NULL, &trace_taskcomm_filter_fops);
+ /* XXX: it'd be nice to have a read/write debugfs_create_blob. */
+ if (!entry)
+ pr_warning("Could not create debugfs 'process_trace_taskcomm_filter' entry\n");
+
+ entry = debugfs_create_u32("process_trace_uid_filter", 0644, d_tracer,
+ &trace_taskuid_filter);
+ if (!entry)
+ pr_warning("Could not create debugfs 'process_trace_uid_filter' entry\n");
+
+ /* Event-level filters. */
+ entry = debugfs_create_u32("process_trace_lifecycle", 0644, d_tracer,
+ &trace_lifecycle_p);
+ if (!entry)
+ pr_warning("Could not create debugfs 'process_trace_lifecycle' entry\n");
+
+ entry = debugfs_create_u32("process_trace_syscalls", 0644, d_tracer,
+ &trace_syscalls_p);
+ if (!entry)
+ pr_warning("Could not create debugfs 'process_trace_syscalls' entry\n");
+
+ entry = debugfs_create_u32("process_trace_signals", 0644, d_tracer,
+ &trace_signals_p);
+ if (!entry)
+ pr_warning("Could not create debugfs 'process_trace_signals' entry\n");
+
+ return register_tracer(&process_tracer);
+}
+
+device_initcall(init_process_trace);
^ permalink raw reply related [flat|nested] 3+ messages in thread
end of thread, other threads:[~2009-02-22 22:22 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-01-27 19:54 proof-of-concept, utrace->ftrace engine Frank Ch. Eigler
2009-02-09 7:22 ` Roland McGrath
2009-02-22 22:22 ` utrace-based ftrace "process" engine, v2 Frank Ch. Eigler
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.