* [PATCH v7 1/4] trace: Add trace any kernel object
2022-01-13 1:38 [PATCH v7 0/4] trace: Introduce objtrace trigger to trace the kernel object Jeff Xie
@ 2022-01-13 1:38 ` Jeff Xie
2022-01-13 1:38 ` [PATCH v7 2/4] trace/objtrace: get the value of the object Jeff Xie
` (3 subsequent siblings)
4 siblings, 0 replies; 11+ messages in thread
From: Jeff Xie @ 2022-01-13 1:38 UTC (permalink / raw)
To: rostedt; +Cc: mhiramat, mingo, zanussi, linux-kernel, Jeff Xie
Introduce objtrace trigger to get the call flow by tracing any kernel
object in the function parameter.
The objtrace trigger makes a list of the target object address from
the given event parameter, and records all kernel function calls
which has the object address in its parameter.
Syntax:
objtrace:add:obj[:count][if <filter>]
Usage:
# echo 'p bio_add_page arg1=$arg1' > kprobe_events
# cd events/kprobes/p_bio_add_page_0
# echo 'objtrace:add:arg1:1 if comm == "cat"' > ./trigger
# cat /test.txt
Signed-off-by: Jeff Xie <xiehuan09@gmail.com>
---
include/linux/trace_events.h | 1 +
kernel/trace/Kconfig | 10 +
kernel/trace/Makefile | 1 +
kernel/trace/trace.c | 3 +
kernel/trace/trace.h | 8 +
kernel/trace/trace_entries.h | 17 +
kernel/trace/trace_events_trigger.c | 1 +
kernel/trace/trace_object.c | 460 ++++++++++++++++++++++++++++
kernel/trace/trace_output.c | 40 +++
9 files changed, 541 insertions(+)
create mode 100644 kernel/trace/trace_object.c
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 2d167ac3452c..54baec78ce62 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -684,6 +684,7 @@ enum event_trigger_type {
ETT_EVENT_HIST = (1 << 4),
ETT_HIST_ENABLE = (1 << 5),
ETT_EVENT_EPROBE = (1 << 6),
+ ETT_TRACE_OBJECT = (1 << 7),
};
extern int filter_match_preds(struct event_filter *filter, void *rec);
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 420ff4bc67fd..f69761cbfaaa 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -237,6 +237,16 @@ config FUNCTION_PROFILER
If in doubt, say N.
+config TRACE_OBJECT
+ bool "Trace kernel object in function parameter"
+ depends on FUNCTION_TRACER
+ depends on HAVE_FUNCTION_ARG_ACCESS_API
+ select TRACING
+ default y
+ help
+ You can trace the kernel object in the kernel function parameter.
+ The kernel object is dynamically specified via event trigger.
+
config STACK_TRACER
bool "Trace max stack"
depends on HAVE_FUNCTION_TRACER
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index bedc5caceec7..b924b8e55922 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -67,6 +67,7 @@ obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o
obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o
obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o
obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += fgraph.o
+obj-$(CONFIG_TRACE_OBJECT) += trace_object.o
ifeq ($(CONFIG_BLOCK),y)
obj-$(CONFIG_EVENT_TRACING) += blktrace.o
endif
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 78ea542ce3bc..0b627963e343 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -5590,6 +5590,9 @@ static const char readme_msg[] =
"\t enable_hist:<system>:<event>\n"
"\t disable_hist:<system>:<event>\n"
#endif
+#ifdef CONFIG_TRACE_OBJECT
+ "\t objtrace:add:obj[:count][if <filter>]\n"
+#endif
#ifdef CONFIG_STACKTRACE
"\t\t stacktrace\n"
#endif
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 38715aa6cfdf..46a1301ac47a 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -54,6 +54,7 @@ enum trace_type {
TRACE_TIMERLAT,
TRACE_RAW_DATA,
TRACE_FUNC_REPEATS,
+ TRACE_OBJECT,
__TRACE_LAST_TYPE,
};
@@ -469,6 +470,7 @@ extern void __ftrace_bad_type(void);
TRACE_GRAPH_RET); \
IF_ASSIGN(var, ent, struct func_repeats_entry, \
TRACE_FUNC_REPEATS); \
+ IF_ASSIGN(var, ent, struct trace_object_entry, TRACE_OBJECT);\
__ftrace_bad_type(); \
} while (0)
@@ -1531,6 +1533,12 @@ static inline int register_trigger_hist_cmd(void) { return 0; }
static inline int register_trigger_hist_enable_disable_cmds(void) { return 0; }
#endif
+#ifdef CONFIG_TRACE_OBJECT
+extern int register_trigger_object_cmd(void);
+#else
+static inline int register_trigger_object_cmd(void) { return 0; }
+#endif
+
extern int register_trigger_cmds(void);
extern void clear_event_triggers(struct trace_array *tr);
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index cd41e863b51c..bb120d9498a9 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -401,3 +401,20 @@ FTRACE_ENTRY(timerlat, timerlat_entry,
__entry->context,
__entry->timer_latency)
);
+
+/*
+ * trace object entry:
+ */
+FTRACE_ENTRY(object, trace_object_entry,
+
+ TRACE_OBJECT,
+
+ F_STRUCT(
+ __field( unsigned long, ip )
+ __field( unsigned long, parent_ip )
+ __field( unsigned long, object )
+ ),
+
+ F_printk(" %ps <-- %ps object:%lx\n",
+ (void *)__entry->ip, (void *)__entry->parent_ip, __entry->object)
+);
diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
index 3d5c07239a2a..da2d871fa809 100644
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c
@@ -1687,6 +1687,7 @@ __init int register_trigger_cmds(void)
register_trigger_enable_disable_cmds();
register_trigger_hist_enable_disable_cmds();
register_trigger_hist_cmd();
+ register_trigger_object_cmd();
return 0;
}
diff --git a/kernel/trace/trace_object.c b/kernel/trace/trace_object.c
new file mode 100644
index 000000000000..4af1c117cbfa
--- /dev/null
+++ b/kernel/trace/trace_object.c
@@ -0,0 +1,460 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * trace the kernel object in the kernel function parameter
+ * Copyright (C) 2021 Jeff Xie <xiehuan09@gmail.com>
+ */
+
+#define pr_fmt(fmt) "trace_object: " fmt
+
+#include "trace_output.h"
+
+#define MAX_TRACED_OBJECT 5
+static DEFINE_RAW_SPINLOCK(trace_obj_lock);
+static struct trace_event_file event_trace_file;
+static const int max_args_num = 6;
+static atomic_t trace_object_ref;
+static atomic_t num_traced_obj;
+static int exit_trace_object(void);
+static int init_trace_object(void);
+
+static struct object_instance {
+ void *obj;
+} traced_obj[MAX_TRACED_OBJECT];
+
+static bool object_exist(void *obj)
+{
+ int i, max;
+
+ max = atomic_read(&num_traced_obj);
+ smp_rmb();
+ for (i = 0; i < max; i++) {
+ if (traced_obj[i].obj == obj)
+ return true;
+ }
+ return false;
+}
+
+static bool object_empty(void)
+{
+ return !atomic_read(&num_traced_obj);
+}
+
+static void set_trace_object(void *obj)
+{
+ unsigned long flags;
+
+ if (in_nmi())
+ return;
+
+ if (!obj)
+ return;
+
+ if (object_exist(obj))
+ return;
+
+ /* only this place has write operations */
+ raw_spin_lock_irqsave(&trace_obj_lock, flags);
+ if (atomic_read(&num_traced_obj) == MAX_TRACED_OBJECT) {
+ trace_printk("object_pool is full, can't trace object:0x%px\n", obj);
+ goto out;
+ }
+ traced_obj[atomic_read(&num_traced_obj)].obj = obj;
+ /* make sure the num_traced_obj update always appears after traced_obj update */
+ smp_wmb();
+ atomic_inc(&num_traced_obj);
+out:
+ raw_spin_unlock_irqrestore(&trace_obj_lock, flags);
+}
+
+static void submit_trace_object(unsigned long ip, unsigned long parent_ip,
+ unsigned long object)
+{
+
+ struct trace_buffer *buffer;
+ struct ring_buffer_event *event;
+ struct trace_object_entry *entry;
+ int pc;
+
+ pc = preempt_count();
+ event = trace_event_buffer_lock_reserve(&buffer, &event_trace_file,
+ TRACE_OBJECT, sizeof(*entry), pc);
+ if (!event)
+ return;
+ entry = ring_buffer_event_data(event);
+ entry->ip = ip;
+ entry->parent_ip = parent_ip;
+ entry->object = object;
+
+ event_trigger_unlock_commit(&event_trace_file, buffer, event,
+ entry, pc);
+}
+
+static void
+trace_object_events_call(unsigned long ip, unsigned long parent_ip,
+ struct ftrace_ops *op, struct ftrace_regs *fregs)
+{
+ struct pt_regs *pt_regs = ftrace_get_regs(fregs);
+ unsigned long obj;
+ int bit, n;
+
+ bit = ftrace_test_recursion_trylock(ip, parent_ip);
+ if (bit < 0)
+ return;
+
+ if (object_empty())
+ goto out;
+
+ for (n = 0; n < max_args_num; n++) {
+ obj = regs_get_kernel_argument(pt_regs, n);
+ if (object_exist((void *)obj))
+ submit_trace_object(ip, parent_ip, obj);
+ /* The parameters of a function may match multiple objects */
+ }
+out:
+ ftrace_test_recursion_unlock(bit);
+}
+
+static struct ftrace_ops trace_ops = {
+ .func = trace_object_events_call,
+ .flags = FTRACE_OPS_FL_SAVE_REGS,
+};
+
+static void
+trace_object_trigger(struct event_trigger_data *data,
+ struct trace_buffer *buffer, void *rec,
+ struct ring_buffer_event *event)
+{
+
+ struct ftrace_event_field *field = data->private_data;
+ void *obj = NULL;
+
+ memcpy(&obj, rec + field->offset, sizeof(obj));
+ set_trace_object(obj);
+}
+
+static void
+trace_object_trigger_free(struct event_trigger_ops *ops,
+ struct event_trigger_data *data)
+{
+ if (WARN_ON_ONCE(data->ref <= 0))
+ return;
+
+ data->ref--;
+ if (!data->ref)
+ trigger_data_free(data);
+}
+
+static void
+trace_object_count_trigger(struct event_trigger_data *data,
+ struct trace_buffer *buffer, void *rec,
+ struct ring_buffer_event *event)
+{
+ if (!data->count)
+ return;
+
+ if (data->count != -1)
+ (data->count)--;
+
+ trace_object_trigger(data, buffer, rec, event);
+}
+
+static int event_object_trigger_init(struct event_trigger_ops *ops,
+ struct event_trigger_data *data)
+{
+ data->ref++;
+ return 0;
+}
+
+static int
+event_trigger_print(const char *name, struct seq_file *m,
+ void *data, char *filter_str)
+{
+ long count = (long)data;
+
+ seq_puts(m, name);
+
+ if (count == -1)
+ seq_puts(m, ":unlimited");
+ else
+ seq_printf(m, ":count=%ld", count);
+
+ if (filter_str)
+ seq_printf(m, " if %s\n", filter_str);
+ else
+ seq_putc(m, '\n');
+
+ return 0;
+}
+
+static int
+trace_object_trigger_print(struct seq_file *m, struct event_trigger_ops *ops,
+ struct event_trigger_data *data)
+{
+ return event_trigger_print("objtrace", m, (void *)data->count,
+ data->filter_str);
+}
+
+static struct event_trigger_ops objecttrace_trigger_ops = {
+ .func = trace_object_trigger,
+ .print = trace_object_trigger_print,
+ .init = event_object_trigger_init,
+ .free = trace_object_trigger_free,
+};
+
+static struct event_trigger_ops objecttrace_count_trigger_ops = {
+ .func = trace_object_count_trigger,
+ .print = trace_object_trigger_print,
+ .init = event_object_trigger_init,
+ .free = trace_object_trigger_free,
+};
+
+static struct event_trigger_ops *
+objecttrace_get_trigger_ops(char *cmd, char *param)
+{
+ return param ? &objecttrace_count_trigger_ops : &objecttrace_trigger_ops;
+}
+
+static int register_object_trigger(char *glob, struct event_trigger_ops *ops,
+ struct event_trigger_data *data,
+ struct trace_event_file *file)
+{
+ struct event_trigger_data *test;
+ int ret = 0;
+
+ lockdep_assert_held(&event_mutex);
+
+ list_for_each_entry(test, &file->triggers, list) {
+ if (test->cmd_ops->trigger_type == data->cmd_ops->trigger_type) {
+ ret = -EEXIST;
+ goto out;
+ }
+ }
+
+ if (data->ops->init) {
+ ret = data->ops->init(data->ops, data);
+ if (ret < 0)
+ goto out;
+ }
+
+ list_add_rcu(&data->list, &file->triggers);
+ ret++;
+
+ update_cond_flag(file);
+ if (trace_event_trigger_enable_disable(file, 1) < 0) {
+ list_del_rcu(&data->list);
+ update_cond_flag(file);
+ ret--;
+ }
+ init_trace_object();
+out:
+ return ret;
+}
+
+static void unregister_object_trigger(char *glob, struct event_trigger_ops *ops,
+ struct event_trigger_data *test,
+ struct trace_event_file *file)
+{
+ struct event_trigger_data *data;
+ bool unregistered = false;
+
+ lockdep_assert_held(&event_mutex);
+
+ list_for_each_entry(data, &file->triggers, list) {
+ if (data->cmd_ops->trigger_type == test->cmd_ops->trigger_type) {
+ unregistered = true;
+ list_del_rcu(&data->list);
+ trace_event_trigger_enable_disable(file, 0);
+ update_cond_flag(file);
+ break;
+ }
+ }
+
+ if (unregistered) {
+ if (data->ops->free)
+ data->ops->free(data->ops, data);
+ exit_trace_object();
+ }
+}
+
+static int
+event_object_trigger_callback(struct event_command *cmd_ops,
+ struct trace_event_file *file,
+ char *glob, char *cmd, char *param)
+{
+ struct event_trigger_data *trigger_data;
+ struct event_trigger_ops *trigger_ops;
+ struct trace_event_call *call;
+ struct ftrace_event_field *field;
+ char *objtrace_cmd;
+ char *trigger = NULL;
+ char *arg;
+ char *number;
+ int ret;
+
+ ret = -EINVAL;
+ if (!param)
+ goto out;
+
+ /* separate the trigger from the filter (c:a:n [if filter]) */
+ trigger = strsep(¶m, " \t");
+ if (!trigger)
+ goto out;
+ if (param) {
+ param = skip_spaces(param);
+ if (!*param)
+ param = NULL;
+ }
+
+ objtrace_cmd = strsep(&trigger, ":");
+ if (!objtrace_cmd || strcmp(objtrace_cmd, "add"))
+ goto out;
+
+ arg = strsep(&trigger, ":");
+ if (!arg)
+ goto out;
+ call = file->event_call;
+ field = trace_find_event_field(call, arg);
+ if (!field)
+ goto out;
+
+ if (field->size != sizeof(void *))
+ goto out;
+
+ trigger_ops = cmd_ops->get_trigger_ops(cmd, trigger);
+
+ ret = -ENOMEM;
+ trigger_data = kzalloc(sizeof(*trigger_data), GFP_KERNEL);
+ if (!trigger_data)
+ goto out;
+
+ trigger_data->count = -1;
+ trigger_data->ops = trigger_ops;
+ trigger_data->cmd_ops = cmd_ops;
+ trigger_data->private_data = field;
+ INIT_LIST_HEAD(&trigger_data->list);
+ INIT_LIST_HEAD(&trigger_data->named_list);
+
+ if (glob[0] == '!') {
+ cmd_ops->unreg(glob+1, trigger_ops, trigger_data, file);
+ kfree(trigger_data);
+ ret = 0;
+ goto out;
+ }
+
+ if (trigger) {
+ number = strsep(&trigger, ":");
+
+ ret = -EINVAL;
+ if (!strlen(number))
+ goto out_free;
+
+ /*
+ * We use the callback data field (which is a pointer)
+ * as our counter.
+ */
+ ret = kstrtoul(number, 0, &trigger_data->count);
+ if (ret)
+ goto out_free;
+ }
+
+ if (!param) /* if param is non-empty, it's supposed to be a filter */
+ goto out_reg;
+
+ if (!cmd_ops->set_filter)
+ goto out_reg;
+
+ ret = cmd_ops->set_filter(param, trigger_data, file);
+ if (ret < 0)
+ goto out_free;
+
+ out_reg:
+ /* Up the trigger_data count to make sure reg doesn't free it on failure */
+ event_object_trigger_init(trigger_ops, trigger_data);
+ ret = cmd_ops->reg(glob, trigger_ops, trigger_data, file);
+ /*
+ * The above returns on success the # of functions enabled,
+ * but if it didn't find any functions it returns zero.
+ * Consider no functions a failure too.
+ */
+ if (!ret) {
+ cmd_ops->unreg(glob, trigger_ops, trigger_data, file);
+ ret = -ENOENT;
+ } else if (ret > 0)
+ ret = 0;
+
+ /* Down the counter of trigger_data or free it if not used anymore */
+ trace_object_trigger_free(trigger_ops, trigger_data);
+ out:
+ return ret;
+
+ out_free:
+ if (cmd_ops->set_filter)
+ cmd_ops->set_filter(NULL, trigger_data, NULL);
+ kfree(trigger_data);
+ goto out;
+}
+
+static struct event_command trigger_object_cmd = {
+ .name = "objtrace",
+ .trigger_type = ETT_TRACE_OBJECT,
+ .flags = EVENT_CMD_FL_NEEDS_REC,
+ .func = event_object_trigger_callback,
+ .reg = register_object_trigger,
+ .unreg = unregister_object_trigger,
+ .get_trigger_ops = objecttrace_get_trigger_ops,
+ .set_filter = set_trigger_filter,
+};
+
+__init int register_trigger_object_cmd(void)
+{
+ int ret;
+
+ ret = register_event_command(&trigger_object_cmd);
+ WARN_ON(ret < 0);
+
+ return ret;
+}
+
+static int init_trace_object(void)
+{
+ int ret;
+
+ if (atomic_inc_return(&trace_object_ref) != 1) {
+ ret = 0;
+ goto out;
+ }
+
+ event_trace_file.tr = top_trace_array();
+ if (WARN_ON(!event_trace_file.tr)) {
+ ret = -1;
+ atomic_dec(&trace_object_ref);
+ goto out;
+ }
+ ret = register_ftrace_function(&trace_ops);
+out:
+ return ret;
+}
+
+static int exit_trace_object(void)
+{
+ int ret;
+
+ if (WARN_ON_ONCE(atomic_read(&trace_object_ref) <= 0)) {
+ ret = -1;
+ goto out;
+ }
+
+ if (atomic_dec_return(&trace_object_ref) != 0) {
+ ret = 0;
+ goto out;
+ }
+
+ ret = unregister_ftrace_function(&trace_ops);
+ if (ret) {
+ pr_err("can't unregister ftrace for trace object\n");
+ goto out;
+ }
+ atomic_set(&num_traced_obj, 0);
+out:
+ return ret;
+}
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 3547e7176ff7..d747aed27104 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -1543,6 +1543,45 @@ static struct trace_event trace_func_repeats_event = {
.funcs = &trace_func_repeats_funcs,
};
+/* TRACE_OBJECT */
+static enum print_line_t trace_object_print(struct trace_iterator *iter, int flags,
+ struct trace_event *event)
+{
+ struct trace_object_entry *field;
+ struct trace_seq *s = &iter->seq;
+
+ trace_assign_type(field, iter->ent);
+ print_fn_trace(s, field->ip, field->parent_ip, flags);
+ trace_seq_printf(s, " object:0x%lx", field->object);
+ trace_seq_putc(s, '\n');
+
+ return trace_handle_return(s);
+}
+
+static enum print_line_t trace_object_raw(struct trace_iterator *iter, int flags,
+ struct trace_event *event)
+{
+ struct trace_object_entry *field;
+
+ trace_assign_type(field, iter->ent);
+
+ trace_seq_printf(&iter->seq, "%lx %lx\n",
+ field->ip,
+ field->parent_ip);
+
+ return trace_handle_return(&iter->seq);
+}
+
+static struct trace_event_functions trace_object_funcs = {
+ .trace = trace_object_print,
+ .raw = trace_object_raw,
+};
+
+static struct trace_event trace_object_event = {
+ .type = TRACE_OBJECT,
+ .funcs = &trace_object_funcs,
+};
+
static struct trace_event *events[] __initdata = {
&trace_fn_event,
&trace_ctx_event,
@@ -1557,6 +1596,7 @@ static struct trace_event *events[] __initdata = {
&trace_timerlat_event,
&trace_raw_data_event,
&trace_func_repeats_event,
+ &trace_object_event,
NULL
};
--
2.25.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH v7 2/4] trace/objtrace: get the value of the object
2022-01-13 1:38 [PATCH v7 0/4] trace: Introduce objtrace trigger to trace the kernel object Jeff Xie
2022-01-13 1:38 ` [PATCH v7 1/4] trace: Add trace any " Jeff Xie
@ 2022-01-13 1:38 ` Jeff Xie
2022-01-18 15:13 ` Masami Hiramatsu
2022-01-13 1:38 ` [PATCH v7 3/4] trace/objtrace: Add testcases for objtrace Jeff Xie
` (2 subsequent siblings)
4 siblings, 1 reply; 11+ messages in thread
From: Jeff Xie @ 2022-01-13 1:38 UTC (permalink / raw)
To: rostedt; +Cc: mhiramat, mingo, zanussi, linux-kernel, Jeff Xie
Using objtrace trigger to get the value of the object which from the kernel
function parameter.
Syntax:
objtrace:add:obj[,offset][:type][:count][if <filter>]
Usage:
# echo 'p bio_add_page arg1=$arg1' > ./kprobe_events
# gdb vmlinux
(gdb) p &(((struct bio *)0)->bi_iter.bi_size)
$1 = (unsigned int *) 0x28
# echo 'objtrace:add:arg1,0x28:u32:1 if comm == "cat"' > ./events/kprobes/ \
p_bio_add_page_0/trigger
# cat /test.txt
Signed-off-by: Jeff Xie <xiehuan09@gmail.com>
---
kernel/trace/trace.c | 2 +-
kernel/trace/trace_entries.h | 5 +-
kernel/trace/trace_object.c | 190 +++++++++++++++++++++++++++++------
kernel/trace/trace_output.c | 6 +-
4 files changed, 169 insertions(+), 34 deletions(-)
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 0b627963e343..d5332ece4c67 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -5591,7 +5591,7 @@ static const char readme_msg[] =
"\t disable_hist:<system>:<event>\n"
#endif
#ifdef CONFIG_TRACE_OBJECT
- "\t objtrace:add:obj[:count][if <filter>]\n"
+ "\t objtrace:add:obj[,offset][:type][:count][if <filter>]\n"
#endif
#ifdef CONFIG_STACKTRACE
"\t\t stacktrace\n"
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index bb120d9498a9..2407c45a568c 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -413,8 +413,9 @@ FTRACE_ENTRY(object, trace_object_entry,
__field( unsigned long, ip )
__field( unsigned long, parent_ip )
__field( unsigned long, object )
+ __field( unsigned long, value )
),
- F_printk(" %ps <-- %ps object:%lx\n",
- (void *)__entry->ip, (void *)__entry->parent_ip, __entry->object)
+ F_printk(" %ps <-- %ps object:%lx value:%lx\n", (void *)__entry->ip,
+ (void *)__entry->parent_ip, __entry->object, __entry->value)
);
diff --git a/kernel/trace/trace_object.c b/kernel/trace/trace_object.c
index 4af1c117cbfa..774a9dff4d0a 100644
--- a/kernel/trace/trace_object.c
+++ b/kernel/trace/trace_object.c
@@ -11,7 +11,6 @@
#define MAX_TRACED_OBJECT 5
static DEFINE_RAW_SPINLOCK(trace_obj_lock);
static struct trace_event_file event_trace_file;
-static const int max_args_num = 6;
static atomic_t trace_object_ref;
static atomic_t num_traced_obj;
static int exit_trace_object(void);
@@ -19,8 +18,22 @@ static int init_trace_object(void);
static struct object_instance {
void *obj;
+ int obj_type_size;
} traced_obj[MAX_TRACED_OBJECT];
+/* objtrace private data */
+struct objtrace_trigger_data {
+ struct ftrace_event_field *field;
+ int offset;
+ int obj_type_size;
+};
+
+/* get the type size for the special object */
+struct objtrace_fetch_type {
+ char *name;
+ int type_size;
+};
+
static bool object_exist(void *obj)
{
int i, max;
@@ -39,7 +52,7 @@ static bool object_empty(void)
return !atomic_read(&num_traced_obj);
}
-static void set_trace_object(void *obj)
+static void set_trace_object(void *obj, int obj_type_size)
{
unsigned long flags;
@@ -59,6 +72,7 @@ static void set_trace_object(void *obj)
goto out;
}
traced_obj[atomic_read(&num_traced_obj)].obj = obj;
+ traced_obj[atomic_read(&num_traced_obj)].obj_type_size = obj_type_size;
/* make sure the num_traced_obj update always appears after traced_obj update */
smp_wmb();
atomic_inc(&num_traced_obj);
@@ -67,7 +81,7 @@ static void set_trace_object(void *obj)
}
static void submit_trace_object(unsigned long ip, unsigned long parent_ip,
- unsigned long object)
+ unsigned long object, unsigned long value)
{
struct trace_buffer *buffer;
@@ -84,18 +98,66 @@ static void submit_trace_object(unsigned long ip, unsigned long parent_ip,
entry->ip = ip;
entry->parent_ip = parent_ip;
entry->object = object;
+ entry->value = value;
event_trigger_unlock_commit(&event_trace_file, buffer, event,
entry, pc);
}
+static inline long get_object_value(unsigned long *val, void *obj, int type_size)
+{
+ long ret = 0;
+
+ switch (type_size) {
+ case 1: {
+ u8 tmp;
+
+ ret = copy_from_kernel_nofault(&tmp, obj, sizeof(tmp));
+ if (ret)
+ goto out;
+ *val = tmp;
+ break;
+ }
+ case 2: {
+ u16 tmp;
+
+ ret = copy_from_kernel_nofault(&tmp, obj, sizeof(tmp));
+ if (ret)
+ goto out;
+ *val = tmp;
+ break;
+ }
+ case 4: {
+ u32 tmp;
+
+ ret = copy_from_kernel_nofault(&tmp, obj, sizeof(tmp));
+ if (ret)
+ goto out;
+ *val = tmp;
+ break;
+ }
+ case 8: {
+ u64 tmp;
+
+ ret = copy_from_kernel_nofault(&tmp, obj, sizeof(tmp));
+ if (ret)
+ goto out;
+ *val = tmp;
+ break;
+ }
+ default:
+ return -EINVAL;
+ }
+out:
+ return ret;
+}
+
static void
trace_object_events_call(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *op, struct ftrace_regs *fregs)
{
- struct pt_regs *pt_regs = ftrace_get_regs(fregs);
- unsigned long obj;
- int bit, n;
+ unsigned long val = 0;
+ int bit, n, max;
bit = ftrace_test_recursion_trylock(ip, parent_ip);
if (bit < 0)
@@ -104,11 +166,12 @@ trace_object_events_call(unsigned long ip, unsigned long parent_ip,
if (object_empty())
goto out;
- for (n = 0; n < max_args_num; n++) {
- obj = regs_get_kernel_argument(pt_regs, n);
- if (object_exist((void *)obj))
- submit_trace_object(ip, parent_ip, obj);
- /* The parameters of a function may match multiple objects */
+ max = atomic_read(&num_traced_obj);
+ smp_rmb();
+ for (n = 0; n < max; n++) {
+ if (get_object_value(&val, traced_obj[n].obj, traced_obj[n].obj_type_size))
+ goto out;
+ submit_trace_object(ip, parent_ip, (unsigned long)traced_obj[n].obj, val);
}
out:
ftrace_test_recursion_unlock(bit);
@@ -124,12 +187,15 @@ trace_object_trigger(struct event_trigger_data *data,
struct trace_buffer *buffer, void *rec,
struct ring_buffer_event *event)
{
+ struct objtrace_trigger_data *obj_data = data->private_data;
+ struct ftrace_event_field *field;
+ void *obj, *val = NULL;
- struct ftrace_event_field *field = data->private_data;
- void *obj = NULL;
-
- memcpy(&obj, rec + field->offset, sizeof(obj));
- set_trace_object(obj);
+ field = obj_data->field;
+ memcpy(&val, rec + field->offset, sizeof(val));
+ /* get the final object */
+ obj = val + obj_data->offset;
+ set_trace_object(obj, obj_data->obj_type_size);
}
static void
@@ -140,8 +206,10 @@ trace_object_trigger_free(struct event_trigger_ops *ops,
return;
data->ref--;
- if (!data->ref)
+ if (!data->ref) {
+ kfree(data->private_data);
trigger_data_free(data);
+ }
}
static void
@@ -276,6 +344,22 @@ static void unregister_object_trigger(char *glob, struct event_trigger_ops *ops,
}
}
+static const struct objtrace_fetch_type objtrace_fetch_types[] = {
+ {"u8", 1},
+ {"s8", 1},
+ {"x8", 1},
+ {"u16", 2},
+ {"s16", 2},
+ {"x16", 2},
+ {"u32", 4},
+ {"s32", 4},
+ {"x32", 4},
+ {"u64", 8},
+ {"s64", 8},
+ {"x64", 8},
+ {}
+};
+
static int
event_object_trigger_callback(struct event_command *cmd_ops,
struct trace_event_file *file,
@@ -283,19 +367,22 @@ event_object_trigger_callback(struct event_command *cmd_ops,
{
struct event_trigger_data *trigger_data;
struct event_trigger_ops *trigger_ops;
+ struct objtrace_trigger_data *obj_data;
struct trace_event_call *call;
struct ftrace_event_field *field;
- char *objtrace_cmd;
- char *trigger = NULL;
- char *arg;
- char *number;
- int ret;
+ char *type, *tr, *obj, *tmp, *trigger = NULL;
+ char *number, *objtrace_cmd;
+ int ret, i, def_type_size, obj_type_size = 0;
+ long offset = 0;
ret = -EINVAL;
if (!param)
goto out;
- /* separate the trigger from the filter (c:a:n [if filter]) */
+ /*
+ * separate the trigger from the filter:
+ * objtrace:add:OBJ[,OFFS][:TYPE][:COUNT] [if filter]
+ */
trigger = strsep(¶m, " \t");
if (!trigger)
goto out;
@@ -309,33 +396,79 @@ event_object_trigger_callback(struct event_command *cmd_ops,
if (!objtrace_cmd || strcmp(objtrace_cmd, "add"))
goto out;
- arg = strsep(&trigger, ":");
- if (!arg)
+ obj = strsep(&trigger, ":");
+ if (!obj)
goto out;
+
+ tr = strchr(obj, ',');
+ if (!tr)
+ offset = 0;
+ else {
+ *tr++ = '\0';
+ ret = kstrtol(tr, 0, &offset);
+ if (ret)
+ goto out;
+ }
+
+ ret = -EINVAL;
call = file->event_call;
- field = trace_find_event_field(call, arg);
+ field = trace_find_event_field(call, obj);
if (!field)
goto out;
if (field->size != sizeof(void *))
goto out;
+ def_type_size = sizeof(void *);
+ if (!trigger) {
+ obj_type_size = def_type_size;
+ goto skip_get_type;
+ }
+ tmp = trigger;
+ type = strsep(&trigger, ":");
+ if (!type)
+ obj_type_size = def_type_size;
+ else if (isdigit(type[0])) {
+ obj_type_size = def_type_size;
+ trigger = tmp;
+ } else {
+ for (i = 0; objtrace_fetch_types[i].name; i++) {
+ if (strcmp(objtrace_fetch_types[i].name, type) == 0) {
+ obj_type_size = objtrace_fetch_types[i].type_size;
+ break;
+ }
+ }
+ }
+ if (!obj_type_size)
+ goto out;
+skip_get_type:
trigger_ops = cmd_ops->get_trigger_ops(cmd, trigger);
ret = -ENOMEM;
+ obj_data = kzalloc(sizeof(*obj_data), GFP_KERNEL);
+ if (!obj_data)
+ goto out;
+
+ obj_data->field = field;
+ obj_data->offset = offset;
+ obj_data->obj_type_size = obj_type_size;
+
trigger_data = kzalloc(sizeof(*trigger_data), GFP_KERNEL);
- if (!trigger_data)
+ if (!trigger_data) {
+ kfree(obj_data);
goto out;
+ }
trigger_data->count = -1;
trigger_data->ops = trigger_ops;
trigger_data->cmd_ops = cmd_ops;
- trigger_data->private_data = field;
+ trigger_data->private_data = obj_data;
INIT_LIST_HEAD(&trigger_data->list);
INIT_LIST_HEAD(&trigger_data->named_list);
if (glob[0] == '!') {
cmd_ops->unreg(glob+1, trigger_ops, trigger_data, file);
+ kfree(obj_data);
kfree(trigger_data);
ret = 0;
goto out;
@@ -390,6 +523,7 @@ event_object_trigger_callback(struct event_command *cmd_ops,
out_free:
if (cmd_ops->set_filter)
cmd_ops->set_filter(NULL, trigger_data, NULL);
+ kfree(obj_data);
kfree(trigger_data);
goto out;
}
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index d747aed27104..12a971927d8c 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -1553,6 +1553,7 @@ static enum print_line_t trace_object_print(struct trace_iterator *iter, int fla
trace_assign_type(field, iter->ent);
print_fn_trace(s, field->ip, field->parent_ip, flags);
trace_seq_printf(s, " object:0x%lx", field->object);
+ trace_seq_printf(s, " value:0x%lx", field->value);
trace_seq_putc(s, '\n');
return trace_handle_return(s);
@@ -1565,9 +1566,8 @@ static enum print_line_t trace_object_raw(struct trace_iterator *iter, int flags
trace_assign_type(field, iter->ent);
- trace_seq_printf(&iter->seq, "%lx %lx\n",
- field->ip,
- field->parent_ip);
+ trace_seq_printf(&iter->seq, "%lx %lx %lx %lx\n", field->ip,
+ field->parent_ip, field->object, field->value);
return trace_handle_return(&iter->seq);
}
--
2.25.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [PATCH v7 2/4] trace/objtrace: get the value of the object
2022-01-13 1:38 ` [PATCH v7 2/4] trace/objtrace: get the value of the object Jeff Xie
@ 2022-01-18 15:13 ` Masami Hiramatsu
2022-01-19 2:28 ` Jeff Xie
0 siblings, 1 reply; 11+ messages in thread
From: Masami Hiramatsu @ 2022-01-18 15:13 UTC (permalink / raw)
To: Jeff Xie; +Cc: rostedt, mhiramat, mingo, zanussi, linux-kernel
Hi Jeff,
On Thu, 13 Jan 2022 09:38:33 +0800
Jeff Xie <xiehuan09@gmail.com> wrote:
> Using objtrace trigger to get the value of the object which from the kernel
> function parameter.
>
> Syntax:
> objtrace:add:obj[,offset][:type][:count][if <filter>]
>
> Usage:
> # echo 'p bio_add_page arg1=$arg1' > ./kprobe_events
> # gdb vmlinux
> (gdb) p &(((struct bio *)0)->bi_iter.bi_size)
> $1 = (unsigned int *) 0x28
> # echo 'objtrace:add:arg1,0x28:u32:1 if comm == "cat"' > ./events/kprobes/ \
> p_bio_add_page_0/trigger
> # cat /test.txt
>
> Signed-off-by: Jeff Xie <xiehuan09@gmail.com>
> ---
> kernel/trace/trace.c | 2 +-
> kernel/trace/trace_entries.h | 5 +-
> kernel/trace/trace_object.c | 190 +++++++++++++++++++++++++++++------
> kernel/trace/trace_output.c | 6 +-
> 4 files changed, 169 insertions(+), 34 deletions(-)
>
> diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
> index 0b627963e343..d5332ece4c67 100644
> --- a/kernel/trace/trace.c
> +++ b/kernel/trace/trace.c
> @@ -5591,7 +5591,7 @@ static const char readme_msg[] =
> "\t disable_hist:<system>:<event>\n"
> #endif
> #ifdef CONFIG_TRACE_OBJECT
> - "\t objtrace:add:obj[:count][if <filter>]\n"
> + "\t objtrace:add:obj[,offset][:type][:count][if <filter>]\n"
> #endif
> #ifdef CONFIG_STACKTRACE
> "\t\t stacktrace\n"
> diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
> index bb120d9498a9..2407c45a568c 100644
> --- a/kernel/trace/trace_entries.h
> +++ b/kernel/trace/trace_entries.h
> @@ -413,8 +413,9 @@ FTRACE_ENTRY(object, trace_object_entry,
> __field( unsigned long, ip )
> __field( unsigned long, parent_ip )
> __field( unsigned long, object )
> + __field( unsigned long, value )
> ),
>
> - F_printk(" %ps <-- %ps object:%lx\n",
> - (void *)__entry->ip, (void *)__entry->parent_ip, __entry->object)
> + F_printk(" %ps <-- %ps object:%lx value:%lx\n", (void *)__entry->ip,
> + (void *)__entry->parent_ip, __entry->object, __entry->value)
> );
> diff --git a/kernel/trace/trace_object.c b/kernel/trace/trace_object.c
> index 4af1c117cbfa..774a9dff4d0a 100644
> --- a/kernel/trace/trace_object.c
> +++ b/kernel/trace/trace_object.c
> @@ -11,7 +11,6 @@
> #define MAX_TRACED_OBJECT 5
> static DEFINE_RAW_SPINLOCK(trace_obj_lock);
> static struct trace_event_file event_trace_file;
> -static const int max_args_num = 6;
> static atomic_t trace_object_ref;
> static atomic_t num_traced_obj;
> static int exit_trace_object(void);
> @@ -19,8 +18,22 @@ static int init_trace_object(void);
>
> static struct object_instance {
> void *obj;
> + int obj_type_size;
> } traced_obj[MAX_TRACED_OBJECT];
>
> +/* objtrace private data */
> +struct objtrace_trigger_data {
> + struct ftrace_event_field *field;
> + int offset;
> + int obj_type_size;
> +};
> +
> +/* get the type size for the special object */
> +struct objtrace_fetch_type {
> + char *name;
> + int type_size;
> +};
> +
> static bool object_exist(void *obj)
> {
> int i, max;
> @@ -39,7 +52,7 @@ static bool object_empty(void)
> return !atomic_read(&num_traced_obj);
> }
>
> -static void set_trace_object(void *obj)
> +static void set_trace_object(void *obj, int obj_type_size)
> {
> unsigned long flags;
>
> @@ -59,6 +72,7 @@ static void set_trace_object(void *obj)
> goto out;
> }
> traced_obj[atomic_read(&num_traced_obj)].obj = obj;
> + traced_obj[atomic_read(&num_traced_obj)].obj_type_size = obj_type_size;
> /* make sure the num_traced_obj update always appears after traced_obj update */
> smp_wmb();
> atomic_inc(&num_traced_obj);
> @@ -67,7 +81,7 @@ static void set_trace_object(void *obj)
> }
>
> static void submit_trace_object(unsigned long ip, unsigned long parent_ip,
> - unsigned long object)
> + unsigned long object, unsigned long value)
> {
>
> struct trace_buffer *buffer;
> @@ -84,18 +98,66 @@ static void submit_trace_object(unsigned long ip, unsigned long parent_ip,
> entry->ip = ip;
> entry->parent_ip = parent_ip;
> entry->object = object;
> + entry->value = value;
>
> event_trigger_unlock_commit(&event_trace_file, buffer, event,
> entry, pc);
> }
>
> +static inline long get_object_value(unsigned long *val, void *obj, int type_size)
> +{
> + long ret = 0;
> +
> + switch (type_size) {
> + case 1: {
> + u8 tmp;
> +
> + ret = copy_from_kernel_nofault(&tmp, obj, sizeof(tmp));
> + if (ret)
> + goto out;
> + *val = tmp;
> + break;
> + }
> + case 2: {
> + u16 tmp;
> +
> + ret = copy_from_kernel_nofault(&tmp, obj, sizeof(tmp));
> + if (ret)
> + goto out;
> + *val = tmp;
> + break;
> + }
> + case 4: {
> + u32 tmp;
> +
> + ret = copy_from_kernel_nofault(&tmp, obj, sizeof(tmp));
> + if (ret)
> + goto out;
> + *val = tmp;
> + break;
> + }
> + case 8: {
> + u64 tmp;
> +
> + ret = copy_from_kernel_nofault(&tmp, obj, sizeof(tmp));
> + if (ret)
> + goto out;
> + *val = tmp;
> + break;
> + }
Can't you move the copy_from_kernel_nofault() outside of the switch-case?
e.g.
char tmp[sizeof(u64)];
ret = copy_from_kernel_nofault(tmp, obj, sizeof(tmp));
if (ret)
return ret;
switch (type_size) {
case 1:
*val = (unsigned long)*(u8 *)tmp;
break;
...
}
return 0;
Thank you,
> + default:
> + return -EINVAL;
> + }
> +out:
> + return ret;
> +}
> +
> static void
> trace_object_events_call(unsigned long ip, unsigned long parent_ip,
> struct ftrace_ops *op, struct ftrace_regs *fregs)
> {
> - struct pt_regs *pt_regs = ftrace_get_regs(fregs);
> - unsigned long obj;
> - int bit, n;
> + unsigned long val = 0;
> + int bit, n, max;
>
> bit = ftrace_test_recursion_trylock(ip, parent_ip);
> if (bit < 0)
> @@ -104,11 +166,12 @@ trace_object_events_call(unsigned long ip, unsigned long parent_ip,
> if (object_empty())
> goto out;
>
> - for (n = 0; n < max_args_num; n++) {
> - obj = regs_get_kernel_argument(pt_regs, n);
> - if (object_exist((void *)obj))
> - submit_trace_object(ip, parent_ip, obj);
This actually filtered the (function call) event which is related to the object.
> - /* The parameters of a function may match multiple objects */
> + max = atomic_read(&num_traced_obj);
> + smp_rmb();
> + for (n = 0; n < max; n++) {
> + if (get_object_value(&val, traced_obj[n].obj, traced_obj[n].obj_type_size))
> + goto out;
> + submit_trace_object(ip, parent_ip, (unsigned long)traced_obj[n].obj, val);
But you lost the filter here.
This is because you saves the "object + offset" address below;
> }
> out:
> ftrace_test_recursion_unlock(bit);
> @@ -124,12 +187,15 @@ trace_object_trigger(struct event_trigger_data *data,
> struct trace_buffer *buffer, void *rec,
> struct ring_buffer_event *event)
> {
> + struct objtrace_trigger_data *obj_data = data->private_data;
> + struct ftrace_event_field *field;
> + void *obj, *val = NULL;
>
> - struct ftrace_event_field *field = data->private_data;
> - void *obj = NULL;
> -
> - memcpy(&obj, rec + field->offset, sizeof(obj));
> - set_trace_object(obj);
> + field = obj_data->field;
> + memcpy(&val, rec + field->offset, sizeof(val));
> + /* get the final object */
> + obj = val + obj_data->offset;
Here, you saved the object address including offset. This means
you can not filter the actual function related to the object.
Please recover the filter.
> + set_trace_object(obj, obj_data->obj_type_size);
You can save the offset with type-size and object address itself.
Thank you,
> }
>
> static void
> @@ -140,8 +206,10 @@ trace_object_trigger_free(struct event_trigger_ops *ops,
> return;
>
> data->ref--;
> - if (!data->ref)
> + if (!data->ref) {
> + kfree(data->private_data);
> trigger_data_free(data);
> + }
> }
>
> static void
> @@ -276,6 +344,22 @@ static void unregister_object_trigger(char *glob, struct event_trigger_ops *ops,
> }
> }
>
> +static const struct objtrace_fetch_type objtrace_fetch_types[] = {
> + {"u8", 1},
> + {"s8", 1},
> + {"x8", 1},
> + {"u16", 2},
> + {"s16", 2},
> + {"x16", 2},
> + {"u32", 4},
> + {"s32", 4},
> + {"x32", 4},
> + {"u64", 8},
> + {"s64", 8},
> + {"x64", 8},
> + {}
> +};
> +
> static int
> event_object_trigger_callback(struct event_command *cmd_ops,
> struct trace_event_file *file,
> @@ -283,19 +367,22 @@ event_object_trigger_callback(struct event_command *cmd_ops,
> {
> struct event_trigger_data *trigger_data;
> struct event_trigger_ops *trigger_ops;
> + struct objtrace_trigger_data *obj_data;
> struct trace_event_call *call;
> struct ftrace_event_field *field;
> - char *objtrace_cmd;
> - char *trigger = NULL;
> - char *arg;
> - char *number;
> - int ret;
> + char *type, *tr, *obj, *tmp, *trigger = NULL;
> + char *number, *objtrace_cmd;
> + int ret, i, def_type_size, obj_type_size = 0;
> + long offset = 0;
>
> ret = -EINVAL;
> if (!param)
> goto out;
>
> - /* separate the trigger from the filter (c:a:n [if filter]) */
> + /*
> + * separate the trigger from the filter:
> + * objtrace:add:OBJ[,OFFS][:TYPE][:COUNT] [if filter]
> + */
> trigger = strsep(¶m, " \t");
> if (!trigger)
> goto out;
> @@ -309,33 +396,79 @@ event_object_trigger_callback(struct event_command *cmd_ops,
> if (!objtrace_cmd || strcmp(objtrace_cmd, "add"))
> goto out;
>
> - arg = strsep(&trigger, ":");
> - if (!arg)
> + obj = strsep(&trigger, ":");
> + if (!obj)
> goto out;
> +
> + tr = strchr(obj, ',');
> + if (!tr)
> + offset = 0;
> + else {
> + *tr++ = '\0';
> + ret = kstrtol(tr, 0, &offset);
> + if (ret)
> + goto out;
> + }
> +
> + ret = -EINVAL;
> call = file->event_call;
> - field = trace_find_event_field(call, arg);
> + field = trace_find_event_field(call, obj);
> if (!field)
> goto out;
>
> if (field->size != sizeof(void *))
> goto out;
> + def_type_size = sizeof(void *);
> + if (!trigger) {
> + obj_type_size = def_type_size;
> + goto skip_get_type;
> + }
>
> + tmp = trigger;
> + type = strsep(&trigger, ":");
> + if (!type)
> + obj_type_size = def_type_size;
> + else if (isdigit(type[0])) {
> + obj_type_size = def_type_size;
> + trigger = tmp;
> + } else {
> + for (i = 0; objtrace_fetch_types[i].name; i++) {
> + if (strcmp(objtrace_fetch_types[i].name, type) == 0) {
> + obj_type_size = objtrace_fetch_types[i].type_size;
> + break;
> + }
> + }
> + }
> + if (!obj_type_size)
> + goto out;
> +skip_get_type:
> trigger_ops = cmd_ops->get_trigger_ops(cmd, trigger);
>
> ret = -ENOMEM;
> + obj_data = kzalloc(sizeof(*obj_data), GFP_KERNEL);
> + if (!obj_data)
> + goto out;
> +
> + obj_data->field = field;
> + obj_data->offset = offset;
> + obj_data->obj_type_size = obj_type_size;
> +
> trigger_data = kzalloc(sizeof(*trigger_data), GFP_KERNEL);
> - if (!trigger_data)
> + if (!trigger_data) {
> + kfree(obj_data);
> goto out;
> + }
>
> trigger_data->count = -1;
> trigger_data->ops = trigger_ops;
> trigger_data->cmd_ops = cmd_ops;
> - trigger_data->private_data = field;
> + trigger_data->private_data = obj_data;
> INIT_LIST_HEAD(&trigger_data->list);
> INIT_LIST_HEAD(&trigger_data->named_list);
>
> if (glob[0] == '!') {
> cmd_ops->unreg(glob+1, trigger_ops, trigger_data, file);
> + kfree(obj_data);
> kfree(trigger_data);
> ret = 0;
> goto out;
> @@ -390,6 +523,7 @@ event_object_trigger_callback(struct event_command *cmd_ops,
> out_free:
> if (cmd_ops->set_filter)
> cmd_ops->set_filter(NULL, trigger_data, NULL);
> + kfree(obj_data);
> kfree(trigger_data);
> goto out;
> }
> diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
> index d747aed27104..12a971927d8c 100644
> --- a/kernel/trace/trace_output.c
> +++ b/kernel/trace/trace_output.c
> @@ -1553,6 +1553,7 @@ static enum print_line_t trace_object_print(struct trace_iterator *iter, int fla
> trace_assign_type(field, iter->ent);
> print_fn_trace(s, field->ip, field->parent_ip, flags);
> trace_seq_printf(s, " object:0x%lx", field->object);
> + trace_seq_printf(s, " value:0x%lx", field->value);
> trace_seq_putc(s, '\n');
>
> return trace_handle_return(s);
> @@ -1565,9 +1566,8 @@ static enum print_line_t trace_object_raw(struct trace_iterator *iter, int flags
>
> trace_assign_type(field, iter->ent);
>
> - trace_seq_printf(&iter->seq, "%lx %lx\n",
> - field->ip,
> - field->parent_ip);
> + trace_seq_printf(&iter->seq, "%lx %lx %lx %lx\n", field->ip,
> + field->parent_ip, field->object, field->value);
>
> return trace_handle_return(&iter->seq);
> }
> --
> 2.25.1
>
--
Masami Hiramatsu <mhiramat@kernel.org>
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v7 2/4] trace/objtrace: get the value of the object
2022-01-18 15:13 ` Masami Hiramatsu
@ 2022-01-19 2:28 ` Jeff Xie
0 siblings, 0 replies; 11+ messages in thread
From: Jeff Xie @ 2022-01-19 2:28 UTC (permalink / raw)
To: Masami Hiramatsu; +Cc: Steven Rostedt, mingo, Tom Zanussi, linux-kernel
Hi Masami,
On Tue, Jan 18, 2022 at 11:13 PM Masami Hiramatsu <mhiramat@kernel.org> wrote:
>
> Hi Jeff,
>
> On Thu, 13 Jan 2022 09:38:33 +0800
> Jeff Xie <xiehuan09@gmail.com> wrote:
>
> > Using objtrace trigger to get the value of the object which from the kernel
> > function parameter.
> >
> > Syntax:
> > objtrace:add:obj[,offset][:type][:count][if <filter>]
> >
> > Usage:
> > # echo 'p bio_add_page arg1=$arg1' > ./kprobe_events
> > # gdb vmlinux
> > (gdb) p &(((struct bio *)0)->bi_iter.bi_size)
> > $1 = (unsigned int *) 0x28
> > # echo 'objtrace:add:arg1,0x28:u32:1 if comm == "cat"' > ./events/kprobes/ \
> > p_bio_add_page_0/trigger
> > # cat /test.txt
> >
> > Signed-off-by: Jeff Xie <xiehuan09@gmail.com>
> > ---
> > kernel/trace/trace.c | 2 +-
> > kernel/trace/trace_entries.h | 5 +-
> > kernel/trace/trace_object.c | 190 +++++++++++++++++++++++++++++------
> > kernel/trace/trace_output.c | 6 +-
> > 4 files changed, 169 insertions(+), 34 deletions(-)
> >
> > diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
> > index 0b627963e343..d5332ece4c67 100644
> > --- a/kernel/trace/trace.c
> > +++ b/kernel/trace/trace.c
> > @@ -5591,7 +5591,7 @@ static const char readme_msg[] =
> > "\t disable_hist:<system>:<event>\n"
> > #endif
> > #ifdef CONFIG_TRACE_OBJECT
> > - "\t objtrace:add:obj[:count][if <filter>]\n"
> > + "\t objtrace:add:obj[,offset][:type][:count][if <filter>]\n"
> > #endif
> > #ifdef CONFIG_STACKTRACE
> > "\t\t stacktrace\n"
> > diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
> > index bb120d9498a9..2407c45a568c 100644
> > --- a/kernel/trace/trace_entries.h
> > +++ b/kernel/trace/trace_entries.h
> > @@ -413,8 +413,9 @@ FTRACE_ENTRY(object, trace_object_entry,
> > __field( unsigned long, ip )
> > __field( unsigned long, parent_ip )
> > __field( unsigned long, object )
> > + __field( unsigned long, value )
> > ),
> >
> > - F_printk(" %ps <-- %ps object:%lx\n",
> > - (void *)__entry->ip, (void *)__entry->parent_ip, __entry->object)
> > + F_printk(" %ps <-- %ps object:%lx value:%lx\n", (void *)__entry->ip,
> > + (void *)__entry->parent_ip, __entry->object, __entry->value)
> > );
> > diff --git a/kernel/trace/trace_object.c b/kernel/trace/trace_object.c
> > index 4af1c117cbfa..774a9dff4d0a 100644
> > --- a/kernel/trace/trace_object.c
> > +++ b/kernel/trace/trace_object.c
> > @@ -11,7 +11,6 @@
> > #define MAX_TRACED_OBJECT 5
> > static DEFINE_RAW_SPINLOCK(trace_obj_lock);
> > static struct trace_event_file event_trace_file;
> > -static const int max_args_num = 6;
> > static atomic_t trace_object_ref;
> > static atomic_t num_traced_obj;
> > static int exit_trace_object(void);
> > @@ -19,8 +18,22 @@ static int init_trace_object(void);
> >
> > static struct object_instance {
> > void *obj;
> > + int obj_type_size;
> > } traced_obj[MAX_TRACED_OBJECT];
> >
> > +/* objtrace private data */
> > +struct objtrace_trigger_data {
> > + struct ftrace_event_field *field;
> > + int offset;
> > + int obj_type_size;
> > +};
> > +
> > +/* get the type size for the special object */
> > +struct objtrace_fetch_type {
> > + char *name;
> > + int type_size;
> > +};
> > +
> > static bool object_exist(void *obj)
> > {
> > int i, max;
> > @@ -39,7 +52,7 @@ static bool object_empty(void)
> > return !atomic_read(&num_traced_obj);
> > }
> >
> > -static void set_trace_object(void *obj)
> > +static void set_trace_object(void *obj, int obj_type_size)
> > {
> > unsigned long flags;
> >
> > @@ -59,6 +72,7 @@ static void set_trace_object(void *obj)
> > goto out;
> > }
> > traced_obj[atomic_read(&num_traced_obj)].obj = obj;
> > + traced_obj[atomic_read(&num_traced_obj)].obj_type_size = obj_type_size;
> > /* make sure the num_traced_obj update always appears after traced_obj update */
> > smp_wmb();
> > atomic_inc(&num_traced_obj);
> > @@ -67,7 +81,7 @@ static void set_trace_object(void *obj)
> > }
> >
> > static void submit_trace_object(unsigned long ip, unsigned long parent_ip,
> > - unsigned long object)
> > + unsigned long object, unsigned long value)
> > {
> >
> > struct trace_buffer *buffer;
> > @@ -84,18 +98,66 @@ static void submit_trace_object(unsigned long ip, unsigned long parent_ip,
> > entry->ip = ip;
> > entry->parent_ip = parent_ip;
> > entry->object = object;
> > + entry->value = value;
> >
> > event_trigger_unlock_commit(&event_trace_file, buffer, event,
> > entry, pc);
> > }
> >
> > +static inline long get_object_value(unsigned long *val, void *obj, int type_size)
> > +{
> > + long ret = 0;
> > +
> > + switch (type_size) {
> > + case 1: {
> > + u8 tmp;
> > +
> > + ret = copy_from_kernel_nofault(&tmp, obj, sizeof(tmp));
> > + if (ret)
> > + goto out;
> > + *val = tmp;
> > + break;
> > + }
> > + case 2: {
> > + u16 tmp;
> > +
> > + ret = copy_from_kernel_nofault(&tmp, obj, sizeof(tmp));
> > + if (ret)
> > + goto out;
> > + *val = tmp;
> > + break;
> > + }
> > + case 4: {
> > + u32 tmp;
> > +
> > + ret = copy_from_kernel_nofault(&tmp, obj, sizeof(tmp));
> > + if (ret)
> > + goto out;
> > + *val = tmp;
> > + break;
> > + }
> > + case 8: {
> > + u64 tmp;
> > +
> > + ret = copy_from_kernel_nofault(&tmp, obj, sizeof(tmp));
> > + if (ret)
> > + goto out;
> > + *val = tmp;
> > + break;
> > + }
>
> Can't you move the copy_from_kernel_nofault() outside of the switch-case?
> e.g.
>
> char tmp[sizeof(u64)];
>
> ret = copy_from_kernel_nofault(tmp, obj, sizeof(tmp));
> if (ret)
> return ret;
>
> switch (type_size) {
> case 1:
> *val = (unsigned long)*(u8 *)tmp;
> break;
> ...
> }
> return 0;
Thanks, I will change it.
> Thank you,
>
>
> > + default:
> > + return -EINVAL;
> > + }
> > +out:
> > + return ret;
> > +}
> > +
> > static void
> > trace_object_events_call(unsigned long ip, unsigned long parent_ip,
> > struct ftrace_ops *op, struct ftrace_regs *fregs)
> > {
> > - struct pt_regs *pt_regs = ftrace_get_regs(fregs);
> > - unsigned long obj;
> > - int bit, n;
> > + unsigned long val = 0;
> > + int bit, n, max;
> >
> > bit = ftrace_test_recursion_trylock(ip, parent_ip);
> > if (bit < 0)
> > @@ -104,11 +166,12 @@ trace_object_events_call(unsigned long ip, unsigned long parent_ip,
> > if (object_empty())
> > goto out;
> >
> > - for (n = 0; n < max_args_num; n++) {
> > - obj = regs_get_kernel_argument(pt_regs, n);
> > - if (object_exist((void *)obj))
> > - submit_trace_object(ip, parent_ip, obj);
>
> This actually filtered the (function call) event which is related to the object.
>
> > - /* The parameters of a function may match multiple objects */
> > + max = atomic_read(&num_traced_obj);
> > + smp_rmb();
> > + for (n = 0; n < max; n++) {
> > + if (get_object_value(&val, traced_obj[n].obj, traced_obj[n].obj_type_size))
> > + goto out;
> > + submit_trace_object(ip, parent_ip, (unsigned long)traced_obj[n].obj, val);
>
> But you lost the filter here.
> This is because you saves the "object + offset" address below;
I misunderstood the previous discussion and thought that filter
do not apply when getting the value of the object. ;-)
I will add the filter.
>
> > }
> > out:
> > ftrace_test_recursion_unlock(bit);
> > @@ -124,12 +187,15 @@ trace_object_trigger(struct event_trigger_data *data,
> > struct trace_buffer *buffer, void *rec,
> > struct ring_buffer_event *event)
> > {
> > + struct objtrace_trigger_data *obj_data = data->private_data;
> > + struct ftrace_event_field *field;
> > + void *obj, *val = NULL;
> >
> > - struct ftrace_event_field *field = data->private_data;
> > - void *obj = NULL;
> > -
> > - memcpy(&obj, rec + field->offset, sizeof(obj));
> > - set_trace_object(obj);
> > + field = obj_data->field;
> > + memcpy(&val, rec + field->offset, sizeof(val));
> > + /* get the final object */
> > + obj = val + obj_data->offset;
>
> Here, you saved the object address including offset. This means
> you can not filter the actual function related to the object.
>
> Please recover the filter.
>
> > + set_trace_object(obj, obj_data->obj_type_size);
>
> You can save the offset with type-size and object address itself.
Thanks, I will recover it.
> Thank you,
>
> > }
> >
> > static void
> > @@ -140,8 +206,10 @@ trace_object_trigger_free(struct event_trigger_ops *ops,
> > return;
> >
> > data->ref--;
> > - if (!data->ref)
> > + if (!data->ref) {
> > + kfree(data->private_data);
> > trigger_data_free(data);
> > + }
> > }
> >
> > static void
> > @@ -276,6 +344,22 @@ static void unregister_object_trigger(char *glob, struct event_trigger_ops *ops,
> > }
> > }
> >
> > +static const struct objtrace_fetch_type objtrace_fetch_types[] = {
> > + {"u8", 1},
> > + {"s8", 1},
> > + {"x8", 1},
> > + {"u16", 2},
> > + {"s16", 2},
> > + {"x16", 2},
> > + {"u32", 4},
> > + {"s32", 4},
> > + {"x32", 4},
> > + {"u64", 8},
> > + {"s64", 8},
> > + {"x64", 8},
> > + {}
> > +};
> > +
> > static int
> > event_object_trigger_callback(struct event_command *cmd_ops,
> > struct trace_event_file *file,
> > @@ -283,19 +367,22 @@ event_object_trigger_callback(struct event_command *cmd_ops,
> > {
> > struct event_trigger_data *trigger_data;
> > struct event_trigger_ops *trigger_ops;
> > + struct objtrace_trigger_data *obj_data;
> > struct trace_event_call *call;
> > struct ftrace_event_field *field;
> > - char *objtrace_cmd;
> > - char *trigger = NULL;
> > - char *arg;
> > - char *number;
> > - int ret;
> > + char *type, *tr, *obj, *tmp, *trigger = NULL;
> > + char *number, *objtrace_cmd;
> > + int ret, i, def_type_size, obj_type_size = 0;
> > + long offset = 0;
> >
> > ret = -EINVAL;
> > if (!param)
> > goto out;
> >
> > - /* separate the trigger from the filter (c:a:n [if filter]) */
> > + /*
> > + * separate the trigger from the filter:
> > + * objtrace:add:OBJ[,OFFS][:TYPE][:COUNT] [if filter]
> > + */
> > trigger = strsep(¶m, " \t");
> > if (!trigger)
> > goto out;
> > @@ -309,33 +396,79 @@ event_object_trigger_callback(struct event_command *cmd_ops,
> > if (!objtrace_cmd || strcmp(objtrace_cmd, "add"))
> > goto out;
> >
> > - arg = strsep(&trigger, ":");
> > - if (!arg)
> > + obj = strsep(&trigger, ":");
> > + if (!obj)
> > goto out;
> > +
> > + tr = strchr(obj, ',');
> > + if (!tr)
> > + offset = 0;
> > + else {
> > + *tr++ = '\0';
> > + ret = kstrtol(tr, 0, &offset);
> > + if (ret)
> > + goto out;
> > + }
> > +
> > + ret = -EINVAL;
> > call = file->event_call;
> > - field = trace_find_event_field(call, arg);
> > + field = trace_find_event_field(call, obj);
> > if (!field)
> > goto out;
> >
> > if (field->size != sizeof(void *))
> > goto out;
> > + def_type_size = sizeof(void *);
> > + if (!trigger) {
> > + obj_type_size = def_type_size;
> > + goto skip_get_type;
> > + }
> >
> > + tmp = trigger;
> > + type = strsep(&trigger, ":");
> > + if (!type)
> > + obj_type_size = def_type_size;
> > + else if (isdigit(type[0])) {
> > + obj_type_size = def_type_size;
> > + trigger = tmp;
> > + } else {
> > + for (i = 0; objtrace_fetch_types[i].name; i++) {
> > + if (strcmp(objtrace_fetch_types[i].name, type) == 0) {
> > + obj_type_size = objtrace_fetch_types[i].type_size;
> > + break;
> > + }
> > + }
> > + }
> > + if (!obj_type_size)
> > + goto out;
> > +skip_get_type:
> > trigger_ops = cmd_ops->get_trigger_ops(cmd, trigger);
> >
> > ret = -ENOMEM;
> > + obj_data = kzalloc(sizeof(*obj_data), GFP_KERNEL);
> > + if (!obj_data)
> > + goto out;
> > +
> > + obj_data->field = field;
> > + obj_data->offset = offset;
> > + obj_data->obj_type_size = obj_type_size;
> > +
> > trigger_data = kzalloc(sizeof(*trigger_data), GFP_KERNEL);
> > - if (!trigger_data)
> > + if (!trigger_data) {
> > + kfree(obj_data);
> > goto out;
> > + }
> >
> > trigger_data->count = -1;
> > trigger_data->ops = trigger_ops;
> > trigger_data->cmd_ops = cmd_ops;
> > - trigger_data->private_data = field;
> > + trigger_data->private_data = obj_data;
> > INIT_LIST_HEAD(&trigger_data->list);
> > INIT_LIST_HEAD(&trigger_data->named_list);
> >
> > if (glob[0] == '!') {
> > cmd_ops->unreg(glob+1, trigger_ops, trigger_data, file);
> > + kfree(obj_data);
> > kfree(trigger_data);
> > ret = 0;
> > goto out;
> > @@ -390,6 +523,7 @@ event_object_trigger_callback(struct event_command *cmd_ops,
> > out_free:
> > if (cmd_ops->set_filter)
> > cmd_ops->set_filter(NULL, trigger_data, NULL);
> > + kfree(obj_data);
> > kfree(trigger_data);
> > goto out;
> > }
> > diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
> > index d747aed27104..12a971927d8c 100644
> > --- a/kernel/trace/trace_output.c
> > +++ b/kernel/trace/trace_output.c
> > @@ -1553,6 +1553,7 @@ static enum print_line_t trace_object_print(struct trace_iterator *iter, int fla
> > trace_assign_type(field, iter->ent);
> > print_fn_trace(s, field->ip, field->parent_ip, flags);
> > trace_seq_printf(s, " object:0x%lx", field->object);
> > + trace_seq_printf(s, " value:0x%lx", field->value);
> > trace_seq_putc(s, '\n');
> >
> > return trace_handle_return(s);
> > @@ -1565,9 +1566,8 @@ static enum print_line_t trace_object_raw(struct trace_iterator *iter, int flags
> >
> > trace_assign_type(field, iter->ent);
> >
> > - trace_seq_printf(&iter->seq, "%lx %lx\n",
> > - field->ip,
> > - field->parent_ip);
> > + trace_seq_printf(&iter->seq, "%lx %lx %lx %lx\n", field->ip,
> > + field->parent_ip, field->object, field->value);
> >
> > return trace_handle_return(&iter->seq);
> > }
> > --
> > 2.25.1
> >
>
>
> --
> Masami Hiramatsu <mhiramat@kernel.org>
---
JeffXie
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH v7 3/4] trace/objtrace: Add testcases for objtrace
2022-01-13 1:38 [PATCH v7 0/4] trace: Introduce objtrace trigger to trace the kernel object Jeff Xie
2022-01-13 1:38 ` [PATCH v7 1/4] trace: Add trace any " Jeff Xie
2022-01-13 1:38 ` [PATCH v7 2/4] trace/objtrace: get the value of the object Jeff Xie
@ 2022-01-13 1:38 ` Jeff Xie
2022-01-13 1:38 ` [PATCH v7 4/4] trace/objtrace: Add documentation " Jeff Xie
2022-01-18 14:24 ` [PATCH v7 0/4] trace: Introduce objtrace trigger to trace the kernel object Masami Hiramatsu
4 siblings, 0 replies; 11+ messages in thread
From: Jeff Xie @ 2022-01-13 1:38 UTC (permalink / raw)
To: rostedt; +Cc: mhiramat, mingo, zanussi, linux-kernel, Jeff Xie
Add a series of testcases to illustrate correct and incorrect usage of
objtrace trigger.
Signed-off-by: Jeff Xie <xiehuan09@gmail.com>
---
.../ftrace/test.d/trigger/trigger-objtrace.tc | 39 +++++++++++++++++++
1 file changed, 39 insertions(+)
create mode 100644 tools/testing/selftests/ftrace/test.d/trigger/trigger-objtrace.tc
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-objtrace.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-objtrace.tc
new file mode 100644
index 000000000000..d71eb157c51f
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-objtrace.tc
@@ -0,0 +1,39 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event trigger - test objtrace-trigger
+# requires: kprobe_events "objtrace":README
+
+fail() { #msg
+ echo $1
+ exit_fail
+}
+
+echo 'p bio_add_page arg1=$arg1' > kprobe_events
+
+FEATURE=`grep objtrace events/kprobes/p_bio_add_page_0/trigger`
+if [ -z "$FEATURE" ]; then
+ echo "objtrace trigger is not supported"
+ exit_unsupported
+fi
+
+echo "Test objtrace trigger"
+echo 'objtrace:add:arg1,0x28:u32:1 if comm == "cat"' > \
+ events/kprobes/p_bio_add_page_0/trigger
+if [ -z $? ]; then
+ fail "objtrace trigger syntax error"
+fi
+
+echo "Test objtrace semantic errors"
+
+# Being lack of objtrace command
+! echo 'objtrace:arg1,0x28:u32:1' > events/kprobes/p_bio_add_page_0/trigger
+# Bad parameter name
+! echo 'objtrace:add:argx:u32:1' > events/kprobes/p_bio_add_page_0/trigger
+
+echo "reset objtrace trigger"
+
+echo '!objtrace:add:arg1,0x28:u32' > \
+ events/kprobes/p_bio_add_page_0/trigger
+echo '-:p_bio_add_page_0' >> ./kprobe_events
+
+exit 0
--
2.25.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH v7 4/4] trace/objtrace: Add documentation for objtrace
2022-01-13 1:38 [PATCH v7 0/4] trace: Introduce objtrace trigger to trace the kernel object Jeff Xie
` (2 preceding siblings ...)
2022-01-13 1:38 ` [PATCH v7 3/4] trace/objtrace: Add testcases for objtrace Jeff Xie
@ 2022-01-13 1:38 ` Jeff Xie
2022-01-18 14:24 ` [PATCH v7 0/4] trace: Introduce objtrace trigger to trace the kernel object Masami Hiramatsu
4 siblings, 0 replies; 11+ messages in thread
From: Jeff Xie @ 2022-01-13 1:38 UTC (permalink / raw)
To: rostedt; +Cc: mhiramat, mingo, zanussi, linux-kernel, Jeff Xie
Added documentation explaining how to use objtrace trigger to get the value
of the object.
Signed-off-by: Jeff Xie <xiehuan09@gmail.com>
---
Documentation/trace/events.rst | 114 +++++++++++++++++++++++++++++++++
1 file changed, 114 insertions(+)
diff --git a/Documentation/trace/events.rst b/Documentation/trace/events.rst
index 8ddb9b09451c..5e5d5dc31ad2 100644
--- a/Documentation/trace/events.rst
+++ b/Documentation/trace/events.rst
@@ -527,6 +527,120 @@ The following commands are supported:
See Documentation/trace/histogram.rst for details and examples.
+- objtrace
+
+ This command provides a way to get the value of any object, The object
+ can be obtained from the dynamic event(kprobe_event/uprobe_event) or the
+ static event(tracepoint).
+
+ Usage:
+ When using the kprobe event, only need to set the objtrace(a new trigger),
+ we can get the value of the object. The object is from the setting of the
+ kprobe event.
+
+ For example:
+ For the function bio_add_page():
+
+ int bio_add_page(struct bio *bio, struct page *page,
+ unsigned int len, unsigned int offset)
+
+ Firstly, we can set the base of the object, thus the first string "arg1"
+ stands for the value of the first parameter of this function bio_add_gage(),
+
+ # echo 'p bio_add_page arg1=$arg1' >> ./kprobe_events
+
+ Secondly, we can get the value dynamically base the above object.
+
+ find the offset of the bi_size in struct bio:
+ $ gdb vmlinux
+ (gdb) p &(((struct bio *)0)->bi_iter.bi_size)
+ $1 = (unsigned int *) 0x28
+
+ # echo 'objtrace:add:arg1,0x28:u32:1 if comm == "cat"' > ./events/kprobes/ \
+ p_bio_add_page_0/trigger
+
+ The best way to use this is that we can set the entrance event and exit
+ event, for example, the following example is to set the read_papes as
+ the entrance event, and set the __blk_account_io_start as the exit event.
+
+ # cd /sys/kernel/debug/tracing/
+ # echo 0 > ./tracing_on
+ # echo 'p read_pages' >> ./kprobe_events
+ # echo 'p __blk_account_io_start' >> ./kprobe_events
+ # echo 'traceon if comm == "cat"' > ./events/kprobes/p_read_pages_0/trigger
+ # echo 'traceoff if comm == "cat"' > ./events/kprobes/p___blk_account_io_start_0/trigger
+ # echo 'p bio_add_page arg1=$arg1' >> ./kprobe_events
+ # echo 'objtrace:add:arg1,0x28:u32:1 if comm == "cat"' > ./events/kprobes/p_bio_add_page_0/trigger
+
+ # du -sh /test.txt
+ 12.0K /test.txt
+
+ # cat /test.txt > /dev/null
+ # cat ./trace
+
+ # tracer: nop
+ #
+ # entries-in-buffer/entries-written: 50/50 #P:1
+ #
+ # _-----=> irqs-off
+ # / _----=> need-resched
+ # | / _---=> hardirq/softirq
+ # || / _--=> preempt-depth
+ # ||| / _-=> migrate-disable
+ # |||| / delay
+ # TASK-PID CPU# ||||| TIMESTAMP FUNCTION
+ # | | | ||||| | |
+ cat-95 [000] ..... 1.412065: _raw_spin_unlock_irqrestore <-event_triggers_call object:0xffff888108af6328 value:0x0
+ cat-95 [000] ..... 1.412066: __bio_try_merge_page <-bio_add_page object:0xffff888108af6328 value:0x0
+ cat-95 [000] ..... 1.412066: __bio_add_page <-bio_add_page object:0xffff888108af6328 value:0x0
+ cat-95 [000] ..... 1.412066: rcu_read_unlock_strict <-xa_load object:0xffff888108af6328 value:0x1000
+ cat-95 [000] ..... 1.412066: bio_add_page <-ext4_mpage_readpages object:0xffff888108af6328 value:0x1000
+ cat-95 [000] ..... 1.412066: kprobe_ftrace_handler <-ftrace_ops_list_func object:0xffff888108af6328 value:0x1000
+ cat-95 [000] ..... 1.412067: get_kprobe <-kprobe_ftrace_handler object:0xffff888108af6328 value:0x1000
+ cat-95 [000] ..... 1.412067: __bio_try_merge_page <-bio_add_page object:0xffff888108af6328 value:0x1000
+ cat-95 [000] ..... 1.412067: __bio_add_page <-bio_add_page object:0xffff888108af6328 value:0x1000
+ cat-95 [000] ..... 1.412067: rcu_read_unlock_strict <-xa_load object:0xffff888108af6328 value:0x2000
+ cat-95 [000] ..... 1.412067: bio_add_page <-ext4_mpage_readpages object:0xffff888108af6328 value:0x2000
+ cat-95 [000] ..... 1.412067: kprobe_ftrace_handler <-ftrace_ops_list_func object:0xffff888108af6328 value:0x2000
+ cat-95 [000] ..... 1.412067: get_kprobe <-kprobe_ftrace_handler object:0xffff888108af6328 value:0x2000
+ cat-95 [000] ..... 1.412067: __bio_try_merge_page <-bio_add_page object:0xffff888108af6328 value:0x2000
+ cat-95 [000] ..... 1.412068: submit_bio <-ext4_mpage_readpages object:0xffff888108af6328 value:0x3000
+ cat-95 [000] ..... 1.412068: submit_bio_noacct <-ext4_mpage_readpages object:0xffff888108af6328 value:0x3000
+ cat-95 [000] ..... 1.412068: __submit_bio <-submit_bio_noacct object:0xffff888108af6328 value:0x3000
+ cat-95 [000] ..... 1.412068: blk_try_enter_queue <-__submit_bio object:0xffff888108af6328 value:0x3000
+ cat-95 [000] ..... 1.412068: rcu_read_unlock_strict <-blk_try_enter_queue object:0xffff888108af6328 value:0x3000
+ cat-95 [000] ..... 1.412068: rcu_read_unlock_strict <-blk_try_enter_queue object:0xffff888108af6328 value:0x3000
+ cat-95 [000] ..... 1.412068: submit_bio_checks <-__submit_bio object:0xffff888108af6328 value:0x3000
+ cat-95 [000] ..... 1.412068: __cond_resched <-submit_bio_checks object:0xffff888108af6328 value:0x3000
+ cat-95 [000] ..... 1.412068: rcu_all_qs <-__cond_resched object:0xffff888108af6328 value:0x3000
+ cat-95 [000] ..... 1.412068: should_fail_bio <-submit_bio_checks object:0xffff888108af6328 value:0x3000
+ cat-95 [000] ..... 1.412069: create_task_io_context <-submit_bio_checks object:0xffff888108af6328 value:0x3000
+ cat-95 [000] ..... 1.412069: kmem_cache_alloc_node <-create_task_io_context object:0xffff888108af6328 value:0x3000
+ cat-95 [000] ..... 1.412069: should_failslab <-kmem_cache_alloc_node object:0xffff888108af6328 value:0x3000
+ cat-95 [000] ..... 1.412069: _raw_spin_lock <-create_task_io_context object:0xffff888108af6328 value:0x3000
+ cat-95 [000] ..... 1.412069: blk_mq_submit_bio <-__submit_bio object:0xffff888108af6328 value:0x3000
+ cat-95 [000] ..... 1.412069: __blk_queue_split <-blk_mq_submit_bio object:0xffff888108af6328 value:0x3000
+ cat-95 [000] ..... 1.412069: bvec_split_segs <-__blk_queue_split object:0xffff888108af6328 value:0x3000
+ cat-95 [000] ..... 1.412069: blk_attempt_plug_merge <-blk_mq_submit_bio object:0xffff888108af6328 value:0x3000
+ cat-95 [000] ..... 1.412070: __blk_mq_sched_bio_merge <-blk_mq_submit_bio object:0xffff888108af6328 value:0x3000
+ cat-95 [000] ..... 1.412070: dd_bio_merge <-blk_mq_submit_bio object:0xffff888108af6328 value:0x3000
+ cat-95 [000] ..... 1.412070: _raw_spin_lock <-dd_bio_merge object:0xffff888108af6328 value:0x3000
+ cat-95 [000] ..... 1.412070: blk_mq_sched_try_merge <-dd_bio_merge object:0xffff888108af6328 value:0x3000
+ cat-95 [000] ..... 1.412070: elv_merge <-blk_mq_sched_try_merge object:0xffff888108af6328 value:0x3000
+ cat-95 [000] ..... 1.412070: elv_rqhash_find <-elv_merge object:0xffff888108af6328 value:0x3000
+ cat-95 [000] ..... 1.412070: dd_request_merge <-blk_mq_sched_try_merge object:0xffff888108af6328 value:0x3000
+ cat-95 [000] ..... 1.412070: elv_rb_find <-dd_request_merge object:0xffff888108af6328 value:0x3000
+ cat-95 [000] ..... 1.412070: __blk_mq_alloc_request <-blk_mq_submit_bio object:0xffff888108af6328 value:0x3000
+ cat-95 [000] ..... 1.412071: dd_limit_depth <-__blk_mq_alloc_request object:0xffff888108af6328 value:0x3000
+ cat-95 [000] ..... 1.412071: blk_mq_get_tag <-__blk_mq_alloc_request object:0xffff888108af6328 value:0x3000
+ cat-95 [000] ..... 1.412071: __blk_mq_get_tag <-blk_mq_get_tag object:0xffff888108af6328 value:0x3000
+ cat-95 [000] ..... 1.412071: blk_mq_rq_ctx_init.isra.0 <-blk_mq_submit_bio object:0xffff888108af6328 value:0x3000
+ cat-95 [000] ..... 1.412071: ktime_get <-blk_mq_rq_ctx_init.isra.0 object:0xffff888108af6328 value:0x3000
+ cat-95 [000] ..... 1.412071: dd_prepare_request <-blk_mq_rq_ctx_init.isra.0 object:0xffff888108af6328 value:0x3000
+ cat-95 [000] ..... 1.412071: __blk_account_io_start <-blk_mq_submit_bio object:0xffff888108af6328 value:0x3000
+ cat-95 [000] ..... 1.412071: kprobe_ftrace_handler <-ftrace_ops_list_func object:0xffff888108af6328 value:0x3000
+ cat-95 [000] ..... 1.412071: get_kprobe <-kprobe_ftrace_handler object:0xffff888108af6328 value:0x3000
+
7. In-kernel trace event API
============================
--
2.25.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [PATCH v7 0/4] trace: Introduce objtrace trigger to trace the kernel object
2022-01-13 1:38 [PATCH v7 0/4] trace: Introduce objtrace trigger to trace the kernel object Jeff Xie
` (3 preceding siblings ...)
2022-01-13 1:38 ` [PATCH v7 4/4] trace/objtrace: Add documentation " Jeff Xie
@ 2022-01-18 14:24 ` Masami Hiramatsu
2022-01-19 2:32 ` Jeff Xie
4 siblings, 1 reply; 11+ messages in thread
From: Masami Hiramatsu @ 2022-01-18 14:24 UTC (permalink / raw)
To: Jeff Xie; +Cc: rostedt, mhiramat, mingo, zanussi, linux-kernel
On Thu, 13 Jan 2022 09:38:31 +0800
Jeff Xie <xiehuan09@gmail.com> wrote:
> Introduce a method based on function tracer to trace any object and get
> the value of the object dynamically. the object can be obtained from the
> dynamic event (kprobe_event/uprobe_event) or the static event(tracepoint).
>
> Usage:
> When using the kprobe event, only need to set the objtrace(a new trigger),
> we can get the value of the object. The object is from the setting of the
> kprobe event.
>
> For example:
> For the function bio_add_page():
>
> int bio_add_page(struct bio *bio, struct page *page,
> unsigned int len, unsigned int offset)
>
> Firstly, we can set the base of the object, thus the first string "arg1"
> stands for the value of the first parameter of this function bio_add_gage(),
>
> # echo 'p bio_add_page arg1=$arg1' >> ./kprobe_events
>
> Secondly, we can get the value dynamically base the above object.
>
> find the offset of the bi_size in struct bio:
> $ gdb vmlinux
> (gdb) p &(((struct bio *)0)->bi_iter.bi_size)
> $1 = (unsigned int *) 0x28
>
> # echo 'objtrace:add:arg1,0x28:u32:1 if comm == "cat"' > ./events/kprobes/ \
> p_bio_add_page_0/trigger
>
> The best way to use this is that we can set the entrance event and exit
> event, for example, the following example is to set the read_papes as
> the entrance event, and set the __blk_account_io_start as the exit event.
>
> # cd /sys/kernel/debug/tracing/
> # echo 0 > ./tracing_on
> # echo 'p read_pages' >> ./kprobe_events
> # echo 'p __blk_account_io_start' >> ./kprobe_events
> # echo 'traceon if comm == "cat"' > ./events/kprobes/p_read_pages_0/trigger
> # echo 'traceoff if comm == "cat"' > ./events/kprobes/p___blk_account_io_start_0/trigger
> # echo 'p bio_add_page arg1=$arg1' >> ./kprobe_events
> # echo 'objtrace:add:arg1,0x28:u32:1 if comm == "cat"' > ./events/kprobes/p_bio_add_page_0/trigger
>
> # du -sh /test.txt
> 12.0K /test.txt
>
> # cat /test.txt > /dev/null
> # cat ./trace
>
> # tracer: nop
> #
> # entries-in-buffer/entries-written: 50/50 #P:1
> #
> # _-----=> irqs-off
> # / _----=> need-resched
> # | / _---=> hardirq/softirq
> # || / _--=> preempt-depth
> # ||| / _-=> migrate-disable
> # |||| / delay
> # TASK-PID CPU# ||||| TIMESTAMP FUNCTION
> # | | | ||||| | |
> cat-95 [000] ..... 1.412065: _raw_spin_unlock_irqrestore <-event_triggers_call object:0xffff888108af6328 value:0x0
> cat-95 [000] ..... 1.412066: __bio_try_merge_page <-bio_add_page object:0xffff888108af6328 value:0x0
> cat-95 [000] ..... 1.412066: __bio_add_page <-bio_add_page object:0xffff888108af6328 value:0x0
> cat-95 [000] ..... 1.412066: rcu_read_unlock_strict <-xa_load object:0xffff888108af6328 value:0x1000
> cat-95 [000] ..... 1.412066: bio_add_page <-ext4_mpage_readpages object:0xffff888108af6328 value:0x1000
> cat-95 [000] ..... 1.412066: kprobe_ftrace_handler <-ftrace_ops_list_func object:0xffff888108af6328 value:0x1000
> cat-95 [000] ..... 1.412067: get_kprobe <-kprobe_ftrace_handler object:0xffff888108af6328 value:0x1000
> cat-95 [000] ..... 1.412067: __bio_try_merge_page <-bio_add_page object:0xffff888108af6328 value:0x1000
> cat-95 [000] ..... 1.412067: __bio_add_page <-bio_add_page object:0xffff888108af6328 value:0x1000
> cat-95 [000] ..... 1.412067: rcu_read_unlock_strict <-xa_load object:0xffff888108af6328 value:0x2000
> cat-95 [000] ..... 1.412067: bio_add_page <-ext4_mpage_readpages object:0xffff888108af6328 value:0x2000
> cat-95 [000] ..... 1.412067: kprobe_ftrace_handler <-ftrace_ops_list_func object:0xffff888108af6328 value:0x2000
> cat-95 [000] ..... 1.412067: get_kprobe <-kprobe_ftrace_handler object:0xffff888108af6328 value:0x2000
> cat-95 [000] ..... 1.412067: __bio_try_merge_page <-bio_add_page object:0xffff888108af6328 value:0x2000
> cat-95 [000] ..... 1.412068: submit_bio <-ext4_mpage_readpages object:0xffff888108af6328 value:0x3000
> cat-95 [000] ..... 1.412068: submit_bio_noacct <-ext4_mpage_readpages object:0xffff888108af6328 value:0x3000
> cat-95 [000] ..... 1.412068: __submit_bio <-submit_bio_noacct object:0xffff888108af6328 value:0x3000
> cat-95 [000] ..... 1.412068: blk_try_enter_queue <-__submit_bio object:0xffff888108af6328 value:0x3000
> cat-95 [000] ..... 1.412068: rcu_read_unlock_strict <-blk_try_enter_queue object:0xffff888108af6328 value:0x3000
> cat-95 [000] ..... 1.412068: rcu_read_unlock_strict <-blk_try_enter_queue object:0xffff888108af6328 value:0x3000
> cat-95 [000] ..... 1.412068: submit_bio_checks <-__submit_bio object:0xffff888108af6328 value:0x3000
> cat-95 [000] ..... 1.412068: __cond_resched <-submit_bio_checks object:0xffff888108af6328 value:0x3000
> cat-95 [000] ..... 1.412068: rcu_all_qs <-__cond_resched object:0xffff888108af6328 value:0x3000
> cat-95 [000] ..... 1.412068: should_fail_bio <-submit_bio_checks object:0xffff888108af6328 value:0x3000
> cat-95 [000] ..... 1.412069: create_task_io_context <-submit_bio_checks object:0xffff888108af6328 value:0x3000
> cat-95 [000] ..... 1.412069: kmem_cache_alloc_node <-create_task_io_context object:0xffff888108af6328 value:0x3000
> cat-95 [000] ..... 1.412069: should_failslab <-kmem_cache_alloc_node object:0xffff888108af6328 value:0x3000
> cat-95 [000] ..... 1.412069: _raw_spin_lock <-create_task_io_context object:0xffff888108af6328 value:0x3000
> cat-95 [000] ..... 1.412069: blk_mq_submit_bio <-__submit_bio object:0xffff888108af6328 value:0x3000
> cat-95 [000] ..... 1.412069: __blk_queue_split <-blk_mq_submit_bio object:0xffff888108af6328 value:0x3000
> cat-95 [000] ..... 1.412069: bvec_split_segs <-__blk_queue_split object:0xffff888108af6328 value:0x3000
> cat-95 [000] ..... 1.412069: blk_attempt_plug_merge <-blk_mq_submit_bio object:0xffff888108af6328 value:0x3000
> cat-95 [000] ..... 1.412070: __blk_mq_sched_bio_merge <-blk_mq_submit_bio object:0xffff888108af6328 value:0x3000
> cat-95 [000] ..... 1.412070: dd_bio_merge <-blk_mq_submit_bio object:0xffff888108af6328 value:0x3000
> cat-95 [000] ..... 1.412070: _raw_spin_lock <-dd_bio_merge object:0xffff888108af6328 value:0x3000
> cat-95 [000] ..... 1.412070: blk_mq_sched_try_merge <-dd_bio_merge object:0xffff888108af6328 value:0x3000
> cat-95 [000] ..... 1.412070: elv_merge <-blk_mq_sched_try_merge object:0xffff888108af6328 value:0x3000
> cat-95 [000] ..... 1.412070: elv_rqhash_find <-elv_merge object:0xffff888108af6328 value:0x3000
> cat-95 [000] ..... 1.412070: dd_request_merge <-blk_mq_sched_try_merge object:0xffff888108af6328 value:0x3000
> cat-95 [000] ..... 1.412070: elv_rb_find <-dd_request_merge object:0xffff888108af6328 value:0x3000
> cat-95 [000] ..... 1.412070: __blk_mq_alloc_request <-blk_mq_submit_bio object:0xffff888108af6328 value:0x3000
> cat-95 [000] ..... 1.412071: dd_limit_depth <-__blk_mq_alloc_request object:0xffff888108af6328 value:0x3000
> cat-95 [000] ..... 1.412071: blk_mq_get_tag <-__blk_mq_alloc_request object:0xffff888108af6328 value:0x3000
> cat-95 [000] ..... 1.412071: __blk_mq_get_tag <-blk_mq_get_tag object:0xffff888108af6328 value:0x3000
> cat-95 [000] ..... 1.412071: blk_mq_rq_ctx_init.isra.0 <-blk_mq_submit_bio object:0xffff888108af6328 value:0x3000
> cat-95 [000] ..... 1.412071: ktime_get <-blk_mq_rq_ctx_init.isra.0 object:0xffff888108af6328 value:0x3000
> cat-95 [000] ..... 1.412071: dd_prepare_request <-blk_mq_rq_ctx_init.isra.0 object:0xffff888108af6328 value:0x3000
> cat-95 [000] ..... 1.412071: __blk_account_io_start <-blk_mq_submit_bio object:0xffff888108af6328 value:0x3000
> cat-95 [000] ..... 1.412071: kprobe_ftrace_handler <-ftrace_ops_list_func object:0xffff888108af6328 value:0x3000
> cat-95 [000] ..... 1.412071: get_kprobe <-kprobe_ftrace_handler object:0xffff888108af6328 value:0x3000
>
> Almost all changelogs were suggested by Masami(mhiramat@kernel.org)
> and steve(rostedt@goodmis.org), thank you all so much.
>
> v7:
> - use fixed-size array for object pool instead of list structure
> - use ftrace_test_recursion_trylock for function trace hook function
> - fix trace_object_ref reference count in the init_trace_object
> - invoke exit_trace_object no matter whether data->ops->free is null
> in the unregister_object_trigger
> - release private_data of event_trigger_data in the trace_object_trigger_free
> - remove [RFC] tag
>
> Note: when change to use the ftrace_test_recursion_trylock, all the functions
> will call the copy_from_kernel_nofault, I don't know where this is the problem now,
> maybe should fall back to the usage in v6.
>
> for example:
>
> cat-118 [000] ...1. 1.458998: __bio_add_page <-bio_add_page object:0xffff88811a12e9e8 value:0x0
> cat-118 [000] ...2. 1.458998: copy_from_kernel_nofault <-trace_object_events_call object:0xffff88811a12e9e8 value:0x1000
> cat-118 [000] ...2. 1.458998: copy_from_kernel_nofault_allowed <-copy_from_kernel_nofault object:0xffff88811a12e9e8 value:0x1000
> cat-118 [000] ...1. 1.458998: __rcu_read_lock <-xa_load object:0xffff88811a12e9e8 value:0x1000
> cat-118 [000] ...2. 1.458998: copy_from_kernel_nofault <-trace_object_events_call object:0xffff88811a12e9e8 value:0x1000
> cat-118 [000] ...2. 1.458998: copy_from_kernel_nofault_allowed <-copy_from_kernel_nofault object:0xffff88811a12e9e8 value:0x1000
> cat-118 [000] ...1. 1.458998: __rcu_read_unlock <-xa_load object:0xffff88811a12e9e8 value:0x1000
> cat-118 [000] ...3. 1.458998: copy_from_kernel_nofault <-trace_object_events_call object:0xffff88811a12e9e8 value:0x1000
> cat-118 [000] ...3. 1.458998: copy_from_kernel_nofault_allowed <-copy_from_kernel_nofault object:0xffff88811a12e9e8 value:0x1000
> ....
Hmm, this is strange, but I got it is the expected behavior, since the
ftrace_test_recursion_trylock() accepts one stage recursion for the
first event in the interrupt as transition event.
Steve, any good way to limit probing this transition events?
BTW, I tried your series on the
git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace.git ftrace/core
and got below build errors.
linux/kernel/trace/trace_object.c:266:3: error: ‘struct event_trigger_ops’ has no member named ‘func’
266 | .func = trace_object_trigger,
| ^~~~
linux/kernel/trace/trace_object.c:273:3: error: ‘struct event_trigger_ops’ has no member named ‘func’
273 | .func = trace_object_count_trigger,
| ^~~~
linux/kernel/trace/trace_object.c:535:3: error: ‘struct event_command’ has no member named ‘func’
535 | .func = event_object_trigger_callback,
| ^~~~
CC net/ipv6/sysctl_net_ipv6.o
This is because commit 7d28e1e7d4fa ("tracing: Change event_trigger_ops func() to trigger()")
and commit 4dfe5dff80a4 ("tracing: Change event_command func() to parse()") changed the field
names. Please update it.
Thank you,
--
Masami Hiramatsu <mhiramat@kernel.org>
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v7 0/4] trace: Introduce objtrace trigger to trace the kernel object
2022-01-18 14:24 ` [PATCH v7 0/4] trace: Introduce objtrace trigger to trace the kernel object Masami Hiramatsu
@ 2022-01-19 2:32 ` Jeff Xie
2022-01-26 15:54 ` Masami Hiramatsu
0 siblings, 1 reply; 11+ messages in thread
From: Jeff Xie @ 2022-01-19 2:32 UTC (permalink / raw)
To: Masami Hiramatsu; +Cc: Steven Rostedt, mingo, Tom Zanussi, linux-kernel
Hi Masami,
On Tue, Jan 18, 2022 at 10:24 PM Masami Hiramatsu <mhiramat@kernel.org> wrote:
>
> On Thu, 13 Jan 2022 09:38:31 +0800
> Jeff Xie <xiehuan09@gmail.com> wrote:
>
> > Introduce a method based on function tracer to trace any object and get
> > the value of the object dynamically. the object can be obtained from the
> > dynamic event (kprobe_event/uprobe_event) or the static event(tracepoint).
> >
> > Usage:
> > When using the kprobe event, only need to set the objtrace(a new trigger),
> > we can get the value of the object. The object is from the setting of the
> > kprobe event.
> >
> > For example:
> > For the function bio_add_page():
> >
> > int bio_add_page(struct bio *bio, struct page *page,
> > unsigned int len, unsigned int offset)
> >
> > Firstly, we can set the base of the object, thus the first string "arg1"
> > stands for the value of the first parameter of this function bio_add_gage(),
> >
> > # echo 'p bio_add_page arg1=$arg1' >> ./kprobe_events
> >
> > Secondly, we can get the value dynamically base the above object.
> >
> > find the offset of the bi_size in struct bio:
> > $ gdb vmlinux
> > (gdb) p &(((struct bio *)0)->bi_iter.bi_size)
> > $1 = (unsigned int *) 0x28
> >
> > # echo 'objtrace:add:arg1,0x28:u32:1 if comm == "cat"' > ./events/kprobes/ \
> > p_bio_add_page_0/trigger
> >
> > The best way to use this is that we can set the entrance event and exit
> > event, for example, the following example is to set the read_papes as
> > the entrance event, and set the __blk_account_io_start as the exit event.
> >
> > # cd /sys/kernel/debug/tracing/
> > # echo 0 > ./tracing_on
> > # echo 'p read_pages' >> ./kprobe_events
> > # echo 'p __blk_account_io_start' >> ./kprobe_events
> > # echo 'traceon if comm == "cat"' > ./events/kprobes/p_read_pages_0/trigger
> > # echo 'traceoff if comm == "cat"' > ./events/kprobes/p___blk_account_io_start_0/trigger
> > # echo 'p bio_add_page arg1=$arg1' >> ./kprobe_events
> > # echo 'objtrace:add:arg1,0x28:u32:1 if comm == "cat"' > ./events/kprobes/p_bio_add_page_0/trigger
> >
> > # du -sh /test.txt
> > 12.0K /test.txt
> >
> > # cat /test.txt > /dev/null
> > # cat ./trace
> >
> > # tracer: nop
> > #
> > # entries-in-buffer/entries-written: 50/50 #P:1
> > #
> > # _-----=> irqs-off
> > # / _----=> need-resched
> > # | / _---=> hardirq/softirq
> > # || / _--=> preempt-depth
> > # ||| / _-=> migrate-disable
> > # |||| / delay
> > # TASK-PID CPU# ||||| TIMESTAMP FUNCTION
> > # | | | ||||| | |
> > cat-95 [000] ..... 1.412065: _raw_spin_unlock_irqrestore <-event_triggers_call object:0xffff888108af6328 value:0x0
> > cat-95 [000] ..... 1.412066: __bio_try_merge_page <-bio_add_page object:0xffff888108af6328 value:0x0
> > cat-95 [000] ..... 1.412066: __bio_add_page <-bio_add_page object:0xffff888108af6328 value:0x0
> > cat-95 [000] ..... 1.412066: rcu_read_unlock_strict <-xa_load object:0xffff888108af6328 value:0x1000
> > cat-95 [000] ..... 1.412066: bio_add_page <-ext4_mpage_readpages object:0xffff888108af6328 value:0x1000
> > cat-95 [000] ..... 1.412066: kprobe_ftrace_handler <-ftrace_ops_list_func object:0xffff888108af6328 value:0x1000
> > cat-95 [000] ..... 1.412067: get_kprobe <-kprobe_ftrace_handler object:0xffff888108af6328 value:0x1000
> > cat-95 [000] ..... 1.412067: __bio_try_merge_page <-bio_add_page object:0xffff888108af6328 value:0x1000
> > cat-95 [000] ..... 1.412067: __bio_add_page <-bio_add_page object:0xffff888108af6328 value:0x1000
> > cat-95 [000] ..... 1.412067: rcu_read_unlock_strict <-xa_load object:0xffff888108af6328 value:0x2000
> > cat-95 [000] ..... 1.412067: bio_add_page <-ext4_mpage_readpages object:0xffff888108af6328 value:0x2000
> > cat-95 [000] ..... 1.412067: kprobe_ftrace_handler <-ftrace_ops_list_func object:0xffff888108af6328 value:0x2000
> > cat-95 [000] ..... 1.412067: get_kprobe <-kprobe_ftrace_handler object:0xffff888108af6328 value:0x2000
> > cat-95 [000] ..... 1.412067: __bio_try_merge_page <-bio_add_page object:0xffff888108af6328 value:0x2000
> > cat-95 [000] ..... 1.412068: submit_bio <-ext4_mpage_readpages object:0xffff888108af6328 value:0x3000
> > cat-95 [000] ..... 1.412068: submit_bio_noacct <-ext4_mpage_readpages object:0xffff888108af6328 value:0x3000
> > cat-95 [000] ..... 1.412068: __submit_bio <-submit_bio_noacct object:0xffff888108af6328 value:0x3000
> > cat-95 [000] ..... 1.412068: blk_try_enter_queue <-__submit_bio object:0xffff888108af6328 value:0x3000
> > cat-95 [000] ..... 1.412068: rcu_read_unlock_strict <-blk_try_enter_queue object:0xffff888108af6328 value:0x3000
> > cat-95 [000] ..... 1.412068: rcu_read_unlock_strict <-blk_try_enter_queue object:0xffff888108af6328 value:0x3000
> > cat-95 [000] ..... 1.412068: submit_bio_checks <-__submit_bio object:0xffff888108af6328 value:0x3000
> > cat-95 [000] ..... 1.412068: __cond_resched <-submit_bio_checks object:0xffff888108af6328 value:0x3000
> > cat-95 [000] ..... 1.412068: rcu_all_qs <-__cond_resched object:0xffff888108af6328 value:0x3000
> > cat-95 [000] ..... 1.412068: should_fail_bio <-submit_bio_checks object:0xffff888108af6328 value:0x3000
> > cat-95 [000] ..... 1.412069: create_task_io_context <-submit_bio_checks object:0xffff888108af6328 value:0x3000
> > cat-95 [000] ..... 1.412069: kmem_cache_alloc_node <-create_task_io_context object:0xffff888108af6328 value:0x3000
> > cat-95 [000] ..... 1.412069: should_failslab <-kmem_cache_alloc_node object:0xffff888108af6328 value:0x3000
> > cat-95 [000] ..... 1.412069: _raw_spin_lock <-create_task_io_context object:0xffff888108af6328 value:0x3000
> > cat-95 [000] ..... 1.412069: blk_mq_submit_bio <-__submit_bio object:0xffff888108af6328 value:0x3000
> > cat-95 [000] ..... 1.412069: __blk_queue_split <-blk_mq_submit_bio object:0xffff888108af6328 value:0x3000
> > cat-95 [000] ..... 1.412069: bvec_split_segs <-__blk_queue_split object:0xffff888108af6328 value:0x3000
> > cat-95 [000] ..... 1.412069: blk_attempt_plug_merge <-blk_mq_submit_bio object:0xffff888108af6328 value:0x3000
> > cat-95 [000] ..... 1.412070: __blk_mq_sched_bio_merge <-blk_mq_submit_bio object:0xffff888108af6328 value:0x3000
> > cat-95 [000] ..... 1.412070: dd_bio_merge <-blk_mq_submit_bio object:0xffff888108af6328 value:0x3000
> > cat-95 [000] ..... 1.412070: _raw_spin_lock <-dd_bio_merge object:0xffff888108af6328 value:0x3000
> > cat-95 [000] ..... 1.412070: blk_mq_sched_try_merge <-dd_bio_merge object:0xffff888108af6328 value:0x3000
> > cat-95 [000] ..... 1.412070: elv_merge <-blk_mq_sched_try_merge object:0xffff888108af6328 value:0x3000
> > cat-95 [000] ..... 1.412070: elv_rqhash_find <-elv_merge object:0xffff888108af6328 value:0x3000
> > cat-95 [000] ..... 1.412070: dd_request_merge <-blk_mq_sched_try_merge object:0xffff888108af6328 value:0x3000
> > cat-95 [000] ..... 1.412070: elv_rb_find <-dd_request_merge object:0xffff888108af6328 value:0x3000
> > cat-95 [000] ..... 1.412070: __blk_mq_alloc_request <-blk_mq_submit_bio object:0xffff888108af6328 value:0x3000
> > cat-95 [000] ..... 1.412071: dd_limit_depth <-__blk_mq_alloc_request object:0xffff888108af6328 value:0x3000
> > cat-95 [000] ..... 1.412071: blk_mq_get_tag <-__blk_mq_alloc_request object:0xffff888108af6328 value:0x3000
> > cat-95 [000] ..... 1.412071: __blk_mq_get_tag <-blk_mq_get_tag object:0xffff888108af6328 value:0x3000
> > cat-95 [000] ..... 1.412071: blk_mq_rq_ctx_init.isra.0 <-blk_mq_submit_bio object:0xffff888108af6328 value:0x3000
> > cat-95 [000] ..... 1.412071: ktime_get <-blk_mq_rq_ctx_init.isra.0 object:0xffff888108af6328 value:0x3000
> > cat-95 [000] ..... 1.412071: dd_prepare_request <-blk_mq_rq_ctx_init.isra.0 object:0xffff888108af6328 value:0x3000
> > cat-95 [000] ..... 1.412071: __blk_account_io_start <-blk_mq_submit_bio object:0xffff888108af6328 value:0x3000
> > cat-95 [000] ..... 1.412071: kprobe_ftrace_handler <-ftrace_ops_list_func object:0xffff888108af6328 value:0x3000
> > cat-95 [000] ..... 1.412071: get_kprobe <-kprobe_ftrace_handler object:0xffff888108af6328 value:0x3000
> >
> > Almost all changelogs were suggested by Masami(mhiramat@kernel.org)
> > and steve(rostedt@goodmis.org), thank you all so much.
> >
> > v7:
> > - use fixed-size array for object pool instead of list structure
> > - use ftrace_test_recursion_trylock for function trace hook function
> > - fix trace_object_ref reference count in the init_trace_object
> > - invoke exit_trace_object no matter whether data->ops->free is null
> > in the unregister_object_trigger
> > - release private_data of event_trigger_data in the trace_object_trigger_free
> > - remove [RFC] tag
> >
> > Note: when change to use the ftrace_test_recursion_trylock, all the functions
> > will call the copy_from_kernel_nofault, I don't know where this is the problem now,
> > maybe should fall back to the usage in v6.
> >
> > for example:
> >
> > cat-118 [000] ...1. 1.458998: __bio_add_page <-bio_add_page object:0xffff88811a12e9e8 value:0x0
> > cat-118 [000] ...2. 1.458998: copy_from_kernel_nofault <-trace_object_events_call object:0xffff88811a12e9e8 value:0x1000
> > cat-118 [000] ...2. 1.458998: copy_from_kernel_nofault_allowed <-copy_from_kernel_nofault object:0xffff88811a12e9e8 value:0x1000
> > cat-118 [000] ...1. 1.458998: __rcu_read_lock <-xa_load object:0xffff88811a12e9e8 value:0x1000
> > cat-118 [000] ...2. 1.458998: copy_from_kernel_nofault <-trace_object_events_call object:0xffff88811a12e9e8 value:0x1000
> > cat-118 [000] ...2. 1.458998: copy_from_kernel_nofault_allowed <-copy_from_kernel_nofault object:0xffff88811a12e9e8 value:0x1000
> > cat-118 [000] ...1. 1.458998: __rcu_read_unlock <-xa_load object:0xffff88811a12e9e8 value:0x1000
> > cat-118 [000] ...3. 1.458998: copy_from_kernel_nofault <-trace_object_events_call object:0xffff88811a12e9e8 value:0x1000
> > cat-118 [000] ...3. 1.458998: copy_from_kernel_nofault_allowed <-copy_from_kernel_nofault object:0xffff88811a12e9e8 value:0x1000
> > ....
>
> Hmm, this is strange, but I got it is the expected behavior, since the
> ftrace_test_recursion_trylock() accepts one stage recursion for the
> first event in the interrupt as transition event.
> Steve, any good way to limit probing this transition events?
>
>
> BTW, I tried your series on the
> git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace.git ftrace/core
> and got below build errors.
>
> linux/kernel/trace/trace_object.c:266:3: error: ‘struct event_trigger_ops’ has no member named ‘func’
> 266 | .func = trace_object_trigger,
> | ^~~~
> linux/kernel/trace/trace_object.c:273:3: error: ‘struct event_trigger_ops’ has no member named ‘func’
> 273 | .func = trace_object_count_trigger,
> | ^~~~
> linux/kernel/trace/trace_object.c:535:3: error: ‘struct event_command’ has no member named ‘func’
> 535 | .func = event_object_trigger_callback,
> | ^~~~
> CC net/ipv6/sysctl_net_ipv6.o
>
> This is because commit 7d28e1e7d4fa ("tracing: Change event_trigger_ops func() to trigger()")
> and commit 4dfe5dff80a4 ("tracing: Change event_command func() to parse()") changed the field
> names. Please update it.
>
Thanks, I will update it.
>
> Thank you,
>
> --
> Masami Hiramatsu <mhiramat@kernel.org>
---
JeffXie
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v7 0/4] trace: Introduce objtrace trigger to trace the kernel object
2022-01-19 2:32 ` Jeff Xie
@ 2022-01-26 15:54 ` Masami Hiramatsu
2022-01-30 2:37 ` Jeff Xie
0 siblings, 1 reply; 11+ messages in thread
From: Masami Hiramatsu @ 2022-01-26 15:54 UTC (permalink / raw)
To: Jeff Xie; +Cc: Steven Rostedt, mingo, Tom Zanussi, linux-kernel
Hi Jeff,
On Wed, 19 Jan 2022 10:32:49 +0800
Jeff Xie <xiehuan09@gmail.com> wrote:
> > > Note: when change to use the ftrace_test_recursion_trylock, all the functions
> > > will call the copy_from_kernel_nofault, I don't know where this is the problem now,
> > > maybe should fall back to the usage in v6.
> > >
> > > for example:
> > >
> > > cat-118 [000] ...1. 1.458998: __bio_add_page <-bio_add_page object:0xffff88811a12e9e8 value:0x0
> > > cat-118 [000] ...2. 1.458998: copy_from_kernel_nofault <-trace_object_events_call object:0xffff88811a12e9e8 value:0x1000
> > > cat-118 [000] ...2. 1.458998: copy_from_kernel_nofault_allowed <-copy_from_kernel_nofault object:0xffff88811a12e9e8 value:0x1000
> > > cat-118 [000] ...1. 1.458998: __rcu_read_lock <-xa_load object:0xffff88811a12e9e8 value:0x1000
> > > cat-118 [000] ...2. 1.458998: copy_from_kernel_nofault <-trace_object_events_call object:0xffff88811a12e9e8 value:0x1000
> > > cat-118 [000] ...2. 1.458998: copy_from_kernel_nofault_allowed <-copy_from_kernel_nofault object:0xffff88811a12e9e8 value:0x1000
> > > cat-118 [000] ...1. 1.458998: __rcu_read_unlock <-xa_load object:0xffff88811a12e9e8 value:0x1000
> > > cat-118 [000] ...3. 1.458998: copy_from_kernel_nofault <-trace_object_events_call object:0xffff88811a12e9e8 value:0x1000
> > > cat-118 [000] ...3. 1.458998: copy_from_kernel_nofault_allowed <-copy_from_kernel_nofault object:0xffff88811a12e9e8 value:0x1000
> > > ....
> >
> > Hmm, this is strange, but I got it is the expected behavior, since the
> > ftrace_test_recursion_trylock() accepts one stage recursion for the
> > first event in the interrupt as transition event.
I think you should revert that change and back to your own per-cpu recursion
flag instead of using ftrace_test_recursion_trylock().
Thank you,
--
Masami Hiramatsu <mhiramat@kernel.org>
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v7 0/4] trace: Introduce objtrace trigger to trace the kernel object
2022-01-26 15:54 ` Masami Hiramatsu
@ 2022-01-30 2:37 ` Jeff Xie
0 siblings, 0 replies; 11+ messages in thread
From: Jeff Xie @ 2022-01-30 2:37 UTC (permalink / raw)
To: Masami Hiramatsu; +Cc: Steven Rostedt, mingo, Tom Zanussi, linux-kernel
Hi Masami,
On Wed, Jan 26, 2022 at 11:54 PM Masami Hiramatsu <mhiramat@kernel.org> wrote:
>
> Hi Jeff,
>
> On Wed, 19 Jan 2022 10:32:49 +0800
> Jeff Xie <xiehuan09@gmail.com> wrote:
>
> > > > Note: when change to use the ftrace_test_recursion_trylock, all the functions
> > > > will call the copy_from_kernel_nofault, I don't know where this is the problem now,
> > > > maybe should fall back to the usage in v6.
> > > >
> > > > for example:
> > > >
> > > > cat-118 [000] ...1. 1.458998: __bio_add_page <-bio_add_page object:0xffff88811a12e9e8 value:0x0
> > > > cat-118 [000] ...2. 1.458998: copy_from_kernel_nofault <-trace_object_events_call object:0xffff88811a12e9e8 value:0x1000
> > > > cat-118 [000] ...2. 1.458998: copy_from_kernel_nofault_allowed <-copy_from_kernel_nofault object:0xffff88811a12e9e8 value:0x1000
> > > > cat-118 [000] ...1. 1.458998: __rcu_read_lock <-xa_load object:0xffff88811a12e9e8 value:0x1000
> > > > cat-118 [000] ...2. 1.458998: copy_from_kernel_nofault <-trace_object_events_call object:0xffff88811a12e9e8 value:0x1000
> > > > cat-118 [000] ...2. 1.458998: copy_from_kernel_nofault_allowed <-copy_from_kernel_nofault object:0xffff88811a12e9e8 value:0x1000
> > > > cat-118 [000] ...1. 1.458998: __rcu_read_unlock <-xa_load object:0xffff88811a12e9e8 value:0x1000
> > > > cat-118 [000] ...3. 1.458998: copy_from_kernel_nofault <-trace_object_events_call object:0xffff88811a12e9e8 value:0x1000
> > > > cat-118 [000] ...3. 1.458998: copy_from_kernel_nofault_allowed <-copy_from_kernel_nofault object:0xffff88811a12e9e8 value:0x1000
> > > > ....
> > >
> > > Hmm, this is strange, but I got it is the expected behavior, since the
> > > ftrace_test_recursion_trylock() accepts one stage recursion for the
> > > first event in the interrupt as transition event.
>
> I think you should revert that change and back to your own per-cpu recursion
> flag instead of using ftrace_test_recursion_trylock().
Thanks, I will revert it.
> Thank you,
>
> --
> Masami Hiramatsu <mhiramat@kernel.org>
---
JeffXie
^ permalink raw reply [flat|nested] 11+ messages in thread