BPF Archive on lore.kernel.org
 help / color / Atom feed
From: Daniel Xu <dxu@dxuuu.xyz>
To: bpf@vger.kernel.org, songliubraving@fb.com, yhs@fb.com,
	andriin@fb.com, peterz@infradead.org, mingo@redhat.com,
	acme@kernel.org
Cc: Daniel Xu <dxu@dxuuu.xyz>,
	ast@fb.com, alexander.shishkin@linux.intel.com, jolsa@redhat.com,
	namhyung@kernel.org, linux-kernel@vger.kernel.org,
	netdev@vger.kernel.org, kernel-team@fb.com
Subject: [PATCH bpf-next 1/5] perf/core: Add PERF_FORMAT_LOST read_format
Date: Tue, 17 Sep 2019 06:30:52 -0700
Message-ID: <20190917133056.5545-2-dxu@dxuuu.xyz> (raw)
In-Reply-To: <20190917133056.5545-1-dxu@dxuuu.xyz>

It's useful to know kprobe's nmissed count. For example with tracing
tools, it's important to know when events may have been lost.  debugfs
currently exposes a control file to get this information, but it is not
compatible with probes registered with the perf API.

While bpf programs may be able to manually count nhit, there is no way
to gather nmissed. In other words, it is currently not possible to this
retrieve information about FD-based probes.

This patch adds a new field to perf's read_format that lets users query
misses. Misses include both misses from the underlying kprobe
infrastructure and misses from ringbuffer infrastructure.

Signed-off-by: Daniel Xu <dxu@dxuuu.xyz>
---
 include/linux/trace_events.h    |  1 +
 include/uapi/linux/perf_event.h |  5 ++++-
 kernel/events/core.c            | 39 ++++++++++++++++++++++++++++++---
 kernel/trace/trace_kprobe.c     |  8 +++++++
 4 files changed, 49 insertions(+), 4 deletions(-)

diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 30a8cdcfd4a4..952520c1240a 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -587,6 +587,7 @@ extern int bpf_get_kprobe_info(const struct perf_event *event,
 			       u32 *fd_type, const char **symbol,
 			       u64 *probe_offset, u64 *probe_addr,
 			       bool perf_type_tracepoint);
+extern u64 perf_kprobe_missed(const struct perf_event *event);
 #endif
 #ifdef CONFIG_UPROBE_EVENTS
 extern int  perf_uprobe_init(struct perf_event *event,
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 7198ddd0c6b1..bd874c7257f0 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -273,6 +273,7 @@ enum {
  *	  { u64		time_enabled; } && PERF_FORMAT_TOTAL_TIME_ENABLED
  *	  { u64		time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING
  *	  { u64		id;           } && PERF_FORMAT_ID
+ *	  { u64		missed;       } && PERF_FORMAT_LOST
  *	} && !PERF_FORMAT_GROUP
  *
  *	{ u64		nr;
@@ -280,6 +281,7 @@ enum {
  *	  { u64		time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING
  *	  { u64		value;
  *	    { u64	id;           } && PERF_FORMAT_ID
+ *	    { u64	missed;       } && PERF_FORMAT_LOST
  *	  }		cntr[nr];
  *	} && PERF_FORMAT_GROUP
  * };
@@ -289,8 +291,9 @@ enum perf_event_read_format {
 	PERF_FORMAT_TOTAL_TIME_RUNNING		= 1U << 1,
 	PERF_FORMAT_ID				= 1U << 2,
 	PERF_FORMAT_GROUP			= 1U << 3,
+	PERF_FORMAT_LOST			= 1U << 4,
 
-	PERF_FORMAT_MAX = 1U << 4,		/* non-ABI */
+	PERF_FORMAT_MAX = 1U << 5,		/* non-ABI */
 };
 
 #define PERF_ATTR_SIZE_VER0	64	/* sizeof first published struct */
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 0463c1151bae..ee08d3ed6299 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1715,6 +1715,9 @@ static void __perf_event_read_size(struct perf_event *event, int nr_siblings)
 	if (event->attr.read_format & PERF_FORMAT_ID)
 		entry += sizeof(u64);
 
+	if (event->attr.read_format & PERF_FORMAT_LOST)
+		entry += sizeof(u64);
+
 	if (event->attr.read_format & PERF_FORMAT_GROUP) {
 		nr += nr_siblings;
 		size += sizeof(u64);
@@ -4734,6 +4737,24 @@ u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
 }
 EXPORT_SYMBOL_GPL(perf_event_read_value);
 
+static struct pmu perf_kprobe;
+static u64 perf_event_lost(struct perf_event *event)
+{
+	struct ring_buffer *rb;
+	u64 lost = 0;
+
+	rcu_read_lock();
+	rb = rcu_dereference(event->rb);
+	if (likely(!!rb))
+		lost += local_read(&rb->lost);
+	rcu_read_unlock();
+
+	if (event->attr.type == perf_kprobe.type)
+		lost += perf_kprobe_missed(event);
+
+	return lost;
+}
+
 static int __perf_read_group_add(struct perf_event *leader,
 					u64 read_format, u64 *values)
 {
@@ -4770,11 +4791,15 @@ static int __perf_read_group_add(struct perf_event *leader,
 	values[n++] += perf_event_count(leader);
 	if (read_format & PERF_FORMAT_ID)
 		values[n++] = primary_event_id(leader);
+	if (read_format & PERF_FORMAT_LOST)
+		values[n++] = perf_event_lost(leader);
 
 	for_each_sibling_event(sub, leader) {
 		values[n++] += perf_event_count(sub);
 		if (read_format & PERF_FORMAT_ID)
 			values[n++] = primary_event_id(sub);
+		if (read_format & PERF_FORMAT_LOST)
+			values[n++] = perf_event_lost(sub);
 	}
 
 	raw_spin_unlock_irqrestore(&ctx->lock, flags);
@@ -4831,7 +4856,7 @@ static int perf_read_one(struct perf_event *event,
 				 u64 read_format, char __user *buf)
 {
 	u64 enabled, running;
-	u64 values[4];
+	u64 values[5];
 	int n = 0;
 
 	values[n++] = __perf_event_read_value(event, &enabled, &running);
@@ -4841,6 +4866,8 @@ static int perf_read_one(struct perf_event *event,
 		values[n++] = running;
 	if (read_format & PERF_FORMAT_ID)
 		values[n++] = primary_event_id(event);
+	if (read_format & PERF_FORMAT_LOST)
+		values[n++] = perf_event_lost(event);
 
 	if (copy_to_user(buf, values, n * sizeof(u64)))
 		return -EFAULT;
@@ -6141,7 +6168,7 @@ static void perf_output_read_one(struct perf_output_handle *handle,
 				 u64 enabled, u64 running)
 {
 	u64 read_format = event->attr.read_format;
-	u64 values[4];
+	u64 values[5];
 	int n = 0;
 
 	values[n++] = perf_event_count(event);
@@ -6155,6 +6182,8 @@ static void perf_output_read_one(struct perf_output_handle *handle,
 	}
 	if (read_format & PERF_FORMAT_ID)
 		values[n++] = primary_event_id(event);
+	if (read_format & PERF_FORMAT_LOST)
+		values[n++] = perf_event_lost(event);
 
 	__output_copy(handle, values, n * sizeof(u64));
 }
@@ -6165,7 +6194,7 @@ static void perf_output_read_group(struct perf_output_handle *handle,
 {
 	struct perf_event *leader = event->group_leader, *sub;
 	u64 read_format = event->attr.read_format;
-	u64 values[5];
+	u64 values[6];
 	int n = 0;
 
 	values[n++] = 1 + leader->nr_siblings;
@@ -6183,6 +6212,8 @@ static void perf_output_read_group(struct perf_output_handle *handle,
 	values[n++] = perf_event_count(leader);
 	if (read_format & PERF_FORMAT_ID)
 		values[n++] = primary_event_id(leader);
+	if (read_format & PERF_FORMAT_LOST)
+		values[n++] = perf_event_lost(leader);
 
 	__output_copy(handle, values, n * sizeof(u64));
 
@@ -6196,6 +6227,8 @@ static void perf_output_read_group(struct perf_output_handle *handle,
 		values[n++] = perf_event_count(sub);
 		if (read_format & PERF_FORMAT_ID)
 			values[n++] = primary_event_id(sub);
+		if (read_format & PERF_FORMAT_LOST)
+			values[n++] = perf_event_lost(sub);
 
 		__output_copy(handle, values, n * sizeof(u64));
 	}
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 9d483ad9bb6c..cff471c8750b 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -196,6 +196,14 @@ bool trace_kprobe_error_injectable(struct trace_event_call *call)
 	return within_error_injection_list(trace_kprobe_address(tk));
 }
 
+u64 perf_kprobe_missed(const struct perf_event *event)
+{
+	struct trace_event_call *call = event->tp_event;
+	struct trace_kprobe *tk = (struct trace_kprobe *)call->data;
+
+	return tk->rp.kp.nmissed;
+}
+
 static int register_kprobe_event(struct trace_kprobe *tk);
 static int unregister_kprobe_event(struct trace_kprobe *tk);
 
-- 
2.21.0


  reply index

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-09-17 13:30 [PATCH bpf-next 0/5] " Daniel Xu
2019-09-17 13:30 ` Daniel Xu [this message]
2019-09-17 14:32   ` [PATCH bpf-next 1/5] perf/core: " kbuild test robot
2019-09-17 15:22   ` kbuild test robot
2019-09-24  8:33   ` Jiri Olsa
2019-09-27 21:28     ` Daniel Xu
2019-10-25 18:19       ` Daniel Xu
2019-10-28  9:12     ` Peter Zijlstra
2019-09-17 13:30 ` [PATCH bpf-next 2/5] perf/core: Sync perf_event.h to tools Daniel Xu
2019-09-17 13:30 ` [PATCH bpf-next 3/5] libbpf: Add helpers to extract perf fd from bpf_link Daniel Xu
2019-09-17 13:30 ` [PATCH bpf-next 4/5] libbpf: Set read_format PERF_FORMAT_LOST on kprobe perf fds Daniel Xu
2019-09-17 13:30 ` [PATCH bpf-next 5/5] libbpf: Add selftest for PERF_FORMAT_LOST perf read_format Daniel Xu
2019-09-17 15:14 [PATCH bpf-next 1/5] perf/core: Add PERF_FORMAT_LOST read_format Daniel Xu

Reply instructions:

You may reply publically to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190917133056.5545-2-dxu@dxuuu.xyz \
    --to=dxu@dxuuu.xyz \
    --cc=acme@kernel.org \
    --cc=alexander.shishkin@linux.intel.com \
    --cc=andriin@fb.com \
    --cc=ast@fb.com \
    --cc=bpf@vger.kernel.org \
    --cc=jolsa@redhat.com \
    --cc=kernel-team@fb.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=namhyung@kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=peterz@infradead.org \
    --cc=songliubraving@fb.com \
    --cc=yhs@fb.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

BPF Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/bpf/0 bpf/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 bpf bpf/ https://lore.kernel.org/bpf \
		bpf@vger.kernel.org
	public-inbox-index bpf

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.bpf


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git