linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Steven Rostedt <rostedt@goodmis.org>
To: linux-kernel@vger.kernel.org
Cc: Ingo Molnar <mingo@kernel.org>,
	Andrew Morton <akpm@linux-foundation.org>
Subject: [for-next][PATCH 07/21] ring-buffer: Have rb_iter_head_event() handle concurrent writer
Date: Sun, 29 Mar 2020 14:42:59 -0400	[thread overview]
Message-ID: <20200329184316.505376001@goodmis.org> (raw)
In-Reply-To: 20200329184252.289087453@goodmis.org

From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>

Have the ring_buffer_iter structure have a place to store an event, such
that it can not be overwritten by a writer, and load it in such a way via
rb_iter_head_event() that it will return NULL and reset the iter to the
start of the current page if a writer updated the page.

Link: http://lkml.kernel.org/r/20200317213416.306959216@goodmis.org

Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/ring_buffer.c | 106 ++++++++++++++++++++++++++-----------
 1 file changed, 75 insertions(+), 31 deletions(-)

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index e689bdcb53e8..3d718add73c1 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -503,11 +503,13 @@ struct trace_buffer {
 struct ring_buffer_iter {
 	struct ring_buffer_per_cpu	*cpu_buffer;
 	unsigned long			head;
+	unsigned long			next_event;
 	struct buffer_page		*head_page;
 	struct buffer_page		*cache_reader_page;
 	unsigned long			cache_read;
 	u64				read_stamp;
 	u64				page_stamp;
+	struct ring_buffer_event	*event;
 };
 
 /**
@@ -1914,15 +1916,59 @@ rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer)
 			       cpu_buffer->reader_page->read);
 }
 
-static __always_inline struct ring_buffer_event *
-rb_iter_head_event(struct ring_buffer_iter *iter)
+static __always_inline unsigned rb_page_commit(struct buffer_page *bpage)
 {
-	return __rb_page_index(iter->head_page, iter->head);
+	return local_read(&bpage->page->commit);
 }
 
-static __always_inline unsigned rb_page_commit(struct buffer_page *bpage)
+static struct ring_buffer_event *
+rb_iter_head_event(struct ring_buffer_iter *iter)
 {
-	return local_read(&bpage->page->commit);
+	struct ring_buffer_event *event;
+	struct buffer_page *iter_head_page = iter->head_page;
+	unsigned long commit;
+	unsigned length;
+
+	/*
+	 * When the writer goes across pages, it issues a cmpxchg which
+	 * is a mb(), which will synchronize with the rmb here.
+	 * (see rb_tail_page_update() and __rb_reserve_next())
+	 */
+	commit = rb_page_commit(iter_head_page);
+	smp_rmb();
+	event = __rb_page_index(iter_head_page, iter->head);
+	length = rb_event_length(event);
+
+	/*
+	 * READ_ONCE() doesn't work on functions and we don't want the
+	 * compiler doing any crazy optimizations with length.
+	 */
+	barrier();
+
+	if ((iter->head + length) > commit || length > BUF_MAX_DATA_SIZE)
+		/* Writer corrupted the read? */
+		goto reset;
+
+	memcpy(iter->event, event, length);
+	/*
+	 * If the page stamp is still the same after this rmb() then the
+	 * event was safely copied without the writer entering the page.
+	 */
+	smp_rmb();
+
+	/* Make sure the page didn't change since we read this */
+	if (iter->page_stamp != iter_head_page->page->time_stamp ||
+	    commit > rb_page_commit(iter_head_page))
+		goto reset;
+
+	iter->next_event = iter->head + length;
+	return iter->event;
+ reset:
+	/* Reset to the beginning */
+	iter->page_stamp = iter->read_stamp = iter->head_page->page->time_stamp;
+	iter->head = 0;
+	iter->next_event = 0;
+	return NULL;
 }
 
 /* Size is determined by what has been committed */
@@ -1962,6 +2008,7 @@ static void rb_inc_iter(struct ring_buffer_iter *iter)
 
 	iter->page_stamp = iter->read_stamp = iter->head_page->page->time_stamp;
 	iter->head = 0;
+	iter->next_event = 0;
 }
 
 /*
@@ -3548,6 +3595,7 @@ static void rb_iter_reset(struct ring_buffer_iter *iter)
 	/* Iterator usage is expected to have record disabled */
 	iter->head_page = cpu_buffer->reader_page;
 	iter->head = cpu_buffer->reader_page->read;
+	iter->next_event = iter->head;
 
 	iter->cache_reader_page = iter->head_page;
 	iter->cache_read = cpu_buffer->read;
@@ -3625,7 +3673,7 @@ int ring_buffer_iter_empty(struct ring_buffer_iter *iter)
 		return 0;
 
 	/* Still racy, as it may return a false positive, but that's OK */
-	return ((iter->head_page == commit_page && iter->head == commit) ||
+	return ((iter->head_page == commit_page && iter->head >= commit) ||
 		(iter->head_page == reader && commit_page == head_page &&
 		 head_page->read == commit &&
 		 iter->head == rb_page_commit(cpu_buffer->reader_page)));
@@ -3853,15 +3901,22 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
 static void rb_advance_iter(struct ring_buffer_iter *iter)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
-	struct ring_buffer_event *event;
-	unsigned length;
 
 	cpu_buffer = iter->cpu_buffer;
 
+	/* If head == next_event then we need to jump to the next event */
+	if (iter->head == iter->next_event) {
+		/* If the event gets overwritten again, there's nothing to do */
+		if (rb_iter_head_event(iter) == NULL)
+			return;
+	}
+
+	iter->head = iter->next_event;
+
 	/*
 	 * Check if we are at the end of the buffer.
 	 */
-	if (iter->head >= rb_page_size(iter->head_page)) {
+	if (iter->next_event >= rb_page_size(iter->head_page)) {
 		/* discarded commits can make the page empty */
 		if (iter->head_page == cpu_buffer->commit_page)
 			return;
@@ -3869,27 +3924,7 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
 		return;
 	}
 
-	event = rb_iter_head_event(iter);
-
-	length = rb_event_length(event);
-
-	/*
-	 * This should not be called to advance the header if we are
-	 * at the tail of the buffer.
-	 */
-	if (RB_WARN_ON(cpu_buffer,
-		       (iter->head_page == cpu_buffer->commit_page) &&
-		       (iter->head + length > rb_commit_index(cpu_buffer))))
-		return;
-
-	rb_update_iter_read_stamp(iter, event);
-
-	iter->head += length;
-
-	/* check for end of page padding */
-	if ((iter->head >= rb_page_size(iter->head_page)) &&
-	    (iter->head_page != cpu_buffer->commit_page))
-		rb_inc_iter(iter);
+	rb_update_iter_read_stamp(iter, iter->event);
 }
 
 static int rb_lost_events(struct ring_buffer_per_cpu *cpu_buffer)
@@ -4017,6 +4052,8 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
 	}
 
 	event = rb_iter_head_event(iter);
+	if (!event)
+		goto again;
 
 	switch (event->type_len) {
 	case RINGBUF_TYPE_PADDING:
@@ -4233,10 +4270,16 @@ ring_buffer_read_prepare(struct trace_buffer *buffer, int cpu, gfp_t flags)
 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
 		return NULL;
 
-	iter = kmalloc(sizeof(*iter), flags);
+	iter = kzalloc(sizeof(*iter), flags);
 	if (!iter)
 		return NULL;
 
+	iter->event = kmalloc(BUF_MAX_DATA_SIZE, flags);
+	if (!iter->event) {
+		kfree(iter);
+		return NULL;
+	}
+
 	cpu_buffer = buffer->buffers[cpu];
 
 	iter->cpu_buffer = cpu_buffer;
@@ -4317,6 +4360,7 @@ ring_buffer_read_finish(struct ring_buffer_iter *iter)
 
 	atomic_dec(&cpu_buffer->record_disabled);
 	atomic_dec(&cpu_buffer->buffer->resize_disabled);
+	kfree(iter->event);
 	kfree(iter);
 }
 EXPORT_SYMBOL_GPL(ring_buffer_read_finish);
-- 
2.25.1



  parent reply	other threads:[~2020-03-29 18:44 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-03-29 18:42 [for-next][PATCH 00/21] tracing: Updates for v5.7 Steven Rostedt
2020-03-29 18:42 ` [for-next][PATCH 01/21] tracing: Use address-of operator on section symbols Steven Rostedt
2020-03-29 18:42 ` [for-next][PATCH 02/21] selftest/ftrace: Fix function trigger test to handle trace not disabling the tracer Steven Rostedt
2020-03-29 18:42 ` [for-next][PATCH 03/21] tracing: Save off entry when peeking at next entry Steven Rostedt
2020-03-29 18:42 ` [for-next][PATCH 04/21] ring-buffer: Have ring_buffer_empty() not depend on tracing stopped Steven Rostedt
2020-03-29 18:42 ` [for-next][PATCH 05/21] ring-buffer: Rename ring_buffer_read() to read_buffer_iter_advance() Steven Rostedt
2020-03-29 18:42 ` [for-next][PATCH 06/21] ring-buffer: Add page_stamp to iterator for synchronization Steven Rostedt
2020-03-29 18:42 ` Steven Rostedt [this message]
2020-03-29 18:43 ` [for-next][PATCH 08/21] ring-buffer: Do not die if rb_iter_peek() fails more than thrice Steven Rostedt
2020-03-29 18:43 ` [for-next][PATCH 09/21] ring-buffer: Optimize rb_iter_head_event() Steven Rostedt
2020-03-29 18:43 ` [for-next][PATCH 10/21] ring-buffer: Make resize disable per cpu buffer instead of total buffer Steven Rostedt
2020-03-29 18:43 ` [for-next][PATCH 11/21] ring-buffer: Do not disable recording when there is an iterator Steven Rostedt
2020-03-29 18:43 ` [for-next][PATCH 12/21] tracing: Do not disable tracing when reading the trace file Steven Rostedt
2020-03-29 18:43 ` [for-next][PATCH 13/21] ring-buffer/tracing: Have iterator acknowledge dropped events Steven Rostedt
2020-03-29 18:43 ` [for-next][PATCH 14/21] tracing: Have the document reflect that the trace file keeps tracing enabled Steven Rostedt
2020-03-29 18:43 ` [for-next][PATCH 15/21] ftrace/kprobe: Show the maxactive number on kprobe_events Steven Rostedt
2020-03-31 13:11   ` Sasha Levin
2020-03-29 18:43 ` [for-next][PATCH 16/21] ftrace: Make function trace pid filtering a bit more exact Steven Rostedt
2020-03-29 18:43 ` [for-next][PATCH 17/21] ftrace: Create set_ftrace_notrace_pid to not trace tasks Steven Rostedt
2020-03-29 18:43 ` [for-next][PATCH 18/21] tracing: Create set_event_notrace_pid " Steven Rostedt
2020-03-29 18:43 ` [for-next][PATCH 19/21] selftests/ftrace: Add test to test new set_ftrace_notrace_pid file Steven Rostedt
2020-03-29 18:43 ` [for-next][PATCH 20/21] selftests/ftrace: Add test to test new set_event_notrace_pid file Steven Rostedt
2020-03-29 18:43 ` [for-next][PATCH 21/21] tracing: Add documentation on set_ftrace_notrace_pid and set_event_notrace_pid Steven Rostedt

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200329184316.505376001@goodmis.org \
    --to=rostedt@goodmis.org \
    --cc=akpm@linux-foundation.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).