linux-kernel.vger.kernel.org archive mirror
From: Vaibhav Nagarnaik <vnagarnaik@google.com>
To: Steven Rostedt <rostedt@goodmis.org>,
	Frederic Weisbecker <fweisbec@gmail.com>,
	Ingo Molnar <mingo@redhat.com>
Cc: Michael Rubin <mrubin@google.com>,
	David Sharp <dhsharp@google.com>,
	linux-kernel@vger.kernel.org,
	Vaibhav Nagarnaik <vnagarnaik@google.com>
Subject: [PATCH 4/5] trace: Make removal of ring buffer pages atomic
Date: Tue, 26 Jul 2011 15:59:53 -0700	[thread overview]
Message-ID: <1311721194-12164-5-git-send-email-vnagarnaik@google.com> (raw)
In-Reply-To: <1311721194-12164-1-git-send-email-vnagarnaik@google.com>

This patch adds the capability to remove pages from a ring buffer
atomically while write operations are going on. This makes it possible
to reduce the ring buffer size without losing the latest events
recorded in it.

This is done by removing the page that follows the tail page, which
ensures that the empty pages in the ring buffer are removed first. If
the page after the tail page happens to be the head page, that page is
removed and the head page is advanced to the next page. This drops the
oldest data from the ring buffer while keeping the latest data around
to be read.
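
The core of the scheme is that the page after the tail page is
unlinked with a compare-and-swap on the tail page's next pointer, so a
writer that concurrently moves the tail simply makes the cmpxchg fail
and the remover retries with the fresh tail. The stand-alone C sketch
below illustrates only that idea with a plain circular list; the names
(struct page_node, remove_one_page, the main() harness) are
hypothetical and much simpler than the kernel's flag-tagged list
pointers in rb_remove_pages():

	#include <stdatomic.h>
	#include <stdio.h>

	struct page_node {
		_Atomic(struct page_node *) next;
		int id;
	};

	/*
	 * Try to unlink the page that follows 'tail'.  Returns the removed
	 * page, or NULL if a concurrent writer changed tail->next first.
	 */
	static struct page_node *remove_one_page(struct page_node *tail)
	{
		struct page_node *victim = atomic_load(&tail->next);
		struct page_node *after  = atomic_load(&victim->next);

		if (atomic_compare_exchange_strong(&tail->next, &victim, after))
			return victim;	/* caller frees it once unreferenced */
		return NULL;		/* lost the race; caller retries */
	}

	int main(void)
	{
		/* tiny circular list standing in for buffer pages: a->b->c->a */
		struct page_node a = { .id = 0 }, b = { .id = 1 }, c = { .id = 2 };
		struct page_node *removed;

		atomic_store(&a.next, &b);
		atomic_store(&b.next, &c);
		atomic_store(&c.next, &a);

		removed = remove_one_page(&a);
		printf("removed page %d, page %d now follows page %d\n",
		       removed ? removed->id : -1,
		       atomic_load(&a.next)->id, a.id);
		return 0;
	}

The patch below follows the same retry-on-failure structure, except
that the next pointers also carry the RB_PAGE_HEAD flag bit and the
retries are bounded (3 attempts) before warning and stopping.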

Signed-off-by: Vaibhav Nagarnaik <vnagarnaik@google.com>
---
 kernel/trace/ring_buffer.c |  214 ++++++++++++++++++++++++++++++++++---------
 kernel/trace/trace.c       |   20 +----
 2 files changed, 170 insertions(+), 64 deletions(-)

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 83450c9..0b43758 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -23,6 +23,8 @@
 #include <asm/local.h>
 #include "trace.h"
 
+static void update_pages_handler(struct work_struct *work);
+
 /*
  * The ring buffer header is special. We must manually up keep it.
  */
@@ -502,6 +504,8 @@ struct ring_buffer_per_cpu {
 	/* ring buffer pages to update, > 0 to add, < 0 to remove */
 	int				nr_pages_to_update;
 	struct list_head		new_pages; /* new pages to add */
+	struct work_struct		update_pages_work;
+	struct completion		update_completion;
 };
 
 struct ring_buffer {
@@ -1080,6 +1084,8 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int nr_pages, int cpu)
 	spin_lock_init(&cpu_buffer->reader_lock);
 	lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key);
 	cpu_buffer->lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
+	INIT_WORK(&cpu_buffer->update_pages_work, update_pages_handler);
+	init_completion(&cpu_buffer->update_completion);
 
 	bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
 			    GFP_KERNEL, cpu_to_node(cpu));
@@ -1267,29 +1273,142 @@ void ring_buffer_set_clock(struct ring_buffer *buffer,
 
 static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer);
 
+static inline unsigned long rb_page_entries(struct buffer_page *bpage)
+{
+	return local_read(&bpage->entries) & RB_WRITE_MASK;
+}
+
+static inline unsigned long rb_page_write(struct buffer_page *bpage)
+{
+	return local_read(&bpage->write) & RB_WRITE_MASK;
+}
+
 static void
 rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
 {
-	struct buffer_page *bpage;
-	struct list_head *p;
-	unsigned i;
+	struct list_head *tail_page;
+	unsigned int nr_removed;
+	int retries, page_entries;
+	struct list_head *to_remove, *next_page, *ret;
+	struct buffer_page *to_remove_page, *next_buffer_page;
 
 	spin_lock_irq(&cpu_buffer->reader_lock);
-	rb_head_page_deactivate(cpu_buffer);
+	/*
+	 * We don't race with the readers since we have acquired the reader
+	 * lock.
+	 * The only race would be with the writer in 2 conditions:
+	 * 1. When moving to the new page to write (not head)
+	 * 2. When moving to the head page
+	 * In both these cases, we make sure that if we get any failures, we
+	 * pick the next page available and continue the delete operation.
+	 */
+	nr_removed = 0;
+	retries = 3;
+	while (nr_removed < nr_pages) {
 
-	for (i = 0; i < nr_pages; i++) {
 		if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages)))
 			goto out;
-		p = cpu_buffer->pages->next;
-		bpage = list_entry(p, struct buffer_page, list);
-		list_del_init(&bpage->list);
-		free_buffer_page(bpage);
-	}
-	if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages)))
-		goto out;
 
-	rb_reset_cpu(cpu_buffer);
-	rb_check_pages(cpu_buffer);
+		/*
+		 * Always get the fresh copy, the writer might have moved the
+		 * tail page while we are in this operation
+		 */
+		tail_page = &cpu_buffer->tail_page->list;
+		/*
+		 * tail page might be on reader page, we remove the next page
+		 * from the ring buffer
+		 */
+		if (cpu_buffer->tail_page == cpu_buffer->reader_page)
+			tail_page = rb_list_head(tail_page->next);
+		to_remove = tail_page->next;
+		next_page = rb_list_head(to_remove)->next;
+
+		ret = NULL;
+		if (((unsigned long)to_remove & RB_FLAG_MASK) == RB_PAGE_HEAD) {
+			/*
+			 * this is a head page, we have to set RB_PAGE_HEAD
+			 * flag while updating the next pointer
+			 */
+			unsigned long tmp = (unsigned long)next_page |
+							RB_PAGE_HEAD;
+			ret = cmpxchg(&tail_page->next, to_remove,
+					(struct list_head *) tmp);
+
+		} else if (((unsigned long)to_remove & ~RB_PAGE_HEAD) ==
+					(unsigned long)to_remove) {
+
+			/* not a head page, just update the next pointer */
+			ret = cmpxchg(&tail_page->next, to_remove, next_page);
+
+		} else {
+			/*
+			 * this means that this page is being operated on
+			 * try the next page in the list
+			 */
+		}
+
+		if (ret != to_remove) {
+			/*
+			 * Well, try again with the next page.
+			 * If we cannot move the page in 3 retries, there are
+			 * lot of interrupts on this cpu and probably causing
+			 * some weird behavior. Warn in this case and stop
+			 * tracing
+			 */
+			if (RB_WARN_ON(cpu_buffer, !retries--))
+				break;
+			else
+				continue;
+		}
+
+		/*
+		 * point the next_page->prev to skip the to_remove page to
+		 * complete the removal process
+		 */
+		rb_list_head(next_page)->prev = rb_list_head(to_remove)->prev;
+
+		/* yay, we removed the page */
+		nr_removed++;
+		/* for the next page to remove, reset retry counter */
+		retries = 3;
+
+		to_remove_page = list_entry(rb_list_head(to_remove),
+					struct buffer_page, list);
+		next_buffer_page = list_entry(rb_list_head(next_page),
+					struct buffer_page, list);
+
+		if (cpu_buffer->head_page == to_remove_page) {
+			/*
+			 * update head page and change read pointer to make
+			 * sure any read iterators reset themselves
+			 */
+			cpu_buffer->head_page = next_buffer_page;
+			cpu_buffer->read = 0;
+		}
+		/* Also, update the start of cpu_buffer to keep it valid */
+		cpu_buffer->pages = rb_list_head(next_page);
+
+		/* update the counters */
+		page_entries = rb_page_entries(to_remove_page);
+		if (page_entries) {
+			/*
+			 * If something was added to this page, it was full
+			 * since it is not the tail page. So we deduct the
+			 * bytes consumed in ring buffer from here.
+			 * No need to update overruns, since this page is
+			 * deleted from ring buffer and its entries are
+			 * already accounted for.
+			 */
+			local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes);
+		}
+
+		/*
+		 * We have already removed references to this list item, just
+		 * free up the buffer_page and its page
+		 */
+		free_buffer_page(to_remove_page);
+	}
+	RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages));
 
 out:
 	spin_unlock_irq(&cpu_buffer->reader_lock);
@@ -1303,6 +1422,12 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
 	struct list_head *p;
 	unsigned i;
 
+	/* stop the writers while inserting pages */
+	atomic_inc(&cpu_buffer->record_disabled);
+
+	/* Make sure all writers are done with this buffer. */
+	synchronize_sched();
+
 	spin_lock_irq(&cpu_buffer->reader_lock);
 	rb_head_page_deactivate(cpu_buffer);
 
@@ -1319,17 +1444,21 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
 
 out:
 	spin_unlock_irq(&cpu_buffer->reader_lock);
+	atomic_dec(&cpu_buffer->record_disabled);
 }
 
-static void update_pages_handler(struct ring_buffer_per_cpu *cpu_buffer)
+static void update_pages_handler(struct work_struct *work)
 {
+	struct ring_buffer_per_cpu *cpu_buffer = container_of(work,
+			struct ring_buffer_per_cpu, update_pages_work);
+
 	if (cpu_buffer->nr_pages_to_update > 0)
 		rb_insert_pages(cpu_buffer, &cpu_buffer->new_pages,
 				cpu_buffer->nr_pages_to_update);
 	else
 		rb_remove_pages(cpu_buffer, -cpu_buffer->nr_pages_to_update);
-	/* reset this value */
-	cpu_buffer->nr_pages_to_update = 0;
+
+	complete(&cpu_buffer->update_completion);
 }
 
 /**
@@ -1361,15 +1490,10 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size,
 	if (size < BUF_PAGE_SIZE * 2)
 		size = BUF_PAGE_SIZE * 2;
 
-	atomic_inc(&buffer->record_disabled);
-
-	/* Make sure all writers are done with this buffer. */
-	synchronize_sched();
+	nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
 
+	/* prevent another thread from changing buffer sizes */
 	mutex_lock(&buffer->mutex);
-	get_online_cpus();
-
-	nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
 
 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
 		/* calculate the pages to update */
@@ -1396,13 +1520,23 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size,
 				goto no_mem;
 		}
 
+		/* fire off all the required work handlers */
+		for_each_buffer_cpu(buffer, cpu) {
+			cpu_buffer = buffer->buffers[cpu];
+			if (!cpu_buffer->nr_pages_to_update)
+				continue;
+			schedule_work_on(cpu, &cpu_buffer->update_pages_work);
+		}
+
 		/* wait for all the updates to complete */
 		for_each_buffer_cpu(buffer, cpu) {
 			cpu_buffer = buffer->buffers[cpu];
-			if (cpu_buffer->nr_pages_to_update) {
-				update_pages_handler(cpu_buffer);
-				cpu_buffer->nr_pages = nr_pages;
-			}
+			if (!cpu_buffer->nr_pages_to_update)
+				continue;
+			wait_for_completion(&cpu_buffer->update_completion);
+			cpu_buffer->nr_pages = nr_pages;
+			/* reset this value */
+			cpu_buffer->nr_pages_to_update = 0;
 		}
 	} else {
 		cpu_buffer = buffer->buffers[cpu_id];
@@ -1418,36 +1552,36 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size,
 						&cpu_buffer->new_pages, cpu_id))
 			goto no_mem;
 
-		update_pages_handler(cpu_buffer);
+		schedule_work_on(cpu_id, &cpu_buffer->update_pages_work);
+		wait_for_completion(&cpu_buffer->update_completion);
 
 		cpu_buffer->nr_pages = nr_pages;
+		/* reset this value */
+		cpu_buffer->nr_pages_to_update = 0;
 	}
 
  out:
-	put_online_cpus();
 	mutex_unlock(&buffer->mutex);
-
-	atomic_dec(&buffer->record_disabled);
-
 	return size;
 
  no_mem:
 	for_each_buffer_cpu(buffer, cpu) {
 		struct buffer_page *bpage, *tmp;
+
 		cpu_buffer = buffer->buffers[cpu];
 		/* reset this number regardless */
 		cpu_buffer->nr_pages_to_update = 0;
+
 		if (list_empty(&cpu_buffer->new_pages))
 			continue;
+
 		list_for_each_entry_safe(bpage, tmp, &cpu_buffer->new_pages,
 					list) {
 			list_del_init(&bpage->list);
 			free_buffer_page(bpage);
 		}
 	}
-	put_online_cpus();
 	mutex_unlock(&buffer->mutex);
-	atomic_dec(&buffer->record_disabled);
 	return -ENOMEM;
 }
 EXPORT_SYMBOL_GPL(ring_buffer_resize);
@@ -1487,21 +1621,11 @@ rb_iter_head_event(struct ring_buffer_iter *iter)
 	return __rb_page_index(iter->head_page, iter->head);
 }
 
-static inline unsigned long rb_page_write(struct buffer_page *bpage)
-{
-	return local_read(&bpage->write) & RB_WRITE_MASK;
-}
-
 static inline unsigned rb_page_commit(struct buffer_page *bpage)
 {
 	return local_read(&bpage->page->commit);
 }
 
-static inline unsigned long rb_page_entries(struct buffer_page *bpage)
-{
-	return local_read(&bpage->entries) & RB_WRITE_MASK;
-}
-
 /* Size is determined by what has been committed */
 static inline unsigned rb_page_size(struct buffer_page *bpage)
 {
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 8ea48e0..72894c1 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2934,20 +2934,10 @@ static int __tracing_resize_ring_buffer(unsigned long size, int cpu)
 
 static ssize_t tracing_resize_ring_buffer(unsigned long size, int cpu_id)
 {
-	int cpu, ret = size;
+	int ret = size;
 
 	mutex_lock(&trace_types_lock);
 
-	tracing_stop();
-
-	/* disable all cpu buffers */
-	for_each_tracing_cpu(cpu) {
-		if (global_trace.data[cpu])
-			atomic_inc(&global_trace.data[cpu]->disabled);
-		if (max_tr.data[cpu])
-			atomic_inc(&max_tr.data[cpu]->disabled);
-	}
-
 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
 		/* make sure, this cpu is enabled in the mask */
 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
@@ -2961,14 +2951,6 @@ static ssize_t tracing_resize_ring_buffer(unsigned long size, int cpu_id)
 		ret = -ENOMEM;
 
 out:
-	for_each_tracing_cpu(cpu) {
-		if (global_trace.data[cpu])
-			atomic_dec(&global_trace.data[cpu]->disabled);
-		if (max_tr.data[cpu])
-			atomic_dec(&max_tr.data[cpu]->disabled);
-	}
-
-	tracing_start();
 	mutex_unlock(&trace_types_lock);
 
 	return ret;
-- 
1.7.3.1


Thread overview: 80+ messages
2011-07-26 22:59 [PATCH 0/5] Add dynamic updates to trace ring buffer Vaibhav Nagarnaik
2011-07-26 22:59 ` [PATCH 1/5] trace: Add a new readonly entry to report total buffer size Vaibhav Nagarnaik
2011-07-29 18:01   ` Steven Rostedt
2011-07-29 19:09     ` Vaibhav Nagarnaik
2011-07-26 22:59 ` [PATCH 2/5] trace: Add ring buffer stats to measure rate of events Vaibhav Nagarnaik
2011-07-29 18:10   ` Steven Rostedt
2011-07-29 19:10     ` Vaibhav Nagarnaik
2011-07-26 22:59 ` [PATCH 3/5] trace: Add per_cpu ring buffer control files Vaibhav Nagarnaik
2011-07-29 18:14   ` Steven Rostedt
2011-07-29 19:13     ` Vaibhav Nagarnaik
2011-07-29 21:25       ` Steven Rostedt
2011-07-26 22:59 ` Vaibhav Nagarnaik [this message]
2011-07-29 21:23   ` [PATCH 4/5] trace: Make removal of ring buffer pages atomic Steven Rostedt
2011-07-29 23:30     ` Vaibhav Nagarnaik
2011-07-30  1:12       ` Steven Rostedt
2011-07-30  1:50         ` David Sharp
2011-07-30  2:43           ` Steven Rostedt
2011-07-30  3:44             ` David Sharp
2011-07-26 22:59 ` [PATCH 5/5] trace: Make addition of pages in ring buffer atomic Vaibhav Nagarnaik
2011-08-16 21:46 ` [PATCH v2 0/5] Add dynamic updates to trace ring buffer Vaibhav Nagarnaik
2011-08-16 21:46 ` [PATCH v2 1/5] trace: Add a new readonly entry to report total buffer size Vaibhav Nagarnaik
2011-08-16 21:46 ` [PATCH v2 2/5] trace: Add ring buffer stats to measure rate of events Vaibhav Nagarnaik
2011-08-16 21:46 ` [PATCH v2 3/5] trace: Add per_cpu ring buffer control files Vaibhav Nagarnaik
2011-08-22 20:29   ` Steven Rostedt
2011-08-22 20:36     ` Vaibhav Nagarnaik
2011-08-22 22:09   ` [PATCH v3] " Vaibhav Nagarnaik
2011-08-23  0:49     ` Steven Rostedt
2011-08-23  1:16       ` Vaibhav Nagarnaik
2011-08-23  1:17   ` Vaibhav Nagarnaik
2011-09-03  2:45     ` Steven Rostedt
2011-09-06 18:56       ` Vaibhav Nagarnaik
2011-09-07 17:13         ` Steven Rostedt
2011-10-12  1:20     ` [PATCH v4 1/4] " Vaibhav Nagarnaik
2012-01-31 23:53       ` Vaibhav Nagarnaik
2012-02-02  2:42         ` Steven Rostedt
2012-02-02 19:20           ` Vaibhav Nagarnaik
2012-02-02 20:00       ` [PATCH v5 " Vaibhav Nagarnaik
2012-02-02 20:00         ` [PATCH v5 2/4] trace: Make removal of ring buffer pages atomic Vaibhav Nagarnaik
2012-04-21  4:27           ` Steven Rostedt
2012-04-23 17:31             ` Vaibhav Nagarnaik
2012-04-25 21:18           ` [PATCH v6 1/3] " Vaibhav Nagarnaik
2012-04-25 21:18             ` [PATCH v6 2/3] trace: Make addition of pages in ring buffer atomic Vaibhav Nagarnaik
2012-04-25 21:18             ` [PATCH v6 3/3] trace: change CPU ring buffer state from tracing_cpumask Vaibhav Nagarnaik
2012-05-03  1:55             ` [PATCH v6 1/3] trace: Make removal of ring buffer pages atomic Steven Rostedt
2012-05-03  6:40               ` Vaibhav Nagarnaik
2012-05-03 12:57                 ` Steven Rostedt
2012-05-03 14:12                   ` Steven Rostedt
2012-05-03 18:43                     ` Vaibhav Nagarnaik
2012-05-03 18:54                       ` Steven Rostedt
2012-05-03 18:54                         ` Vaibhav Nagarnaik
2012-05-04  1:59             ` [PATCH v7 " Vaibhav Nagarnaik
2012-05-04  1:59               ` [PATCH v7 2/3] trace: Make addition of pages in ring buffer atomic Vaibhav Nagarnaik
2012-05-19 10:18                 ` [tip:perf/core] ring-buffer: " tip-bot for Vaibhav Nagarnaik
2012-05-04  1:59               ` [PATCH v7 3/3] trace: change CPU ring buffer state from tracing_cpumask Vaibhav Nagarnaik
2012-05-19 10:21                 ` [tip:perf/core] tracing: " tip-bot for Vaibhav Nagarnaik
2012-05-07 20:22               ` [PATCH v7 1/3] trace: Make removal of ring buffer pages atomic Steven Rostedt
2012-05-07 21:48                 ` Vaibhav Nagarnaik
2012-05-08  0:14                   ` Steven Rostedt
2012-05-09  3:38               ` Steven Rostedt
2012-05-09  5:00                 ` Vaibhav Nagarnaik
2012-05-09 14:29                   ` Steven Rostedt
2012-05-09 17:46                     ` Vaibhav Nagarnaik
2012-05-09 17:54                       ` Steven Rostedt
2012-05-19 10:17               ` [tip:perf/core] ring-buffer: " tip-bot for Vaibhav Nagarnaik
2012-02-02 20:00         ` [PATCH v5 3/4] trace: Make addition of pages in ring buffer atomic Vaibhav Nagarnaik
2012-02-02 20:00         ` [PATCH v5 4/4] trace: change CPU ring buffer state from tracing_cpumask Vaibhav Nagarnaik
2012-03-08 23:51         ` [PATCH v5 1/4] trace: Add per_cpu ring buffer control files Vaibhav Nagarnaik
2012-05-02 21:03         ` [tip:perf/core] ring-buffer: " tip-bot for Vaibhav Nagarnaik
2011-10-12  1:20     ` [PATCH v4 2/4] trace: Make removal of ring buffer pages atomic Vaibhav Nagarnaik
2011-10-12  1:20     ` [PATCH v4 3/4] trace: Make addition of pages in ring buffer atomic Vaibhav Nagarnaik
2011-10-12  1:20     ` [PATCH v4 4/4] trace: change CPU ring buffer state from tracing_cpumask Vaibhav Nagarnaik
2011-08-16 21:46 ` [PATCH v2 4/5] trace: Make removal of ring buffer pages atomic Vaibhav Nagarnaik
2011-08-23  3:27   ` Steven Rostedt
2011-08-23 18:55     ` Vaibhav Nagarnaik
2011-08-23 18:55   ` [PATCH v3 " Vaibhav Nagarnaik
2011-08-23 19:16     ` David Sharp
2011-08-23 19:20       ` Vaibhav Nagarnaik
2011-08-23 19:24       ` Steven Rostedt
2011-08-23 18:55   ` [PATCH v3 5/5] trace: Make addition of pages in ring buffer atomic Vaibhav Nagarnaik
2011-08-16 21:46 ` [PATCH v2 " Vaibhav Nagarnaik
