All of lore.kernel.org
 help / color / mirror / Atom feed
From: Alexander Shishkin <alexander.shishkin@linux.intel.com>
To: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Ingo Molnar <mingo@redhat.com>,
	linux-kernel@vger.kernel.org, vince@deater.net,
	eranian@google.com, johannes@sipsolutions.net,
	Arnaldo Carvalho de Melo <acme@infradead.org>,
	Alexander Shishkin <alexander.shishkin@linux.intel.com>
Subject: [PATCH 4/7] perf: Free aux pages in unmap path
Date: Thu,  3 Dec 2015 12:32:39 +0200	[thread overview]
Message-ID: <1449138762-15194-5-git-send-email-alexander.shishkin@linux.intel.com> (raw)
In-Reply-To: <1449138762-15194-1-git-send-email-alexander.shishkin@linux.intel.com>

Now that we can ensure that no new transactions will start once a ring
buffer's aux area is on its way to being unmapped, and we have a means
of stopping the transactions that are already running, we can stop the
events on this ring buffer's event list and then safely free the aux
pages and the corresponding pmu data, since this time the unmap path is
guaranteed to be the last aux reference holder. This partially reverts
57ffc5ca679 ("perf: Fix AUX buffer refcounting"), which deferred the
deallocation because it could otherwise happen in NMI context. That is
no longer the case: the last call to rb_free_aux(), the one that drops
the last AUX reference, has to happen in perf_mmap_close() on that AUX
area.

Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
---
 kernel/events/core.c        | 53 ++++++++++++++++++++++++++++++++++++++++++++-
 kernel/events/internal.h    |  1 -
 kernel/events/ring_buffer.c | 37 ++++++++++---------------------
 3 files changed, 63 insertions(+), 28 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 66f835a2df..10fce18710 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4630,11 +4630,62 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 	 */
 	if (rb_has_aux(rb) && vma->vm_pgoff == rb->aux_pgoff &&
 	    atomic_dec_and_mutex_lock(&rb->aux_mmap_count, &event->mmap_mutex)) {
+		struct perf_event *iter;
+		LIST_HEAD(stop_list);
+		unsigned long flags;
+
+		/*
+		 * Stop all aux events that are writing to this here buffer,
+		 * so that we can free its aux pages and corresponding pmu
+		 * data. Note that after rb::aux_mmap_count dropped to zero,
+		 * they won't start any more (see perf_aux_output_begin()).
+		 *
+		 * Since we can't take ctx::mutex under rb::event_lock, we
+		 * need to jump through hoops to get there, namely fish out
+		 * all events from rb::event_list onto an on-stack list,
+		 * carry out the stopping and splice this on-stack list back
+		 * to rb::event_list.
+		 * This means that these events will miss wakeups during this
+		 * window, but since it's mmap_close, assume the consumer
+		 * doesn't care any more.
+		 *
+		 * Note: list_splice_init_rcu() doesn't cut it, since it syncs
+		 * and rb::event_lock is a spinlock.
+		 */
+retry:
+		spin_lock_irqsave(&rb->event_lock, flags);
+		list_for_each_entry_rcu(iter, &rb->event_list, rb_entry) {
+			list_del_rcu(&iter->rb_entry);
+			spin_unlock_irqrestore(&rb->event_lock, flags);
+
+			synchronize_rcu();
+			list_add_tail(&iter->rb_entry, &stop_list);
+
+			goto retry;
+		}
+		spin_unlock_irqrestore(&rb->event_lock, flags);
+
+		mutex_unlock(&event->mmap_mutex);
+
+		list_for_each_entry(iter, &stop_list, rb_entry) {
+			if (!has_aux(iter))
+				continue;
+
+			perf_event_stop(iter);
+		}
+
+		/* and splice it back now that we're done with them */
+		spin_lock_irqsave(&rb->event_lock, flags);
+		list_splice_tail(&stop_list, &rb->event_list);
+		spin_unlock_irqrestore(&rb->event_lock, flags);
+
+		/* now it's safe to free the pages */
 		atomic_long_sub(rb->aux_nr_pages, &mmap_user->locked_vm);
 		vma->vm_mm->pinned_vm -= rb->aux_mmap_locked;
 
+		/* this has to be the last one */
 		rb_free_aux(rb);
-		mutex_unlock(&event->mmap_mutex);
+		WARN_ON_ONCE(atomic_read(&rb->aux_refcount));
 	}
 
 	atomic_dec(&rb->mmap_count);
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index 2bbad9c127..2b229fdcfc 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -11,7 +11,6 @@
 struct ring_buffer {
 	atomic_t			refcount;
 	struct rcu_head			rcu_head;
-	struct irq_work			irq_work;
 #ifdef CONFIG_PERF_USE_VMALLOC
 	struct work_struct		work;
 	int				page_order;	/* allocation order  */
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 5709cc222f..6865ac95ca 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -221,8 +221,6 @@ void perf_output_end(struct perf_output_handle *handle)
 	rcu_read_unlock();
 }
 
-static void rb_irq_work(struct irq_work *work);
-
 static void
 ring_buffer_init(struct ring_buffer *rb, long watermark, int flags)
 {
@@ -243,16 +241,6 @@ ring_buffer_init(struct ring_buffer *rb, long watermark, int flags)
 
 	INIT_LIST_HEAD(&rb->event_list);
 	spin_lock_init(&rb->event_lock);
-	init_irq_work(&rb->irq_work, rb_irq_work);
-}
-
-static void ring_buffer_put_async(struct ring_buffer *rb)
-{
-	if (!atomic_dec_and_test(&rb->refcount))
-		return;
-
-	rb->rcu_head.next = (void *)rb;
-	irq_work_queue(&rb->irq_work);
 }
 
 /*
@@ -292,7 +280,7 @@ void *perf_aux_output_begin(struct perf_output_handle *handle,
 	 * the aux buffer is in perf_mmap_close(), about to get free'd.
 	 */
 	if (!atomic_read(&rb->aux_mmap_count))
-		goto err;
+		goto err_put;
 
 	/*
 	 * Nesting is not supported for AUX area, make sure nested
@@ -338,7 +326,7 @@ err_put:
 	rb_free_aux(rb);
 
 err:
-	ring_buffer_put_async(rb);
+	ring_buffer_put(rb);
 	handle->event = NULL;
 
 	return NULL;
@@ -389,7 +377,7 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size,
 
 	local_set(&rb->aux_nest, 0);
 	rb_free_aux(rb);
-	ring_buffer_put_async(rb);
+	ring_buffer_put(rb);
 }
 
 /*
@@ -563,6 +551,14 @@ static void __rb_free_aux(struct ring_buffer *rb)
 {
 	int pg;
 
+	/*
+	 * Should never happen, the last reference should be dropped from
+	 * perf_mmap_close() path, which first stops aux transactions (which
+	 * in turn are the atomic holders of aux_refcount) and then does the
+	 * last rb_free_aux().
+	 */
+	WARN_ON_ONCE(in_atomic());
+
 	if (rb->aux_priv) {
 		rb->free_aux(rb->aux_priv);
 		rb->free_aux = NULL;
@@ -581,18 +577,7 @@ static void __rb_free_aux(struct ring_buffer *rb)
 void rb_free_aux(struct ring_buffer *rb)
 {
 	if (atomic_dec_and_test(&rb->aux_refcount))
-		irq_work_queue(&rb->irq_work);
-}
-
-static void rb_irq_work(struct irq_work *work)
-{
-	struct ring_buffer *rb = container_of(work, struct ring_buffer, irq_work);
-
-	if (!atomic_read(&rb->aux_refcount))
 		__rb_free_aux(rb);
-
-	if (rb->rcu_head.next == (void *)rb)
-		call_rcu(&rb->rcu_head, rb_free_rcu);
 }
 
 #ifndef CONFIG_PERF_USE_VMALLOC
-- 
2.6.2


  parent reply	other threads:[~2015-12-03 10:35 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-12-03 10:32 [PATCH 0/7] perf: Untangle aux refcounting Alexander Shishkin
2015-12-03 10:32 ` [PATCH 1/7] perf: Refuse to begin aux transaction after aux_mmap_count drops Alexander Shishkin
2015-12-03 10:32 ` [PATCH 2/7] perf: Generalize task_function_call()ers Alexander Shishkin
2015-12-03 17:34   ` Peter Zijlstra
2015-12-08 16:42     ` Alexander Shishkin
2015-12-08 16:57       ` Peter Zijlstra
2015-12-17 13:40         ` Peter Zijlstra
2015-12-17 14:25           ` Alexander Shishkin
2015-12-17 15:07             ` Peter Zijlstra
2015-12-18  9:01               ` Peter Zijlstra
2015-12-18 15:07                 ` Alexander Shishkin
2015-12-18 16:47                   ` Peter Zijlstra
2015-12-18 17:41                     ` Alexander Shishkin
2015-12-21 14:39                 ` Alexander Shishkin
2016-01-11 10:44                 ` Alexander Shishkin
2015-12-03 10:32 ` [PATCH 3/7] perf: Add a helper to stop running events Alexander Shishkin
2015-12-03 10:32 ` Alexander Shishkin [this message]
2015-12-04 17:02   ` [PATCH 4/7] perf: Free aux pages in unmap path Peter Zijlstra
2015-12-04 22:17     ` Peter Zijlstra
2015-12-07 16:16       ` Peter Zijlstra
2015-12-09  9:57     ` Alexander Shishkin
2015-12-09 10:56       ` Peter Zijlstra
2015-12-10 11:20         ` Alexander Shishkin
2015-12-10 12:58           ` Alexander Shishkin
2015-12-03 10:32 ` [PATCH 5/7] perf: Document aux api usage Alexander Shishkin
2015-12-03 20:36   ` Mathieu Poirier
2015-12-03 10:32 ` [PATCH 6/7] perf/x86/intel/pt: Move transaction start/stop to pmu start/stop callbacks Alexander Shishkin
2015-12-03 10:32 ` [PATCH 7/7] perf/x86/intel/bts: Move transaction start/stop to " Alexander Shishkin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1449138762-15194-5-git-send-email-alexander.shishkin@linux.intel.com \
    --to=alexander.shishkin@linux.intel.com \
    --cc=a.p.zijlstra@chello.nl \
    --cc=acme@infradead.org \
    --cc=eranian@google.com \
    --cc=johannes@sipsolutions.net \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=vince@deater.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.