All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC][PATCH 0/2] perf vs d-cache aliasses
@ 2009-09-21 14:08 Peter Zijlstra
  2009-09-21 14:08 ` [RFC][PATCH 1/2] perf: Provide weak interfaces to mmap() backing Peter Zijlstra
                   ` (2 more replies)
  0 siblings, 3 replies; 5+ messages in thread
From: Peter Zijlstra @ 2009-09-21 14:08 UTC (permalink / raw)
  To: Ingo Molnar, Paul Mackerras, David Miller
  Cc: Andrew Morton, Jens Axboe, linux-kernel, Peter Zijlstra


Hi David,

based on your vmalloc code, an alternative implementation that lets x86 stay
with regular allocations.

While I'm sad to have to propose having two implementations around, I see no
other way to please both sides.

Alternative to placing the code in arch/sparc we could do something like

CONFIG_PERF_WANTS_VMALLOC and place both somewhere in kernel/




^ permalink raw reply	[flat|nested] 5+ messages in thread

* [RFC][PATCH 1/2] perf: Provide weak interfaces to mmap() backing
  2009-09-21 14:08 [RFC][PATCH 0/2] perf vs d-cache aliasses Peter Zijlstra
@ 2009-09-21 14:08 ` Peter Zijlstra
  2009-09-21 14:08 ` [RFC][PATCH 2/2] perf,sparc: Use vmalloc to back the mmap() array Peter Zijlstra
  2009-09-21 17:10 ` [RFC][PATCH 0/2] perf vs d-cache aliasses David Miller
  2 siblings, 0 replies; 5+ messages in thread
From: Peter Zijlstra @ 2009-09-21 14:08 UTC (permalink / raw)
  To: Ingo Molnar, Paul Mackerras, David Miller
  Cc: Andrew Morton, Jens Axboe, linux-kernel, Peter Zijlstra

[-- Attachment #1: perf-weak-mmap-alloc.patch --]
[-- Type: text/plain, Size: 6520 bytes --]

Some architectures such as Sparc, ARM and MIPS (basically everything
with flush_dcache_page()) need to deal with dcache aliases by
carefully placing pages in both kernel and user maps.

These architectures typically have to use vmalloc_user() for this.

However, on other architectures, vmalloc() is not needed and has the
downsides of being more restricted and slower than regular
allocations.

Hence reshape the code so that we can do either by overriding a few
simple functions.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
---
 include/linux/perf_counter.h |   18 ++++++++++
 kernel/perf_counter.c        |   72 ++++++++++++++++++++++++++++---------------
 2 files changed, 66 insertions(+), 24 deletions(-)

Index: linux-2.6/include/linux/perf_counter.h
===================================================================
--- linux-2.6.orig/include/linux/perf_counter.h
+++ linux-2.6/include/linux/perf_counter.h
@@ -513,6 +513,7 @@ struct file;
 
 struct perf_mmap_data {
 	struct rcu_head			rcu_head;
+	struct work_struct		work;
 	int				nr_pages;	/* nr of data pages  */
 	int				writable;	/* are we writable   */
 	int				nr_locked;	/* nr pages mlocked  */
@@ -533,6 +534,23 @@ struct perf_mmap_data {
 	void				*data_pages[0];
 };
 
+/*
+ * The below three functions dealing with the mmap() backing are weak
+ * functions, which allow the arch implementation to over-ride them:
+ *
+ *   struct page *perf_mmap_to_page(void *addr);
+ *   struct perf_mmap_data *
+ *   perf_mmap_data_alloc(struct perf_counter *counter, int nr_pages);
+ *   void perf_mmap_data_free(struct perf_mmap_data *data);
+ *
+ * They default to allocating memory using get_zeroed_page(GFP_KERNEL).
+ */
+
+struct page *perf_mmap_to_page(void *addr);
+struct perf_mmap_data *
+perf_mmap_data_alloc(struct perf_counter *counter, int nr_pages);
+void perf_mmap_data_free(struct perf_mmap_data *data);
+
 struct perf_pending_entry {
 	struct perf_pending_entry *next;
 	void (*func)(struct perf_pending_entry *);
Index: linux-2.6/kernel/perf_counter.c
===================================================================
--- linux-2.6.orig/kernel/perf_counter.c
+++ linux-2.6/kernel/perf_counter.c
@@ -2106,6 +2106,11 @@ unlock:
 	rcu_read_unlock();
 }
 
+struct page * __weak perf_mmap_to_page(void *addr)
+{
+	return virt_to_page(addr);
+}
+
 static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	struct perf_counter *counter = vma->vm_file->private_data;
@@ -2124,7 +2129,7 @@ static int perf_mmap_fault(struct vm_are
 		goto unlock;
 
 	if (vmf->pgoff == 0) {
-		vmf->page = virt_to_page(data->user_page);
+		vmf->page = perf_mmap_to_page(data->user_page);
 	} else {
 		int nr = vmf->pgoff - 1;
 
@@ -2134,7 +2139,7 @@ static int perf_mmap_fault(struct vm_are
 		if (vmf->flags & FAULT_FLAG_WRITE)
 			goto unlock;
 
-		vmf->page = virt_to_page(data->data_pages[nr]);
+		vmf->page = perf_mmap_to_page(data->data_pages[nr]);
 	}
 
 	get_page(vmf->page);
@@ -2148,7 +2153,26 @@ unlock:
 	return ret;
 }
 
-static int perf_mmap_data_alloc(struct perf_counter *counter, int nr_pages)
+void perf_mmap_data_init(struct perf_counter *counter, struct perf_mmap_data *data)
+{
+	long max_size = PAGE_SIZE * data->nr_pages;
+
+	atomic_set(&data->lock, -1);
+
+	if (counter->attr.watermark) {
+		data->watermark = min_t(long, max_size,
+					counter->attr.wakeup_watermark);
+	}
+
+	if (!data->watermark)
+		data->watermark = max_t(long, PAGE_SIZE, max_size / 2);
+
+
+	rcu_assign_pointer(counter->data, data);
+}
+
+struct perf_mmap_data * __weak
+perf_mmap_data_alloc(struct perf_counter *counter, int nr_pages)
 {
 	struct perf_mmap_data *data;
 	unsigned long size;
@@ -2174,18 +2198,8 @@ static int perf_mmap_data_alloc(struct p
 	}
 
 	data->nr_pages = nr_pages;
-	atomic_set(&data->lock, -1);
 
-	if (counter->attr.watermark) {
-		data->watermark = min_t(long, PAGE_SIZE * nr_pages,
-				      counter->attr.wakeup_watermark);
-	}
-	if (!data->watermark)
-		data->watermark = max(PAGE_SIZE, PAGE_SIZE * nr_pages / 4);
-
-	rcu_assign_pointer(counter->data, data);
-
-	return 0;
+	return data;
 
 fail_data_pages:
 	for (i--; i >= 0; i--)
@@ -2197,7 +2211,7 @@ fail_user_page:
 	kfree(data);
 
 fail:
-	return -ENOMEM;
+	return NULL;
 }
 
 static void perf_mmap_free_page(unsigned long addr)
@@ -2208,28 +2222,32 @@ static void perf_mmap_free_page(unsigned
 	__free_page(page);
 }
 
-static void __perf_mmap_data_free(struct rcu_head *rcu_head)
+void __weak perf_mmap_data_free(struct perf_mmap_data *data)
 {
-	struct perf_mmap_data *data;
 	int i;
 
-	data = container_of(rcu_head, struct perf_mmap_data, rcu_head);
-
 	perf_mmap_free_page((unsigned long)data->user_page);
 	for (i = 0; i < data->nr_pages; i++)
 		perf_mmap_free_page((unsigned long)data->data_pages[i]);
+}
 
+static void perf_mmap_data_free_rcu(struct rcu_head *rcu_head)
+{
+	struct perf_mmap_data *data;
+
+	data = container_of(rcu_head, struct perf_mmap_data, rcu_head);
+	perf_mmap_data_free(data);
 	kfree(data);
 }
 
-static void perf_mmap_data_free(struct perf_counter *counter)
+static void perf_mmap_data_release(struct perf_counter *counter)
 {
 	struct perf_mmap_data *data = counter->data;
 
 	WARN_ON(atomic_read(&counter->mmap_count));
 
 	rcu_assign_pointer(counter->data, NULL);
-	call_rcu(&data->rcu_head, __perf_mmap_data_free);
+	call_rcu(&data->rcu_head, perf_mmap_data_free_rcu);
 }
 
 static void perf_mmap_open(struct vm_area_struct *vma)
@@ -2249,7 +2267,7 @@ static void perf_mmap_close(struct vm_ar
 
 		atomic_long_sub(counter->data->nr_pages + 1, &user->locked_vm);
 		vma->vm_mm->locked_vm -= counter->data->nr_locked;
-		perf_mmap_data_free(counter);
+		perf_mmap_data_release(counter);
 		mutex_unlock(&counter->mmap_mutex);
 	}
 }
@@ -2267,6 +2285,7 @@ static int perf_mmap(struct file *file, 
 	unsigned long user_locked, user_lock_limit;
 	struct user_struct *user = current_user();
 	unsigned long locked, lock_limit;
+	struct perf_mmap_data *data;
 	unsigned long vma_size;
 	unsigned long nr_pages;
 	long user_extra, extra;
@@ -2329,10 +2348,15 @@ static int perf_mmap(struct file *file, 
 	}
 
 	WARN_ON(counter->data);
-	ret = perf_mmap_data_alloc(counter, nr_pages);
-	if (ret)
+
+	data = perf_mmap_data_alloc(counter, nr_pages);
+	ret = -ENOMEM;
+	if (!data)
 		goto unlock;
 
+	ret = 0;
+	perf_mmap_data_init(counter, data);
+
 	atomic_set(&counter->mmap_count, 1);
 	atomic_long_add(user_extra, &user->locked_vm);
 	vma->vm_mm->locked_vm += extra;

-- 


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [RFC][PATCH 2/2] perf,sparc: Use vmalloc to back the mmap() array
  2009-09-21 14:08 [RFC][PATCH 0/2] perf vs d-cache aliasses Peter Zijlstra
  2009-09-21 14:08 ` [RFC][PATCH 1/2] perf: Provide weak interfaces to mmap() backing Peter Zijlstra
@ 2009-09-21 14:08 ` Peter Zijlstra
  2009-09-21 17:10 ` [RFC][PATCH 0/2] perf vs d-cache aliasses David Miller
  2 siblings, 0 replies; 5+ messages in thread
From: Peter Zijlstra @ 2009-09-21 14:08 UTC (permalink / raw)
  To: Ingo Molnar, Paul Mackerras, David Miller
  Cc: Andrew Morton, Jens Axboe, linux-kernel, Peter Zijlstra

[-- Attachment #1: perf-sparc-mmap-vmalloc.patch --]
[-- Type: text/plain, Size: 2389 bytes --]

Implement vmalloc() backed storage for perf_mmap().

Alternatively we could put this code in kernel/perf_counter_vmalloc.c
and conditionally compile that or something, since there really isn't
anything Sparc specific about it.

Suggested-by: David Miller <davem@davemloft.net>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
---
 arch/sparc/kernel/perf_counter.c |   72 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 72 insertions(+)

Index: linux-2.6/arch/sparc/kernel/perf_counter.c
===================================================================
--- linux-2.6.orig/arch/sparc/kernel/perf_counter.c
+++ linux-2.6/arch/sparc/kernel/perf_counter.c
@@ -14,6 +14,7 @@
 
 #include <linux/perf_counter.h>
 #include <linux/kprobes.h>
+#include <linux/vmalloc.h>
 #include <linux/kernel.h>
 #include <linux/kdebug.h>
 #include <linux/mutex.h>
@@ -554,3 +555,74 @@ void __init init_hw_perf_counters(void)
 
 	register_die_notifier(&perf_counter_nmi_notifier);
 }
+
+struct page *perf_mmap_to_page(void *addr)
+{
+	return vmalloc_to_page(addr);
+}
+
+static void perf_mmap_unmark_page(void *addr)
+{
+	struct page *page = vmalloc_to_page(addr);
+
+	page->mapping = NULL;
+}
+
+static void perf_mmap_data_free_work(struct work_struct *work)
+{
+	struct perf_mmap_data *data;
+	void *base;
+	int i;
+
+	data = container_of(work, struct perf_mmap_data, work);
+
+	base = data->user_page;
+	for (i = 0; i < data->nr_pages + 1; i++)
+		perf_mmap_unmark_page(base + (i * PAGE_SIZE));
+
+	vfree(base);
+}
+
+void perf_mmap_data_free(struct perf_mmap_data *data)
+{
+	schedule_work(&data->work);
+}
+
+struct perf_mmap_data *
+perf_mmap_data_alloc(struct perf_counter *counter, int nr_pages)
+{
+	struct perf_mmap_data *data;
+	unsigned long size;
+	void *all_buf;
+	int i;
+
+	WARN_ON(atomic_read(&counter->mmap_count));
+
+	size = sizeof(struct perf_mmap_data);
+	size += nr_pages * sizeof(void *);
+
+	data = kzalloc(size, GFP_KERNEL);
+	if (!data)
+		goto fail;
+
+	INIT_WORK(&data->work, perf_mmap_data_free_work);
+
+	all_buf = vmalloc_user((nr_pages + 1) * PAGE_SIZE);
+	if (!all_buf)
+		goto fail_all_buf;
+
+	data->user_page = all_buf;
+
+	for (i = 0; i < nr_pages; i++)
+		data->data_pages[i] = all_buf + ((i + 1) * PAGE_SIZE);
+
+	data->nr_pages = nr_pages;
+
+	return data;
+
+fail_all_buf:
+	kfree(data);
+
+fail:
+	return NULL;
+}

-- 


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [RFC][PATCH 0/2] perf vs d-cache aliasses
  2009-09-21 14:08 [RFC][PATCH 0/2] perf vs d-cache aliasses Peter Zijlstra
  2009-09-21 14:08 ` [RFC][PATCH 1/2] perf: Provide weak interfaces to mmap() backing Peter Zijlstra
  2009-09-21 14:08 ` [RFC][PATCH 2/2] perf,sparc: Use vmalloc to back the mmap() array Peter Zijlstra
@ 2009-09-21 17:10 ` David Miller
  2009-09-22  9:02   ` Ingo Molnar
  2 siblings, 1 reply; 5+ messages in thread
From: David Miller @ 2009-09-21 17:10 UTC (permalink / raw)
  To: a.p.zijlstra; +Cc: mingo, paulus, akpm, jens.axboe, linux-kernel

From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 21 Sep 2009 16:08:48 +0200

> 
> based on your vmalloc code, an alternative implementation that lets x86 stay
> with regular allocations.
> 
> While I'm sad to have to propose having two implementations around, I see no
> other way to please both sides.

If you guys want to maintain two implementations of the same thing,
that's fine with me :)


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [RFC][PATCH 0/2] perf vs d-cache aliasses
  2009-09-21 17:10 ` [RFC][PATCH 0/2] perf vs d-cache aliasses David Miller
@ 2009-09-22  9:02   ` Ingo Molnar
  0 siblings, 0 replies; 5+ messages in thread
From: Ingo Molnar @ 2009-09-22  9:02 UTC (permalink / raw)
  To: David Miller; +Cc: a.p.zijlstra, paulus, akpm, jens.axboe, linux-kernel


* David Miller <davem@davemloft.net> wrote:

> From: Peter Zijlstra <a.p.zijlstra@chello.nl>
> Date: Mon, 21 Sep 2009 16:08:48 +0200
> 
> > 
> > based on your vmalloc code, an alternative implementation that lets 
> > x86 stay with regular allocations.
> > 
> > While I'm sad to have to propose having two implementations around, 
> > I see no other way to please both sides.
> 
> If you guys want to maintain two implementations of the same thing, 
> that's fine with me :)

I take this as an acked-by :-)

I'll reshape these patches on top of the counters -> events rename.

	Ingo

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2009-09-22  9:03 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-09-21 14:08 [RFC][PATCH 0/2] perf vs d-cache aliasses Peter Zijlstra
2009-09-21 14:08 ` [RFC][PATCH 1/2] perf: Provide weak interfaces to mmap() backing Peter Zijlstra
2009-09-21 14:08 ` [RFC][PATCH 2/2] perf,sparc: Use vmalloc to back the mmap() array Peter Zijlstra
2009-09-21 17:10 ` [RFC][PATCH 0/2] perf vs d-cache aliasses David Miller
2009-09-22  9:02   ` Ingo Molnar

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.