linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
To: Andrea Arcangeli <aarcange@redhat.com>,
	Avi Kivity <avi@redhat.com>, Thomas Gleixner <tglx@linutronix.de>,
	Rik van Riel <riel@redhat.com>, Ingo Molnar <mingo@elte.hu>,
	akpm@linux-foundation.org,
	Linus Torvalds <torvalds@linux-foundation.org>
Cc: linux-kernel@vger.kernel.org, linux-arch@vger.kernel.org,
	Benjamin Herrenschmidt <benh@kernel.crashing.org>,
	David Miller <davem@davemloft.net>,
	Hugh Dickins <hugh.dickins@tiscali.co.uk>,
	Mel Gorman <mel@csn.ul.ie>, Nick Piggin <npiggin@suse.de>,
	Peter Zijlstra <a.p.zijlstra@chello.nl>,
	"Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Subject: [PATCH 23/28] sparc64: Implement get_user_pages_fast().
Date: Mon, 07 Jun 2010 13:07:17 +0200	[thread overview]
Message-ID: <20100607111408.500962980@chello.nl> (raw)
In-Reply-To: 20100607110654.606530953@chello.nl

[-- Attachment #1: davem-sparc64-Implement_get_user_pages_fast.patch --]
[-- Type: text/plain, Size: 5486 bytes --]

From: David S. Miller <davem@davemloft.net>

Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <20100408.180015.30977694.davem@davemloft.net>
---
 arch/sparc/mm/Makefile |    2 
 arch/sparc/mm/gup.c    |  181 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 182 insertions(+), 1 deletion(-)
 create mode 100644 arch/sparc/mm/gup.c

Index: linux-2.6/arch/sparc/mm/Makefile
===================================================================
--- linux-2.6.orig/arch/sparc/mm/Makefile
+++ linux-2.6/arch/sparc/mm/Makefile
@@ -4,7 +4,7 @@
 asflags-y := -ansi
 ccflags-y := -Werror
 
-obj-$(CONFIG_SPARC64)   += ultra.o tlb.o tsb.o
+obj-$(CONFIG_SPARC64)   += ultra.o tlb.o tsb.o gup.o
 obj-y                   += fault_$(BITS).o
 obj-y                   += init_$(BITS).o
 obj-$(CONFIG_SPARC32)   += loadmmu.o
Index: linux-2.6/arch/sparc/mm/gup.c
===================================================================
--- /dev/null
+++ linux-2.6/arch/sparc/mm/gup.c
@@ -0,0 +1,181 @@
+/*
+ * Lockless get_user_pages_fast for sparc, cribbed from powerpc
+ *
+ * Copyright (C) 2008 Nick Piggin
+ * Copyright (C) 2008 Novell Inc.
+ */
+
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/vmstat.h>
+#include <linux/pagemap.h>
+#include <linux/rwsem.h>
+#include <asm/pgtable.h>
+
+/*
+ * The performance critical leaf functions are made noinline otherwise gcc
+ * inlines everything into a single function which results in too much
+ * register pressure.
+ */
+static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
+		unsigned long end, int write, struct page **pages, int *nr)
+{
+	unsigned long mask, result;
+	pte_t *ptep;
+
+	if (tlb_type == hypervisor) {
+		result = _PAGE_PRESENT_4V|_PAGE_P_4V;
+		if (write)
+			result |= _PAGE_WRITE_4V;
+	} else {
+		result = _PAGE_PRESENT_4U|_PAGE_P_4U;
+		if (write)
+			result |= _PAGE_WRITE_4U;
+	}
+	mask = result | _PAGE_SPECIAL;
+
+	ptep = pte_offset_kernel(&pmd, addr);
+	do {
+		struct page *page, *head;
+		pte_t pte = *ptep;
+
+		if ((pte_val(pte) & mask) != result)
+			return 0;
+		VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
+
+		/* The hugepage case is simplified on sparc64 because
+		 * we encode the sub-page pfn offsets into the
+		 * hugepage PTEs.  We could optimize this in the future
+		 * use page_cache_add_speculative() for the hugepage case.
+		 */
+		page = pte_page(pte);
+		head = compound_head(page);
+		if (!page_cache_get_speculative(head))
+			return 0;
+		if (unlikely(pte_val(pte) != pte_val(*ptep))) {
+			put_page(head);
+			return 0;
+		}
+
+		pages[*nr] = page;
+		(*nr)++;
+	} while (ptep++, addr += PAGE_SIZE, addr != end);
+
+	return 1;
+}
+
+static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
+		int write, struct page **pages, int *nr)
+{
+	unsigned long next;
+	pmd_t *pmdp;
+
+	pmdp = pmd_offset(&pud, addr);
+	do {
+		pmd_t pmd = *pmdp;
+
+		next = pmd_addr_end(addr, end);
+		if (pmd_none(pmd))
+			return 0;
+		if (!gup_pte_range(pmd, addr, next, write, pages, nr))
+			return 0;
+	} while (pmdp++, addr = next, addr != end);
+
+	return 1;
+}
+
+static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
+		int write, struct page **pages, int *nr)
+{
+	unsigned long next;
+	pud_t *pudp;
+
+	pudp = pud_offset(&pgd, addr);
+	do {
+		pud_t pud = *pudp;
+
+		next = pud_addr_end(addr, end);
+		if (pud_none(pud))
+			return 0;
+		if (!gup_pmd_range(pud, addr, next, write, pages, nr))
+			return 0;
+	} while (pudp++, addr = next, addr != end);
+
+	return 1;
+}
+
+int get_user_pages_fast(unsigned long start, int nr_pages, int write,
+			struct page **pages)
+{
+	struct mm_struct *mm = current->mm;
+	unsigned long addr, len, end;
+	unsigned long next;
+	pgd_t *pgdp;
+	int nr = 0;
+
+	start &= PAGE_MASK;
+	addr = start;
+	len = (unsigned long) nr_pages << PAGE_SHIFT;
+	end = start + len;
+
+	/*
+	 * XXX: batch / limit 'nr', to avoid large irq off latency
+	 * needs some instrumenting to determine the common sizes used by
+	 * important workloads (eg. DB2), and whether limiting the batch size
+	 * will decrease performance.
+	 *
+	 * It seems like we're in the clear for the moment. Direct-IO is
+	 * the main guy that batches up lots of get_user_pages, and even
+	 * they are limited to 64-at-a-time which is not so many.
+	 */
+	/*
+	 * This doesn't prevent pagetable teardown, but does prevent
+	 * the pagetables from being freed on sparc.
+	 *
+	 * So long as we atomically load page table pointers versus teardown,
+	 * we can follow the address down to the the page and take a ref on it.
+	 */
+	local_irq_disable();
+
+	pgdp = pgd_offset(mm, addr);
+	do {
+		pgd_t pgd = *pgdp;
+
+		next = pgd_addr_end(addr, end);
+		if (pgd_none(pgd))
+			goto slow;
+		if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
+			goto slow;
+	} while (pgdp++, addr = next, addr != end);
+
+	local_irq_enable();
+
+	VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT);
+	return nr;
+
+	{
+		int ret;
+
+slow:
+		local_irq_enable();
+
+		/* Try to get the remaining pages with get_user_pages */
+		start += nr << PAGE_SHIFT;
+		pages += nr;
+
+		down_read(&mm->mmap_sem);
+		ret = get_user_pages(current, mm, start,
+			(end - start) >> PAGE_SHIFT, write, 0, pages, NULL);
+		up_read(&mm->mmap_sem);
+
+		/* Have to be a bit careful with return values */
+		if (nr > 0) {
+			if (ret < 0)
+				ret = nr;
+			else
+				ret += nr;
+		}
+
+		return ret;
+	}
+}



  parent reply	other threads:[~2010-06-07 11:20 UTC|newest]

Thread overview: 48+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-06-07 11:06 [PATCH 00/28] mm: preemptibility -v3 Peter Zijlstra
2010-06-07 11:06 ` [PATCH 01/28] powerpc: Use call_rcu_sched() for pagetables Peter Zijlstra
2010-06-07 11:06 ` [PATCH 02/28] mm: Improve page_lock_anon_vma() comment Peter Zijlstra
2010-06-09 10:50   ` Mel Gorman
2010-06-07 11:06 ` [PATCH 03/28] rename anon_vma_lock to vma_lock_anon_vma Peter Zijlstra
2010-06-07 15:01   ` Peter Zijlstra
2010-06-09 10:59   ` Mel Gorman
2010-06-07 11:06 ` [PATCH 04/28] change direct call of spin_lock(anon_vma->lock) to inline function Peter Zijlstra
2010-06-07 11:06 ` [PATCH 05/28] track the root (oldest) anon_vma Peter Zijlstra
2010-06-07 11:07 ` [PATCH 06/28] always lock " Peter Zijlstra
2010-06-07 11:07 ` [PATCH 07/28] extend KSM refcounts to the anon_vma root Peter Zijlstra
2010-06-09 10:55   ` Mel Gorman
2010-06-07 11:07 ` [PATCH 08/28] mm: Rename drop_anon_vma to put_anon_vma Peter Zijlstra
2010-06-07 11:07 ` [PATCH 09/28] mm: Move anon_vma ref out from under CONFIG_KSM Peter Zijlstra
2010-06-09 11:05   ` Mel Gorman
2010-06-07 11:07 ` [PATCH 10/28] mm: Make use of the anon_vma ref count Peter Zijlstra
2010-06-09 13:22   ` Mel Gorman
2010-06-09 13:32     ` Peter Zijlstra
2010-06-07 11:07 ` [PATCH 11/28] mm: Preemptible mmu_gather Peter Zijlstra
2010-06-07 11:07 ` [PATCH 12/28] powerpc: " Peter Zijlstra
2010-06-07 11:07 ` [PATCH 13/28] sparc: " Peter Zijlstra
2010-06-07 11:07 ` [PATCH 14/28] s390: preemptible mmu_gather Peter Zijlstra
2010-06-07 11:07 ` [PATCH 15/28] arm: Preemptible mmu_gather Peter Zijlstra
2010-06-07 11:07 ` [PATCH 16/28] sh: " Peter Zijlstra
2010-06-07 11:38   ` Peter Zijlstra
2010-06-07 11:07 ` [PATCH 17/28] um: " Peter Zijlstra
2010-06-07 11:07 ` [PATCH 18/28] ia64: " Peter Zijlstra
2010-06-07 11:07 ` [PATCH 19/28] mm, powerpc: Move the RCU page-table freeing into generic code Peter Zijlstra
2010-06-07 11:07 ` [PATCH 20/28] sparc64: Kill page table quicklists Peter Zijlstra
2010-06-07 11:07 ` [PATCH 21/28] sparc64: Use RCU page table freeing Peter Zijlstra
2010-06-07 11:07 ` [PATCH 22/28] sparc64: Add support for _PAGE_SPECIAL Peter Zijlstra
2010-06-07 11:07 ` Peter Zijlstra [this message]
2010-06-07 11:07 ` [PATCH 24/28] lockdep, mutex: Provide mutex_lock_nest_lock Peter Zijlstra
2010-06-07 11:07 ` [PATCH 25/28] mutex: Provide mutex_is_contended Peter Zijlstra
2010-06-07 11:07 ` [PATCH 26/28] mm: Convert i_mmap_lock and anon_vma->lock to mutexes Peter Zijlstra
2010-06-07 11:07 ` [PATCH 27/28] mm: Extended batches for generic mmu_gather Peter Zijlstra
2010-06-07 11:07 ` [PATCH 28/28] mm: Optimize page_lock_anon_vma() fast-path Peter Zijlstra
2010-06-07 13:57 ` [PATCH 00/28] mm: preemptibility -v3 Sam Ravnborg
2010-06-07 15:02   ` Peter Zijlstra
2010-06-07 16:36 ` Andi Kleen
2010-06-07 16:39   ` Peter Zijlstra
2010-06-10  1:45 ` Zhang, Yanmin
2010-06-10  6:52   ` Peter Zijlstra
2010-06-10  6:59     ` Zhang, Yanmin
2010-06-21 10:21       ` Peter Zijlstra
2010-06-24  9:55         ` Peter Zijlstra
2010-06-29  7:40           ` Zhang, Yanmin
2010-06-29  7:48             ` Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20100607111408.500962980@chello.nl \
    --to=a.p.zijlstra@chello.nl \
    --cc=aarcange@redhat.com \
    --cc=akpm@linux-foundation.org \
    --cc=avi@redhat.com \
    --cc=benh@kernel.crashing.org \
    --cc=davem@davemloft.net \
    --cc=hugh.dickins@tiscali.co.uk \
    --cc=linux-arch@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mel@csn.ul.ie \
    --cc=mingo@elte.hu \
    --cc=npiggin@suse.de \
    --cc=paulmck@linux.vnet.ibm.com \
    --cc=riel@redhat.com \
    --cc=tglx@linutronix.de \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).