From: Andrew Morton <akpm@linux-foundation.org>
To: akpm@linux-foundation.org, arjunroy@google.com,
	davem@davemloft.net, edumazet@google.com, jgg@ziepe.ca,
	linux-mm@kvack.org, mm-commits@vger.kernel.org,
	sfr@canb.auug.org.au, soheil@google.com,
	torvalds@linux-foundation.org, willy@infradead.org
Subject: [patch 15/35] mm/memory.c: add vm_insert_pages()
Date: Fri, 10 Apr 2020 14:33:01 -0700
Message-ID: <20200410213301.Nh4ENtuKn%akpm@linux-foundation.org>
In-Reply-To: <20200410143047.bf34a933ce1affdc042c7c80@linux-foundation.org>

From: Arjun Roy <arjunroy@google.com>
Subject: mm/memory.c: add vm_insert_pages()

Add the ability to insert multiple pages at once into a user VM with
fewer PTE spinlock acquisitions.

The intention of this patch set is to reduce atomic ops for TCP zerocopy
receive, which normally hits the same spinlock multiple times
consecutively.
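
To illustrate the intended usage (this sketch is not part of the patch;
the caller-side helpers are hypothetical), a batch of pages that would
previously be mapped with one vm_insert_page() call per page -- taking
and dropping the PTE lock each time -- can instead be handed to
vm_insert_pages() in a single call, which holds the lock across groups
of up to 8 PTE writes:

#include <linux/mm.h>

/* Illustrative only.  Old pattern: one PTE lock acquisition per page. */
static int map_one_by_one(struct vm_area_struct *vma, unsigned long addr,
			  struct page **pages, unsigned long nr)
{
	unsigned long i;
	int err;

	for (i = 0; i < nr; i++) {
		err = vm_insert_page(vma, addr + i * PAGE_SIZE, pages[i]);
		if (err)
			return err;
	}
	return 0;
}

/* New pattern: the PTE lock is held across batches of insertions. */
static int map_batched(struct vm_area_struct *vma, unsigned long addr,
		       struct page **pages, unsigned long nr)
{
	unsigned long remaining = nr;	/* in: pages to map; out: pages not mapped */

	return vm_insert_pages(vma, addr, pages, &remaining);
}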

[akpm@linux-foundation.org: pte_alloc() no longer takes the `addr' argument]
[arjunroy@google.com: add missing page_count() check to vm_insert_pages()]
  Link: http://lkml.kernel.org/r/20200214005929.104481-1-arjunroy.kdev@gmail.com
[arjunroy@google.com: vm_insert_pages() checks if pte_index defined]
  Link: http://lkml.kernel.org/r/20200228054714.204424-2-arjunroy.kdev@gmail.com
Link: http://lkml.kernel.org/r/20200128025958.43490-2-arjunroy.kdev@gmail.com
Signed-off-by: Arjun Roy <arjunroy@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
Cc: David Miller <davem@davemloft.net>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---

 include/linux/mm.h |    2 
 mm/memory.c        |  129 ++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 129 insertions(+), 2 deletions(-)

--- a/include/linux/mm.h~mm-add-vm_insert_pages
+++ a/include/linux/mm.h
@@ -2689,6 +2689,8 @@ struct vm_area_struct *find_extend_vma(s
 int remap_pfn_range(struct vm_area_struct *, unsigned long addr,
 			unsigned long pfn, unsigned long size, pgprot_t);
 int vm_insert_page(struct vm_area_struct *, unsigned long addr, struct page *);
+int vm_insert_pages(struct vm_area_struct *vma, unsigned long addr,
+			struct page **pages, unsigned long *num);
 int vm_map_pages(struct vm_area_struct *vma, struct page **pages,
 				unsigned long num);
 int vm_map_pages_zero(struct vm_area_struct *vma, struct page **pages,
--- a/mm/memory.c~mm-add-vm_insert_pages
+++ a/mm/memory.c
@@ -1419,8 +1419,7 @@ void zap_vma_ptes(struct vm_area_struct
 }
 EXPORT_SYMBOL_GPL(zap_vma_ptes);
 
-pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr,
-			spinlock_t **ptl)
+static pmd_t *walk_to_pmd(struct mm_struct *mm, unsigned long addr)
 {
 	pgd_t *pgd;
 	p4d_t *p4d;
@@ -1439,6 +1438,16 @@ pte_t *__get_locked_pte(struct mm_struct
 		return NULL;
 
 	VM_BUG_ON(pmd_trans_huge(*pmd));
+	return pmd;
+}
+
+pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr,
+			spinlock_t **ptl)
+{
+	pmd_t *pmd = walk_to_pmd(mm, addr);
+
+	if (!pmd)
+		return NULL;
 	return pte_alloc_map_lock(mm, pmd, addr, ptl);
 }
 
@@ -1491,6 +1500,122 @@ out:
 	return retval;
 }
 
+#ifdef pte_index
+static int insert_page_in_batch_locked(struct mm_struct *mm, pmd_t *pmd,
+			unsigned long addr, struct page *page, pgprot_t prot)
+{
+	int err;
+
+	if (!page_count(page))
+		return -EINVAL;
+	err = validate_page_before_insert(page);
+	return err ? err : insert_page_into_pte_locked(
+		mm, pte_offset_map(pmd, addr), addr, page, prot);
+}
+
+/* insert_pages() amortizes the cost of spinlock operations
+ * when inserting pages in a loop. Arch *must* define pte_index.
+ */
+static int insert_pages(struct vm_area_struct *vma, unsigned long addr,
+			struct page **pages, unsigned long *num, pgprot_t prot)
+{
+	pmd_t *pmd = NULL;
+	spinlock_t *pte_lock = NULL;
+	struct mm_struct *const mm = vma->vm_mm;
+	unsigned long curr_page_idx = 0;
+	unsigned long remaining_pages_total = *num;
+	unsigned long pages_to_write_in_pmd;
+	int ret;
+more:
+	ret = -EFAULT;
+	pmd = walk_to_pmd(mm, addr);
+	if (!pmd)
+		goto out;
+
+	pages_to_write_in_pmd = min_t(unsigned long,
+		remaining_pages_total, PTRS_PER_PTE - pte_index(addr));
+
+	/* Allocate the PTE if necessary; takes PMD lock once only. */
+	ret = -ENOMEM;
+	if (pte_alloc(mm, pmd))
+		goto out;
+	pte_lock = pte_lockptr(mm, pmd);
+
+	while (pages_to_write_in_pmd) {
+		int pte_idx = 0;
+		const int batch_size = min_t(int, pages_to_write_in_pmd, 8);
+
+		spin_lock(pte_lock);
+		for (; pte_idx < batch_size; ++pte_idx) {
+			int err = insert_page_in_batch_locked(mm, pmd,
+				addr, pages[curr_page_idx], prot);
+			if (unlikely(err)) {
+				spin_unlock(pte_lock);
+				ret = err;
+				remaining_pages_total -= pte_idx;
+				goto out;
+			}
+			addr += PAGE_SIZE;
+			++curr_page_idx;
+		}
+		spin_unlock(pte_lock);
+		pages_to_write_in_pmd -= batch_size;
+		remaining_pages_total -= batch_size;
+	}
+	if (remaining_pages_total)
+		goto more;
+	ret = 0;
+out:
+	*num = remaining_pages_total;
+	return ret;
+}
+#endif  /* ifdef pte_index */
+
+/**
+ * vm_insert_pages - insert multiple pages into user vma, batching the pmd lock.
+ * @vma: user vma to map to
+ * @addr: target start user address of these pages
+ * @pages: source kernel pages
+ * @num: in: number of pages to map. out: number of pages that were *not*
+ * mapped. (0 means all pages were successfully mapped).
+ *
+ * Preferred over vm_insert_page() when inserting multiple pages.
+ *
+ * In case of error, we may have mapped a subset of the provided
+ * pages. It is the caller's responsibility to account for this case.
+ *
+ * The same restrictions apply as in vm_insert_page().
+ */
+int vm_insert_pages(struct vm_area_struct *vma, unsigned long addr,
+			struct page **pages, unsigned long *num)
+{
+#ifdef pte_index
+	const unsigned long end_addr = addr + (*num * PAGE_SIZE) - 1;
+
+	if (addr < vma->vm_start || end_addr >= vma->vm_end)
+		return -EFAULT;
+	if (!(vma->vm_flags & VM_MIXEDMAP)) {
+		BUG_ON(down_read_trylock(&vma->vm_mm->mmap_sem));
+		BUG_ON(vma->vm_flags & VM_PFNMAP);
+		vma->vm_flags |= VM_MIXEDMAP;
+	}
+	/* Defer page refcount checking till we're about to map that page. */
+	return insert_pages(vma, addr, pages, num, vma->vm_page_prot);
+#else
+	unsigned long idx = 0, pgcount = *num;
+	int err;
+
+	for (; idx < pgcount; ++idx) {
+		err = vm_insert_page(vma, addr + (PAGE_SIZE * idx), pages[idx]);
+		if (err)
+			break;
+	}
+	*num = pgcount - idx;
+	return err;
+#endif  /* ifdef pte_index */
+}
+EXPORT_SYMBOL(vm_insert_pages);
+
 /**
  * vm_insert_page - insert single page into user vma
  * @vma: user vma to map to
_
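
Not part of the patch, but a hedged sketch of the caller-side contract
described in the kernel-doc above: because *num returns the number of
pages that were *not* mapped, a caller that hits an error knows the
first (total - remaining) pages are already present in the VMA and can
clean them up itself, for example with the existing zap_vma_ptes()
helper (the wrapper name below is hypothetical):

#include <linux/mm.h>

/*
 * Hypothetical all-or-nothing wrapper around vm_insert_pages().  On a
 * partial failure, the already-mapped prefix of the batch is zapped so
 * the VMA is left as it was found.
 */
static int map_all_or_nothing(struct vm_area_struct *vma, unsigned long addr,
			      struct page **pages, unsigned long nr)
{
	unsigned long remaining = nr;
	int err = vm_insert_pages(vma, addr, pages, &remaining);

	if (err) {
		unsigned long mapped = nr - remaining;

		if (mapped)
			zap_vma_ptes(vma, addr, mapped * PAGE_SIZE);
	}
	return err;
}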

