From: Laurent Dufour <ldufour@linux.vnet.ibm.com>
To: akpm@linux-foundation.org, mhocko@kernel.org,
	peterz@infradead.org, kirill@shutemov.name, ak@linux.intel.com,
	dave@stgolabs.net, jack@suse.cz,
	Matthew Wilcox <willy@infradead.org>,
	benh@kernel.crashing.org, mpe@ellerman.id.au, paulus@samba.org,
	Thomas Gleixner <tglx@linutronix.de>,
	Ingo Molnar <mingo@redhat.com>,
	hpa@zytor.com, Will Deacon <will.deacon@arm.com>,
	Sergey Senozhatsky <sergey.senozhatsky@gmail.com>,
	Andrea Arcangeli <aarcange@redhat.com>,
	Alexei Starovoitov <alexei.starovoitov@gmail.com>,
	kemi.wang@intel.com, sergey.senozhatsky.work@gmail.com,
	Daniel Jordan <daniel.m.jordan@oracle.com>,
	David Rientjes <rientjes@google.com>,
	Jerome Glisse <jglisse@redhat.com>,
	Ganesh Mahendran <opensource.ganesh@gmail.com>
Cc: linux-kernel@vger.kernel.org, linux-mm@kvack.org,
	haren@linux.vnet.ibm.com, khandual@linux.vnet.ibm.com,
	npiggin@gmail.com, bsingharora@gmail.com,
	paulmck@linux.vnet.ibm.com, Tim Chen <tim.c.chen@linux.intel.com>,
	linuxppc-dev@lists.ozlabs.org, x86@kernel.org
Subject: [PATCH v10 08/25] mm: VMA sequence count
Date: Tue, 17 Apr 2018 16:33:14 +0200
Message-ID: <1523975611-15978-9-git-send-email-ldufour@linux.vnet.ibm.com>
In-Reply-To: <1523975611-15978-1-git-send-email-ldufour@linux.vnet.ibm.com>

From: Peter Zijlstra <peterz@infradead.org>

Wrap the VMA modifications (vma_adjust/unmap_page_range) with sequence
counts so that we can easily test whether a VMA has changed.
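
For illustration, the write side amounts to bracketing every VMA
modification (a sketch of the pattern only; the call sites actually
added below are __vma_adjust() and unmap_page_range()):

	vm_write_begin(vma);	/* count becomes odd: writer active */
	/* ... adjust vma->vm_start/vm_end, or tear down its page tables ... */
	vm_write_end(vma);	/* count even again: VMA stable */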

Wrapping unmap_page_range() lets us make assumptions about page-tables:
when we find the seqcount hasn't changed, we can assume the page-tables
are still valid.
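
A minimal sketch of the matching reader side, using the stock seqcount
API for brevity (note that the speculative fault path added later in
this series never spins on the count; see vma_has_changed() and
handle_speculative_fault()):

	unsigned int seq;

	seq = read_seqcount_begin(&vma->vm_sequence);
	/* ... speculatively walk the page tables, prepare the fault ... */
	if (read_seqcount_retry(&vma->vm_sequence, seq))
		goto fallback;	/* VMA changed under us: retry under mmap_sem */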

The flip side is that we cannot distinguish a vma_adjust() from an
unmap_page_range(); with the former we could have re-checked the VMA
bounds against the faulting address.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>

[Port to 4.12 kernel]
[Build depends on CONFIG_SPECULATIVE_PAGE_FAULT]
[Introduce vm_write_* inline functions depending on
 CONFIG_SPECULATIVE_PAGE_FAULT]
[Fix lock dependency between mapping->i_mmap_rwsem and vma->vm_sequence by
 using vm_raw_write* functions]
[Fix a lock dependency warning in mmap_region() when entering the error
 path]
[Move sequence initialisation into INIT_VMA()]
Signed-off-by: Laurent Dufour <ldufour@linux.vnet.ibm.com>
---
 include/linux/mm.h       | 44 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/mm_types.h |  3 +++
 mm/memory.c              |  2 ++
 mm/mmap.c                | 31 +++++++++++++++++++++++++++++++
 4 files changed, 80 insertions(+)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index efc1248b82bd..988daf7030c9 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1264,6 +1264,9 @@ struct zap_details {
 static inline void INIT_VMA(struct vm_area_struct *vma)
 {
 	INIT_LIST_HEAD(&vma->anon_vma_chain);
+#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
+	seqcount_init(&vma->vm_sequence);
+#endif
 }
 
 struct page *_vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
@@ -1386,6 +1389,47 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping,
 	unmap_mapping_range(mapping, holebegin, holelen, 0);
 }
 
+#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
+static inline void vm_write_begin(struct vm_area_struct *vma)
+{
+	write_seqcount_begin(&vma->vm_sequence);
+}
+static inline void vm_write_begin_nested(struct vm_area_struct *vma,
+					 int subclass)
+{
+	write_seqcount_begin_nested(&vma->vm_sequence, subclass);
+}
+static inline void vm_write_end(struct vm_area_struct *vma)
+{
+	write_seqcount_end(&vma->vm_sequence);
+}
+static inline void vm_raw_write_begin(struct vm_area_struct *vma)
+{
+	raw_write_seqcount_begin(&vma->vm_sequence);
+}
+static inline void vm_raw_write_end(struct vm_area_struct *vma)
+{
+	raw_write_seqcount_end(&vma->vm_sequence);
+}
+#else
+static inline void vm_write_begin(struct vm_area_struct *vma)
+{
+}
+static inline void vm_write_begin_nested(struct vm_area_struct *vma,
+					 int subclass)
+{
+}
+static inline void vm_write_end(struct vm_area_struct *vma)
+{
+}
+static inline void vm_raw_write_begin(struct vm_area_struct *vma)
+{
+}
+static inline void vm_raw_write_end(struct vm_area_struct *vma)
+{
+}
+#endif /* CONFIG_SPECULATIVE_PAGE_FAULT */
+
 extern int access_process_vm(struct task_struct *tsk, unsigned long addr,
 		void *buf, int len, unsigned int gup_flags);
 extern int access_remote_vm(struct mm_struct *mm, unsigned long addr,
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 21612347d311..db5e9d630e7a 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -335,6 +335,9 @@ struct vm_area_struct {
 	struct mempolicy *vm_policy;	/* NUMA policy for the VMA */
 #endif
 	struct vm_userfaultfd_ctx vm_userfaultfd_ctx;
+#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
+	seqcount_t vm_sequence;
+#endif
 } __randomize_layout;
 
 struct core_thread {
diff --git a/mm/memory.c b/mm/memory.c
index f86efcb8e268..f7fed053df80 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1503,6 +1503,7 @@ void unmap_page_range(struct mmu_gather *tlb,
 	unsigned long next;
 
 	BUG_ON(addr >= end);
+	vm_write_begin(vma);
 	tlb_start_vma(tlb, vma);
 	pgd = pgd_offset(vma->vm_mm, addr);
 	do {
@@ -1512,6 +1513,7 @@ void unmap_page_range(struct mmu_gather *tlb,
 		next = zap_p4d_range(tlb, vma, pgd, addr, next, details);
 	} while (pgd++, addr = next, addr != end);
 	tlb_end_vma(tlb, vma);
+	vm_write_end(vma);
 }
 
 
diff --git a/mm/mmap.c b/mm/mmap.c
index 8bd9ae1dfacc..813e49589ea1 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -692,6 +692,30 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
 	long adjust_next = 0;
 	int remove_next = 0;
 
+	/*
+	 * Why use the vm_raw_write*() functions here? To avoid a lockdep warning.
+	 *
+	 * Lockdep complains about a theoretical lock dependency involving
+	 * three locks:
+	 *   mapping->i_mmap_rwsem --> vma->vm_sequence --> fs_reclaim
+	 *
+	 * Here are the major paths leading to this dependency:
+	 *  1. __vma_adjust()           mmap_sem -> vm_sequence -> i_mmap_rwsem
+	 *  2. move_vma()               mmap_sem -> vm_sequence -> fs_reclaim
+	 *  3. __alloc_pages_nodemask() fs_reclaim -> i_mmap_rwsem
+	 *  4. unmap_mapping_range()    i_mmap_rwsem -> vm_sequence
+	 *
+	 * There is no easy way to break this cycle, especially because in
+	 * unmap_mapping_range() the i_mmap_rwsem is grabbed while the impacted
+	 * VMAs are not yet known.
+	 * However, the way vm_sequence is used guarantees that we never block
+	 * on it: we only check its value and never wait for it to change; see
+	 * vma_has_changed() and handle_speculative_fault().
+	 */
+	vm_raw_write_begin(vma);
+	if (next)
+		vm_raw_write_begin(next);
+
 	if (next && !insert) {
 		struct vm_area_struct *exporter = NULL, *importer = NULL;
 
@@ -902,6 +926,7 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
 			anon_vma_merge(vma, next);
 		mm->map_count--;
 		mpol_put(vma_policy(next));
+		vm_raw_write_end(next);
 		kmem_cache_free(vm_area_cachep, next);
 		/*
 		 * In mprotect's case 6 (see comments on vma_merge),
@@ -916,6 +941,8 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
 			 * "vma->vm_next" gap must be updated.
 			 */
 			next = vma->vm_next;
+			if (next)
+				vm_raw_write_begin(next);
 		} else {
 			/*
 			 * For the scope of the comment "next" and
@@ -962,6 +989,10 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
 	if (insert && file)
 		uprobe_mmap(insert);
 
+	if (next && next != vma)
+		vm_raw_write_end(next);
+	vm_raw_write_end(vma);
+
 	validate_mm(mm);
 
 	return 0;
-- 
2.7.4
