From: Suren Baghdasaryan <surenb@google.com>
To: akpm@linux-foundation.org
Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com,
	vbabka@suse.cz, hannes@cmpxchg.org, mgorman@techsingularity.net,
	dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com,
	songliubraving@fb.com, peterx@redhat.com, david@redhat.com,
	dhowells@redhat.com, hughd@google.com, bigeasy@linutronix.de,
	punit.agrawal@bytedance.com, lstoakes@gmail.com, rppt@kernel.org,
	jannh@google.com, shakeelb@google.com, gthelen@google.com,
	gurua@google.com, arjunroy@google.com, posk@google.com,
	linux-mm@kvack.org, linux-kernel@vger.kernel.org,
	kernel-team@android.com, Suren Baghdasaryan <surenb@google.com>,
	syzbot+8955a9646d1a48b8be92@syzkaller.appspotmail.com
Subject: [PATCH 1/1] mm/khugepaged: fix vm_lock/i_mmap_rwsem inversion in retract_page_tables
Date: Fri,  3 Mar 2023 13:32:50 -0800
Message-ID: <20230303213250.3555716-1-surenb@google.com>

An internal syzkaller instance running on linux-next reported a lock
inversion caused by vm_lock being taken after i_mmap_rwsem:

======================================================
WARNING: possible circular locking dependency detected
6.2.0-next-20230301-syzkaller #0 Not tainted
------------------------------------------------------
syz-executor115/5084 is trying to acquire lock:
ffff888078307a90 (&vma->vm_lock->lock){++++}-{3:3}, at: vma_start_write include/linux/mm.h:678 [inline]
ffff888078307a90 (&vma->vm_lock->lock){++++}-{3:3}, at: retract_page_tables mm/khugepaged.c:1826 [inline]
ffff888078307a90 (&vma->vm_lock->lock){++++}-{3:3}, at: collapse_file+0x4fa5/0x5980 mm/khugepaged.c:2204

but task is already holding lock:
ffff88801f93efa8 (&mapping->i_mmap_rwsem){++++}-{3:3}, at: i_mmap_lock_write include/linux/fs.h:468 [inline]
ffff88801f93efa8 (&mapping->i_mmap_rwsem){++++}-{3:3}, at: retract_page_tables mm/khugepaged.c:1745 [inline]
ffff88801f93efa8 (&mapping->i_mmap_rwsem){++++}-{3:3}, at: collapse_file+0x3da6/0x5980 mm/khugepaged.c:2204

retract_page_tables() takes i_mmap_rwsem before the exclusive mmap_lock,
which is the inverse of the usual ordering. A deadlock is avoided there by
try-locking mmap_lock and skipping the VMA when the lock cannot be
obtained. Locking the VMA should follow the same pattern to avoid this
lock inversion.
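
To illustrate the pattern outside the kernel, here is a minimal userspace
sketch using POSIX rwlocks (the lock names and helper are made up for
illustration only; this is not the kernel code): when a path must take a
second lock while already holding another one in the "wrong" order, it
only trylocks the inner lock and skips the work on contention, so no
circular wait can form and the skipped work can be retried later.

  #include <pthread.h>
  #include <stdbool.h>
  #include <stdio.h>

  /* Stand-ins for i_mmap_rwsem and mmap_lock/vm_lock (illustrative only). */
  static pthread_rwlock_t lock_a = PTHREAD_RWLOCK_INITIALIZER; /* taken first here */
  static pthread_rwlock_t lock_b = PTHREAD_RWLOCK_INITIALIZER; /* normally taken first */

  /* Returns true if the work was done, false if it was skipped. */
  static bool do_work_out_of_order(void)
  {
          bool done = false;

          pthread_rwlock_wrlock(&lock_a);               /* like i_mmap_lock_write() */
          if (pthread_rwlock_trywrlock(&lock_b) == 0) { /* like mmap_write_trylock() or
                                                           vma_try_start_write(): never block */
                  /* ... do the work that needs both locks ... */
                  done = true;
                  pthread_rwlock_unlock(&lock_b);
          }
          pthread_rwlock_unlock(&lock_a);
          return done;                                  /* caller retries later if skipped */
  }

  int main(void)
  {
          printf("work %s\n", do_work_out_of_order() ? "done" : "skipped");
          return 0;
  }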

Fixes: 44a83f2083bd ("mm/khugepaged: write-lock VMA while collapsing a huge page")
Reported-by: syzbot+8955a9646d1a48b8be92@syzkaller.appspotmail.com
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
---
 include/linux/mm.h | 39 ++++++++++++++++++++++++++++-----------
 mm/khugepaged.c    |  5 ++++-
 2 files changed, 32 insertions(+), 12 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 1b9be34a24fb..5f16263d176d 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -676,18 +676,23 @@ static inline void vma_end_read(struct vm_area_struct *vma)
 	rcu_read_unlock();
 }
 
-static inline void vma_start_write(struct vm_area_struct *vma)
+static bool __is_vma_write_locked(struct vm_area_struct *vma, int *mm_lock_seq)
 {
-	int mm_lock_seq;
-
 	mmap_assert_write_locked(vma->vm_mm);
 
 	/*
 	 * current task is holding mmap_write_lock, both vma->vm_lock_seq and
 	 * mm->mm_lock_seq can't be concurrently modified.
 	 */
-	mm_lock_seq = READ_ONCE(vma->vm_mm->mm_lock_seq);
-	if (vma->vm_lock_seq == mm_lock_seq)
+	*mm_lock_seq = READ_ONCE(vma->vm_mm->mm_lock_seq);
+	return (vma->vm_lock_seq == *mm_lock_seq);
+}
+
+static inline void vma_start_write(struct vm_area_struct *vma)
+{
+	int mm_lock_seq;
+
+	if (__is_vma_write_locked(vma, &mm_lock_seq))
 		return;
 
 	down_write(&vma->vm_lock->lock);
@@ -695,14 +700,26 @@ static inline void vma_start_write(struct vm_area_struct *vma)
 	up_write(&vma->vm_lock->lock);
 }
 
+static inline bool vma_try_start_write(struct vm_area_struct *vma)
+{
+	int mm_lock_seq;
+
+	if (__is_vma_write_locked(vma, &mm_lock_seq))
+		return true;
+
+	if (!down_write_trylock(&vma->vm_lock->lock))
+		return false;
+
+	vma->vm_lock_seq = mm_lock_seq;
+	up_write(&vma->vm_lock->lock);
+	return true;
+}
+
 static inline void vma_assert_write_locked(struct vm_area_struct *vma)
 {
-	mmap_assert_write_locked(vma->vm_mm);
-	/*
-	 * current task is holding mmap_write_lock, both vma->vm_lock_seq and
-	 * mm->mm_lock_seq can't be concurrently modified.
-	 */
-	VM_BUG_ON_VMA(vma->vm_lock_seq != READ_ONCE(vma->vm_mm->mm_lock_seq), vma);
+	int mm_lock_seq;
+
+	VM_BUG_ON_VMA(!__is_vma_write_locked(vma, &mm_lock_seq), vma);
 }
 
 static inline void vma_mark_detached(struct vm_area_struct *vma, bool detached)
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index c64e01f03f27..408fed42c9f5 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1795,6 +1795,10 @@ static int retract_page_tables(struct address_space *mapping, pgoff_t pgoff,
 		result = SCAN_PTE_MAPPED_HUGEPAGE;
 		if ((cc->is_khugepaged || is_target) &&
 		    mmap_write_trylock(mm)) {
+			/* trylock for the same lock inversion as above */
+			if (!vma_try_start_write(vma))
+				goto unlock_next;
+
 			/*
 			 * Re-check whether we have an ->anon_vma, because
 			 * collapse_and_free_pmd() requires that either no
@@ -1823,7 +1827,6 @@ static int retract_page_tables(struct address_space *mapping, pgoff_t pgoff,
 				result = SCAN_PTE_UFFD_WP;
 				goto unlock_next;
 			}
-			vma_start_write(vma);
 			collapse_and_free_pmd(mm, vma, addr, pmd);
 			if (!cc->is_khugepaged && is_target)
 				result = set_huge_pmd(vma, addr, pmd, hpage);
-- 
2.40.0.rc0.216.gc4246ad0f0-goog

