linux-next.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Sergey Senozhatsky <sergey.senozhatsky.work@gmail.com>
To: Michal Hocko <mhocko@kernel.org>
Cc: Sergey Senozhatsky <sergey.senozhatsky.work@gmail.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	Vlastimil Babka <vbabka@suse.cz>,
	"Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>,
	Stephen Rothwell <sfr@canb.auug.org.au>,
	linux-mm@kvack.org, linux-next@vger.kernel.org,
	linux-kernel@vger.kernel.org,
	Andrea Arcangeli <aarcange@redhat.com>
Subject: Re: [linux-next: Tree for Jun 1] __khugepaged_exit rwsem_down_write_failed lockup
Date: Fri, 3 Jun 2016 17:43:47 +0900	[thread overview]
Message-ID: <20160603084347.GA502@swordfish> (raw)
In-Reply-To: <20160603072536.GB20676@dhcp22.suse.cz>

On (06/03/16 09:25), Michal Hocko wrote:
> > it's quite hard to trigger the bug (somehow), so I can't
> > follow up with more information as of now.

either I did something very silly fixing up the patch, or the
patch may be causing general protection faults on my system.

RIP collect_mm_slot() + 0x42/0x84
	khugepaged
	prepare_to_wait_event
	maybe_pmd_mkwrite
	kthread
	_raw_sin_unlock_irq
	ret_from_fork
	kthread_create_on_node

collect_mm_slot() + 0x42/0x84 is

0000000000000328 <collect_mm_slot>:
     328:       55                      push   %rbp
     329:       48 89 e5                mov    %rsp,%rbp
     32c:       53                      push   %rbx
     32d:       48 8b 5f 20             mov    0x20(%rdi),%rbx
     331:       8b 43 48                mov    0x48(%rbx),%eax
     334:       ff c8                   dec    %eax
     336:       7f 71                   jg     3a9 <collect_mm_slot+0x81>
     338:       48 8b 57 08             mov    0x8(%rdi),%rdx
     33c:       48 85 d2                test   %rdx,%rdx
     33f:       74 1e                   je     35f <collect_mm_slot+0x37>
     341:       48 8b 07                mov    (%rdi),%rax
     344:       48 85 c0                test   %rax,%rax
     347:       48 89 02                mov    %rax,(%rdx)
     34a:       74 04                   je     350 <collect_mm_slot+0x28>
     34c:       48 89 50 08             mov    %rdx,0x8(%rax)
     350:       48 c7 07 00 00 00 00    movq   $0x0,(%rdi)
     357:       48 c7 47 08 00 00 00    movq   $0x0,0x8(%rdi)
     35e:       00 
     35f:       48 8b 57 10             mov    0x10(%rdi),%rdx
     363:       48 8b 47 18             mov    0x18(%rdi),%rax
     367:       48 89 fe                mov    %rdi,%rsi
     36a:       48 89 42 08             mov    %rax,0x8(%rdx)
		^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
     36e:       48 89 10                mov    %rdx,(%rax)
     371:       48 b8 00 01 00 00 00    movabs $0xdead000000000100,%rax
     378:       00 ad de 
     37b:       48 89 47 10             mov    %rax,0x10(%rdi)
     37f:       48 b8 00 02 00 00 00    movabs $0xdead000000000200,%rax
     386:       00 ad de 
     389:       48 89 47 18             mov    %rax,0x18(%rdi)
     38d:       48 8b 3d 00 00 00 00    mov    0x0(%rip),%rdi        # 394 <collect_mm_slot+0x6c>
     394:       e8 00 00 00 00          callq  399 <collect_mm_slot+0x71>
     399:       f0 ff 4b 4c             lock decl 0x4c(%rbx)
     39d:       74 02                   je     3a1 <collect_mm_slot+0x79>
     39f:       eb 08                   jmp    3a9 <collect_mm_slot+0x81>
     3a1:       48 89 df                mov    %rbx,%rdi
     3a4:       e8 00 00 00 00          callq  3a9 <collect_mm_slot+0x81>
     3a9:       5b                      pop    %rbx
     3aa:       5d                      pop    %rbp
     3ab:       c3                      retq

which is list_del(&mm_slot->mm_node), I believe.


I attached the patch (just in case).

---
 mm/huge_memory.c | 87 +++++++++++++++++++++++++-------------------------------
 1 file changed, 39 insertions(+), 48 deletions(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 292cedd..1c82fa4 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1938,7 +1938,8 @@ static void insert_to_mm_slots_hash(struct mm_struct *mm,
 
 static inline int khugepaged_test_exit(struct mm_struct *mm)
 {
-	return atomic_read(&mm->mm_users) == 0;
+	/* the only pin is from khugepaged_scan_mm_slot */
+	return atomic_read(&mm->mm_users) <= 1;
 }
 
 int __khugepaged_enter(struct mm_struct *mm)
@@ -1950,8 +1951,6 @@ int __khugepaged_enter(struct mm_struct *mm)
 	if (!mm_slot)
 		return -ENOMEM;
 
-	/* __khugepaged_exit() must not run from under us */
-	VM_BUG_ON_MM(khugepaged_test_exit(mm), mm);
 	if (unlikely(test_and_set_bit(MMF_VM_HUGEPAGE, &mm->flags))) {
 		free_mm_slot(mm_slot);
 		return 0;
@@ -1994,36 +1993,40 @@ int khugepaged_enter_vma_merge(struct vm_area_struct *vma,
 	return 0;
 }
 
-void __khugepaged_exit(struct mm_struct *mm)
+static void collect_mm_slot(struct mm_slot *mm_slot)
 {
-	struct mm_slot *mm_slot;
-	int free = 0;
+	struct mm_struct *mm = mm_slot->mm;
 
-	spin_lock(&khugepaged_mm_lock);
-	mm_slot = get_mm_slot(mm);
-	if (mm_slot && khugepaged_scan.mm_slot != mm_slot) {
+	VM_BUG_ON(NR_CPUS != 1 && !spin_is_locked(&khugepaged_mm_lock));
+
+	if (khugepaged_test_exit(mm)) {
+		/* free mm_slot */
 		hash_del(&mm_slot->hash);
 		list_del(&mm_slot->mm_node);
-		free = 1;
-	}
-	spin_unlock(&khugepaged_mm_lock);
 
-	if (free) {
-		clear_bit(MMF_VM_HUGEPAGE, &mm->flags);
-		free_mm_slot(mm_slot);
-		mmdrop(mm);
-	} else if (mm_slot) {
 		/*
-		 * This is required to serialize against
-		 * khugepaged_test_exit() (which is guaranteed to run
-		 * under mmap sem read mode). Stop here (after we
-		 * return all pagetables will be destroyed) until
-		 * khugepaged has finished working on the pagetables
-		 * under the mmap_sem.
+		 * Not strictly needed because the mm exited already.
+		 *
+		 * clear_bit(MMF_VM_HUGEPAGE, &mm->flags);
 		 */
-		down_write(&mm->mmap_sem);
-		up_write(&mm->mmap_sem);
+
+		/* khugepaged_mm_lock actually not necessary for the below */
+		free_mm_slot(mm_slot);
+		mmdrop(mm);
+	}
+}
+
+void __khugepaged_exit(struct mm_struct *mm)
+{
+	struct mm_slot *mm_slot;
+
+	spin_lock(&khugepaged_mm_lock);
+	mm_slot = get_mm_slot(mm);
+	if (mm_slot) {
+		collect_mm_slot(mm_slot);
+		clear_bit(MMF_VM_HUGEPAGE, &mm->flags);
 	}
+	spin_unlock(&khugepaged_mm_lock);
 }
 
 static void release_pte_page(struct page *page)
@@ -2738,29 +2741,6 @@ out:
 	return ret;
 }
 
-static void collect_mm_slot(struct mm_slot *mm_slot)
-{
-	struct mm_struct *mm = mm_slot->mm;
-
-	VM_BUG_ON(NR_CPUS != 1 && !spin_is_locked(&khugepaged_mm_lock));
-
-	if (khugepaged_test_exit(mm)) {
-		/* free mm_slot */
-		hash_del(&mm_slot->hash);
-		list_del(&mm_slot->mm_node);
-
-		/*
-		 * Not strictly needed because the mm exited already.
-		 *
-		 * clear_bit(MMF_VM_HUGEPAGE, &mm->flags);
-		 */
-
-		/* khugepaged_mm_lock actually not necessary for the below */
-		free_mm_slot(mm_slot);
-		mmdrop(mm);
-	}
-}
-
 static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
 					    struct page **hpage)
 	__releases(&khugepaged_mm_lock)
@@ -2782,6 +2762,16 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
 		khugepaged_scan.address = 0;
 		khugepaged_scan.mm_slot = mm_slot;
 	}
+
+	/*
+	 * Do not even try to do anything if the current mm is already
+	 * dead. khugepaged_mm_lock will make sure only this or
+	 * __khugepaged_exit does the unhasing.
+	 */
+	if (!atomic_inc_not_zero(&mm_slot->mm->mm_users)) {
+		collect_mm_slot(mm_slot);
+		return progress;
+	}
 	spin_unlock(&khugepaged_mm_lock);
 
 	mm = mm_slot->mm;
@@ -2865,6 +2855,7 @@ breakouterloop_mmap_sem:
 
 		collect_mm_slot(mm_slot);
 	}
+	mmput_async(mm);
 
 	return progress;
 }
-- 
2.9.0.rc1

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  reply	other threads:[~2016-06-03  8:43 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-06-01  3:11 linux-next: Tree for Jun 1 Stephen Rothwell
2016-06-02  1:48 ` [linux-next: Tree for Jun 1] __khugepaged_exit rwsem_down_write_failed lockup Sergey Senozhatsky
2016-06-02  9:21   ` Michal Hocko
2016-06-02 12:08     ` Sergey Senozhatsky
2016-06-02 12:21       ` Michal Hocko
2016-06-03 13:51         ` Andrea Arcangeli
2016-06-03 14:46           ` Michal Hocko
2016-06-03 15:10             ` Andrea Arcangeli
2016-06-07  7:34               ` Michal Hocko
2016-06-08  8:19               ` Vlastimil Babka
2016-06-03  7:15     ` Sergey Senozhatsky
2016-06-03  7:25       ` Michal Hocko
2016-06-03  8:43         ` Sergey Senozhatsky [this message]
2016-06-03  9:55           ` Michal Hocko
2016-06-03 10:05             ` Michal Hocko
2016-06-03 13:38               ` Sergey Senozhatsky
2016-06-03 13:45                 ` Michal Hocko
2016-06-03 13:49                   ` Michal Hocko
2016-06-04  7:51                     ` Sergey Senozhatsky
2016-06-06  8:39                       ` Michal Hocko
2016-06-02 13:24   ` Vlastimil Babka
2016-06-02 18:58     ` Ebru Akagunduz
2016-06-03  1:00       ` Sergey Senozhatsky
2016-06-03  1:29         ` Sergey Senozhatsky
2016-06-03  4:14           ` Sergey Senozhatsky
2016-06-03 12:28     ` [PATCH] mm, thp: fix locking inconsistency in collapse_huge_page Ebru Akagunduz
2016-06-06 13:05       ` Vlastimil Babka
2016-06-09  3:51         ` Sergey Senozhatsky

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20160603084347.GA502@swordfish \
    --to=sergey.senozhatsky.work@gmail.com \
    --cc=aarcange@redhat.com \
    --cc=akpm@linux-foundation.org \
    --cc=kirill.shutemov@linux.intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-next@vger.kernel.org \
    --cc=mhocko@kernel.org \
    --cc=sfr@canb.auug.org.au \
    --cc=vbabka@suse.cz \
    --subject='Re: [linux-next: Tree for Jun 1] __khugepaged_exit rwsem_down_write_failed lockup' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
on how to clone and mirror all data and code used for this inbox