linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* KSM WARN_ON_ONCE(page_mapped(page)) in remove_stable_node()
@ 2019-11-13 10:34 Andrey Ryabinin
  2019-11-15 17:55 ` Hugh Dickins
  0 siblings, 1 reply; 2+ messages in thread
From: Andrey Ryabinin @ 2019-11-13 10:34 UTC (permalink / raw)
  To: Hugh Dickins, Andrea Arcangeli; +Cc: linux-mm, LKML

[-- Attachment #1: Type: text/plain, Size: 1989 bytes --]

When remove_stable_node() races with __mmput() and squeezes in between ksm_exit() and exit_mmap(),
the WARN_ON_ONCE(page_mapped(page)) in remove_stable_node() could be triggered.

Should we just remove the warning? It seems to be safe to do, all callers are able to handle -EBUSY,
or there is a better way to fix this?



It's easily reproducible with the following script:
(ksm_test.c attached)

	#!/bin/bash

	gcc -lnuma -O2 ksm_test.c -o ksm_test
	echo 1 > /sys/kernel/mm/ksm/run
	./ksm_test &
	sleep 1
	echo 2 > /sys/kernel/mm/ksm/run

and the patch bellow which provokes that race.

---
 include/linux/ksm.h      | 4 +++-
 include/linux/mm_types.h | 1 +
 kernel/fork.c            | 4 ++++
 3 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/include/linux/ksm.h b/include/linux/ksm.h
index e48b1e453ff5..18384ea472f8 100644
--- a/include/linux/ksm.h
+++ b/include/linux/ksm.h
@@ -33,8 +33,10 @@ static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
 
 static inline void ksm_exit(struct mm_struct *mm)
 {
-	if (test_bit(MMF_VM_MERGEABLE, &mm->flags))
+	if (test_bit(MMF_VM_MERGEABLE, &mm->flags)) {
 		__ksm_exit(mm);
+		mm->ksm_wait = 1;
+	}
 }
 
 /*
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 270aa8fd2800..3df8290528c2 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -463,6 +463,7 @@ struct mm_struct {
 
 		/* Architecture-specific MM context */
 		mm_context_t context;
+		unsigned long ksm_wait;
 
 		unsigned long flags; /* Must use atomic bitops to access */
 
diff --git a/kernel/fork.c b/kernel/fork.c
index 5fb7e1fa0b05..be6ef4e046f0 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1074,6 +1074,10 @@ static inline void __mmput(struct mm_struct *mm)
 	uprobe_clear_state(mm);
 	exit_aio(mm);
 	ksm_exit(mm);
+
+	if (mm->ksm_wait)
+		schedule_timeout_uninterruptible(10*HZ);
+
 	khugepaged_exit(mm); /* must run before exit_mmap */
 	exit_mmap(mm);
 	mm_put_huge_zero_page(mm);
-- 
2.23.0

[-- Attachment #2: ksm_test.c --]
[-- Type: text/x-csrc, Size: 3070 bytes --]

#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sys/mman.h>
#include <stdlib.h>
#include <unistd.h>
#include <stdio.h>
#include <numaif.h>
#include <sys/types.h>
#include <sys/wait.h>


//#define NR_NODES 4
#define NR_NODES 1

#define MAP_SIZE 4096

#define NR_THREADS 1024

pid_t pids[NR_THREADS];

int merge_and_migrate(void)
{
        void *p;
        unsigned long rnd;
        unsigned long old_node, new_node;
        pid_t p_pid, pid;
        int j;

        p = mmap(NULL, MAP_SIZE, PROT_READ|PROT_WRITE,
                MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED)
                perror("mmap"), exit(1);

        memset(p, 0xff, MAP_SIZE);
        if (madvise(p, MAP_SIZE, MADV_MERGEABLE))
                perror("madvise"), exit(1);
	sleep(1000000);

        while (1) {
                sleep(0);
                rnd = rand() % 2;
                switch (rnd) {
                case 0: {
                        rnd = rand() % 128;
                        memset(p, rnd, MAP_SIZE);
                        break;
                }
                case 1: {
                        j = rand()%NR_NODES;
                        old_node = 1 << j;
                        new_node = 1<<((j+1)%NR_NODES);

                        migrate_pages(0, NR_NODES, &old_node, &new_node);
                        break;
                }
                }
        }
        return 0;
}

int main(void)
{
        int i,ret,j;
        pid_t pid;
        int wstatus;
        unsigned long old_node, new_node;

        for (i = 0; i < NR_THREADS; i++) {
                pid = fork();
                if (pid < 0) {
                        perror("fork");
                        return 1;
                }
                if (pid) {
                        pids[i] = pid;
                        continue;
                } else
                        merge_and_migrate();
        }

        while (1) {
                pid = waitpid(-1, &wstatus, WNOHANG);
                if (pid < 0) {
                        perror("waitpid failed");
                        return 1;
                }
                if (pid) {
                        for (i = 0; i< NR_THREADS; i++) {
                                if (pids[i] == pid) {
                                        pid = fork();
                                        if (pid < 0) {
                                                perror("fork in while");
                                                return 1;
                                        }
                                        if (pid) {
                                                pids[i] = pid;
                                                break;
                                        } else
                                                merge_and_migrate();
                                }
                        }
                        continue; /*while(1)*/
                }
                i = rand()%NR_THREADS;
                kill(pids[i], SIGKILL);
        }
        return 0;
}

^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: KSM WARN_ON_ONCE(page_mapped(page)) in remove_stable_node()
  2019-11-13 10:34 KSM WARN_ON_ONCE(page_mapped(page)) in remove_stable_node() Andrey Ryabinin
@ 2019-11-15 17:55 ` Hugh Dickins
  0 siblings, 0 replies; 2+ messages in thread
From: Hugh Dickins @ 2019-11-15 17:55 UTC (permalink / raw)
  To: Andrey Ryabinin; +Cc: Hugh Dickins, Andrea Arcangeli, linux-mm, LKML

On Wed, 13 Nov 2019, Andrey Ryabinin wrote:

> When remove_stable_node() races with __mmput() and squeezes in between ksm_exit() and exit_mmap(),
> the WARN_ON_ONCE(page_mapped(page)) in remove_stable_node() could be triggered.

Thanks for doing the work on that.

> 
> Should we just remove the warning? It seems to be safe to do, all callers are able to handle -EBUSY,
> or there is a better way to fix this?

Please just remove the warning and update the comment.

At first I thought it would still be worth leaving the warning in,
to explain rare EBUSY; but after looking (as you did) at exactly how
remove_stable_node() gets to be called, the places that care about its
failure already expect KSM to be quiesced, and tell the admin EBUSY if
not.  This case is not different, just slightly delayed.  So there's
no need to try to be any cleverer about it.

Though now that you have found the case to be a real one, I do think
it would be an improvement for remove_stable_node_chain() not to give
up at the first -EBUSY, but remove as much as it can before finishing.
But perhaps there's a subtlety that prevents that - I'm not familiar
with the stable node chains.

Hugh

> 
> 
> 
> It's easily reproducible with the following script:
> (ksm_test.c attached)
> 
> 	#!/bin/bash
> 
> 	gcc -lnuma -O2 ksm_test.c -o ksm_test
> 	echo 1 > /sys/kernel/mm/ksm/run
> 	./ksm_test &
> 	sleep 1
> 	echo 2 > /sys/kernel/mm/ksm/run
> 
> and the patch bellow which provokes that race.
> 
> ---
>  include/linux/ksm.h      | 4 +++-
>  include/linux/mm_types.h | 1 +
>  kernel/fork.c            | 4 ++++
>  3 files changed, 8 insertions(+), 1 deletion(-)
> 
> diff --git a/include/linux/ksm.h b/include/linux/ksm.h
> index e48b1e453ff5..18384ea472f8 100644
> --- a/include/linux/ksm.h
> +++ b/include/linux/ksm.h
> @@ -33,8 +33,10 @@ static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
>  
>  static inline void ksm_exit(struct mm_struct *mm)
>  {
> -	if (test_bit(MMF_VM_MERGEABLE, &mm->flags))
> +	if (test_bit(MMF_VM_MERGEABLE, &mm->flags)) {
>  		__ksm_exit(mm);
> +		mm->ksm_wait = 1;
> +	}
>  }
>  
>  /*
> diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> index 270aa8fd2800..3df8290528c2 100644
> --- a/include/linux/mm_types.h
> +++ b/include/linux/mm_types.h
> @@ -463,6 +463,7 @@ struct mm_struct {
>  
>  		/* Architecture-specific MM context */
>  		mm_context_t context;
> +		unsigned long ksm_wait;
>  
>  		unsigned long flags; /* Must use atomic bitops to access */
>  
> diff --git a/kernel/fork.c b/kernel/fork.c
> index 5fb7e1fa0b05..be6ef4e046f0 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -1074,6 +1074,10 @@ static inline void __mmput(struct mm_struct *mm)
>  	uprobe_clear_state(mm);
>  	exit_aio(mm);
>  	ksm_exit(mm);
> +
> +	if (mm->ksm_wait)
> +		schedule_timeout_uninterruptible(10*HZ);
> +
>  	khugepaged_exit(mm); /* must run before exit_mmap */
>  	exit_mmap(mm);
>  	mm_put_huge_zero_page(mm);
> -- 
> 2.23.0
> 

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2019-11-15 17:56 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-11-13 10:34 KSM WARN_ON_ONCE(page_mapped(page)) in remove_stable_node() Andrey Ryabinin
2019-11-15 17:55 ` Hugh Dickins

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).