All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Xu <peterx@redhat.com>
To: Axel Rasmussen <axelrasmussen@google.com>
Cc: "Alexander Viro" <viro@zeniv.linux.org.uk>,
	"Alexey Dobriyan" <adobriyan@gmail.com>,
	"Andrea Arcangeli" <aarcange@redhat.com>,
	"Andrew Morton" <akpm@linux-foundation.org>,
	"Anshuman Khandual" <anshuman.khandual@arm.com>,
	"Catalin Marinas" <catalin.marinas@arm.com>,
	"Chinwen Chang" <chinwen.chang@mediatek.com>,
	"Huang Ying" <ying.huang@intel.com>,
	"Ingo Molnar" <mingo@redhat.com>, "Jann Horn" <jannh@google.com>,
	"Jerome Glisse" <jglisse@redhat.com>,
	"Lokesh Gidra" <lokeshgidra@google.com>,
	"Matthew Wilcox (Oracle)" <willy@infradead.org>,
	"Michael Ellerman" <mpe@ellerman.id.au>,
	"Michal Koutný" <mkoutny@suse.com>,
	"Michel Lespinasse" <walken@google.com>,
	"Mike Kravetz" <mike.kravetz@oracle.com>,
	"Mike Rapoport" <rppt@linux.vnet.ibm.com>,
	"Nicholas Piggin" <npiggin@gmail.com>, "Shaohua Li" <shli@fb.com>,
	"Shawn Anastasio" <shawn@anastas.io>,
	"Steven Rostedt" <rostedt@goodmis.org>,
	"Steven Price" <steven.price@arm.com>,
	"Vlastimil Babka" <vbabka@suse.cz>,
	linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	linux-mm@kvack.org, "Adam Ruprecht" <ruprecht@google.com>,
	"Cannon Matthews" <cannonmatthews@google.com>,
	"Dr . David Alan Gilbert" <dgilbert@redhat.com>,
	"David Rientjes" <rientjes@google.com>,
	"Mina Almasry" <almasrymina@google.com>,
	"Oliver Upton" <oupton@google.com>
Subject: Re: [PATCH v4 08/10] userfaultfd: add UFFDIO_CONTINUE ioctl
Date: Mon, 8 Feb 2021 18:54:11 -0500	[thread overview]
Message-ID: <20210208235411.GC71523@xz-x1> (raw)
In-Reply-To: <20210204183433.1431202-9-axelrasmussen@google.com>

On Thu, Feb 04, 2021 at 10:34:31AM -0800, Axel Rasmussen wrote:
> +enum mcopy_atomic_mode {
> +	/* A normal copy_from_user into the destination range. */
> +	MCOPY_ATOMIC_NORMAL,
> +	/* Don't copy; map the destination range to the zero page. */
> +	MCOPY_ATOMIC_ZEROPAGE,
> +	/* Just setup the dst_vma, without modifying the underlying page(s). */

"setup the dst_vma" sounds odd.  How about "install pte with the existing page
in the page cache"?

> +	MCOPY_ATOMIC_CONTINUE,
> +};

[...]

> @@ -4749,22 +4754,27 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
>  		hugepage_add_new_anon_rmap(page, dst_vma, dst_addr);
>  	}
>  
> -	_dst_pte = make_huge_pte(dst_vma, page, dst_vma->vm_flags & VM_WRITE);
> -	if (dst_vma->vm_flags & VM_WRITE)
> +	dst_pte_flags = dst_vma->vm_flags & VM_WRITE;
> +	/* For CONTINUE on a non-shared VMA, don't set VM_WRITE for CoW. */
> +	if (mode == MCOPY_ATOMIC_CONTINUE && !vm_shared)
> +		dst_pte_flags &= ~VM_WRITE;

I agree it should work but it's odd to explicitly remove a VM_WRITE bit, since
imho what we want to do is not changing vma or vma flags but deciding whether
to keep the write bit in the ptes.  How about as simple as:

        bool writable;

        if (mode == MCOPY_ATOMIC_CONTINUE && !vm_shared)
            writable = false;
        else
            writable = dst_vma->vm_flags & VM_WRITE;

        _dst_pte = make_huge_pte(dst_vma, page, writable);
        if (writable)
        	_dst_pte = huge_pte_mkdirty(_dst_pte);

?

> +	_dst_pte = make_huge_pte(dst_vma, page, dst_pte_flags);
> +	if (dst_pte_flags & VM_WRITE)
>  		_dst_pte = huge_pte_mkdirty(_dst_pte);
>  	_dst_pte = pte_mkyoung(_dst_pte);
>  
>  	set_huge_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
>  
>  	(void)huge_ptep_set_access_flags(dst_vma, dst_addr, dst_pte, _dst_pte,
> -					dst_vma->vm_flags & VM_WRITE);
> +					 dst_pte_flags);
>  	hugetlb_count_add(pages_per_huge_page(h), dst_mm);
>  
>  	/* No need to invalidate - it was non-present before */
>  	update_mmu_cache(dst_vma, dst_addr, dst_pte);
>  
>  	spin_unlock(ptl);
> -	set_page_huge_active(page);
> +	if (mode != MCOPY_ATOMIC_CONTINUE)
> +		set_page_huge_active(page);

This has been changed to SetHPageMigratable(page) in akpm-next by Mike's new
series.  So maybe it's time to rebase your series to that starting from the
next post.

>  	if (vm_shared)
>  		unlock_page(page);

After removing the shared restriction, I think we need:

        if (vm_shared || (mode == MCOPY_ATOMIC_CONTINUE))
        	unlock_page(page);

Since we seem to check (mode == MCOPY_ATOMIC_CONTINUE) a lot, maybe we can
introduce a temp var for that too.

>  	ret = 0;
> diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
> index b2ce61c1b50d..7bf83ffa456b 100644
> --- a/mm/userfaultfd.c
> +++ b/mm/userfaultfd.c
> @@ -207,7 +207,7 @@ static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
>  					      unsigned long dst_start,
>  					      unsigned long src_start,
>  					      unsigned long len,
> -					      bool zeropage)
> +					      enum mcopy_atomic_mode mode)
>  {
>  	int vm_alloc_shared = dst_vma->vm_flags & VM_SHARED;
>  	int vm_shared = dst_vma->vm_flags & VM_SHARED;
> @@ -227,7 +227,7 @@ static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
>  	 * by THP.  Since we can not reliably insert a zero page, this
>  	 * feature is not supported.
>  	 */
> -	if (zeropage) {
> +	if (mode == MCOPY_ATOMIC_ZEROPAGE) {
>  		mmap_read_unlock(dst_mm);
>  		return -EINVAL;
>  	}
> @@ -273,8 +273,6 @@ static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
>  	}
>  
>  	while (src_addr < src_start + len) {
> -		pte_t dst_pteval;
> -
>  		BUG_ON(dst_addr >= dst_start + len);
>  
>  		/*
> @@ -297,16 +295,17 @@ static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
>  			goto out_unlock;
>  		}
>  
> -		err = -EEXIST;
> -		dst_pteval = huge_ptep_get(dst_pte);
> -		if (!huge_pte_none(dst_pteval)) {
> -			mutex_unlock(&hugetlb_fault_mutex_table[hash]);
> -			i_mmap_unlock_read(mapping);
> -			goto out_unlock;
> +		if (mode != MCOPY_ATOMIC_CONTINUE) {
> +			if (!huge_pte_none(huge_ptep_get(dst_pte))) {

Maybe merge the two "if"s?

> +				err = -EEXIST;
> +				mutex_unlock(&hugetlb_fault_mutex_table[hash]);
> +				i_mmap_unlock_read(mapping);
> +				goto out_unlock;
> +			}
>  		}
>  
>  		err = hugetlb_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma,
> -						dst_addr, src_addr, &page);
> +					       dst_addr, src_addr, mode, &page);
>  
>  		mutex_unlock(&hugetlb_fault_mutex_table[hash]);
>  		i_mmap_unlock_read(mapping);
> @@ -408,7 +407,7 @@ extern ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
>  				      unsigned long dst_start,
>  				      unsigned long src_start,
>  				      unsigned long len,
> -				      bool zeropage);
> +				      enum mcopy_atomic_mode mode);
>  #endif /* CONFIG_HUGETLB_PAGE */
>  
>  static __always_inline ssize_t mfill_atomic_pte(struct mm_struct *dst_mm,
> @@ -417,10 +416,14 @@ static __always_inline ssize_t mfill_atomic_pte(struct mm_struct *dst_mm,
>  						unsigned long dst_addr,
>  						unsigned long src_addr,
>  						struct page **page,
> -						bool zeropage,
> +						enum mcopy_atomic_mode mode,
>  						bool wp_copy)
>  {
>  	ssize_t err;
> +	bool zeropage = (mode == MCOPY_ATOMIC_ZEROPAGE);
> +
> +	if (mode == MCOPY_ATOMIC_CONTINUE)
> +		return -EINVAL;

So you still passed in the mode into mfill_atomic_pte() just to make sure
CONTINUE is not called there.  It's okay, but again I think it's not extremely
necessary: we should make sure to fail early at the entry of uffdio_continue()
by checking against the vma type to be hugetlb, rather than reaching here.

Thanks,

-- 
Peter Xu


  reply	other threads:[~2021-02-08 23:56 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-02-04 18:34 [PATCH v4 00/10] userfaultfd: add minor fault handling Axel Rasmussen
2021-02-04 18:34 ` Axel Rasmussen
2021-02-04 18:34 ` [PATCH v4 01/10] hugetlb: Pass vma into huge_pte_alloc() and huge_pmd_share() Axel Rasmussen
2021-02-04 18:34   ` Axel Rasmussen
2021-02-05  1:04   ` kernel test robot
2021-02-05  1:04     ` kernel test robot
2021-02-04 18:34 ` [PATCH v4 02/10] hugetlb/userfaultfd: Forbid huge pmd sharing when uffd enabled Axel Rasmussen
2021-02-04 18:34   ` Axel Rasmussen
2021-02-04 18:34 ` [PATCH v4 03/10] mm/hugetlb: Move flush_hugetlb_tlb_range() into hugetlb.h Axel Rasmussen
2021-02-04 18:34   ` Axel Rasmussen
2021-02-04 18:34 ` [PATCH v4 04/10] hugetlb/userfaultfd: Unshare all pmds for hugetlbfs when register wp Axel Rasmussen
2021-02-04 18:34   ` Axel Rasmussen
2021-02-04 18:34 ` [PATCH v4 05/10] userfaultfd: add minor fault registration mode Axel Rasmussen
2021-02-04 18:34   ` Axel Rasmussen
2021-02-09  0:00   ` Peter Xu
2021-02-04 18:34 ` [PATCH v4 06/10] userfaultfd: disable huge PMD sharing for MINOR registered VMAs Axel Rasmussen
2021-02-04 18:34   ` Axel Rasmussen
2021-02-04 18:34 ` [PATCH v4 07/10] userfaultfd: hugetlbfs: only compile UFFD helpers if config enabled Axel Rasmussen
2021-02-04 18:34   ` Axel Rasmussen
2021-02-04 18:34 ` [PATCH v4 08/10] userfaultfd: add UFFDIO_CONTINUE ioctl Axel Rasmussen
2021-02-04 18:34   ` Axel Rasmussen
2021-02-08 23:54   ` Peter Xu [this message]
2021-02-10 18:00     ` Axel Rasmussen
2021-02-10 19:06       ` Peter Xu
2021-02-04 18:34 ` [PATCH v4 09/10] userfaultfd: update documentation to describe minor fault handling Axel Rasmussen
2021-02-04 18:34   ` Axel Rasmussen
2021-02-04 19:57   ` Randy Dunlap
2021-02-04 21:04     ` Axel Rasmussen
2021-02-04 21:07       ` Randy Dunlap
2021-02-04 18:34 ` [PATCH v4 10/10] userfaultfd/selftests: add test exercising " Axel Rasmussen
2021-02-04 18:34   ` Axel Rasmussen
2021-02-04 18:38 ` [PATCH v4 00/10] userfaultfd: add " Axel Rasmussen
2021-02-04 18:38   ` Axel Rasmussen
2021-02-09  0:03 ` Peter Xu
2021-02-09  0:19   ` Axel Rasmussen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210208235411.GC71523@xz-x1 \
    --to=peterx@redhat.com \
    --cc=aarcange@redhat.com \
    --cc=adobriyan@gmail.com \
    --cc=akpm@linux-foundation.org \
    --cc=almasrymina@google.com \
    --cc=anshuman.khandual@arm.com \
    --cc=axelrasmussen@google.com \
    --cc=cannonmatthews@google.com \
    --cc=catalin.marinas@arm.com \
    --cc=chinwen.chang@mediatek.com \
    --cc=dgilbert@redhat.com \
    --cc=jannh@google.com \
    --cc=jglisse@redhat.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=lokeshgidra@google.com \
    --cc=mike.kravetz@oracle.com \
    --cc=mingo@redhat.com \
    --cc=mkoutny@suse.com \
    --cc=mpe@ellerman.id.au \
    --cc=npiggin@gmail.com \
    --cc=oupton@google.com \
    --cc=rientjes@google.com \
    --cc=rostedt@goodmis.org \
    --cc=rppt@linux.vnet.ibm.com \
    --cc=ruprecht@google.com \
    --cc=shawn@anastas.io \
    --cc=shli@fb.com \
    --cc=steven.price@arm.com \
    --cc=vbabka@suse.cz \
    --cc=viro@zeniv.linux.org.uk \
    --cc=walken@google.com \
    --cc=willy@infradead.org \
    --cc=ying.huang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.