All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jan Kara <jack@suse.cz>
To: Matthew Wilcox <willy@infradead.org>
Cc: Michael Larabel <Michael@michaellarabel.com>,
	Amir Goldstein <amir73il@gmail.com>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	Ted Ts'o <tytso@google.com>,
	Andreas Dilger <adilger.kernel@dilger.ca>,
	Ext4 Developers List <linux-ext4@vger.kernel.org>,
	Jan Kara <jack@suse.cz>,
	linux-fsdevel <linux-fsdevel@vger.kernel.org>,
	linux-mm@kvack.org
Subject: Re: Kernel Benchmarking
Date: Tue, 15 Sep 2020 12:39:38 +0200	[thread overview]
Message-ID: <20200915103938.GL4863@quack2.suse.cz> (raw)
In-Reply-To: <20200915033210.GA5449@casper.infradead.org>

Hi Matthew!

On Tue 15-09-20 04:32:10, Matthew Wilcox wrote:
> On Sat, Sep 12, 2020 at 09:44:15AM -0500, Michael Larabel wrote:
> > Interesting, I'll fire up some cross-filesystem benchmarks with those tests
> > today and report back shortly with the difference.
> 
> If you have time, perhaps you'd like to try this patch.  It tries to
> handle page faults locklessly when possible, which should be the case
> where you're seeing page lock contention.  I've tried to be fairly
> conservative in this patch; reducing page lock acquisition should be
> possible in more cases.

So I'd be somewhat uneasy with this optimization. The thing is that e.g.
page migration relies on page lock protecting page from being mapped? How
does your patch handle that? I'm also not sure if the rmap code is really
ready for new page reverse mapping being added without holding page lock...

								Honza
> 
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index ca6e6a81576b..a14785b7fca7 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -416,6 +416,7 @@ extern pgprot_t protection_map[16];
>   * @FAULT_FLAG_REMOTE: The fault is not for current task/mm.
>   * @FAULT_FLAG_INSTRUCTION: The fault was during an instruction fetch.
>   * @FAULT_FLAG_INTERRUPTIBLE: The fault can be interrupted by non-fatal signals.
> + * @FAULT_FLAG_UPTODATE_ONLY: The fault handler returned @VM_FAULT_UPTODATE.
>   *
>   * About @FAULT_FLAG_ALLOW_RETRY and @FAULT_FLAG_TRIED: we can specify
>   * whether we would allow page faults to retry by specifying these two
> @@ -446,6 +447,7 @@ extern pgprot_t protection_map[16];
>  #define FAULT_FLAG_REMOTE			0x80
>  #define FAULT_FLAG_INSTRUCTION  		0x100
>  #define FAULT_FLAG_INTERRUPTIBLE		0x200
> +#define FAULT_FLAG_UPTODATE_ONLY		0x400
>  
>  /*
>   * The default fault flags that should be used by most of the
> diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> index 496c3ff97cce..632eabcad2f7 100644
> --- a/include/linux/mm_types.h
> +++ b/include/linux/mm_types.h
> @@ -689,6 +689,8 @@ typedef __bitwise unsigned int vm_fault_t;
>   * @VM_FAULT_NEEDDSYNC:		->fault did not modify page tables and needs
>   *				fsync() to complete (for synchronous page faults
>   *				in DAX)
> + * @VM_FAULT_UPTODATE:		Page is not locked; must check it is still
> + *				uptodate under the page table lock
>   * @VM_FAULT_HINDEX_MASK:	mask HINDEX value
>   *
>   */
> @@ -706,6 +708,7 @@ enum vm_fault_reason {
>  	VM_FAULT_FALLBACK       = (__force vm_fault_t)0x000800,
>  	VM_FAULT_DONE_COW       = (__force vm_fault_t)0x001000,
>  	VM_FAULT_NEEDDSYNC      = (__force vm_fault_t)0x002000,
> +	VM_FAULT_UPTODATE       = (__force vm_fault_t)0x004000,
>  	VM_FAULT_HINDEX_MASK    = (__force vm_fault_t)0x0f0000,
>  };
>  
> diff --git a/mm/filemap.c b/mm/filemap.c
> index 1aaea26556cc..38f87dd86312 100644
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -2602,6 +2602,13 @@ vm_fault_t filemap_fault(struct vm_fault *vmf)
>  		}
>  	}
>  
> +	if (fpin)
> +		goto out_retry;
> +	if (likely(PageUptodate(page))) {
> +		ret |= VM_FAULT_UPTODATE;
> +		goto uptodate;
> +	}
> +
>  	if (!lock_page_maybe_drop_mmap(vmf, page, &fpin))
>  		goto out_retry;
>  
> @@ -2630,19 +2637,19 @@ vm_fault_t filemap_fault(struct vm_fault *vmf)
>  		goto out_retry;
>  	}
>  
> -	/*
> -	 * Found the page and have a reference on it.
> -	 * We must recheck i_size under page lock.
> -	 */
> +	ret |= VM_FAULT_LOCKED;
> +	/* Must recheck i_size after getting a stable reference to the page */
> +uptodate:
>  	max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
>  	if (unlikely(offset >= max_off)) {
> -		unlock_page(page);
> +		if (ret & VM_FAULT_LOCKED)
> +			unlock_page(page);
>  		put_page(page);
>  		return VM_FAULT_SIGBUS;
>  	}
>  
>  	vmf->page = page;
> -	return ret | VM_FAULT_LOCKED;
> +	return ret;
>  
>  page_not_uptodate:
>  	/*
> diff --git a/mm/memory.c b/mm/memory.c
> index 469af373ae76..53c8ef2bb38b 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -3460,11 +3460,6 @@ static vm_fault_t __do_fault(struct vm_fault *vmf)
>  		return VM_FAULT_HWPOISON;
>  	}
>  
> -	if (unlikely(!(ret & VM_FAULT_LOCKED)))
> -		lock_page(vmf->page);
> -	else
> -		VM_BUG_ON_PAGE(!PageLocked(vmf->page), vmf->page);
> -
>  	return ret;
>  }
>  
> @@ -3646,12 +3641,13 @@ vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct page *page)
>  		return VM_FAULT_NOPAGE;
>  	}
>  
> -	flush_icache_page(vma, page);
> -	entry = mk_pte(page, vma->vm_page_prot);
> -	entry = pte_sw_mkyoung(entry);
> -	if (write)
> -		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
> -	/* copy-on-write page */
> +	/*
> +	 * If the page isn't locked, truncate or invalidate2 may be
> +	 * trying to remove it at the same time.  Both paths will check
> +	 * the page's mapcount after clearing the PageUptodate bit,
> +	 * so if we increment the mapcount here before checking the
> +	 * Uptodate bit, the page will be unmapped by the other thread.
> +	 */
>  	if (write && !(vma->vm_flags & VM_SHARED)) {
>  		inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
>  		page_add_new_anon_rmap(page, vma, vmf->address, false);
> @@ -3660,6 +3656,25 @@ vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct page *page)
>  		inc_mm_counter_fast(vma->vm_mm, mm_counter_file(page));
>  		page_add_file_rmap(page, false);
>  	}
> +	smp_mb__after_atomic();
> +
> +	if ((vmf->flags & FAULT_FLAG_UPTODATE_ONLY) && !PageUptodate(page)) {
> +		page_remove_rmap(page, false);
> +		if (write && !(vma->vm_flags & VM_SHARED)) {
> +			dec_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
> +			/* lru_cache_remove_inactive_or_unevictable? */
> +		} else {
> +			dec_mm_counter_fast(vma->vm_mm, mm_counter_file(page));
> +		}
> +		return VM_FAULT_NOPAGE;
> +	}
> +
> +	flush_icache_page(vma, page);
> +	entry = mk_pte(page, vma->vm_page_prot);
> +	entry = pte_sw_mkyoung(entry);
> +	if (write)
> +		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
> +	/* copy-on-write page */
>  	set_pte_at(vma->vm_mm, vmf->address, vmf->pte, entry);
>  
>  	/* no need to invalidate: a not-present page won't be cached */
> @@ -3844,8 +3859,18 @@ static vm_fault_t do_read_fault(struct vm_fault *vmf)
>  	if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
>  		return ret;
>  
> +	if (ret & VM_FAULT_UPTODATE)
> +		vmf->flags |= FAULT_FLAG_UPTODATE_ONLY;
> +	else if (unlikely(!(ret & VM_FAULT_LOCKED)))
> +		lock_page(vmf->page);
> +	else
> +		VM_BUG_ON_PAGE(!PageLocked(vmf->page), vmf->page);
> +
>  	ret |= finish_fault(vmf);
> -	unlock_page(vmf->page);
> +	if (ret & VM_FAULT_UPTODATE)
> +		vmf->flags &= ~FAULT_FLAG_UPTODATE_ONLY;
> +	else
> +		unlock_page(vmf->page);
>  	if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
>  		put_page(vmf->page);
>  	return ret;
> @@ -3875,6 +3900,11 @@ static vm_fault_t do_cow_fault(struct vm_fault *vmf)
>  	if (ret & VM_FAULT_DONE_COW)
>  		return ret;
>  
> +	if (!(ret & VM_FAULT_LOCKED))
> +		lock_page(vmf->page);
> +	else
> +		VM_BUG_ON_PAGE(!PageLocked(vmf->page), vmf->page);
> +
>  	copy_user_highpage(vmf->cow_page, vmf->page, vmf->address, vma);
>  	__SetPageUptodate(vmf->cow_page);
>  
> @@ -3898,6 +3928,11 @@ static vm_fault_t do_shared_fault(struct vm_fault *vmf)
>  	if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
>  		return ret;
>  
> +	if (!(ret & VM_FAULT_LOCKED))
> +		lock_page(vmf->page);
> +	else
> +		VM_BUG_ON_PAGE(!PageLocked(vmf->page), vmf->page);
> +
>  	/*
>  	 * Check if the backing address space wants to know that the page is
>  	 * about to become writable
> diff --git a/mm/truncate.c b/mm/truncate.c
> index dd9ebc1da356..96a0408804a7 100644
> --- a/mm/truncate.c
> +++ b/mm/truncate.c
> @@ -176,6 +176,8 @@ void do_invalidatepage(struct page *page, unsigned int offset,
>  static void
>  truncate_cleanup_page(struct address_space *mapping, struct page *page)
>  {
> +	ClearPageUptodate(page);
> +	smp_mb__before_atomic();
>  	if (page_mapped(page)) {
>  		pgoff_t nr = PageTransHuge(page) ? HPAGE_PMD_NR : 1;
>  		unmap_mapping_pages(mapping, page->index, nr, false);
> @@ -655,6 +657,12 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page)
>  		mapping->a_ops->freepage(page);
>  
>  	put_page(page);	/* pagecache ref */
> +
> +	/* An unlocked page fault may have inserted an entry */
> +	ClearPageUptodate(page);
> +	smp_mb__before_atomic();
> +	if (page_mapped(page))
> +		unmap_mapping_pages(mapping, page->index, 1, false);
>  	return 1;
>  failed:
>  	xa_unlock_irqrestore(&mapping->i_pages, flags);
-- 
Jan Kara <jack@suse.com>
SUSE Labs, CR

  reply	other threads:[~2020-09-15 10:39 UTC|newest]

Thread overview: 65+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <CAHk-=wiZnE409WkTOG6fbF_eV1LgrHBvMtyKkpTqM9zT5hpf9A@mail.gmail.com>
     [not found] ` <4ced9401-de3d-b7c9-9976-2739e837fafc@MichaelLarabel.com>
     [not found]   ` <CAHk-=wj+Qj=wXByMrAx3T8jmw=soUetioRrbz6dQaECx+zjMtg@mail.gmail.com>
     [not found]     ` <CAHk-=wgOPjbJsj-LeLc-JMx9Sz9DjGF66Q+jQFJROt9X9utdBg@mail.gmail.com>
     [not found]       ` <CAHk-=wjjK7PTnDZNi039yBxSHtAqusFoRrZzgMNTiYkJYdNopw@mail.gmail.com>
     [not found]         ` <aa90f272-1186-f9e1-8fdb-eefd332fdae8@MichaelLarabel.com>
     [not found]           ` <CAHk-=wh_31_XBNHbdF7EUJceLpEpwRxVF+_1TONzyBUym6Pw4w@mail.gmail.com>
     [not found]             ` <e24ef34d-7b1d-dd99-082d-28ca285a79ff@MichaelLarabel.com>
     [not found]               ` <CAHk-=wgEE4GuNjcRaaAvaS97tW+239-+tjcPjTq2FGhEuM8HYg@mail.gmail.com>
     [not found]                 ` <6e1d8740-2594-c58b-ff02-a04df453d53c@MichaelLarabel.com>
     [not found]                   ` <CAHk-=wgJ3-cEkU-5zXFPvRCHKkCCuKxVauYWGphjePEhJJgtgQ@mail.gmail.com>
     [not found]                     ` <d2023f4c-ef14-b877-b5bb-e4f8af332abc@MichaelLarabel.com>
     [not found]                       ` <CAHk-=wiz=J=8mJ=zRG93nuJ9GtQAm5bSRAbWJbWZuN4Br38+EQ@mail.gmail.com>
2020-09-11  0:05                         ` Kernel Benchmarking Linus Torvalds
2020-09-11  0:49                           ` Michael Larabel
2020-09-11  2:20                             ` Linus Torvalds
     [not found]                               ` <0cbc959e-1b8d-8d7e-1dc6-672cf5b3899a@MichaelLarabel.com>
2020-09-11 16:19                                 ` Linus Torvalds
2020-09-11 22:07                                   ` Linus Torvalds
2020-09-11 22:37                                     ` Michael Larabel
2020-09-12  7:28                                       ` Amir Goldstein
2020-09-12 10:32                                         ` Michael Larabel
2020-09-12 14:37                                           ` Matthew Wilcox
2020-09-12 14:44                                             ` Michael Larabel
2020-09-15  3:32                                               ` Matthew Wilcox
2020-09-15 10:39                                                 ` Jan Kara [this message]
2020-09-15 13:52                                                   ` Matthew Wilcox
     [not found]                                             ` <658ae026-32d9-0a25-5a59-9c510d6898d5@MichaelLarabel.com>
2020-09-14 17:47                                               ` Linus Torvalds
2020-09-14 20:21                                                 ` Matthieu Baerts
2020-09-14 20:53                                                   ` Linus Torvalds
2020-09-15  0:42                                                     ` Linus Torvalds
2020-09-15 15:34                                                     ` Matthieu Baerts
2020-09-15 18:27                                                       ` Linus Torvalds
2020-09-15 18:47                                                         ` Linus Torvalds
2020-09-15 19:26                                                           ` Matthieu Baerts
2020-09-15 19:32                                                             ` Linus Torvalds
2020-09-15 19:56                                                               ` Matthieu Baerts
2020-09-15 23:35                                                                 ` Linus Torvalds
2020-09-16 10:34                                                                   ` Jan Kara
2020-09-16 18:47                                                                     ` Linus Torvalds
     [not found]                                                         ` <9a92bf16-02c5-ba38-33c7-f350588ac874@tessares.net>
2020-09-15 19:24                                                           ` Linus Torvalds
2020-09-15 19:38                                                             ` Matthieu Baerts
2020-09-15 18:31                                                       ` Linus Torvalds
2020-09-15 14:21                                                 ` Michael Larabel
2020-09-15 17:52                                                   ` Linus Torvalds
2020-09-17 17:51                                                 ` Linus Torvalds
2020-09-17 18:23                                                   ` Matthew Wilcox
2020-09-17 18:30                                                     ` Linus Torvalds
2020-09-17 18:50                                                       ` Matthew Wilcox
2020-09-17 19:00                                                         ` Linus Torvalds
2020-09-17 19:27                                                           ` Matthew Wilcox
2020-09-17 19:47                                                             ` Linus Torvalds
2020-09-18  0:39                                                               ` Sedat Dilek
2020-09-18  0:40                                                                 ` Sedat Dilek
2020-09-18 20:25                                                                   ` Sedat Dilek
2020-09-20 17:06                                                                     ` Linus Torvalds
2020-09-20 17:14                                                                       ` Sedat Dilek
2020-09-20 17:40                                                                         ` Linus Torvalds
2020-09-20 18:00                                                                           ` Sedat Dilek
2020-09-20 23:23                                                               ` Dave Chinner
2020-09-20 23:31                                                                 ` Linus Torvalds
2020-09-20 23:40                                                                   ` Linus Torvalds
2020-09-21  1:20                                                                   ` Dave Chinner
2020-09-12 15:53                                         ` Matthew Wilcox
2020-09-12 17:59                                         ` Linus Torvalds
2020-09-12 20:32                                           ` Rogério Brito
2020-09-14  9:33                                             ` Jan Kara
2020-09-12 20:58                                           ` Josh Triplett
2020-09-12 20:59                                           ` James Bottomley
2020-09-12 21:15                                             ` Linus Torvalds
2020-09-12 22:32                                           ` Matthew Wilcox
2020-09-13  0:40                                           ` Dave Chinner
2020-09-13  2:39                                             ` Linus Torvalds
2020-09-13  3:40                                               ` Matthew Wilcox
2020-09-13 23:45                                               ` Dave Chinner
2020-09-14  3:31                                                 ` Matthew Wilcox
2020-09-15 14:28                                                   ` Chris Mason
2020-09-15  9:27                                                 ` Jan Kara
2020-09-13  3:18                                             ` Matthew Wilcox

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200915103938.GL4863@quack2.suse.cz \
    --to=jack@suse.cz \
    --cc=Michael@michaellarabel.com \
    --cc=adilger.kernel@dilger.ca \
    --cc=amir73il@gmail.com \
    --cc=linux-ext4@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=torvalds@linux-foundation.org \
    --cc=tytso@google.com \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.