Linux-Fsdevel Archive on lore.kernel.org
 help / color / Atom feed
From: "Kirill A. Shutemov" <kirill@shutemov.name>
To: Matthew Wilcox <willy@infradead.org>
Cc: linux-fsdevel@vger.kernel.org, linux-mm@kvack.org,
	linux-kernel@vger.kernel.org
Subject: Re: [PATCH v2 13/25] fs: Add zero_user_large
Date: Tue, 18 Feb 2020 20:10:52 +0300
Message-ID: <20200218171052.lwd56nr332qjgs5j@box> (raw)
In-Reply-To: <20200218161349.GS7778@bombadil.infradead.org>

On Tue, Feb 18, 2020 at 08:13:49AM -0800, Matthew Wilcox wrote:
> On Tue, Feb 18, 2020 at 05:16:34PM +0300, Kirill A. Shutemov wrote:
> > > +               if (start1 >= PAGE_SIZE) {
> > > +                       start1 -= PAGE_SIZE;
> > > +                       end1 -= PAGE_SIZE;
> > > +                       if (start2) {
> > > +                               start2 -= PAGE_SIZE;
> > > +                               end2 -= PAGE_SIZE;
> > > +                       }
> > 
> > You assume start2/end2 is always after start1/end1 in the page.
> > Is it always true? If so, I would add BUG_ON() for it.
> 
> after or zero.  Yes, I should add a BUG_ON to check for that.
> 
> > Otherwise, looks good.
> 
> Here's what I currently have (I'll add the BUG_ON later):
> 
> commit 7fabe16755365cdc6e80343ef994843ecebde60a
> Author: Matthew Wilcox (Oracle) <willy@infradead.org>
> Date:   Sat Feb 1 03:38:49 2020 -0500
> 
>     fs: Support THPs in zero_user_segments
>     
>     We can only kmap() one subpage of a THP at a time, so loop over all
>     relevant subpages, skipping ones which don't need to be zeroed.  This is
>     too large to inline when THPs are enabled and we actually need highmem,
>     so put it in highmem.c.
>     
>     Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>

Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>

> 
> diff --git a/include/linux/highmem.h b/include/linux/highmem.h
> index ea5cdbd8c2c3..74614903619d 100644
> --- a/include/linux/highmem.h
> +++ b/include/linux/highmem.h
> @@ -215,13 +215,18 @@ static inline void clear_highpage(struct page *page)
>         kunmap_atomic(kaddr);
>  }
>  
> +#if defined(CONFIG_HIGHMEM) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
> +void zero_user_segments(struct page *page, unsigned start1, unsigned end1,
> +               unsigned start2, unsigned end2);
> +#else /* !HIGHMEM || !TRANSPARENT_HUGEPAGE */

This is a neat trick. I like it.

Although, it means non-inlined version will never get tested :/

>  static inline void zero_user_segments(struct page *page,
> -       unsigned start1, unsigned end1,
> -       unsigned start2, unsigned end2)
> +               unsigned start1, unsigned end1,
> +               unsigned start2, unsigned end2)
>  {
> +       unsigned long i;
>         void *kaddr = kmap_atomic(page);
>  
> -       BUG_ON(end1 > PAGE_SIZE || end2 > PAGE_SIZE);
> +       BUG_ON(end1 > thp_size(page) || end2 > thp_size(page));
>  
>         if (end1 > start1)
>                 memset(kaddr + start1, 0, end1 - start1);
> @@ -230,8 +235,10 @@ static inline void zero_user_segments(struct page *page,
>                 memset(kaddr + start2, 0, end2 - start2);
>  
>         kunmap_atomic(kaddr);
> -       flush_dcache_page(page);
> +       for (i = 0; i < hpage_nr_pages(page); i++)
> +               flush_dcache_page(page + i);
>  }
> +#endif /* !HIGHMEM || !TRANSPARENT_HUGEPAGE */
>  
>  static inline void zero_user_segment(struct page *page,
>         unsigned start, unsigned end)
> diff --git a/mm/highmem.c b/mm/highmem.c
> index 64d8dea47dd1..3a85c66ef532 100644
> --- a/mm/highmem.c
> +++ b/mm/highmem.c
> @@ -367,9 +367,67 @@ void kunmap_high(struct page *page)
>         if (need_wakeup)
>                 wake_up(pkmap_map_wait);
>  }
> -
>  EXPORT_SYMBOL(kunmap_high);
> -#endif
> +
> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
> +void zero_user_segments(struct page *page, unsigned start1, unsigned end1,
> +               unsigned start2, unsigned end2)
> +{
> +       unsigned int i;
> +
> +       BUG_ON(end1 > thp_size(page) || end2 > thp_size(page));
> +
> +       for (i = 0; i < hpage_nr_pages(page); i++) {
> +               void *kaddr;
> +               unsigned this_end;
> +
> +               if (end1 == 0 && start2 >= PAGE_SIZE) {
> +                       start2 -= PAGE_SIZE;
> +                       end2 -= PAGE_SIZE;
> +                       continue;
> +               }
> +
> +               if (start1 >= PAGE_SIZE) {
> +                       start1 -= PAGE_SIZE;
> +                       end1 -= PAGE_SIZE;
> +                       if (start2) {
> +                               start2 -= PAGE_SIZE;
> +                               end2 -= PAGE_SIZE;
> +                       }
> +                       continue;
> +               }
> +
> +               kaddr = kmap_atomic(page + i);
> +
> +               this_end = min_t(unsigned, end1, PAGE_SIZE);
> +               if (end1 > start1)
> +                       memset(kaddr + start1, 0, this_end - start1);
> +               end1 -= this_end;
> +               start1 = 0;
> +
> +               if (start2 >= PAGE_SIZE) {
> +                       start2 -= PAGE_SIZE;
> +                       end2 -= PAGE_SIZE;
> +               } else {
> +                       this_end = min_t(unsigned, end2, PAGE_SIZE);
> +                       if (end2 > start2)
> +                               memset(kaddr + start2, 0, this_end - start2);
> +                       end2 -= this_end;
> +                       start2 = 0;
> +               }
> +
> +               kunmap_atomic(kaddr);
> +               flush_dcache_page(page + i);
> +
> +               if (!end1 && !end2)
> +                       break;
> +       }
> +
> +       BUG_ON((start1 | start2 | end1 | end2) != 0);
> +}
> +EXPORT_SYMBOL(zero_user_segments);
> +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
> +#endif /* CONFIG_HIGHMEM */
>  
>  #if defined(HASHED_PAGE_VIRTUAL)
>  
> 
> 
> 
> > > +                       continue;
> > > +               }
> > > +
> > > +               kaddr = kmap_atomic(page + i);
> > > +
> > > +               this_end = min_t(unsigned, end1, PAGE_SIZE);
> > > +               if (end1 > start1)
> > > +                       memset(kaddr + start1, 0, this_end - start1);
> > > +               end1 -= this_end;
> > > +               start1 = 0;
> > > +
> > > +               if (start2 >= PAGE_SIZE) {
> > > +                       start2 -= PAGE_SIZE;
> > > +                       end2 -= PAGE_SIZE;
> > > +               } else {
> > > +                       this_end = min_t(unsigned, end2, PAGE_SIZE);
> > > +                       if (end2 > start2)
> > > +                               memset(kaddr + start2, 0, this_end - start2);
> > > +                       end2 -= this_end;
> > > +                       start2 = 0;
> > > +               }
> > > +
> > > +               kunmap_atomic(kaddr);
> > > +               flush_dcache_page(page + i);
> > > +
> > > +               if (!end1 && !end2)
> > > +                       break;
> > > +       }
> > > +
> > > +       BUG_ON((start1 | start2 | end1 | end2) != 0);
> > >  }
> > > 
> > > I think at this point it has to move out-of-line too.
> > > 
> > > > > +static inline void zero_user_large(struct page *page,
> > > > > +		unsigned start, unsigned size)
> > > > > +{
> > > > > +	unsigned int i;
> > > > > +
> > > > > +	for (i = 0; i < thp_order(page); i++) {
> > > > > +		if (start > PAGE_SIZE) {
> > > > 
> > > > Off-by-one? >= ?
> > > 
> > > Good catch; I'd also noticed that when I came to redo the zero_user_segments().
> > > 
> > 
> > -- 
> >  Kirill A. Shutemov

-- 
 Kirill A. Shutemov

  reply index

Thread overview: 68+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-02-12  4:18 [PATCH v2 00/25] Large pages in the page cache Matthew Wilcox
2020-02-12  4:18 ` [PATCH v2 01/25] mm: Use vm_fault error code directly Matthew Wilcox
2020-02-12  7:34   ` Christoph Hellwig
2020-02-12  4:18 ` [PATCH v2 02/25] mm: Optimise find_subpage for !THP Matthew Wilcox
2020-02-12  7:41   ` Christoph Hellwig
2020-02-12 13:02     ` Matthew Wilcox
2020-02-12 17:52       ` Christoph Hellwig
2020-02-13 13:50       ` Kirill A. Shutemov
2020-02-12  4:18 ` [PATCH v2 03/25] mm: Use VM_BUG_ON_PAGE in clear_page_dirty_for_io Matthew Wilcox
2020-02-12  7:38   ` Christoph Hellwig
2020-02-13 13:50   ` Kirill A. Shutemov
2020-02-12  4:18 ` [PATCH v2 04/25] mm: Unexport find_get_entry Matthew Wilcox
2020-02-12  7:37   ` Christoph Hellwig
2020-02-13 13:51   ` Kirill A. Shutemov
2020-02-12  4:18 ` [PATCH v2 05/25] mm: Fix documentation of FGP flags Matthew Wilcox
2020-02-12  7:42   ` Christoph Hellwig
2020-02-12 19:11     ` Matthew Wilcox
2020-02-13 14:00       ` Kirill A. Shutemov
2020-02-13 13:59   ` Kirill A. Shutemov
2020-02-13 14:34     ` Matthew Wilcox
2020-02-12  4:18 ` [PATCH v2 06/25] mm: Allow hpages to be arbitrary order Matthew Wilcox
2020-02-13 14:11   ` Kirill A. Shutemov
2020-02-13 14:30     ` Matthew Wilcox
2020-02-12  4:18 ` [PATCH v2 07/25] mm: Introduce thp_size Matthew Wilcox
2020-02-13 14:19   ` Kirill A. Shutemov
2020-02-12  4:18 ` [PATCH v2 08/25] mm: Introduce thp_order Matthew Wilcox
2020-02-13 14:20   ` Kirill A. Shutemov
2020-02-12  4:18 ` [PATCH v2 09/25] fs: Add a filesystem flag for large pages Matthew Wilcox
2020-02-12  7:43   ` Christoph Hellwig
2020-02-12 14:59     ` Matthew Wilcox
2020-02-12  4:18 ` [PATCH v2 10/25] fs: Introduce i_blocks_per_page Matthew Wilcox
2020-02-12  7:44   ` Christoph Hellwig
2020-02-12 15:05     ` Matthew Wilcox
2020-02-12 17:54       ` Christoph Hellwig
2020-02-13 15:40   ` Kirill A. Shutemov
2020-02-13 16:07     ` Matthew Wilcox
2020-02-12  4:18 ` [PATCH v2 11/25] fs: Make page_mkwrite_check_truncate thp-aware Matthew Wilcox
2020-02-13 15:44   ` Kirill A. Shutemov
2020-02-13 16:26     ` Matthew Wilcox
2020-02-12  4:18 ` [PATCH v2 12/25] mm: Add file_offset_of_ helpers Matthew Wilcox
2020-02-12  7:46   ` Christoph Hellwig
2020-02-12  4:18 ` [PATCH v2 13/25] fs: Add zero_user_large Matthew Wilcox
2020-02-14 13:52   ` Kirill A. Shutemov
2020-02-14 16:03     ` Matthew Wilcox
2020-02-18 14:16       ` Kirill A. Shutemov
2020-02-18 16:13         ` Matthew Wilcox
2020-02-18 17:10           ` Kirill A. Shutemov [this message]
2020-02-18 18:07             ` Matthew Wilcox
2020-02-21 12:42               ` Kirill A. Shutemov
2020-02-12  4:18 ` [PATCH v2 14/25] iomap: Support arbitrarily many blocks per page Matthew Wilcox
2020-02-12  8:05   ` Christoph Hellwig
2020-02-12  4:18 ` [PATCH v2 15/25] iomap: Support large pages in iomap_adjust_read_range Matthew Wilcox
2020-02-12  8:11   ` Christoph Hellwig
2020-02-12  4:18 ` [PATCH v2 16/25] iomap: Support large pages in read paths Matthew Wilcox
2020-02-12  8:13   ` Christoph Hellwig
2020-02-12 17:45     ` Matthew Wilcox
2020-02-12  4:18 ` [PATCH v2 17/25] iomap: Support large pages in write paths Matthew Wilcox
2020-02-12  8:17   ` Christoph Hellwig
2020-02-12  4:18 ` [PATCH v2 18/25] iomap: Inline data shouldn't see large pages Matthew Wilcox
2020-02-12  8:05   ` Christoph Hellwig
2020-02-12  4:18 ` [PATCH v2 19/25] xfs: Support " Matthew Wilcox
2020-02-12  4:18 ` [PATCH v2 20/25] mm: Make prep_transhuge_page return its argument Matthew Wilcox
2020-02-12  4:18 ` [PATCH v2 21/25] mm: Add __page_cache_alloc_order Matthew Wilcox
2020-02-12  4:18 ` [PATCH v2 22/25] mm: Allow large pages to be added to the page cache Matthew Wilcox
2020-02-12  4:18 ` [PATCH v2 23/25] mm: Allow large pages to be removed from " Matthew Wilcox
2020-02-12  4:18 ` [PATCH v2 24/25] mm: Add large page readahead Matthew Wilcox
2020-02-12  4:18 ` [PATCH v2 25/25] mm: Align THP mappings for non-DAX Matthew Wilcox
2020-02-12  7:50   ` Christoph Hellwig

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200218171052.lwd56nr332qjgs5j@box \
    --to=kirill@shutemov.name \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Linux-Fsdevel Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-fsdevel/0 linux-fsdevel/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-fsdevel linux-fsdevel/ https://lore.kernel.org/linux-fsdevel \
		linux-fsdevel@vger.kernel.org
	public-inbox-index linux-fsdevel

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-fsdevel


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git