* [PATCH v2] page cache: Store only head pages in i_pages
@ 2019-02-12 18:34 Matthew Wilcox
2019-02-13 14:41 ` Jan Kara
2019-02-14 13:30 ` Kirill A. Shutemov
0 siblings, 2 replies; 17+ messages in thread
From: Matthew Wilcox @ 2019-02-12 18:34 UTC (permalink / raw)
To: Kirill A . Shutemov, linux-mm, linux-fsdevel, linux-kernel
Cc: Matthew Wilcox, Hugh Dickins, William Kucharski
Transparent Huge Pages are currently stored in i_pages as pointers to
consecutive subpages. This patch changes that to storing consecutive
pointers to the head page in preparation for storing huge pages more
efficiently in i_pages.
Large parts of this are "inspired" by Kirill's patch
https://lore.kernel.org/lkml/20170126115819.58875-2-kirill.shutemov@linux.intel.com/
Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
v2: Rebase on top of linux-next 20190212
Fixed a missing s/head/page/ in filemap_map_pages
Include missing calls to xas_store() in __split_huge_page
include/linux/pagemap.h | 9 ++++
mm/filemap.c | 99 +++++++++++++----------------------------
mm/huge_memory.c | 3 ++
mm/khugepaged.c | 4 +-
mm/shmem.c | 2 +-
mm/swap_state.c | 2 +-
6 files changed, 46 insertions(+), 73 deletions(-)
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index bcf909d0de5f..7d58e4e0b68e 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -333,6 +333,15 @@ static inline struct page *grab_cache_page_nowait(struct address_space *mapping,
mapping_gfp_mask(mapping));
}
+static inline struct page *find_subpage(struct page *page, pgoff_t offset)
+{
+ VM_BUG_ON_PAGE(PageTail(page), page);
+ VM_BUG_ON_PAGE(page->index > offset, page);
+ VM_BUG_ON_PAGE(page->index + (1 << compound_order(page)) <= offset,
+ page);
+ return page - page->index + offset;
+}
+
struct page *find_get_entry(struct address_space *mapping, pgoff_t offset);
struct page *find_lock_entry(struct address_space *mapping, pgoff_t offset);
unsigned find_get_entries(struct address_space *mapping, pgoff_t start,
diff --git a/mm/filemap.c b/mm/filemap.c
index 5673672fd444..ee28028c4323 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1491,7 +1491,7 @@ EXPORT_SYMBOL(page_cache_prev_miss);
struct page *find_get_entry(struct address_space *mapping, pgoff_t offset)
{
XA_STATE(xas, &mapping->i_pages, offset);
- struct page *head, *page;
+ struct page *page;
rcu_read_lock();
repeat:
@@ -1506,25 +1506,19 @@ struct page *find_get_entry(struct address_space *mapping, pgoff_t offset)
if (!page || xa_is_value(page))
goto out;
- head = compound_head(page);
- if (!page_cache_get_speculative(head))
- goto repeat;
-
- /* The page was split under us? */
- if (compound_head(page) != head) {
- put_page(head);
+ if (!page_cache_get_speculative(page))
goto repeat;
- }
/*
- * Has the page moved?
+ * Has the page moved or been split?
* This is part of the lockless pagecache protocol. See
* include/linux/pagemap.h for details.
*/
if (unlikely(page != xas_reload(&xas))) {
- put_page(head);
+ put_page(page);
goto repeat;
}
+ page = find_subpage(page, offset);
out:
rcu_read_unlock();
@@ -1706,7 +1700,6 @@ unsigned find_get_entries(struct address_space *mapping,
rcu_read_lock();
xas_for_each(&xas, page, ULONG_MAX) {
- struct page *head;
if (xas_retry(&xas, page))
continue;
/*
@@ -1717,17 +1710,13 @@ unsigned find_get_entries(struct address_space *mapping,
if (xa_is_value(page))
goto export;
- head = compound_head(page);
- if (!page_cache_get_speculative(head))
+ if (!page_cache_get_speculative(page))
goto retry;
- /* The page was split under us? */
- if (compound_head(page) != head)
- goto put_page;
-
- /* Has the page moved? */
+ /* Has the page moved or been split? */
if (unlikely(page != xas_reload(&xas)))
goto put_page;
+ page = find_subpage(page, xas.xa_index);
export:
indices[ret] = xas.xa_index;
@@ -1736,7 +1725,7 @@ unsigned find_get_entries(struct address_space *mapping,
break;
continue;
put_page:
- put_page(head);
+ put_page(page);
retry:
xas_reset(&xas);
}
@@ -1778,33 +1767,27 @@ unsigned find_get_pages_range(struct address_space *mapping, pgoff_t *start,
rcu_read_lock();
xas_for_each(&xas, page, end) {
- struct page *head;
if (xas_retry(&xas, page))
continue;
/* Skip over shadow, swap and DAX entries */
if (xa_is_value(page))
continue;
- head = compound_head(page);
- if (!page_cache_get_speculative(head))
+ if (!page_cache_get_speculative(page))
goto retry;
- /* The page was split under us? */
- if (compound_head(page) != head)
- goto put_page;
-
- /* Has the page moved? */
+ /* Has the page moved or been split? */
if (unlikely(page != xas_reload(&xas)))
goto put_page;
- pages[ret] = page;
+ pages[ret] = find_subpage(page, xas.xa_index);
if (++ret == nr_pages) {
*start = page->index + 1;
goto out;
}
continue;
put_page:
- put_page(head);
+ put_page(page);
retry:
xas_reset(&xas);
}
@@ -1849,7 +1832,6 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
rcu_read_lock();
for (page = xas_load(&xas); page; page = xas_next(&xas)) {
- struct page *head;
if (xas_retry(&xas, page))
continue;
/*
@@ -1859,24 +1841,19 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
if (xa_is_value(page))
break;
- head = compound_head(page);
- if (!page_cache_get_speculative(head))
+ if (!page_cache_get_speculative(page))
goto retry;
- /* The page was split under us? */
- if (compound_head(page) != head)
- goto put_page;
-
- /* Has the page moved? */
+ /* Has the page moved or been split? */
if (unlikely(page != xas_reload(&xas)))
goto put_page;
- pages[ret] = page;
+ pages[ret] = find_subpage(page, xas.xa_index);
if (++ret == nr_pages)
break;
continue;
put_page:
- put_page(head);
+ put_page(page);
retry:
xas_reset(&xas);
}
@@ -1912,7 +1889,6 @@ unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index,
rcu_read_lock();
xas_for_each_marked(&xas, page, end, tag) {
- struct page *head;
if (xas_retry(&xas, page))
continue;
/*
@@ -1923,26 +1899,21 @@ unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index,
if (xa_is_value(page))
continue;
- head = compound_head(page);
- if (!page_cache_get_speculative(head))
+ if (!page_cache_get_speculative(page))
goto retry;
- /* The page was split under us? */
- if (compound_head(page) != head)
- goto put_page;
-
- /* Has the page moved? */
+ /* Has the page moved or been split? */
if (unlikely(page != xas_reload(&xas)))
goto put_page;
- pages[ret] = page;
+ pages[ret] = find_subpage(page, xas.xa_index);
if (++ret == nr_pages) {
*index = page->index + 1;
goto out;
}
continue;
put_page:
- put_page(head);
+ put_page(page);
retry:
xas_reset(&xas);
}
@@ -1991,7 +1962,6 @@ unsigned find_get_entries_tag(struct address_space *mapping, pgoff_t start,
rcu_read_lock();
xas_for_each_marked(&xas, page, ULONG_MAX, tag) {
- struct page *head;
if (xas_retry(&xas, page))
continue;
/*
@@ -2002,17 +1972,13 @@ unsigned find_get_entries_tag(struct address_space *mapping, pgoff_t start,
if (xa_is_value(page))
goto export;
- head = compound_head(page);
- if (!page_cache_get_speculative(head))
+ if (!page_cache_get_speculative(page))
goto retry;
- /* The page was split under us? */
- if (compound_head(page) != head)
- goto put_page;
-
- /* Has the page moved? */
+ /* Has the page moved or been split? */
if (unlikely(page != xas_reload(&xas)))
goto put_page;
+ page = find_subpage(page, xas.xa_index);
export:
indices[ret] = xas.xa_index;
@@ -2021,7 +1987,7 @@ unsigned find_get_entries_tag(struct address_space *mapping, pgoff_t start,
break;
continue;
put_page:
- put_page(head);
+ put_page(page);
retry:
xas_reset(&xas);
}
@@ -2686,7 +2652,7 @@ void filemap_map_pages(struct vm_fault *vmf,
pgoff_t last_pgoff = start_pgoff;
unsigned long max_idx;
XA_STATE(xas, &mapping->i_pages, start_pgoff);
- struct page *head, *page;
+ struct page *page;
rcu_read_lock();
xas_for_each(&xas, page, end_pgoff) {
@@ -2695,24 +2661,19 @@ void filemap_map_pages(struct vm_fault *vmf,
if (xa_is_value(page))
goto next;
- head = compound_head(page);
-
/*
* Check for a locked page first, as a speculative
* reference may adversely influence page migration.
*/
- if (PageLocked(head))
+ if (PageLocked(page))
goto next;
- if (!page_cache_get_speculative(head))
+ if (!page_cache_get_speculative(page))
goto next;
- /* The page was split under us? */
- if (compound_head(page) != head)
- goto skip;
-
- /* Has the page moved? */
+ /* Has the page moved or been split? */
if (unlikely(page != xas_reload(&xas)))
goto skip;
+ page = find_subpage(page, xas.xa_index);
if (!PageUptodate(page) ||
PageReadahead(page) ||
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index d4847026d4b1..7008174c033b 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2458,6 +2458,9 @@ static void __split_huge_page(struct page *page, struct list_head *list,
if (IS_ENABLED(CONFIG_SHMEM) && PageSwapBacked(head))
shmem_uncharge(head->mapping->host, 1);
put_page(head + i);
+ } else if (!PageAnon(page)) {
+ __xa_store(&head->mapping->i_pages, head[i].index,
+ head + i, 0);
}
}
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 449044378782..7ba7a1e4fa79 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1374,7 +1374,7 @@ static void collapse_shmem(struct mm_struct *mm,
result = SCAN_FAIL;
goto xa_locked;
}
- xas_store(&xas, new_page + (index % HPAGE_PMD_NR));
+ xas_store(&xas, new_page);
nr_none++;
continue;
}
@@ -1450,7 +1450,7 @@ static void collapse_shmem(struct mm_struct *mm,
list_add_tail(&page->lru, &pagelist);
/* Finally, replace with the new page. */
- xas_store(&xas, new_page + (index % HPAGE_PMD_NR));
+ xas_store(&xas, new_page);
continue;
out_unlock:
unlock_page(page);
diff --git a/mm/shmem.c b/mm/shmem.c
index c8cdaa012f18..a78d4f05a51f 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -614,7 +614,7 @@ static int shmem_add_to_page_cache(struct page *page,
if (xas_error(&xas))
goto unlock;
next:
- xas_store(&xas, page + i);
+ xas_store(&xas, page);
if (++i < nr) {
xas_next(&xas);
goto next;
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 85245fdec8d9..c5da342b5dba 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -132,7 +132,7 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp)
for (i = 0; i < nr; i++) {
VM_BUG_ON_PAGE(xas.xa_index != idx + i, page);
set_page_private(page + i, entry.val + i);
- xas_store(&xas, page + i);
+ xas_store(&xas, page);
xas_next(&xas);
}
address_space->nrpages += nr;
--
2.20.1
^ permalink raw reply related [flat|nested] 17+ messages in thread
* Re: [PATCH v2] page cache: Store only head pages in i_pages
2019-02-12 18:34 [PATCH v2] page cache: Store only head pages in i_pages Matthew Wilcox
@ 2019-02-13 14:41 ` Jan Kara
2019-02-13 20:17 ` Matthew Wilcox
2019-02-14 13:30 ` Kirill A. Shutemov
1 sibling, 1 reply; 17+ messages in thread
From: Jan Kara @ 2019-02-13 14:41 UTC (permalink / raw)
To: Matthew Wilcox
Cc: Kirill A . Shutemov, linux-mm, linux-fsdevel, linux-kernel,
Hugh Dickins, William Kucharski
On Tue 12-02-19 10:34:54, Matthew Wilcox wrote:
> Transparent Huge Pages are currently stored in i_pages as pointers to
> consecutive subpages. This patch changes that to storing consecutive
> pointers to the head page in preparation for storing huge pages more
> efficiently in i_pages.
>
> Large parts of this are "inspired" by Kirill's patch
> https://lore.kernel.org/lkml/20170126115819.58875-2-kirill.shutemov@linux.intel.com/
>
> Signed-off-by: Matthew Wilcox <willy@infradead.org>
I like the idea!
> @@ -1778,33 +1767,27 @@ unsigned find_get_pages_range(struct address_space *mapping, pgoff_t *start,
>
> rcu_read_lock();
> xas_for_each(&xas, page, end) {
> - struct page *head;
> if (xas_retry(&xas, page))
> continue;
> /* Skip over shadow, swap and DAX entries */
> if (xa_is_value(page))
> continue;
>
> - head = compound_head(page);
> - if (!page_cache_get_speculative(head))
> + if (!page_cache_get_speculative(page))
> goto retry;
>
> - /* The page was split under us? */
> - if (compound_head(page) != head)
> - goto put_page;
> -
> - /* Has the page moved? */
> + /* Has the page moved or been split? */
> if (unlikely(page != xas_reload(&xas)))
> goto put_page;
>
> - pages[ret] = page;
> + pages[ret] = find_subpage(page, xas.xa_index);
> if (++ret == nr_pages) {
> *start = page->index + 1;
> goto out;
> }
So this subtly changes the behavior because now we will be returning in
'*start' a different index. So you should rather use 'pages[ret]->index'
instead.
> @@ -1923,26 +1899,21 @@ unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index,
> if (xa_is_value(page))
> continue;
>
> - head = compound_head(page);
> - if (!page_cache_get_speculative(head))
> + if (!page_cache_get_speculative(page))
> goto retry;
>
> - /* The page was split under us? */
> - if (compound_head(page) != head)
> - goto put_page;
> -
> - /* Has the page moved? */
> + /* Has the page moved or been split? */
> if (unlikely(page != xas_reload(&xas)))
> goto put_page;
>
> - pages[ret] = page;
> + pages[ret] = find_subpage(page, xas.xa_index);
> if (++ret == nr_pages) {
> *index = page->index + 1;
> goto out;
> }
Ditto here.
Otherwise the patch looks good to me so feel free to add:
Acked-by: Jan Kara <jack@suse.cz>
after fixing these two.
Honza
--
Jan Kara <jack@suse.com>
SUSE Labs, CR
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH v2] page cache: Store only head pages in i_pages
2019-02-13 14:41 ` Jan Kara
@ 2019-02-13 20:17 ` Matthew Wilcox
2019-02-14 16:27 ` Jan Kara
0 siblings, 1 reply; 17+ messages in thread
From: Matthew Wilcox @ 2019-02-13 20:17 UTC (permalink / raw)
To: Jan Kara
Cc: Kirill A . Shutemov, linux-mm, linux-fsdevel, linux-kernel,
Hugh Dickins, William Kucharski
On Wed, Feb 13, 2019 at 03:41:02PM +0100, Jan Kara wrote:
> On Tue 12-02-19 10:34:54, Matthew Wilcox wrote:
> > Transparent Huge Pages are currently stored in i_pages as pointers to
> > consecutive subpages. This patch changes that to storing consecutive
> > pointers to the head page in preparation for storing huge pages more
> > efficiently in i_pages.
> >
> > Large parts of this are "inspired" by Kirill's patch
> > https://lore.kernel.org/lkml/20170126115819.58875-2-kirill.shutemov@linux.intel.com/
> >
> > Signed-off-by: Matthew Wilcox <willy@infradead.org>
>
> I like the idea!
Thanks! It's a step towards reducing the overhead of huge pages in the
page cache from (on x86) 520 pointers to a mere 8. Still not as good as
hugetlbfs, but I'm working on that too.
> > - pages[ret] = page;
> > + pages[ret] = find_subpage(page, xas.xa_index);
> > if (++ret == nr_pages) {
> > *start = page->index + 1;
> > goto out;
> > }
>
> So this subtly changes the behavior because now we will be returning in
> '*start' a different index. So you should rather use 'pages[ret]->index'
> instead.
You're right, I made a mistake there. However, seeing this:
https://lore.kernel.org/lkml/20190110030838.84446-1-yuzhao@google.com/
makes me think that I should be using xa_index + 1 there.
> Otherwise the patch looks good to me so feel free to add:
>
> Acked-by: Jan Kara <jack@suse.cz>
>
> after fixing these two.
Thanks!
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH v2] page cache: Store only head pages in i_pages
2019-02-13 20:17 ` Matthew Wilcox
@ 2019-02-14 16:27 ` Jan Kara
0 siblings, 0 replies; 17+ messages in thread
From: Jan Kara @ 2019-02-14 16:27 UTC (permalink / raw)
To: Matthew Wilcox
Cc: Jan Kara, Kirill A . Shutemov, linux-mm, linux-fsdevel,
linux-kernel, Hugh Dickins, William Kucharski
On Wed 13-02-19 12:17:15, Matthew Wilcox wrote:
> > > - pages[ret] = page;
> > > + pages[ret] = find_subpage(page, xas.xa_index);
> > > if (++ret == nr_pages) {
> > > *start = page->index + 1;
> > > goto out;
> > > }
> >
> > So this subtly changes the behavior because now we will be returning in
> > '*start' a different index. So you should rather use 'pages[ret]->index'
> > instead.
>
> You're right, I made a mistake there. However, seeing this:
> https://lore.kernel.org/lkml/20190110030838.84446-1-yuzhao@google.com/
>
> makes me think that I should be using xa_index + 1 there.
Yeah, you're right. Thanks!
Honza
--
Jan Kara <jack@suse.com>
SUSE Labs, CR
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH v2] page cache: Store only head pages in i_pages
2019-02-12 18:34 [PATCH v2] page cache: Store only head pages in i_pages Matthew Wilcox
2019-02-13 14:41 ` Jan Kara
@ 2019-02-14 13:30 ` Kirill A. Shutemov
2019-02-14 20:53 ` Matthew Wilcox
` (3 more replies)
1 sibling, 4 replies; 17+ messages in thread
From: Kirill A. Shutemov @ 2019-02-14 13:30 UTC (permalink / raw)
To: Matthew Wilcox
Cc: linux-mm, linux-fsdevel, linux-kernel, Hugh Dickins, William Kucharski
On Tue, Feb 12, 2019 at 10:34:54AM -0800, Matthew Wilcox wrote:
> Transparent Huge Pages are currently stored in i_pages as pointers to
> consecutive subpages. This patch changes that to storing consecutive
> pointers to the head page in preparation for storing huge pages more
> efficiently in i_pages.
>
> Large parts of this are "inspired" by Kirill's patch
> https://lore.kernel.org/lkml/20170126115819.58875-2-kirill.shutemov@linux.intel.com/
>
> Signed-off-by: Matthew Wilcox <willy@infradead.org>
I believe I found a few missing pieces:
- page_cache_delete_batch() will blow up on
VM_BUG_ON_PAGE(page->index + HPAGE_PMD_NR - tail_pages
!= pvec->pages[i]->index, page);
- migrate_page_move_mapping() has to be converted too.
The rest *looks* fine to me. But it needs a lot of testing.
--
Kirill A. Shutemov
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH v2] page cache: Store only head pages in i_pages
2019-02-14 13:30 ` Kirill A. Shutemov
@ 2019-02-14 20:53 ` Matthew Wilcox
2019-02-14 22:03 ` Kirill A. Shutemov
2019-02-14 21:17 ` Matthew Wilcox
` (2 subsequent siblings)
3 siblings, 1 reply; 17+ messages in thread
From: Matthew Wilcox @ 2019-02-14 20:53 UTC (permalink / raw)
To: Kirill A. Shutemov
Cc: linux-mm, linux-fsdevel, linux-kernel, Hugh Dickins, William Kucharski
On Thu, Feb 14, 2019 at 04:30:04PM +0300, Kirill A. Shutemov wrote:
> - page_cache_delete_batch() will blow up on
>
> VM_BUG_ON_PAGE(page->index + HPAGE_PMD_NR - tail_pages
> != pvec->pages[i]->index, page);
Quite right. I decided to rewrite page_cache_delete_batch. What do you
(and Jan!) think to this? Compile-tested only.
diff --git a/mm/filemap.c b/mm/filemap.c
index 0d71b1acf811..facaa6913ffa 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -279,11 +279,11 @@ EXPORT_SYMBOL(delete_from_page_cache);
* @pvec: pagevec with pages to delete
*
* The function walks over mapping->i_pages and removes pages passed in @pvec
- * from the mapping. The function expects @pvec to be sorted by page index.
+ * from the mapping. The function expects @pvec to be sorted by page index
+ * and is optimised for it to be dense.
* It tolerates holes in @pvec (mapping entries at those indices are not
* modified). The function expects only THP head pages to be present in the
- * @pvec and takes care to delete all corresponding tail pages from the
- * mapping as well.
+ * @pvec.
*
* The function expects the i_pages lock to be held.
*/
@@ -292,40 +292,36 @@ static void page_cache_delete_batch(struct address_space *mapping,
{
XA_STATE(xas, &mapping->i_pages, pvec->pages[0]->index);
int total_pages = 0;
- int i = 0, tail_pages = 0;
+ int i = 0;
struct page *page;
mapping_set_update(&xas, mapping);
xas_for_each(&xas, page, ULONG_MAX) {
- if (i >= pagevec_count(pvec) && !tail_pages)
+ if (i >= pagevec_count(pvec))
break;
+
+ /* A swap/dax/shadow entry got inserted? Skip it. */
if (xa_is_value(page))
continue;
- if (!tail_pages) {
- /*
- * Some page got inserted in our range? Skip it. We
- * have our pages locked so they are protected from
- * being removed.
- */
- if (page != pvec->pages[i]) {
- VM_BUG_ON_PAGE(page->index >
- pvec->pages[i]->index, page);
- continue;
- }
- WARN_ON_ONCE(!PageLocked(page));
- if (PageTransHuge(page) && !PageHuge(page))
- tail_pages = HPAGE_PMD_NR - 1;
+ /*
+ * A page got inserted in our range? Skip it. We have our
+ * pages locked so they are protected from being removed.
+ */
+ if (page != pvec->pages[i]) {
+ VM_BUG_ON_PAGE(page->index > pvec->pages[i]->index,
+ page);
+ continue;
+ }
+
+ WARN_ON_ONCE(!PageLocked(page));
+
+ if (page->index == xas.xa_index)
page->mapping = NULL;
- /*
- * Leave page->index set: truncation lookup relies
- * upon it
- */
+ /* Leave page->index set: truncation lookup relies on it */
+
+ if (page->index + (1UL << compound_order(page)) - 1 ==
+ xas.xa_index)
i++;
- } else {
- VM_BUG_ON_PAGE(page->index + HPAGE_PMD_NR - tail_pages
- != pvec->pages[i]->index, page);
- tail_pages--;
- }
xas_store(&xas, NULL);
total_pages++;
}
^ permalink raw reply related [flat|nested] 17+ messages in thread
* Re: [PATCH v2] page cache: Store only head pages in i_pages
2019-02-14 20:53 ` Matthew Wilcox
@ 2019-02-14 22:03 ` Kirill A. Shutemov
2019-02-14 22:25 ` Matthew Wilcox
0 siblings, 1 reply; 17+ messages in thread
From: Kirill A. Shutemov @ 2019-02-14 22:03 UTC (permalink / raw)
To: Matthew Wilcox
Cc: linux-mm, linux-fsdevel, linux-kernel, Hugh Dickins, William Kucharski
On Thu, Feb 14, 2019 at 12:53:31PM -0800, Matthew Wilcox wrote:
> On Thu, Feb 14, 2019 at 04:30:04PM +0300, Kirill A. Shutemov wrote:
> > - page_cache_delete_batch() will blow up on
> >
> > VM_BUG_ON_PAGE(page->index + HPAGE_PMD_NR - tail_pages
> > != pvec->pages[i]->index, page);
>
> Quite right. I decided to rewrite page_cache_delete_batch. What do you
> (and Jan!) think to this? Compile-tested only.
>
> diff --git a/mm/filemap.c b/mm/filemap.c
> index 0d71b1acf811..facaa6913ffa 100644
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -279,11 +279,11 @@ EXPORT_SYMBOL(delete_from_page_cache);
> * @pvec: pagevec with pages to delete
> *
> * The function walks over mapping->i_pages and removes pages passed in @pvec
> - * from the mapping. The function expects @pvec to be sorted by page index.
> + * from the mapping. The function expects @pvec to be sorted by page index
> + * and is optimised for it to be dense.
> * It tolerates holes in @pvec (mapping entries at those indices are not
> * modified). The function expects only THP head pages to be present in the
> - * @pvec and takes care to delete all corresponding tail pages from the
> - * mapping as well.
> + * @pvec.
> *
> * The function expects the i_pages lock to be held.
> */
> @@ -292,40 +292,36 @@ static void page_cache_delete_batch(struct address_space *mapping,
> {
> XA_STATE(xas, &mapping->i_pages, pvec->pages[0]->index);
> int total_pages = 0;
> - int i = 0, tail_pages = 0;
> + int i = 0;
> struct page *page;
>
> mapping_set_update(&xas, mapping);
> xas_for_each(&xas, page, ULONG_MAX) {
> - if (i >= pagevec_count(pvec) && !tail_pages)
> + if (i >= pagevec_count(pvec))
> break;
> +
> + /* A swap/dax/shadow entry got inserted? Skip it. */
> if (xa_is_value(page))
> continue;
> - if (!tail_pages) {
> - /*
> - * Some page got inserted in our range? Skip it. We
> - * have our pages locked so they are protected from
> - * being removed.
> - */
> - if (page != pvec->pages[i]) {
> - VM_BUG_ON_PAGE(page->index >
> - pvec->pages[i]->index, page);
> - continue;
> - }
> - WARN_ON_ONCE(!PageLocked(page));
> - if (PageTransHuge(page) && !PageHuge(page))
> - tail_pages = HPAGE_PMD_NR - 1;
> + /*
> + * A page got inserted in our range? Skip it. We have our
> + * pages locked so they are protected from being removed.
> + */
> + if (page != pvec->pages[i]) {
Maybe a comment for the VM_BUG while you're there?
> + VM_BUG_ON_PAGE(page->index > pvec->pages[i]->index,
> + page);
> + continue;
> + }
> +
> + WARN_ON_ONCE(!PageLocked(page));
> +
> + if (page->index == xas.xa_index)
> page->mapping = NULL;
> - /*
> - * Leave page->index set: truncation lookup relies
> - * upon it
> - */
> + /* Leave page->index set: truncation lookup relies on it */
> +
> + if (page->index + (1UL << compound_order(page)) - 1 ==
> + xas.xa_index)
It's 1am here and I'm slow, but it took me a few minutes to understand how
it works. Please add a comment.
> i++;
> - } else {
> - VM_BUG_ON_PAGE(page->index + HPAGE_PMD_NR - tail_pages
> - != pvec->pages[i]->index, page);
> - tail_pages--;
> - }
> xas_store(&xas, NULL);
> total_pages++;
> }
--
Kirill A. Shutemov
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH v2] page cache: Store only head pages in i_pages
2019-02-14 22:03 ` Kirill A. Shutemov
@ 2019-02-14 22:25 ` Matthew Wilcox
0 siblings, 0 replies; 17+ messages in thread
From: Matthew Wilcox @ 2019-02-14 22:25 UTC (permalink / raw)
To: Kirill A. Shutemov
Cc: linux-mm, linux-fsdevel, linux-kernel, Hugh Dickins, William Kucharski
On Fri, Feb 15, 2019 at 01:03:44AM +0300, Kirill A. Shutemov wrote:
> > + /*
> > + * A page got inserted in our range? Skip it. We have our
> > + * pages locked so they are protected from being removed.
> > + */
> > + if (page != pvec->pages[i]) {
>
> Maybe a comment for the VM_BUG while you're there?
Great idea. I didn't understand it the first time I looked at it either,
but I forgot to write a comment when I figured it out.
/*
* A page got inserted in our range? Skip it. We have our
* pages locked so they are protected from being removed.
+ * If we see a page whose index is higher than ours, it
+ * means our page has been removed, which shouldn't be
+ * possible because we're holding the PageLock.
*/
if (page != pvec->pages[i]) {
> > + /* Leave page->index set: truncation lookup relies on it */
> > +
> > + if (page->index + (1UL << compound_order(page)) - 1 ==
> > + xas.xa_index)
>
> It's 1am here and I'm slow, but it took me few minutes to understand how
> it works. Please add a comment.
I should get you to review at 1am more often! You're quite right.
Sleep-deprived Kirill spots problems that normal people would encounter.
/* Leave page->index set: truncation lookup relies on it */
+ /*
+ * Move to the next page in the vector if this is a small page
+ * or the index is of the last page in this compound page.
+ */
if (page->index + (1UL << compound_order(page)) - 1 ==
xas.xa_index)
i++;
Thanks for the review.
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH v2] page cache: Store only head pages in i_pages
2019-02-14 13:30 ` Kirill A. Shutemov
2019-02-14 20:53 ` Matthew Wilcox
@ 2019-02-14 21:17 ` Matthew Wilcox
2019-02-14 22:08 ` Kirill A. Shutemov
2019-02-14 22:29 ` Kirill A. Shutemov
2019-02-14 22:41 ` Kirill A. Shutemov
3 siblings, 1 reply; 17+ messages in thread
From: Matthew Wilcox @ 2019-02-14 21:17 UTC (permalink / raw)
To: Kirill A. Shutemov
Cc: linux-mm, linux-fsdevel, linux-kernel, Hugh Dickins, William Kucharski
On Thu, Feb 14, 2019 at 04:30:04PM +0300, Kirill A. Shutemov wrote:
> - migrate_page_move_mapping() has to be converted too.
I think that's as simple as:
+++ b/mm/migrate.c
@@ -465,7 +465,7 @@ int migrate_page_move_mapping(struct address_space *mapping,
for (i = 1; i < HPAGE_PMD_NR; i++) {
xas_next(&xas);
- xas_store(&xas, newpage + i);
+ xas_store(&xas, newpage);
}
}
or do you see something else I missed?
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH v2] page cache: Store only head pages in i_pages
2019-02-14 21:17 ` Matthew Wilcox
@ 2019-02-14 22:08 ` Kirill A. Shutemov
2019-02-14 22:11 ` Matthew Wilcox
0 siblings, 1 reply; 17+ messages in thread
From: Kirill A. Shutemov @ 2019-02-14 22:08 UTC (permalink / raw)
To: Matthew Wilcox
Cc: linux-mm, linux-fsdevel, linux-kernel, Hugh Dickins, William Kucharski
On Thu, Feb 14, 2019 at 01:17:57PM -0800, Matthew Wilcox wrote:
> On Thu, Feb 14, 2019 at 04:30:04PM +0300, Kirill A. Shutemov wrote:
> > - migrate_page_move_mapping() has to be converted too.
>
> I think that's as simple as:
>
> +++ b/mm/migrate.c
> @@ -465,7 +465,7 @@ int migrate_page_move_mapping(struct address_space *mapping,
>
> for (i = 1; i < HPAGE_PMD_NR; i++) {
> xas_next(&xas);
> - xas_store(&xas, newpage + i);
> + xas_store(&xas, newpage);
> }
> }
>
>
> or do you see something else I missed?
Looks right to me.
BTW, maybe add some syntax sugar on the XArray side?
Replace the loop and xas_store() before it with:
xas_fill(&xas, newpage, 1UL << compound_order(newpage));
or something similar?
--
Kirill A. Shutemov
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH v2] page cache: Store only head pages in i_pages
2019-02-14 22:08 ` Kirill A. Shutemov
@ 2019-02-14 22:11 ` Matthew Wilcox
2019-02-14 22:29 ` Kirill A. Shutemov
0 siblings, 1 reply; 17+ messages in thread
From: Matthew Wilcox @ 2019-02-14 22:11 UTC (permalink / raw)
To: Kirill A. Shutemov
Cc: linux-mm, linux-fsdevel, linux-kernel, Hugh Dickins, William Kucharski
On Fri, Feb 15, 2019 at 01:08:10AM +0300, Kirill A. Shutemov wrote:
> On Thu, Feb 14, 2019 at 01:17:57PM -0800, Matthew Wilcox wrote:
> > On Thu, Feb 14, 2019 at 04:30:04PM +0300, Kirill A. Shutemov wrote:
> > > - migrate_page_move_mapping() has to be converted too.
> >
> > I think that's as simple as:
> >
> > +++ b/mm/migrate.c
> > @@ -465,7 +465,7 @@ int migrate_page_move_mapping(struct address_space *mapping,
> >
> > for (i = 1; i < HPAGE_PMD_NR; i++) {
> > xas_next(&xas);
> > - xas_store(&xas, newpage + i);
> > + xas_store(&xas, newpage);
> > }
> > }
> >
> >
> > or do you see something else I missed?
>
> Looks right to me.
>
> BTW, maybe some add syntax sugar from XArray side?
>
> Replace the loop and xas_store() before it with:
>
> xas_fill(&xas, newpage, 1UL << compound_order(newpage));
>
> or something similar?
If we were keeping this code longterm, then yes, something like that
would be great. I'm hoping this code is a mere stepping stone towards
using multi-slot entries for the page cache.
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH v2] page cache: Store only head pages in i_pages
2019-02-14 22:11 ` Matthew Wilcox
@ 2019-02-14 22:29 ` Kirill A. Shutemov
0 siblings, 0 replies; 17+ messages in thread
From: Kirill A. Shutemov @ 2019-02-14 22:29 UTC (permalink / raw)
To: Matthew Wilcox
Cc: linux-mm, linux-fsdevel, linux-kernel, Hugh Dickins, William Kucharski
On Thu, Feb 14, 2019 at 02:11:58PM -0800, Matthew Wilcox wrote:
> On Fri, Feb 15, 2019 at 01:08:10AM +0300, Kirill A. Shutemov wrote:
> > On Thu, Feb 14, 2019 at 01:17:57PM -0800, Matthew Wilcox wrote:
> > > On Thu, Feb 14, 2019 at 04:30:04PM +0300, Kirill A. Shutemov wrote:
> > > > - migrate_page_move_mapping() has to be converted too.
> > >
> > > I think that's as simple as:
> > >
> > > +++ b/mm/migrate.c
> > > @@ -465,7 +465,7 @@ int migrate_page_move_mapping(struct address_space *mapping,
> > >
> > > for (i = 1; i < HPAGE_PMD_NR; i++) {
> > > xas_next(&xas);
> > > - xas_store(&xas, newpage + i);
> > > + xas_store(&xas, newpage);
> > > }
> > > }
> > >
> > >
> > > or do you see something else I missed?
> >
> > Looks right to me.
> >
> > BTW, maybe some add syntax sugar from XArray side?
> >
> > Replace the loop and xas_store() before it with:
> >
> > xas_fill(&xas, newpage, 1UL << compound_order(newpage));
> >
> > or something similar?
>
> If we were keeping this code longterm, then yes, something like that
> would be great. I'm hoping this code is a mere stepping stone towards
> using multi-slot entries for the page cache.
Fair enough.
--
Kirill A. Shutemov
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH v2] page cache: Store only head pages in i_pages
2019-02-14 13:30 ` Kirill A. Shutemov
2019-02-14 20:53 ` Matthew Wilcox
2019-02-14 21:17 ` Matthew Wilcox
@ 2019-02-14 22:29 ` Kirill A. Shutemov
2019-02-15 20:28 ` Matthew Wilcox
2019-02-14 22:41 ` Kirill A. Shutemov
3 siblings, 1 reply; 17+ messages in thread
From: Kirill A. Shutemov @ 2019-02-14 22:29 UTC (permalink / raw)
To: Matthew Wilcox
Cc: linux-mm, linux-fsdevel, linux-kernel, Hugh Dickins, William Kucharski
On Thu, Feb 14, 2019 at 04:30:04PM +0300, Kirill A. Shutemov wrote:
> On Tue, Feb 12, 2019 at 10:34:54AM -0800, Matthew Wilcox wrote:
> > Transparent Huge Pages are currently stored in i_pages as pointers to
> > consecutive subpages. This patch changes that to storing consecutive
> > pointers to the head page in preparation for storing huge pages more
> > efficiently in i_pages.
> >
> > Large parts of this are "inspired" by Kirill's patch
> > https://lore.kernel.org/lkml/20170126115819.58875-2-kirill.shutemov@linux.intel.com/
> >
> > Signed-off-by: Matthew Wilcox <willy@infradead.org>
>
> I believe I found a few missing pieces:
>
> - page_cache_delete_batch() will blow up on
>
> VM_BUG_ON_PAGE(page->index + HPAGE_PMD_NR - tail_pages
> != pvec->pages[i]->index, page);
>
> - migrate_page_move_mapping() has to be converted too.
Other missing pieces are memfd_wait_for_pins() and memfd_tag_pins().
We need to call page_mapcount() for tail pages there.
--
Kirill A. Shutemov
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH v2] page cache: Store only head pages in i_pages
2019-02-14 22:29 ` Kirill A. Shutemov
@ 2019-02-15 20:28 ` Matthew Wilcox
0 siblings, 0 replies; 17+ messages in thread
From: Matthew Wilcox @ 2019-02-15 20:28 UTC (permalink / raw)
To: Kirill A. Shutemov
Cc: linux-mm, linux-fsdevel, linux-kernel, Hugh Dickins, William Kucharski
On Fri, Feb 15, 2019 at 01:29:44AM +0300, Kirill A. Shutemov wrote:
> On Thu, Feb 14, 2019 at 04:30:04PM +0300, Kirill A. Shutemov wrote:
> > On Tue, Feb 12, 2019 at 10:34:54AM -0800, Matthew Wilcox wrote:
> > > Transparent Huge Pages are currently stored in i_pages as pointers to
> > > consecutive subpages. This patch changes that to storing consecutive
> > > pointers to the head page in preparation for storing huge pages more
> > > efficiently in i_pages.
> > >
> > > Large parts of this are "inspired" by Kirill's patch
> > > https://lore.kernel.org/lkml/20170126115819.58875-2-kirill.shutemov@linux.intel.com/
> > >
> > > Signed-off-by: Matthew Wilcox <willy@infradead.org>
> >
> > I believe I found a few missing pieces:
> >
> > - page_cache_delete_batch() will blow up on
> >
> > VM_BUG_ON_PAGE(page->index + HPAGE_PMD_NR - tail_pages
> > != pvec->pages[i]->index, page);
> >
> > - migrate_page_move_mapping() has to be converted too.
>
> Other missing pieces are memfd_wait_for_pins() and memfd_tag_pins().
> We need to call page_mapcount() for tail pages there.
@@ -39,6 +39,7 @@ static void memfd_tag_pins(struct xa_state *xas)
xas_for_each(xas, page, ULONG_MAX) {
if (xa_is_value(page))
continue;
+ page = find_subpage(page, xas.xa_index);
if (page_count(page) - page_mapcount(page) > 1)
xas_set_mark(xas, MEMFD_TAG_PINNED);
@@ -88,6 +89,7 @@ static int memfd_wait_for_pins(struct address_space *mapping)
bool clear = true;
if (xa_is_value(page))
continue;
+ page = find_subpage(page, xas.xa_index);
if (page_count(page) - page_mapcount(page) != 1) {
/*
* On the last scan, we clean up all those tags
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH v2] page cache: Store only head pages in i_pages
2019-02-14 13:30 ` Kirill A. Shutemov
` (2 preceding siblings ...)
2019-02-14 22:29 ` Kirill A. Shutemov
@ 2019-02-14 22:41 ` Kirill A. Shutemov
2019-02-15 20:20 ` Matthew Wilcox
3 siblings, 1 reply; 17+ messages in thread
From: Kirill A. Shutemov @ 2019-02-14 22:41 UTC (permalink / raw)
To: Matthew Wilcox
Cc: linux-mm, linux-fsdevel, linux-kernel, Hugh Dickins, William Kucharski
On Thu, Feb 14, 2019 at 04:30:04PM +0300, Kirill A. Shutemov wrote:
> On Tue, Feb 12, 2019 at 10:34:54AM -0800, Matthew Wilcox wrote:
> > Transparent Huge Pages are currently stored in i_pages as pointers to
> > consecutive subpages. This patch changes that to storing consecutive
> > pointers to the head page in preparation for storing huge pages more
> > efficiently in i_pages.
> >
> > Large parts of this are "inspired" by Kirill's patch
> > https://lore.kernel.org/lkml/20170126115819.58875-2-kirill.shutemov@linux.intel.com/
> >
> > Signed-off-by: Matthew Wilcox <willy@infradead.org>
>
> I believe I found a few missing pieces:
>
> - page_cache_delete_batch() will blow up on
>
> VM_BUG_ON_PAGE(page->index + HPAGE_PMD_NR - tail_pages
> != pvec->pages[i]->index, page);
>
> - migrate_page_move_mapping() has to be converted too.
- __delete_from_swap_cache() will blow up on
VM_BUG_ON_PAGE(entry != page + i, entry);
--
Kirill A. Shutemov
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH v2] page cache: Store only head pages in i_pages
2019-02-14 22:41 ` Kirill A. Shutemov
@ 2019-02-15 20:20 ` Matthew Wilcox
2019-02-15 21:17 ` Kirill A. Shutemov
0 siblings, 1 reply; 17+ messages in thread
From: Matthew Wilcox @ 2019-02-15 20:20 UTC (permalink / raw)
To: Kirill A. Shutemov
Cc: linux-mm, linux-fsdevel, linux-kernel, Hugh Dickins, William Kucharski
On Fri, Feb 15, 2019 at 01:41:15AM +0300, Kirill A. Shutemov wrote:
> - __delete_from_swap_cache() will blow up on
>
> VM_BUG_ON_PAGE(entry != page + i, entry);
Right.
@@ -167,7 +167,7 @@ void __delete_from_swap_cache(struct page *page, swp_entry_t entry)
for (i = 0; i < nr; i++) {
void *entry = xas_store(&xas, NULL);
- VM_BUG_ON_PAGE(entry != page + i, entry);
+ VM_BUG_ON_PAGE(entry != page, entry);
set_page_private(page + i, 0);
xas_next(&xas);
}
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH v2] page cache: Store only head pages in i_pages
2019-02-15 20:20 ` Matthew Wilcox
@ 2019-02-15 21:17 ` Kirill A. Shutemov
0 siblings, 0 replies; 17+ messages in thread
From: Kirill A. Shutemov @ 2019-02-15 21:17 UTC (permalink / raw)
To: Matthew Wilcox
Cc: linux-mm, linux-fsdevel, linux-kernel, Hugh Dickins, William Kucharski
On Fri, Feb 15, 2019 at 12:20:57PM -0800, Matthew Wilcox wrote:
> On Fri, Feb 15, 2019 at 01:41:15AM +0300, Kirill A. Shutemov wrote:
> > - __delete_from_swap_cache() will blow up on
> >
> > VM_BUG_ON_PAGE(entry != page + i, entry);
>
> Right.
Thanks. I think this is the last one I found.
Could you send v3 with all fixups applied?
--
Kirill A. Shutemov
^ permalink raw reply [flat|nested] 17+ messages in thread
end of thread, other threads:[~2019-02-15 21:17 UTC | newest]
Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-02-12 18:34 [PATCH v2] page cache: Store only head pages in i_pages Matthew Wilcox
2019-02-13 14:41 ` Jan Kara
2019-02-13 20:17 ` Matthew Wilcox
2019-02-14 16:27 ` Jan Kara
2019-02-14 13:30 ` Kirill A. Shutemov
2019-02-14 20:53 ` Matthew Wilcox
2019-02-14 22:03 ` Kirill A. Shutemov
2019-02-14 22:25 ` Matthew Wilcox
2019-02-14 21:17 ` Matthew Wilcox
2019-02-14 22:08 ` Kirill A. Shutemov
2019-02-14 22:11 ` Matthew Wilcox
2019-02-14 22:29 ` Kirill A. Shutemov
2019-02-14 22:29 ` Kirill A. Shutemov
2019-02-15 20:28 ` Matthew Wilcox
2019-02-14 22:41 ` Kirill A. Shutemov
2019-02-15 20:20 ` Matthew Wilcox
2019-02-15 21:17 ` Kirill A. Shutemov
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).