* [PATCH v2 1/8] mm: Factor find_get_incore_page out of mincore_page
2020-09-10 18:33 [PATCH v2 0/8] Return head pages from find_*_entry Matthew Wilcox (Oracle)
@ 2020-09-10 18:33 ` Matthew Wilcox (Oracle)
2020-09-10 18:33 ` [PATCH v2 2/8] mm: Use find_get_incore_page in memcontrol Matthew Wilcox (Oracle)
` (7 subsequent siblings)
8 siblings, 0 replies; 18+ messages in thread
From: Matthew Wilcox (Oracle) @ 2020-09-10 18:33 UTC (permalink / raw)
To: linux-mm
Cc: Matthew Wilcox (Oracle),
Andrew Morton, Hugh Dickins, William Kucharski, Jani Nikula,
Alexey Dobriyan, Johannes Weiner, Chris Wilson, Matthew Auld,
Huang Ying, intel-gfx, cgroups, linux-kernel
Provide this functionality from the swap cache. It's useful for
more than just mincore().
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
include/linux/swap.h | 7 +++++++
mm/mincore.c | 28 ++--------------------------
mm/swap_state.c | 32 ++++++++++++++++++++++++++++++++
3 files changed, 41 insertions(+), 26 deletions(-)
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 661046994db4..df87de38dca5 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -427,6 +427,7 @@ extern void free_pages_and_swap_cache(struct page **, int);
extern struct page *lookup_swap_cache(swp_entry_t entry,
struct vm_area_struct *vma,
unsigned long addr);
+struct page *find_get_incore_page(struct address_space *mapping, pgoff_t index);
extern struct page *read_swap_cache_async(swp_entry_t, gfp_t,
struct vm_area_struct *vma, unsigned long addr,
bool do_poll);
@@ -569,6 +570,12 @@ static inline struct page *lookup_swap_cache(swp_entry_t swp,
return NULL;
}
+static inline
+struct page *find_get_incore_page(struct address_space *mapping, pgoff_t index)
+{
+ return find_get_page(mapping, index);
+}
+
static inline int add_to_swap(struct page *page)
{
return 0;
diff --git a/mm/mincore.c b/mm/mincore.c
index 453ff112470f..02db1a834021 100644
--- a/mm/mincore.c
+++ b/mm/mincore.c
@@ -48,7 +48,7 @@ static int mincore_hugetlb(pte_t *pte, unsigned long hmask, unsigned long addr,
* and is up to date; i.e. that no page-in operation would be required
* at this time if an application were to map and access this page.
*/
-static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff)
+static unsigned char mincore_page(struct address_space *mapping, pgoff_t index)
{
unsigned char present = 0;
struct page *page;
@@ -59,31 +59,7 @@ static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff)
* any other file mapping (ie. marked !present and faulted in with
* tmpfs's .fault). So swapped out tmpfs mappings are tested here.
*/
-#ifdef CONFIG_SWAP
- if (shmem_mapping(mapping)) {
- page = find_get_entry(mapping, pgoff);
- /*
- * shmem/tmpfs may return swap: account for swapcache
- * page too.
- */
- if (xa_is_value(page)) {
- swp_entry_t swp = radix_to_swp_entry(page);
- struct swap_info_struct *si;
-
- /* Prevent swap device to being swapoff under us */
- si = get_swap_device(swp);
- if (si) {
- page = find_get_page(swap_address_space(swp),
- swp_offset(swp));
- put_swap_device(si);
- } else
- page = NULL;
- }
- } else
- page = find_get_page(mapping, pgoff);
-#else
- page = find_get_page(mapping, pgoff);
-#endif
+ page = find_get_incore_page(mapping, index);
if (page) {
present = PageUptodate(page);
put_page(page);
diff --git a/mm/swap_state.c b/mm/swap_state.c
index c16eebb81d8b..c79e2242dd04 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -21,6 +21,7 @@
#include <linux/vmalloc.h>
#include <linux/swap_slots.h>
#include <linux/huge_mm.h>
+#include <linux/shmem_fs.h>
#include "internal.h"
/*
@@ -414,6 +415,37 @@ struct page *lookup_swap_cache(swp_entry_t entry, struct vm_area_struct *vma,
return page;
}
+/**
+ * find_get_incore_page - Find and get a page from the page or swap caches.
+ * @mapping: The address_space to search.
+ * @index: The page cache index.
+ *
+ * This differs from find_get_page() in that it will also look for the
+ * page in the swap cache.
+ *
+ * Return: The found page or %NULL.
+ */
+struct page *find_get_incore_page(struct address_space *mapping, pgoff_t index)
+{
+ swp_entry_t swp;
+ struct swap_info_struct *si;
+ struct page *page = find_get_entry(mapping, index);
+
+ if (!xa_is_value(page))
+ return page;
+ if (!shmem_mapping(mapping))
+ return NULL;
+
+ swp = radix_to_swp_entry(page);
+ /* Prevent swapoff from happening to us */
+ si = get_swap_device(swp);
+ if (!si)
+ return NULL;
+ page = find_get_page(swap_address_space(swp), swp_offset(swp));
+ put_swap_device(si);
+ return page;
+}
+
struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
struct vm_area_struct *vma, unsigned long addr,
bool *new_page_allocated)
--
2.28.0
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH v2 2/8] mm: Use find_get_incore_page in memcontrol
2020-09-10 18:33 [PATCH v2 0/8] Return head pages from find_*_entry Matthew Wilcox (Oracle)
2020-09-10 18:33 ` [PATCH v2 1/8] mm: Factor find_get_incore_page out of mincore_page Matthew Wilcox (Oracle)
@ 2020-09-10 18:33 ` Matthew Wilcox (Oracle)
2020-09-10 18:33 ` [PATCH v2 3/8] mm: Optimise madvise WILLNEED Matthew Wilcox (Oracle)
` (6 subsequent siblings)
8 siblings, 0 replies; 18+ messages in thread
From: Matthew Wilcox (Oracle) @ 2020-09-10 18:33 UTC (permalink / raw)
To: linux-mm
Cc: Matthew Wilcox (Oracle),
Andrew Morton, Hugh Dickins, William Kucharski, Jani Nikula,
Alexey Dobriyan, Johannes Weiner, Chris Wilson, Matthew Auld,
Huang Ying, intel-gfx, cgroups, linux-kernel
The current code does not protect against swapoff of the underlying
swap device, so this is a bug fix as well as a worthwhile reduction in
code complexity.
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
mm/memcontrol.c | 24 ++----------------------
1 file changed, 2 insertions(+), 22 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index b807952b4d43..2f02eaee7115 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5539,35 +5539,15 @@ static struct page *mc_handle_swap_pte(struct vm_area_struct *vma,
static struct page *mc_handle_file_pte(struct vm_area_struct *vma,
unsigned long addr, pte_t ptent, swp_entry_t *entry)
{
- struct page *page = NULL;
- struct address_space *mapping;
- pgoff_t pgoff;
-
if (!vma->vm_file) /* anonymous vma */
return NULL;
if (!(mc.flags & MOVE_FILE))
return NULL;
- mapping = vma->vm_file->f_mapping;
- pgoff = linear_page_index(vma, addr);
-
/* page is moved even if it's not RSS of this task(page-faulted). */
-#ifdef CONFIG_SWAP
/* shmem/tmpfs may report page out on swap: account for that too. */
- if (shmem_mapping(mapping)) {
- page = find_get_entry(mapping, pgoff);
- if (xa_is_value(page)) {
- swp_entry_t swp = radix_to_swp_entry(page);
- *entry = swp;
- page = find_get_page(swap_address_space(swp),
- swp_offset(swp));
- }
- } else
- page = find_get_page(mapping, pgoff);
-#else
- page = find_get_page(mapping, pgoff);
-#endif
- return page;
+ return find_get_incore_page(vma->vm_file->f_mapping,
+ linear_page_index(vma, addr));
}
/**
--
2.28.0
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH v2 3/8] mm: Optimise madvise WILLNEED
2020-09-10 18:33 [PATCH v2 0/8] Return head pages from find_*_entry Matthew Wilcox (Oracle)
2020-09-10 18:33 ` [PATCH v2 1/8] mm: Factor find_get_incore_page out of mincore_page Matthew Wilcox (Oracle)
2020-09-10 18:33 ` [PATCH v2 2/8] mm: Use find_get_incore_page in memcontrol Matthew Wilcox (Oracle)
@ 2020-09-10 18:33 ` Matthew Wilcox (Oracle)
2020-09-14 16:17 ` Qian Cai
2020-09-10 18:33 ` [PATCH v2 4/8] proc: Optimise smaps for shmem entries Matthew Wilcox (Oracle)
` (5 subsequent siblings)
8 siblings, 1 reply; 18+ messages in thread
From: Matthew Wilcox (Oracle) @ 2020-09-10 18:33 UTC (permalink / raw)
To: linux-mm
Cc: Matthew Wilcox (Oracle),
Andrew Morton, Hugh Dickins, William Kucharski, Jani Nikula,
Alexey Dobriyan, Johannes Weiner, Chris Wilson, Matthew Auld,
Huang Ying, intel-gfx, cgroups, linux-kernel
Instead of calling find_get_entry() for every page index, use an XArray
iterator to skip over NULL entries, and avoid calling get_page(),
because we only want the swap entries.
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
---
mm/madvise.c | 21 ++++++++++++---------
1 file changed, 12 insertions(+), 9 deletions(-)
diff --git a/mm/madvise.c b/mm/madvise.c
index dd1d43cf026d..96189acd6969 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -224,25 +224,28 @@ static void force_shm_swapin_readahead(struct vm_area_struct *vma,
unsigned long start, unsigned long end,
struct address_space *mapping)
{
- pgoff_t index;
+ XA_STATE(xas, &mapping->i_pages, linear_page_index(vma, start));
+ pgoff_t end_index = end / PAGE_SIZE;
struct page *page;
- swp_entry_t swap;
- for (; start < end; start += PAGE_SIZE) {
- index = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
+ rcu_read_lock();
+ xas_for_each(&xas, page, end_index) {
+ swp_entry_t swap;
- page = find_get_entry(mapping, index);
- if (!xa_is_value(page)) {
- if (page)
- put_page(page);
+ if (!xa_is_value(page))
continue;
- }
+ rcu_read_unlock();
+
swap = radix_to_swp_entry(page);
page = read_swap_cache_async(swap, GFP_HIGHUSER_MOVABLE,
NULL, 0, false);
if (page)
put_page(page);
+
+ rcu_read_lock();
+ xas_reset(&xas);
}
+ rcu_read_unlock();
lru_add_drain(); /* Push any new pages onto the LRU now */
}
--
2.28.0
^ permalink raw reply related [flat|nested] 18+ messages in thread
* Re: [PATCH v2 3/8] mm: Optimise madvise WILLNEED
2020-09-10 18:33 ` [PATCH v2 3/8] mm: Optimise madvise WILLNEED Matthew Wilcox (Oracle)
@ 2020-09-14 16:17 ` Qian Cai
2020-09-14 16:47 ` Qian Cai
2020-09-14 16:50 ` Matthew Wilcox
0 siblings, 2 replies; 18+ messages in thread
From: Qian Cai @ 2020-09-14 16:17 UTC (permalink / raw)
To: Matthew Wilcox (Oracle), linux-mm
Cc: intel-gfx, Huang Ying, Hugh Dickins, linux-kernel, Chris Wilson,
William Kucharski, Johannes Weiner, cgroups, Andrew Morton,
Alexey Dobriyan, Matthew Auld
On Thu, 2020-09-10 at 19:33 +0100, Matthew Wilcox (Oracle) wrote:
> Instead of calling find_get_entry() for every page index, use an XArray
> iterator to skip over NULL entries, and avoid calling get_page(),
> because we only want the swap entries.
>
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Reverting the "Return head pages from find_*_entry" patchset [1] up to this
patch fixed an issue where the LTP madvise06 test [2] would trigger the endless
soft-lockups below. Applying the patches that fixed other separate issues in
the patchset [3][4] did not help.
[1] https://lore.kernel.org/intel-gfx/20200910183318.20139-1-willy@infradead.org/
[2] https://github.com/linux-test-project/ltp/blob/master/testcases/kernel/syscalls/madvise/madvise06.c
[3] https://lore.kernel.org/intel-gfx/20200914112738.GM6583@casper.infradead.org/
[4] https://lore.kernel.org/lkml/20200914115559.GN6583@casper.infradead.org/
[ 2653.179563][ C4] CPU: 4 PID: 23320 Comm: madvise06 Not tainted 5.9.0-rc5-next-20200914+ #2
[ 2653.220176][ C4] Hardware name: HP ProLiant BL660c Gen9, BIOS I38 10/17/2018
[ 2653.254908][ C4] RIP: 0010:lock_acquire+0x211/0x8e0
[ 2653.278534][ C4] Code: 83 c0 03 38 d0 7c 08 84 d2 0f 85 3a 05 00 00 8b 85 04 08 00 00 83 e8 01 89 85 04 08 00 00 66 85 c0 0f 85 9a 04 00 00 41 52 9d <48> b8 00 00 00 00 00 fc ff df 48 01 c3 c7 03 00 00 00 00 c7 43 08
[ 2653.369929][ C4] RSP: 0018:ffffc9000e1bf9f0 EFLAGS: 00000246
[ 2653.399398][ C4] RAX: 0000000000000000 RBX: 1ffff92001c37f41 RCX: 1ffff92001c37f27
[ 2653.437720][ C4] RDX: 0000000000000000 RSI: 0000000029956a3e RDI: ffff889042f40844
[ 2653.475829][ C4] RBP: ffff889042f40040 R08: fffffbfff5083905 R09: fffffbfff5083905
[ 2653.511611][ C4] R10: 0000000000000246 R11: fffffbfff5083904 R12: ffffffffa74ce320
[ 2653.547396][ C4] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
[ 2653.582938][ C4] FS: 00007f1fc85e4600(0000) GS:ffff88881e100000(0000) knlGS:0000000000000000
[ 2653.622910][ C4] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 2653.652310][ C4] CR2: 0000000000620050 CR3: 000000054d438002 CR4: 00000000001706e0
[ 2653.688228][ C4] Call Trace:
[ 2653.702537][ C4] ? rcu_read_unlock+0x40/0x40
[ 2653.723647][ C4] ? find_held_lock+0x33/0x1c0
[ 2653.744708][ C4] ? __read_swap_cache_async+0x18f/0x870
[ 2653.770547][ C4] get_swap_device+0xf5/0x280
rcu_read_lock at include/linux/rcupdate.h:642
(inlined by) get_swap_device at mm/swapfile.c:1303
[ 2653.791303][ C4] ? get_swap_device+0xce/0x280
[ 2653.812693][ C4] ? swap_page_trans_huge_swapped+0x2a0/0x2a0
[ 2653.839963][ C4] __read_swap_cache_async+0x10c/0x870
__read_swap_cache_async at mm/swap_state.c:469
[ 2653.864243][ C4] ? rcu_read_lock_sched_held+0x9c/0xd0
[ 2653.890657][ C4] ? find_get_incore_page+0x220/0x220
[ 2653.916978][ C4] ? rcu_read_lock_held+0x9c/0xb0
[ 2653.940235][ C4] ? find_held_lock+0x33/0x1c0
[ 2653.961325][ C4] ? do_madvise.part.30+0xd11/0x1b70
[ 2653.984922][ C4] ? lock_downgrade+0x730/0x730
[ 2654.006502][ C4] read_swap_cache_async+0x60/0xb0
read_swap_cache_async at mm/swap_state.c:564
[ 2654.029694][ C4] ? __read_swap_cache_async+0x870/0x870
[ 2654.055486][ C4] ? xas_find+0x410/0x6c0
[ 2654.074663][ C4] do_madvise.part.30+0xd47/0x1b70
force_shm_swapin_readahead at mm/madvise.c:243
(inlined by) madvise_willneed at mm/madvise.c:277
(inlined by) madvise_vma at mm/madvise.c:939
(inlined by) do_madvise at mm/madvise.c:1142
[ 2654.097959][ C4] ? find_held_lock+0x33/0x1c0
[ 2654.119031][ C4] ? swapin_walk_pmd_entry+0x430/0x430
[ 2654.143518][ C4] ? down_read_nested+0x420/0x420
[ 2654.165748][ C4] ? rcu_read_lock_sched_held+0x9c/0xd0
[ 2654.190523][ C4] ? __x64_sys_madvise+0xa1/0x110
[ 2654.212973][ C4] __x64_sys_madvise+0xa1/0x110
[ 2654.233976][ C4] ? syscall_enter_from_user_mode+0x1c/0x50
[ 2654.260983][ C4] do_syscall_64+0x33/0x40
[ 2654.281132][ C4] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[ 2654.307623][ C4] RIP: 0033:0x7f1fc80fca6b
[ 2654.327125][ C4] Code: 64 89 02 b8 ff ff ff ff c3 48 8b 15 17 54 2c 00 f7 d8 64 89 02 b8 ff ff ff ff eb bc 0f 1f 00 f3 0f 1e fa b8 1c 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d ed 53 2c 00 f7 d8 64 89 01 48
[ 2654.420246][ C4] RSP: 002b:00007fff53609998 EFLAGS: 00000202 ORIG_RAX: 000000000000001c
[ 2654.458926][ C4] RAX: ffffffffffffffda RBX: 00007f1fc85e4580 RCX: 00007f1fc80fca6b
[ 2654.494295][ C4] RDX: 0000000000000003 RSI: 0000000019000000 RDI: 00007f1faf006000
[ 2654.530104][ C4] RBP: 00007f1faf006000 R08: 0000000000000000 R09: 00007fff53609284
[ 2654.566057][ C4] R10: 0000000000000003 R11: 0000000000000202 R12: 0000000000000000
[ 2654.601697][ C4] R13: 0000000000000001 R14: 0000000000000000 R15: 0000000000000000
...
[ 2846.587644][ T353] Showing all locks held in the system:
[ 2846.622367][ T353] 1 lock held by khungtaskd/353:
[ 2846.644378][ T353] #0: ffffffffa74ce320 (rcu_read_lock){....}-{1:2}, at: rcu_lock_acquire.constprop.51+0x0/0x30
[ 2846.695738][ T353] 1 lock held by khugepaged/361:
[ 2846.718056][ T353] #0: ffffffffa75418e8 (lock#4){+.+.}-{3:3}, at: lru_add_drain_all+0x55/0x5f0
[ 2846.758184][ T353] 1 lock held by madvise06/23320:
[ 2846.780486][ T353]
[ 2846.790445][ T353] =============================================
> ---
> mm/madvise.c | 21 ++++++++++++---------
> 1 file changed, 12 insertions(+), 9 deletions(-)
>
> diff --git a/mm/madvise.c b/mm/madvise.c
> index dd1d43cf026d..96189acd6969 100644
> --- a/mm/madvise.c
> +++ b/mm/madvise.c
> @@ -224,25 +224,28 @@ static void force_shm_swapin_readahead(struct
> vm_area_struct *vma,
> unsigned long start, unsigned long end,
> struct address_space *mapping)
> {
> - pgoff_t index;
> + XA_STATE(xas, &mapping->i_pages, linear_page_index(vma, start));
> + pgoff_t end_index = end / PAGE_SIZE;
> struct page *page;
> - swp_entry_t swap;
>
> - for (; start < end; start += PAGE_SIZE) {
> - index = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
> + rcu_read_lock();
> + xas_for_each(&xas, page, end_index) {
> + swp_entry_t swap;
>
> - page = find_get_entry(mapping, index);
> - if (!xa_is_value(page)) {
> - if (page)
> - put_page(page);
> + if (!xa_is_value(page))
> continue;
> - }
> + rcu_read_unlock();
> +
> swap = radix_to_swp_entry(page);
> page = read_swap_cache_async(swap, GFP_HIGHUSER_MOVABLE,
> NULL, 0, false);
> if (page)
> put_page(page);
> +
> + rcu_read_lock();
> + xas_reset(&xas);
> }
> + rcu_read_unlock();
>
> lru_add_drain(); /* Push any new pages onto the LRU now */
> }
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH v2 3/8] mm: Optimise madvise WILLNEED
2020-09-14 16:17 ` Qian Cai
@ 2020-09-14 16:47 ` Qian Cai
2020-09-14 16:50 ` Matthew Wilcox
1 sibling, 0 replies; 18+ messages in thread
From: Qian Cai @ 2020-09-14 16:47 UTC (permalink / raw)
To: Matthew Wilcox (Oracle), linux-mm
Cc: intel-gfx, Huang Ying, Hugh Dickins, linux-kernel, Chris Wilson,
William Kucharski, Johannes Weiner, cgroups, Andrew Morton,
Alexey Dobriyan, Matthew Auld
On Mon, 2020-09-14 at 12:17 -0400, Qian Cai wrote:
> On Thu, 2020-09-10 at 19:33 +0100, Matthew Wilcox (Oracle) wrote:
> > Instead of calling find_get_entry() for every page index, use an XArray
> > iterator to skip over NULL entries, and avoid calling get_page(),
> > because we only want the swap entries.
> >
> > Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> > Acked-by: Johannes Weiner <hannes@cmpxchg.org>
>
> Reverting the "Return head pages from find_*_entry" patchset [1] up to this
> patch fixed the issue that LTP madvise06 test [2] would trigger endless soft-
> lockups below. It does not help after applied patches fixed other separate
> issues in the patchset [3][4].
I forgot to also send this piece of the RCU stall trace, which might help
with debugging.
00: [ 2852.137748] madvise06 (62712): drop_caches: 3
01: [ 2928.208367] rcu: INFO: rcu_sched self-detected stall on CPU
01: [ 2928.210083] rcu: 1-....: (6499 ticks this GP) idle=036/1/0x4000000000
01: 000002 softirq=1741392/1741392 fqs=3161
01: [ 2928.210610] (t=6500 jiffies g=610849 q=12529)
01: [ 2928.210620] Task dump for CPU 1:
01: [ 2928.210630] task:madvise06 state:R running task stack:53320 pi
01: d:62712 ppid: 62711 flags:0x00000004
01: [ 2928.210676] Call Trace:
01: [ 2928.210693] [<00000000af57ec88>] show_stack+0x158/0x1f0
01: [ 2928.210703] [<00000000ae55b692>] sched_show_task+0x3d2/0x4c8
01: [ 2928.210710] [<00000000af5846aa>] rcu_dump_cpu_stacks+0x26a/0x2a8
01: [ 2928.210718] [<00000000ae64fa62>] rcu_sched_clock_irq+0x1c92/0x2188
01: [ 2928.210726] [<00000000ae6662ee>] update_process_times+0x4e/0x148
01: [ 2928.210734] [<00000000ae690c26>] tick_sched_timer+0x86/0x188
01: [ 2928.210741] [<00000000ae66989c>] __hrtimer_run_queues+0x84c/0x10b8
01: [ 2928.210748] [<00000000ae66c80a>] hrtimer_interrupt+0x38a/0x860
01: [ 2928.210758] [<00000000ae48dbf2>] do_IRQ+0x152/0x1c8
01: [ 2928.210767] [<00000000af5b00ea>] ext_int_handler+0x18e/0x194
01: [ 2928.210774] [<00000000ae5e332e>] arch_local_irq_restore+0x86/0xa0
01: [ 2928.210782] [<00000000af58da04>] lock_is_held_type+0xe4/0x130
01: [ 2928.210791] [<00000000ae63355a>] rcu_read_lock_held+0xba/0xd8
01: [ 2928.210799] [<00000000af0125fc>] xas_descend+0x244/0x2c8
01: [ 2928.210806] [<00000000af012754>] xas_load+0xd4/0x148
01: [ 2928.210812] [<00000000af014490>] xas_find+0x5d0/0x818
01: [ 2928.210822] [<00000000ae97e644>] do_madvise+0xd5c/0x1600
01: [ 2928.210828] [<00000000ae97f2d2>] __s390x_sys_madvise+0x72/0x98
01: [ 2928.210835] [<00000000af5af844>] system_call+0xdc/0x278
01: [ 2928.210841] 3 locks held by madvise06/62712:
01: [ 2928.216406] #0: 00000001437fca18 (&mm->mmap_lock){++++}-{3:3}, at: do_m
01: dvise+0x18c/0x1600
01: [ 2928.216430] #1: 00000000afbdd3e0 (rcu_read_lock){....}-{1:2}, at: do_mad
01: vise+0xe72/0x1600
01: [ 2928.216449] #2: 00000000afbe0818 (rcu_node_1){-.-.}-{2:2}, at: rcu_dump_
01: cpu_stacks+0xb2/0x2a8
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH v2 3/8] mm: Optimise madvise WILLNEED
2020-09-14 16:17 ` Qian Cai
2020-09-14 16:47 ` Qian Cai
@ 2020-09-14 16:50 ` Matthew Wilcox
2020-09-14 19:44 ` Qian Cai
1 sibling, 1 reply; 18+ messages in thread
From: Matthew Wilcox @ 2020-09-14 16:50 UTC (permalink / raw)
To: Qian Cai
Cc: linux-mm, intel-gfx, Huang Ying, Hugh Dickins, linux-kernel,
Chris Wilson, William Kucharski, Johannes Weiner, cgroups,
Andrew Morton, Alexey Dobriyan, Matthew Auld
On Mon, Sep 14, 2020 at 12:17:07PM -0400, Qian Cai wrote:
> Reverting the "Return head pages from find_*_entry" patchset [1] up to this
> patch fixed the issue that LTP madvise06 test [2] would trigger endless soft-
> lockups below. It does not help after applied patches fixed other separate
> issues in the patchset [3][4].
Thanks for the report. Could you try this?
diff --git a/mm/madvise.c b/mm/madvise.c
index 96189acd6969..2d9ceccb338d 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -234,6 +234,7 @@ static void force_shm_swapin_readahead(struct vm_area_struct *vma,
if (!xa_is_value(page))
continue;
+ xas_pause(&xas);
rcu_read_unlock();
swap = radix_to_swp_entry(page);
@@ -243,7 +244,6 @@ static void force_shm_swapin_readahead(struct vm_area_struct *vma,
put_page(page);
rcu_read_lock();
- xas_reset(&xas);
}
rcu_read_unlock();
^ permalink raw reply related [flat|nested] 18+ messages in thread
* Re: [PATCH v2 3/8] mm: Optimise madvise WILLNEED
2020-09-14 16:50 ` Matthew Wilcox
@ 2020-09-14 19:44 ` Qian Cai
0 siblings, 0 replies; 18+ messages in thread
From: Qian Cai @ 2020-09-14 19:44 UTC (permalink / raw)
To: Matthew Wilcox
Cc: linux-mm, intel-gfx, Huang Ying, Hugh Dickins, linux-kernel,
Chris Wilson, William Kucharski, Johannes Weiner, cgroups,
Andrew Morton, Alexey Dobriyan, Matthew Auld
On Mon, 2020-09-14 at 17:50 +0100, Matthew Wilcox wrote:
> On Mon, Sep 14, 2020 at 12:17:07PM -0400, Qian Cai wrote:
> > Reverting the "Return head pages from find_*_entry" patchset [1] up to this
> > patch fixed the issue that LTP madvise06 test [2] would trigger endless
> > soft-
> > lockups below. It does not help after applied patches fixed other separate
> > issues in the patchset [3][4].
>
> Thanks for the report. Could you try this?
It works fine.
>
> diff --git a/mm/madvise.c b/mm/madvise.c
> index 96189acd6969..2d9ceccb338d 100644
> --- a/mm/madvise.c
> +++ b/mm/madvise.c
> @@ -234,6 +234,7 @@ static void force_shm_swapin_readahead(struct
> vm_area_struct *vma,
>
> if (!xa_is_value(page))
> continue;
> + xas_pause(&xas);
> rcu_read_unlock();
>
> swap = radix_to_swp_entry(page);
> @@ -243,7 +244,6 @@ static void force_shm_swapin_readahead(struct
> vm_area_struct *vma,
> put_page(page);
>
> rcu_read_lock();
> - xas_reset(&xas);
> }
> rcu_read_unlock();
>
>
^ permalink raw reply [flat|nested] 18+ messages in thread
* [PATCH v2 4/8] proc: Optimise smaps for shmem entries
2020-09-10 18:33 [PATCH v2 0/8] Return head pages from find_*_entry Matthew Wilcox (Oracle)
` (2 preceding siblings ...)
2020-09-10 18:33 ` [PATCH v2 3/8] mm: Optimise madvise WILLNEED Matthew Wilcox (Oracle)
@ 2020-09-10 18:33 ` Matthew Wilcox (Oracle)
2020-09-10 18:33 ` [PATCH v2 5/8] i915: Use find_lock_page instead of find_lock_entry Matthew Wilcox (Oracle)
` (4 subsequent siblings)
8 siblings, 0 replies; 18+ messages in thread
From: Matthew Wilcox (Oracle) @ 2020-09-10 18:33 UTC (permalink / raw)
To: linux-mm
Cc: Matthew Wilcox (Oracle),
Andrew Morton, Hugh Dickins, William Kucharski, Jani Nikula,
Alexey Dobriyan, Johannes Weiner, Chris Wilson, Matthew Auld,
Huang Ying, intel-gfx, cgroups, linux-kernel
Avoid bumping the refcount on pages when we're only interested in the
swap entries.
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
---
fs/proc/task_mmu.c | 8 +-------
1 file changed, 1 insertion(+), 7 deletions(-)
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 5066b0251ed8..e42d9e5e9a3c 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -520,16 +520,10 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
page = device_private_entry_to_page(swpent);
} else if (unlikely(IS_ENABLED(CONFIG_SHMEM) && mss->check_shmem_swap
&& pte_none(*pte))) {
- page = find_get_entry(vma->vm_file->f_mapping,
+ page = xa_load(&vma->vm_file->f_mapping->i_pages,
linear_page_index(vma, addr));
- if (!page)
- return;
-
if (xa_is_value(page))
mss->swap += PAGE_SIZE;
- else
- put_page(page);
-
return;
}
--
2.28.0
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH v2 5/8] i915: Use find_lock_page instead of find_lock_entry
2020-09-10 18:33 [PATCH v2 0/8] Return head pages from find_*_entry Matthew Wilcox (Oracle)
` (3 preceding siblings ...)
2020-09-10 18:33 ` [PATCH v2 4/8] proc: Optimise smaps for shmem entries Matthew Wilcox (Oracle)
@ 2020-09-10 18:33 ` Matthew Wilcox (Oracle)
2020-09-10 18:33 ` [PATCH v2 6/8] mm: Convert find_get_entry to return the head page Matthew Wilcox (Oracle)
` (3 subsequent siblings)
8 siblings, 0 replies; 18+ messages in thread
From: Matthew Wilcox (Oracle) @ 2020-09-10 18:33 UTC (permalink / raw)
To: linux-mm
Cc: Matthew Wilcox (Oracle),
Andrew Morton, Hugh Dickins, William Kucharski, Jani Nikula,
Alexey Dobriyan, Johannes Weiner, Chris Wilson, Matthew Auld,
Huang Ying, intel-gfx, cgroups, linux-kernel
i915 does not want to see value entries. Switch it to use
find_lock_page() instead, and remove the export of find_lock_entry().
Move find_lock_entry() and find_get_entry() to mm/internal.h to discourage
any future use.
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
---
drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 4 ++--
include/linux/pagemap.h | 2 --
mm/filemap.c | 1 -
mm/internal.h | 3 +++
4 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index 38113d3c0138..75e8b71c18b9 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -258,8 +258,8 @@ shmem_writeback(struct drm_i915_gem_object *obj)
for (i = 0; i < obj->base.size >> PAGE_SHIFT; i++) {
struct page *page;
- page = find_lock_entry(mapping, i);
- if (!page || xa_is_value(page))
+ page = find_lock_page(mapping, i);
+ if (!page)
continue;
if (!page_mapped(page) && clear_page_dirty_for_io(page)) {
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 12ab56c3a86f..905a64030647 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -384,8 +384,6 @@ static inline struct page *find_subpage(struct page *head, pgoff_t index)
return head + (index & (thp_nr_pages(head) - 1));
}
-struct page *find_get_entry(struct address_space *mapping, pgoff_t offset);
-struct page *find_lock_entry(struct address_space *mapping, pgoff_t offset);
unsigned find_get_entries(struct address_space *mapping, pgoff_t start,
unsigned int nr_entries, struct page **entries,
pgoff_t *indices);
diff --git a/mm/filemap.c b/mm/filemap.c
index 78d07a712112..d64f6f76bc0b 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1648,7 +1648,6 @@ struct page *find_lock_entry(struct address_space *mapping, pgoff_t offset)
}
return page;
}
-EXPORT_SYMBOL(find_lock_entry);
/**
* pagecache_get_page - Find and get a reference to a page.
diff --git a/mm/internal.h b/mm/internal.h
index ab4beb7c5cd2..6345b08ce86c 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -60,6 +60,9 @@ static inline void force_page_cache_readahead(struct address_space *mapping,
force_page_cache_ra(&ractl, &file->f_ra, nr_to_read);
}
+struct page *find_get_entry(struct address_space *mapping, pgoff_t index);
+struct page *find_lock_entry(struct address_space *mapping, pgoff_t index);
+
/**
* page_evictable - test whether a page is evictable
* @page: the page to test
--
2.28.0
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH v2 6/8] mm: Convert find_get_entry to return the head page
2020-09-10 18:33 [PATCH v2 0/8] Return head pages from find_*_entry Matthew Wilcox (Oracle)
` (4 preceding siblings ...)
2020-09-10 18:33 ` [PATCH v2 5/8] i915: Use find_lock_page instead of find_lock_entry Matthew Wilcox (Oracle)
@ 2020-09-10 18:33 ` Matthew Wilcox (Oracle)
[not found] ` <20200914085545.GB28738@shao2-debian>
2020-09-10 18:33 ` [PATCH v2 7/8] mm/shmem: Return head page from find_lock_entry Matthew Wilcox (Oracle)
` (2 subsequent siblings)
8 siblings, 1 reply; 18+ messages in thread
From: Matthew Wilcox (Oracle) @ 2020-09-10 18:33 UTC (permalink / raw)
To: linux-mm
Cc: Matthew Wilcox (Oracle),
Andrew Morton, Hugh Dickins, William Kucharski, Jani Nikula,
Alexey Dobriyan, Johannes Weiner, Chris Wilson, Matthew Auld,
Huang Ying, intel-gfx, cgroups, linux-kernel
There are only four callers remaining of find_get_entry().
get_shadow_from_swap_cache() only wants to see shadow entries and doesn't
care about which page is returned. Push the find_subpage() call into
find_lock_entry(), find_get_incore_page() and pagecache_get_page().
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
mm/filemap.c | 13 +++++++------
mm/swap_state.c | 2 +-
2 files changed, 8 insertions(+), 7 deletions(-)
diff --git a/mm/filemap.c b/mm/filemap.c
index d64f6f76bc0b..2f134383b0ae 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1567,19 +1567,19 @@ EXPORT_SYMBOL(page_cache_prev_miss);
/**
* find_get_entry - find and get a page cache entry
* @mapping: the address_space to search
- * @offset: the page cache index
+ * @index: The page cache index.
*
* Looks up the page cache slot at @mapping & @offset. If there is a
- * page cache page, it is returned with an increased refcount.
+ * page cache page, the head page is returned with an increased refcount.
*
* If the slot holds a shadow entry of a previously evicted page, or a
* swap entry from shmem/tmpfs, it is returned.
*
- * Return: the found page or shadow entry, %NULL if nothing is found.
+ * Return: The head page or shadow entry, %NULL if nothing is found.
*/
-struct page *find_get_entry(struct address_space *mapping, pgoff_t offset)
+struct page *find_get_entry(struct address_space *mapping, pgoff_t index)
{
- XA_STATE(xas, &mapping->i_pages, offset);
+ XA_STATE(xas, &mapping->i_pages, index);
struct page *page;
rcu_read_lock();
@@ -1607,7 +1607,6 @@ struct page *find_get_entry(struct address_space *mapping, pgoff_t offset)
put_page(page);
goto repeat;
}
- page = find_subpage(page, offset);
out:
rcu_read_unlock();
@@ -1644,6 +1643,7 @@ struct page *find_lock_entry(struct address_space *mapping, pgoff_t offset)
put_page(page);
goto repeat;
}
+ page = find_subpage(page, offset);
VM_BUG_ON_PAGE(page_to_pgoff(page) != offset, page);
}
return page;
@@ -1690,6 +1690,7 @@ struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index,
page = NULL;
if (!page)
goto no_page;
+ page = find_subpage(page, index);
if (fgp_flags & FGP_LOCK) {
if (fgp_flags & FGP_NOWAIT) {
diff --git a/mm/swap_state.c b/mm/swap_state.c
index c79e2242dd04..c8cf1757ca06 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -432,7 +432,7 @@ struct page *find_get_incore_page(struct address_space *mapping, pgoff_t index)
struct page *page = find_get_entry(mapping, index);
if (!xa_is_value(page))
- return page;
+ return find_subpage(page, index);
if (!shmem_mapping(mapping))
return NULL;
--
2.28.0
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH v2 7/8] mm/shmem: Return head page from find_lock_entry
2020-09-10 18:33 [PATCH v2 0/8] Return head pages from find_*_entry Matthew Wilcox (Oracle)
` (5 preceding siblings ...)
2020-09-10 18:33 ` [PATCH v2 6/8] mm: Convert find_get_entry to return the head page Matthew Wilcox (Oracle)
@ 2020-09-10 18:33 ` Matthew Wilcox (Oracle)
2020-09-12 3:20 ` Matthew Wilcox
2020-09-10 18:33 ` [PATCH v2 8/8] mm: Add find_lock_head Matthew Wilcox (Oracle)
2020-09-15 8:10 ` [PATCH v2 0/8] Return head pages from find_*_entry Hugh Dickins
8 siblings, 1 reply; 18+ messages in thread
From: Matthew Wilcox (Oracle) @ 2020-09-10 18:33 UTC (permalink / raw)
To: linux-mm
Cc: Matthew Wilcox (Oracle),
Andrew Morton, Hugh Dickins, William Kucharski, Jani Nikula,
Alexey Dobriyan, Johannes Weiner, Chris Wilson, Matthew Auld,
Huang Ying, intel-gfx, cgroups, linux-kernel
Convert shmem_getpage_gfp() (the only remaining caller of
find_lock_entry()) to cope with a head page being returned instead of
the subpage for the index.
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
include/linux/pagemap.h | 9 +++++++++
mm/filemap.c | 25 +++++++++++--------------
mm/shmem.c | 20 +++++++++-----------
3 files changed, 29 insertions(+), 25 deletions(-)
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 905a64030647..f374618b2c93 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -371,6 +371,15 @@ static inline struct page *grab_cache_page_nowait(struct address_space *mapping,
mapping_gfp_mask(mapping));
}
+/* Does this page contain this index? */
+static inline bool thp_contains(struct page *head, pgoff_t index)
+{
+ /* HugeTLBfs indexes the page cache in units of hpage_size */
+ if (PageHuge(head))
+ return head->index == index;
+ return page_index(head) == (index & ~(thp_nr_pages(head) - 1UL));
+}
+
/*
* Given the page we found in the page cache, return the page corresponding
* to this index in the file
diff --git a/mm/filemap.c b/mm/filemap.c
index 2f134383b0ae..453535170b8d 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1614,37 +1614,34 @@ struct page *find_get_entry(struct address_space *mapping, pgoff_t index)
}
/**
- * find_lock_entry - locate, pin and lock a page cache entry
- * @mapping: the address_space to search
- * @offset: the page cache index
+ * find_lock_entry - Locate and lock a page cache entry.
+ * @mapping: The address_space to search.
+ * @index: The page cache index.
*
- * Looks up the page cache slot at @mapping & @offset. If there is a
- * page cache page, it is returned locked and with an increased
- * refcount.
+ * Looks up the page at @mapping & @index. If there is a page in the
+ * cache, the head page is returned locked and with an increased refcount.
*
* If the slot holds a shadow entry of a previously evicted page, or a
* swap entry from shmem/tmpfs, it is returned.
*
- * find_lock_entry() may sleep.
- *
- * Return: the found page or shadow entry, %NULL if nothing is found.
+ * Context: May sleep.
+ * Return: The head page or shadow entry, %NULL if nothing is found.
*/
-struct page *find_lock_entry(struct address_space *mapping, pgoff_t offset)
+struct page *find_lock_entry(struct address_space *mapping, pgoff_t index)
{
struct page *page;
repeat:
- page = find_get_entry(mapping, offset);
+ page = find_get_entry(mapping, index);
if (page && !xa_is_value(page)) {
lock_page(page);
/* Has the page been truncated? */
- if (unlikely(page_mapping(page) != mapping)) {
+ if (unlikely(page->mapping != mapping)) {
unlock_page(page);
put_page(page);
goto repeat;
}
- page = find_subpage(page, offset);
- VM_BUG_ON_PAGE(page_to_pgoff(page) != offset, page);
+ VM_BUG_ON_PAGE(!thp_contains(page, index), page);
}
return page;
}
diff --git a/mm/shmem.c b/mm/shmem.c
index 271548ca20f3..d2a46ef7df43 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1793,7 +1793,7 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
struct mm_struct *charge_mm;
struct page *page;
enum sgp_type sgp_huge = sgp;
- pgoff_t hindex = index;
+ pgoff_t hindex;
int error;
int once = 0;
int alloced = 0;
@@ -1833,10 +1833,8 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
put_page(page);
page = NULL;
}
- if (page || sgp == SGP_READ) {
- *pagep = page;
- return 0;
- }
+ if (page || sgp == SGP_READ)
+ goto out;
/*
* Fast cache lookup did not find it:
@@ -1961,14 +1959,13 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
* it now, lest undo on failure cancel our earlier guarantee.
*/
if (sgp != SGP_WRITE && !PageUptodate(page)) {
- struct page *head = compound_head(page);
int i;
- for (i = 0; i < compound_nr(head); i++) {
- clear_highpage(head + i);
- flush_dcache_page(head + i);
+ for (i = 0; i < compound_nr(page); i++) {
+ clear_highpage(page + i);
+ flush_dcache_page(page + i);
}
- SetPageUptodate(head);
+ SetPageUptodate(page);
}
/* Perhaps the file has been truncated since we checked */
@@ -1984,7 +1981,8 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
error = -EINVAL;
goto unlock;
}
- *pagep = page + index - hindex;
+out:
+ *pagep = page + index - page->index;
return 0;
/*
--
2.28.0
^ permalink raw reply related [flat|nested] 18+ messages in thread
* Re: [PATCH v2 7/8] mm/shmem: Return head page from find_lock_entry
2020-09-10 18:33 ` [PATCH v2 7/8] mm/shmem: Return head page from find_lock_entry Matthew Wilcox (Oracle)
@ 2020-09-12 3:20 ` Matthew Wilcox
0 siblings, 0 replies; 18+ messages in thread
From: Matthew Wilcox @ 2020-09-12 3:20 UTC (permalink / raw)
To: linux-mm
Cc: Andrew Morton, Hugh Dickins, William Kucharski, Jani Nikula,
Alexey Dobriyan, Johannes Weiner, Chris Wilson, Matthew Auld,
Huang Ying, intel-gfx, cgroups, linux-kernel
On Thu, Sep 10, 2020 at 07:33:17PM +0100, Matthew Wilcox (Oracle) wrote:
> Convert shmem_getpage_gfp() (the only remaining caller of
> find_lock_entry()) to cope with a head page being returned instead of
> the subpage for the index.
This version was buggy. Apparently I was too focused on running the test suite against XFS and neglected to run it against tmpfs, which crashed instantly.
Here's the patch I should have sent.
commit 7bfa655881da76f3386e6d4c07e38a165b4a6ca8
Author: Matthew Wilcox (Oracle) <willy@infradead.org>
Date: Sun Aug 2 07:22:34 2020 -0400
mm/shmem: Return head page from find_lock_entry
Convert shmem_getpage_gfp() (the only remaining caller of
find_lock_entry()) to cope with a head page being returned instead of
the subpage for the index.
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 905a64030647..f374618b2c93 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -371,6 +371,15 @@ static inline struct page *grab_cache_page_nowait(struct address_space *mapping,
mapping_gfp_mask(mapping));
}
+/* Does this page contain this index? */
+static inline bool thp_contains(struct page *head, pgoff_t index)
+{
+ /* HugeTLBfs indexes the page cache in units of hpage_size */
+ if (PageHuge(head))
+ return head->index == index;
+ return page_index(head) == (index & ~(thp_nr_pages(head) - 1UL));
+}
+
/*
* Given the page we found in the page cache, return the page corresponding
* to this index in the file
diff --git a/mm/filemap.c b/mm/filemap.c
index 2f134383b0ae..453535170b8d 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1614,37 +1614,34 @@ struct page *find_get_entry(struct address_space *mapping, pgoff_t index)
}
/**
- * find_lock_entry - locate, pin and lock a page cache entry
- * @mapping: the address_space to search
- * @offset: the page cache index
+ * find_lock_entry - Locate and lock a page cache entry.
+ * @mapping: The address_space to search.
+ * @index: The page cache index.
*
- * Looks up the page cache slot at @mapping & @offset. If there is a
- * page cache page, it is returned locked and with an increased
- * refcount.
+ * Looks up the page at @mapping & @index. If there is a page in the
+ * cache, the head page is returned locked and with an increased refcount.
*
* If the slot holds a shadow entry of a previously evicted page, or a
* swap entry from shmem/tmpfs, it is returned.
*
- * find_lock_entry() may sleep.
- *
- * Return: the found page or shadow entry, %NULL if nothing is found.
+ * Context: May sleep.
+ * Return: The head page or shadow entry, %NULL if nothing is found.
*/
-struct page *find_lock_entry(struct address_space *mapping, pgoff_t offset)
+struct page *find_lock_entry(struct address_space *mapping, pgoff_t index)
{
struct page *page;
repeat:
- page = find_get_entry(mapping, offset);
+ page = find_get_entry(mapping, index);
if (page && !xa_is_value(page)) {
lock_page(page);
/* Has the page been truncated? */
- if (unlikely(page_mapping(page) != mapping)) {
+ if (unlikely(page->mapping != mapping)) {
unlock_page(page);
put_page(page);
goto repeat;
}
- page = find_subpage(page, offset);
- VM_BUG_ON_PAGE(page_to_pgoff(page) != offset, page);
+ VM_BUG_ON_PAGE(!thp_contains(page, index), page);
}
return page;
}
diff --git a/mm/shmem.c b/mm/shmem.c
index 271548ca20f3..58bc9e326d0d 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1822,6 +1822,8 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
return error;
}
+ if (page)
+ hindex = page->index;
if (page && sgp == SGP_WRITE)
mark_page_accessed(page);
@@ -1832,11 +1834,10 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
unlock_page(page);
put_page(page);
page = NULL;
+ hindex = index;
}
- if (page || sgp == SGP_READ) {
- *pagep = page;
- return 0;
- }
+ if (page || sgp == SGP_READ)
+ goto out;
/*
* Fast cache lookup did not find it:
@@ -1961,14 +1962,13 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
* it now, lest undo on failure cancel our earlier guarantee.
*/
if (sgp != SGP_WRITE && !PageUptodate(page)) {
- struct page *head = compound_head(page);
int i;
- for (i = 0; i < compound_nr(head); i++) {
- clear_highpage(head + i);
- flush_dcache_page(head + i);
+ for (i = 0; i < compound_nr(page); i++) {
+ clear_highpage(page + i);
+ flush_dcache_page(page + i);
}
- SetPageUptodate(head);
+ SetPageUptodate(page);
}
/* Perhaps the file has been truncated since we checked */
@@ -1984,6 +1984,7 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
error = -EINVAL;
goto unlock;
}
+out:
*pagep = page + index - hindex;
return 0;
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH v2 8/8] mm: Add find_lock_head
2020-09-10 18:33 [PATCH v2 0/8] Return head pages from find_*_entry Matthew Wilcox (Oracle)
` (6 preceding siblings ...)
2020-09-10 18:33 ` [PATCH v2 7/8] mm/shmem: Return head page from find_lock_entry Matthew Wilcox (Oracle)
@ 2020-09-10 18:33 ` Matthew Wilcox (Oracle)
2020-09-15 8:10 ` [PATCH v2 0/8] Return head pages from find_*_entry Hugh Dickins
8 siblings, 0 replies; 18+ messages in thread
From: Matthew Wilcox (Oracle) @ 2020-09-10 18:33 UTC (permalink / raw)
To: linux-mm
Cc: Matthew Wilcox (Oracle),
Andrew Morton, Hugh Dickins, William Kucharski, Jani Nikula,
Alexey Dobriyan, Johannes Weiner, Chris Wilson, Matthew Auld,
Huang Ying, intel-gfx, cgroups, linux-kernel
Add a new FGP_HEAD flag which avoids calling find_subpage() and add a
convenience wrapper for it.
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
include/linux/pagemap.h | 32 ++++++++++++++++++++++++++------
mm/filemap.c | 9 ++++++---
2 files changed, 32 insertions(+), 9 deletions(-)
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index f374618b2c93..4e52a3ff92fb 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -278,6 +278,7 @@ pgoff_t page_cache_prev_miss(struct address_space *mapping,
#define FGP_NOFS 0x00000010
#define FGP_NOWAIT 0x00000020
#define FGP_FOR_MMAP 0x00000040
+#define FGP_HEAD 0x00000080
struct page *pagecache_get_page(struct address_space *mapping, pgoff_t offset,
int fgp_flags, gfp_t cache_gfp_mask);
@@ -309,18 +310,37 @@ static inline struct page *find_get_page_flags(struct address_space *mapping,
* @mapping: the address_space to search
* @offset: the page index
*
- * Looks up the page cache slot at @mapping & @offset. If there is a
+ * Looks up the page cache entry at @mapping & @offset. If there is a
* page cache page, it is returned locked and with an increased
* refcount.
*
- * Otherwise, %NULL is returned.
- *
- * find_lock_page() may sleep.
+ * Context: May sleep.
+ * Return: A struct page or %NULL if there is no page in the cache for this
+ * index.
*/
static inline struct page *find_lock_page(struct address_space *mapping,
- pgoff_t offset)
+ pgoff_t index)
+{
+ return pagecache_get_page(mapping, index, FGP_LOCK, 0);
+}
+
+/**
+ * find_lock_head - Locate, pin and lock a pagecache page.
+ * @mapping: The address_space to search.
+ * @offset: The page index.
+ *
+ * Looks up the page cache entry at @mapping & @offset. If there is a
+ * page cache page, its head page is returned locked and with an increased
+ * refcount.
+ *
+ * Context: May sleep.
+ * Return: A struct page which is !PageTail, or %NULL if there is no page
+ * in the cache for this index.
+ */
+static inline struct page *find_lock_head(struct address_space *mapping,
+ pgoff_t index)
{
- return pagecache_get_page(mapping, offset, FGP_LOCK, 0);
+ return pagecache_get_page(mapping, index, FGP_LOCK | FGP_HEAD, 0);
}
/**
diff --git a/mm/filemap.c b/mm/filemap.c
index 453535170b8d..e429e02317ef 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1659,6 +1659,8 @@ struct page *find_lock_entry(struct address_space *mapping, pgoff_t index)
*
* * %FGP_ACCESSED - The page will be marked accessed.
* * %FGP_LOCK - The page is returned locked.
+ * * %FGP_HEAD - If the page is present and a THP, return the head page
+ * rather than the exact page specified by the index.
* * %FGP_CREAT - If no page is present then a new page is allocated using
* @gfp_mask and added to the page cache and the VM's LRU list.
* The page is returned locked and with an increased refcount.
@@ -1687,7 +1689,6 @@ struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index,
page = NULL;
if (!page)
goto no_page;
- page = find_subpage(page, index);
if (fgp_flags & FGP_LOCK) {
if (fgp_flags & FGP_NOWAIT) {
@@ -1700,12 +1701,12 @@ struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index,
}
/* Has the page been truncated? */
- if (unlikely(compound_head(page)->mapping != mapping)) {
+ if (unlikely(page->mapping != mapping)) {
unlock_page(page);
put_page(page);
goto repeat;
}
- VM_BUG_ON_PAGE(page->index != index, page);
+ VM_BUG_ON_PAGE(!thp_contains(page, index), page);
}
if (fgp_flags & FGP_ACCESSED)
@@ -1715,6 +1716,8 @@ struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index,
if (page_is_idle(page))
clear_page_idle(page);
}
+ if (!(fgp_flags & FGP_HEAD))
+ page = find_subpage(page, index);
no_page:
if (!page && (fgp_flags & FGP_CREAT)) {
--
2.28.0
^ permalink raw reply related [flat|nested] 18+ messages in thread
* Re: [PATCH v2 0/8] Return head pages from find_*_entry
2020-09-10 18:33 [PATCH v2 0/8] Return head pages from find_*_entry Matthew Wilcox (Oracle)
` (7 preceding siblings ...)
2020-09-10 18:33 ` [PATCH v2 8/8] mm: Add find_lock_head Matthew Wilcox (Oracle)
@ 2020-09-15 8:10 ` Hugh Dickins
2020-09-15 12:53 ` Naresh Kamboju
8 siblings, 1 reply; 18+ messages in thread
From: Hugh Dickins @ 2020-09-15 8:10 UTC (permalink / raw)
To: Matthew Wilcox (Oracle)
Cc: linux-mm, Andrew Morton, Hugh Dickins, William Kucharski,
Jani Nikula, Alexey Dobriyan, Johannes Weiner, Chris Wilson,
Matthew Auld, Huang Ying, intel-gfx, cgroups, linux-kernel
On Thu, 10 Sep 2020, Matthew Wilcox (Oracle) wrote:
> This patch series started out as part of the THP patch set, but it has
> some nice effects along the way and it seems worth splitting it out and
> submitting separately.
>
> Currently find_get_entry() and find_lock_entry() return the page
> corresponding to the requested index, but the first thing most callers do
> is find the head page, which we just threw away. As part of auditing
> all the callers, I found some misuses of the APIs and some plain
> inefficiencies that I've fixed.
>
> The diffstat is unflattering, but I added more kernel-doc and a new wrapper.
>
> v2:
> - Rework how shmem_getpage_gfp() handles getting a head page back from
> find_lock_entry()
> - Renamed find_get_swap_page() to find_get_incore_page()
> - Make sure find_get_incore_page() doesn't return a head page
> - Fix the missing include of linux/shmem_fs.h
> - Move find_get_entry and find_lock_entry prototypes to mm/internal.h
> - Rename thp_valid_index() to thp_contains()
> - Fix thp_contains() for hugetlbfs and swapcache
> - Add find_lock_head() wrapper around pagecache_get_page()
>
> Matthew Wilcox (Oracle) (8):
> mm: Factor find_get_incore_page out of mincore_page
> mm: Use find_get_incore_page in memcontrol
> mm: Optimise madvise WILLNEED
> proc: Optimise smaps for shmem entries
> i915: Use find_lock_page instead of find_lock_entry
> mm: Convert find_get_entry to return the head page
> mm/shmem: Return head page from find_lock_entry
> mm: Add find_lock_head
>
> drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 4 +--
> fs/proc/task_mmu.c | 8 +----
> include/linux/pagemap.h | 43 +++++++++++++++++-----
> include/linux/swap.h | 7 ++++
> mm/filemap.c | 44 +++++++++++------------
> mm/internal.h | 3 ++
> mm/madvise.c | 21 ++++++-----
> mm/memcontrol.c | 24 ++-----------
> mm/mincore.c | 28 ++-------------
> mm/shmem.c | 20 +++++------
> mm/swap_state.c | 32 +++++++++++++++++
> 11 files changed, 127 insertions(+), 107 deletions(-)
>
> --
> 2.28.0
I was testing mmotm today (plus the shmem.c and swap_state.c
fixes that you posted, but I did not try the madvise.c one) -
my usual tmpfs swapping loads (plus hyperactive khugepaged to
maximize the THPs). It behaved well, no problems found.
But I probably won't get to try your series of 12 for a few days.
Hugh
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH v2 0/8] Return head pages from find_*_entry
2020-09-15 8:10 ` [PATCH v2 0/8] Return head pages from find_*_entry Hugh Dickins
@ 2020-09-15 12:53 ` Naresh Kamboju
2020-09-15 13:08 ` Matthew Wilcox
0 siblings, 1 reply; 18+ messages in thread
From: Naresh Kamboju @ 2020-09-15 12:53 UTC (permalink / raw)
To: Hugh Dickins, Matthew Wilcox
Cc: linux-mm, Andrew Morton, William Kucharski, Jani Nikula,
Alexey Dobriyan, Johannes Weiner, Chris Wilson, Matthew Auld,
Huang Ying, intel-gfx, Cgroups, open list, lkft-triage,
Linux-Next Mailing List, Stephen Rothwell, ricardo.canuelo
On Tue, 15 Sep 2020 at 13:56, Hugh Dickins <hughd@google.com> wrote:
>
> On Thu, 10 Sep 2020, Matthew Wilcox (Oracle) wrote:
>
> > This patch series started out as part of the THP patch set, but it has
> > some nice effects along the way and it seems worth splitting it out and
> > submitting separately.
> >
> > Currently find_get_entry() and find_lock_entry() return the page
> > corresponding to the requested index, but the first thing most callers do
> > is find the head page, which we just threw away. As part of auditing
> > all the callers, I found some misuses of the APIs and some plain
> > inefficiencies that I've fixed.
> >
> > The diffstat is unflattering, but I added more kernel-doc and a new wrapper.
> >
> > v2:
> > - Rework how shmem_getpage_gfp() handles getting a head page back from
> > find_lock_entry()
> > - Renamed find_get_swap_page() to find_get_incore_page()
> > - Make sure find_get_incore_page() doesn't return a head page
> > - Fix the missing include of linux/shmem_fs.h
> > - Move find_get_entry and find_lock_entry prototypes to mm/internal.h
> > - Rename thp_valid_index() to thp_contains()
> > - Fix thp_contains() for hugetlbfs and swapcache
> > - Add find_lock_head() wrapper around pagecache_get_page()
> >
> > Matthew Wilcox (Oracle) (8):
> > mm: Factor find_get_incore_page out of mincore_page
> > mm: Use find_get_incore_page in memcontrol
> > mm: Optimise madvise WILLNEED
> > proc: Optimise smaps for shmem entries
> > i915: Use find_lock_page instead of find_lock_entry
> > mm: Convert find_get_entry to return the head page
> > mm/shmem: Return head page from find_lock_entry
> > mm: Add find_lock_head
While running kselftest mincore tests, the following kernel BUG was reported on the
linux next-20200915 tag on x86_64, i386 and arm64.
metadata:
git branch: master
git repo: https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
git commit: 6b02addb1d1748d21dd1261e46029b264be4e5a0
git describe: next-20200915
make_kernelversion: 5.9.0-rc5
kernel-config:
http://snapshots.linaro.org/openembedded/lkft/lkft/sumo/intel-corei7-64/lkft/linux-next/860/config
Test case:
---------------
* Tests the user interface. This test triggers most of the documented
* error conditions in mincore().
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/plain/tools/testing/selftests/mincore/mincore_selftest.c
kernel BUG:
-----------------
[ 710.472860] kselftest: Running tests in mincore
[ 710.554790] BUG: kernel NULL pointer dereference, address: 0000000000000000
[ 710.561765] #PF: supervisor read access in kernel mode
[ 710.566920] #PF: error_code(0x0000) - not-present page
[ 710.572065] PGD 80000003fd5b9067 P4D 80000003fd5b9067 PUD 45903f067 PMD 0
[ 710.578957] Oops: 0000 [#1] SMP PTI
[ 710.582450] CPU: 0 PID: 19025 Comm: mincore_selftes Tainted: G
W K 5.9.0-rc5-next-20200915 #1
[ 710.592094] Hardware name: Supermicro SYS-5019S-ML/X11SSH-F, BIOS
2.0b 07/27/2017
[ 710.599574] RIP: 0010:PageHuge+0x6/0x40
[ 710.603411] Code: c3 0f 1f 00 0f 1f 44 00 00 55 48 89 d6 48 89 e5
e8 ef fe ff ff 5d c3 0f 1f 00 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44
00 00 55 <48> 8b 07 48 89 e5 a9 00 00 01 00 75 09 48 8b 47 08 83 e0 01
74 17
[ 710.622149] RSP: 0018:ffffb0e2002bfcc0 EFLAGS: 00010246
[ 710.627373] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000
[ 710.634498] RDX: ffff9f231b2518c0 RSI: ffffffffa272b340 RDI: 0000000000000000
[ 710.641620] RBP: ffffb0e2002bfce8 R08: 0000000000000002 R09: 0000000000000000
[ 710.648747] R10: ffffb0e2002bfb20 R11: ffffffffa272b340 R12: ffff9f23193c5e68
[ 710.655876] R13: 0000000000000000 R14: 0000000000000001 R15: 0000000000000001
[ 710.663003] FS: 00007fa4c9ea24c0(0000) GS:ffff9f231fc00000(0000)
knlGS:0000000000000000
[ 710.671088] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 710.676824] CR2: 0000000000000000 CR3: 00000004044d0004 CR4: 00000000003706f0
[ 710.683949] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 710.691073] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[ 710.698196] Call Trace:
[ 710.700644] ? find_get_incore_page+0xc6/0x120
[ 710.705089] mincore_page+0x12/0x60
[ 710.708580] __mincore_unmapped_range+0x78/0xc0
[ 710.713105] mincore_pte_range+0x269/0x300
[ 710.717206] __walk_page_range+0x5ab/0xb60
[ 710.721308] walk_page_range+0xab/0x150
[ 710.725152] __x64_sys_mincore+0x13c/0x330
[ 710.729251] do_syscall_64+0x37/0x50
[ 710.732831] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[ 710.737882] RIP: 0033:0x7fa4c99be2d7
[ 710.741462] Code: 73 01 c3 48 8b 0d c1 fb 2b 00 f7 d8 64 89 01 48
83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 b8 1b 00 00
00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 91 fb 2b 00 f7 d8 64 89
01 48
[ 710.760206] RSP: 002b:00007ffcb103baf8 EFLAGS: 00000203 ORIG_RAX:
000000000000001b
[ 710.767770] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007fa4c99be2d7
[ 710.774897] RDX: 0000000000000000 RSI: 0000000000001000 RDI: 00007fa4c9ea6000
[ 710.782020] RBP: 00007ffcb103bc40 R08: 00000000ffffffff R09: 0000000000000000
[ 710.789144] R10: 0000000000000021 R11: 0000000000000203 R12: 0000000000400d00
[ 710.796268] R13: 00007ffcb103be10 R14: 0000000000000000 R15: 0000000000000000
[ 710.803395] Modules linked in: sch_fq 8021q iptable_filter xt_mark
ip_tables cls_bpf sch_ingress veth algif_hash x86_pkg_temp_thermal
fuse [last unloaded: memory_notifier_error_inject]
[ 710.819814] CR2: 0000000000000000
[ 710.823128] ---[ end trace 67d1a6d0ea1b24e3 ]---
[ 710.827746] RIP: 0010:PageHuge+0x6/0x40
[ 710.831584] Code: c3 0f 1f 00 0f 1f 44 00 00 55 48 89 d6 48 89 e5
e8 ef fe ff ff 5d c3 0f 1f 00 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44
00 00 55 <48> 8b 07 48 89 e5 a9 00 00 01 00 75 09 48 8b 47 08 83 e0 01
74 17
[ 710.850322] RSP: 0018:ffffb0e2002bfcc0 EFLAGS: 00010246
[ 710.855546] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000
[ 710.862672] RDX: ffff9f231b2518c0 RSI: ffffffffa272b340 RDI: 0000000000000000
[ 710.869803] RBP: ffffb0e2002bfce8 R08: 0000000000000002 R09: 0000000000000000
[ 710.876928] R10: ffffb0e2002bfb20 R11: ffffffffa272b340 R12: ffff9f23193c5e68
[ 710.884050] R13: 0000000000000000 R14: 0000000000000001 R15: 0000000000000001
[ 710.891175] FS: 00007fa4c9ea24c0(0000) GS:ffff9f231fc00000(0000)
knlGS:0000000000000000
[ 710.899253] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 710.904990] CR2: 0000000000000000 CR3: 00000004044d0004 CR4: 00000000003706f0
[ 710.912113] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 710.919236] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[ 710.926360] note: mincore_selftes[19025] exited with preempt_count 1
[ 710.932704] BUG: sleeping function called from invalid context at
/usr/src/kernel/include/linux/percpu-rwsem.h:49
[ 710.942950] in_atomic(): 0, irqs_disabled(): 1, non_block: 0, pid:
19025, name: mincore_selftes
[ 710.951631] INFO: lockdep is turned off.
[ 710.955551] irq event stamp: 190
[ 710.958785] hardirqs last enabled at (189): [<ffffffffa0bda53c>]
get_page_from_freelist+0x24c/0x14b0
[ 710.967995] hardirqs last disabled at (190): [<ffffffffa18c7921>]
irqentry_enter+0x21/0x50
[ 710.976247] softirqs last enabled at (36): [<ffffffffa1c00308>]
__do_softirq+0x308/0x42a
[ 710.984419] softirqs last disabled at (11): [<ffffffffa1a00f82>]
asm_call_on_stack+0x12/0x20
[ 710.992852] CPU: 0 PID: 19025 Comm: mincore_selftes Tainted: G
D W K 5.9.0-rc5-next-20200915 #1
[ 711.002496] Hardware name: Supermicro SYS-5019S-ML/X11SSH-F, BIOS
2.0b 07/27/2017
[ 711.009967] Call Trace:
[ 711.012412] dump_stack+0x7d/0x9f
[ 711.015724] ___might_sleep+0x163/0x250
[ 711.019562] __might_sleep+0x4a/0x80
[ 711.023141] exit_signals+0x33/0x2f0
[ 711.026713] do_exit+0xa9/0xcb0
[ 711.029858] ? __x64_sys_mincore+0x13c/0x330
[ 711.034125] rewind_stack_do_exit+0x17/0x20
[ 711.038308] RIP: 0033:0x7fa4c99be2d7
[ 711.041888] Code: 73 01 c3 48 8b 0d c1 fb 2b 00 f7 d8 64 89 01 48
83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 b8 1b 00 00
00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 91 fb 2b 00 f7 d8 64 89
01 48
[ 711.060633] RSP: 002b:00007ffcb103baf8 EFLAGS: 00000203 ORIG_RAX:
000000000000001b
[ 711.068197] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007fa4c99be2d7
[ 711.075321] RDX: 0000000000000000 RSI: 0000000000001000 RDI: 00007fa4c9ea6000
[ 711.082445] RBP: 00007ffcb103bc40 R08: 00000000ffffffff R09: 0000000000000000
[ 711.089570] R10: 0000000000000021 R11: 0000000000000203 R12: 0000000000400d00
[ 711.096693] R13: 00007ffcb103be10 R14: 0000000000000000 R15: 0000000000000000
[ 737.104310] rcu: INFO: rcu_sched self-detected stall on CPU
[ 737.109887] rcu: 0-....: (26000 ticks this GP)
idle=1a2/1/0x4000000000000000 softirq=102446/102446 fqs=6463
[ 737.119792] (t=26016 jiffies g=173197 q=1522)
[ 737.124238] NMI backtrace for cpu 0
[ 737.127731] CPU: 0 PID: 19025 Comm: mincore_selftes Tainted: G
D W K 5.9.0-rc5-next-20200915 #1
[ 737.137376] Hardware name: Supermicro SYS-5019S-ML/X11SSH-F, BIOS
2.0b 07/27/2017
[ 737.144856] Call Trace:
[ 737.147309] <IRQ>
[ 737.149330] dump_stack+0x7d/0x9f
[ 737.152649] nmi_cpu_backtrace+0xa4/0xc0
[ 737.156574] ? lapic_can_unplug_cpu+0xa0/0xa0
[ 737.160931] nmi_trigger_cpumask_backtrace+0x97/0xd0
[ 737.165889] arch_trigger_cpumask_backtrace+0x19/0x20
[ 737.170933] rcu_dump_cpu_stacks+0xbc/0xec
[ 737.175025] rcu_sched_clock_irq+0x729/0x9c0
[ 737.179296] ? account_system_index_time+0x112/0x1f0
[ 737.184256] ? tick_sched_do_timer+0x60/0x60
[ 737.188528] update_process_times+0x28/0x60
[ 737.192711] tick_sched_handle.isra.21+0x34/0x50
[ 737.197322] tick_sched_timer+0x6d/0x80
[ 737.201154] __hrtimer_run_queues+0x1d0/0x450
[ 737.205514] hrtimer_interrupt+0xe7/0x240
[ 737.209527] __sysvec_apic_timer_interrupt+0x79/0x1f0
[ 737.214578] asm_call_on_stack+0x12/0x20
[ 737.218504] </IRQ>
[ 737.220611] sysvec_apic_timer_interrupt+0x75/0xa0
[ 737.225402] asm_sysvec_apic_timer_interrupt+0x12/0x20
[ 737.230541] RIP: 0010:queued_spin_lock_slowpath+0x41/0x1a0
[ 737.236017] Code: f6 85 f6 75 3e f0 0f ba 2f 08 0f 92 c0 0f b6 c0
c1 e0 08 89 c2 8b 07 30 e4 09 d0 a9 00 01 ff ff 75 18 85 c0 75 04 eb
08 f3 90 <8b> 07 84 c0 75 f8 b8 01 00 00 00 66 89 07 5d c3 f6 c4 01 75
04 c6
[ 737.254755] RSP: 0018:ffffb0e2002bfc48 EFLAGS: 00000202
[ 737.259980] RAX: 0000000000000101 RBX: ffff9f23167158c0 RCX: 0000000000000000
[ 737.267103] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff9f23167158c0
[ 737.274230] RBP: ffffb0e2002bfc48 R08: 0000000000000001 R09: 0000000000000000
[ 737.281353] R10: 0000000000000000 R11: 0000000000000000 R12: 00007fa4c9ea1000
[ 737.288478] R13: 00007fa4c9ea1000 R14: ffffb0e2002bfe20 R15: 00007fa4c9ea1000
[ 737.295605] do_raw_spin_lock+0xb6/0xc0
[ 737.299440] _raw_spin_lock+0x37/0x40
[ 737.303098] ? unmap_page_range+0x4a6/0xd00
[ 737.307275] unmap_page_range+0x4a6/0xd00
[ 737.311294] unmap_single_vma+0x7d/0xf0
[ 737.315138] unmap_vmas+0xd4/0x160
[ 737.318544] exit_mmap+0xb1/0x1c0
[ 737.321866] mmput+0x6a/0x130
[ 737.324843] do_exit+0x359/0xcb0
[ 737.328076] rewind_stack_do_exit+0x17/0x20
[ 737.332260] RIP: 0033:0x7fa4c99be2d7
[ 737.335831] Code: Bad RIP value.
[ 737.339054] RSP: 002b:00007ffcb103baf8 EFLAGS: 00000203 ORIG_RAX:
000000000000001b
[ 737.346613] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007fa4c99be2d7
[ 737.353743] RDX: 0000000000000000 RSI: 0000000000001000 RDI: 00007fa4c9ea6000
[ 737.360867] RBP: 00007ffcb103bc40 R08: 00000000ffffffff R09: 0000000000000000
[ 737.367992] R10: 0000000000000021 R11: 0000000000000203 R12: 0000000000400d00
[ 737.375116] R13: 00007ffcb103be10 R14: 0000000000000000 R15: 0000000000000000
[ 815.107312] rcu: INFO: rcu_sched self-detected stall on CPU
[ 815.112890] rcu: 0-....: (103727 ticks this GP)
idle=1a2/1/0x4000000000000000 softirq=102446/102446 fqs=25897
[ 815.122966] (t=104019 jiffies g=173197 q=1545)
[ 815.127492] NMI backtrace for cpu 0
[ 815.130985] CPU: 0 PID: 19025 Comm: mincore_selftes Tainted: G
D W K 5.9.0-rc5-next-20200915 #1
[ 815.140628] Hardware name: Supermicro SYS-5019S-ML/X11SSH-F, BIOS
2.0b 07/27/2017
[ 815.148099] Call Trace:
[ 815.150542] <IRQ>
[ 815.152556] dump_stack+0x7d/0x9f
[ 815.155876] nmi_cpu_backtrace+0xa4/0xc0
[ 815.159799] ? lapic_can_unplug_cpu+0xa0/0xa0
[ 815.164150] nmi_trigger_cpumask_backtrace+0x97/0xd0
[ 815.169108] arch_trigger_cpumask_backtrace+0x19/0x20
[ 815.174151] rcu_dump_cpu_stacks+0xbc/0xec
[ 815.178245] rcu_sched_clock_irq+0x729/0x9c0
[ 815.182515] ? account_system_index_time+0x112/0x1f0
[ 815.187473] ? tick_sched_do_timer+0x60/0x60
[ 815.191744] update_process_times+0x28/0x60
[ 815.195922] tick_sched_handle.isra.21+0x34/0x50
[ 815.200533] tick_sched_timer+0x6d/0x80
[ 815.204365] __hrtimer_run_queues+0x1d0/0x450
[ 815.208725] hrtimer_interrupt+0xe7/0x240
[ 815.212738] __sysvec_apic_timer_interrupt+0x79/0x1f0
[ 815.217789] asm_call_on_stack+0x12/0x20
[ 815.221713] </IRQ>
[ 815.223811] sysvec_apic_timer_interrupt+0x75/0xa0
[ 815.228597] asm_sysvec_apic_timer_interrupt+0x12/0x20
[ 815.233734] RIP: 0010:queued_spin_lock_slowpath+0x41/0x1a0
full test log link,
https://lkft.validation.linaro.org/scheduler/job/1765602#L12129
https://qa-reports.linaro.org/lkft/linux-next-master/build/next-20200915/testrun/3198585/suite/linux-log-parser/test/check-kernel-warning-1765604/log
https://qa-reports.linaro.org/lkft/linux-next-master/build/next-20200915/testrun/3198610/suite/linux-log-parser/test/check-kernel-oops-1765633/log
Reported-by: Naresh Kamboju <naresh.kamboju@linaro.org>
--
Linaro LKFT
https://lkft.linaro.org
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH v2 0/8] Return head pages from find_*_entry
2020-09-15 12:53 ` Naresh Kamboju
@ 2020-09-15 13:08 ` Matthew Wilcox
0 siblings, 0 replies; 18+ messages in thread
From: Matthew Wilcox @ 2020-09-15 13:08 UTC (permalink / raw)
To: Naresh Kamboju
Cc: Hugh Dickins, linux-mm, Andrew Morton, William Kucharski,
Jani Nikula, Alexey Dobriyan, Johannes Weiner, Chris Wilson,
Matthew Auld, Huang Ying, intel-gfx, Cgroups, open list,
lkft-triage, Linux-Next Mailing List, Stephen Rothwell,
ricardo.canuelo
On Tue, Sep 15, 2020 at 06:23:27PM +0530, Naresh Kamboju wrote:
> While running kselftest mincore tests the following kernel BUG reported on the
> linux next-20200915 tag on x86_64, i386 and arm64.
https://lore.kernel.org/linux-mm/20200914112738.GM6583@casper.infradead.org/
^ permalink raw reply [flat|nested] 18+ messages in thread