All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] mm: page_owner: detect page_owner recursion via task_struct
@ 2021-04-01 22:30 Sergei Trofimovich
  2021-04-02  0:05 ` Andrew Morton
  0 siblings, 1 reply; 6+ messages in thread
From: Sergei Trofimovich @ 2021-04-01 22:30 UTC (permalink / raw)
  To: linux-mm
  Cc: linux-kernel, Sergei Trofimovich, Ingo Molnar, Peter Zijlstra,
	Juri Lelli, Vincent Guittot, Dietmar Eggemann, Steven Rostedt,
	Ben Segall, Mel Gorman, Daniel Bristot de Oliveira,
	Andrew Morton

Before the change page_owner recursion was detected via fetching
backtrace and inspecting it for current instruction pointer.
It has a few problems:
- it is slightly slow as it requires extra backtrace and a linear
  stack scan of the result
- it is too late to check if backtrace fetching required memory
  allocation itself (ia64's unwinder requires it).

To simplify recursion tracking let's use page_owner recursion depth
as a counter in 'struct task_struct'.

The change makes page_owner=on work on ia64 by avoiding infinite
recursion in:
  kmalloc()
  -> __set_page_owner()
  -> save_stack()
  -> unwind() [ia64-specific]
  -> build_script()
  -> kmalloc()
  -> __set_page_owner() [we short-circuit here]
  -> save_stack()
  -> unwind() [recursion]

CC: Ingo Molnar <mingo@redhat.com>
CC: Peter Zijlstra <peterz@infradead.org>
CC: Juri Lelli <juri.lelli@redhat.com>
CC: Vincent Guittot <vincent.guittot@linaro.org>
CC: Dietmar Eggemann <dietmar.eggemann@arm.com>
CC: Steven Rostedt <rostedt@goodmis.org>
CC: Ben Segall <bsegall@google.com>
CC: Mel Gorman <mgorman@suse.de>
CC: Daniel Bristot de Oliveira <bristot@redhat.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: linux-mm@kvack.org
Signed-off-by: Sergei Trofimovich <slyfox@gentoo.org>
---
 include/linux/sched.h |  9 +++++++++
 init/init_task.c      |  3 +++
 mm/page_owner.c       | 41 +++++++++++++++++------------------------
 3 files changed, 29 insertions(+), 24 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index ef00bb22164c..35771703fd89 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1371,6 +1371,15 @@ struct task_struct {
 	struct llist_head               kretprobe_instances;
 #endif
 
+#ifdef CONFIG_PAGE_OWNER
+	/*
+	 * Used by page_owner=on to detect recursion in page tracking.
+	 * Is it fine to have non-atomic ops here if we ever access
+	 * this variable via current->page_owner_depth?
+	 */
+	unsigned int page_owner_depth;
+#endif
+
 	/*
 	 * New fields for task_struct should be added above here, so that
 	 * they are included in the randomized portion of task_struct.
diff --git a/init/init_task.c b/init/init_task.c
index 3711cdaafed2..f579f2b2eca8 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -213,6 +213,9 @@ struct task_struct init_task
 #ifdef CONFIG_SECCOMP
 	.seccomp	= { .filter_count = ATOMIC_INIT(0) },
 #endif
+#ifdef CONFIG_PAGE_OWNER
+	.page_owner_depth	= 0,
+#endif
 };
 EXPORT_SYMBOL(init_task);
 
diff --git a/mm/page_owner.c b/mm/page_owner.c
index 7147fd34a948..422558605fcc 100644
--- a/mm/page_owner.c
+++ b/mm/page_owner.c
@@ -20,6 +20,16 @@
  */
 #define PAGE_OWNER_STACK_DEPTH (16)
 
+/*
+ * How many re-entries into page_owner we allow.
+ *
+ * Sometimes metadata allocation tracking requires more memory to be allocated:
+ * - when new stack trace is saved to stack depot
+ * - when backtrace itself is calculated (ia64)
+ * Instead of falling into infinite recursion, give it a chance to recover.
+ */
+#define PAGE_OWNER_MAX_RECURSION_DEPTH (1)
+
 struct page_owner {
 	unsigned short order;
 	short last_migrate_reason;
@@ -97,42 +107,25 @@ static inline struct page_owner *get_page_owner(struct page_ext *page_ext)
 	return (void *)page_ext + page_owner_ops.offset;
 }
 
-static inline bool check_recursive_alloc(unsigned long *entries,
-					 unsigned int nr_entries,
-					 unsigned long ip)
-{
-	unsigned int i;
-
-	for (i = 0; i < nr_entries; i++) {
-		if (entries[i] == ip)
-			return true;
-	}
-	return false;
-}
-
 static noinline depot_stack_handle_t save_stack(gfp_t flags)
 {
 	unsigned long entries[PAGE_OWNER_STACK_DEPTH];
 	depot_stack_handle_t handle;
 	unsigned int nr_entries;
 
-	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 2);
-
-	/*
-	 * We need to check recursion here because our request to
-	 * stackdepot could trigger memory allocation to save new
-	 * entry. New memory allocation would reach here and call
-	 * stack_depot_save_entries() again if we don't catch it. There is
-	 * still not enough memory in stackdepot so it would try to
-	 * allocate memory again and loop forever.
-	 */
-	if (check_recursive_alloc(entries, nr_entries, _RET_IP_))
+	/* Avoid recursion. Used in stack trace generation code. */
+	if (current->page_owner_depth >= PAGE_OWNER_MAX_RECURSION_DEPTH)
 		return dummy_handle;
 
+	current->page_owner_depth++;
+
+	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 2);
+
 	handle = stack_depot_save(entries, nr_entries, flags);
 	if (!handle)
 		handle = failure_handle;
 
+	current->page_owner_depth--;
 	return handle;
 }
 
-- 
2.31.1


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] mm: page_owner: detect page_owner recursion via task_struct
  2021-04-01 22:30 [PATCH] mm: page_owner: detect page_owner recursion via task_struct Sergei Trofimovich
@ 2021-04-02  0:05 ` Andrew Morton
  2021-04-02 11:50   ` Sergei Trofimovich
  0 siblings, 1 reply; 6+ messages in thread
From: Andrew Morton @ 2021-04-02  0:05 UTC (permalink / raw)
  To: Sergei Trofimovich
  Cc: linux-mm, linux-kernel, Ingo Molnar, Peter Zijlstra, Juri Lelli,
	Vincent Guittot, Dietmar Eggemann, Steven Rostedt, Ben Segall,
	Mel Gorman, Daniel Bristot de Oliveira

On Thu,  1 Apr 2021 23:30:10 +0100 Sergei Trofimovich <slyfox@gentoo.org> wrote:

> Before the change page_owner recursion was detected via fetching
> backtrace and inspecting it for current instruction pointer.
> It has a few problems:
> - it is slightly slow as it requires extra backtrace and a linear
>   stack scan of the result
> - it is too late to check if backtrace fetching required memory
>   allocation itself (ia64's unwinder requires it).
> 
> To simplify recursion tracking let's use page_owner recursion depth
> as a counter in 'struct task_struct'.

Seems like a better approach.

> The change makes page_owner=on work on ia64 by avoiding infinite
> recursion in:
>   kmalloc()
>   -> __set_page_owner()
>   -> save_stack()
>   -> unwind() [ia64-specific]
>   -> build_script()
>   -> kmalloc()
>   -> __set_page_owner() [we short-circuit here]
>   -> save_stack()
>   -> unwind() [recursion]
> 
> ...
>
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1371,6 +1371,15 @@ struct task_struct {
>  	struct llist_head               kretprobe_instances;
>  #endif
>  
> +#ifdef CONFIG_PAGE_OWNER
> +	/*
> +	 * Used by page_owner=on to detect recursion in page tracking.
> +	 * Is it fine to have non-atomic ops here if we ever access
> +	 * this variable via current->page_owner_depth?

Yes, it is fine.  This part of the comment can be removed.

> +	 */
> +	unsigned int page_owner_depth;
> +#endif

Adding to the task_struct has a cost.  But I don't expect that
PAGE_OWNER is commonly used in production builds (correct?).

> --- a/init/init_task.c
> +++ b/init/init_task.c
> @@ -213,6 +213,9 @@ struct task_struct init_task
>  #ifdef CONFIG_SECCOMP
>  	.seccomp	= { .filter_count = ATOMIC_INIT(0) },
>  #endif
> +#ifdef CONFIG_PAGE_OWNER
> +	.page_owner_depth	= 0,
> +#endif
>  };
>  EXPORT_SYMBOL(init_task);

It will be initialized to zero by the compiler.  We can omit this hunk
entirely.

> --- a/mm/page_owner.c
> +++ b/mm/page_owner.c
> @@ -20,6 +20,16 @@
>   */
>  #define PAGE_OWNER_STACK_DEPTH (16)
>  
> +/*
> + * How many re-entries into page_owner we allow.
> + *
> + * Sometimes metadata allocation tracking requires more memory to be allocated:
> + * - when new stack trace is saved to stack depot
> + * - when backtrace itself is calculated (ia64)
> + * Instead of falling into infinite recursion, give it a chance to recover.
> + */
> +#define PAGE_OWNER_MAX_RECURSION_DEPTH (1)

So this is presently a boolean.  Is there any expectation that
PAGE_OWNER_MAX_RECURSION_DEPTH will ever be greater than 1?  If not, we
could use a single bit in the task_struct.  Add it to the
"Unserialized, strictly 'current'" bitfields.  Could make it a 2-bit field if we want
to permit PAGE_OWNER_MAX_RECURSION_DEPTH=larger.



^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] mm: page_owner: detect page_owner recursion via task_struct
  2021-04-02  0:05 ` Andrew Morton
@ 2021-04-02 11:50   ` Sergei Trofimovich
  2021-04-02 11:53     ` [PATCH v2] " Sergei Trofimovich
  2021-04-07 12:25     ` [PATCH] " Vlastimil Babka
  0 siblings, 2 replies; 6+ messages in thread
From: Sergei Trofimovich @ 2021-04-02 11:50 UTC (permalink / raw)
  To: Andrew Morton
  Cc: linux-mm, linux-kernel, Ingo Molnar, Peter Zijlstra, Juri Lelli,
	Vincent Guittot, Dietmar Eggemann, Steven Rostedt, Ben Segall,
	Mel Gorman, Daniel Bristot de Oliveira

On Thu, 1 Apr 2021 17:05:19 -0700
Andrew Morton <akpm@linux-foundation.org> wrote:

> On Thu,  1 Apr 2021 23:30:10 +0100 Sergei Trofimovich <slyfox@gentoo.org> wrote:
> 
> > Before the change page_owner recursion was detected via fetching
> > backtrace and inspecting it for current instruction pointer.
> > It has a few problems:
> > - it is slightly slow as it requires extra backtrace and a linear
> >   stack scan of the result
> > - it is too late to check if backtrace fetching required memory
> >   allocation itself (ia64's unwinder requires it).
> > 
> > To simplify recursion tracking let's use page_owner recursion depth
> > as a counter in 'struct task_struct'.  
> 
> Seems like a better approach.
> 
> > The change makes page_owner=on work on ia64 by avoiding infinite
> > recursion in:
> >   kmalloc()  
> >   -> __set_page_owner()
> >   -> save_stack()
> >   -> unwind() [ia64-specific]
> >   -> build_script()
> >   -> kmalloc()
> >   -> __set_page_owner() [we short-circuit here]
> >   -> save_stack()
> >   -> unwind() [recursion]  
> > 
> > ...
> >
> > --- a/include/linux/sched.h
> > +++ b/include/linux/sched.h
> > @@ -1371,6 +1371,15 @@ struct task_struct {
> >  	struct llist_head               kretprobe_instances;
> >  #endif
> >  
> > +#ifdef CONFIG_PAGE_OWNER
> > +	/*
> > +	 * Used by page_owner=on to detect recursion in page tracking.
> > +	 * Is it fine to have non-atomic ops here if we ever access
> > +	 * this variable via current->page_owner_depth?  
> 
> Yes, it is fine.  This part of the comment can be removed.

Cool! Will do.

> > +	 */
> > +	unsigned int page_owner_depth;
> > +#endif  
> 
> Adding to the task_struct has a cost.  But I don't expect that
> PAGE_OWNER is commonly used in production builds (correct?).

Yeah, PAGE_OWNER should not be enabled for production kernels.

Not having extra memory overhead (or layout disruption) is a nice
benefit though. I'll switch to "Unserialized, strictly 'current'" bitfield.

> > --- a/init/init_task.c
> > +++ b/init/init_task.c
> > @@ -213,6 +213,9 @@ struct task_struct init_task
> >  #ifdef CONFIG_SECCOMP
> >  	.seccomp	= { .filter_count = ATOMIC_INIT(0) },
> >  #endif
> > +#ifdef CONFIG_PAGE_OWNER
> > +	.page_owner_depth	= 0,
> > +#endif
> >  };
> >  EXPORT_SYMBOL(init_task);  
> 
> It will be initialized to zero by the compiler.  We can omit this hunk
> entirely.
> 
> > --- a/mm/page_owner.c
> > +++ b/mm/page_owner.c
> > @@ -20,6 +20,16 @@
> >   */
> >  #define PAGE_OWNER_STACK_DEPTH (16)
> >  
> > +/*
> > + * How many re-entries into page_owner we allow.
> > + *
> > + * Sometimes metadata allocation tracking requires more memory to be allocated:
> > + * - when new stack trace is saved to stack depot
> > + * - when backtrace itself is calculated (ia64)
> > + * Instead of falling into infinite recursion, give it a chance to recover.
> > + */
> > +#define PAGE_OWNER_MAX_RECURSION_DEPTH (1)  
> 
> So this is presently a boolean.  Is there any expectation that
> PAGE_OWNER_MAX_RECURSION_DEPTH will ever be greater than 1?  If not, we
> could use a single bit in the task_struct.  Add it to the
> "Unserialized, strictly 'current'" bitfields.  Could make it a 2-bit field if we want
> to permit PAGE_OWNER_MAX_RECURSION_DEPTH=larger.

Let's settle on depth=1. depth>1 is not trivial for other reasons I don't
completely understand.

Follow-up patch incoming.

-- 

  Sergei

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH v2] mm: page_owner: detect page_owner recursion via task_struct
  2021-04-02 11:50   ` Sergei Trofimovich
@ 2021-04-02 11:53     ` Sergei Trofimovich
  2021-04-07 12:32       ` Vlastimil Babka
  2021-04-07 12:25     ` [PATCH] " Vlastimil Babka
  1 sibling, 1 reply; 6+ messages in thread
From: Sergei Trofimovich @ 2021-04-02 11:53 UTC (permalink / raw)
  To: Andrew Morton, linux-mm
  Cc: linux-kernel, Sergei Trofimovich, Ingo Molnar, Peter Zijlstra,
	Juri Lelli, Vincent Guittot, Dietmar Eggemann, Steven Rostedt,
	Ben Segall, Mel Gorman, Daniel Bristot de Oliveira

Before the change page_owner recursion was detected via fetching
backtrace and inspecting it for current instruction pointer.
It has a few problems:
- it is slightly slow as it requires extra backtrace and a linear
  stack scan of the result
- it is too late to check if backtrace fetching required memory
  allocation itself (ia64's unwinder requires it).

To simplify recursion tracking let's use page_owner recursion flag
in 'struct task_struct'.

The change makes page_owner=on work on ia64 by avoiding infinite
recursion in:
  kmalloc()
  -> __set_page_owner()
  -> save_stack()
  -> unwind() [ia64-specific]
  -> build_script()
  -> kmalloc()
  -> __set_page_owner() [we short-circuit here]
  -> save_stack()
  -> unwind() [recursion]

CC: Ingo Molnar <mingo@redhat.com>
CC: Peter Zijlstra <peterz@infradead.org>
CC: Juri Lelli <juri.lelli@redhat.com>
CC: Vincent Guittot <vincent.guittot@linaro.org>
CC: Dietmar Eggemann <dietmar.eggemann@arm.com>
CC: Steven Rostedt <rostedt@goodmis.org>
CC: Ben Segall <bsegall@google.com>
CC: Mel Gorman <mgorman@suse.de>
CC: Daniel Bristot de Oliveira <bristot@redhat.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: linux-mm@kvack.org
Signed-off-by: Sergei Trofimovich <slyfox@gentoo.org>
---
Change since v1:
- use bit from task_struct instead of a new field
- track only one recursion depth level so far

 include/linux/sched.h |  4 ++++
 mm/page_owner.c       | 32 ++++++++++----------------------
 2 files changed, 14 insertions(+), 22 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index ef00bb22164c..00986450677c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -841,6 +841,10 @@ struct task_struct {
 	/* Stalled due to lack of memory */
 	unsigned			in_memstall:1;
 #endif
+#ifdef CONFIG_PAGE_OWNER
+	/* Used by page_owner=on to detect recursion in page tracking. */
+	unsigned			in_page_owner:1;
+#endif
 
 	unsigned long			atomic_flags; /* Flags requiring atomic access. */
 
diff --git a/mm/page_owner.c b/mm/page_owner.c
index 7147fd34a948..64b2e4c6afb7 100644
--- a/mm/page_owner.c
+++ b/mm/page_owner.c
@@ -97,42 +97,30 @@ static inline struct page_owner *get_page_owner(struct page_ext *page_ext)
 	return (void *)page_ext + page_owner_ops.offset;
 }
 
-static inline bool check_recursive_alloc(unsigned long *entries,
-					 unsigned int nr_entries,
-					 unsigned long ip)
-{
-	unsigned int i;
-
-	for (i = 0; i < nr_entries; i++) {
-		if (entries[i] == ip)
-			return true;
-	}
-	return false;
-}
-
 static noinline depot_stack_handle_t save_stack(gfp_t flags)
 {
 	unsigned long entries[PAGE_OWNER_STACK_DEPTH];
 	depot_stack_handle_t handle;
 	unsigned int nr_entries;
 
-	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 2);
-
 	/*
-	 * We need to check recursion here because our request to
-	 * stackdepot could trigger memory allocation to save new
-	 * entry. New memory allocation would reach here and call
-	 * stack_depot_save_entries() again if we don't catch it. There is
-	 * still not enough memory in stackdepot so it would try to
-	 * allocate memory again and loop forever.
+	 * Avoid recursion.
+	 *
+	 * Sometimes page metadata allocation tracking requires more
+	 * memory to be allocated:
+	 * - when new stack trace is saved to stack depot
+	 * - when backtrace itself is calculated (ia64)
 	 */
-	if (check_recursive_alloc(entries, nr_entries, _RET_IP_))
+	if (current->in_page_owner)
 		return dummy_handle;
+	current->in_page_owner = 1;
 
+	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 2);
 	handle = stack_depot_save(entries, nr_entries, flags);
 	if (!handle)
 		handle = failure_handle;
 
+	current->in_page_owner = 0;
 	return handle;
 }
 
-- 
2.31.1


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] mm: page_owner: detect page_owner recursion via task_struct
  2021-04-02 11:50   ` Sergei Trofimovich
  2021-04-02 11:53     ` [PATCH v2] " Sergei Trofimovich
@ 2021-04-07 12:25     ` Vlastimil Babka
  1 sibling, 0 replies; 6+ messages in thread
From: Vlastimil Babka @ 2021-04-07 12:25 UTC (permalink / raw)
  To: Sergei Trofimovich, Andrew Morton
  Cc: linux-mm, linux-kernel, Ingo Molnar, Peter Zijlstra, Juri Lelli,
	Vincent Guittot, Dietmar Eggemann, Steven Rostedt, Ben Segall,
	Mel Gorman, Daniel Bristot de Oliveira

On 4/2/21 1:50 PM, Sergei Trofimovich wrote:
> On Thu, 1 Apr 2021 17:05:19 -0700
> Andrew Morton <akpm@linux-foundation.org> wrote:
> 
>> On Thu,  1 Apr 2021 23:30:10 +0100 Sergei Trofimovich <slyfox@gentoo.org> wrote:
>> 
>> > Before the change page_owner recursion was detected via fetching
>> > backtrace and inspecting it for current instruction pointer.
>> > It has a few problems:
>> > - it is slightly slow as it requires extra backtrace and a linear
>> >   stack scan of the result
>> > - it is too late to check if backtrace fetching required memory
>> >   allocation itself (ia64's unwinder requires it).
>> > 
>> > To simplify recursion tracking let's use page_owner recursion depth
>> > as a counter in 'struct task_struct'.  
>> 
>> Seems like a better approach.
>> 
>> > The change makes page_owner=on work on ia64 by avoiding infinite
>> > recursion in:
>> >   kmalloc()  
>> >   -> __set_page_owner()
>> >   -> save_stack()
>> >   -> unwind() [ia64-specific]
>> >   -> build_script()
>> >   -> kmalloc()
>> >   -> __set_page_owner() [we short-circuit here]
>> >   -> save_stack()
>> >   -> unwind() [recursion]  
>> > 
>> > ...
>> >
>> > --- a/include/linux/sched.h
>> > +++ b/include/linux/sched.h
>> > @@ -1371,6 +1371,15 @@ struct task_struct {
>> >  	struct llist_head               kretprobe_instances;
>> >  #endif
>> >  
>> > +#ifdef CONFIG_PAGE_OWNER
>> > +	/*
>> > +	 * Used by page_owner=on to detect recursion in page tracking.
>> > +	 * Is it fine to have non-atomic ops here if we ever access
>> > +	 * this variable via current->page_owner_depth?  
>> 
>> Yes, it is fine.  This part of the comment can be removed.
> 
> Cool! Will do.
> 
>> > +	 */
>> > +	unsigned int page_owner_depth;
>> > +#endif  
>> 
>> Adding to the task_struct has a cost.  But I don't expect that
>> PAGE_OWNER is commonly used in production builds (correct?).
> 
> Yeah, PAGE_OWNER should not be enabled for production kernels.

Note that it was converted to use a static key exactly so that it can be always
built in production kernels, and simply enabled on boot when needed. Our kernels
have it enabled.

> Not having extra memory overhead (or layout disruption) is a nice
> benefit though. I'll switch to "Unserialized, strictly 'current'" bitfield.
> 
>> > --- a/init/init_task.c
>> > +++ b/init/init_task.c
>> > @@ -213,6 +213,9 @@ struct task_struct init_task
>> >  #ifdef CONFIG_SECCOMP
>> >  	.seccomp	= { .filter_count = ATOMIC_INIT(0) },
>> >  #endif
>> > +#ifdef CONFIG_PAGE_OWNER
>> > +	.page_owner_depth	= 0,
>> > +#endif
>> >  };
>> >  EXPORT_SYMBOL(init_task);  
>> 
>> It will be initialized to zero by the compiler.  We can omit this hunk
>> entirely.
>> 
>> > --- a/mm/page_owner.c
>> > +++ b/mm/page_owner.c
>> > @@ -20,6 +20,16 @@
>> >   */
>> >  #define PAGE_OWNER_STACK_DEPTH (16)
>> >  
>> > +/*
>> > + * How many re-entries into page_owner we allow.
>> > + *
>> > + * Sometimes metadata allocation tracking requires more memory to be allocated:
>> > + * - when new stack trace is saved to stack depot
>> > + * - when backtrace itself is calculated (ia64)
>> > + * Instead of falling into infinite recursion, give it a chance to recover.
>> > + */
>> > +#define PAGE_OWNER_MAX_RECURSION_DEPTH (1)  
>> 
>> So this is presently a boolean.  Is there any expectation that
>> PAGE_OWNER_MAX_RECURSION_DEPTH will ever be greater than 1?  If not, we
>> could use a single bit in the task_struct.  Add it to the
>> "Unserialized, strictly 'current'" bitfields.  Could make it a 2-bit field if we want
>> to permit PAGE_OWNER_MAX_RECURSION_DEPTH=larger.
> 
> Let's settle on depth=1. depth>1 is not trivial for other reasons I don't
> completely understand.

That's fine, I don't think depth>1 would bring us much benefit anyway.

> Follow-up patch incoming.
> 


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v2] mm: page_owner: detect page_owner recursion via task_struct
  2021-04-02 11:53     ` [PATCH v2] " Sergei Trofimovich
@ 2021-04-07 12:32       ` Vlastimil Babka
  0 siblings, 0 replies; 6+ messages in thread
From: Vlastimil Babka @ 2021-04-07 12:32 UTC (permalink / raw)
  To: Sergei Trofimovich, Andrew Morton, linux-mm
  Cc: linux-kernel, Ingo Molnar, Peter Zijlstra, Juri Lelli,
	Vincent Guittot, Dietmar Eggemann, Steven Rostedt, Ben Segall,
	Mel Gorman, Daniel Bristot de Oliveira

On 4/2/21 1:53 PM, Sergei Trofimovich wrote:
> Before the change page_owner recursion was detected via fetching
> backtrace and inspecting it for current instruction pointer.
> It has a few problems:
> - it is slightly slow as it requires extra backtrace and a linear
>   stack scan of the result
> - it is too late to check if backtrace fetching required memory
>   allocation itself (ia64's unwinder requires it).
> 
> To simplify recursion tracking let's use page_owner recursion flag
> in 'struct task_struct'.
> 
> The change makes page_owner=on work on ia64 by avoiding infinite
> recursion in:
>   kmalloc()
>   -> __set_page_owner()
>   -> save_stack()
>   -> unwind() [ia64-specific]
>   -> build_script()
>   -> kmalloc()
>   -> __set_page_owner() [we short-circuit here]
>   -> save_stack()
>   -> unwind() [recursion]
> 
> CC: Ingo Molnar <mingo@redhat.com>
> CC: Peter Zijlstra <peterz@infradead.org>
> CC: Juri Lelli <juri.lelli@redhat.com>
> CC: Vincent Guittot <vincent.guittot@linaro.org>
> CC: Dietmar Eggemann <dietmar.eggemann@arm.com>
> CC: Steven Rostedt <rostedt@goodmis.org>
> CC: Ben Segall <bsegall@google.com>
> CC: Mel Gorman <mgorman@suse.de>
> CC: Daniel Bristot de Oliveira <bristot@redhat.com>
> CC: Andrew Morton <akpm@linux-foundation.org>
> CC: linux-mm@kvack.org
> Signed-off-by: Sergei Trofimovich <slyfox@gentoo.org>

Much better indeed, thanks.
Acked-by: Vlastimil Babka <vbabka@suse.cz>

> ---
> Change since v1:
> - use bit from task_struct instead of a new field
> - track only one recursion depth level so far
> 
>  include/linux/sched.h |  4 ++++
>  mm/page_owner.c       | 32 ++++++++++----------------------
>  2 files changed, 14 insertions(+), 22 deletions(-)
> 
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index ef00bb22164c..00986450677c 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -841,6 +841,10 @@ struct task_struct {
>  	/* Stalled due to lack of memory */
>  	unsigned			in_memstall:1;
>  #endif
> +#ifdef CONFIG_PAGE_OWNER
> +	/* Used by page_owner=on to detect recursion in page tracking. */
> +	unsigned			in_page_owner:1;
> +#endif
>  
>  	unsigned long			atomic_flags; /* Flags requiring atomic access. */
>  
> diff --git a/mm/page_owner.c b/mm/page_owner.c
> index 7147fd34a948..64b2e4c6afb7 100644
> --- a/mm/page_owner.c
> +++ b/mm/page_owner.c
> @@ -97,42 +97,30 @@ static inline struct page_owner *get_page_owner(struct page_ext *page_ext)
>  	return (void *)page_ext + page_owner_ops.offset;
>  }
>  
> -static inline bool check_recursive_alloc(unsigned long *entries,
> -					 unsigned int nr_entries,
> -					 unsigned long ip)
> -{
> -	unsigned int i;
> -
> -	for (i = 0; i < nr_entries; i++) {
> -		if (entries[i] == ip)
> -			return true;
> -	}
> -	return false;
> -}
> -
>  static noinline depot_stack_handle_t save_stack(gfp_t flags)
>  {
>  	unsigned long entries[PAGE_OWNER_STACK_DEPTH];
>  	depot_stack_handle_t handle;
>  	unsigned int nr_entries;
>  
> -	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 2);
> -
>  	/*
> -	 * We need to check recursion here because our request to
> -	 * stackdepot could trigger memory allocation to save new
> -	 * entry. New memory allocation would reach here and call
> -	 * stack_depot_save_entries() again if we don't catch it. There is
> -	 * still not enough memory in stackdepot so it would try to
> -	 * allocate memory again and loop forever.
> +	 * Avoid recursion.
> +	 *
> +	 * Sometimes page metadata allocation tracking requires more
> +	 * memory to be allocated:
> +	 * - when new stack trace is saved to stack depot
> +	 * - when backtrace itself is calculated (ia64)
>  	 */
> -	if (check_recursive_alloc(entries, nr_entries, _RET_IP_))
> +	if (current->in_page_owner)
>  		return dummy_handle;
> +	current->in_page_owner = 1;
>  
> +	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 2);
>  	handle = stack_depot_save(entries, nr_entries, flags);
>  	if (!handle)
>  		handle = failure_handle;
>  
> +	current->in_page_owner = 0;
>  	return handle;
>  }
>  
> 


^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2021-04-07 12:32 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-04-01 22:30 [PATCH] mm: page_owner: detect page_owner recursion via task_struct Sergei Trofimovich
2021-04-02  0:05 ` Andrew Morton
2021-04-02 11:50   ` Sergei Trofimovich
2021-04-02 11:53     ` [PATCH v2] " Sergei Trofimovich
2021-04-07 12:32       ` Vlastimil Babka
2021-04-07 12:25     ` [PATCH] " Vlastimil Babka

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.