All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] mm,oom: add tracepoints for oom reaper-related events
@ 2017-05-30 12:05 ` Roman Gushchin
  0 siblings, 0 replies; 20+ messages in thread
From: Roman Gushchin @ 2017-05-30 12:05 UTC (permalink / raw)
  To: Michal Hocko
  Cc: Roman Gushchin, Tetsuo Handa, Johannes Weiner, Vladimir Davydov,
	kernel-team, linux-kernel, linux-mm

Add tracepoints to simplify the debugging of the oom reaper code.

Trace the following events:
1) a process is marked as an oom victim,
2) a process is added to the oom reaper list,
3) the oom reaper starts reaping process's mm,
4) the oom reaper finished reaping,
5) the oom reaper skips reaping.

Signed-off-by: Roman Gushchin <guro@fb.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: kernel-team@fb.com
Cc: linux-kernel@vger.kernel.org
Cc: linux-mm@kvack.org
---
 include/trace/events/oom.h | 80 ++++++++++++++++++++++++++++++++++++++++++++++
 mm/oom_kill.c              |  7 ++++
 2 files changed, 87 insertions(+)

diff --git a/include/trace/events/oom.h b/include/trace/events/oom.h
index 38baeb2..c3c19d4 100644
--- a/include/trace/events/oom.h
+++ b/include/trace/events/oom.h
@@ -70,6 +70,86 @@ TRACE_EVENT(reclaim_retry_zone,
 			__entry->wmark_check)
 );
 
+TRACE_EVENT(mark_victim,
+	TP_PROTO(int pid),
+
+	TP_ARGS(pid),
+
+	TP_STRUCT__entry(
+		__field(int, pid)
+	),
+
+	TP_fast_assign(
+		__entry->pid = pid;
+	),
+
+	TP_printk("pid=%d", __entry->pid)
+);
+
+TRACE_EVENT(wake_reaper,
+	TP_PROTO(int pid),
+
+	TP_ARGS(pid),
+
+	TP_STRUCT__entry(
+		__field(int, pid)
+	),
+
+	TP_fast_assign(
+		__entry->pid = pid;
+	),
+
+	TP_printk("pid=%d", __entry->pid)
+);
+
+TRACE_EVENT(start_task_reaping,
+	TP_PROTO(int pid),
+
+	TP_ARGS(pid),
+
+	TP_STRUCT__entry(
+		__field(int, pid)
+	),
+
+	TP_fast_assign(
+		__entry->pid = pid;
+	),
+
+	TP_printk("pid=%d", __entry->pid)
+);
+
+TRACE_EVENT(finish_task_reaping,
+	TP_PROTO(int pid),
+
+	TP_ARGS(pid),
+
+	TP_STRUCT__entry(
+		__field(int, pid)
+	),
+
+	TP_fast_assign(
+		__entry->pid = pid;
+	),
+
+	TP_printk("pid=%d", __entry->pid)
+);
+
+TRACE_EVENT(skip_task_reaping,
+	TP_PROTO(int pid),
+
+	TP_ARGS(pid),
+
+	TP_STRUCT__entry(
+		__field(int, pid)
+	),
+
+	TP_fast_assign(
+		__entry->pid = pid;
+	),
+
+	TP_printk("pid=%d", __entry->pid)
+);
+
 #ifdef CONFIG_COMPACTION
 TRACE_EVENT(compact_retry,
 
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 04c9143..409b685 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -490,6 +490,7 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
 
 	if (!down_read_trylock(&mm->mmap_sem)) {
 		ret = false;
+		trace_skip_task_reaping(tsk->pid);
 		goto unlock_oom;
 	}
 
@@ -500,9 +501,12 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
 	 */
 	if (!mmget_not_zero(mm)) {
 		up_read(&mm->mmap_sem);
+		trace_skip_task_reaping(tsk->pid);
 		goto unlock_oom;
 	}
 
+	trace_start_task_reaping(tsk->pid);
+
 	/*
 	 * Tell all users of get_user/copy_from_user etc... that the content
 	 * is no longer stable. No barriers really needed because unmapping
@@ -544,6 +548,7 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
 	 * put the oom_reaper out of the way.
 	 */
 	mmput_async(mm);
+	trace_finish_task_reaping(tsk->pid);
 unlock_oom:
 	mutex_unlock(&oom_lock);
 	return ret;
@@ -615,6 +620,7 @@ static void wake_oom_reaper(struct task_struct *tsk)
 	tsk->oom_reaper_list = oom_reaper_list;
 	oom_reaper_list = tsk;
 	spin_unlock(&oom_reaper_lock);
+	trace_wake_reaper(tsk->pid);
 	wake_up(&oom_reaper_wait);
 }
 
@@ -666,6 +672,7 @@ static void mark_oom_victim(struct task_struct *tsk)
 	 */
 	__thaw_task(tsk);
 	atomic_inc(&oom_victims);
+	trace_mark_victim(tsk->pid);
 }
 
 /**
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH] mm,oom: add tracepoints for oom reaper-related events
@ 2017-05-30 12:05 ` Roman Gushchin
  0 siblings, 0 replies; 20+ messages in thread
From: Roman Gushchin @ 2017-05-30 12:05 UTC (permalink / raw)
  To: Michal Hocko
  Cc: Roman Gushchin, Tetsuo Handa, Johannes Weiner, Vladimir Davydov,
	kernel-team, linux-kernel, linux-mm

Add tracepoints to simplify the debugging of the oom reaper code.

Trace the following events:
1) a process is marked as an oom victim,
2) a process is added to the oom reaper list,
3) the oom reaper starts reaping process's mm,
4) the oom reaper finished reaping,
5) the oom reaper skips reaping.

Signed-off-by: Roman Gushchin <guro@fb.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: kernel-team@fb.com
Cc: linux-kernel@vger.kernel.org
Cc: linux-mm@kvack.org
---
 include/trace/events/oom.h | 80 ++++++++++++++++++++++++++++++++++++++++++++++
 mm/oom_kill.c              |  7 ++++
 2 files changed, 87 insertions(+)

diff --git a/include/trace/events/oom.h b/include/trace/events/oom.h
index 38baeb2..c3c19d4 100644
--- a/include/trace/events/oom.h
+++ b/include/trace/events/oom.h
@@ -70,6 +70,86 @@ TRACE_EVENT(reclaim_retry_zone,
 			__entry->wmark_check)
 );
 
+TRACE_EVENT(mark_victim,
+	TP_PROTO(int pid),
+
+	TP_ARGS(pid),
+
+	TP_STRUCT__entry(
+		__field(int, pid)
+	),
+
+	TP_fast_assign(
+		__entry->pid = pid;
+	),
+
+	TP_printk("pid=%d", __entry->pid)
+);
+
+TRACE_EVENT(wake_reaper,
+	TP_PROTO(int pid),
+
+	TP_ARGS(pid),
+
+	TP_STRUCT__entry(
+		__field(int, pid)
+	),
+
+	TP_fast_assign(
+		__entry->pid = pid;
+	),
+
+	TP_printk("pid=%d", __entry->pid)
+);
+
+TRACE_EVENT(start_task_reaping,
+	TP_PROTO(int pid),
+
+	TP_ARGS(pid),
+
+	TP_STRUCT__entry(
+		__field(int, pid)
+	),
+
+	TP_fast_assign(
+		__entry->pid = pid;
+	),
+
+	TP_printk("pid=%d", __entry->pid)
+);
+
+TRACE_EVENT(finish_task_reaping,
+	TP_PROTO(int pid),
+
+	TP_ARGS(pid),
+
+	TP_STRUCT__entry(
+		__field(int, pid)
+	),
+
+	TP_fast_assign(
+		__entry->pid = pid;
+	),
+
+	TP_printk("pid=%d", __entry->pid)
+);
+
+TRACE_EVENT(skip_task_reaping,
+	TP_PROTO(int pid),
+
+	TP_ARGS(pid),
+
+	TP_STRUCT__entry(
+		__field(int, pid)
+	),
+
+	TP_fast_assign(
+		__entry->pid = pid;
+	),
+
+	TP_printk("pid=%d", __entry->pid)
+);
+
 #ifdef CONFIG_COMPACTION
 TRACE_EVENT(compact_retry,
 
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 04c9143..409b685 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -490,6 +490,7 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
 
 	if (!down_read_trylock(&mm->mmap_sem)) {
 		ret = false;
+		trace_skip_task_reaping(tsk->pid);
 		goto unlock_oom;
 	}
 
@@ -500,9 +501,12 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
 	 */
 	if (!mmget_not_zero(mm)) {
 		up_read(&mm->mmap_sem);
+		trace_skip_task_reaping(tsk->pid);
 		goto unlock_oom;
 	}
 
+	trace_start_task_reaping(tsk->pid);
+
 	/*
 	 * Tell all users of get_user/copy_from_user etc... that the content
 	 * is no longer stable. No barriers really needed because unmapping
@@ -544,6 +548,7 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
 	 * put the oom_reaper out of the way.
 	 */
 	mmput_async(mm);
+	trace_finish_task_reaping(tsk->pid);
 unlock_oom:
 	mutex_unlock(&oom_lock);
 	return ret;
@@ -615,6 +620,7 @@ static void wake_oom_reaper(struct task_struct *tsk)
 	tsk->oom_reaper_list = oom_reaper_list;
 	oom_reaper_list = tsk;
 	spin_unlock(&oom_reaper_lock);
+	trace_wake_reaper(tsk->pid);
 	wake_up(&oom_reaper_wait);
 }
 
@@ -666,6 +672,7 @@ static void mark_oom_victim(struct task_struct *tsk)
 	 */
 	__thaw_task(tsk);
 	atomic_inc(&oom_victims);
+	trace_mark_victim(tsk->pid);
 }
 
 /**
-- 
2.7.4

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* Re: [PATCH] mm,oom: add tracepoints for oom reaper-related events
  2017-05-30 12:05 ` Roman Gushchin
@ 2017-05-30 12:34   ` Michal Hocko
  -1 siblings, 0 replies; 20+ messages in thread
From: Michal Hocko @ 2017-05-30 12:34 UTC (permalink / raw)
  To: Roman Gushchin
  Cc: Tetsuo Handa, Johannes Weiner, Vladimir Davydov, kernel-team,
	linux-kernel, linux-mm

On Tue 30-05-17 13:05:32, Roman Gushchin wrote:
> Add tracepoints to simplify the debugging of the oom reaper code.
> 
> Trace the following events:
> 1) a process is marked as an oom victim,
> 2) a process is added to the oom reaper list,
> 3) the oom reaper starts reaping process's mm,
> 4) the oom reaper finished reaping,
> 5) the oom reaper skips reaping.

I am not against but could you explain why the current printks are not
sufficient? We do not have any explicit printk for the 2) and 3) but
are those really necessary?

In other words could you describe the situation when you found these
tracepoints more useful than what the kernel log offers already?

> Signed-off-by: Roman Gushchin <guro@fb.com>
> Cc: Michal Hocko <mhocko@suse.com>
> Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
> Cc: Johannes Weiner <hannes@cmpxchg.org>
> Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
> Cc: kernel-team@fb.com
> Cc: linux-kernel@vger.kernel.org
> Cc: linux-mm@kvack.org
> ---
>  include/trace/events/oom.h | 80 ++++++++++++++++++++++++++++++++++++++++++++++
>  mm/oom_kill.c              |  7 ++++
>  2 files changed, 87 insertions(+)
> 
> diff --git a/include/trace/events/oom.h b/include/trace/events/oom.h
> index 38baeb2..c3c19d4 100644
> --- a/include/trace/events/oom.h
> +++ b/include/trace/events/oom.h
> @@ -70,6 +70,86 @@ TRACE_EVENT(reclaim_retry_zone,
>  			__entry->wmark_check)
>  );
>  
> +TRACE_EVENT(mark_victim,
> +	TP_PROTO(int pid),
> +
> +	TP_ARGS(pid),
> +
> +	TP_STRUCT__entry(
> +		__field(int, pid)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->pid = pid;
> +	),
> +
> +	TP_printk("pid=%d", __entry->pid)
> +);
> +
> +TRACE_EVENT(wake_reaper,
> +	TP_PROTO(int pid),
> +
> +	TP_ARGS(pid),
> +
> +	TP_STRUCT__entry(
> +		__field(int, pid)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->pid = pid;
> +	),
> +
> +	TP_printk("pid=%d", __entry->pid)
> +);
> +
> +TRACE_EVENT(start_task_reaping,
> +	TP_PROTO(int pid),
> +
> +	TP_ARGS(pid),
> +
> +	TP_STRUCT__entry(
> +		__field(int, pid)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->pid = pid;
> +	),
> +
> +	TP_printk("pid=%d", __entry->pid)
> +);
> +
> +TRACE_EVENT(finish_task_reaping,
> +	TP_PROTO(int pid),
> +
> +	TP_ARGS(pid),
> +
> +	TP_STRUCT__entry(
> +		__field(int, pid)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->pid = pid;
> +	),
> +
> +	TP_printk("pid=%d", __entry->pid)
> +);
> +
> +TRACE_EVENT(skip_task_reaping,
> +	TP_PROTO(int pid),
> +
> +	TP_ARGS(pid),
> +
> +	TP_STRUCT__entry(
> +		__field(int, pid)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->pid = pid;
> +	),
> +
> +	TP_printk("pid=%d", __entry->pid)
> +);
> +
>  #ifdef CONFIG_COMPACTION
>  TRACE_EVENT(compact_retry,
>  
> diff --git a/mm/oom_kill.c b/mm/oom_kill.c
> index 04c9143..409b685 100644
> --- a/mm/oom_kill.c
> +++ b/mm/oom_kill.c
> @@ -490,6 +490,7 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
>  
>  	if (!down_read_trylock(&mm->mmap_sem)) {
>  		ret = false;
> +		trace_skip_task_reaping(tsk->pid);
>  		goto unlock_oom;
>  	}
>  
> @@ -500,9 +501,12 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
>  	 */
>  	if (!mmget_not_zero(mm)) {
>  		up_read(&mm->mmap_sem);
> +		trace_skip_task_reaping(tsk->pid);
>  		goto unlock_oom;
>  	}
>  
> +	trace_start_task_reaping(tsk->pid);
> +
>  	/*
>  	 * Tell all users of get_user/copy_from_user etc... that the content
>  	 * is no longer stable. No barriers really needed because unmapping
> @@ -544,6 +548,7 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
>  	 * put the oom_reaper out of the way.
>  	 */
>  	mmput_async(mm);
> +	trace_finish_task_reaping(tsk->pid);
>  unlock_oom:
>  	mutex_unlock(&oom_lock);
>  	return ret;
> @@ -615,6 +620,7 @@ static void wake_oom_reaper(struct task_struct *tsk)
>  	tsk->oom_reaper_list = oom_reaper_list;
>  	oom_reaper_list = tsk;
>  	spin_unlock(&oom_reaper_lock);
> +	trace_wake_reaper(tsk->pid);
>  	wake_up(&oom_reaper_wait);
>  }
>  
> @@ -666,6 +672,7 @@ static void mark_oom_victim(struct task_struct *tsk)
>  	 */
>  	__thaw_task(tsk);
>  	atomic_inc(&oom_victims);
> +	trace_mark_victim(tsk->pid);
>  }
>  
>  /**
> -- 
> 2.7.4
> 

-- 
Michal Hocko
SUSE Labs

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] mm,oom: add tracepoints for oom reaper-related events
@ 2017-05-30 12:34   ` Michal Hocko
  0 siblings, 0 replies; 20+ messages in thread
From: Michal Hocko @ 2017-05-30 12:34 UTC (permalink / raw)
  To: Roman Gushchin
  Cc: Tetsuo Handa, Johannes Weiner, Vladimir Davydov, kernel-team,
	linux-kernel, linux-mm

On Tue 30-05-17 13:05:32, Roman Gushchin wrote:
> Add tracepoints to simplify the debugging of the oom reaper code.
> 
> Trace the following events:
> 1) a process is marked as an oom victim,
> 2) a process is added to the oom reaper list,
> 3) the oom reaper starts reaping process's mm,
> 4) the oom reaper finished reaping,
> 5) the oom reaper skips reaping.

I am not against but could you explain why the current printks are not
sufficient? We do not have any explicit printk for the 2) and 3) but
are those really necessary?

In other words could you describe the situation when you found these
tracepoints more useful than what the kernel log offers already?

> Signed-off-by: Roman Gushchin <guro@fb.com>
> Cc: Michal Hocko <mhocko@suse.com>
> Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
> Cc: Johannes Weiner <hannes@cmpxchg.org>
> Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
> Cc: kernel-team@fb.com
> Cc: linux-kernel@vger.kernel.org
> Cc: linux-mm@kvack.org
> ---
>  include/trace/events/oom.h | 80 ++++++++++++++++++++++++++++++++++++++++++++++
>  mm/oom_kill.c              |  7 ++++
>  2 files changed, 87 insertions(+)
> 
> diff --git a/include/trace/events/oom.h b/include/trace/events/oom.h
> index 38baeb2..c3c19d4 100644
> --- a/include/trace/events/oom.h
> +++ b/include/trace/events/oom.h
> @@ -70,6 +70,86 @@ TRACE_EVENT(reclaim_retry_zone,
>  			__entry->wmark_check)
>  );
>  
> +TRACE_EVENT(mark_victim,
> +	TP_PROTO(int pid),
> +
> +	TP_ARGS(pid),
> +
> +	TP_STRUCT__entry(
> +		__field(int, pid)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->pid = pid;
> +	),
> +
> +	TP_printk("pid=%d", __entry->pid)
> +);
> +
> +TRACE_EVENT(wake_reaper,
> +	TP_PROTO(int pid),
> +
> +	TP_ARGS(pid),
> +
> +	TP_STRUCT__entry(
> +		__field(int, pid)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->pid = pid;
> +	),
> +
> +	TP_printk("pid=%d", __entry->pid)
> +);
> +
> +TRACE_EVENT(start_task_reaping,
> +	TP_PROTO(int pid),
> +
> +	TP_ARGS(pid),
> +
> +	TP_STRUCT__entry(
> +		__field(int, pid)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->pid = pid;
> +	),
> +
> +	TP_printk("pid=%d", __entry->pid)
> +);
> +
> +TRACE_EVENT(finish_task_reaping,
> +	TP_PROTO(int pid),
> +
> +	TP_ARGS(pid),
> +
> +	TP_STRUCT__entry(
> +		__field(int, pid)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->pid = pid;
> +	),
> +
> +	TP_printk("pid=%d", __entry->pid)
> +);
> +
> +TRACE_EVENT(skip_task_reaping,
> +	TP_PROTO(int pid),
> +
> +	TP_ARGS(pid),
> +
> +	TP_STRUCT__entry(
> +		__field(int, pid)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->pid = pid;
> +	),
> +
> +	TP_printk("pid=%d", __entry->pid)
> +);
> +
>  #ifdef CONFIG_COMPACTION
>  TRACE_EVENT(compact_retry,
>  
> diff --git a/mm/oom_kill.c b/mm/oom_kill.c
> index 04c9143..409b685 100644
> --- a/mm/oom_kill.c
> +++ b/mm/oom_kill.c
> @@ -490,6 +490,7 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
>  
>  	if (!down_read_trylock(&mm->mmap_sem)) {
>  		ret = false;
> +		trace_skip_task_reaping(tsk->pid);
>  		goto unlock_oom;
>  	}
>  
> @@ -500,9 +501,12 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
>  	 */
>  	if (!mmget_not_zero(mm)) {
>  		up_read(&mm->mmap_sem);
> +		trace_skip_task_reaping(tsk->pid);
>  		goto unlock_oom;
>  	}
>  
> +	trace_start_task_reaping(tsk->pid);
> +
>  	/*
>  	 * Tell all users of get_user/copy_from_user etc... that the content
>  	 * is no longer stable. No barriers really needed because unmapping
> @@ -544,6 +548,7 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
>  	 * put the oom_reaper out of the way.
>  	 */
>  	mmput_async(mm);
> +	trace_finish_task_reaping(tsk->pid);
>  unlock_oom:
>  	mutex_unlock(&oom_lock);
>  	return ret;
> @@ -615,6 +620,7 @@ static void wake_oom_reaper(struct task_struct *tsk)
>  	tsk->oom_reaper_list = oom_reaper_list;
>  	oom_reaper_list = tsk;
>  	spin_unlock(&oom_reaper_lock);
> +	trace_wake_reaper(tsk->pid);
>  	wake_up(&oom_reaper_wait);
>  }
>  
> @@ -666,6 +672,7 @@ static void mark_oom_victim(struct task_struct *tsk)
>  	 */
>  	__thaw_task(tsk);
>  	atomic_inc(&oom_victims);
> +	trace_mark_victim(tsk->pid);
>  }
>  
>  /**
> -- 
> 2.7.4
> 

-- 
Michal Hocko
SUSE Labs

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] mm,oom: add tracepoints for oom reaper-related events
  2017-05-30 12:34   ` Michal Hocko
@ 2017-05-30 13:17     ` Tetsuo Handa
  -1 siblings, 0 replies; 20+ messages in thread
From: Tetsuo Handa @ 2017-05-30 13:17 UTC (permalink / raw)
  To: mhocko, guro; +Cc: hannes, vdavydov.dev, kernel-team, linux-kernel, linux-mm

Michal Hocko wrote:
> On Tue 30-05-17 13:05:32, Roman Gushchin wrote:
> > Add tracepoints to simplify the debugging of the oom reaper code.
> > 
> > Trace the following events:
> > 1) a process is marked as an oom victim,
> > 2) a process is added to the oom reaper list,
> > 3) the oom reaper starts reaping process's mm,
> > 4) the oom reaper finished reaping,
> > 5) the oom reaper skips reaping.
> 
> I am not against but could you explain why the current printks are not
> sufficient? We do not have any explicit printk for the 2) and 3) but
> are those really necessary?
> 
> In other words could you describe the situation when you found these
> tracepoints more useful than what the kernel log offers already?

Guessing from "to simplify the debugging of the oom reaper code",
Roman is facing some unknown bugs or problems?

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] mm,oom: add tracepoints for oom reaper-related events
@ 2017-05-30 13:17     ` Tetsuo Handa
  0 siblings, 0 replies; 20+ messages in thread
From: Tetsuo Handa @ 2017-05-30 13:17 UTC (permalink / raw)
  To: mhocko, guro; +Cc: hannes, vdavydov.dev, kernel-team, linux-kernel, linux-mm

Michal Hocko wrote:
> On Tue 30-05-17 13:05:32, Roman Gushchin wrote:
> > Add tracepoints to simplify the debugging of the oom reaper code.
> > 
> > Trace the following events:
> > 1) a process is marked as an oom victim,
> > 2) a process is added to the oom reaper list,
> > 3) the oom reaper starts reaping process's mm,
> > 4) the oom reaper finished reaping,
> > 5) the oom reaper skips reaping.
> 
> I am not against but could you explain why the current printks are not
> sufficient? We do not have any explicit printk for the 2) and 3) but
> are those really necessary?
> 
> In other words could you describe the situation when you found these
> tracepoints more useful than what the kernel log offers already?

Guessing from "to simplify the debugging of the oom reaper code",
Roman is facing some unknown bugs or problems?

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] mm,oom: add tracepoints for oom reaper-related events
  2017-05-30 12:34   ` Michal Hocko
@ 2017-05-30 13:33     ` Roman Gushchin
  -1 siblings, 0 replies; 20+ messages in thread
From: Roman Gushchin @ 2017-05-30 13:33 UTC (permalink / raw)
  To: Michal Hocko
  Cc: Tetsuo Handa, Johannes Weiner, Vladimir Davydov, kernel-team,
	linux-kernel, linux-mm

On Tue, May 30, 2017 at 02:34:16PM +0200, Michal Hocko wrote:
> On Tue 30-05-17 13:05:32, Roman Gushchin wrote:
> > Add tracepoints to simplify the debugging of the oom reaper code.
> > 
> > Trace the following events:
> > 1) a process is marked as an oom victim,
> > 2) a process is added to the oom reaper list,
> > 3) the oom reaper starts reaping process's mm,
> > 4) the oom reaper finished reaping,
> > 5) the oom reaper skips reaping.
> 
> I am not against but could you explain why the current printks are not
> sufficient? We do not have any explicit printk for the 2) and 3) but
> are those really necessary?

We also don't have any printks for 1) and 2) if, for, instance, we call
out_of_memory() and task_will_free_mem(current) returns true.

> 
> In other words could you describe the situation when you found these
> tracepoints more useful than what the kernel log offers already?

During my work on cgroup-aware OOM killer and some issues discovered
in process (which are described in https://lkml.org/lkml/2017/5/17/542;
most important problem fixed by Tetsuo), I've found an existing debug output
insufficient and sometimes too bulky.

Suggested traces allowed me to debug issues like I've met (double invocation
of oom_reaper, etc) much easier.

Thanks!

Roman

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] mm,oom: add tracepoints for oom reaper-related events
@ 2017-05-30 13:33     ` Roman Gushchin
  0 siblings, 0 replies; 20+ messages in thread
From: Roman Gushchin @ 2017-05-30 13:33 UTC (permalink / raw)
  To: Michal Hocko
  Cc: Tetsuo Handa, Johannes Weiner, Vladimir Davydov, kernel-team,
	linux-kernel, linux-mm

On Tue, May 30, 2017 at 02:34:16PM +0200, Michal Hocko wrote:
> On Tue 30-05-17 13:05:32, Roman Gushchin wrote:
> > Add tracepoints to simplify the debugging of the oom reaper code.
> > 
> > Trace the following events:
> > 1) a process is marked as an oom victim,
> > 2) a process is added to the oom reaper list,
> > 3) the oom reaper starts reaping process's mm,
> > 4) the oom reaper finished reaping,
> > 5) the oom reaper skips reaping.
> 
> I am not against but could you explain why the current printks are not
> sufficient? We do not have any explicit printk for the 2) and 3) but
> are those really necessary?

We also don't have any printks for 1) and 2) if, for, instance, we call
out_of_memory() and task_will_free_mem(current) returns true.

> 
> In other words could you describe the situation when you found these
> tracepoints more useful than what the kernel log offers already?

During my work on cgroup-aware OOM killer and some issues discovered
in process (which are described in https://lkml.org/lkml/2017/5/17/542;
most important problem fixed by Tetsuo), I've found an existing debug output
insufficient and sometimes too bulky.

Suggested traces allowed me to debug issues like I've met (double invocation
of oom_reaper, etc) much easier.

Thanks!

Roman

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] mm,oom: add tracepoints for oom reaper-related events
  2017-05-30 13:33     ` Roman Gushchin
@ 2017-05-30 13:45       ` Michal Hocko
  -1 siblings, 0 replies; 20+ messages in thread
From: Michal Hocko @ 2017-05-30 13:45 UTC (permalink / raw)
  To: Roman Gushchin
  Cc: Tetsuo Handa, Johannes Weiner, Vladimir Davydov, kernel-team,
	linux-kernel, linux-mm

On Tue 30-05-17 14:33:35, Roman Gushchin wrote:
> On Tue, May 30, 2017 at 02:34:16PM +0200, Michal Hocko wrote:
> > On Tue 30-05-17 13:05:32, Roman Gushchin wrote:
> > > Add tracepoints to simplify the debugging of the oom reaper code.
> > > 
> > > Trace the following events:
> > > 1) a process is marked as an oom victim,
> > > 2) a process is added to the oom reaper list,
> > > 3) the oom reaper starts reaping process's mm,
> > > 4) the oom reaper finished reaping,
> > > 5) the oom reaper skips reaping.
> > 
> > I am not against but could you explain why the current printks are not
> > sufficient? We do not have any explicit printk for the 2) and 3) but
> > are those really necessary?
> 
> We also don't have any printks for 1) and 2) if, for, instance, we call
> out_of_memory() and task_will_free_mem(current) returns true.
> 
> > 
> > In other words could you describe the situation when you found these
> > tracepoints more useful than what the kernel log offers already?
> 
> During my work on cgroup-aware OOM killer and some issues discovered
> in process (which are described in https://lkml.org/lkml/2017/5/17/542;
> most important problem fixed by Tetsuo), I've found an existing debug output
> insufficient and sometimes too bulky.
> 
> Suggested traces allowed me to debug issues like I've met (double invocation
> of oom_reaper, etc) much easier.

Please describe those and examples how the new tracepoints will be
useful in the changelog.
-- 
Michal Hocko
SUSE Labs

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] mm,oom: add tracepoints for oom reaper-related events
@ 2017-05-30 13:45       ` Michal Hocko
  0 siblings, 0 replies; 20+ messages in thread
From: Michal Hocko @ 2017-05-30 13:45 UTC (permalink / raw)
  To: Roman Gushchin
  Cc: Tetsuo Handa, Johannes Weiner, Vladimir Davydov, kernel-team,
	linux-kernel, linux-mm

On Tue 30-05-17 14:33:35, Roman Gushchin wrote:
> On Tue, May 30, 2017 at 02:34:16PM +0200, Michal Hocko wrote:
> > On Tue 30-05-17 13:05:32, Roman Gushchin wrote:
> > > Add tracepoints to simplify the debugging of the oom reaper code.
> > > 
> > > Trace the following events:
> > > 1) a process is marked as an oom victim,
> > > 2) a process is added to the oom reaper list,
> > > 3) the oom reaper starts reaping process's mm,
> > > 4) the oom reaper finished reaping,
> > > 5) the oom reaper skips reaping.
> > 
> > I am not against but could you explain why the current printks are not
> > sufficient? We do not have any explicit printk for the 2) and 3) but
> > are those really necessary?
> 
> We also don't have any printks for 1) and 2) if, for, instance, we call
> out_of_memory() and task_will_free_mem(current) returns true.
> 
> > 
> > In other words could you describe the situation when you found these
> > tracepoints more useful than what the kernel log offers already?
> 
> During my work on cgroup-aware OOM killer and some issues discovered
> in process (which are described in https://lkml.org/lkml/2017/5/17/542;
> most important problem fixed by Tetsuo), I've found an existing debug output
> insufficient and sometimes too bulky.
> 
> Suggested traces allowed me to debug issues like I've met (double invocation
> of oom_reaper, etc) much easier.

Please describe those and examples how the new tracepoints will be
useful in the changelog.
-- 
Michal Hocko
SUSE Labs

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v2] mm,oom: add tracepoints for oom reaper-related events
  2017-05-30 13:45       ` Michal Hocko
@ 2017-05-30 18:52         ` Roman Gushchin
  -1 siblings, 0 replies; 20+ messages in thread
From: Roman Gushchin @ 2017-05-30 18:52 UTC (permalink / raw)
  To: Michal Hocko
  Cc: Tetsuo Handa, Johannes Weiner, Vladimir Davydov, kernel-team,
	linux-kernel, linux-mm

>From c57e3674efc609f8364f5e228a2c1309cfe99901 Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Tue, 23 May 2017 17:37:55 +0100
Subject: [PATCH v2] mm,oom: add tracepoints for oom reaper-related events

During the debugging of the problem described in
https://lkml.org/lkml/2017/5/17/542 and fixed by Tetsuo Handa
in https://lkml.org/lkml/2017/5/19/383 , I've found that
the existing debug output is not really useful to understand
issues related to the oom reaper.

So, I assume, that adding some tracepoints might help with
debugging of similar issues.

Trace the following events:
1) a process is marked as an oom victim,
2) a process is added to the oom reaper list,
3) the oom reaper starts reaping process's mm,
4) the oom reaper finished reaping,
5) the oom reaper skips reaping.

How it works in practice? Below is an example which show
how the problem mentioned above can be found: one process is added
twice to the oom_reaper list:

$ cd /sys/kernel/debug/tracing
$ echo "oom:mark_victim" > set_event
$ echo "oom:wake_reaper" >> set_event
$ echo "oom:skip_task_reaping" >> set_event
$ echo "oom:start_task_reaping" >> set_event
$ echo "oom:finish_task_reaping" >> set_event
$ cat trace_pipe
        allocate-502   [001] ....    91.836405: mark_victim: pid=502
        allocate-502   [001] .N..    91.837356: wake_reaper: pid=502
        allocate-502   [000] .N..    91.871149: wake_reaper: pid=502
      oom_reaper-23    [000] ....    91.871177: start_task_reaping: pid=502
      oom_reaper-23    [000] .N..    91.879511: finish_task_reaping: pid=502
      oom_reaper-23    [000] ....    91.879580: skip_task_reaping: pid=502

Signed-off-by: Roman Gushchin <guro@fb.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: kernel-team@fb.com
Cc: linux-kernel@vger.kernel.org
Cc: linux-mm@kvack.org
---
 include/trace/events/oom.h | 80 ++++++++++++++++++++++++++++++++++++++++++++++
 mm/oom_kill.c              |  7 ++++
 2 files changed, 87 insertions(+)

diff --git a/include/trace/events/oom.h b/include/trace/events/oom.h
index 38baeb2..c3c19d4 100644
--- a/include/trace/events/oom.h
+++ b/include/trace/events/oom.h
@@ -70,6 +70,86 @@ TRACE_EVENT(reclaim_retry_zone,
 			__entry->wmark_check)
 );
 
+TRACE_EVENT(mark_victim,
+	TP_PROTO(int pid),
+
+	TP_ARGS(pid),
+
+	TP_STRUCT__entry(
+		__field(int, pid)
+	),
+
+	TP_fast_assign(
+		__entry->pid = pid;
+	),
+
+	TP_printk("pid=%d", __entry->pid)
+);
+
+TRACE_EVENT(wake_reaper,
+	TP_PROTO(int pid),
+
+	TP_ARGS(pid),
+
+	TP_STRUCT__entry(
+		__field(int, pid)
+	),
+
+	TP_fast_assign(
+		__entry->pid = pid;
+	),
+
+	TP_printk("pid=%d", __entry->pid)
+);
+
+TRACE_EVENT(start_task_reaping,
+	TP_PROTO(int pid),
+
+	TP_ARGS(pid),
+
+	TP_STRUCT__entry(
+		__field(int, pid)
+	),
+
+	TP_fast_assign(
+		__entry->pid = pid;
+	),
+
+	TP_printk("pid=%d", __entry->pid)
+);
+
+TRACE_EVENT(finish_task_reaping,
+	TP_PROTO(int pid),
+
+	TP_ARGS(pid),
+
+	TP_STRUCT__entry(
+		__field(int, pid)
+	),
+
+	TP_fast_assign(
+		__entry->pid = pid;
+	),
+
+	TP_printk("pid=%d", __entry->pid)
+);
+
+TRACE_EVENT(skip_task_reaping,
+	TP_PROTO(int pid),
+
+	TP_ARGS(pid),
+
+	TP_STRUCT__entry(
+		__field(int, pid)
+	),
+
+	TP_fast_assign(
+		__entry->pid = pid;
+	),
+
+	TP_printk("pid=%d", __entry->pid)
+);
+
 #ifdef CONFIG_COMPACTION
 TRACE_EVENT(compact_retry,
 
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 04c9143..409b685 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -490,6 +490,7 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
 
 	if (!down_read_trylock(&mm->mmap_sem)) {
 		ret = false;
+		trace_skip_task_reaping(tsk->pid);
 		goto unlock_oom;
 	}
 
@@ -500,9 +501,12 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
 	 */
 	if (!mmget_not_zero(mm)) {
 		up_read(&mm->mmap_sem);
+		trace_skip_task_reaping(tsk->pid);
 		goto unlock_oom;
 	}
 
+	trace_start_task_reaping(tsk->pid);
+
 	/*
 	 * Tell all users of get_user/copy_from_user etc... that the content
 	 * is no longer stable. No barriers really needed because unmapping
@@ -544,6 +548,7 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
 	 * put the oom_reaper out of the way.
 	 */
 	mmput_async(mm);
+	trace_finish_task_reaping(tsk->pid);
 unlock_oom:
 	mutex_unlock(&oom_lock);
 	return ret;
@@ -615,6 +620,7 @@ static void wake_oom_reaper(struct task_struct *tsk)
 	tsk->oom_reaper_list = oom_reaper_list;
 	oom_reaper_list = tsk;
 	spin_unlock(&oom_reaper_lock);
+	trace_wake_reaper(tsk->pid);
 	wake_up(&oom_reaper_wait);
 }
 
@@ -666,6 +672,7 @@ static void mark_oom_victim(struct task_struct *tsk)
 	 */
 	__thaw_task(tsk);
 	atomic_inc(&oom_victims);
+	trace_mark_victim(tsk->pid);
 }
 
 /**
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* Re: [PATCH v2] mm,oom: add tracepoints for oom reaper-related events
@ 2017-05-30 18:52         ` Roman Gushchin
  0 siblings, 0 replies; 20+ messages in thread
From: Roman Gushchin @ 2017-05-30 18:52 UTC (permalink / raw)
  To: Michal Hocko
  Cc: Tetsuo Handa, Johannes Weiner, Vladimir Davydov, kernel-team,
	linux-kernel, linux-mm



^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v2] mm,oom: add tracepoints for oom reaper-related events
  2017-05-30 18:52         ` Roman Gushchin
@ 2017-05-31 16:39           ` Michal Hocko
  -1 siblings, 0 replies; 20+ messages in thread
From: Michal Hocko @ 2017-05-31 16:39 UTC (permalink / raw)
  To: Roman Gushchin
  Cc: Tetsuo Handa, Johannes Weiner, Vladimir Davydov, kernel-team,
	linux-kernel, linux-mm

On Tue 30-05-17 19:52:31, Roman Gushchin wrote:
> >From c57e3674efc609f8364f5e228a2c1309cfe99901 Mon Sep 17 00:00:00 2001
> From: Roman Gushchin <guro@fb.com>
> Date: Tue, 23 May 2017 17:37:55 +0100
> Subject: [PATCH v2] mm,oom: add tracepoints for oom reaper-related events
> 
> During the debugging of the problem described in
> https://lkml.org/lkml/2017/5/17/542 and fixed by Tetsuo Handa
> in https://lkml.org/lkml/2017/5/19/383 , I've found that
> the existing debug output is not really useful to understand
> issues related to the oom reaper.
> 
> So, I assume, that adding some tracepoints might help with
> debugging of similar issues.
> 
> Trace the following events:
> 1) a process is marked as an oom victim,
> 2) a process is added to the oom reaper list,
> 3) the oom reaper starts reaping process's mm,
> 4) the oom reaper finished reaping,
> 5) the oom reaper skips reaping.
> 
> How it works in practice? Below is an example which show
> how the problem mentioned above can be found: one process is added
> twice to the oom_reaper list:
> 
> $ cd /sys/kernel/debug/tracing
> $ echo "oom:mark_victim" > set_event
> $ echo "oom:wake_reaper" >> set_event
> $ echo "oom:skip_task_reaping" >> set_event
> $ echo "oom:start_task_reaping" >> set_event
> $ echo "oom:finish_task_reaping" >> set_event
> $ cat trace_pipe
>         allocate-502   [001] ....    91.836405: mark_victim: pid=502
>         allocate-502   [001] .N..    91.837356: wake_reaper: pid=502
>         allocate-502   [000] .N..    91.871149: wake_reaper: pid=502
>       oom_reaper-23    [000] ....    91.871177: start_task_reaping: pid=502
>       oom_reaper-23    [000] .N..    91.879511: finish_task_reaping: pid=502
>       oom_reaper-23    [000] ....    91.879580: skip_task_reaping: pid=502

OK, this is much better! The clue here would be that we got 2
wakeups for the same task, right?
Do you think it would make sense to put more context to those
tracepoints? E.g. skip_task_reaping can be due to lock contention or the
mm gone. wake_reaper is similar.

> Signed-off-by: Roman Gushchin <guro@fb.com>
> Cc: Michal Hocko <mhocko@suse.com>
> Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
> Cc: Johannes Weiner <hannes@cmpxchg.org>
> Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
> Cc: kernel-team@fb.com
> Cc: linux-kernel@vger.kernel.org
> Cc: linux-mm@kvack.org
> ---
>  include/trace/events/oom.h | 80 ++++++++++++++++++++++++++++++++++++++++++++++
>  mm/oom_kill.c              |  7 ++++
>  2 files changed, 87 insertions(+)
> 
> diff --git a/include/trace/events/oom.h b/include/trace/events/oom.h
> index 38baeb2..c3c19d4 100644
> --- a/include/trace/events/oom.h
> +++ b/include/trace/events/oom.h
> @@ -70,6 +70,86 @@ TRACE_EVENT(reclaim_retry_zone,
>  			__entry->wmark_check)
>  );
>  
> +TRACE_EVENT(mark_victim,
> +	TP_PROTO(int pid),
> +
> +	TP_ARGS(pid),
> +
> +	TP_STRUCT__entry(
> +		__field(int, pid)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->pid = pid;
> +	),
> +
> +	TP_printk("pid=%d", __entry->pid)
> +);
> +
> +TRACE_EVENT(wake_reaper,
> +	TP_PROTO(int pid),
> +
> +	TP_ARGS(pid),
> +
> +	TP_STRUCT__entry(
> +		__field(int, pid)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->pid = pid;
> +	),
> +
> +	TP_printk("pid=%d", __entry->pid)
> +);
> +
> +TRACE_EVENT(start_task_reaping,
> +	TP_PROTO(int pid),
> +
> +	TP_ARGS(pid),
> +
> +	TP_STRUCT__entry(
> +		__field(int, pid)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->pid = pid;
> +	),
> +
> +	TP_printk("pid=%d", __entry->pid)
> +);
> +
> +TRACE_EVENT(finish_task_reaping,
> +	TP_PROTO(int pid),
> +
> +	TP_ARGS(pid),
> +
> +	TP_STRUCT__entry(
> +		__field(int, pid)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->pid = pid;
> +	),
> +
> +	TP_printk("pid=%d", __entry->pid)
> +);
> +
> +TRACE_EVENT(skip_task_reaping,
> +	TP_PROTO(int pid),
> +
> +	TP_ARGS(pid),
> +
> +	TP_STRUCT__entry(
> +		__field(int, pid)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->pid = pid;
> +	),
> +
> +	TP_printk("pid=%d", __entry->pid)
> +);
> +
>  #ifdef CONFIG_COMPACTION
>  TRACE_EVENT(compact_retry,
>  
> diff --git a/mm/oom_kill.c b/mm/oom_kill.c
> index 04c9143..409b685 100644
> --- a/mm/oom_kill.c
> +++ b/mm/oom_kill.c
> @@ -490,6 +490,7 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
>  
>  	if (!down_read_trylock(&mm->mmap_sem)) {
>  		ret = false;
> +		trace_skip_task_reaping(tsk->pid);
>  		goto unlock_oom;
>  	}
>  
> @@ -500,9 +501,12 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
>  	 */
>  	if (!mmget_not_zero(mm)) {
>  		up_read(&mm->mmap_sem);
> +		trace_skip_task_reaping(tsk->pid);
>  		goto unlock_oom;
>  	}
>  
> +	trace_start_task_reaping(tsk->pid);
> +
>  	/*
>  	 * Tell all users of get_user/copy_from_user etc... that the content
>  	 * is no longer stable. No barriers really needed because unmapping
> @@ -544,6 +548,7 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
>  	 * put the oom_reaper out of the way.
>  	 */
>  	mmput_async(mm);
> +	trace_finish_task_reaping(tsk->pid);
>  unlock_oom:
>  	mutex_unlock(&oom_lock);
>  	return ret;
> @@ -615,6 +620,7 @@ static void wake_oom_reaper(struct task_struct *tsk)
>  	tsk->oom_reaper_list = oom_reaper_list;
>  	oom_reaper_list = tsk;
>  	spin_unlock(&oom_reaper_lock);
> +	trace_wake_reaper(tsk->pid);
>  	wake_up(&oom_reaper_wait);
>  }
>  
> @@ -666,6 +672,7 @@ static void mark_oom_victim(struct task_struct *tsk)
>  	 */
>  	__thaw_task(tsk);
>  	atomic_inc(&oom_victims);
> +	trace_mark_victim(tsk->pid);
>  }
>  
>  /**
> -- 
> 2.7.4

-- 
Michal Hocko
SUSE Labs

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v2] mm,oom: add tracepoints for oom reaper-related events
@ 2017-05-31 16:39           ` Michal Hocko
  0 siblings, 0 replies; 20+ messages in thread
From: Michal Hocko @ 2017-05-31 16:39 UTC (permalink / raw)
  To: Roman Gushchin
  Cc: Tetsuo Handa, Johannes Weiner, Vladimir Davydov, kernel-team,
	linux-kernel, linux-mm

On Tue 30-05-17 19:52:31, Roman Gushchin wrote:
> >From c57e3674efc609f8364f5e228a2c1309cfe99901 Mon Sep 17 00:00:00 2001
> From: Roman Gushchin <guro@fb.com>
> Date: Tue, 23 May 2017 17:37:55 +0100
> Subject: [PATCH v2] mm,oom: add tracepoints for oom reaper-related events
> 
> During the debugging of the problem described in
> https://lkml.org/lkml/2017/5/17/542 and fixed by Tetsuo Handa
> in https://lkml.org/lkml/2017/5/19/383 , I've found that
> the existing debug output is not really useful to understand
> issues related to the oom reaper.
> 
> So, I assume, that adding some tracepoints might help with
> debugging of similar issues.
> 
> Trace the following events:
> 1) a process is marked as an oom victim,
> 2) a process is added to the oom reaper list,
> 3) the oom reaper starts reaping process's mm,
> 4) the oom reaper finished reaping,
> 5) the oom reaper skips reaping.
> 
> How it works in practice? Below is an example which show
> how the problem mentioned above can be found: one process is added
> twice to the oom_reaper list:
> 
> $ cd /sys/kernel/debug/tracing
> $ echo "oom:mark_victim" > set_event
> $ echo "oom:wake_reaper" >> set_event
> $ echo "oom:skip_task_reaping" >> set_event
> $ echo "oom:start_task_reaping" >> set_event
> $ echo "oom:finish_task_reaping" >> set_event
> $ cat trace_pipe
>         allocate-502   [001] ....    91.836405: mark_victim: pid=502
>         allocate-502   [001] .N..    91.837356: wake_reaper: pid=502
>         allocate-502   [000] .N..    91.871149: wake_reaper: pid=502
>       oom_reaper-23    [000] ....    91.871177: start_task_reaping: pid=502
>       oom_reaper-23    [000] .N..    91.879511: finish_task_reaping: pid=502
>       oom_reaper-23    [000] ....    91.879580: skip_task_reaping: pid=502

OK, this is much better! The clue here would be that we got 2
wakeups for the same task, right?
Do you think it would make sense to put more context to those
tracepoints? E.g. skip_task_reaping can be due to lock contention or the
mm gone. wake_reaper is similar.

> Signed-off-by: Roman Gushchin <guro@fb.com>
> Cc: Michal Hocko <mhocko@suse.com>
> Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
> Cc: Johannes Weiner <hannes@cmpxchg.org>
> Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
> Cc: kernel-team@fb.com
> Cc: linux-kernel@vger.kernel.org
> Cc: linux-mm@kvack.org
> ---
>  include/trace/events/oom.h | 80 ++++++++++++++++++++++++++++++++++++++++++++++
>  mm/oom_kill.c              |  7 ++++
>  2 files changed, 87 insertions(+)
> 
> diff --git a/include/trace/events/oom.h b/include/trace/events/oom.h
> index 38baeb2..c3c19d4 100644
> --- a/include/trace/events/oom.h
> +++ b/include/trace/events/oom.h
> @@ -70,6 +70,86 @@ TRACE_EVENT(reclaim_retry_zone,
>  			__entry->wmark_check)
>  );
>  
> +TRACE_EVENT(mark_victim,
> +	TP_PROTO(int pid),
> +
> +	TP_ARGS(pid),
> +
> +	TP_STRUCT__entry(
> +		__field(int, pid)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->pid = pid;
> +	),
> +
> +	TP_printk("pid=%d", __entry->pid)
> +);
> +
> +TRACE_EVENT(wake_reaper,
> +	TP_PROTO(int pid),
> +
> +	TP_ARGS(pid),
> +
> +	TP_STRUCT__entry(
> +		__field(int, pid)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->pid = pid;
> +	),
> +
> +	TP_printk("pid=%d", __entry->pid)
> +);
> +
> +TRACE_EVENT(start_task_reaping,
> +	TP_PROTO(int pid),
> +
> +	TP_ARGS(pid),
> +
> +	TP_STRUCT__entry(
> +		__field(int, pid)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->pid = pid;
> +	),
> +
> +	TP_printk("pid=%d", __entry->pid)
> +);
> +
> +TRACE_EVENT(finish_task_reaping,
> +	TP_PROTO(int pid),
> +
> +	TP_ARGS(pid),
> +
> +	TP_STRUCT__entry(
> +		__field(int, pid)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->pid = pid;
> +	),
> +
> +	TP_printk("pid=%d", __entry->pid)
> +);
> +
> +TRACE_EVENT(skip_task_reaping,
> +	TP_PROTO(int pid),
> +
> +	TP_ARGS(pid),
> +
> +	TP_STRUCT__entry(
> +		__field(int, pid)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->pid = pid;
> +	),
> +
> +	TP_printk("pid=%d", __entry->pid)
> +);
> +
>  #ifdef CONFIG_COMPACTION
>  TRACE_EVENT(compact_retry,
>  
> diff --git a/mm/oom_kill.c b/mm/oom_kill.c
> index 04c9143..409b685 100644
> --- a/mm/oom_kill.c
> +++ b/mm/oom_kill.c
> @@ -490,6 +490,7 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
>  
>  	if (!down_read_trylock(&mm->mmap_sem)) {
>  		ret = false;
> +		trace_skip_task_reaping(tsk->pid);
>  		goto unlock_oom;
>  	}
>  
> @@ -500,9 +501,12 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
>  	 */
>  	if (!mmget_not_zero(mm)) {
>  		up_read(&mm->mmap_sem);
> +		trace_skip_task_reaping(tsk->pid);
>  		goto unlock_oom;
>  	}
>  
> +	trace_start_task_reaping(tsk->pid);
> +
>  	/*
>  	 * Tell all users of get_user/copy_from_user etc... that the content
>  	 * is no longer stable. No barriers really needed because unmapping
> @@ -544,6 +548,7 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
>  	 * put the oom_reaper out of the way.
>  	 */
>  	mmput_async(mm);
> +	trace_finish_task_reaping(tsk->pid);
>  unlock_oom:
>  	mutex_unlock(&oom_lock);
>  	return ret;
> @@ -615,6 +620,7 @@ static void wake_oom_reaper(struct task_struct *tsk)
>  	tsk->oom_reaper_list = oom_reaper_list;
>  	oom_reaper_list = tsk;
>  	spin_unlock(&oom_reaper_lock);
> +	trace_wake_reaper(tsk->pid);
>  	wake_up(&oom_reaper_wait);
>  }
>  
> @@ -666,6 +672,7 @@ static void mark_oom_victim(struct task_struct *tsk)
>  	 */
>  	__thaw_task(tsk);
>  	atomic_inc(&oom_victims);
> +	trace_mark_victim(tsk->pid);
>  }
>  
>  /**
> -- 
> 2.7.4

-- 
Michal Hocko
SUSE Labs

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v2] mm,oom: add tracepoints for oom reaper-related events
  2017-05-31 16:39           ` Michal Hocko
@ 2017-06-01 18:41             ` Roman Gushchin
  -1 siblings, 0 replies; 20+ messages in thread
From: Roman Gushchin @ 2017-06-01 18:41 UTC (permalink / raw)
  To: Michal Hocko
  Cc: Tetsuo Handa, Johannes Weiner, Vladimir Davydov, kernel-team,
	linux-kernel, linux-mm

On Wed, May 31, 2017 at 06:39:29PM +0200, Michal Hocko wrote:
> On Tue 30-05-17 19:52:31, Roman Gushchin wrote:
> > >From c57e3674efc609f8364f5e228a2c1309cfe99901 Mon Sep 17 00:00:00 2001
> > From: Roman Gushchin <guro@fb.com>
> > Date: Tue, 23 May 2017 17:37:55 +0100
> > Subject: [PATCH v2] mm,oom: add tracepoints for oom reaper-related events
> > 
> > During the debugging of the problem described in
> > https://lkml.org/lkml/2017/5/17/542 and fixed by Tetsuo Handa
> > in https://lkml.org/lkml/2017/5/19/383 , I've found that
> > the existing debug output is not really useful to understand
> > issues related to the oom reaper.
> > 
> > So, I assume, that adding some tracepoints might help with
> > debugging of similar issues.
> > 
> > Trace the following events:
> > 1) a process is marked as an oom victim,
> > 2) a process is added to the oom reaper list,
> > 3) the oom reaper starts reaping process's mm,
> > 4) the oom reaper finished reaping,
> > 5) the oom reaper skips reaping.
> > 
> > How it works in practice? Below is an example which show
> > how the problem mentioned above can be found: one process is added
> > twice to the oom_reaper list:
> > 
> > $ cd /sys/kernel/debug/tracing
> > $ echo "oom:mark_victim" > set_event
> > $ echo "oom:wake_reaper" >> set_event
> > $ echo "oom:skip_task_reaping" >> set_event
> > $ echo "oom:start_task_reaping" >> set_event
> > $ echo "oom:finish_task_reaping" >> set_event
> > $ cat trace_pipe
> >         allocate-502   [001] ....    91.836405: mark_victim: pid=502
> >         allocate-502   [001] .N..    91.837356: wake_reaper: pid=502
> >         allocate-502   [000] .N..    91.871149: wake_reaper: pid=502
> >       oom_reaper-23    [000] ....    91.871177: start_task_reaping: pid=502
> >       oom_reaper-23    [000] .N..    91.879511: finish_task_reaping: pid=502
> >       oom_reaper-23    [000] ....    91.879580: skip_task_reaping: pid=502
> 
> OK, this is much better! The clue here would be that we got 2
> wakeups for the same task, right?
> Do you think it would make sense to put more context to those
> tracepoints? E.g. skip_task_reaping can be due to lock contention or the
> mm gone. wake_reaper is similar.

I agree, that some context might be useful under some circumstances,
but I don't think we should add any additional fields until we will have some examples
of where this data is actually useful. If we will need it, we can easily add it later.

Thanks!

Roman

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v2] mm,oom: add tracepoints for oom reaper-related events
@ 2017-06-01 18:41             ` Roman Gushchin
  0 siblings, 0 replies; 20+ messages in thread
From: Roman Gushchin @ 2017-06-01 18:41 UTC (permalink / raw)
  To: Michal Hocko
  Cc: Tetsuo Handa, Johannes Weiner, Vladimir Davydov, kernel-team,
	linux-kernel, linux-mm

On Wed, May 31, 2017 at 06:39:29PM +0200, Michal Hocko wrote:
> On Tue 30-05-17 19:52:31, Roman Gushchin wrote:
> > >From c57e3674efc609f8364f5e228a2c1309cfe99901 Mon Sep 17 00:00:00 2001
> > From: Roman Gushchin <guro@fb.com>
> > Date: Tue, 23 May 2017 17:37:55 +0100
> > Subject: [PATCH v2] mm,oom: add tracepoints for oom reaper-related events
> > 
> > During the debugging of the problem described in
> > https://lkml.org/lkml/2017/5/17/542 and fixed by Tetsuo Handa
> > in https://lkml.org/lkml/2017/5/19/383 , I've found that
> > the existing debug output is not really useful to understand
> > issues related to the oom reaper.
> > 
> > So, I assume, that adding some tracepoints might help with
> > debugging of similar issues.
> > 
> > Trace the following events:
> > 1) a process is marked as an oom victim,
> > 2) a process is added to the oom reaper list,
> > 3) the oom reaper starts reaping process's mm,
> > 4) the oom reaper finished reaping,
> > 5) the oom reaper skips reaping.
> > 
> > How it works in practice? Below is an example which show
> > how the problem mentioned above can be found: one process is added
> > twice to the oom_reaper list:
> > 
> > $ cd /sys/kernel/debug/tracing
> > $ echo "oom:mark_victim" > set_event
> > $ echo "oom:wake_reaper" >> set_event
> > $ echo "oom:skip_task_reaping" >> set_event
> > $ echo "oom:start_task_reaping" >> set_event
> > $ echo "oom:finish_task_reaping" >> set_event
> > $ cat trace_pipe
> >         allocate-502   [001] ....    91.836405: mark_victim: pid=502
> >         allocate-502   [001] .N..    91.837356: wake_reaper: pid=502
> >         allocate-502   [000] .N..    91.871149: wake_reaper: pid=502
> >       oom_reaper-23    [000] ....    91.871177: start_task_reaping: pid=502
> >       oom_reaper-23    [000] .N..    91.879511: finish_task_reaping: pid=502
> >       oom_reaper-23    [000] ....    91.879580: skip_task_reaping: pid=502
> 
> OK, this is much better! The clue here would be that we got 2
> wakeups for the same task, right?
> Do you think it would make sense to put more context to those
> tracepoints? E.g. skip_task_reaping can be due to lock contention or the
> mm gone. wake_reaper is similar.

I agree, that some context might be useful under some circumstances,
but I don't think we should add any additional fields until we will have some examples
of where this data is actually useful. If we will need it, we can easily add it later.

Thanks!

Roman

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v2] mm,oom: add tracepoints for oom reaper-related events
  2017-06-01 18:41             ` Roman Gushchin
@ 2017-06-02  8:13               ` Michal Hocko
  -1 siblings, 0 replies; 20+ messages in thread
From: Michal Hocko @ 2017-06-02  8:13 UTC (permalink / raw)
  To: Roman Gushchin
  Cc: Tetsuo Handa, Johannes Weiner, Vladimir Davydov, kernel-team,
	linux-kernel, linux-mm

On Thu 01-06-17 19:41:13, Roman Gushchin wrote:
> On Wed, May 31, 2017 at 06:39:29PM +0200, Michal Hocko wrote:
> > On Tue 30-05-17 19:52:31, Roman Gushchin wrote:
> > > >From c57e3674efc609f8364f5e228a2c1309cfe99901 Mon Sep 17 00:00:00 2001
> > > From: Roman Gushchin <guro@fb.com>
> > > Date: Tue, 23 May 2017 17:37:55 +0100
> > > Subject: [PATCH v2] mm,oom: add tracepoints for oom reaper-related events
> > > 
> > > During the debugging of the problem described in
> > > https://lkml.org/lkml/2017/5/17/542 and fixed by Tetsuo Handa
> > > in https://lkml.org/lkml/2017/5/19/383 , I've found that
> > > the existing debug output is not really useful to understand
> > > issues related to the oom reaper.
> > > 
> > > So, I assume, that adding some tracepoints might help with
> > > debugging of similar issues.
> > > 
> > > Trace the following events:
> > > 1) a process is marked as an oom victim,
> > > 2) a process is added to the oom reaper list,
> > > 3) the oom reaper starts reaping process's mm,
> > > 4) the oom reaper finished reaping,
> > > 5) the oom reaper skips reaping.
> > > 
> > > How it works in practice? Below is an example which show
> > > how the problem mentioned above can be found: one process is added
> > > twice to the oom_reaper list:
> > > 
> > > $ cd /sys/kernel/debug/tracing
> > > $ echo "oom:mark_victim" > set_event
> > > $ echo "oom:wake_reaper" >> set_event
> > > $ echo "oom:skip_task_reaping" >> set_event
> > > $ echo "oom:start_task_reaping" >> set_event
> > > $ echo "oom:finish_task_reaping" >> set_event
> > > $ cat trace_pipe
> > >         allocate-502   [001] ....    91.836405: mark_victim: pid=502
> > >         allocate-502   [001] .N..    91.837356: wake_reaper: pid=502
> > >         allocate-502   [000] .N..    91.871149: wake_reaper: pid=502
> > >       oom_reaper-23    [000] ....    91.871177: start_task_reaping: pid=502
> > >       oom_reaper-23    [000] .N..    91.879511: finish_task_reaping: pid=502
> > >       oom_reaper-23    [000] ....    91.879580: skip_task_reaping: pid=502
> > 
> > OK, this is much better! The clue here would be that we got 2
> > wakeups for the same task, right?
> > Do you think it would make sense to put more context to those
> > tracepoints? E.g. skip_task_reaping can be due to lock contention or the
> > mm gone. wake_reaper is similar.
> 
> I agree, that some context might be useful under some circumstances,
> but I don't think we should add any additional fields until we will have some examples
> of where this data is actually useful. If we will need it, we can easily add it later.

OK, fair enough.

Acked-by: Michal Hocko <mhocko@suse.com>
-- 
Michal Hocko
SUSE Labs

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v2] mm,oom: add tracepoints for oom reaper-related events
@ 2017-06-02  8:13               ` Michal Hocko
  0 siblings, 0 replies; 20+ messages in thread
From: Michal Hocko @ 2017-06-02  8:13 UTC (permalink / raw)
  To: Roman Gushchin
  Cc: Tetsuo Handa, Johannes Weiner, Vladimir Davydov, kernel-team,
	linux-kernel, linux-mm

On Thu 01-06-17 19:41:13, Roman Gushchin wrote:
> On Wed, May 31, 2017 at 06:39:29PM +0200, Michal Hocko wrote:
> > On Tue 30-05-17 19:52:31, Roman Gushchin wrote:
> > > >From c57e3674efc609f8364f5e228a2c1309cfe99901 Mon Sep 17 00:00:00 2001
> > > From: Roman Gushchin <guro@fb.com>
> > > Date: Tue, 23 May 2017 17:37:55 +0100
> > > Subject: [PATCH v2] mm,oom: add tracepoints for oom reaper-related events
> > > 
> > > During the debugging of the problem described in
> > > https://lkml.org/lkml/2017/5/17/542 and fixed by Tetsuo Handa
> > > in https://lkml.org/lkml/2017/5/19/383 , I've found that
> > > the existing debug output is not really useful to understand
> > > issues related to the oom reaper.
> > > 
> > > So, I assume, that adding some tracepoints might help with
> > > debugging of similar issues.
> > > 
> > > Trace the following events:
> > > 1) a process is marked as an oom victim,
> > > 2) a process is added to the oom reaper list,
> > > 3) the oom reaper starts reaping process's mm,
> > > 4) the oom reaper finished reaping,
> > > 5) the oom reaper skips reaping.
> > > 
> > > How it works in practice? Below is an example which show
> > > how the problem mentioned above can be found: one process is added
> > > twice to the oom_reaper list:
> > > 
> > > $ cd /sys/kernel/debug/tracing
> > > $ echo "oom:mark_victim" > set_event
> > > $ echo "oom:wake_reaper" >> set_event
> > > $ echo "oom:skip_task_reaping" >> set_event
> > > $ echo "oom:start_task_reaping" >> set_event
> > > $ echo "oom:finish_task_reaping" >> set_event
> > > $ cat trace_pipe
> > >         allocate-502   [001] ....    91.836405: mark_victim: pid=502
> > >         allocate-502   [001] .N..    91.837356: wake_reaper: pid=502
> > >         allocate-502   [000] .N..    91.871149: wake_reaper: pid=502
> > >       oom_reaper-23    [000] ....    91.871177: start_task_reaping: pid=502
> > >       oom_reaper-23    [000] .N..    91.879511: finish_task_reaping: pid=502
> > >       oom_reaper-23    [000] ....    91.879580: skip_task_reaping: pid=502
> > 
> > OK, this is much better! The clue here would be that we got 2
> > wakeups for the same task, right?
> > Do you think it would make sense to put more context to those
> > tracepoints? E.g. skip_task_reaping can be due to lock contention or the
> > mm gone. wake_reaper is similar.
> 
> I agree, that some context might be useful under some circumstances,
> but I don't think we should add any additional fields until we will have some examples
> of where this data is actually useful. If we will need it, we can easily add it later.

OK, fair enough.

Acked-by: Michal Hocko <mhocko@suse.com>
-- 
Michal Hocko
SUSE Labs

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v2] mm,oom: add tracepoints for oom reaper-related events
  2017-06-02  8:13               ` Michal Hocko
@ 2017-06-20 13:43                 ` Roman Gushchin
  -1 siblings, 0 replies; 20+ messages in thread
From: Roman Gushchin @ 2017-06-20 13:43 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel, linux-mm

Hi Andrew!

Can you, please, pull this patch?

Thank you!

Roman

On Fri, Jun 02, 2017 at 10:13:38AM +0200, Michal Hocko wrote:
> On Thu 01-06-17 19:41:13, Roman Gushchin wrote:
> > On Wed, May 31, 2017 at 06:39:29PM +0200, Michal Hocko wrote:
> > > On Tue 30-05-17 19:52:31, Roman Gushchin wrote:
> > > > >From c57e3674efc609f8364f5e228a2c1309cfe99901 Mon Sep 17 00:00:00 2001
> > > > From: Roman Gushchin <guro@fb.com>
> > > > Date: Tue, 23 May 2017 17:37:55 +0100
> > > > Subject: [PATCH v2] mm,oom: add tracepoints for oom reaper-related events
> > > > 
> > > > During the debugging of the problem described in
> > > > https://lkml.org/lkml/2017/5/17/542 and fixed by Tetsuo Handa
> > > > in https://lkml.org/lkml/2017/5/19/383 , I've found that
> > > > the existing debug output is not really useful to understand
> > > > issues related to the oom reaper.
> > > > 
> > > > So, I assume, that adding some tracepoints might help with
> > > > debugging of similar issues.
> > > > 
> > > > Trace the following events:
> > > > 1) a process is marked as an oom victim,
> > > > 2) a process is added to the oom reaper list,
> > > > 3) the oom reaper starts reaping process's mm,
> > > > 4) the oom reaper finished reaping,
> > > > 5) the oom reaper skips reaping.
> > > > 
> > > > How it works in practice? Below is an example which show
> > > > how the problem mentioned above can be found: one process is added
> > > > twice to the oom_reaper list:
> > > > 
> > > > $ cd /sys/kernel/debug/tracing
> > > > $ echo "oom:mark_victim" > set_event
> > > > $ echo "oom:wake_reaper" >> set_event
> > > > $ echo "oom:skip_task_reaping" >> set_event
> > > > $ echo "oom:start_task_reaping" >> set_event
> > > > $ echo "oom:finish_task_reaping" >> set_event
> > > > $ cat trace_pipe
> > > >         allocate-502   [001] ....    91.836405: mark_victim: pid=502
> > > >         allocate-502   [001] .N..    91.837356: wake_reaper: pid=502
> > > >         allocate-502   [000] .N..    91.871149: wake_reaper: pid=502
> > > >       oom_reaper-23    [000] ....    91.871177: start_task_reaping: pid=502
> > > >       oom_reaper-23    [000] .N..    91.879511: finish_task_reaping: pid=502
> > > >       oom_reaper-23    [000] ....    91.879580: skip_task_reaping: pid=502
> > > 
> > > OK, this is much better! The clue here would be that we got 2
> > > wakeups for the same task, right?
> > > Do you think it would make sense to put more context to those
> > > tracepoints? E.g. skip_task_reaping can be due to lock contention or the
> > > mm gone. wake_reaper is similar.
> > 
> > I agree, that some context might be useful under some circumstances,
> > but I don't think we should add any additional fields until we will have some examples
> > of where this data is actually useful. If we will need it, we can easily add it later.
> 
> OK, fair enough.
> 
> Acked-by: Michal Hocko <mhocko@suse.com>
> -- 
> Michal Hocko
> SUSE Labs

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v2] mm,oom: add tracepoints for oom reaper-related events
@ 2017-06-20 13:43                 ` Roman Gushchin
  0 siblings, 0 replies; 20+ messages in thread
From: Roman Gushchin @ 2017-06-20 13:43 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel, linux-mm

Hi Andrew!

Can you, please, pull this patch?

Thank you!

Roman

On Fri, Jun 02, 2017 at 10:13:38AM +0200, Michal Hocko wrote:
> On Thu 01-06-17 19:41:13, Roman Gushchin wrote:
> > On Wed, May 31, 2017 at 06:39:29PM +0200, Michal Hocko wrote:
> > > On Tue 30-05-17 19:52:31, Roman Gushchin wrote:
> > > > >From c57e3674efc609f8364f5e228a2c1309cfe99901 Mon Sep 17 00:00:00 2001
> > > > From: Roman Gushchin <guro@fb.com>
> > > > Date: Tue, 23 May 2017 17:37:55 +0100
> > > > Subject: [PATCH v2] mm,oom: add tracepoints for oom reaper-related events
> > > > 
> > > > During the debugging of the problem described in
> > > > https://lkml.org/lkml/2017/5/17/542 and fixed by Tetsuo Handa
> > > > in https://lkml.org/lkml/2017/5/19/383 , I've found that
> > > > the existing debug output is not really useful to understand
> > > > issues related to the oom reaper.
> > > > 
> > > > So, I assume, that adding some tracepoints might help with
> > > > debugging of similar issues.
> > > > 
> > > > Trace the following events:
> > > > 1) a process is marked as an oom victim,
> > > > 2) a process is added to the oom reaper list,
> > > > 3) the oom reaper starts reaping process's mm,
> > > > 4) the oom reaper finished reaping,
> > > > 5) the oom reaper skips reaping.
> > > > 
> > > > How it works in practice? Below is an example which show
> > > > how the problem mentioned above can be found: one process is added
> > > > twice to the oom_reaper list:
> > > > 
> > > > $ cd /sys/kernel/debug/tracing
> > > > $ echo "oom:mark_victim" > set_event
> > > > $ echo "oom:wake_reaper" >> set_event
> > > > $ echo "oom:skip_task_reaping" >> set_event
> > > > $ echo "oom:start_task_reaping" >> set_event
> > > > $ echo "oom:finish_task_reaping" >> set_event
> > > > $ cat trace_pipe
> > > >         allocate-502   [001] ....    91.836405: mark_victim: pid=502
> > > >         allocate-502   [001] .N..    91.837356: wake_reaper: pid=502
> > > >         allocate-502   [000] .N..    91.871149: wake_reaper: pid=502
> > > >       oom_reaper-23    [000] ....    91.871177: start_task_reaping: pid=502
> > > >       oom_reaper-23    [000] .N..    91.879511: finish_task_reaping: pid=502
> > > >       oom_reaper-23    [000] ....    91.879580: skip_task_reaping: pid=502
> > > 
> > > OK, this is much better! The clue here would be that we got 2
> > > wakeups for the same task, right?
> > > Do you think it would make sense to put more context to those
> > > tracepoints? E.g. skip_task_reaping can be due to lock contention or the
> > > mm gone. wake_reaper is similar.
> > 
> > I agree, that some context might be useful under some circumstances,
> > but I don't think we should add any additional fields until we will have some examples
> > of where this data is actually useful. If we will need it, we can easily add it later.
> 
> OK, fair enough.
> 
> Acked-by: Michal Hocko <mhocko@suse.com>
> -- 
> Michal Hocko
> SUSE Labs

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2017-06-20 13:44 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-05-30 12:05 [PATCH] mm,oom: add tracepoints for oom reaper-related events Roman Gushchin
2017-05-30 12:05 ` Roman Gushchin
2017-05-30 12:34 ` Michal Hocko
2017-05-30 12:34   ` Michal Hocko
2017-05-30 13:17   ` Tetsuo Handa
2017-05-30 13:17     ` Tetsuo Handa
2017-05-30 13:33   ` Roman Gushchin
2017-05-30 13:33     ` Roman Gushchin
2017-05-30 13:45     ` Michal Hocko
2017-05-30 13:45       ` Michal Hocko
2017-05-30 18:52       ` [PATCH v2] " Roman Gushchin
2017-05-30 18:52         ` Roman Gushchin
2017-05-31 16:39         ` Michal Hocko
2017-05-31 16:39           ` Michal Hocko
2017-06-01 18:41           ` Roman Gushchin
2017-06-01 18:41             ` Roman Gushchin
2017-06-02  8:13             ` Michal Hocko
2017-06-02  8:13               ` Michal Hocko
2017-06-20 13:43               ` Roman Gushchin
2017-06-20 13:43                 ` Roman Gushchin

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.