linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/3] delayacct: refactor the code to simplify the implementation
@ 2021-03-31  3:24 brookxu
  2021-03-31  3:24 ` [PATCH 2/3] delayacct: Add a proc file to dump the delay info brookxu
  2021-03-31  3:24 ` [PATCH 3/3] cgroup: use tsk->in_iowait instead of delayacct_is_task_waiting_on_io() brookxu
  0 siblings, 2 replies; 3+ messages in thread
From: brookxu @ 2021-03-31  3:24 UTC (permalink / raw)
  To: adobriyan, bsingharora, tj, lizefan.x, hannes, linux-kernel

From: Chunguang Xu <brookxu@tencent.com>

The existing data structure is not easy to extend, and some
of the code is redundant. Refactor it so that the code is
more concise and easier to extend.

Signed-off-by: Chunguang Xu <brookxu@tencent.com>
---
 include/linux/delayacct.h | 139 ++++++++++++++++++++--------------------------
 kernel/delayacct.c        |  93 ++++++++-----------------------
 2 files changed, 81 insertions(+), 151 deletions(-)

diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h
index 2d3bdcc..4986253 100644
--- a/include/linux/delayacct.h
+++ b/include/linux/delayacct.h
@@ -2,12 +2,15 @@
 /* delayacct.h - per-task delay accounting
  *
  * Copyright (C) Shailabh Nagar, IBM Corp. 2006
+ * Copyright (C) Chunguang Xu, Tencent Corp. 2021
  */
 
 #ifndef _LINUX_DELAYACCT_H
 #define _LINUX_DELAYACCT_H
 
 #include <uapi/linux/taskstats.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
 
 /*
  * Per-task flags relevant to delay accounting
@@ -15,71 +18,44 @@
  * Used to set current->delays->flags
  */
 #define DELAYACCT_PF_SWAPIN	0x00000001	/* I am doing a swapin */
-#define DELAYACCT_PF_BLKIO	0x00000002	/* I am waiting on IO */
 
 #ifdef CONFIG_TASK_DELAY_ACCT
+
+enum delayacct_item {
+	DELAYACCT_BLKIO,     /* block IO latency */
+	DELAYACCT_SWAPIN,    /* swapin IO latency*/
+	DELAYACCT_THRASHING, /* pagecache thrashing IO latency*/
+	DELAYACCT_FREEPAGES, /* memory reclaim latency*/
+	DELAYACCT_NR_ITEMS
+};
+
+struct delayacct_count {
+	u64 start;  /* start timestamp of XXX operation */
+	u64 count;  /* incremented on every XXX operation */
+	u64 delay;  /* accumulated delay time in nanoseconds */
+	u64 max;    /* maximum latency of XXX operation */
+};
+
 struct task_delay_info {
 	raw_spinlock_t	lock;
 	unsigned int	flags;	/* Private per-task flags */
-
-	/* For each stat XXX, add following, aligned appropriately
-	 *
-	 * struct timespec XXX_start, XXX_end;
-	 * u64 XXX_delay;
-	 * u32 XXX_count;
-	 *
-	 * Atomicity of updates to XXX_delay, XXX_count protected by
-	 * single lock above (split into XXX_lock if contention is an issue).
-	 */
-
-	/*
-	 * XXX_count is incremented on every XXX operation, the delay
-	 * associated with the operation is added to XXX_delay.
-	 * XXX_delay contains the accumulated delay time in nanoseconds.
-	 */
-	u64 blkio_start;	/* Shared by blkio, swapin */
-	u64 blkio_delay;	/* wait for sync block io completion */
-	u64 swapin_delay;	/* wait for swapin block io completion */
-	u32 blkio_count;	/* total count of the number of sync block */
-				/* io operations performed */
-	u32 swapin_count;	/* total count of the number of swapin block */
-				/* io operations performed */
-
-	u64 freepages_start;
-	u64 freepages_delay;	/* wait for memory reclaim */
-
-	u64 thrashing_start;
-	u64 thrashing_delay;	/* wait for thrashing page */
-
-	u32 freepages_count;	/* total count of memory reclaim */
-	u32 thrashing_count;	/* total count of thrash waits */
+	struct delayacct_count delays[DELAYACCT_NR_ITEMS];
 };
-#endif
-
-#include <linux/sched.h>
-#include <linux/slab.h>
 
-#ifdef CONFIG_TASK_DELAY_ACCT
 extern int delayacct_on;	/* Delay accounting turned on/off */
 extern struct kmem_cache *delayacct_cache;
 extern void delayacct_init(void);
 extern void __delayacct_tsk_init(struct task_struct *);
-extern void __delayacct_tsk_exit(struct task_struct *);
-extern void __delayacct_blkio_start(void);
-extern void __delayacct_blkio_end(struct task_struct *);
-extern int __delayacct_add_tsk(struct taskstats *, struct task_struct *);
-extern __u64 __delayacct_blkio_ticks(struct task_struct *);
-extern void __delayacct_freepages_start(void);
-extern void __delayacct_freepages_end(void);
-extern void __delayacct_thrashing_start(void);
-extern void __delayacct_thrashing_end(void);
-
-static inline int delayacct_is_task_waiting_on_io(struct task_struct *p)
+extern int  __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk);
+extern u64  __delayacct_blkio_ticks(struct task_struct *tsk);
+extern void __delayacct_end(struct task_delay_info *delays, int item);
+
+extern int  proc_delayacct_show(struct seq_file *m, struct pid_namespace *ns,
+			struct pid *pid, struct task_struct *task);
+
+static inline void __delayacct_start(struct task_delay_info *delays, int item)
 {
-	if (p->delays)
-		return (p->delays->flags & DELAYACCT_PF_BLKIO);
-	else
-		return 0;
+	delays->delays[item].start = ktime_get_ns();
 }
 
 static inline void delayacct_set_flag(int flag)
@@ -112,22 +88,7 @@ static inline void delayacct_tsk_free(struct task_struct *tsk)
 	tsk->delays = NULL;
 }
 
-static inline void delayacct_blkio_start(void)
-{
-	delayacct_set_flag(DELAYACCT_PF_BLKIO);
-	if (current->delays)
-		__delayacct_blkio_start();
-}
-
-static inline void delayacct_blkio_end(struct task_struct *p)
-{
-	if (p->delays)
-		__delayacct_blkio_end(p);
-	delayacct_clear_flag(DELAYACCT_PF_BLKIO);
-}
-
-static inline int delayacct_add_tsk(struct taskstats *d,
-					struct task_struct *tsk)
+static inline int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
 {
 	if (!delayacct_on || !tsk->delays)
 		return 0;
@@ -141,31 +102,52 @@ static inline __u64 delayacct_blkio_ticks(struct task_struct *tsk)
 	return 0;
 }
 
+static inline void delayacct_blkio_start(void)
+{
+	if (current->delays) {
+		if (current->delays->flags & DELAYACCT_PF_SWAPIN)
+			__delayacct_start(current->delays, DELAYACCT_SWAPIN);
+		else
+			__delayacct_start(current->delays, DELAYACCT_BLKIO);
+	}
+}
+
+static inline void delayacct_blkio_end(struct task_struct *p)
+{
+	if (p->delays) {
+		if (p->delays->flags & DELAYACCT_PF_SWAPIN)
+			__delayacct_end(p->delays, DELAYACCT_SWAPIN);
+		else
+			__delayacct_end(p->delays, DELAYACCT_BLKIO);
+	}
+}
+
 static inline void delayacct_freepages_start(void)
 {
 	if (current->delays)
-		__delayacct_freepages_start();
+		__delayacct_start(current->delays, DELAYACCT_FREEPAGES);
 }
 
 static inline void delayacct_freepages_end(void)
 {
 	if (current->delays)
-		__delayacct_freepages_end();
+		__delayacct_end(current->delays, DELAYACCT_FREEPAGES);
 }
 
 static inline void delayacct_thrashing_start(void)
 {
 	if (current->delays)
-		__delayacct_thrashing_start();
+		__delayacct_start(current->delays, DELAYACCT_THRASHING);
 }
 
 static inline void delayacct_thrashing_end(void)
 {
 	if (current->delays)
-		__delayacct_thrashing_end();
+		__delayacct_end(current->delays, DELAYACCT_THRASHING);
 }
 
 #else
+
 static inline void delayacct_set_flag(int flag)
 {}
 static inline void delayacct_clear_flag(int flag)
@@ -176,17 +158,14 @@ static inline void delayacct_tsk_init(struct task_struct *tsk)
 {}
 static inline void delayacct_tsk_free(struct task_struct *tsk)
 {}
+static inline int  delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
+{ return 0; }
+static inline u64  delayacct_blkio_ticks(struct task_struct *tsk)
+{ return 0; }
 static inline void delayacct_blkio_start(void)
 {}
 static inline void delayacct_blkio_end(struct task_struct *p)
 {}
-static inline int delayacct_add_tsk(struct taskstats *d,
-					struct task_struct *tsk)
-{ return 0; }
-static inline __u64 delayacct_blkio_ticks(struct task_struct *tsk)
-{ return 0; }
-static inline int delayacct_is_task_waiting_on_io(struct task_struct *p)
-{ return 0; }
 static inline void delayacct_freepages_start(void)
 {}
 static inline void delayacct_freepages_end(void)
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
index 27725754..ec580cb 100644
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -2,6 +2,7 @@
 /* delayacct.c - per-task delay accounting
  *
  * Copyright (C) Shailabh Nagar, IBM Corp. 2006
+ * Copyright (C) Chunguang Xu, Tencent Corp. 2021
  */
 
 #include <linux/sched.h>
@@ -42,48 +43,24 @@ void __delayacct_tsk_init(struct task_struct *tsk)
  * Finish delay accounting for a statistic using its timestamps (@start),
  * accumalator (@total) and @count
  */
-static void delayacct_end(raw_spinlock_t *lock, u64 *start, u64 *total,
-			  u32 *count)
+void __delayacct_end(struct task_delay_info *delays, int item)
 {
-	s64 ns = ktime_get_ns() - *start;
+	struct delayacct_count *delay = &delays->delays[item];
+	u64 ns = ktime_get_ns() - delay->start;
 	unsigned long flags;
 
 	if (ns > 0) {
-		raw_spin_lock_irqsave(lock, flags);
-		*total += ns;
-		(*count)++;
-		raw_spin_unlock_irqrestore(lock, flags);
+		raw_spin_lock_irqsave(&delays->lock, flags);
+		delay->max = max(delay->max, ns);
+		delay->delay += ns;
+		delay->count++;
+		raw_spin_unlock_irqrestore(&delays->lock, flags);
 	}
 }
 
-void __delayacct_blkio_start(void)
-{
-	current->delays->blkio_start = ktime_get_ns();
-}
-
-/*
- * We cannot rely on the `current` macro, as we haven't yet switched back to
- * the process being woken.
- */
-void __delayacct_blkio_end(struct task_struct *p)
-{
-	struct task_delay_info *delays = p->delays;
-	u64 *total;
-	u32 *count;
-
-	if (p->delays->flags & DELAYACCT_PF_SWAPIN) {
-		total = &delays->swapin_delay;
-		count = &delays->swapin_count;
-	} else {
-		total = &delays->blkio_delay;
-		count = &delays->blkio_count;
-	}
-
-	delayacct_end(&delays->lock, &delays->blkio_start, total, count);
-}
-
 int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
 {
+	struct delayacct_count *delays = tsk->delays->delays;
 	u64 utime, stime, stimescaled, utimescaled;
 	unsigned long long t2, t3;
 	unsigned long flags, t1;
@@ -120,58 +97,32 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
 	/* zero XXX_total, non-zero XXX_count implies XXX stat overflowed */
 
 	raw_spin_lock_irqsave(&tsk->delays->lock, flags);
-	tmp = d->blkio_delay_total + tsk->delays->blkio_delay;
+	tmp = d->blkio_delay_total + delays[DELAYACCT_BLKIO].delay;
 	d->blkio_delay_total = (tmp < d->blkio_delay_total) ? 0 : tmp;
-	tmp = d->swapin_delay_total + tsk->delays->swapin_delay;
+	tmp = d->swapin_delay_total + delays[DELAYACCT_SWAPIN].delay;
 	d->swapin_delay_total = (tmp < d->swapin_delay_total) ? 0 : tmp;
-	tmp = d->freepages_delay_total + tsk->delays->freepages_delay;
+	tmp = d->freepages_delay_total + delays[DELAYACCT_FREEPAGES].delay;
 	d->freepages_delay_total = (tmp < d->freepages_delay_total) ? 0 : tmp;
-	tmp = d->thrashing_delay_total + tsk->delays->thrashing_delay;
+	tmp = d->thrashing_delay_total + delays[DELAYACCT_THRASHING].delay;
 	d->thrashing_delay_total = (tmp < d->thrashing_delay_total) ? 0 : tmp;
-	d->blkio_count += tsk->delays->blkio_count;
-	d->swapin_count += tsk->delays->swapin_count;
-	d->freepages_count += tsk->delays->freepages_count;
-	d->thrashing_count += tsk->delays->thrashing_count;
+	d->blkio_count += delays[DELAYACCT_BLKIO].count;
+	d->swapin_count += delays[DELAYACCT_SWAPIN].count;
+	d->freepages_count += delays[DELAYACCT_FREEPAGES].count;
+	d->thrashing_count += delays[DELAYACCT_THRASHING].count;
 	raw_spin_unlock_irqrestore(&tsk->delays->lock, flags);
 
 	return 0;
 }
 
-__u64 __delayacct_blkio_ticks(struct task_struct *tsk)
+u64 __delayacct_blkio_ticks(struct task_struct *tsk)
 {
-	__u64 ret;
+	u64 ret;
 	unsigned long flags;
 
 	raw_spin_lock_irqsave(&tsk->delays->lock, flags);
-	ret = nsec_to_clock_t(tsk->delays->blkio_delay +
-				tsk->delays->swapin_delay);
+	ret = nsec_to_clock_t(tsk->delays->delays[DELAYACCT_BLKIO].delay +
+			      tsk->delays->delays[DELAYACCT_SWAPIN].delay);
 	raw_spin_unlock_irqrestore(&tsk->delays->lock, flags);
 	return ret;
 }
 
-void __delayacct_freepages_start(void)
-{
-	current->delays->freepages_start = ktime_get_ns();
-}
-
-void __delayacct_freepages_end(void)
-{
-	delayacct_end(
-		&current->delays->lock,
-		&current->delays->freepages_start,
-		&current->delays->freepages_delay,
-		&current->delays->freepages_count);
-}
-
-void __delayacct_thrashing_start(void)
-{
-	current->delays->thrashing_start = ktime_get_ns();
-}
-
-void __delayacct_thrashing_end(void)
-{
-	delayacct_end(&current->delays->lock,
-		      &current->delays->thrashing_start,
-		      &current->delays->thrashing_delay,
-		      &current->delays->thrashing_count);
-}
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [PATCH 2/3] delayacct: Add a proc file to dump the delay info
  2021-03-31  3:24 [PATCH 1/3] delayacct: refactor the code to simplify the implementation brookxu
@ 2021-03-31  3:24 ` brookxu
  2021-03-31  3:24 ` [PATCH 3/3] cgroup: use tsk->in_iowait instead of delayacct_is_task_waiting_on_io() brookxu
  1 sibling, 0 replies; 3+ messages in thread
From: brookxu @ 2021-03-31  3:24 UTC (permalink / raw)
  To: adobriyan, bsingharora, tj, lizefan.x, hannes, linux-kernel

From: Chunguang Xu <brookxu@tencent.com>

Many distributions do not install the getdelays tool by
default. Similar to task_io_accounting, add a proc file
to make the delay information easier to access.

Signed-off-by: Chunguang Xu <brookxu@tencent.com>
---
 fs/proc/base.c     |  7 +++++++
 kernel/delayacct.c | 41 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 48 insertions(+)

diff --git a/fs/proc/base.c b/fs/proc/base.c
index b3422cd..4de261a 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -96,6 +96,7 @@
 #include <linux/posix-timers.h>
 #include <linux/time_namespace.h>
 #include <linux/resctrl.h>
+#include <linux/delayacct.h>
 #include <trace/events/oom.h>
 #include "internal.h"
 #include "fd.h"
@@ -3244,6 +3245,9 @@ static int proc_stack_depth(struct seq_file *m, struct pid_namespace *ns,
 #ifdef CONFIG_TASK_IO_ACCOUNTING
 	ONE("io",	S_IRUSR, proc_tgid_io_accounting),
 #endif
+#ifdef CONFIG_TASK_DELAY_ACCT
+	ONE("delays",	S_IRUSR, proc_delayacct_show),
+#endif
 #ifdef CONFIG_USER_NS
 	REG("uid_map",    S_IRUGO|S_IWUSR, proc_uid_map_operations),
 	REG("gid_map",    S_IRUGO|S_IWUSR, proc_gid_map_operations),
@@ -3583,6 +3587,9 @@ static int proc_tid_comm_permission(struct inode *inode, int mask)
 #ifdef CONFIG_TASK_IO_ACCOUNTING
 	ONE("io",	S_IRUSR, proc_tid_io_accounting),
 #endif
+#ifdef CONFIG_TASK_DELAY_ACCT
+	ONE("delays",	S_IRUSR, proc_delayacct_show),
+#endif
 #ifdef CONFIG_USER_NS
 	REG("uid_map",    S_IRUGO|S_IWUSR, proc_uid_map_operations),
 	REG("gid_map",    S_IRUGO|S_IWUSR, proc_gid_map_operations),
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
index ec580cb..990af3b 100644
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -14,6 +14,7 @@
 #include <linux/sysctl.h>
 #include <linux/delayacct.h>
 #include <linux/module.h>
+#include <linux/seq_file.h>
 
 int delayacct_on __read_mostly = 1;	/* Delay accounting turned on/off */
 EXPORT_SYMBOL_GPL(delayacct_on);
@@ -26,6 +27,18 @@ static int __init delayacct_setup_disable(char *str)
 }
 __setup("nodelayacct", delayacct_setup_disable);
 
+struct delayacct_stat {
+	const char *name;
+	unsigned int idx;
+};
+
+struct delayacct_stat delayacct_stats[] = {
+	{"blkio", DELAYACCT_BLKIO},
+	{"swapin", DELAYACCT_SWAPIN},
+	{"pagecache_thrashing", DELAYACCT_THRASHING},
+	{"mem_reclaim", DELAYACCT_FREEPAGES}
+};
+
 void delayacct_init(void)
 {
 	delayacct_cache = KMEM_CACHE(task_delay_info, SLAB_PANIC|SLAB_ACCOUNT);
@@ -126,3 +139,31 @@ u64 __delayacct_blkio_ticks(struct task_struct *tsk)
 	return ret;
 }
 
+#define K(x) ((x) / 1000)
+
+int proc_delayacct_show(struct seq_file *m, struct pid_namespace *ns,
+		       struct pid *pid, struct task_struct *task)
+{
+	struct delayacct_count *delays;
+	int idx;
+
+	if (!task->delays)
+		return 0;
+
+	delays = task->delays->delays;
+	for (idx = 0; idx < ARRAY_SIZE(delayacct_stats); idx++) {
+		u32 item = delayacct_stats[idx].idx;
+		u64 mean = 0;
+
+		if (delays[item].count)
+			mean = div_u64(delays[item].delay, delays[item].count);
+
+		seq_printf(m, "%s %llu %llu %llu %llu\n",
+			   delayacct_stats[idx].name,
+			   K(mean),
+			   K(delays[item].max),
+			   delays[item].count,
+			   K(delays[item].delay));
+	}
+	return 0;
+}
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [PATCH 3/3] cgroup: use tsk->in_iowait instead of delayacct_is_task_waiting_on_io()
  2021-03-31  3:24 [PATCH 1/3] delayacct: refactor the code to simplify the implementation brookxu
  2021-03-31  3:24 ` [PATCH 2/3] delayacct: Add a proc file to dump the delay info brookxu
@ 2021-03-31  3:24 ` brookxu
  1 sibling, 0 replies; 3+ messages in thread
From: brookxu @ 2021-03-31  3:24 UTC (permalink / raw)
  To: adobriyan, bsingharora, tj, lizefan.x, hannes, linux-kernel

From: Chunguang Xu <brookxu@tencent.com>

If delayacct is disabled, delayacct_is_task_waiting_on_io()
always returns false, which makes the nr_io_wait statistic
wrong. Use tsk->in_iowait instead.

Signed-off-by: Chunguang Xu <brookxu@tencent.com>
---
 kernel/cgroup/cgroup-v1.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
index 32596fd..91991e2 100644
--- a/kernel/cgroup/cgroup-v1.c
+++ b/kernel/cgroup/cgroup-v1.c
@@ -727,7 +727,7 @@ int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
 			stats->nr_stopped++;
 			break;
 		default:
-			if (delayacct_is_task_waiting_on_io(tsk))
+			if (tsk->in_iowait)
 				stats->nr_io_wait++;
 			break;
 		}
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2021-03-31  3:25 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-03-31  3:24 [PATCH 1/3] delayacct: refactor the code to simplify the implementation brookxu
2021-03-31  3:24 ` [PATCH 2/3] delayacct: Add a proc file to dump the delay info brookxu
2021-03-31  3:24 ` [PATCH 3/3] cgroup: use tsk->in_iowait instead of delayacct_is_task_waiting_on_io() brookxu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).