linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] speed up /proc/pid/stat, statm
@ 2012-01-31  8:14 KAMEZAWA Hiroyuki
  2012-01-31 20:36 ` Andrew Morton
  0 siblings, 1 reply; 5+ messages in thread
From: KAMEZAWA Hiroyuki @ 2012-01-31  8:14 UTC (permalink / raw)
  To: linux-kernel; +Cc: akpm


This patch is based onto linux-mm
procfs-add-num_to_str-to-speed-up-proc-stat.patch +
procfs-add-num_to_str-to-speed-up-proc-stat-fix.patch

This patch is for /proc/pid/stat and you can see the effect with realistic applications.


>From 5b9be0e1e8757af0c91cff3b6258d6663a6f9224 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Tue, 31 Jan 2012 13:31:20 +0900
Subject: [PATCH] speedup /proc/<pid>/stat, statm

Process accounting applications such as top and ps read some files under
/proc/<pid>. With seq_put_decimal_ull(), we can optimize the /proc/<pid>/stat
and /proc/<pid>/statm files.

This patch adds
  - seq_put_decimal_ll() for signed values.
  - allow delimiter == 0.
  - convert seq_printf() to seq_put_decimal_ull/ll in /proc/<pid>/stat, statm.

Test result on a system with 2000+ procs.

Before patch:
[kamezawa@bluextal test]$ top -b -n 1 | wc -l
2223
[kamezawa@bluextal test]$ time top -b -n 1 > /dev/null

real    0m0.675s
user    0m0.044s
sys     0m0.121s

[kamezawa@bluextal test]$ time ps -elf > /dev/null

real    0m0.236s
user    0m0.056s
sys     0m0.176s

After patch:
[kamezawa@bluextal ~]$ time top -b -n 1 > /dev/null

real    0m0.657s
user    0m0.052s
sys     0m0.100s

[kamezawa@bluextal ~]$ time ps -elf > /dev/null

real    0m0.198s
user    0m0.050s
sys     0m0.145s

Considering top, ps tend to scan /proc periodically, this will
reduce cpu consumption by top/ps to some extent.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
---
 fs/proc/array.c          |  119 +++++++++++++++++++++++++---------------------
 fs/seq_file.c            |   21 ++++++++-
 include/linux/seq_file.h |    2 +
 3 files changed, 86 insertions(+), 56 deletions(-)

diff --git a/fs/proc/array.c b/fs/proc/array.c
index c602b8d..80e6f4e 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -462,59 +462,56 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 	/* convert nsec -> ticks */
 	start_time = nsec_to_clock_t(start_time);
 
-	seq_printf(m, "%d (%s) %c %d %d %d %d %d %u %lu \
-%lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \
-%lu %lu %lu %lu %lu %lu %lu %lu %d %d %u %u %llu %lu %ld %lu %lu %lu\n",
-		pid_nr_ns(pid, ns),
-		tcomm,
-		state,
-		ppid,
-		pgid,
-		sid,
-		tty_nr,
-		tty_pgrp,
-		task->flags,
-		min_flt,
-		cmin_flt,
-		maj_flt,
-		cmaj_flt,
-		cputime_to_clock_t(utime),
-		cputime_to_clock_t(stime),
-		cputime_to_clock_t(cutime),
-		cputime_to_clock_t(cstime),
-		priority,
-		nice,
-		num_threads,
-		start_time,
-		vsize,
-		mm ? get_mm_rss(mm) : 0,
-		rsslim,
-		mm ? (permitted ? mm->start_code : 1) : 0,
-		mm ? (permitted ? mm->end_code : 1) : 0,
-		(permitted && mm) ? mm->start_stack : 0,
-		esp,
-		eip,
-		/* The signal information here is obsolete.
-		 * It must be decimal for Linux 2.0 compatibility.
-		 * Use /proc/#/status for real-time signals.
-		 */
-		task->pending.signal.sig[0] & 0x7fffffffUL,
-		task->blocked.sig[0] & 0x7fffffffUL,
-		sigign      .sig[0] & 0x7fffffffUL,
-		sigcatch    .sig[0] & 0x7fffffffUL,
-		wchan,
-		0UL,
-		0UL,
-		task->exit_signal,
-		task_cpu(task),
-		task->rt_priority,
-		task->policy,
-		(unsigned long long)delayacct_blkio_ticks(task),
-		cputime_to_clock_t(gtime),
-		cputime_to_clock_t(cgtime),
-		(mm && permitted) ? mm->start_data : 0,
-		(mm && permitted) ? mm->end_data : 0,
-		(mm && permitted) ? mm->start_brk : 0);
+	seq_printf(m,"%d (%s) %c", pid_nr_ns(pid, ns), tcomm, state);
+	seq_put_decimal_ll(m, ' ', ppid);
+	seq_put_decimal_ll(m, ' ', pgid);
+	seq_put_decimal_ll(m, ' ', sid);
+	seq_put_decimal_ll(m, ' ', tty_nr);
+	seq_put_decimal_ll(m, ' ', tty_pgrp);
+	seq_put_decimal_ull(m, ' ', task->flags);
+	seq_put_decimal_ull(m, ' ', min_flt);
+	seq_put_decimal_ull(m, ' ', cmin_flt);
+	seq_put_decimal_ull(m, ' ', maj_flt);
+	seq_put_decimal_ull(m, ' ', cmaj_flt);
+	seq_put_decimal_ull(m, ' ', cputime_to_clock_t(utime));
+	seq_put_decimal_ull(m, ' ', cputime_to_clock_t(stime));
+	seq_put_decimal_ll(m, ' ', cputime_to_clock_t(cutime));
+	seq_put_decimal_ll(m, ' ', cputime_to_clock_t(cstime));
+	seq_put_decimal_ll(m, ' ', priority);
+	seq_put_decimal_ll(m, ' ', nice);
+	seq_put_decimal_ll(m, ' ', num_threads);
+	seq_put_decimal_ull(m, ' ', 0);
+	seq_put_decimal_ull(m, ' ', start_time);
+	seq_put_decimal_ull(m, ' ', vsize);
+	seq_put_decimal_ll(m, ' ', mm ? get_mm_rss(mm) : 0);
+	seq_put_decimal_ull(m, ' ', rsslim);
+	seq_put_decimal_ull(m, ' ', mm ? (permitted ? mm->start_code : 1) : 0);
+	seq_put_decimal_ull(m, ' ', mm ? (permitted ? mm->end_code : 1) : 0);
+	seq_put_decimal_ull(m, ' ', (permitted && mm) ? mm->start_stack : 0);
+	seq_put_decimal_ull(m, ' ', esp);
+	seq_put_decimal_ull(m, ' ', eip);
+	/* The signal information here is obsolete.
+	 * It must be decimal for Linux 2.0 compatibility.
+	 * Use /proc/#/status for real-time signals.
+	 */
+	seq_put_decimal_ull(m, ' ', task->pending.signal.sig[0] & 0x7fffffffUL);
+	seq_put_decimal_ull(m, ' ', task->blocked.sig[0] & 0x7fffffffUL);
+	seq_put_decimal_ull(m, ' ', sigign.sig[0] & 0x7fffffffUL);
+	seq_put_decimal_ull(m, ' ', sigcatch.sig[0] & 0x7fffffffUL);
+	seq_put_decimal_ull(m, ' ', wchan);
+	seq_put_decimal_ull(m, ' ', 0);
+	seq_put_decimal_ull(m, ' ', 0);
+	seq_put_decimal_ll(m, ' ', task->exit_signal);
+	seq_put_decimal_ll(m, ' ', task_cpu(task));
+	seq_put_decimal_ull(m, ' ', task->rt_priority);
+	seq_put_decimal_ull(m, ' ', task->policy);
+	seq_put_decimal_ull(m, ' ', delayacct_blkio_ticks(task));
+	seq_put_decimal_ull(m, ' ', cputime_to_clock_t(gtime));
+	seq_put_decimal_ll(m, ' ', cputime_to_clock_t(cgtime));
+	seq_put_decimal_ull(m, ' ', (mm && permitted) ? mm->start_data : 0);
+	seq_put_decimal_ull(m, ' ', (mm && permitted) ? mm->end_data : 0);
+	seq_put_decimal_ull(m, ' ', (mm && permitted) ? mm->start_brk : 0);
+	seq_putc(m, '\n');
 	if (mm)
 		mmput(mm);
 	return 0;
@@ -542,8 +539,20 @@ int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns,
 		size = task_statm(mm, &shared, &text, &data, &resident);
 		mmput(mm);
 	}
-	seq_printf(m, "%lu %lu %lu %lu 0 %lu 0\n",
-			size, resident, shared, text, data);
+	/*
+	 * Open-coded for speed: emit each number directly instead of going
+	 * through seq_printf(). The output must stay identical to
+	 * seq_printf(m, "%lu %lu %lu %lu 0 %lu 0\n",
+	 *               size, resident, shared, text, data);
+	 */
+	seq_put_decimal_ull(m, 0, size);
+	seq_put_decimal_ull(m, ' ', resident);
+	seq_put_decimal_ull(m, ' ', shared);
+	seq_put_decimal_ull(m, ' ', text);
+	seq_put_decimal_ull(m, ' ', 0);
+	seq_put_decimal_ull(m, ' ', data);
+	seq_put_decimal_ull(m, ' ', 0);
+	seq_putc(m, '\n');
 
 	return 0;
 }
diff --git a/fs/seq_file.c b/fs/seq_file.c
index bb773d5..eea2b01 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -657,7 +657,8 @@ int seq_put_decimal_ull(struct seq_file *m, char delimiter,
 	if (m->count + 2 >= m->size) /* we'll write 2 bytes at least */
 		goto overflow;
 
-	m->buf[m->count++] = delimiter;
+	if (delimiter)
+		m->buf[m->count++] = delimiter;
 
 	if (num < 10) {
 		m->buf[m->count++] = num + '0';
@@ -675,6 +676,24 @@ overflow:
 }
 EXPORT_SYMBOL(seq_put_decimal_ull);
 
+int seq_put_decimal_ll(struct seq_file *m, char delimiter,
+			long long num)
+{	/* signed variant: handles the sign, then defers to seq_put_decimal_ull() */
+	if (num < 0) {
+		if (m->count + 3 >= m->size) { /* delimiter, '-' and one digit */
+			m->count = m->size; /* count == size flags overflow */
+			return -1;
+		}
+		if (delimiter)	/* delimiter == 0 means "no delimiter" */
+			m->buf[m->count++] = delimiter;
+		num = -num;	/* NOTE(review): overflows for LLONG_MIN - confirm */
+		delimiter = '-';	/* let the ull helper emit the sign */
+	}
+	return seq_put_decimal_ull(m, delimiter, num);
+
+}
+EXPORT_SYMBOL(seq_put_decimal_ll);
+
 /**
  * seq_write - write arbitrary data to buffer
  * @seq: seq_file identifying the buffer to which data should be written
diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
index 5bba42c..54e5ae7 100644
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -123,6 +123,8 @@ int seq_open_private(struct file *, const struct seq_operations *, int);
 int seq_release_private(struct inode *, struct file *);
 int seq_put_decimal_ull(struct seq_file *m, char delimiter,
 			unsigned long long num);
+int seq_put_decimal_ll(struct seq_file *m, char delimiter,
+			long long num);
 
 #define SEQ_START_TOKEN ((void *)1)
 /*
-- 
1.7.4.1



^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH] speed up /proc/pid/stat, statm
  2012-01-31  8:14 [PATCH] speed up /proc/pid/stat, statm KAMEZAWA Hiroyuki
@ 2012-01-31 20:36 ` Andrew Morton
  2012-01-31 23:55   ` KAMEZAWA Hiroyuki
  0 siblings, 1 reply; 5+ messages in thread
From: Andrew Morton @ 2012-01-31 20:36 UTC (permalink / raw)
  To: KAMEZAWA Hiroyuki; +Cc: linux-kernel

On Tue, 31 Jan 2012 17:14:43 +0900
KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> wrote:

> +int seq_put_decimal_ll(struct seq_file *m, char delimiter,
> +			long long num)
> +{
> +	if (num < 0) {
> +		if (m->count + 3 >= m->size) {
> +			m->count = m->size;

Why is m->count updated even though we didn't write any data?
seq_put_decimal_ull() does it too.

> +			return -1;
> +		}
> +		if (delimiter)
> +			m->buf[m->count++] = delimiter;
> +		num = -num;
> +		delimiter = '-';
> +	}
> +	return seq_put_decimal_ull(m, delimiter, num);
> +
> +}
> +EXPORT_SYMBOL(seq_put_decimal_ll);

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] speed up /proc/pid/stat, statm
  2012-01-31 20:36 ` Andrew Morton
@ 2012-01-31 23:55   ` KAMEZAWA Hiroyuki
  2012-02-01  5:54     ` Eric Dumazet
  0 siblings, 1 reply; 5+ messages in thread
From: KAMEZAWA Hiroyuki @ 2012-01-31 23:55 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel

On Tue, 31 Jan 2012 12:36:12 -0800
Andrew Morton <akpm@linux-foundation.org> wrote:

> On Tue, 31 Jan 2012 17:14:43 +0900
> KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> wrote:
> 
> > +int seq_put_decimal_ll(struct seq_file *m, char delimiter,
> > +			long long num)
> > +{
> > +	if (num < 0) {
> > +		if (m->count + 3 >= m->size) {
> > +			m->count = m->size;
> 
> Why is m->count updated even though we didn't write any data?
> seq_put_decimal_ull() does it too.
> 

seq_xxxx functions set m->count = m->size when they detect a possible buffer
overflow. If m->count == m->size after ->show(), the buffer is freed, a buffer
of twice the size is allocated, and the output is retried.

If we don't set m->count == m->size here, following seq_putc() will succeed and
we'll see corrupted outputs.


Thanks,
-Kame



> > +			return -1;
> > +		}
> > +		if (delimiter)
> > +			m->buf[m->count++] = delimiter;
> > +		num = -num;
> > +		delimiter = '-';
> > +	}
> > +	return seq_put_decimal_ull(m, delimiter, num);
> > +
> > +}
> > +EXPORT_SYMBOL(seq_put_decimal_ll);
> 


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] speed up /proc/pid/stat, statm
  2012-01-31 23:55   ` KAMEZAWA Hiroyuki
@ 2012-02-01  5:54     ` Eric Dumazet
  2012-02-01  8:56       ` KAMEZAWA Hiroyuki
  0 siblings, 1 reply; 5+ messages in thread
From: Eric Dumazet @ 2012-02-01  5:54 UTC (permalink / raw)
  To: KAMEZAWA Hiroyuki; +Cc: Andrew Morton, linux-kernel

Le mercredi 01 février 2012 à 08:55 +0900, KAMEZAWA Hiroyuki a écrit :

> seq_xxxx functions set m->count == m->size when it finds possible buffer overflow.
> If m->count == m->size after ->show(), buffer will be freed and twice size buffer
> will be re-allocated, and retry.
> 
> If we don't set m->count == m->size here, following seq_putc() will succeed and
> we'll see corrupted outputs.

I suppose we could define a self documented helper

void seq_overflow(struct seq_file *seq)
{
	seq->count = seq->size;
}

Or if we were counting the approximate number of missing bytes, we could
avoid some iterations of the "double size buffer".

(letting seq->count going above seq->size, but not allowing buffer
overflow of course !)




^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] speed up /proc/pid/stat, statm
  2012-02-01  5:54     ` Eric Dumazet
@ 2012-02-01  8:56       ` KAMEZAWA Hiroyuki
  0 siblings, 0 replies; 5+ messages in thread
From: KAMEZAWA Hiroyuki @ 2012-02-01  8:56 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Andrew Morton, linux-kernel

On Wed, 01 Feb 2012 06:54:42 +0100
Eric Dumazet <eric.dumazet@gmail.com> wrote:

> Le mercredi 01 février 2012 à 08:55 +0900, KAMEZAWA Hiroyuki a écrit :
> 
> > seq_xxxx functions set m->count == m->size when it finds possible buffer overflow.
> > If m->count == m->size after ->show(), buffer will be freed and twice size buffer
> > will be re-allocated, and retry.
> > 
> > If we don't set m->count == m->size here, following seq_putc() will succeed and
> > we'll see corrupted outputs.
> 
> I suppose we could define a self documented helper
> 
> void seq_overflow(struct seq_file *seq)
> {
> 	seq->count = seq->size;
> }
> 

How about this ?
==
>From 76769c79d44ae0532c32113a352dc0269bb7a7d5 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Wed, 1 Feb 2012 18:10:16 +0900
Subject: [PATCH] add seq_set_overflow(), seq_overflow()

It's undocumented but seq file's overflow state is checked by
m->count == m->size. This patch adds seq_set_overflow() and
seq_overflow() to set/check overflow status explicitly.

Based on an idea from Eric Dumazet <eric.dumazet@gmail.com>

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
---
 fs/seq_file.c |   36 ++++++++++++++++++++++++++----------
 1 files changed, 26 insertions(+), 10 deletions(-)

diff --git a/fs/seq_file.c b/fs/seq_file.c
index eea2b01..8712652 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -13,6 +13,22 @@
 #include <asm/uaccess.h>
 #include <asm/page.h>
 
+
+/*
+ * A seq_file prints into a buffer that may be too small. When that happens
+ * a larger buffer is allocated and all of the data is printed again.
+ * For now the overflow state is encoded as m->count == m->size.
+ */
+static bool seq_overflow(struct seq_file *m)
+{
+	return m->count == m->size;
+}
+
+static void seq_set_overflow(struct seq_file *m)
+{
+	m->count = m->size;
+}
+
 /**
  *	seq_open -	initialize sequential file
  *	@file: file we initialize
@@ -92,7 +108,7 @@ static int traverse(struct seq_file *m, loff_t offset)
 			error = 0;
 			m->count = 0;
 		}
-		if (m->count == m->size)
+		if (seq_overflow(m))
 			goto Eoverflow;
 		if (pos + m->count > offset) {
 			m->from = offset - pos;
@@ -232,7 +248,7 @@ Fill:
 			break;
 		}
 		err = m->op->show(m, p);
-		if (m->count == m->size || err) {
+		if (seq_overflow(m) || err) {
 			m->count = offs;
 			if (likely(err <= 0))
 				break;
@@ -359,7 +375,7 @@ int seq_escape(struct seq_file *m, const char *s, const char *esc)
 			*p++ = '0' + (c & 07);
 			continue;
 		}
-		m->count = m->size;
+		seq_set_overflow(m);
 		return -1;
         }
 	m->count = p - m->buf;
@@ -381,7 +397,7 @@ int seq_printf(struct seq_file *m, const char *f, ...)
 			return 0;
 		}
 	}
-	m->count = m->size;
+	seq_set_overflow(m);
 	return -1;
 }
 EXPORT_SYMBOL(seq_printf);
@@ -510,7 +526,7 @@ int seq_bitmap(struct seq_file *m, const unsigned long *bits,
 			return 0;
 		}
 	}
-	m->count = m->size;
+	seq_set_overflow(m);
 	return -1;
 }
 EXPORT_SYMBOL(seq_bitmap);
@@ -526,7 +542,7 @@ int seq_bitmap_list(struct seq_file *m, const unsigned long *bits,
 			return 0;
 		}
 	}
-	m->count = m->size;
+	seq_set_overflow(m);
 	return -1;
 }
 EXPORT_SYMBOL(seq_bitmap_list);
@@ -637,7 +653,7 @@ int seq_puts(struct seq_file *m, const char *s)
 		m->count += len;
 		return 0;
 	}
-	m->count = m->size;
+	seq_set_overflow(m);
 	return -1;
 }
 EXPORT_SYMBOL(seq_puts);
@@ -671,7 +687,7 @@ int seq_put_decimal_ull(struct seq_file *m, char delimiter,
 	m->count += len;
 	return 0;
 overflow:
-	m->count = m->size;
+	seq_set_overflow(m);
 	return -1;
 }
 EXPORT_SYMBOL(seq_put_decimal_ull);
@@ -681,7 +697,7 @@ int seq_put_decimal_ll(struct seq_file *m, char delimiter,
 {
 	if (num < 0) {
 		if (m->count + 3 >= m->size) {
-			m->count = m->size;
+			seq_set_overflow(m);
 			return -1;
 		}
 		if (delimiter)
@@ -709,7 +725,7 @@ int seq_write(struct seq_file *seq, const void *data, size_t len)
 		seq->count += len;
 		return 0;
 	}
-	seq->count = seq->size;
+	seq_set_overflow(seq);
 	return -1;
 }
 EXPORT_SYMBOL(seq_write);
-- 
1.7.4.1



^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2012-02-01  8:58 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-01-31  8:14 [PATCH] speed up /proc/pid/stat, statm KAMEZAWA Hiroyuki
2012-01-31 20:36 ` Andrew Morton
2012-01-31 23:55   ` KAMEZAWA Hiroyuki
2012-02-01  5:54     ` Eric Dumazet
2012-02-01  8:56       ` KAMEZAWA Hiroyuki

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).