All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/4 v2] proc: add seq_put_decimal_ull_width to speed up /proc/pid/smaps
@ 2018-02-12  7:49 Andrei Vagin
  2018-02-12  7:49 ` [PATCH 2/4] proc: replace seq_printf on seq_putc " Andrei Vagin
                   ` (2 more replies)
  0 siblings, 3 replies; 6+ messages in thread
From: Andrei Vagin @ 2018-02-12  7:49 UTC (permalink / raw)
  To: linux-fsdevel, Andrew Morton
  Cc: Alexey Dobriyan, Andrei Vagin, KAMEZAWA Hiroyuki

seq_put_decimal_ull_width(m, str, val, width) prints a decimal number with a
specified minimal field width.

It is equivalent to seq_printf(m, "%s%*d", str, width, val), but it
works much faster.

== test_smaps.py
# Benchmark: read /proc/1/smaps in full 10000 times through one open file,
# rewinding to the start after every read.
num = 0  # not used anywhere else in this snippet
with open("/proc/1/smaps") as f:
        # xrange exists only in Python 2, so this script is written for Python 2.
        for x in xrange(10000):
                data = f.read()
                f.seek(0, 0)  # rewind so the next read() starts at offset 0
==

== Before patch ==
$ time python test_smaps.py

real    0m4.593s
user    0m0.398s
sys     0m4.158s

== After patch ==
$ time python test_smaps.py

real    0m3.828s
user    0m0.413s
sys     0m3.408s

$ perf -g record python test_smaps.py
== Before patch ==
-   79.01%     3.36%  python   [kernel.kallsyms]    [k] show_smap.isra.33
   - 75.65% show_smap.isra.33
      + 48.85% seq_printf
      + 15.75% __walk_page_range
      + 9.70% show_map_vma.isra.23
        0.61% seq_puts

== After patch ==
-   75.51%     4.62%  python   [kernel.kallsyms]    [k] show_smap.isra.33
   - 70.88% show_smap.isra.33
      + 24.82% seq_put_decimal_ull_w
      + 19.78% __walk_page_range
      + 12.74% seq_printf
      + 11.08% show_map_vma.isra.23
      + 1.68% seq_puts

v2: rename seq_put_decimal_ull_align into seq_put_decimal_ull_width

Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrei Vagin <avagin@openvz.org>
---
 fs/proc/meminfo.c        |  15 +-----
 fs/proc/task_mmu.c       | 127 +++++++++++++++++++----------------------------
 fs/seq_file.c            |  28 ++++++++---
 include/linux/kernel.h   |   3 +-
 include/linux/seq_file.h |   2 +
 lib/vsprintf.c           |  18 +++++--
 6 files changed, 92 insertions(+), 101 deletions(-)

diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 6bb20f864259..65a72ab57471 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -26,20 +26,7 @@ void __attribute__((weak)) arch_report_meminfo(struct seq_file *m)
 
 static void show_val_kb(struct seq_file *m, const char *s, unsigned long num)
 {
-	char v[32];
-	static const char blanks[7] = {' ', ' ', ' ', ' ',' ', ' ', ' '};
-	int len;
-
-	len = num_to_str(v, sizeof(v), num << (PAGE_SHIFT - 10));
-
-	seq_write(m, s, 16);
-
-	if (len > 0) {
-		if (len < 8)
-			seq_write(m, blanks, 8 - len);
-
-		seq_write(m, v, len);
-	}
+	seq_put_decimal_ull_width(m, s, num << (PAGE_SHIFT - 10), 8);
 	seq_write(m, " kB\n", 4);
 }
 
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index b66fc8de7d34..3026feda0432 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -24,6 +24,8 @@
 #include <asm/tlbflush.h>
 #include "internal.h"
 
+#define SEQ_PUT_DEC(str, val) \
+		seq_put_decimal_ull_width(m, str, (val) << (PAGE_SHIFT-10), 8)
 void task_mem(struct seq_file *m, struct mm_struct *mm)
 {
 	unsigned long text, lib, swap, anon, file, shmem;
@@ -53,39 +55,28 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
 	lib = (mm->exec_vm << PAGE_SHIFT) - text;
 
 	swap = get_mm_counter(mm, MM_SWAPENTS);
-	seq_printf(m,
-		"VmPeak:\t%8lu kB\n"
-		"VmSize:\t%8lu kB\n"
-		"VmLck:\t%8lu kB\n"
-		"VmPin:\t%8lu kB\n"
-		"VmHWM:\t%8lu kB\n"
-		"VmRSS:\t%8lu kB\n"
-		"RssAnon:\t%8lu kB\n"
-		"RssFile:\t%8lu kB\n"
-		"RssShmem:\t%8lu kB\n"
-		"VmData:\t%8lu kB\n"
-		"VmStk:\t%8lu kB\n"
-		"VmExe:\t%8lu kB\n"
-		"VmLib:\t%8lu kB\n"
-		"VmPTE:\t%8lu kB\n"
-		"VmSwap:\t%8lu kB\n",
-		hiwater_vm << (PAGE_SHIFT-10),
-		total_vm << (PAGE_SHIFT-10),
-		mm->locked_vm << (PAGE_SHIFT-10),
-		mm->pinned_vm << (PAGE_SHIFT-10),
-		hiwater_rss << (PAGE_SHIFT-10),
-		total_rss << (PAGE_SHIFT-10),
-		anon << (PAGE_SHIFT-10),
-		file << (PAGE_SHIFT-10),
-		shmem << (PAGE_SHIFT-10),
-		mm->data_vm << (PAGE_SHIFT-10),
-		mm->stack_vm << (PAGE_SHIFT-10),
-		text >> 10,
-		lib >> 10,
-		mm_pgtables_bytes(mm) >> 10,
-		swap << (PAGE_SHIFT-10));
+	SEQ_PUT_DEC("VmPeak:\t", hiwater_vm);
+	SEQ_PUT_DEC(" kB\nVmSize:\t", total_vm);
+	SEQ_PUT_DEC(" kB\nVmLck:\t", mm->locked_vm);
+	SEQ_PUT_DEC(" kB\nVmPin:\t", mm->pinned_vm);
+	SEQ_PUT_DEC(" kB\nVmHWM:\t", hiwater_rss);
+	SEQ_PUT_DEC(" kB\nVmRSS:\t", total_rss);
+	SEQ_PUT_DEC(" kB\nRssAnon:\t", anon);
+	SEQ_PUT_DEC(" kB\nRssFile:\t", file);
+	SEQ_PUT_DEC(" kB\nRssShmem:\t", shmem);
+	SEQ_PUT_DEC(" kB\nVmData:\t", mm->data_vm);
+	SEQ_PUT_DEC(" kB\nVmStk:\t", mm->stack_vm);
+	seq_put_decimal_ull_width(m,
+		    " kB\nVmExe:\t", text >> 10, 8);
+	seq_put_decimal_ull_width(m,
+		    " kB\nVmLib:\t", lib >> 10, 8);
+	seq_put_decimal_ull_width(m,
+		    " kB\nVmPTE:\t", mm_pgtables_bytes(mm) >> 10, 8);
+	SEQ_PUT_DEC(" kB\nVmSwap:\t", swap);
+	seq_puts(m, " kB\n");
 	hugetlb_report_usage(m, mm);
 }
+#undef SEQ_PUT_DEC
 
 unsigned long task_vsize(struct mm_struct *mm)
 {
@@ -739,6 +730,8 @@ void __weak arch_show_smap(struct seq_file *m, struct vm_area_struct *vma)
 {
 }
 
+#define SEQ_PUT_DEC(str, val) \
+		seq_put_decimal_ull_width(m, str, (val) >> 10, 8)
 static int show_smap(struct seq_file *m, void *v, int is_pid)
 {
 	struct proc_maps_private *priv = m->private;
@@ -812,51 +805,34 @@ static int show_smap(struct seq_file *m, void *v, int is_pid)
 		ret = SEQ_SKIP;
 	}
 
-	if (!rollup_mode)
-		seq_printf(m,
-			   "Size:           %8lu kB\n"
-			   "KernelPageSize: %8lu kB\n"
-			   "MMUPageSize:    %8lu kB\n",
-			   (vma->vm_end - vma->vm_start) >> 10,
-			   vma_kernel_pagesize(vma) >> 10,
-			   vma_mmu_pagesize(vma) >> 10);
-
-
-	if (!rollup_mode || last_vma)
-		seq_printf(m,
-			   "Rss:            %8lu kB\n"
-			   "Pss:            %8lu kB\n"
-			   "Shared_Clean:   %8lu kB\n"
-			   "Shared_Dirty:   %8lu kB\n"
-			   "Private_Clean:  %8lu kB\n"
-			   "Private_Dirty:  %8lu kB\n"
-			   "Referenced:     %8lu kB\n"
-			   "Anonymous:      %8lu kB\n"
-			   "LazyFree:       %8lu kB\n"
-			   "AnonHugePages:  %8lu kB\n"
-			   "ShmemPmdMapped: %8lu kB\n"
-			   "Shared_Hugetlb: %8lu kB\n"
-			   "Private_Hugetlb: %7lu kB\n"
-			   "Swap:           %8lu kB\n"
-			   "SwapPss:        %8lu kB\n"
-			   "Locked:         %8lu kB\n",
-			   mss->resident >> 10,
-			   (unsigned long)(mss->pss >> (10 + PSS_SHIFT)),
-			   mss->shared_clean  >> 10,
-			   mss->shared_dirty  >> 10,
-			   mss->private_clean >> 10,
-			   mss->private_dirty >> 10,
-			   mss->referenced >> 10,
-			   mss->anonymous >> 10,
-			   mss->lazyfree >> 10,
-			   mss->anonymous_thp >> 10,
-			   mss->shmem_thp >> 10,
-			   mss->shared_hugetlb >> 10,
-			   mss->private_hugetlb >> 10,
-			   mss->swap >> 10,
-			   (unsigned long)(mss->swap_pss >> (10 + PSS_SHIFT)),
-			   (unsigned long)(mss->pss >> (10 + PSS_SHIFT)));
+	if (!rollup_mode) {
+		SEQ_PUT_DEC("Size:           ", vma->vm_end - vma->vm_start);
+		SEQ_PUT_DEC(" kB\nKernelPageSize: ", vma_kernel_pagesize(vma));
+		SEQ_PUT_DEC(" kB\nMMUPageSize:    ", vma_mmu_pagesize(vma));
+		seq_puts(m, " kB\n");
+	}
 
+	if (!rollup_mode || last_vma) {
+		SEQ_PUT_DEC("Rss:            ", mss->resident);
+		SEQ_PUT_DEC(" kB\nPss:            ", mss->pss >> PSS_SHIFT);
+		SEQ_PUT_DEC(" kB\nShared_Clean:   ", mss->shared_clean);
+		SEQ_PUT_DEC(" kB\nShared_Dirty:   ", mss->shared_dirty);
+		SEQ_PUT_DEC(" kB\nPrivate_Clean:  ", mss->private_clean);
+		SEQ_PUT_DEC(" kB\nPrivate_Dirty:  ", mss->private_dirty);
+		SEQ_PUT_DEC(" kB\nReferenced:     ", mss->referenced);
+		SEQ_PUT_DEC(" kB\nAnonymous:      ", mss->anonymous);
+		SEQ_PUT_DEC(" kB\nLazyFree:       ", mss->lazyfree);
+		SEQ_PUT_DEC(" kB\nAnonHugePages:  ", mss->anonymous_thp);
+		SEQ_PUT_DEC(" kB\nShmemPmdMapped: ", mss->shmem_thp);
+		SEQ_PUT_DEC(" kB\nShared_Hugetlb: ", mss->shared_hugetlb);
+		seq_put_decimal_ull_width(m, " kB\nPrivate_Hugetlb: ",
+					  mss->private_hugetlb >> 10, 7);
+		SEQ_PUT_DEC(" kB\nSwap:           ", mss->swap);
+		SEQ_PUT_DEC(" kB\nSwapPss:        ",
+						mss->swap_pss >> PSS_SHIFT);
+		SEQ_PUT_DEC(" kB\nLocked:         ", mss->pss >> PSS_SHIFT);
+		seq_puts(m, " kB\n");
+	}
 	if (!rollup_mode) {
 		arch_show_smap(m, vma);
 		show_smap_vma_flags(m, vma);
@@ -864,6 +840,7 @@ static int show_smap(struct seq_file *m, void *v, int is_pid)
 	m_cache_vma(m, vma);
 	return ret;
 }
+#undef SEQ_PUT_DEC
 
 static int show_pid_smap(struct seq_file *m, void *v)
 {
diff --git a/fs/seq_file.c b/fs/seq_file.c
index f36ab451ade2..e6ec14a02a52 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -673,15 +673,20 @@ void seq_puts(struct seq_file *m, const char *s)
 }
 EXPORT_SYMBOL(seq_puts);
 
-/*
+/**
  * A helper routine for putting decimal numbers without rich format of printf().
  * only 'unsigned long long' is supported.
- * This routine will put strlen(delimiter) + number into seq_file.
+ * @m: seq_file identifying the buffer to which data should be written
+ * @delimiter: a string which is printed before the number
+ * @num: the number
+ * @width: a minimum field width
+ *
+ * This routine will put strlen(delimiter) + number into seq_file.
  * This routine is very quick when you show lots of numbers.
  * In usual cases, it will be better to use seq_printf(). It's easier to read.
  */
-void seq_put_decimal_ull(struct seq_file *m, const char *delimiter,
-			 unsigned long long num)
+void seq_put_decimal_ull_width(struct seq_file *m, const char *delimiter,
+			 unsigned long long num, unsigned int width)
 {
 	int len;
 
@@ -695,10 +700,13 @@ void seq_put_decimal_ull(struct seq_file *m, const char *delimiter,
 	memcpy(m->buf + m->count, delimiter, len);
 	m->count += len;
 
-	if (m->count + 1 >= m->size)
+	if (!width)
+		width = 1;
+
+	if (m->count + width >= m->size)
 		goto overflow;
 
-	len = num_to_str(m->buf + m->count, m->size - m->count, num);
+	len = num_to_str(m->buf + m->count, m->size - m->count, num, width);
 	if (!len)
 		goto overflow;
 
@@ -708,6 +716,12 @@ void seq_put_decimal_ull(struct seq_file *m, const char *delimiter,
 overflow:
 	seq_set_overflow(m);
 }
+
+void seq_put_decimal_ull(struct seq_file *m, const char *delimiter,
+			 unsigned long long num)
+{
+	return seq_put_decimal_ull_width(m, delimiter, num, 0);
+}
 EXPORT_SYMBOL(seq_put_decimal_ull);
 
 /**
@@ -777,7 +791,7 @@ void seq_put_decimal_ll(struct seq_file *m, const char *delimiter, long long num
 		num = -num;
 	}
 
-	len = num_to_str(m->buf + m->count, m->size - m->count, num);
+	len = num_to_str(m->buf + m->count, m->size - m->count, num, 0);
 	if (!len)
 		goto overflow;
 
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index ce51455e2adf..8c621e74526a 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -439,7 +439,8 @@ extern long simple_strtol(const char *,char **,unsigned int);
 extern unsigned long long simple_strtoull(const char *,char **,unsigned int);
 extern long long simple_strtoll(const char *,char **,unsigned int);
 
-extern int num_to_str(char *buf, int size, unsigned long long num);
+extern int num_to_str(char *buf, int size,
+		      unsigned long long num, unsigned int width);
 
 /* lib/printf utilities */
 
diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
index 440f1351ad57..71d6f643b9d7 100644
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -118,6 +118,8 @@ __printf(2, 3)
 void seq_printf(struct seq_file *m, const char *fmt, ...);
 void seq_putc(struct seq_file *m, char c);
 void seq_puts(struct seq_file *m, const char *s);
+void seq_put_decimal_ull_width(struct seq_file *m, const char *delimiter,
+			       unsigned long long num, unsigned int width);
 void seq_put_decimal_ull(struct seq_file *m, const char *delimiter,
 			 unsigned long long num);
 void seq_put_decimal_ll(struct seq_file *m, const char *delimiter, long long num);
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index d7a708f82559..942b5234a59b 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -336,7 +336,7 @@ char *put_dec(char *buf, unsigned long long n)
  *
  * If speed is not important, use snprintf(). It's easy to read the code.
  */
-int num_to_str(char *buf, int size, unsigned long long num)
+int num_to_str(char *buf, int size, unsigned long long num, unsigned int width)
 {
 	/* put_dec requires 2-byte alignment of the buffer. */
 	char tmp[sizeof(num) * 3] __aligned(2);
@@ -350,11 +350,21 @@ int num_to_str(char *buf, int size, unsigned long long num)
 		len = put_dec(tmp, num) - tmp;
 	}
 
-	if (len > size)
+	if (len > size || width > size)
 		return 0;
+
+	if (width > len) {
+		width = width - len;
+		for (idx = 0; idx < width; idx++)
+			buf[idx] = ' ';
+	} else {
+		width = 0;
+	}
+
 	for (idx = 0; idx < len; ++idx)
-		buf[idx] = tmp[len - idx - 1];
-	return len;
+		buf[idx + width] = tmp[len - idx - 1];
+
+	return len + width;
 }
 
 #define SIGN	1		/* unsigned/signed, must be 1 */
-- 
2.13.6

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH 2/4] proc: replace seq_printf on seq_putc to speed up /proc/pid/smaps
  2018-02-12  7:49 [PATCH 1/4 v2] proc: add seq_put_decimal_ull_width to speed up /proc/pid/smaps Andrei Vagin
@ 2018-02-12  7:49 ` Andrei Vagin
  2018-02-12 14:10   ` Alexey Dobriyan
  2018-02-12  7:49 ` [PATCH 3/4] proc: optimize single-symbol delimiters to spead up seq_put_decimal_ull Andrei Vagin
  2018-02-12  7:49 ` [PATCH 4/4] proc: replace seq_printf by seq_put_smth to speed up /proc/pid/status Andrei Vagin
  2 siblings, 1 reply; 6+ messages in thread
From: Andrei Vagin @ 2018-02-12  7:49 UTC (permalink / raw)
  To: linux-fsdevel, Andrew Morton
  Cc: Alexey Dobriyan, Andrei Vagin, KAMEZAWA Hiroyuki

seq_putc() works much faster than seq_printf()

$ time python test_smaps.py

== Before patch ==
real    0m3.828s
user    0m0.413s
sys     0m3.408s

== After patch ==
real	0m3.405s
user	0m0.401s
sys	0m3.003s

== Before patch ==
-   75.51%     4.62%  python   [kernel.kallsyms]    [k] show_smap.isra.33
   - 70.88% show_smap.isra.33
      + 24.82% seq_put_decimal_ull_aligned
      + 19.78% __walk_page_range
      + 12.74% seq_printf
      + 11.08% show_map_vma.isra.23
      + 1.68% seq_puts

== After patch ==
-   69.16%     5.70%  python   [kernel.kallsyms]    [k] show_smap.isra.33
   - 63.46% show_smap.isra.33
      + 25.98% seq_put_decimal_ull_aligned
      + 20.90% __walk_page_range
      + 12.60% show_map_vma.isra.23
        1.56% seq_putc
      + 1.55% seq_puts

Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrei Vagin <avagin@openvz.org>
---
 fs/proc/task_mmu.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 3026feda0432..65ae54659833 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -688,8 +688,9 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
 		if (!mnemonics[i][0])
 			continue;
 		if (vma->vm_flags & (1UL << i)) {
-			seq_printf(m, "%c%c ",
-				   mnemonics[i][0], mnemonics[i][1]);
+			seq_putc(m, mnemonics[i][0]);
+			seq_putc(m, mnemonics[i][1]);
+			seq_putc(m, ' ');
 		}
 	}
 	seq_putc(m, '\n');
-- 
2.13.6

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH 3/4] proc: optimize single-symbol delimiters to spead up seq_put_decimal_ull
  2018-02-12  7:49 [PATCH 1/4 v2] proc: add seq_put_decimal_ull_width to speed up /proc/pid/smaps Andrei Vagin
  2018-02-12  7:49 ` [PATCH 2/4] proc: replace seq_printf on seq_putc " Andrei Vagin
@ 2018-02-12  7:49 ` Andrei Vagin
  2018-02-12  7:49 ` [PATCH 4/4] proc: replace seq_printf by seq_put_smth to speed up /proc/pid/status Andrei Vagin
  2 siblings, 0 replies; 6+ messages in thread
From: Andrei Vagin @ 2018-02-12  7:49 UTC (permalink / raw)
  To: linux-fsdevel, Andrew Morton; +Cc: Alexey Dobriyan, Andrei Vagin

A delimiter is a string which is printed before a number.
Single-symbol delimiters can be printed by seq_putc(), and this works
faster than printing them with seq_puts().

== test_proc.c

/*
 * Benchmark helper: open argv[2], read up to sizeof(buf) bytes from it and
 * close it again, repeated atoi(argv[1]) times.
 *
 * Usage: test_proc <iterations> <path>
 * Returns 0 on success, 1 if an argument is missing, if open() fails, or if
 * read() returns no data.
 */
int main(int argc, char **argv)
{
	int n, i, fd;
	char buf[16384];

	/* Both arguments are required; do not dereference missing argv[] slots. */
	if (argc < 3)
		return 1;

	n = atoi(argv[1]);
	for (i = 0; i < n; i++) {
		fd = open(argv[2], O_RDONLY);
		if (fd < 0)
			return 1;
		if (read(fd, buf, sizeof(buf)) <= 0) {
			/* Release the descriptor on the failure path as well. */
			close(fd);
			return 1;
		}
		close(fd);
	}

	return 0;
}
==

$ time ./test_proc  1000000 /proc/1/stat

== Before patch ==
  real	0m3.820s
  user	0m0.337s
  sys	0m3.394s

== After patch ==
  real	0m3.110s
  user	0m0.324s
  sys	0m2.700s

Signed-off-by: Andrei Vagin <avagin@openvz.org>
---
 fs/seq_file.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/fs/seq_file.c b/fs/seq_file.c
index e6ec14a02a52..f58549542e73 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -693,12 +693,12 @@ void seq_put_decimal_ull_width(struct seq_file *m, const char *delimiter,
 	if (m->count + 2 >= m->size) /* we'll write 2 bytes at least */
 		goto overflow;
 
-	len = strlen(delimiter);
-	if (m->count + len >= m->size)
-		goto overflow;
-
-	memcpy(m->buf + m->count, delimiter, len);
-	m->count += len;
+	if (delimiter && delimiter[0]) {
+		if (delimiter[1] == 0)
+			seq_putc(m, delimiter[0]);
+		else
+			seq_puts(m, delimiter);
+	}
 
 	if (!width)
 		width = 1;
@@ -776,12 +776,12 @@ void seq_put_decimal_ll(struct seq_file *m, const char *delimiter, long long num
 	if (m->count + 3 >= m->size) /* we'll write 2 bytes at least */
 		goto overflow;
 
-	len = strlen(delimiter);
-	if (m->count + len >= m->size)
-		goto overflow;
-
-	memcpy(m->buf + m->count, delimiter, len);
-	m->count += len;
+	if (delimiter && delimiter[0]) {
+		if (delimiter[1] == 0)
+			seq_putc(m, delimiter[0]);
+		else
+			seq_puts(m, delimiter);
+	}
 
 	if (m->count + 2 >= m->size)
 		goto overflow;
-- 
2.13.6

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH 4/4] proc: replace seq_printf by seq_put_smth to speed up /proc/pid/status
  2018-02-12  7:49 [PATCH 1/4 v2] proc: add seq_put_decimal_ull_width to speed up /proc/pid/smaps Andrei Vagin
  2018-02-12  7:49 ` [PATCH 2/4] proc: replace seq_printf on seq_putc " Andrei Vagin
  2018-02-12  7:49 ` [PATCH 3/4] proc: optimize single-symbol delimiters to spead up seq_put_decimal_ull Andrei Vagin
@ 2018-02-12  7:49 ` Andrei Vagin
  2 siblings, 0 replies; 6+ messages in thread
From: Andrei Vagin @ 2018-02-12  7:49 UTC (permalink / raw)
  To: linux-fsdevel, Andrew Morton; +Cc: Alexey Dobriyan, Andrei Vagin

seq_printf() works slower than seq_puts, seq_putc, etc.

== test_proc.c
/*
 * Benchmark helper: open argv[2], read up to sizeof(buf) bytes from it and
 * close it again, repeated atoi(argv[1]) times.
 *
 * Usage: test_proc <iterations> <path>
 * Returns 0 on success, 1 if an argument is missing, if open() fails, or if
 * read() returns no data.
 */
int main(int argc, char **argv)
{
	int n, i, fd;
	char buf[16384];

	/* Both arguments are required; do not dereference missing argv[] slots. */
	if (argc < 3)
		return 1;

	n = atoi(argv[1]);
	for (i = 0; i < n; i++) {
		fd = open(argv[2], O_RDONLY);
		if (fd < 0)
			return 1;
		if (read(fd, buf, sizeof(buf)) <= 0) {
			/* Release the descriptor on the failure path as well. */
			close(fd);
			return 1;
		}
		close(fd);
	}

	return 0;
}
==

$ time ./test_proc  1000000 /proc/1/status

== Before patch ==
real	0m5.171s
user	0m0.328s
sys	0m4.783s

== After patch ==
real	0m4.761s
user	0m0.334s
sys	0m4.366s

Signed-off-by: Andrei Vagin <avagin@openvz.org>
---
 fs/proc/array.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/fs/proc/array.c b/fs/proc/array.c
index 598803576e4c..0a61affe1356 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -187,7 +187,8 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
 	task_unlock(p);
 	rcu_read_unlock();
 
-	seq_printf(m, "State:\t%s", get_task_state(p));
+	seq_puts(m, "State:\t");
+	seq_puts(m, get_task_state(p));
 
 	seq_put_decimal_ull(m, "\nTgid:\t", tgid);
 	seq_put_decimal_ull(m, "\nNgid:\t", ngid);
@@ -313,8 +314,8 @@ static void render_cap_t(struct seq_file *m, const char *header,
 
 	seq_puts(m, header);
 	CAP_FOR_EACH_U32(__capi) {
-		seq_printf(m, "%08x",
-			   a->cap[CAP_LAST_U32 - __capi]);
+		seq_put_hex_ll(m, NULL,
+			   a->cap[CAP_LAST_U32 - __capi], 8);
 	}
 	seq_putc(m, '\n');
 }
@@ -368,7 +369,8 @@ static void task_cpus_allowed(struct seq_file *m, struct task_struct *task)
 
 static inline void task_core_dumping(struct seq_file *m, struct mm_struct *mm)
 {
-	seq_printf(m, "CoreDumping:\t%d\n", !!mm->core_state);
+	seq_put_decimal_ull(m, "CoreDumping:\t", !!mm->core_state);
+	seq_putc(m, '\n');
 }
 
 int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
@@ -504,7 +506,11 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 	/* convert nsec -> ticks */
 	start_time = nsec_to_clock_t(task->real_start_time);
 
-	seq_printf(m, "%d (%s) %c", pid_nr_ns(pid, ns), tcomm, state);
+	seq_put_decimal_ull(m, "", pid_nr_ns(pid, ns));
+	seq_puts(m, " (");
+	seq_puts(m, tcomm);
+	seq_puts(m, ") ");
+	seq_putc(m, state);
 	seq_put_decimal_ll(m, " ", ppid);
 	seq_put_decimal_ll(m, " ", pgid);
 	seq_put_decimal_ll(m, " ", sid);
-- 
2.13.6

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 2/4] proc: replace seq_printf on seq_putc to speed up /proc/pid/smaps
  2018-02-12  7:49 ` [PATCH 2/4] proc: replace seq_printf on seq_putc " Andrei Vagin
@ 2018-02-12 14:10   ` Alexey Dobriyan
  0 siblings, 0 replies; 6+ messages in thread
From: Alexey Dobriyan @ 2018-02-12 14:10 UTC (permalink / raw)
  To: Andrei Vagin; +Cc: linux-fsdevel, Andrew Morton, KAMEZAWA Hiroyuki

On Sun, Feb 11, 2018 at 11:49:29PM -0800, Andrei Vagin wrote:
>  		if (vma->vm_flags & (1UL << i)) {
> -			seq_printf(m, "%c%c ",
> -				   mnemonics[i][0], mnemonics[i][1]);
> +			seq_putc(m, mnemonics[i][0]);
> +			seq_putc(m, mnemonics[i][1]);
> +			seq_putc(m, ' ');

Reviewed-by: Alexey Dobriyan <adobriyan@gmail.com>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH 4/4] proc: replace seq_printf by seq_put_smth to speed up /proc/pid/status
  2018-01-29  8:00 [PATCH 1/4] proc: add seq_put_decimal_ull_align to speed up /proc/pid/smaps Andrei Vagin
@ 2018-01-29  8:00 ` Andrei Vagin
  0 siblings, 0 replies; 6+ messages in thread
From: Andrei Vagin @ 2018-01-29  8:00 UTC (permalink / raw)
  To: linux-fsdevel, Andrew Morton; +Cc: Alexey Dobriyan, Andrei Vagin

seq_printf() works slower than seq_puts, seq_putc, etc.

== test_proc.c
/*
 * Benchmark helper: open argv[2], read up to sizeof(buf) bytes from it and
 * close it again, repeated atoi(argv[1]) times.
 *
 * Usage: test_proc <iterations> <path>
 * Returns 0 on success, 1 if an argument is missing, if open() fails, or if
 * read() returns no data.
 */
int main(int argc, char **argv)
{
	int n, i, fd;
	char buf[16384];

	/* Both arguments are required; do not dereference missing argv[] slots. */
	if (argc < 3)
		return 1;

	n = atoi(argv[1]);
	for (i = 0; i < n; i++) {
		fd = open(argv[2], O_RDONLY);
		if (fd < 0)
			return 1;
		if (read(fd, buf, sizeof(buf)) <= 0) {
			/* Release the descriptor on the failure path as well. */
			close(fd);
			return 1;
		}
		close(fd);
	}

	return 0;
}
==

$ time ./test_proc  1000000 /proc/1/status

== Before patch ==
real	0m5.171s
user	0m0.328s
sys	0m4.783s

== After patch ==
real	0m4.761s
user	0m0.334s
sys	0m4.366s

Signed-off-by: Andrei Vagin <avagin@openvz.org>
---
 fs/proc/array.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/fs/proc/array.c b/fs/proc/array.c
index d67a72dcb92c..f22dbfab6ab0 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -187,7 +187,8 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
 	task_unlock(p);
 	rcu_read_unlock();
 
-	seq_printf(m, "State:\t%s", get_task_state(p));
+	seq_puts(m, "State:\t");
+	seq_puts(m, get_task_state(p));
 
 	seq_put_decimal_ull(m, "\nTgid:\t", tgid);
 	seq_put_decimal_ull(m, "\nNgid:\t", ngid);
@@ -313,8 +314,8 @@ static void render_cap_t(struct seq_file *m, const char *header,
 
 	seq_puts(m, header);
 	CAP_FOR_EACH_U32(__capi) {
-		seq_printf(m, "%08x",
-			   a->cap[CAP_LAST_U32 - __capi]);
+		seq_put_hex_ll(m, NULL,
+			   a->cap[CAP_LAST_U32 - __capi], 8);
 	}
 	seq_putc(m, '\n');
 }
@@ -368,7 +369,8 @@ static void task_cpus_allowed(struct seq_file *m, struct task_struct *task)
 
 static inline void task_core_dumping(struct seq_file *m, struct mm_struct *mm)
 {
-	seq_printf(m, "CoreDumping:\t%d\n", !!mm->core_state);
+	seq_put_decimal_ull(m, "CoreDumping:\t", !!mm->core_state);
+	seq_putc(m, '\n');
 }
 
 int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
@@ -504,7 +506,11 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 	/* convert nsec -> ticks */
 	start_time = nsec_to_clock_t(task->real_start_time);
 
-	seq_printf(m, "%d (%s) %c", pid_nr_ns(pid, ns), tcomm, state);
+	seq_put_decimal_ull(m, "", pid_nr_ns(pid, ns));
+	seq_puts(m, " (");
+	seq_puts(m, tcomm);
+	seq_puts(m, ") ");
+	seq_putc(m, state);
 	seq_put_decimal_ll(m, " ", ppid);
 	seq_put_decimal_ll(m, " ", pgid);
 	seq_put_decimal_ll(m, " ", sid);
-- 
2.13.6

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2018-02-12 14:10 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-02-12  7:49 [PATCH 1/4 v2] proc: add seq_put_decimal_ull_width to speed up /proc/pid/smaps Andrei Vagin
2018-02-12  7:49 ` [PATCH 2/4] proc: replace seq_printf on seq_putc " Andrei Vagin
2018-02-12 14:10   ` Alexey Dobriyan
2018-02-12  7:49 ` [PATCH 3/4] proc: optimize single-symbol delimiters to spead up seq_put_decimal_ull Andrei Vagin
2018-02-12  7:49 ` [PATCH 4/4] proc: replace seq_printf by seq_put_smth to speed up /proc/pid/status Andrei Vagin
  -- strict thread matches above, loose matches on Subject: below --
2018-01-29  8:00 [PATCH 1/4] proc: add seq_put_decimal_ull_align to speed up /proc/pid/smaps Andrei Vagin
2018-01-29  8:00 ` [PATCH 4/4] proc: replace seq_printf by seq_put_smth to speed up /proc/pid/status Andrei Vagin

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.