All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/4] proc: add seq_put_decimal_ull_align to speed up /proc/pid/smaps
@ 2018-01-29  8:00 Andrei Vagin
  2018-01-29  8:00 ` [PATCH 2/4] proc: replace seq_printf on seq_putc " Andrei Vagin
                   ` (3 more replies)
  0 siblings, 4 replies; 10+ messages in thread
From: Andrei Vagin @ 2018-01-29  8:00 UTC (permalink / raw)
  To: linux-fsdevel, Andrew Morton
  Cc: Alexey Dobriyan, Andrei Vagin, KAMEZAWA Hiroyuki

seq_put_decimal_ull_aligned(m, str, val, width) is equivalent to
seq_printf(m, "%s%*llu", str, width, val), but it works much faster.

== test_smaps.py
# Benchmark driver for /proc/1/smaps: the file is opened once and then read
# in full 10000 times, seeking back to offset 0 after every read.
# NOTE(review): `num` is not referenced anywhere in the lines shown.
num = 0
with open("/proc/1/smaps") as f:
        # xrange() is the Python 2 spelling of a lazy integer range.
        for x in xrange(10000):
                # Read everything up to EOF; the contents are discarded.
                data = f.read()
                # Rewind to the start so the next read() sees the whole file.
                f.seek(0, 0)
==

== Before patch ==
$ time python test_smaps.py

real    0m4.593s
user    0m0.398s
sys     0m4.158s

== After patch ==
$ time python test_smaps.py

real    0m3.828s
user    0m0.413s
sys     0m3.408s

$ perf record -g python test_smaps.py
== Before patch ==
-   79.01%     3.36%  python   [kernel.kallsyms]    [k] show_smap.isra.33
   - 75.65% show_smap.isra.33
      + 48.85% seq_printf
      + 15.75% __walk_page_range
      + 9.70% show_map_vma.isra.23
        0.61% seq_puts

== After patch ==
-   75.51%     4.62%  python   [kernel.kallsyms]    [k] show_smap.isra.33
   - 70.88% show_smap.isra.33
      + 24.82% seq_put_decimal_ull_aligned
      + 19.78% __walk_page_range
      + 12.74% seq_printf
      + 11.08% show_map_vma.isra.23
      + 1.68% seq_puts

Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrei Vagin <avagin@openvz.org>
---
 fs/proc/meminfo.c        |  15 +-----
 fs/proc/task_mmu.c       | 124 +++++++++++++++++++----------------------------
 fs/seq_file.c            |  24 +++++++--
 include/linux/kernel.h   |   2 +-
 include/linux/seq_file.h |   2 +
 lib/vsprintf.c           |  18 +++++--
 6 files changed, 88 insertions(+), 97 deletions(-)

diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 6bb20f864259..2b197ce8b99f 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -26,20 +26,7 @@ void __attribute__((weak)) arch_report_meminfo(struct seq_file *m)
 
 static void show_val_kb(struct seq_file *m, const char *s, unsigned long num)
 {
-	char v[32];
-	static const char blanks[7] = {' ', ' ', ' ', ' ',' ', ' ', ' '};
-	int len;
-
-	len = num_to_str(v, sizeof(v), num << (PAGE_SHIFT - 10));
-
-	seq_write(m, s, 16);
-
-	if (len > 0) {
-		if (len < 8)
-			seq_write(m, blanks, 8 - len);
-
-		seq_write(m, v, len);
-	}
+	seq_put_decimal_ull_aligned(m, s, num << (PAGE_SHIFT - 10), 8);
 	seq_write(m, " kB\n", 4);
 }
 
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 3a08685ef27c..cc0aaf3a7315 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -24,6 +24,8 @@
 #include <asm/tlbflush.h>
 #include "internal.h"
 
+#define SEQ_PUT_DEC(str, val) \
+		seq_put_decimal_ull_aligned(m, str, (val) << (PAGE_SHIFT-10), 8)
 void task_mem(struct seq_file *m, struct mm_struct *mm)
 {
 	unsigned long text, lib, swap, anon, file, shmem;
@@ -50,37 +52,28 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
 	text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> 10;
 	lib = (mm->exec_vm << (PAGE_SHIFT-10)) - text;
 	swap = get_mm_counter(mm, MM_SWAPENTS);
-	seq_printf(m,
-		"VmPeak:\t%8lu kB\n"
-		"VmSize:\t%8lu kB\n"
-		"VmLck:\t%8lu kB\n"
-		"VmPin:\t%8lu kB\n"
-		"VmHWM:\t%8lu kB\n"
-		"VmRSS:\t%8lu kB\n"
-		"RssAnon:\t%8lu kB\n"
-		"RssFile:\t%8lu kB\n"
-		"RssShmem:\t%8lu kB\n"
-		"VmData:\t%8lu kB\n"
-		"VmStk:\t%8lu kB\n"
-		"VmExe:\t%8lu kB\n"
-		"VmLib:\t%8lu kB\n"
-		"VmPTE:\t%8lu kB\n"
-		"VmSwap:\t%8lu kB\n",
-		hiwater_vm << (PAGE_SHIFT-10),
-		total_vm << (PAGE_SHIFT-10),
-		mm->locked_vm << (PAGE_SHIFT-10),
-		mm->pinned_vm << (PAGE_SHIFT-10),
-		hiwater_rss << (PAGE_SHIFT-10),
-		total_rss << (PAGE_SHIFT-10),
-		anon << (PAGE_SHIFT-10),
-		file << (PAGE_SHIFT-10),
-		shmem << (PAGE_SHIFT-10),
-		mm->data_vm << (PAGE_SHIFT-10),
-		mm->stack_vm << (PAGE_SHIFT-10), text, lib,
-		mm_pgtables_bytes(mm) >> 10,
-		swap << (PAGE_SHIFT-10));
+	SEQ_PUT_DEC("VmPeak:\t", hiwater_vm);
+	SEQ_PUT_DEC(" kB\nVmSize:\t", total_vm);
+	SEQ_PUT_DEC(" kB\nVmLck:\t", mm->locked_vm);
+	SEQ_PUT_DEC(" kB\nVmPin:\t", mm->pinned_vm);
+	SEQ_PUT_DEC(" kB\nVmHWM:\t", hiwater_rss);
+	SEQ_PUT_DEC(" kB\nVmRSS:\t", total_rss);
+	SEQ_PUT_DEC(" kB\nRssAnon:\t", anon);
+	SEQ_PUT_DEC(" kB\nRssFile:\t", file);
+	SEQ_PUT_DEC(" kB\nRssShmem:\t", shmem);
+	SEQ_PUT_DEC(" kB\nVmData:\t", mm->data_vm);
+	SEQ_PUT_DEC(" kB\nVmStk:\t", mm->stack_vm);
+	seq_put_decimal_ull_aligned(m,
+		    " kB\nVmExe:\t", text, 8);
+	seq_put_decimal_ull_aligned(m,
+		    " kB\nVmLib:\t", lib, 8);
+	seq_put_decimal_ull_aligned(m,
+		    " kB\nVmPTE:\t", mm_pgtables_bytes(mm) >> 10, 8);
+	SEQ_PUT_DEC(" kB\nVmSwap:\t", swap);
+	seq_puts(m, " kB\n");
 	hugetlb_report_usage(m, mm);
 }
+#undef SEQ_PUT_DEC
 
 unsigned long task_vsize(struct mm_struct *mm)
 {
@@ -734,6 +727,8 @@ void __weak arch_show_smap(struct seq_file *m, struct vm_area_struct *vma)
 {
 }
 
+#define SEQ_PUT_DEC(str, val) \
+		seq_put_decimal_ull_aligned(m, str, (val) >> 10, 8)
 static int show_smap(struct seq_file *m, void *v, int is_pid)
 {
 	struct proc_maps_private *priv = m->private;
@@ -807,51 +802,33 @@ static int show_smap(struct seq_file *m, void *v, int is_pid)
 		ret = SEQ_SKIP;
 	}
 
-	if (!rollup_mode)
-		seq_printf(m,
-			   "Size:           %8lu kB\n"
-			   "KernelPageSize: %8lu kB\n"
-			   "MMUPageSize:    %8lu kB\n",
-			   (vma->vm_end - vma->vm_start) >> 10,
-			   vma_kernel_pagesize(vma) >> 10,
-			   vma_mmu_pagesize(vma) >> 10);
-
-
-	if (!rollup_mode || last_vma)
-		seq_printf(m,
-			   "Rss:            %8lu kB\n"
-			   "Pss:            %8lu kB\n"
-			   "Shared_Clean:   %8lu kB\n"
-			   "Shared_Dirty:   %8lu kB\n"
-			   "Private_Clean:  %8lu kB\n"
-			   "Private_Dirty:  %8lu kB\n"
-			   "Referenced:     %8lu kB\n"
-			   "Anonymous:      %8lu kB\n"
-			   "LazyFree:       %8lu kB\n"
-			   "AnonHugePages:  %8lu kB\n"
-			   "ShmemPmdMapped: %8lu kB\n"
-			   "Shared_Hugetlb: %8lu kB\n"
-			   "Private_Hugetlb: %7lu kB\n"
-			   "Swap:           %8lu kB\n"
-			   "SwapPss:        %8lu kB\n"
-			   "Locked:         %8lu kB\n",
-			   mss->resident >> 10,
-			   (unsigned long)(mss->pss >> (10 + PSS_SHIFT)),
-			   mss->shared_clean  >> 10,
-			   mss->shared_dirty  >> 10,
-			   mss->private_clean >> 10,
-			   mss->private_dirty >> 10,
-			   mss->referenced >> 10,
-			   mss->anonymous >> 10,
-			   mss->lazyfree >> 10,
-			   mss->anonymous_thp >> 10,
-			   mss->shmem_thp >> 10,
-			   mss->shared_hugetlb >> 10,
-			   mss->private_hugetlb >> 10,
-			   mss->swap >> 10,
-			   (unsigned long)(mss->swap_pss >> (10 + PSS_SHIFT)),
-			   (unsigned long)(mss->pss >> (10 + PSS_SHIFT)));
+	if (!rollup_mode) {
+		SEQ_PUT_DEC("Size:           ", vma->vm_end - vma->vm_start);
+		SEQ_PUT_DEC(" kB\nKernelPageSize: ", vma_kernel_pagesize(vma));
+		SEQ_PUT_DEC(" kB\nMMUPageSize:    ", vma_mmu_pagesize(vma));
+		seq_puts(m, " kB\n");
+	}
 
+	if (!rollup_mode || last_vma) {
+		SEQ_PUT_DEC("Rss:            ", mss->resident);
+		SEQ_PUT_DEC(" kB\nPss:            ", mss->pss >> PSS_SHIFT);
+		SEQ_PUT_DEC(" kB\nShared_Clean:   ", mss->shared_clean);
+		SEQ_PUT_DEC(" kB\nShared_Dirty:   ", mss->shared_dirty);
+		SEQ_PUT_DEC(" kB\nPrivate_Clean:  ", mss->private_clean);
+		SEQ_PUT_DEC(" kB\nPrivate_Dirty:  ", mss->private_dirty);
+		SEQ_PUT_DEC(" kB\nReferenced:     ", mss->referenced);
+		SEQ_PUT_DEC(" kB\nAnonymous:      ", mss->anonymous);
+		SEQ_PUT_DEC(" kB\nLazyFree:       ", mss->lazyfree);
+		SEQ_PUT_DEC(" kB\nAnonHugePages:  ", mss->anonymous_thp);
+		SEQ_PUT_DEC(" kB\nShmemPmdMapped: ", mss->shmem_thp);
+		SEQ_PUT_DEC(" kB\nShared_Hugetlb: ", mss->shared_hugetlb);
+		seq_put_decimal_ull_aligned(m,
+			     " kB\nPrivate_Hugetlb: ", mss->private_hugetlb >> 10, 7);
+		SEQ_PUT_DEC(" kB\nSwap:           ", mss->swap);
+		SEQ_PUT_DEC(" kB\nSwapPss:        ", mss->swap_pss >> PSS_SHIFT);
+		SEQ_PUT_DEC(" kB\nLocked:         ", mss->pss >> PSS_SHIFT);
+		seq_puts(m, " kB\n");
+	}
 	if (!rollup_mode) {
 		arch_show_smap(m, vma);
 		show_smap_vma_flags(m, vma);
@@ -859,6 +836,7 @@ static int show_smap(struct seq_file *m, void *v, int is_pid)
 	m_cache_vma(m, vma);
 	return ret;
 }
+#undef SEQ_PUT_DEC
 
 static int show_pid_smap(struct seq_file *m, void *v)
 {
diff --git a/fs/seq_file.c b/fs/seq_file.c
index d2885220ecf7..55c89eb95a90 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -677,8 +677,8 @@ EXPORT_SYMBOL(seq_puts);
  * This routine is very quick when you show lots of numbers.
  * In usual cases, it will be better to use seq_printf(). It's easier to read.
  */
-void seq_put_decimal_ull(struct seq_file *m, const char *delimiter,
-			 unsigned long long num)
+void seq_put_decimal_ull_aligned(struct seq_file *m, const char *delimiter,
+			 unsigned long long num, int width)
 {
 	int len;
 
@@ -692,15 +692,23 @@ void seq_put_decimal_ull(struct seq_file *m, const char *delimiter,
 	memcpy(m->buf + m->count, delimiter, len);
 	m->count += len;
 
-	if (m->count + 1 >= m->size)
+	if (!width)
+		width = 1;
+
+	if (m->count + width >= m->size)
 		goto overflow;
 
 	if (num < 10) {
+		int i;
+
+		for (i = 0; i < width - 1; i++)
+			m->buf[m->count++] = ' ';
+
 		m->buf[m->count++] = num + '0';
 		return;
 	}
 
-	len = num_to_str(m->buf + m->count, m->size - m->count, num);
+	len = num_to_str(m->buf + m->count, m->size - m->count, num, width);
 	if (!len)
 		goto overflow;
 
@@ -710,6 +718,12 @@ void seq_put_decimal_ull(struct seq_file *m, const char *delimiter,
 overflow:
 	seq_set_overflow(m);
 }
+
+void seq_put_decimal_ull(struct seq_file *m, const char *delimiter,
+			 unsigned long long num)
+{
+	return seq_put_decimal_ull_aligned(m, delimiter, num, 0);
+}
 EXPORT_SYMBOL(seq_put_decimal_ull);
 
 /**
@@ -784,7 +798,7 @@ void seq_put_decimal_ll(struct seq_file *m, const char *delimiter, long long num
 		return;
 	}
 
-	len = num_to_str(m->buf + m->count, m->size - m->count, num);
+	len = num_to_str(m->buf + m->count, m->size - m->count, num, 0);
 	if (!len)
 		goto overflow;
 
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index ce51455e2adf..6485a32db7d5 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -439,7 +439,7 @@ extern long simple_strtol(const char *,char **,unsigned int);
 extern unsigned long long simple_strtoull(const char *,char **,unsigned int);
 extern long long simple_strtoll(const char *,char **,unsigned int);
 
-extern int num_to_str(char *buf, int size, unsigned long long num);
+extern int num_to_str(char *buf, int size, unsigned long long num, int width);
 
 /* lib/printf utilities */
 
diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
index 53f238934d7f..521de9e41227 100644
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -118,6 +118,8 @@ __printf(2, 3)
 void seq_printf(struct seq_file *m, const char *fmt, ...);
 void seq_putc(struct seq_file *m, char c);
 void seq_puts(struct seq_file *m, const char *s);
+void seq_put_decimal_ull_aligned(struct seq_file *m, const char *delimiter,
+			 unsigned long long num, int width);
 void seq_put_decimal_ull(struct seq_file *m, const char *delimiter,
 			 unsigned long long num);
 void seq_put_decimal_ll(struct seq_file *m, const char *delimiter, long long num);
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 01c3957b2de6..b8015e165a54 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -337,7 +337,7 @@ char *put_dec(char *buf, unsigned long long n)
  *
  * If speed is not important, use snprintf(). It's easy to read the code.
  */
-int num_to_str(char *buf, int size, unsigned long long num)
+int num_to_str(char *buf, int size, unsigned long long num, int width)
 {
 	/* put_dec requires 2-byte alignment of the buffer. */
 	char tmp[sizeof(num) * 3] __aligned(2);
@@ -351,11 +351,21 @@ int num_to_str(char *buf, int size, unsigned long long num)
 		len = put_dec(tmp, num) - tmp;
 	}
 
-	if (len > size)
+	if (len > size || width > size)
 		return 0;
+
+	if (width > len) {
+		width = width - len;
+		for (idx = 0; idx < width; idx++)
+			buf[idx] = ' ';
+	} else {
+		width = 0;
+	}
+
 	for (idx = 0; idx < len; ++idx)
-		buf[idx] = tmp[len - idx - 1];
-	return len;
+		buf[idx + width] = tmp[len - idx - 1];
+
+	return len + width;
 }
 
 #define SIGN	1		/* unsigned/signed, must be 1 */
-- 
2.13.6

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 2/4] proc: replace seq_printf on seq_putc to speed up /proc/pid/smaps
  2018-01-29  8:00 [PATCH 1/4] proc: add seq_put_decimal_ull_align to speed up /proc/pid/smaps Andrei Vagin
@ 2018-01-29  8:00 ` Andrei Vagin
  2018-01-29  8:00 ` [PATCH 3/4] proc: optimize single-symbol delimiters to spead up seq_put_decimal_ull Andrei Vagin
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 10+ messages in thread
From: Andrei Vagin @ 2018-01-29  8:00 UTC (permalink / raw)
  To: linux-fsdevel, Andrew Morton
  Cc: Alexey Dobriyan, Andrei Vagin, KAMEZAWA Hiroyuki

seq_putc() works much faster than seq_printf()

$ time python test_smaps.py

== Before patch ==
real    0m3.828s
user    0m0.413s
sys     0m3.408s

== After patch ==
real	0m3.405s
user	0m0.401s
sys	0m3.003s

== Before patch ==
-   75.51%     4.62%  python   [kernel.kallsyms]    [k] show_smap.isra.33
   - 70.88% show_smap.isra.33
      + 24.82% seq_put_decimal_ull_aligned
      + 19.78% __walk_page_range
      + 12.74% seq_printf
      + 11.08% show_map_vma.isra.23
      + 1.68% seq_puts

== After patch ==
-   69.16%     5.70%  python   [kernel.kallsyms]    [k] show_smap.isra.33
   - 63.46% show_smap.isra.33
      + 25.98% seq_put_decimal_ull_aligned
      + 20.90% __walk_page_range
      + 12.60% show_map_vma.isra.23
        1.56% seq_putc
      + 1.55% seq_puts

Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrei Vagin <avagin@openvz.org>
---
 fs/proc/task_mmu.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index cc0aaf3a7315..d7a429132a1f 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -685,8 +685,9 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
 		if (!mnemonics[i][0])
 			continue;
 		if (vma->vm_flags & (1UL << i)) {
-			seq_printf(m, "%c%c ",
-				   mnemonics[i][0], mnemonics[i][1]);
+			seq_putc(m, mnemonics[i][0]);
+			seq_putc(m, mnemonics[i][1]);
+			seq_putc(m, ' ');
 		}
 	}
 	seq_putc(m, '\n');
-- 
2.13.6

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 3/4] proc: optimize single-symbol delimiters to spead up seq_put_decimal_ull
  2018-01-29  8:00 [PATCH 1/4] proc: add seq_put_decimal_ull_align to speed up /proc/pid/smaps Andrei Vagin
  2018-01-29  8:00 ` [PATCH 2/4] proc: replace seq_printf on seq_putc " Andrei Vagin
@ 2018-01-29  8:00 ` Andrei Vagin
  2018-01-29  8:00 ` [PATCH 4/4] proc: replace seq_printf by seq_put_smth to speed up /proc/pid/status Andrei Vagin
  2018-01-29 19:10 ` [PATCH 1/4] proc: add seq_put_decimal_ull_align to speed up /proc/pid/smaps Alexey Dobriyan
  3 siblings, 0 replies; 10+ messages in thread
From: Andrei Vagin @ 2018-01-29  8:00 UTC (permalink / raw)
  To: linux-fsdevel, Andrew Morton; +Cc: Alexey Dobriyan, Andrei Vagin

A delimiter is a string which is printed before a number.
A single-symbol delimiter can be printed by seq_putc(), and this works
faster than printing it by seq_puts().

== test_proc.c

/*
 * Benchmark helper: open argv[2], read up to sizeof(buf) bytes from it and
 * close it again, repeated atoi(argv[1]) times.
 *
 * Usage: test_proc <iterations> <path>
 *
 * Returns 0 when every iteration succeeds, 1 when an argument is missing,
 * when open() fails, or when read() returns no data.
 */
int main(int argc, char **argv)
{
	int n, i, fd;
	char buf[16384];

	/* Both argv[1] and argv[2] are dereferenced below. */
	if (argc < 3)
		return 1;

	n = atoi(argv[1]);
	for (i = 0; i < n; i++) {
		fd = open(argv[2], O_RDONLY);
		if (fd < 0)
			return 1;
		if (read(fd, buf, sizeof(buf)) <= 0) {
			/* Do not leave the descriptor open on the error path. */
			close(fd);
			return 1;
		}
		close(fd);
	}

	return 0;
}
==

$ time ./test_proc  1000000 /proc/1/stat

== Before patch ==
  real	0m3.820s
  user	0m0.337s
  sys	0m3.394s

== After patch ==
  real	0m3.110s
  user	0m0.324s
  sys	0m2.700s

Signed-off-by: Andrei Vagin <avagin@openvz.org>
---
 fs/seq_file.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/fs/seq_file.c b/fs/seq_file.c
index 55c89eb95a90..d14b023bc0f8 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -685,12 +685,12 @@ void seq_put_decimal_ull_aligned(struct seq_file *m, const char *delimiter,
 	if (m->count + 2 >= m->size) /* we'll write 2 bytes at least */
 		goto overflow;
 
-	len = strlen(delimiter);
-	if (m->count + len >= m->size)
-		goto overflow;
-
-	memcpy(m->buf + m->count, delimiter, len);
-	m->count += len;
+	if (delimiter && delimiter[0]) {
+		if (delimiter[1] == 0)
+			seq_putc(m, delimiter[0]);
+		else
+			seq_puts(m, delimiter);
+	}
 
 	if (!width)
 		width = 1;
@@ -778,12 +778,12 @@ void seq_put_decimal_ll(struct seq_file *m, const char *delimiter, long long num
 	if (m->count + 3 >= m->size) /* we'll write 2 bytes at least */
 		goto overflow;
 
-	len = strlen(delimiter);
-	if (m->count + len >= m->size)
-		goto overflow;
-
-	memcpy(m->buf + m->count, delimiter, len);
-	m->count += len;
+	if (delimiter && delimiter[0]) {
+		if (delimiter[1] == 0)
+			seq_putc(m, delimiter[0]);
+		else
+			seq_puts(m, delimiter);
+	}
 
 	if (m->count + 2 >= m->size)
 		goto overflow;
-- 
2.13.6

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 4/4] proc: replace seq_printf by seq_put_smth to speed up /proc/pid/status
  2018-01-29  8:00 [PATCH 1/4] proc: add seq_put_decimal_ull_align to speed up /proc/pid/smaps Andrei Vagin
  2018-01-29  8:00 ` [PATCH 2/4] proc: replace seq_printf on seq_putc " Andrei Vagin
  2018-01-29  8:00 ` [PATCH 3/4] proc: optimize single-symbol delimiters to spead up seq_put_decimal_ull Andrei Vagin
@ 2018-01-29  8:00 ` Andrei Vagin
  2018-01-29 19:10 ` [PATCH 1/4] proc: add seq_put_decimal_ull_align to speed up /proc/pid/smaps Alexey Dobriyan
  3 siblings, 0 replies; 10+ messages in thread
From: Andrei Vagin @ 2018-01-29  8:00 UTC (permalink / raw)
  To: linux-fsdevel, Andrew Morton; +Cc: Alexey Dobriyan, Andrei Vagin

seq_printf() works slower than seq_puts(), seq_putc(), etc.

== test_proc.c
/*
 * Benchmark helper: open argv[2], read up to sizeof(buf) bytes from it and
 * close it again, repeated atoi(argv[1]) times.
 *
 * Usage: test_proc <iterations> <path>
 *
 * Returns 0 when every iteration succeeds, 1 when an argument is missing,
 * when open() fails, or when read() returns no data.
 */
int main(int argc, char **argv)
{
	int n, i, fd;
	char buf[16384];

	/* Both argv[1] and argv[2] are dereferenced below. */
	if (argc < 3)
		return 1;

	n = atoi(argv[1]);
	for (i = 0; i < n; i++) {
		fd = open(argv[2], O_RDONLY);
		if (fd < 0)
			return 1;
		if (read(fd, buf, sizeof(buf)) <= 0) {
			/* Do not leave the descriptor open on the error path. */
			close(fd);
			return 1;
		}
		close(fd);
	}

	return 0;
}
==

$ time ./test_proc  1000000 /proc/1/status

== Before patch ==
real	0m5.171s
user	0m0.328s
sys	0m4.783s

== After patch ==
real	0m4.761s
user	0m0.334s
sys	0m4.366s

Signed-off-by: Andrei Vagin <avagin@openvz.org>
---
 fs/proc/array.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/fs/proc/array.c b/fs/proc/array.c
index d67a72dcb92c..f22dbfab6ab0 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -187,7 +187,8 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
 	task_unlock(p);
 	rcu_read_unlock();
 
-	seq_printf(m, "State:\t%s", get_task_state(p));
+	seq_puts(m, "State:\t");
+	seq_puts(m, get_task_state(p));
 
 	seq_put_decimal_ull(m, "\nTgid:\t", tgid);
 	seq_put_decimal_ull(m, "\nNgid:\t", ngid);
@@ -313,8 +314,8 @@ static void render_cap_t(struct seq_file *m, const char *header,
 
 	seq_puts(m, header);
 	CAP_FOR_EACH_U32(__capi) {
-		seq_printf(m, "%08x",
-			   a->cap[CAP_LAST_U32 - __capi]);
+		seq_put_hex_ll(m, NULL,
+			   a->cap[CAP_LAST_U32 - __capi], 8);
 	}
 	seq_putc(m, '\n');
 }
@@ -368,7 +369,8 @@ static void task_cpus_allowed(struct seq_file *m, struct task_struct *task)
 
 static inline void task_core_dumping(struct seq_file *m, struct mm_struct *mm)
 {
-	seq_printf(m, "CoreDumping:\t%d\n", !!mm->core_state);
+	seq_put_decimal_ull(m, "CoreDumping:\t", !!mm->core_state);
+	seq_putc(m, '\n');
 }
 
 int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
@@ -504,7 +506,11 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 	/* convert nsec -> ticks */
 	start_time = nsec_to_clock_t(task->real_start_time);
 
-	seq_printf(m, "%d (%s) %c", pid_nr_ns(pid, ns), tcomm, state);
+	seq_put_decimal_ull(m, "", pid_nr_ns(pid, ns));
+	seq_puts(m, " (");
+	seq_puts(m, tcomm);
+	seq_puts(m, ") ");
+	seq_putc(m, state);
 	seq_put_decimal_ll(m, " ", ppid);
 	seq_put_decimal_ll(m, " ", pgid);
 	seq_put_decimal_ll(m, " ", sid);
-- 
2.13.6

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/4] proc: add seq_put_decimal_ull_align to speed up /proc/pid/smaps
  2018-01-29  8:00 [PATCH 1/4] proc: add seq_put_decimal_ull_align to speed up /proc/pid/smaps Andrei Vagin
                   ` (2 preceding siblings ...)
  2018-01-29  8:00 ` [PATCH 4/4] proc: replace seq_printf by seq_put_smth to speed up /proc/pid/status Andrei Vagin
@ 2018-01-29 19:10 ` Alexey Dobriyan
  2018-01-29 21:09   ` Andrei Vagin
  3 siblings, 1 reply; 10+ messages in thread
From: Alexey Dobriyan @ 2018-01-29 19:10 UTC (permalink / raw)
  To: Andrei Vagin; +Cc: linux-fsdevel, Andrew Morton, KAMEZAWA Hiroyuki

On Mon, Jan 29, 2018 at 12:00:40AM -0800, Andrei Vagin wrote:
> seq_put_decimal_ull_align(m, str, val, width) is equivalent of
> seq_printf(m, "%s%*d", str, width, val), but it works much faster.

It is called "minimum width", not alignment.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/4] proc: add seq_put_decimal_ull_align to speed up /proc/pid/smaps
  2018-01-29 19:10 ` [PATCH 1/4] proc: add seq_put_decimal_ull_align to speed up /proc/pid/smaps Alexey Dobriyan
@ 2018-01-29 21:09   ` Andrei Vagin
  2018-01-31 16:57     ` Alexey Dobriyan
  2018-02-09  0:18     ` Andrew Morton
  0 siblings, 2 replies; 10+ messages in thread
From: Andrei Vagin @ 2018-01-29 21:09 UTC (permalink / raw)
  To: Alexey Dobriyan
  Cc: Andrei Vagin, linux-fsdevel, Andrew Morton, KAMEZAWA Hiroyuki

On Mon, Jan 29, 2018 at 10:10:42PM +0300, Alexey Dobriyan wrote:
> On Mon, Jan 29, 2018 at 12:00:40AM -0800, Andrei Vagin wrote:
> > seq_put_decimal_ull_align(m, str, val, width) is equivalent of
> > seq_printf(m, "%s%*d", str, width, val), but it works much faster.
> 
> It is called "minimum width", not alignment.

Yes, you are right. And it's used to align numbers in output files.

Maybe you could suggest a better name for this function?

Thanks,
Andrei

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/4] proc: add seq_put_decimal_ull_align to speed up /proc/pid/smaps
  2018-01-29 21:09   ` Andrei Vagin
@ 2018-01-31 16:57     ` Alexey Dobriyan
  2018-02-09  0:18     ` Andrew Morton
  1 sibling, 0 replies; 10+ messages in thread
From: Alexey Dobriyan @ 2018-01-31 16:57 UTC (permalink / raw)
  To: Andrei Vagin
  Cc: Andrei Vagin, linux-fsdevel, Andrew Morton, KAMEZAWA Hiroyuki

On Mon, Jan 29, 2018 at 01:09:07PM -0800, Andrei Vagin wrote:
> On Mon, Jan 29, 2018 at 10:10:42PM +0300, Alexey Dobriyan wrote:
> > On Mon, Jan 29, 2018 at 12:00:40AM -0800, Andrei Vagin wrote:
> > > seq_put_decimal_ull_align(m, str, val, width) is equivalent of
> > > seq_printf(m, "%s%*d", str, width, val), but it works much faster.
> > 
> > It is called "minimum width", not alignment.
> 
> Yes, you are right. And it's used to align numbers in output files.
> 
> Maybe you could suggest a better name for this function?

I honestly don't know.
Contemplating printing integers backwards.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/4] proc: add seq_put_decimal_ull_align to speed up /proc/pid/smaps
  2018-01-29 21:09   ` Andrei Vagin
  2018-01-31 16:57     ` Alexey Dobriyan
@ 2018-02-09  0:18     ` Andrew Morton
  2018-02-12  8:06       ` Andrei Vagin
  1 sibling, 1 reply; 10+ messages in thread
From: Andrew Morton @ 2018-02-09  0:18 UTC (permalink / raw)
  To: Andrei Vagin
  Cc: Alexey Dobriyan, Andrei Vagin, linux-fsdevel, KAMEZAWA Hiroyuki

On Mon, 29 Jan 2018 13:09:07 -0800 Andrei Vagin <avagin@virtuozzo.com> wrote:

> On Mon, Jan 29, 2018 at 10:10:42PM +0300, Alexey Dobriyan wrote:
> > On Mon, Jan 29, 2018 at 12:00:40AM -0800, Andrei Vagin wrote:
> > > seq_put_decimal_ull_align(m, str, val, width) is equivalent of
> > > seq_printf(m, "%s%*d", str, width, val), but it works much faster.
> > 
> > It is called "minimum width", not alignment.
> 
> Yes, you are right. And it's used to align numbers in output files.
> 
> Maybe you could suggest a better name for this function?
> 

_width?  _min_width_?  _pad?

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/4] proc: add seq_put_decimal_ull_align to speed up /proc/pid/smaps
  2018-02-09  0:18     ` Andrew Morton
@ 2018-02-12  8:06       ` Andrei Vagin
  0 siblings, 0 replies; 10+ messages in thread
From: Andrei Vagin @ 2018-02-12  8:06 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Alexey Dobriyan, Andrei Vagin, linux-fsdevel, KAMEZAWA Hiroyuki

On Thu, Feb 08, 2018 at 04:18:25PM -0800, Andrew Morton wrote:
> On Mon, 29 Jan 2018 13:09:07 -0800 Andrei Vagin <avagin@virtuozzo.com> wrote:
> 
> > On Mon, Jan 29, 2018 at 10:10:42PM +0300, Alexey Dobriyan wrote:
> > > On Mon, Jan 29, 2018 at 12:00:40AM -0800, Andrei Vagin wrote:
> > > > seq_put_decimal_ull_align(m, str, val, width) is equivalent of
> > > > seq_printf(m, "%s%*d", str, width, val), but it works much faster.
> > > 
> > > It is called "minimum width", not alignment.
> > 
> > Yes, you are right. And it's used to align numbers in output files.
> > 
> > Maybe you could suggest a better name for this function?
> > 
> 
> _width?  _min_width_?  _pad?

I chose the first one. Thank you. The second version of these patches
has to be in your mailbox.

Thanks,
Andrei

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH 3/4] proc: optimize single-symbol delimiters to spead up seq_put_decimal_ull
  2018-02-12  7:49 [PATCH 1/4 v2] proc: add seq_put_decimal_ull_width " Andrei Vagin
@ 2018-02-12  7:49 ` Andrei Vagin
  0 siblings, 0 replies; 10+ messages in thread
From: Andrei Vagin @ 2018-02-12  7:49 UTC (permalink / raw)
  To: linux-fsdevel, Andrew Morton; +Cc: Alexey Dobriyan, Andrei Vagin

A delimiter is a string which is printed before a number.
A single-symbol delimiter can be printed by seq_putc(), and this works
faster than printing it by seq_puts().

== test_proc.c

/*
 * Benchmark helper: open argv[2], read up to sizeof(buf) bytes from it and
 * close it again, repeated atoi(argv[1]) times.
 *
 * Usage: test_proc <iterations> <path>
 *
 * Returns 0 when every iteration succeeds, 1 when an argument is missing,
 * when open() fails, or when read() returns no data.
 */
int main(int argc, char **argv)
{
	int n, i, fd;
	char buf[16384];

	/* Both argv[1] and argv[2] are dereferenced below. */
	if (argc < 3)
		return 1;

	n = atoi(argv[1]);
	for (i = 0; i < n; i++) {
		fd = open(argv[2], O_RDONLY);
		if (fd < 0)
			return 1;
		if (read(fd, buf, sizeof(buf)) <= 0) {
			/* Do not leave the descriptor open on the error path. */
			close(fd);
			return 1;
		}
		close(fd);
	}

	return 0;
}
==

$ time ./test_proc  1000000 /proc/1/stat

== Before patch ==
  real	0m3.820s
  user	0m0.337s
  sys	0m3.394s

== After patch ==
  real	0m3.110s
  user	0m0.324s
  sys	0m2.700s

Signed-off-by: Andrei Vagin <avagin@openvz.org>
---
 fs/seq_file.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/fs/seq_file.c b/fs/seq_file.c
index e6ec14a02a52..f58549542e73 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -693,12 +693,12 @@ void seq_put_decimal_ull_width(struct seq_file *m, const char *delimiter,
 	if (m->count + 2 >= m->size) /* we'll write 2 bytes at least */
 		goto overflow;
 
-	len = strlen(delimiter);
-	if (m->count + len >= m->size)
-		goto overflow;
-
-	memcpy(m->buf + m->count, delimiter, len);
-	m->count += len;
+	if (delimiter && delimiter[0]) {
+		if (delimiter[1] == 0)
+			seq_putc(m, delimiter[0]);
+		else
+			seq_puts(m, delimiter);
+	}
 
 	if (!width)
 		width = 1;
@@ -776,12 +776,12 @@ void seq_put_decimal_ll(struct seq_file *m, const char *delimiter, long long num
 	if (m->count + 3 >= m->size) /* we'll write 2 bytes at least */
 		goto overflow;
 
-	len = strlen(delimiter);
-	if (m->count + len >= m->size)
-		goto overflow;
-
-	memcpy(m->buf + m->count, delimiter, len);
-	m->count += len;
+	if (delimiter && delimiter[0]) {
+		if (delimiter[1] == 0)
+			seq_putc(m, delimiter[0]);
+		else
+			seq_puts(m, delimiter);
+	}
 
 	if (m->count + 2 >= m->size)
 		goto overflow;
-- 
2.13.6

^ permalink raw reply related	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2018-02-12  8:06 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-01-29  8:00 [PATCH 1/4] proc: add seq_put_decimal_ull_align to speed up /proc/pid/smaps Andrei Vagin
2018-01-29  8:00 ` [PATCH 2/4] proc: replace seq_printf on seq_putc " Andrei Vagin
2018-01-29  8:00 ` [PATCH 3/4] proc: optimize single-symbol delimiters to spead up seq_put_decimal_ull Andrei Vagin
2018-01-29  8:00 ` [PATCH 4/4] proc: replace seq_printf by seq_put_smth to speed up /proc/pid/status Andrei Vagin
2018-01-29 19:10 ` [PATCH 1/4] proc: add seq_put_decimal_ull_align to speed up /proc/pid/smaps Alexey Dobriyan
2018-01-29 21:09   ` Andrei Vagin
2018-01-31 16:57     ` Alexey Dobriyan
2018-02-09  0:18     ` Andrew Morton
2018-02-12  8:06       ` Andrei Vagin
2018-02-12  7:49 [PATCH 1/4 v2] proc: add seq_put_decimal_ull_width " Andrei Vagin
2018-02-12  7:49 ` [PATCH 3/4] proc: optimize single-symbol delimiters to spead up seq_put_decimal_ull Andrei Vagin

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.