All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] proc: faster /proc/*/status
@ 2016-08-06 12:56 Alexey Dobriyan
  2016-08-06 22:41 ` Joe Perches
  2016-08-07  3:16 ` [PATCH] proc: faster /proc/*/status Andi Kleen
  0 siblings, 2 replies; 13+ messages in thread
From: Alexey Dobriyan @ 2016-08-06 12:56 UTC (permalink / raw)
  To: akpm; +Cc: linux-kernel

top(1) opens the following files for every PID:

	/proc/*/stat
	/proc/*/statm
	/proc/*/status

This patch switches /proc/*/status away from seq_printf().
The result is 13.5% speedup.

Benchmark is open("/proc/self/status")+read+close 1.000.000 million times.

				BEFORE
$ perf stat -r 10 taskset -c 3 ./proc-self-status

 Performance counter stats for 'taskset -c 3 ./proc-self-status' (10 runs):

      10748.474301      task-clock (msec)         #    0.954 CPUs utilized            ( +-  0.91% )
                12      context-switches          #    0.001 K/sec                    ( +-  1.09% )
                 1      cpu-migrations            #    0.000 K/sec
               104      page-faults               #    0.010 K/sec                    ( +-  0.45% )
    37,424,127,876      cycles                    #    3.482 GHz                      ( +-  0.04% )
     8,453,010,029      stalled-cycles-frontend   #   22.59% frontend cycles idle     ( +-  0.12% )
     3,747,609,427      stalled-cycles-backend    #  10.01% backend cycles idle       ( +-  0.68% )
    65,632,764,147      instructions              #    1.75  insn per cycle
                                                  #    0.13  stalled cycles per insn  ( +-  0.00% )
    13,981,324,775      branches                  # 1300.773 M/sec                    ( +-  0.00% )
       138,967,110      branch-misses             #    0.99% of all branches          ( +-  0.18% )

      11.263885428 seconds time elapsed                                          ( +-  0.04% )
      ^^^^^^^^^^^^

				AFTER
$ perf stat -r 10 taskset -c 3 ./proc-self-status

 Performance counter stats for 'taskset -c 3 ./proc-self-status' (10 runs):

       9010.521776      task-clock (msec)         #    0.925 CPUs utilized            ( +-  1.54% )
                11      context-switches          #    0.001 K/sec                    ( +-  1.54% )
                 1      cpu-migrations            #    0.000 K/sec                    ( +- 11.11% )
               103      page-faults               #    0.011 K/sec                    ( +-  0.60% )
    32,352,310,603      cycles                    #    3.591 GHz                      ( +-  0.07% )
     7,849,199,578      stalled-cycles-frontend   #   24.26% frontend cycles idle     ( +-  0.27% )
     3,269,738,842      stalled-cycles-backend    #  10.11% backend cycles idle       ( +-  0.73% )
    56,012,163,567      instructions              #    1.73  insn per cycle
                                                  #    0.14  stalled cycles per insn  ( +-  0.00% )
    11,735,778,795      branches                  # 1302.453 M/sec                    ( +-  0.00% )
        98,084,459      branch-misses             #    0.84% of all branches          ( +-  0.28% )

       9.741247736 seconds time elapsed                                          ( +-  0.07% )
       ^^^^^^^^^^^

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
---

 fs/proc/array.c |   87 ++++++++++++++++++++++++++++++--------------------------
 1 file changed, 47 insertions(+), 40 deletions(-)

--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -186,51 +186,52 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
 	task_unlock(p);
 	rcu_read_unlock();
 
-	seq_printf(m,
-		"State:\t%s\n"
-		"Tgid:\t%d\n"
-		"Ngid:\t%d\n"
-		"Pid:\t%d\n"
-		"PPid:\t%d\n"
-		"TracerPid:\t%d\n"
-		"Uid:\t%d\t%d\t%d\t%d\n"
-		"Gid:\t%d\t%d\t%d\t%d\n"
-		"FDSize:\t%d\nGroups:\t",
-		get_task_state(p),
-		tgid, ngid, pid_nr_ns(pid, ns), ppid, tpid,
-		from_kuid_munged(user_ns, cred->uid),
-		from_kuid_munged(user_ns, cred->euid),
-		from_kuid_munged(user_ns, cred->suid),
-		from_kuid_munged(user_ns, cred->fsuid),
-		from_kgid_munged(user_ns, cred->gid),
-		from_kgid_munged(user_ns, cred->egid),
-		from_kgid_munged(user_ns, cred->sgid),
-		from_kgid_munged(user_ns, cred->fsgid),
-		max_fds);
-
+	seq_printf(m, "State:\t%s", get_task_state(p));
+
+	seq_puts(m, "\nTgid:\t");
+	seq_put_decimal_ull(m, 0, tgid);
+	seq_puts(m, "\nNgid:\t");
+	seq_put_decimal_ull(m, 0, ngid);
+	seq_puts(m, "\nPid:\t");
+	seq_put_decimal_ull(m, 0, pid_nr_ns(pid, ns));
+	seq_puts(m, "\nPPid:\t");
+	seq_put_decimal_ull(m, 0, ppid);
+	seq_puts(m, "\nTracerPid:\t");
+	seq_put_decimal_ull(m, 0, tpid);
+	seq_puts(m, "\nUid:");
+	seq_put_decimal_ull(m, '\t', from_kuid_munged(user_ns, cred->uid));
+	seq_put_decimal_ull(m, '\t', from_kuid_munged(user_ns, cred->euid));
+	seq_put_decimal_ull(m, '\t', from_kuid_munged(user_ns, cred->suid));
+	seq_put_decimal_ull(m, '\t', from_kuid_munged(user_ns, cred->fsuid));
+	seq_puts(m, "\nGid:");
+	seq_put_decimal_ull(m, '\t', from_kgid_munged(user_ns, cred->gid));
+	seq_put_decimal_ull(m, '\t', from_kgid_munged(user_ns, cred->egid));
+	seq_put_decimal_ull(m, '\t', from_kgid_munged(user_ns, cred->sgid));
+	seq_put_decimal_ull(m, '\t', from_kgid_munged(user_ns, cred->fsgid));
+	seq_puts(m, "\nFDSize:\t");
+	seq_put_decimal_ull(m, 0, max_fds);
+
+	seq_puts(m, "\nGroups:\t");
 	group_info = cred->group_info;
 	for (g = 0; g < group_info->ngroups; g++)
-		seq_printf(m, "%d ",
-			   from_kgid_munged(user_ns, GROUP_AT(group_info, g)));
+		seq_put_decimal_ull(m, g ? ' ' : 0, from_kgid_munged(user_ns, GROUP_AT(group_info, g)));
 	put_cred(cred);
+	/* Trailing space shouldn't have been added in the first place. */
+	seq_putc(m, ' ');
 
 #ifdef CONFIG_PID_NS
 	seq_puts(m, "\nNStgid:");
 	for (g = ns->level; g <= pid->level; g++)
-		seq_printf(m, "\t%d",
-			task_tgid_nr_ns(p, pid->numbers[g].ns));
+		seq_put_decimal_ull(m, '\t', task_tgid_nr_ns(p, pid->numbers[g].ns));
 	seq_puts(m, "\nNSpid:");
 	for (g = ns->level; g <= pid->level; g++)
-		seq_printf(m, "\t%d",
-			task_pid_nr_ns(p, pid->numbers[g].ns));
+		seq_put_decimal_ull(m, '\t', task_pid_nr_ns(p, pid->numbers[g].ns));
 	seq_puts(m, "\nNSpgid:");
 	for (g = ns->level; g <= pid->level; g++)
-		seq_printf(m, "\t%d",
-			task_pgrp_nr_ns(p, pid->numbers[g].ns));
+		seq_put_decimal_ull(m, '\t', task_pgrp_nr_ns(p, pid->numbers[g].ns));
 	seq_puts(m, "\nNSsid:");
 	for (g = ns->level; g <= pid->level; g++)
-		seq_printf(m, "\t%d",
-			task_session_nr_ns(p, pid->numbers[g].ns));
+		seq_put_decimal_ull(m, '\t', task_session_nr_ns(p, pid->numbers[g].ns));
 #endif
 	seq_putc(m, '\n');
 }
@@ -299,11 +300,14 @@ static inline void task_sig(struct seq_file *m, struct task_struct *p)
 		unlock_task_sighand(p, &flags);
 	}
 
-	seq_printf(m, "Threads:\t%d\n", num_threads);
-	seq_printf(m, "SigQ:\t%lu/%lu\n", qsize, qlim);
+	seq_puts(m, "Threads:\t");
+	seq_put_decimal_ull(m, 0, num_threads);
+	seq_puts(m, "\nSigQ:\t");
+	seq_put_decimal_ull(m, 0, qsize);
+	seq_put_decimal_ull(m, '/', qlim);
 
 	/* render them all */
-	render_sigset_t(m, "SigPnd:\t", &pending);
+	render_sigset_t(m, "\nSigPnd:\t", &pending);
 	render_sigset_t(m, "ShdPnd:\t", &shpending);
 	render_sigset_t(m, "SigBlk:\t", &blocked);
 	render_sigset_t(m, "SigIgn:\t", &ignored);
@@ -348,17 +352,20 @@ static inline void task_cap(struct seq_file *m, struct task_struct *p)
 static inline void task_seccomp(struct seq_file *m, struct task_struct *p)
 {
 #ifdef CONFIG_SECCOMP
-	seq_printf(m, "Seccomp:\t%d\n", p->seccomp.mode);
+	seq_puts(m, "Seccomp:\t");
+	seq_put_decimal_ull(m, 0, p->seccomp.mode);
+	seq_putc(m, '\n');
 #endif
 }
 
 static inline void task_context_switch_counts(struct seq_file *m,
 						struct task_struct *p)
 {
-	seq_printf(m,	"voluntary_ctxt_switches:\t%lu\n"
-			"nonvoluntary_ctxt_switches:\t%lu\n",
-			p->nvcsw,
-			p->nivcsw);
+	seq_puts(m, "voluntary_ctxt_switches:\t");
+	seq_put_decimal_ull(m, 0, p->nvcsw);
+	seq_puts(m, "\nnonvoluntary_ctxt_switches:\t");
+	seq_put_decimal_ull(m, 0, p->nivcsw);
+	seq_putc(m, '\n');
 }
 
 static void task_cpus_allowed(struct seq_file *m, struct task_struct *task)

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] proc: faster /proc/*/status
  2016-08-06 12:56 [PATCH] proc: faster /proc/*/status Alexey Dobriyan
@ 2016-08-06 22:41 ` Joe Perches
  2016-08-09  3:02   ` [PATCH] seq/proc: Modify seq_put_decimal_[u]ll to take a const char *, not char Joe Perches
  2016-08-07  3:16 ` [PATCH] proc: faster /proc/*/status Andi Kleen
  1 sibling, 1 reply; 13+ messages in thread
From: Joe Perches @ 2016-08-06 22:41 UTC (permalink / raw)
  To: Alexey Dobriyan, akpm; +Cc: linux-kernel

On Sat, 2016-08-06 at 15:56 +0300, Alexey Dobriyan wrote:
> top(1) opens the following files for every PID:
> 
> 	/proc/*/stat
> 	/proc/*/statm
> 	/proc/*/status
> 
> This patch switches /proc/*/status away from seq_printf().
> The result is 13.5% speedup.

If this is really an important consideration, perhaps
seq_put_decimal_ull (_ll too) should be changed from
	void seq_put_decimal_ull(seq_file *, char, unsigned long long)
to
	void seq_put_decimal_ull(seq_file *, const char *, unsigned long)
> fs/proc/array.c |   87 ++++++++++++++++++++++++++++++--------------------------
[]
> @@ -186,51 +186,52 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
>  	task_unlock(p);
>  	rcu_read_unlock();
>  
> -	seq_printf(m,
> -		"State:\t%s\n"
> -		"Tgid:\t%d\n"
> -		"Ngid:\t%d\n"
> -		"Pid:\t%d\n"
> -		"PPid:\t%d\n"
> -		"TracerPid:\t%d\n"
> -		"Uid:\t%d\t%d\t%d\t%d\n"
> -		"Gid:\t%d\t%d\t%d\t%d\n"
> -		"FDSize:\t%d\nGroups:\t",
> -		get_task_state(p),
> -		tgid, ngid, pid_nr_ns(pid, ns), ppid, tpid,
> -		from_kuid_munged(user_ns, cred->uid),
> -		from_kuid_munged(user_ns, cred->euid),
> -		from_kuid_munged(user_ns, cred->suid),
> -		from_kuid_munged(user_ns, cred->fsuid),
> -		from_kgid_munged(user_ns, cred->gid),
> -		from_kgid_munged(user_ns, cred->egid),
> -		from_kgid_munged(user_ns, cred->sgid),
> -		from_kgid_munged(user_ns, cred->fsgid),
> -		max_fds);
> -
> +	seq_printf(m, "State:\t%s", get_task_state(p));
> +
> +	seq_puts(m, "\nTgid:\t");
> +	seq_put_decimal_ull(m, 0, tgid);

[etc...]

Perhaps too the conversions from %d to %llu are inappropriate
and seq_put_decimal and seq_put_decimal_u should be added.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] proc: faster /proc/*/status
  2016-08-06 12:56 [PATCH] proc: faster /proc/*/status Alexey Dobriyan
  2016-08-06 22:41 ` Joe Perches
@ 2016-08-07  3:16 ` Andi Kleen
  2016-08-07  8:53   ` Alexey Dobriyan
  1 sibling, 1 reply; 13+ messages in thread
From: Andi Kleen @ 2016-08-07  3:16 UTC (permalink / raw)
  To: Alexey Dobriyan; +Cc: akpm, linux-kernel

Alexey Dobriyan <adobriyan@gmail.com> writes:
> -
> +	seq_printf(m, "State:\t%s", get_task_state(p));
> +
> +	seq_puts(m, "\nTgid:\t");

The only different should be the format string.

Scanning the format string really shouldn't be that expensive?!?

It would be better if you could find out why that is slow and optimize
it. Then you would benefit every seq_printf user, not just this
special case.

Perhaps it could benefit from some of the bit masking tricks to
scan the string with wider tests than a word.

-Andi

-- 
ak@linux.intel.com -- Speaking for myself only

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] proc: faster /proc/*/status
  2016-08-07  3:16 ` [PATCH] proc: faster /proc/*/status Andi Kleen
@ 2016-08-07  8:53   ` Alexey Dobriyan
  2016-08-07 16:59     ` Andi Kleen
  0 siblings, 1 reply; 13+ messages in thread
From: Alexey Dobriyan @ 2016-08-07  8:53 UTC (permalink / raw)
  To: Andi Kleen; +Cc: akpm, linux-kernel, viro

On Sat, Aug 06, 2016 at 08:16:27PM -0700, Andi Kleen wrote:
> Alexey Dobriyan <adobriyan@gmail.com> writes:
> > -
> > +	seq_printf(m, "State:\t%s", get_task_state(p));
> > +
> > +	seq_puts(m, "\nTgid:\t");
> 
> The only different should be the format string.
> 
> Scanning the format string really shouldn't be that expensive?!?

Surprise, it is (see my reply to Al).

What seq_put_decimal_ull() did is the equivalent of

	seq << "foo";
	seq << bar;
	seq << '\n';

No precisions, not widths, no padding, no upper and lowercasing.

> It would be better if you could find out why that is slow and optimize
> it. Then you would benefit every seq_printf user, not just this
> special case.
> 
> Perhaps it could benefit from some of the bit masking tricks to
> scan the string with wider tests than a word.

And then what? Parsing format string is still be there.

This is first line of profile of the first function (format_decode)

       │     static noinline_for_stack
       │     int format_decode(const char *fmt, struct printf_spec *spec)
       │     {
 10.38 │       push   %rbp			<===
  1.07 │       mov    %rsp,%rbp
  1.09 │       push   %r12
  4.51 │       mov    %rsi,%r12
  1.40 │       push   %rbx
  1.86 │       mov    %rdi,%rbx
       │       sub    $0x8,%rsp

It is so bloated that gcc needs to be asked to not screw up with stack
size.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] proc: faster /proc/*/status
  2016-08-07  8:53   ` Alexey Dobriyan
@ 2016-08-07 16:59     ` Andi Kleen
  2016-08-07 17:39       ` Joe Perches
  0 siblings, 1 reply; 13+ messages in thread
From: Andi Kleen @ 2016-08-07 16:59 UTC (permalink / raw)
  To: Alexey Dobriyan; +Cc: Andi Kleen, akpm, linux-kernel, viro

> And then what? Parsing format string is still be there.

Perhaps could have a fast path for simple cases.

> 
> This is first line of profile of the first function (format_decode)
> 
>        │     static noinline_for_stack
>        │     int format_decode(const char *fmt, struct printf_spec *spec)
>        │     {
>  10.38 │       push   %rbp			<===
>   1.07 │       mov    %rsp,%rbp
>   1.09 │       push   %r12
>   4.51 │       mov    %rsi,%r12
>   1.40 │       push   %rbx
>   1.86 │       mov    %rdi,%rbx
>        │       sub    $0x8,%rsp
> 
> It is so bloated that gcc needs to be asked to not screw up with stack
> size.

What happens when you drop all the noinlines for this? I assume
this would alread make it faster. And now that we have bigger
stacks we can likely tolerate it.


-Andi

-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] proc: faster /proc/*/status
  2016-08-07 16:59     ` Andi Kleen
@ 2016-08-07 17:39       ` Joe Perches
  2016-08-07 17:44         ` Andi Kleen
  0 siblings, 1 reply; 13+ messages in thread
From: Joe Perches @ 2016-08-07 17:39 UTC (permalink / raw)
  To: Andi Kleen, Alexey Dobriyan; +Cc: akpm, linux-kernel, viro

On Sun, 2016-08-07 at 09:59 -0700, Andi Kleen wrote:
> > And then what? Parsing format string is still be there.
> Perhaps could have a fast path for simple cases.
> > 
> > This is first line of profile of the first function (format_decode)
> > 
> >        │     static noinline_for_stack
> >        │     int format_decode(const char *fmt, struct printf_spec *spec)
> >        │     {
> >  10.38 │       push   %rbp			<===
> >   1.07 │       mov    %rsp,%rbp
> >   1.09 │       push   %r12
> >   4.51 │       mov    %rsi,%r12
> >   1.40 │       push   %rbx
> >   1.86 │       mov    %rdi,%rbx
> >        │       sub    $0x8,%rsp
> > 
> > It is so bloated that gcc needs to be asked to not screw up with stack
> > size.
> What happens when you drop all the noinlines for this? I assume
> this would alread make it faster. And now that we have bigger
> stacks we can likely tolerate it.

%pV recurses through these code paths.
I believe the maximum current recursion depth is 3.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] proc: faster /proc/*/status
  2016-08-07 17:39       ` Joe Perches
@ 2016-08-07 17:44         ` Andi Kleen
  2016-08-07 21:22           ` Joe Perches
  0 siblings, 1 reply; 13+ messages in thread
From: Andi Kleen @ 2016-08-07 17:44 UTC (permalink / raw)
  To: Joe Perches; +Cc: Andi Kleen, Alexey Dobriyan, akpm, linux-kernel, viro

> > > It is so bloated that gcc needs to be asked to not screw up with stack
> > > size.
> > What happens when you drop all the noinlines for this? I assume
> > this would alread make it faster. And now that we have bigger
> > stacks we can likely tolerate it.
> 
> %pV recurses through these code paths.
> I believe the maximum current recursion depth is 3.

I assume 2 max would sufficient for all users in kernel.

And perhaps it would be better to get rid of "features" like this,
and instead focus on more common cases.

-Andi

-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] proc: faster /proc/*/status
  2016-08-07 17:44         ` Andi Kleen
@ 2016-08-07 21:22           ` Joe Perches
  2016-08-09 23:25             ` Andrew Morton
  0 siblings, 1 reply; 13+ messages in thread
From: Joe Perches @ 2016-08-07 21:22 UTC (permalink / raw)
  To: Andi Kleen; +Cc: Alexey Dobriyan, akpm, linux-kernel, viro

On Sun, 2016-08-07 at 10:44 -0700, Andi Kleen wrote:
> > > > It is so bloated that gcc needs to be asked to not screw up with stack
> > > > size.
> > > What happens when you drop all the noinlines for this? I assume
> > > this would alread make it faster. And now that we have bigger
> > > stacks we can likely tolerate it.
> > %pV recurses through these code paths.
> > I believe the maximum current recursion depth is 3.
> I assume 2 max would sufficient for all users in kernel.
> 
> And perhaps it would be better to get rid of "features" like this,
> and instead focus on more common cases.

It'd be a dubious trade-off in my opinion.

Overall code size has been reduced by hundreds of KB
by using %pV.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH] seq/proc: Modify seq_put_decimal_[u]ll to take a const char *, not char
  2016-08-06 22:41 ` Joe Perches
@ 2016-08-09  3:02   ` Joe Perches
  0 siblings, 0 replies; 13+ messages in thread
From: Joe Perches @ 2016-08-09  3:02 UTC (permalink / raw)
  To: Alexey Dobriyan, Alexander Viro
  Cc: Andrew Morton, linux-kernel, linux-fsdevel

Allow some seq_puts removals by taking a string instead of a single char.

Signed-off-by: Joe Perches <joe@perches.com>
---

On top of Alexey's patch, this would also save a couple percent CPU

 fs/proc/array.c          | 178 ++++++++++++++++++++++-------------------------
 fs/proc/stat.c           |  49 +++++++------
 fs/seq_file.c            |  57 +++++++++++----
 include/linux/seq_file.h |   4 +-
 4 files changed, 153 insertions(+), 135 deletions(-)

diff --git a/fs/proc/array.c b/fs/proc/array.c
index 5e7d252..d25b446 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -188,33 +188,26 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
 
 	seq_printf(m, "State:\t%s", get_task_state(p));
 
-	seq_puts(m, "\nTgid:\t");
-	seq_put_decimal_ull(m, 0, tgid);
-	seq_puts(m, "\nNgid:\t");
-	seq_put_decimal_ull(m, 0, ngid);
-	seq_puts(m, "\nPid:\t");
-	seq_put_decimal_ull(m, 0, pid_nr_ns(pid, ns));
-	seq_puts(m, "\nPPid:\t");
-	seq_put_decimal_ull(m, 0, ppid);
-	seq_puts(m, "\nTracerPid:\t");
-	seq_put_decimal_ull(m, 0, tpid);
-	seq_puts(m, "\nUid:");
-	seq_put_decimal_ull(m, '\t', from_kuid_munged(user_ns, cred->uid));
-	seq_put_decimal_ull(m, '\t', from_kuid_munged(user_ns, cred->euid));
-	seq_put_decimal_ull(m, '\t', from_kuid_munged(user_ns, cred->suid));
-	seq_put_decimal_ull(m, '\t', from_kuid_munged(user_ns, cred->fsuid));
-	seq_puts(m, "\nGid:");
-	seq_put_decimal_ull(m, '\t', from_kgid_munged(user_ns, cred->gid));
-	seq_put_decimal_ull(m, '\t', from_kgid_munged(user_ns, cred->egid));
-	seq_put_decimal_ull(m, '\t', from_kgid_munged(user_ns, cred->sgid));
-	seq_put_decimal_ull(m, '\t', from_kgid_munged(user_ns, cred->fsgid));
-	seq_puts(m, "\nFDSize:\t");
-	seq_put_decimal_ull(m, 0, max_fds);
+	seq_put_decimal_ull(m, "\nTgid:\t", tgid);
+	seq_put_decimal_ull(m, "\nNgid:\t", ngid);
+	seq_put_decimal_ull(m, "\nPid:\t", pid_nr_ns(pid, ns));
+	seq_put_decimal_ull(m, "\nPPid:\t", ppid);
+	seq_put_decimal_ull(m, "\nTracerPid:\t", tpid);
+	seq_put_decimal_ull(m, "\nUid:\t", from_kuid_munged(user_ns, cred->uid));
+	seq_put_decimal_ull(m, "\t", from_kuid_munged(user_ns, cred->euid));
+	seq_put_decimal_ull(m, "\t", from_kuid_munged(user_ns, cred->suid));
+	seq_put_decimal_ull(m, "\t", from_kuid_munged(user_ns, cred->fsuid));
+	seq_put_decimal_ull(m, "\nGid:\t", from_kgid_munged(user_ns, cred->gid));
+	seq_put_decimal_ull(m, "\t", from_kgid_munged(user_ns, cred->egid));
+	seq_put_decimal_ull(m, "\t", from_kgid_munged(user_ns, cred->sgid));
+	seq_put_decimal_ull(m, "\t", from_kgid_munged(user_ns, cred->fsgid));
+	seq_put_decimal_ull(m, "\nFDSize:\t", max_fds);
 
 	seq_puts(m, "\nGroups:\t");
 	group_info = cred->group_info;
 	for (g = 0; g < group_info->ngroups; g++)
-		seq_put_decimal_ull(m, g ? ' ' : 0, from_kgid_munged(user_ns, GROUP_AT(group_info, g)));
+		seq_put_decimal_ull(m, g ? " " : "",
+				    from_kgid_munged(user_ns, GROUP_AT(group_info, g)));
 	put_cred(cred);
 	/* Trailing space shouldn't have been added in the first place. */
 	seq_putc(m, ' ');
@@ -222,16 +215,16 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
 #ifdef CONFIG_PID_NS
 	seq_puts(m, "\nNStgid:");
 	for (g = ns->level; g <= pid->level; g++)
-		seq_put_decimal_ull(m, '\t', task_tgid_nr_ns(p, pid->numbers[g].ns));
+		seq_put_decimal_ull(m, "\t", task_tgid_nr_ns(p, pid->numbers[g].ns));
 	seq_puts(m, "\nNSpid:");
 	for (g = ns->level; g <= pid->level; g++)
-		seq_put_decimal_ull(m, '\t', task_pid_nr_ns(p, pid->numbers[g].ns));
+		seq_put_decimal_ull(m, "\t", task_pid_nr_ns(p, pid->numbers[g].ns));
 	seq_puts(m, "\nNSpgid:");
 	for (g = ns->level; g <= pid->level; g++)
-		seq_put_decimal_ull(m, '\t', task_pgrp_nr_ns(p, pid->numbers[g].ns));
+		seq_put_decimal_ull(m, "\t", task_pgrp_nr_ns(p, pid->numbers[g].ns));
 	seq_puts(m, "\nNSsid:");
 	for (g = ns->level; g <= pid->level; g++)
-		seq_put_decimal_ull(m, '\t', task_session_nr_ns(p, pid->numbers[g].ns));
+		seq_put_decimal_ull(m, "\t", task_session_nr_ns(p, pid->numbers[g].ns));
 #endif
 	seq_putc(m, '\n');
 }
@@ -300,11 +293,9 @@ static inline void task_sig(struct seq_file *m, struct task_struct *p)
 		unlock_task_sighand(p, &flags);
 	}
 
-	seq_puts(m, "Threads:\t");
-	seq_put_decimal_ull(m, 0, num_threads);
-	seq_puts(m, "\nSigQ:\t");
-	seq_put_decimal_ull(m, 0, qsize);
-	seq_put_decimal_ull(m, '/', qlim);
+	seq_put_decimal_ull(m, "Threads:\t", num_threads);
+	seq_put_decimal_ull(m, "\nSigQ:\t", qsize);
+	seq_put_decimal_ull(m, "/", qlim);
 
 	/* render them all */
 	render_sigset_t(m, "\nSigPnd:\t", &pending);
@@ -352,8 +343,7 @@ static inline void task_cap(struct seq_file *m, struct task_struct *p)
 static inline void task_seccomp(struct seq_file *m, struct task_struct *p)
 {
 #ifdef CONFIG_SECCOMP
-	seq_puts(m, "Seccomp:\t");
-	seq_put_decimal_ull(m, 0, p->seccomp.mode);
+	seq_put_decimal_ull(m, "Seccomp:\t", p->seccomp.mode);
 	seq_putc(m, '\n');
 #endif
 }
@@ -361,10 +351,8 @@ static inline void task_seccomp(struct seq_file *m, struct task_struct *p)
 static inline void task_context_switch_counts(struct seq_file *m,
 						struct task_struct *p)
 {
-	seq_puts(m, "voluntary_ctxt_switches:\t");
-	seq_put_decimal_ull(m, 0, p->nvcsw);
-	seq_puts(m, "\nnonvoluntary_ctxt_switches:\t");
-	seq_put_decimal_ull(m, 0, p->nivcsw);
+	seq_put_decimal_ull(m, "voluntary_ctxt_switches:\t", p->nvcsw);
+	seq_put_decimal_ull(m, "\nnonvoluntary_ctxt_switches:\t", p->nivcsw);
 	seq_putc(m, '\n');
 }
 
@@ -497,41 +485,41 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 	start_time = nsec_to_clock_t(task->real_start_time);
 
 	seq_printf(m, "%d (%s) %c", pid_nr_ns(pid, ns), tcomm, state);
-	seq_put_decimal_ll(m, ' ', ppid);
-	seq_put_decimal_ll(m, ' ', pgid);
-	seq_put_decimal_ll(m, ' ', sid);
-	seq_put_decimal_ll(m, ' ', tty_nr);
-	seq_put_decimal_ll(m, ' ', tty_pgrp);
-	seq_put_decimal_ull(m, ' ', task->flags);
-	seq_put_decimal_ull(m, ' ', min_flt);
-	seq_put_decimal_ull(m, ' ', cmin_flt);
-	seq_put_decimal_ull(m, ' ', maj_flt);
-	seq_put_decimal_ull(m, ' ', cmaj_flt);
-	seq_put_decimal_ull(m, ' ', cputime_to_clock_t(utime));
-	seq_put_decimal_ull(m, ' ', cputime_to_clock_t(stime));
-	seq_put_decimal_ll(m, ' ', cputime_to_clock_t(cutime));
-	seq_put_decimal_ll(m, ' ', cputime_to_clock_t(cstime));
-	seq_put_decimal_ll(m, ' ', priority);
-	seq_put_decimal_ll(m, ' ', nice);
-	seq_put_decimal_ll(m, ' ', num_threads);
-	seq_put_decimal_ull(m, ' ', 0);
-	seq_put_decimal_ull(m, ' ', start_time);
-	seq_put_decimal_ull(m, ' ', vsize);
-	seq_put_decimal_ull(m, ' ', mm ? get_mm_rss(mm) : 0);
-	seq_put_decimal_ull(m, ' ', rsslim);
-	seq_put_decimal_ull(m, ' ', mm ? (permitted ? mm->start_code : 1) : 0);
-	seq_put_decimal_ull(m, ' ', mm ? (permitted ? mm->end_code : 1) : 0);
-	seq_put_decimal_ull(m, ' ', (permitted && mm) ? mm->start_stack : 0);
-	seq_put_decimal_ull(m, ' ', esp);
-	seq_put_decimal_ull(m, ' ', eip);
+	seq_put_decimal_ll(m, " ", ppid);
+	seq_put_decimal_ll(m, " ", pgid);
+	seq_put_decimal_ll(m, " ", sid);
+	seq_put_decimal_ll(m, " ", tty_nr);
+	seq_put_decimal_ll(m, " ", tty_pgrp);
+	seq_put_decimal_ull(m, " ", task->flags);
+	seq_put_decimal_ull(m, " ", min_flt);
+	seq_put_decimal_ull(m, " ", cmin_flt);
+	seq_put_decimal_ull(m, " ", maj_flt);
+	seq_put_decimal_ull(m, " ", cmaj_flt);
+	seq_put_decimal_ull(m, " ", cputime_to_clock_t(utime));
+	seq_put_decimal_ull(m, " ", cputime_to_clock_t(stime));
+	seq_put_decimal_ll(m, " ", cputime_to_clock_t(cutime));
+	seq_put_decimal_ll(m, " ", cputime_to_clock_t(cstime));
+	seq_put_decimal_ll(m, " ", priority);
+	seq_put_decimal_ll(m, " ", nice);
+	seq_put_decimal_ll(m, " ", num_threads);
+	seq_put_decimal_ull(m, " ", 0);
+	seq_put_decimal_ull(m, " ", start_time);
+	seq_put_decimal_ull(m, " ", vsize);
+	seq_put_decimal_ull(m, " ", mm ? get_mm_rss(mm) : 0);
+	seq_put_decimal_ull(m, " ", rsslim);
+	seq_put_decimal_ull(m, " ", mm ? (permitted ? mm->start_code : 1) : 0);
+	seq_put_decimal_ull(m, " ", mm ? (permitted ? mm->end_code : 1) : 0);
+	seq_put_decimal_ull(m, " ", (permitted && mm) ? mm->start_stack : 0);
+	seq_put_decimal_ull(m, " ", esp);
+	seq_put_decimal_ull(m, " ", eip);
 	/* The signal information here is obsolete.
 	 * It must be decimal for Linux 2.0 compatibility.
 	 * Use /proc/#/status for real-time signals.
 	 */
-	seq_put_decimal_ull(m, ' ', task->pending.signal.sig[0] & 0x7fffffffUL);
-	seq_put_decimal_ull(m, ' ', task->blocked.sig[0] & 0x7fffffffUL);
-	seq_put_decimal_ull(m, ' ', sigign.sig[0] & 0x7fffffffUL);
-	seq_put_decimal_ull(m, ' ', sigcatch.sig[0] & 0x7fffffffUL);
+	seq_put_decimal_ull(m, " ", task->pending.signal.sig[0] & 0x7fffffffUL);
+	seq_put_decimal_ull(m, " ", task->blocked.sig[0] & 0x7fffffffUL);
+	seq_put_decimal_ull(m, " ", sigign.sig[0] & 0x7fffffffUL);
+	seq_put_decimal_ull(m, " ", sigcatch.sig[0] & 0x7fffffffUL);
 
 	/*
 	 * We used to output the absolute kernel address, but that's an
@@ -545,31 +533,31 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 	else
 		seq_puts(m, " 0");
 
-	seq_put_decimal_ull(m, ' ', 0);
-	seq_put_decimal_ull(m, ' ', 0);
-	seq_put_decimal_ll(m, ' ', task->exit_signal);
-	seq_put_decimal_ll(m, ' ', task_cpu(task));
-	seq_put_decimal_ull(m, ' ', task->rt_priority);
-	seq_put_decimal_ull(m, ' ', task->policy);
-	seq_put_decimal_ull(m, ' ', delayacct_blkio_ticks(task));
-	seq_put_decimal_ull(m, ' ', cputime_to_clock_t(gtime));
-	seq_put_decimal_ll(m, ' ', cputime_to_clock_t(cgtime));
+	seq_put_decimal_ull(m, " ", 0);
+	seq_put_decimal_ull(m, " ", 0);
+	seq_put_decimal_ll(m, " ", task->exit_signal);
+	seq_put_decimal_ll(m, " ", task_cpu(task));
+	seq_put_decimal_ull(m, " ", task->rt_priority);
+	seq_put_decimal_ull(m, " ", task->policy);
+	seq_put_decimal_ull(m, " ", delayacct_blkio_ticks(task));
+	seq_put_decimal_ull(m, " ", cputime_to_clock_t(gtime));
+	seq_put_decimal_ll(m, " ", cputime_to_clock_t(cgtime));
 
 	if (mm && permitted) {
-		seq_put_decimal_ull(m, ' ', mm->start_data);
-		seq_put_decimal_ull(m, ' ', mm->end_data);
-		seq_put_decimal_ull(m, ' ', mm->start_brk);
-		seq_put_decimal_ull(m, ' ', mm->arg_start);
-		seq_put_decimal_ull(m, ' ', mm->arg_end);
-		seq_put_decimal_ull(m, ' ', mm->env_start);
-		seq_put_decimal_ull(m, ' ', mm->env_end);
+		seq_put_decimal_ull(m, " ", mm->start_data);
+		seq_put_decimal_ull(m, " ", mm->end_data);
+		seq_put_decimal_ull(m, " ", mm->start_brk);
+		seq_put_decimal_ull(m, " ", mm->arg_start);
+		seq_put_decimal_ull(m, " ", mm->arg_end);
+		seq_put_decimal_ull(m, " ", mm->env_start);
+		seq_put_decimal_ull(m, " ", mm->env_end);
 	} else
-		seq_printf(m, " 0 0 0 0 0 0 0");
+		seq_puts(m, " 0 0 0 0 0 0 0");
 
 	if (permitted)
-		seq_put_decimal_ll(m, ' ', task->exit_code);
+		seq_put_decimal_ll(m, " ", task->exit_code);
 	else
-		seq_put_decimal_ll(m, ' ', 0);
+		seq_puts(m, " 0");
 
 	seq_putc(m, '\n');
 	if (mm)
@@ -605,13 +593,13 @@ int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns,
 	 * seq_printf(m, "%lu %lu %lu %lu 0 %lu 0\n",
 	 *               size, resident, shared, text, data);
 	 */
-	seq_put_decimal_ull(m, 0, size);
-	seq_put_decimal_ull(m, ' ', resident);
-	seq_put_decimal_ull(m, ' ', shared);
-	seq_put_decimal_ull(m, ' ', text);
-	seq_put_decimal_ull(m, ' ', 0);
-	seq_put_decimal_ull(m, ' ', data);
-	seq_put_decimal_ull(m, ' ', 0);
+	seq_put_decimal_ull(m, "", size);
+	seq_put_decimal_ull(m, " ", resident);
+	seq_put_decimal_ull(m, " ", shared);
+	seq_put_decimal_ull(m, " ", text);
+	seq_put_decimal_ull(m, " ", 0);
+	seq_put_decimal_ull(m, " ", data);
+	seq_put_decimal_ull(m, " ", 0);
 	seq_putc(m, '\n');
 
 	return 0;
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index 7907e45..d700c42 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -115,17 +115,16 @@ static int show_stat(struct seq_file *p, void *v)
 	}
 	sum += arch_irq_stat();
 
-	seq_puts(p, "cpu ");
-	seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(user));
-	seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(nice));
-	seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(system));
-	seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(idle));
-	seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(iowait));
-	seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(irq));
-	seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(softirq));
-	seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(steal));
-	seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(guest));
-	seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(guest_nice));
+	seq_put_decimal_ull(p, "cpu  ", cputime64_to_clock_t(user));
+	seq_put_decimal_ull(p, " ", cputime64_to_clock_t(nice));
+	seq_put_decimal_ull(p, " ", cputime64_to_clock_t(system));
+	seq_put_decimal_ull(p, " ", cputime64_to_clock_t(idle));
+	seq_put_decimal_ull(p, " ", cputime64_to_clock_t(iowait));
+	seq_put_decimal_ull(p, " ", cputime64_to_clock_t(irq));
+	seq_put_decimal_ull(p, " ", cputime64_to_clock_t(softirq));
+	seq_put_decimal_ull(p, " ", cputime64_to_clock_t(steal));
+	seq_put_decimal_ull(p, " ", cputime64_to_clock_t(guest));
+	seq_put_decimal_ull(p, " ", cputime64_to_clock_t(guest_nice));
 	seq_putc(p, '\n');
 
 	for_each_online_cpu(i) {
@@ -141,23 +140,23 @@ static int show_stat(struct seq_file *p, void *v)
 		guest = kcpustat_cpu(i).cpustat[CPUTIME_GUEST];
 		guest_nice = kcpustat_cpu(i).cpustat[CPUTIME_GUEST_NICE];
 		seq_printf(p, "cpu%d", i);
-		seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(user));
-		seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(nice));
-		seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(system));
-		seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(idle));
-		seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(iowait));
-		seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(irq));
-		seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(softirq));
-		seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(steal));
-		seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(guest));
-		seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(guest_nice));
+		seq_put_decimal_ull(p, " ", cputime64_to_clock_t(user));
+		seq_put_decimal_ull(p, " ", cputime64_to_clock_t(nice));
+		seq_put_decimal_ull(p, " ", cputime64_to_clock_t(system));
+		seq_put_decimal_ull(p, " ", cputime64_to_clock_t(idle));
+		seq_put_decimal_ull(p, " ", cputime64_to_clock_t(iowait));
+		seq_put_decimal_ull(p, " ", cputime64_to_clock_t(irq));
+		seq_put_decimal_ull(p, " ", cputime64_to_clock_t(softirq));
+		seq_put_decimal_ull(p, " ", cputime64_to_clock_t(steal));
+		seq_put_decimal_ull(p, " ", cputime64_to_clock_t(guest));
+		seq_put_decimal_ull(p, " ", cputime64_to_clock_t(guest_nice));
 		seq_putc(p, '\n');
 	}
-	seq_printf(p, "intr %llu", (unsigned long long)sum);
+	seq_put_decimal_ull(p, "intr ", (unsigned long long)sum);
 
 	/* sum again ? it could be updated? */
 	for_each_irq_nr(j)
-		seq_put_decimal_ull(p, ' ', kstat_irqs_usr(j));
+		seq_put_decimal_ull(p, " ", kstat_irqs_usr(j));
 
 	seq_printf(p,
 		"\nctxt %llu\n"
@@ -171,10 +170,10 @@ static int show_stat(struct seq_file *p, void *v)
 		nr_running(),
 		nr_iowait());
 
-	seq_printf(p, "softirq %llu", (unsigned long long)sum_softirq);
+	seq_put_decimal_ull(p, "softirq ", (unsigned long long)sum_softirq);
 
 	for (i = 0; i < NR_SOFTIRQS; i++)
-		seq_put_decimal_ull(p, ' ', per_softirq_sums[i]);
+		seq_put_decimal_ull(p, " ", per_softirq_sums[i]);
 	seq_putc(p, '\n');
 
 	return 0;
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 19f532e..b8ac757e 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -677,11 +677,11 @@ EXPORT_SYMBOL(seq_puts);
 /*
  * A helper routine for putting decimal numbers without rich format of printf().
  * only 'unsigned long long' is supported.
- * This routine will put one byte delimiter + number into seq_file.
+ * This routine will put strlen(delimiter) + number into seq_file.
  * This routine is very quick when you show lots of numbers.
  * In usual cases, it will be better to use seq_printf(). It's easier to read.
  */
-void seq_put_decimal_ull(struct seq_file *m, char delimiter,
+void seq_put_decimal_ull(struct seq_file *m, const char *delimiter,
 			 unsigned long long num)
 {
 	int len;
@@ -689,8 +689,15 @@ void seq_put_decimal_ull(struct seq_file *m, char delimiter,
 	if (m->count + 2 >= m->size) /* we'll write 2 bytes at least */
 		goto overflow;
 
-	if (delimiter)
-		m->buf[m->count++] = delimiter;
+	len = strlen(delimiter);
+	if (m->count + len >= m->size)
+		goto overflow;
+
+	memcpy(m->buf + m->count, delimiter, len);
+	m->count += len;
+
+	if (m->count + 1 >= m->size)
+		goto overflow;
 
 	if (num < 10) {
 		m->buf[m->count++] = num + '0';
@@ -700,6 +707,7 @@ void seq_put_decimal_ull(struct seq_file *m, char delimiter,
 	len = num_to_str(m->buf + m->count, m->size - m->count, num);
 	if (!len)
 		goto overflow;
+
 	m->count += len;
 	return;
 
@@ -708,19 +716,42 @@ overflow:
 }
 EXPORT_SYMBOL(seq_put_decimal_ull);
 
-void seq_put_decimal_ll(struct seq_file *m, char delimiter, long long num)
+void seq_put_decimal_ll(struct seq_file *m, const char *delimiter, long long num)
 {
+	int len;
+
+	if (m->count + 3 >= m->size) /* we'll write 2 bytes at least */
+		goto overflow;
+
+	len = strlen(delimiter);
+	if (m->count + len >= m->size)
+		goto overflow;
+
+	memcpy(m->buf + m->count, delimiter, len);
+	m->count += len;
+
+	if (m->count + 2 >= m->size)
+		goto overflow;
+
 	if (num < 0) {
-		if (m->count + 3 >= m->size) {
-			seq_set_overflow(m);
-			return;
-		}
-		if (delimiter)
-			m->buf[m->count++] = delimiter;
+		m->buf[m->count++] = '-';
 		num = -num;
-		delimiter = '-';
 	}
-	seq_put_decimal_ull(m, delimiter, num);
+
+	if (num < 10) {
+		m->buf[m->count++] = num + '0';
+		return;
+	}
+
+	len = num_to_str(m->buf + m->count, m->size - m->count, num);
+	if (!len)
+		goto overflow;
+
+	m->count += len;
+	return;
+
+overflow:
+	seq_set_overflow(m);
 }
 EXPORT_SYMBOL(seq_put_decimal_ll);
 
diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
index f3d45dd..e305b66 100644
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -117,9 +117,9 @@ __printf(2, 3)
 void seq_printf(struct seq_file *m, const char *fmt, ...);
 void seq_putc(struct seq_file *m, char c);
 void seq_puts(struct seq_file *m, const char *s);
-void seq_put_decimal_ull(struct seq_file *m, char delimiter,
+void seq_put_decimal_ull(struct seq_file *m, const char *delimiter,
 			 unsigned long long num);
-void seq_put_decimal_ll(struct seq_file *m, char delimiter, long long num);
+void seq_put_decimal_ll(struct seq_file *m, const char *delimiter, long long num);
 void seq_escape(struct seq_file *m, const char *s, const char *esc);
 
 void seq_hex_dump(struct seq_file *m, const char *prefix_str, int prefix_type,
-- 
2.8.0.rc4.16.g56331f8

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH] proc: faster /proc/*/status
  2016-08-07 21:22           ` Joe Perches
@ 2016-08-09 23:25             ` Andrew Morton
  2016-08-10  6:38               ` [RFC PATCH] meminfo: Break apart a very long seq_printf with #ifdefs Joe Perches
  0 siblings, 1 reply; 13+ messages in thread
From: Andrew Morton @ 2016-08-09 23:25 UTC (permalink / raw)
  To: Joe Perches; +Cc: Andi Kleen, Alexey Dobriyan, linux-kernel, viro

On Sun, 07 Aug 2016 14:22:31 -0700 Joe Perches <joe@perches.com> wrote:

> On Sun, 2016-08-07 at 10:44 -0700, Andi Kleen wrote:
> > > > > It is so bloated that gcc needs to be asked to not screw up with stack
> > > > > size.
> > > > What happens when you drop all the noinlines for this? I assume
> > > > this would alread make it faster. And now that we have bigger
> > > > stacks we can likely tolerate it.
> > > %pV recurses through these code paths.
> > > I believe the maximum current recursion depth is 3.
> > I assume 2 max would sufficient for all users in kernel.
> > 
> > And perhaps it would be better to get rid of "features" like this,
> > and instead focus on more common cases.
> 
> It'd be a dubious trade-off in my opinion.
> 
> Overall code size has been reduced by hundreds of KB
> by using %pV.

I kinda like Alexey's patch just for aesthetic reasons.  Those huge
straggly printk-style statements are a real pain to maintain, trying to
keep the control string and the args in sync.  Especially when there are
ifdefs everywhere - take a look at meminfo_proc_show() and weep.  Geeze
the number of times I've had to maintain (and screw up) conflicts in
that thing...

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [RFC PATCH] meminfo: Break apart a very long seq_printf with #ifdefs
  2016-08-09 23:25             ` Andrew Morton
@ 2016-08-10  6:38               ` Joe Perches
  2016-08-11 21:50                 ` Andrew Morton
  0 siblings, 1 reply; 13+ messages in thread
From: Joe Perches @ 2016-08-10  6:38 UTC (permalink / raw)
  To: Andrew Morton, linux-kernel; +Cc: Andi Kleen, Alexey Dobriyan, Al Viro

Use a specific routine to emit most lines so that the code is
easier to read and maintain.

Signed-off-by: Joe Perches <joe@perches.com>
---

compiled but untested

 fs/proc/meminfo.c | 211 ++++++++++++++++++++++++------------------------------
 1 file changed, 95 insertions(+), 116 deletions(-)

diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 09e18fd..c5073b8 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -23,6 +23,25 @@ void __attribute__((weak)) arch_report_meminfo(struct seq_file *m)
 {
 }
 
+static void show_val_kb(struct seq_file *m, const char *s, unsigned long num)
+{
+	char v[32];
+	static const char blanks[7] = {' ', ' ', ' ', ' ',' ', ' ', ' '};
+	int len;
+
+	len = num_to_str(v, sizeof(v), num << (PAGE_SHIFT - 10));
+
+	seq_write(m, s, 16);
+
+	if (len > 0) {
+		if (len < 8)
+			seq_write(m, blanks, 8 - len);
+
+		seq_write(m, v, len);
+	}
+	seq_write(m, " kB\n", 4);
+}
+
 static int meminfo_proc_show(struct seq_file *m, void *v)
 {
 	struct sysinfo i;
@@ -32,10 +51,6 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
 	unsigned long pages[NR_LRU_LISTS];
 	int lru;
 
-/*
- * display in kilobytes.
- */
-#define K(x) ((x) << (PAGE_SHIFT - 10))
 	si_meminfo(&i);
 	si_swapinfo(&i);
 	committed = percpu_counter_read_positive(&vm_committed_as);
@@ -50,136 +65,100 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
 
 	available = si_mem_available();
 
-	/*
-	 * Tagged format, for easy grepping and expansion.
-	 */
-	seq_printf(m,
-		"MemTotal:       %8lu kB\n"
-		"MemFree:        %8lu kB\n"
-		"MemAvailable:   %8lu kB\n"
-		"Buffers:        %8lu kB\n"
-		"Cached:         %8lu kB\n"
-		"SwapCached:     %8lu kB\n"
-		"Active:         %8lu kB\n"
-		"Inactive:       %8lu kB\n"
-		"Active(anon):   %8lu kB\n"
-		"Inactive(anon): %8lu kB\n"
-		"Active(file):   %8lu kB\n"
-		"Inactive(file): %8lu kB\n"
-		"Unevictable:    %8lu kB\n"
-		"Mlocked:        %8lu kB\n"
-#ifdef CONFIG_HIGHMEM
-		"HighTotal:      %8lu kB\n"
-		"HighFree:       %8lu kB\n"
-		"LowTotal:       %8lu kB\n"
-		"LowFree:        %8lu kB\n"
-#endif
-#ifndef CONFIG_MMU
-		"MmapCopy:       %8lu kB\n"
-#endif
-		"SwapTotal:      %8lu kB\n"
-		"SwapFree:       %8lu kB\n"
-		"Dirty:          %8lu kB\n"
-		"Writeback:      %8lu kB\n"
-		"AnonPages:      %8lu kB\n"
-		"Mapped:         %8lu kB\n"
-		"Shmem:          %8lu kB\n"
-		"Slab:           %8lu kB\n"
-		"SReclaimable:   %8lu kB\n"
-		"SUnreclaim:     %8lu kB\n"
-		"KernelStack:    %8lu kB\n"
-		"PageTables:     %8lu kB\n"
-#ifdef CONFIG_QUICKLIST
-		"Quicklists:     %8lu kB\n"
-#endif
-		"NFS_Unstable:   %8lu kB\n"
-		"Bounce:         %8lu kB\n"
-		"WritebackTmp:   %8lu kB\n"
-		"CommitLimit:    %8lu kB\n"
-		"Committed_AS:   %8lu kB\n"
-		"VmallocTotal:   %8lu kB\n"
-		"VmallocUsed:    %8lu kB\n"
-		"VmallocChunk:   %8lu kB\n"
-#ifdef CONFIG_MEMORY_FAILURE
-		"HardwareCorrupted: %5lu kB\n"
-#endif
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-		"AnonHugePages:  %8lu kB\n"
-		"ShmemHugePages: %8lu kB\n"
-		"ShmemPmdMapped: %8lu kB\n"
-#endif
-#ifdef CONFIG_CMA
-		"CmaTotal:       %8lu kB\n"
-		"CmaFree:        %8lu kB\n"
-#endif
-		,
-		K(i.totalram),
-		K(i.freeram),
-		K(available),
-		K(i.bufferram),
-		K(cached),
-		K(total_swapcache_pages()),
-		K(pages[LRU_ACTIVE_ANON]   + pages[LRU_ACTIVE_FILE]),
-		K(pages[LRU_INACTIVE_ANON] + pages[LRU_INACTIVE_FILE]),
-		K(pages[LRU_ACTIVE_ANON]),
-		K(pages[LRU_INACTIVE_ANON]),
-		K(pages[LRU_ACTIVE_FILE]),
-		K(pages[LRU_INACTIVE_FILE]),
-		K(pages[LRU_UNEVICTABLE]),
-		K(global_page_state(NR_MLOCK)),
+	show_val_kb(m, "MemTotal:       ", i.totalram);
+	show_val_kb(m, "MemFree:        ", i.freeram);
+	show_val_kb(m, "MemAvailable:   ", available);
+	show_val_kb(m, "Buffers:        ", i.bufferram);
+	show_val_kb(m, "Cached:         ", cached);
+	show_val_kb(m, "SwapCached:     ", total_swapcache_pages());
+	show_val_kb(m, "Active:         ", pages[LRU_ACTIVE_ANON] +
+					   pages[LRU_ACTIVE_FILE]);
+	show_val_kb(m, "Inactive:       ", pages[LRU_INACTIVE_ANON] +
+					   pages[LRU_INACTIVE_FILE]);
+	show_val_kb(m, "Active(anon):   ", pages[LRU_ACTIVE_ANON]);
+	show_val_kb(m, "Inactive(anon): ", pages[LRU_INACTIVE_ANON]);
+	show_val_kb(m, "Active(file):   ", pages[LRU_ACTIVE_FILE]);
+	show_val_kb(m, "Inactive(file): ", pages[LRU_INACTIVE_FILE]);
+	show_val_kb(m, "Unevictable:    ", pages[LRU_UNEVICTABLE]);
+	show_val_kb(m, "Mlocked:        ", global_page_state(NR_MLOCK));
+
 #ifdef CONFIG_HIGHMEM
-		K(i.totalhigh),
-		K(i.freehigh),
-		K(i.totalram-i.totalhigh),
-		K(i.freeram-i.freehigh),
+	show_val_kb(m, "HighTotal:      ", i.totalhigh);
+	show_val_kb(m, "HighFree:       ", i.freehigh);
+	show_val_kb(m, "LowTotal:       ", i.totalram - i.totalhigh);
+	show_val_kb(m, "LowFree:        ", i.freeram - i.freehigh);
 #endif
+
 #ifndef CONFIG_MMU
-		K((unsigned long) atomic_long_read(&mmap_pages_allocated)),
+	show_val_kb(m, "MmapCopy:       ",
+		    (unsigned long)atomic_long_read(&mmap_pages_allocated));
 #endif
-		K(i.totalswap),
-		K(i.freeswap),
-		K(global_node_page_state(NR_FILE_DIRTY)),
-		K(global_node_page_state(NR_WRITEBACK)),
-		K(global_node_page_state(NR_ANON_MAPPED)),
-		K(global_node_page_state(NR_FILE_MAPPED)),
-		K(i.sharedram),
-		K(global_page_state(NR_SLAB_RECLAIMABLE) +
-				global_page_state(NR_SLAB_UNRECLAIMABLE)),
-		K(global_page_state(NR_SLAB_RECLAIMABLE)),
-		K(global_page_state(NR_SLAB_UNRECLAIMABLE)),
-		global_page_state(NR_KERNEL_STACK_KB),
-		K(global_page_state(NR_PAGETABLE)),
+
+	show_val_kb(m, "SwapTotal:      ", i.totalswap);
+	show_val_kb(m, "SwapFree:       ", i.freeswap);
+	show_val_kb(m, "Dirty:          ",
+		    global_node_page_state(NR_FILE_DIRTY));
+	show_val_kb(m, "Writeback:      ",
+		    global_node_page_state(NR_WRITEBACK));
+	show_val_kb(m, "AnonPages:      ",
+		    global_node_page_state(NR_ANON_MAPPED));
+	show_val_kb(m, "Mapped:         ",
+		    global_node_page_state(NR_FILE_MAPPED));
+	show_val_kb(m, "Shmem:          ", i.sharedram);
+	show_val_kb(m, "Slab:           ",
+		    global_page_state(NR_SLAB_RECLAIMABLE) +
+		    global_page_state(NR_SLAB_UNRECLAIMABLE));
+
+	show_val_kb(m, "SReclaimable:   ",
+		    global_page_state(NR_SLAB_RECLAIMABLE));
+	show_val_kb(m, "SUnreclaim:     ",
+		    global_page_state(NR_SLAB_UNRECLAIMABLE));
+	seq_printf(m, "KernelStack:    %8lu kB\n",
+		   global_page_state(NR_KERNEL_STACK_KB));
+	show_val_kb(m, "PageTables:     ",
+		    global_page_state(NR_PAGETABLE));
 #ifdef CONFIG_QUICKLIST
-		K(quicklist_total_size()),
+	show_val_kb(m, "Quicklists:     ", quicklist_total_size());
 #endif
-		K(global_node_page_state(NR_UNSTABLE_NFS)),
-		K(global_page_state(NR_BOUNCE)),
-		K(global_node_page_state(NR_WRITEBACK_TEMP)),
-		K(vm_commit_limit()),
-		K(committed),
-		(unsigned long)VMALLOC_TOTAL >> 10,
-		0ul, // used to be vmalloc 'used'
-		0ul  // used to be vmalloc 'largest_chunk'
+
+	show_val_kb(m, "NFS_Unstable:   ",
+		    global_node_page_state(NR_UNSTABLE_NFS));
+	show_val_kb(m, "Bounce:         ",
+		    global_page_state(NR_BOUNCE));
+	show_val_kb(m, "WritebackTmp:   ",
+		    global_node_page_state(NR_WRITEBACK_TEMP));
+	show_val_kb(m, "CommitLimit:    ", vm_commit_limit());
+	show_val_kb(m, "Committed_AS:   ", committed);
+	seq_printf(m, "VmallocTotal:   %8lu kB\n",
+		   (unsigned long)VMALLOC_TOTAL >> 10);
+	show_val_kb(m, "VmallocUsed:    ", 0ul);
+	show_val_kb(m, "VmallocChunk:   ", 0ul);
+
 #ifdef CONFIG_MEMORY_FAILURE
-		, atomic_long_read(&num_poisoned_pages) << (PAGE_SHIFT - 10)
+	seq_printf(m, "HardwareCorrupted: %5lu kB\n",
+		   atomic_long_read(&num_poisoned_pages) << (PAGE_SHIFT - 10));
 #endif
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-		, K(global_node_page_state(NR_ANON_THPS) * HPAGE_PMD_NR)
-		, K(global_node_page_state(NR_SHMEM_THPS) * HPAGE_PMD_NR)
-		, K(global_node_page_state(NR_SHMEM_PMDMAPPED) * HPAGE_PMD_NR)
+	show_val_kb(m, "AnonHugePages:  ",
+		    global_node_page_state(NR_ANON_THPS) * HPAGE_PMD_NR);
+	show_val_kb(m, "ShmemHugePages: ",
+		    global_node_page_state(NR_SHMEM_THPS) * HPAGE_PMD_NR);
+	show_val_kb(m, "ShmemPmdMapped: ",
+		    global_node_page_state(NR_SHMEM_PMDMAPPED) * HPAGE_PMD_NR);
 #endif
+
 #ifdef CONFIG_CMA
-		, K(totalcma_pages)
-		, K(global_page_state(NR_FREE_CMA_PAGES))
+	show_val_kb(m, "CmaTotal:       ", totalcma_pages);
+	show_val_kb(m, "CmaFree:        ",
+		    global_page_state(NR_FREE_CMA_PAGES));
 #endif
-		);
 
 	hugetlb_report_meminfo(m);
 
 	arch_report_meminfo(m);
 
 	return 0;
-#undef K
 }
 
 static int meminfo_proc_open(struct inode *inode, struct file *file)
-- 
2.8.0.rc4.16.g56331f8

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [RFC PATCH] meminfo: Break apart a very long seq_printf with #ifdefs
  2016-08-10  6:38               ` [RFC PATCH] meminfo: Break apart a very long seq_printf with #ifdefs Joe Perches
@ 2016-08-11 21:50                 ` Andrew Morton
  2016-08-11 21:57                   ` Joe Perches
  0 siblings, 1 reply; 13+ messages in thread
From: Andrew Morton @ 2016-08-11 21:50 UTC (permalink / raw)
  To: Joe Perches; +Cc: linux-kernel, Andi Kleen, Alexey Dobriyan, Al Viro

On Tue,  9 Aug 2016 23:38:56 -0700 Joe Perches <joe@perches.com> wrote:

> Use a specific routine to emit most lines so that the code is
> easier to read and maintain.

huh.

   text    data     bss     dec     hex filename
   2976       8       0    2984     ba8 fs/proc/meminfo.o before
   2669       8       0    2677     a75 fs/proc/meminfo.o after

Nice.  I wonder why that happened.

Let's hope it works ;)

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC PATCH] meminfo: Break apart a very long seq_printf with #ifdefs
  2016-08-11 21:50                 ` Andrew Morton
@ 2016-08-11 21:57                   ` Joe Perches
  0 siblings, 0 replies; 13+ messages in thread
From: Joe Perches @ 2016-08-11 21:57 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel, Andi Kleen, Alexey Dobriyan, Al Viro

On Thu, 2016-08-11 at 14:50 -0700, Andrew Morton wrote:
> On Tue,  9 Aug 2016 23:38:56 -0700 Joe Perches <joe@perches.com> wrote:
> > Use a specific routine to emit most lines so that the code is
> > easier to read and maintain.
> huh.
> 
>    text    data     bss     dec     hex filename
>    2976       8       0    2984     ba8 fs/proc/meminfo.o before
>    2669       8       0    2677     a75 fs/proc/meminfo.o after
> 
> Nice.  I wonder why that happened.

Fewer shifts via removal of K macro.
Much smaller call stack in seq_printf.

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2016-08-11 21:58 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-08-06 12:56 [PATCH] proc: faster /proc/*/status Alexey Dobriyan
2016-08-06 22:41 ` Joe Perches
2016-08-09  3:02   ` [PATCH] seq/proc: Modify seq_put_decimal_[u]ll to take a const char *, not char Joe Perches
2016-08-07  3:16 ` [PATCH] proc: faster /proc/*/status Andi Kleen
2016-08-07  8:53   ` Alexey Dobriyan
2016-08-07 16:59     ` Andi Kleen
2016-08-07 17:39       ` Joe Perches
2016-08-07 17:44         ` Andi Kleen
2016-08-07 21:22           ` Joe Perches
2016-08-09 23:25             ` Andrew Morton
2016-08-10  6:38               ` [RFC PATCH] meminfo: Break apart a very long seq_printf with #ifdefs Joe Perches
2016-08-11 21:50                 ` Andrew Morton
2016-08-11 21:57                   ` Joe Perches

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.