kernel-janitors.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2 1/2] seq_file: Optimize seq_puts()
@ 2024-04-22 10:24 Christophe JAILLET
  2024-04-22 10:24 ` [PATCH v2 2/2] seq_file: Simplify __seq_puts() Christophe JAILLET
  2024-05-02 14:31 ` [PATCH v2 1/2] seq_file: Optimize seq_puts() Christian Brauner
  0 siblings, 2 replies; 3+ messages in thread
From: Christophe JAILLET @ 2024-04-22 10:24 UTC (permalink / raw)
  To: David.Laight, rasmus.villemoes, Alexander Viro,
	Christian Brauner, Jan Kara
  Cc: linux-kernel, kernel-janitors, Christophe JAILLET, linux-fsdevel

Most of seq_puts() usages are done with a string literal. In such cases,
the length of the string car be computed at compile time in order to save
a strlen() call at run-time. seq_putc() or seq_write() can then be used
instead.

This saves a few cycles.

To have an estimation of how often this optimization triggers:
   $ git grep seq_puts.*\" | wc -l
   3436

   $ git grep seq_puts.*\".\" | wc -l
   84

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
---
Changes in v2:
   - Use a function, instead of a macro   [Al Viro]
   - Handle the case of 1 char only strings, in order to use seq_putc()   [Al Viro]
   - Use __always_inline   [David Laight]

V1: https://lore.kernel.org/all/5c4f7ad7b88f5026940efa9c8be36a58755ec1b3.1704374916.git.christophe.jaillet@wanadoo.fr/


Checked by comparing the output of a few .s files.
Here is one of these outputs:

$ diff -u drivers/clk/clk.s.old drivers/clk/clk.s | grep -C6 seq_w

 .L2918:
@@ -46072,10 +46073,11 @@
 	call	clk_prepare_unlock	#
 # drivers/clk/clk.c:3438: 	clk_pm_runtime_put_all();
 	call	clk_pm_runtime_put_all	#
-# drivers/clk/clk.c:3440: 	seq_puts(s, "}\n");
+# ./include/linux/seq_file.h:130: 		seq_write(m, s, __builtin_strlen(s));
+	movl	$2, %edx	#,
 	movq	$.LC94, %rsi	#,
 	movq	%rbp, %rdi	# s,
-	call	seq_puts	#
+	call	seq_write	#
 # drivers/clk/clk.c:3441: 	return 0;
 	jmp	.L2917	#
 	.size	clk_dump_show, .-clk_dump_show
@@ -46200,7 +46202,7 @@
 	leaq	128(%rbx), %r15	#, _97
 	movq	%r15, %rdi	# _97,
--
 # drivers/clk/clk.c:1987: 		__clk_recalc_rates(core, false, 0);
@@ -46480,15 +46482,17 @@
 	call	seq_printf	#
 	jmp	.L2950	#
 .L2946:
-# drivers/clk/clk.c:3315: 		seq_puts(s, "-----");
+# ./include/linux/seq_file.h:130: 		seq_write(m, s, __builtin_strlen(s));
 	call	__sanitizer_cov_trace_pc	#
+	movl	$5, %edx	#,
 	movq	$.LC100, %rsi	#,
 	movq	%rbp, %rdi	# s,
-	call	seq_puts	#
+	call	seq_write	#
+# ./include/linux/seq_file.h:131: }
 	jmp	.L2947	#
 .L2957:
 # drivers/clk/clk.c:1903: 	return clk_core_get_accuracy_no_lock(core);
-	xorl	%r14d, %r14d	# _34
+	xorl	%r14d, %r14d	# _35
--
@@ -46736,21 +46740,22 @@
 	call	__sanitizer_cov_trace_pc	#
 	leaq	240(%r12), %rdi	#, tmp101
 	call	__tsan_read8	#
-# drivers/clk/clk.c:3355: 	seq_puts(s, "                                 enable  prepare  protect                                duty  hardware                            connection\n");
-	movq	$.LC104, %rsi	#,
+# ./include/linux/seq_file.h:130: 		seq_write(m, s, __builtin_strlen(s));
+	movl	$142, %edx	#,
 	movq	%r12, %rdi	# s,
+	movq	$.LC104, %rsi	#,
 # drivers/clk/clk.c:3352: 	struct hlist_head **lists = s->private;
 	movq	240(%r12), %r13	# s_10(D)->private, lists
-# drivers/clk/clk.c:3355: 	seq_puts(s, "                                 enable  prepare  protect                                duty  hardware                            connection\n");
-	call	seq_puts	#
-# drivers/clk/clk.c:3356: 	seq_puts(s, "   clock                          count    count    count        rate   accuracy phase  cycle    enable   consumer                         id\n");
+# ./include/linux/seq_file.h:130: 		seq_write(m, s, __builtin_strlen(s));
+	call	seq_write	#
+	movl	$142, %edx	#,
 	movq	$.LC105, %rsi	#,
 	movq	%r12, %rdi	# s,
-	call	seq_puts	#
-# drivers/clk/clk.c:3357: 	seq_puts(s, "---------------------------------------------------------------------------------------------------------------------------------------------\n");
+	call	seq_write	#
 	movq	$.LC106, %rsi	#,
 	movq	%r12, %rdi	# s,
-	call	seq_puts	#
+	movl	$142, %edx	#,
+	call	seq_write	#
 # drivers/clk/clk.c:3359: 	ret = clk_pm_runtime_get_all();
 	call	clk_pm_runtime_get_all	#
 # drivers/clk/clk.c:3360: 	if (ret)
@@ -57943,7 +57948,7 @@
 	subq	$88, %rsp	#,
 # drivers/clk/clk.c:5338: {


The output for seq_putc() generation has also be checked and works.
---
 fs/seq_file.c            |  4 ++--
 include/linux/seq_file.h | 13 ++++++++++++-
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/fs/seq_file.c b/fs/seq_file.c
index f5fdaf3b1572..8ef0a07033ca 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -669,7 +669,7 @@ void seq_putc(struct seq_file *m, char c)
 }
 EXPORT_SYMBOL(seq_putc);
 
-void seq_puts(struct seq_file *m, const char *s)
+void __seq_puts(struct seq_file *m, const char *s)
 {
 	int len = strlen(s);
 
@@ -680,7 +680,7 @@ void seq_puts(struct seq_file *m, const char *s)
 	memcpy(m->buf + m->count, s, len);
 	m->count += len;
 }
-EXPORT_SYMBOL(seq_puts);
+EXPORT_SYMBOL(__seq_puts);
 
 /**
  * seq_put_decimal_ull_width - A helper routine for putting decimal numbers
diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
index 234bcdb1fba4..8bd4fda6e027 100644
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -118,7 +118,18 @@ void seq_vprintf(struct seq_file *m, const char *fmt, va_list args);
 __printf(2, 3)
 void seq_printf(struct seq_file *m, const char *fmt, ...);
 void seq_putc(struct seq_file *m, char c);
-void seq_puts(struct seq_file *m, const char *s);
+void __seq_puts(struct seq_file *m, const char *s);
+
+static __always_inline void seq_puts(struct seq_file *m, const char *s)
+{
+	if (!__builtin_constant_p(*s))
+		__seq_puts(m, s);
+	else if (s[0] && !s[1])
+		seq_putc(m, s[0]);
+	else
+		seq_write(m, s, __builtin_strlen(s));
+}
+
 void seq_put_decimal_ull_width(struct seq_file *m, const char *delimiter,
 			       unsigned long long num, unsigned int width);
 void seq_put_decimal_ull(struct seq_file *m, const char *delimiter,
-- 
2.44.0


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [PATCH v2 2/2] seq_file: Simplify __seq_puts()
  2024-04-22 10:24 [PATCH v2 1/2] seq_file: Optimize seq_puts() Christophe JAILLET
@ 2024-04-22 10:24 ` Christophe JAILLET
  2024-05-02 14:31 ` [PATCH v2 1/2] seq_file: Optimize seq_puts() Christian Brauner
  1 sibling, 0 replies; 3+ messages in thread
From: Christophe JAILLET @ 2024-04-22 10:24 UTC (permalink / raw)
  To: David.Laight, rasmus.villemoes, Alexander Viro,
	Christian Brauner, Jan Kara
  Cc: linux-kernel, kernel-janitors, Christophe JAILLET, linux-fsdevel

Change the implementation of the out-of-line __seq_puts() to simply be
a seq_write() call instead of duplicating the overflow/memcpy logic.

Suggested-by: Rasmus Villemoes <rasmus.villemoes@prevas.dk>
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
---
Changes in v2:
   - New patch

Now than most (if not all) seq_puts() calls will be turned into a
seq_write() or seq_putc() at compilation time, the added function call in
__seq_puts() should not be noticeable.

It could be even better to just remove this __seq_puts() and call
seq_write(m, s, strlen(s)) directly in seq_puts() if it can't be
optimized at compile time.
---
 fs/seq_file.c | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/fs/seq_file.c b/fs/seq_file.c
index 8ef0a07033ca..e676c8b0cf5d 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -671,14 +671,7 @@ EXPORT_SYMBOL(seq_putc);
 
 void __seq_puts(struct seq_file *m, const char *s)
 {
-	int len = strlen(s);
-
-	if (m->count + len >= m->size) {
-		seq_set_overflow(m);
-		return;
-	}
-	memcpy(m->buf + m->count, s, len);
-	m->count += len;
+	seq_write(m, s, strlen(s));
 }
 EXPORT_SYMBOL(__seq_puts);
 
-- 
2.44.0


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH v2 1/2] seq_file: Optimize seq_puts()
  2024-04-22 10:24 [PATCH v2 1/2] seq_file: Optimize seq_puts() Christophe JAILLET
  2024-04-22 10:24 ` [PATCH v2 2/2] seq_file: Simplify __seq_puts() Christophe JAILLET
@ 2024-05-02 14:31 ` Christian Brauner
  1 sibling, 0 replies; 3+ messages in thread
From: Christian Brauner @ 2024-05-02 14:31 UTC (permalink / raw)
  To: Christophe JAILLET
  Cc: Christian Brauner, linux-kernel, kernel-janitors, linux-fsdevel,
	David.Laight, rasmus.villemoes, Alexander Viro, Jan Kara

On Mon, 22 Apr 2024 12:24:06 +0200, Christophe JAILLET wrote:
> Most of seq_puts() usages are done with a string literal. In such cases,
> the length of the string car be computed at compile time in order to save
> a strlen() call at run-time. seq_putc() or seq_write() can then be used
> instead.
> 
> This saves a few cycles.
> 
> [...]

Applied to the vfs.misc branch of the vfs/vfs.git tree.
Patches in the vfs.misc branch should appear in linux-next soon.

Please report any outstanding bugs that were missed during review in a
new review to the original patch series allowing us to drop it.

It's encouraged to provide Acked-bys and Reviewed-bys even though the
patch has now been applied. If possible patch trailers will be updated.

Note that commit hashes shown below are subject to change due to rebase,
trailer updates or similar. If in doubt, please check the listed branch.

tree:   https://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs.git
branch: vfs.misc

[1/2] seq_file: Optimize seq_puts()
      https://git.kernel.org/vfs/vfs/c/45751097aeb3
[2/2] seq_file: Simplify __seq_puts()
      https://git.kernel.org/vfs/vfs/c/e035af9f6eba

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2024-05-02 14:31 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-04-22 10:24 [PATCH v2 1/2] seq_file: Optimize seq_puts() Christophe JAILLET
2024-04-22 10:24 ` [PATCH v2 2/2] seq_file: Simplify __seq_puts() Christophe JAILLET
2024-05-02 14:31 ` [PATCH v2 1/2] seq_file: Optimize seq_puts() Christian Brauner

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).