* [PATCH v2 1/2] seq_file: Optimize seq_puts()
@ 2024-04-22 10:24 Christophe JAILLET
2024-04-22 10:24 ` [PATCH v2 2/2] seq_file: Simplify __seq_puts() Christophe JAILLET
2024-05-02 14:31 ` [PATCH v2 1/2] seq_file: Optimize seq_puts() Christian Brauner
0 siblings, 2 replies; 3+ messages in thread
From: Christophe JAILLET @ 2024-04-22 10:24 UTC (permalink / raw)
To: David.Laight, rasmus.villemoes, Alexander Viro,
Christian Brauner, Jan Kara
Cc: linux-kernel, kernel-janitors, Christophe JAILLET, linux-fsdevel
Most of seq_puts() usages are done with a string literal. In such cases,
the length of the string car be computed at compile time in order to save
a strlen() call at run-time. seq_putc() or seq_write() can then be used
instead.
This saves a few cycles.
To have an estimation of how often this optimization triggers:
$ git grep seq_puts.*\" | wc -l
3436
$ git grep seq_puts.*\".\" | wc -l
84
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
---
Changes in v2:
- Use a function, instead of a macro [Al Viro]
- Handle the case of 1 char only strings, in order to use seq_putc() [Al Viro]
- Use __always_inline [David Laight]
V1: https://lore.kernel.org/all/5c4f7ad7b88f5026940efa9c8be36a58755ec1b3.1704374916.git.christophe.jaillet@wanadoo.fr/
Checked by comparing the output of a few .s files.
Here is one of these outputs:
$ diff -u drivers/clk/clk.s.old drivers/clk/clk.s | grep -C6 seq_w
.L2918:
@@ -46072,10 +46073,11 @@
call clk_prepare_unlock #
# drivers/clk/clk.c:3438: clk_pm_runtime_put_all();
call clk_pm_runtime_put_all #
-# drivers/clk/clk.c:3440: seq_puts(s, "}\n");
+# ./include/linux/seq_file.h:130: seq_write(m, s, __builtin_strlen(s));
+ movl $2, %edx #,
movq $.LC94, %rsi #,
movq %rbp, %rdi # s,
- call seq_puts #
+ call seq_write #
# drivers/clk/clk.c:3441: return 0;
jmp .L2917 #
.size clk_dump_show, .-clk_dump_show
@@ -46200,7 +46202,7 @@
leaq 128(%rbx), %r15 #, _97
movq %r15, %rdi # _97,
--
# drivers/clk/clk.c:1987: __clk_recalc_rates(core, false, 0);
@@ -46480,15 +46482,17 @@
call seq_printf #
jmp .L2950 #
.L2946:
-# drivers/clk/clk.c:3315: seq_puts(s, "-----");
+# ./include/linux/seq_file.h:130: seq_write(m, s, __builtin_strlen(s));
call __sanitizer_cov_trace_pc #
+ movl $5, %edx #,
movq $.LC100, %rsi #,
movq %rbp, %rdi # s,
- call seq_puts #
+ call seq_write #
+# ./include/linux/seq_file.h:131: }
jmp .L2947 #
.L2957:
# drivers/clk/clk.c:1903: return clk_core_get_accuracy_no_lock(core);
- xorl %r14d, %r14d # _34
+ xorl %r14d, %r14d # _35
--
@@ -46736,21 +46740,22 @@
call __sanitizer_cov_trace_pc #
leaq 240(%r12), %rdi #, tmp101
call __tsan_read8 #
-# drivers/clk/clk.c:3355: seq_puts(s, " enable prepare protect duty hardware connection\n");
- movq $.LC104, %rsi #,
+# ./include/linux/seq_file.h:130: seq_write(m, s, __builtin_strlen(s));
+ movl $142, %edx #,
movq %r12, %rdi # s,
+ movq $.LC104, %rsi #,
# drivers/clk/clk.c:3352: struct hlist_head **lists = s->private;
movq 240(%r12), %r13 # s_10(D)->private, lists
-# drivers/clk/clk.c:3355: seq_puts(s, " enable prepare protect duty hardware connection\n");
- call seq_puts #
-# drivers/clk/clk.c:3356: seq_puts(s, " clock count count count rate accuracy phase cycle enable consumer id\n");
+# ./include/linux/seq_file.h:130: seq_write(m, s, __builtin_strlen(s));
+ call seq_write #
+ movl $142, %edx #,
movq $.LC105, %rsi #,
movq %r12, %rdi # s,
- call seq_puts #
-# drivers/clk/clk.c:3357: seq_puts(s, "---------------------------------------------------------------------------------------------------------------------------------------------\n");
+ call seq_write #
movq $.LC106, %rsi #,
movq %r12, %rdi # s,
- call seq_puts #
+ movl $142, %edx #,
+ call seq_write #
# drivers/clk/clk.c:3359: ret = clk_pm_runtime_get_all();
call clk_pm_runtime_get_all #
# drivers/clk/clk.c:3360: if (ret)
@@ -57943,7 +57948,7 @@
subq $88, %rsp #,
# drivers/clk/clk.c:5338: {
The output for seq_putc() generation has also be checked and works.
---
fs/seq_file.c | 4 ++--
include/linux/seq_file.h | 13 ++++++++++++-
2 files changed, 14 insertions(+), 3 deletions(-)
diff --git a/fs/seq_file.c b/fs/seq_file.c
index f5fdaf3b1572..8ef0a07033ca 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -669,7 +669,7 @@ void seq_putc(struct seq_file *m, char c)
}
EXPORT_SYMBOL(seq_putc);
-void seq_puts(struct seq_file *m, const char *s)
+void __seq_puts(struct seq_file *m, const char *s)
{
int len = strlen(s);
@@ -680,7 +680,7 @@ void seq_puts(struct seq_file *m, const char *s)
memcpy(m->buf + m->count, s, len);
m->count += len;
}
-EXPORT_SYMBOL(seq_puts);
+EXPORT_SYMBOL(__seq_puts);
/**
* seq_put_decimal_ull_width - A helper routine for putting decimal numbers
diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
index 234bcdb1fba4..8bd4fda6e027 100644
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -118,7 +118,18 @@ void seq_vprintf(struct seq_file *m, const char *fmt, va_list args);
__printf(2, 3)
void seq_printf(struct seq_file *m, const char *fmt, ...);
void seq_putc(struct seq_file *m, char c);
-void seq_puts(struct seq_file *m, const char *s);
+void __seq_puts(struct seq_file *m, const char *s);
+
+static __always_inline void seq_puts(struct seq_file *m, const char *s)
+{
+ if (!__builtin_constant_p(*s))
+ __seq_puts(m, s);
+ else if (s[0] && !s[1])
+ seq_putc(m, s[0]);
+ else
+ seq_write(m, s, __builtin_strlen(s));
+}
+
void seq_put_decimal_ull_width(struct seq_file *m, const char *delimiter,
unsigned long long num, unsigned int width);
void seq_put_decimal_ull(struct seq_file *m, const char *delimiter,
--
2.44.0
^ permalink raw reply related [flat|nested] 3+ messages in thread
* [PATCH v2 2/2] seq_file: Simplify __seq_puts()
2024-04-22 10:24 [PATCH v2 1/2] seq_file: Optimize seq_puts() Christophe JAILLET
@ 2024-04-22 10:24 ` Christophe JAILLET
2024-05-02 14:31 ` [PATCH v2 1/2] seq_file: Optimize seq_puts() Christian Brauner
1 sibling, 0 replies; 3+ messages in thread
From: Christophe JAILLET @ 2024-04-22 10:24 UTC (permalink / raw)
To: David.Laight, rasmus.villemoes, Alexander Viro,
Christian Brauner, Jan Kara
Cc: linux-kernel, kernel-janitors, Christophe JAILLET, linux-fsdevel
Change the implementation of the out-of-line __seq_puts() to simply be
a seq_write() call instead of duplicating the overflow/memcpy logic.
Suggested-by: Rasmus Villemoes <rasmus.villemoes@prevas.dk>
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
---
Changes in v2:
- New patch
Now than most (if not all) seq_puts() calls will be turned into a
seq_write() or seq_putc() at compilation time, the added function call in
__seq_puts() should not be noticeable.
It could be even better to just remove this __seq_puts() and call
seq_write(m, s, strlen(s)) directly in seq_puts() if it can't be
optimized at compile time.
---
fs/seq_file.c | 9 +--------
1 file changed, 1 insertion(+), 8 deletions(-)
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 8ef0a07033ca..e676c8b0cf5d 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -671,14 +671,7 @@ EXPORT_SYMBOL(seq_putc);
void __seq_puts(struct seq_file *m, const char *s)
{
- int len = strlen(s);
-
- if (m->count + len >= m->size) {
- seq_set_overflow(m);
- return;
- }
- memcpy(m->buf + m->count, s, len);
- m->count += len;
+ seq_write(m, s, strlen(s));
}
EXPORT_SYMBOL(__seq_puts);
--
2.44.0
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [PATCH v2 1/2] seq_file: Optimize seq_puts()
2024-04-22 10:24 [PATCH v2 1/2] seq_file: Optimize seq_puts() Christophe JAILLET
2024-04-22 10:24 ` [PATCH v2 2/2] seq_file: Simplify __seq_puts() Christophe JAILLET
@ 2024-05-02 14:31 ` Christian Brauner
1 sibling, 0 replies; 3+ messages in thread
From: Christian Brauner @ 2024-05-02 14:31 UTC (permalink / raw)
To: Christophe JAILLET
Cc: Christian Brauner, linux-kernel, kernel-janitors, linux-fsdevel,
David.Laight, rasmus.villemoes, Alexander Viro, Jan Kara
On Mon, 22 Apr 2024 12:24:06 +0200, Christophe JAILLET wrote:
> Most of seq_puts() usages are done with a string literal. In such cases,
> the length of the string car be computed at compile time in order to save
> a strlen() call at run-time. seq_putc() or seq_write() can then be used
> instead.
>
> This saves a few cycles.
>
> [...]
Applied to the vfs.misc branch of the vfs/vfs.git tree.
Patches in the vfs.misc branch should appear in linux-next soon.
Please report any outstanding bugs that were missed during review in a
new review to the original patch series allowing us to drop it.
It's encouraged to provide Acked-bys and Reviewed-bys even though the
patch has now been applied. If possible patch trailers will be updated.
Note that commit hashes shown below are subject to change due to rebase,
trailer updates or similar. If in doubt, please check the listed branch.
tree: https://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs.git
branch: vfs.misc
[1/2] seq_file: Optimize seq_puts()
https://git.kernel.org/vfs/vfs/c/45751097aeb3
[2/2] seq_file: Simplify __seq_puts()
https://git.kernel.org/vfs/vfs/c/e035af9f6eba
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2024-05-02 14:31 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-04-22 10:24 [PATCH v2 1/2] seq_file: Optimize seq_puts() Christophe JAILLET
2024-04-22 10:24 ` [PATCH v2 2/2] seq_file: Simplify __seq_puts() Christophe JAILLET
2024-05-02 14:31 ` [PATCH v2 1/2] seq_file: Optimize seq_puts() Christian Brauner
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).