All of lore.kernel.org
 help / color / mirror / Atom feed
From: Heiko Stuebner <heiko@sntech.de>
To: palmer@rivosinc.com
Cc: conor@kernel.org, linux-kernel@vger.kernel.org,
	linux-riscv@lists.infradead.org, christoph.muellner@vrull.eu,
	ajones@ventanamicro.com, Heiko Stuebner <heiko.stuebner@vrull.eu>
Subject: [PATCH 2/2] RISC-V: improve string-function assembly
Date: Wed,  8 Feb 2023 23:53:28 +0100	[thread overview]
Message-ID: <20230208225328.1636017-3-heiko@sntech.de> (raw)
In-Reply-To: <20230208225328.1636017-1-heiko@sntech.de>

From: Heiko Stuebner <heiko.stuebner@vrull.eu>

Pick up the changes that Andrew suggested for the assembly string
functions but that I didn't manage to include in the series that
got applied.

This includes improvements to two comments, removal of unneeded labels
and moving one instruction slightly higher to not contradict an
explanatory comment.

Suggested-by: Andrew Jones <ajones@ventanamicro.com>
Signed-off-by: Heiko Stuebner <heiko.stuebner@vrull.eu>
---
 arch/riscv/lib/strcmp.S  |  6 ++++--
 arch/riscv/lib/strlen.S  | 10 +++++-----
 arch/riscv/lib/strncmp.S | 16 +++++++---------
 3 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/arch/riscv/lib/strcmp.S b/arch/riscv/lib/strcmp.S
index 8148b6418f61..fb186bf28f07 100644
--- a/arch/riscv/lib/strcmp.S
+++ b/arch/riscv/lib/strcmp.S
@@ -40,7 +40,9 @@ SYM_FUNC_START(strcmp)
 	ret
 
 /*
- * Variant of strcmp using the ZBB extension if available
+ * Variant of strcmp using the ZBB extension if available.
+ * The code was published as part of the bitmanip manual
+ * in Appendix A.
  */
 #ifdef CONFIG_RISCV_ISA_ZBB
 strcmp_zbb:
@@ -57,7 +59,7 @@ strcmp_zbb:
 	 *   a1 - string2
 	 *
 	 * Clobbers
-	 *   t0, t1, t2, t3, t4, t5
+	 *   t0, t1, t2, t3, t4
 	 */
 
 	or	t2, a0, a1
diff --git a/arch/riscv/lib/strlen.S b/arch/riscv/lib/strlen.S
index 0f9dbf93301a..898466f67000 100644
--- a/arch/riscv/lib/strlen.S
+++ b/arch/riscv/lib/strlen.S
@@ -96,7 +96,7 @@ strlen_zbb:
 	 * of valid bytes in this chunk.
 	 */
 	srli	a0, t1, 3
-	bgtu	t3, a0, 3f
+	bgtu	t3, a0, 2f
 
 	/* Prepare for the word comparison loop. */
 	addi	t2, t0, SZREG
@@ -112,20 +112,20 @@ strlen_zbb:
 	addi	t0, t0, SZREG
 	orc.b	t1, t1
 	beq	t1, t3, 1b
-2:
+
 	not	t1, t1
 	CZ	t1, t1
+	srli	t1, t1, 3
 
-	/* Get number of processed words.  */
+	/* Get number of processed bytes. */
 	sub	t2, t0, t2
 
 	/* Add number of characters in the first word.  */
 	add	a0, a0, t2
-	srli	t1, t1, 3
 
 	/* Add number of characters in the last word.  */
 	add	a0, a0, t1
-3:
+2:
 	ret
 
 .option pop
diff --git a/arch/riscv/lib/strncmp.S b/arch/riscv/lib/strncmp.S
index 7940ddab2d48..e36f5a6e1b16 100644
--- a/arch/riscv/lib/strncmp.S
+++ b/arch/riscv/lib/strncmp.S
@@ -70,7 +70,7 @@ strncmp_zbb:
 	li	t5, -1
 	and	t2, t2, SZREG-1
 	add	t4, a0, a2
-	bnez	t2, 4f
+	bnez	t2, 3f
 
 	/* Adjust limit for fast-path.  */
 	andi	t6, t4, -SZREG
@@ -114,23 +114,21 @@ strncmp_zbb:
 	ret
 
 	/* Simple loop for misaligned strings.  */
-3:
-	/* Restore limit for slow-path.  */
 	.p2align 3
-4:
-	bge	a0, t4, 6f
+3:
+	bge	a0, t4, 5f
 	lbu	t0, 0(a0)
 	lbu	t1, 0(a1)
 	addi	a0, a0, 1
 	addi	a1, a1, 1
-	bne	t0, t1, 5f
-	bnez	t0, 4b
+	bne	t0, t1, 4f
+	bnez	t0, 3b
 
-5:
+4:
 	sub	a0, t0, t1
 	ret
 
-6:
+5:
 	li	a0, 0
 	ret
 
-- 
2.39.0


WARNING: multiple messages have this Message-ID (diff)
From: Heiko Stuebner <heiko@sntech.de>
To: palmer@rivosinc.com
Cc: conor@kernel.org, linux-kernel@vger.kernel.org,
	linux-riscv@lists.infradead.org, christoph.muellner@vrull.eu,
	ajones@ventanamicro.com, Heiko Stuebner <heiko.stuebner@vrull.eu>
Subject: [PATCH 2/2] RISC-V: improve string-function assembly
Date: Wed,  8 Feb 2023 23:53:28 +0100	[thread overview]
Message-ID: <20230208225328.1636017-3-heiko@sntech.de> (raw)
In-Reply-To: <20230208225328.1636017-1-heiko@sntech.de>

From: Heiko Stuebner <heiko.stuebner@vrull.eu>

Pick up the changes that Andrew suggested for the assembly string
functions but that I didn't manage to include in the series that
got applied.

This includes improvements to two comments, removal of unneeded labels
and moving one instruction slightly higher to not contradict an
explanatory comment.

Suggested-by: Andrew Jones <ajones@ventanamicro.com>
Signed-off-by: Heiko Stuebner <heiko.stuebner@vrull.eu>
---
 arch/riscv/lib/strcmp.S  |  6 ++++--
 arch/riscv/lib/strlen.S  | 10 +++++-----
 arch/riscv/lib/strncmp.S | 16 +++++++---------
 3 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/arch/riscv/lib/strcmp.S b/arch/riscv/lib/strcmp.S
index 8148b6418f61..fb186bf28f07 100644
--- a/arch/riscv/lib/strcmp.S
+++ b/arch/riscv/lib/strcmp.S
@@ -40,7 +40,9 @@ SYM_FUNC_START(strcmp)
 	ret
 
 /*
- * Variant of strcmp using the ZBB extension if available
+ * Variant of strcmp using the ZBB extension if available.
+ * The code was published as part of the bitmanip manual
+ * in Appendix A.
  */
 #ifdef CONFIG_RISCV_ISA_ZBB
 strcmp_zbb:
@@ -57,7 +59,7 @@ strcmp_zbb:
 	 *   a1 - string2
 	 *
 	 * Clobbers
-	 *   t0, t1, t2, t3, t4, t5
+	 *   t0, t1, t2, t3, t4
 	 */
 
 	or	t2, a0, a1
diff --git a/arch/riscv/lib/strlen.S b/arch/riscv/lib/strlen.S
index 0f9dbf93301a..898466f67000 100644
--- a/arch/riscv/lib/strlen.S
+++ b/arch/riscv/lib/strlen.S
@@ -96,7 +96,7 @@ strlen_zbb:
 	 * of valid bytes in this chunk.
 	 */
 	srli	a0, t1, 3
-	bgtu	t3, a0, 3f
+	bgtu	t3, a0, 2f
 
 	/* Prepare for the word comparison loop. */
 	addi	t2, t0, SZREG
@@ -112,20 +112,20 @@ strlen_zbb:
 	addi	t0, t0, SZREG
 	orc.b	t1, t1
 	beq	t1, t3, 1b
-2:
+
 	not	t1, t1
 	CZ	t1, t1
+	srli	t1, t1, 3
 
-	/* Get number of processed words.  */
+	/* Get number of processed bytes. */
 	sub	t2, t0, t2
 
 	/* Add number of characters in the first word.  */
 	add	a0, a0, t2
-	srli	t1, t1, 3
 
 	/* Add number of characters in the last word.  */
 	add	a0, a0, t1
-3:
+2:
 	ret
 
 .option pop
diff --git a/arch/riscv/lib/strncmp.S b/arch/riscv/lib/strncmp.S
index 7940ddab2d48..e36f5a6e1b16 100644
--- a/arch/riscv/lib/strncmp.S
+++ b/arch/riscv/lib/strncmp.S
@@ -70,7 +70,7 @@ strncmp_zbb:
 	li	t5, -1
 	and	t2, t2, SZREG-1
 	add	t4, a0, a2
-	bnez	t2, 4f
+	bnez	t2, 3f
 
 	/* Adjust limit for fast-path.  */
 	andi	t6, t4, -SZREG
@@ -114,23 +114,21 @@ strncmp_zbb:
 	ret
 
 	/* Simple loop for misaligned strings.  */
-3:
-	/* Restore limit for slow-path.  */
 	.p2align 3
-4:
-	bge	a0, t4, 6f
+3:
+	bge	a0, t4, 5f
 	lbu	t0, 0(a0)
 	lbu	t1, 0(a1)
 	addi	a0, a0, 1
 	addi	a1, a1, 1
-	bne	t0, t1, 5f
-	bnez	t0, 4b
+	bne	t0, t1, 4f
+	bnez	t0, 3b
 
-5:
+4:
 	sub	a0, t0, t1
 	ret
 
-6:
+5:
 	li	a0, 0
 	ret
 
-- 
2.39.0


_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

  parent reply	other threads:[~2023-02-08 22:53 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-02-08 22:53 [PATCH 0/2] Small fixups for the Zbb string functions Heiko Stuebner
2023-02-08 22:53 ` Heiko Stuebner
2023-02-08 22:53 ` [PATCH 1/2] RISC-V: fix ordering of Zbb extension Heiko Stuebner
2023-02-08 22:53   ` Heiko Stuebner
2023-02-08 23:20   ` Conor Dooley
2023-02-08 23:20     ` Conor Dooley
2023-02-08 23:26     ` Heiko Stübner
2023-02-08 23:26       ` Heiko Stübner
2023-02-09  8:25     ` Andrew Jones
2023-02-09  8:25       ` Andrew Jones
2023-02-09  9:03       ` Conor Dooley
2023-02-09  9:03         ` Conor Dooley
2023-02-09  9:28         ` Andrew Jones
2023-02-09  9:28           ` Andrew Jones
2023-02-09  8:23   ` Andrew Jones
2023-02-09  8:23     ` Andrew Jones
2023-02-08 22:53 ` Heiko Stuebner [this message]
2023-02-08 22:53   ` [PATCH 2/2] RISC-V: improve string-function assembly Heiko Stuebner
2023-02-09  8:25   ` Andrew Jones
2023-02-09  8:25     ` Andrew Jones
2023-02-10 20:49 ` [PATCH 0/2] Small fixups for the Zbb string functions Conor Dooley
2023-02-10 20:49   ` Conor Dooley
2023-02-22 15:00 ` patchwork-bot+linux-riscv
2023-02-22 15:00   ` patchwork-bot+linux-riscv
2023-02-28 20:51 ` (subset) " Palmer Dabbelt
2023-02-28 20:51   ` Palmer Dabbelt
2023-02-28 21:11 ` patchwork-bot+linux-riscv
2023-02-28 21:11   ` patchwork-bot+linux-riscv

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230208225328.1636017-3-heiko@sntech.de \
    --to=heiko@sntech.de \
    --cc=ajones@ventanamicro.com \
    --cc=christoph.muellner@vrull.eu \
    --cc=conor@kernel.org \
    --cc=heiko.stuebner@vrull.eu \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-riscv@lists.infradead.org \
    --cc=palmer@rivosinc.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.