All of lore.kernel.org
 help / color / mirror / Atom feed
From: Stafford Horne <shorne@gmail.com>
To: Jonas Bonn <jonas@southpole.se>,
	Stefan Kristiansson <stefan.kristiansson@saunalahti.fi>
Cc: linux@roeck-us.net, openrisc@lists.librecores.org,
	linux-kernel@vger.kernel.org,
	Olof Kindgren <olof.kindgren@gmail.com>,
	Stafford Horne <shorne@gmail.com>
Subject: [PATCH v3 15/25] openrisc: Add optimized memset
Date: Wed, 22 Feb 2017 04:11:44 +0900	[thread overview]
Message-ID: <324aa407cd6eaad42232c898c35414b6911aa993.1487702890.git.shorne@gmail.com> (raw)
In-Reply-To: <cover.1487702890.git.shorne@gmail.com>
In-Reply-To: <cover.1487702890.git.shorne@gmail.com>

From: Olof Kindgren <olof.kindgren@gmail.com>

This adds a hand-optimized assembler version of memset and defines
__HAVE_ARCH_MEMSET so that this version is used instead of the generic C
routine.

Signed-off-by: Olof Kindgren <olof.kindgren@gmail.com>
Signed-off-by: Stafford Horne <shorne@gmail.com>
---
 arch/openrisc/include/asm/string.h |  7 +++
 arch/openrisc/kernel/or32_ksyms.c  |  1 +
 arch/openrisc/lib/Makefile         |  2 +-
 arch/openrisc/lib/memset.S         | 98 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 107 insertions(+), 1 deletion(-)
 create mode 100644 arch/openrisc/include/asm/string.h
 create mode 100644 arch/openrisc/lib/memset.S

diff --git a/arch/openrisc/include/asm/string.h b/arch/openrisc/include/asm/string.h
new file mode 100644
index 0000000..33470d4
--- /dev/null
+++ b/arch/openrisc/include/asm/string.h
@@ -0,0 +1,7 @@
+#ifndef __ASM_OPENRISC_STRING_H
+#define __ASM_OPENRISC_STRING_H
+
+#define __HAVE_ARCH_MEMSET /* use the arch memset instead of the generic C routine */
+extern void *memset(void *s, int c, __kernel_size_t n); /* see arch/openrisc/lib/memset.S */
+
+#endif /* __ASM_OPENRISC_STRING_H */
diff --git a/arch/openrisc/kernel/or32_ksyms.c b/arch/openrisc/kernel/or32_ksyms.c
index 86e31cf..5c4695d 100644
--- a/arch/openrisc/kernel/or32_ksyms.c
+++ b/arch/openrisc/kernel/or32_ksyms.c
@@ -44,3 +44,4 @@ DECLARE_EXPORT(__ashldi3);
 DECLARE_EXPORT(__lshrdi3);
 
 EXPORT_SYMBOL(__copy_tofrom_user);
+EXPORT_SYMBOL(memset);
diff --git a/arch/openrisc/lib/Makefile b/arch/openrisc/lib/Makefile
index 966f65d..67c583e 100644
--- a/arch/openrisc/lib/Makefile
+++ b/arch/openrisc/lib/Makefile
@@ -2,4 +2,4 @@
 # Makefile for or32 specific library files..
 #
 
-obj-y  = string.o delay.o
+obj-y  = memset.o string.o delay.o
diff --git a/arch/openrisc/lib/memset.S b/arch/openrisc/lib/memset.S
new file mode 100644
index 0000000..92cc2ea
--- /dev/null
+++ b/arch/openrisc/lib/memset.S
@@ -0,0 +1,98 @@
+/*
+ * OpenRISC memset.S
+ *
+ * Hand-optimized assembler version of memset for OpenRISC.
+ * Algorithm inspired by several other arch-specific memset routines
+ * in the kernel tree
+ *
+ * Copyright (C) 2015 Olof Kindgren <olof.kindgren@gmail.com>
+ *
+ *      This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+	.global memset
+	.type	memset, @function
+memset:
+	/* Fill n bytes at s with the low byte of c; s is returned in r11.
+	 * r3 = s (destination pointer, never modified)
+	 * r4 = c (fill value, only the low 8 bits are used)
+	 * r5 = n (number of bytes to fill)
+	 * r13 = fill word, r15 = scratch, r17 = bytes left, r19 = write pointer
+	*/
+
+	/* Exit if n == 0 (the l.andi below runs in the delay slot; harmless) */
+	l.sfeqi		r5, 0
+	l.bf		4f
+
+	/* Truncate c to char (delay slot of the l.bf above) */
+	l.andi  	r13, r4, 0xff
+
+	/* Skip word extension if c is 0: r13 is already the all-zero word */
+	l.sfeqi		r13, 0
+	l.bf		1f
+	/* Delay slot: flag = (n <= 7), i.e. fewer than two whole words (8 bytes) */
+	 l.sfleui	r5, 7
+
+	/* Extend char c to 32-bit word cccc in r13 (flag from l.sfleui stays live) */
+	l.slli		r15, r13, 16  // r13 = 000c, r15 = 0c00
+	l.or		r13, r13, r15 // r13 = 0c0c, r15 = 0c00
+	l.slli		r15, r13, 8   // r13 = 0c0c, r15 = c0c0
+	l.or		r13, r13, r15 // r13 = cccc, r15 = c0c0
+
+1:	l.addi		r19, r3, 0 // Set r19 = dst (write pointer starts at s)
+	/* Jump to byte store loop if n <= 7 (flag set by l.sfleui above) */
+	l.bf		3f
+	 l.or		r17, r5, r0 // Delay slot: r17 = n (bytes left), set on both paths
+
+	/* Mask out two LSBs of the destination address to check alignment */
+	l.andi		r15, r3, 0x3
+
+	/* lsb == 00: already word aligned, jump to word store loop */
+	l.sfeqi		r15, 0
+	l.bf		2f
+	 l.addi		r19, r3, 0 // Delay slot: r19 = dst
+
+	/* lsb == 01,10 or 11: store leading bytes up to the next word boundary */
+	l.sb		0(r3), r13   // *dst = c
+	l.addi		r17, r17, -1 // Decrease n
+
+	l.sfeqi		r15, 3
+	l.bf		2f
+	 l.addi		r19, r3, 1  // Delay slot: r19 = dst + 1
+
+	/* lsb == 01 or 10: a second leading byte is needed */
+	l.sb		1(r3), r13   // *(dst+1) = c
+	l.addi		r17, r17, -1 // Decrease n
+
+	l.sfeqi		r15, 2
+	l.bf		2f
+	 l.addi		r19, r3, 2  // Delay slot: r19 = dst + 2
+
+	/* lsb == 01: a third leading byte is needed */
+	l.sb		2(r3), r13   // *(dst+2) = c
+	l.addi		r17, r17, -1 // Decrease n
+	l.addi		r19, r3, 3   // r19 = dst + 3
+
+	/* Word store loop: r19 is word aligned and r17 >= 5 on entry */
+2:	l.sw		0(r19), r13  // *dst = cccc
+	l.addi		r17, r17, -4 // Decrease n
+	l.sfgeui	r17, 4       // Another whole word left?
+	l.bf		2b
+	 l.addi		r19, r19, 4  // Delay slot: dst += 4 (also on loop exit)
+
+	/* When n > 0, store the remaining 1-3 bytes, otherwise jump to exit */
+	l.sfeqi		r17, 0
+	l.bf		4f           // Delay slot is the l.addi at 3: (r17 is dead on exit)
+
+	/* Byte store loop: entered with r17 = bytes left (> 0) */
+3:	l.addi		r17, r17, -1 // Decrease n
+	l.sb		0(r19), r13  // *dst = c (low byte of r13)
+	l.sfnei		r17, 0
+	l.bf		3b
+	 l.addi		r19, r19, 1  // Delay slot: dst += 1
+
+4:	l.jr		r9           // Return to caller
+	 l.ori		r11, r3, 0   // Delay slot: return value r11 = s
-- 
2.9.3

WARNING: multiple messages have this Message-ID (diff)
From: Stafford Horne <shorne@gmail.com>
To: openrisc@lists.librecores.org
Subject: [OpenRISC] [PATCH v3 15/25] openrisc: Add optimized memset
Date: Wed, 22 Feb 2017 04:11:44 +0900	[thread overview]
Message-ID: <324aa407cd6eaad42232c898c35414b6911aa993.1487702890.git.shorne@gmail.com> (raw)
In-Reply-To: <cover.1487702890.git.shorne@gmail.com>

From: Olof Kindgren <olof.kindgren@gmail.com>

This adds a hand-optimized assembler version of memset and defines
__HAVE_ARCH_MEMSET so that this version is used instead of the generic C
routine.

Signed-off-by: Olof Kindgren <olof.kindgren@gmail.com>
Signed-off-by: Stafford Horne <shorne@gmail.com>
---
 arch/openrisc/include/asm/string.h |  7 +++
 arch/openrisc/kernel/or32_ksyms.c  |  1 +
 arch/openrisc/lib/Makefile         |  2 +-
 arch/openrisc/lib/memset.S         | 98 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 107 insertions(+), 1 deletion(-)
 create mode 100644 arch/openrisc/include/asm/string.h
 create mode 100644 arch/openrisc/lib/memset.S

diff --git a/arch/openrisc/include/asm/string.h b/arch/openrisc/include/asm/string.h
new file mode 100644
index 0000000..33470d4
--- /dev/null
+++ b/arch/openrisc/include/asm/string.h
@@ -0,0 +1,7 @@
+#ifndef __ASM_OPENRISC_STRING_H
+#define __ASM_OPENRISC_STRING_H
+
+#define __HAVE_ARCH_MEMSET /* use the arch memset instead of the generic C routine */
+extern void *memset(void *s, int c, __kernel_size_t n); /* see arch/openrisc/lib/memset.S */
+
+#endif /* __ASM_OPENRISC_STRING_H */
diff --git a/arch/openrisc/kernel/or32_ksyms.c b/arch/openrisc/kernel/or32_ksyms.c
index 86e31cf..5c4695d 100644
--- a/arch/openrisc/kernel/or32_ksyms.c
+++ b/arch/openrisc/kernel/or32_ksyms.c
@@ -44,3 +44,4 @@ DECLARE_EXPORT(__ashldi3);
 DECLARE_EXPORT(__lshrdi3);
 
 EXPORT_SYMBOL(__copy_tofrom_user);
+EXPORT_SYMBOL(memset);
diff --git a/arch/openrisc/lib/Makefile b/arch/openrisc/lib/Makefile
index 966f65d..67c583e 100644
--- a/arch/openrisc/lib/Makefile
+++ b/arch/openrisc/lib/Makefile
@@ -2,4 +2,4 @@
 # Makefile for or32 specific library files..
 #
 
-obj-y  = string.o delay.o
+obj-y  = memset.o string.o delay.o
diff --git a/arch/openrisc/lib/memset.S b/arch/openrisc/lib/memset.S
new file mode 100644
index 0000000..92cc2ea
--- /dev/null
+++ b/arch/openrisc/lib/memset.S
@@ -0,0 +1,98 @@
+/*
+ * OpenRISC memset.S
+ *
+ * Hand-optimized assembler version of memset for OpenRISC.
+ * Algorithm inspired by several other arch-specific memset routines
+ * in the kernel tree
+ *
+ * Copyright (C) 2015 Olof Kindgren <olof.kindgren@gmail.com>
+ *
+ *      This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+	.global memset
+	.type	memset, @function
+memset:
+	/* Fill n bytes at s with the low byte of c; s is returned in r11.
+	 * r3 = s (destination pointer, never modified)
+	 * r4 = c (fill value, only the low 8 bits are used)
+	 * r5 = n (number of bytes to fill)
+	 * r13 = fill word, r15 = scratch, r17 = bytes left, r19 = write pointer
+	*/
+
+	/* Exit if n == 0 (the l.andi below runs in the delay slot; harmless) */
+	l.sfeqi		r5, 0
+	l.bf		4f
+
+	/* Truncate c to char (delay slot of the l.bf above) */
+	l.andi  	r13, r4, 0xff
+
+	/* Skip word extension if c is 0: r13 is already the all-zero word */
+	l.sfeqi		r13, 0
+	l.bf		1f
+	/* Delay slot: flag = (n <= 7), i.e. fewer than two whole words (8 bytes) */
+	 l.sfleui	r5, 7
+
+	/* Extend char c to 32-bit word cccc in r13 (flag from l.sfleui stays live) */
+	l.slli		r15, r13, 16  // r13 = 000c, r15 = 0c00
+	l.or		r13, r13, r15 // r13 = 0c0c, r15 = 0c00
+	l.slli		r15, r13, 8   // r13 = 0c0c, r15 = c0c0
+	l.or		r13, r13, r15 // r13 = cccc, r15 = c0c0
+
+1:	l.addi		r19, r3, 0 // Set r19 = dst (write pointer starts at s)
+	/* Jump to byte store loop if n <= 7 (flag set by l.sfleui above) */
+	l.bf		3f
+	 l.or		r17, r5, r0 // Delay slot: r17 = n (bytes left), set on both paths
+
+	/* Mask out two LSBs of the destination address to check alignment */
+	l.andi		r15, r3, 0x3
+
+	/* lsb == 00: already word aligned, jump to word store loop */
+	l.sfeqi		r15, 0
+	l.bf		2f
+	 l.addi		r19, r3, 0 // Delay slot: r19 = dst
+
+	/* lsb == 01,10 or 11: store leading bytes up to the next word boundary */
+	l.sb		0(r3), r13   // *dst = c
+	l.addi		r17, r17, -1 // Decrease n
+
+	l.sfeqi		r15, 3
+	l.bf		2f
+	 l.addi		r19, r3, 1  // Delay slot: r19 = dst + 1
+
+	/* lsb == 01 or 10: a second leading byte is needed */
+	l.sb		1(r3), r13   // *(dst+1) = c
+	l.addi		r17, r17, -1 // Decrease n
+
+	l.sfeqi		r15, 2
+	l.bf		2f
+	 l.addi		r19, r3, 2  // Delay slot: r19 = dst + 2
+
+	/* lsb == 01: a third leading byte is needed */
+	l.sb		2(r3), r13   // *(dst+2) = c
+	l.addi		r17, r17, -1 // Decrease n
+	l.addi		r19, r3, 3   // r19 = dst + 3
+
+	/* Word store loop: r19 is word aligned and r17 >= 5 on entry */
+2:	l.sw		0(r19), r13  // *dst = cccc
+	l.addi		r17, r17, -4 // Decrease n
+	l.sfgeui	r17, 4       // Another whole word left?
+	l.bf		2b
+	 l.addi		r19, r19, 4  // Delay slot: dst += 4 (also on loop exit)
+
+	/* When n > 0, store the remaining 1-3 bytes, otherwise jump to exit */
+	l.sfeqi		r17, 0
+	l.bf		4f           // Delay slot is the l.addi at 3: (r17 is dead on exit)
+
+	/* Byte store loop: entered with r17 = bytes left (> 0) */
+3:	l.addi		r17, r17, -1 // Decrease n
+	l.sb		0(r19), r13  // *dst = c (low byte of r13)
+	l.sfnei		r17, 0
+	l.bf		3b
+	 l.addi		r19, r19, 1  // Delay slot: dst += 1
+
+4:	l.jr		r9           // Return to caller
+	 l.ori		r11, r3, 0   // Delay slot: return value r11 = s
-- 
2.9.3


  parent reply	other threads:[~2017-02-21 19:13 UTC|newest]

Thread overview: 96+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-02-21 19:11 [PATCH v3 00/25] OpenRISC patches for 4.11 final call Stafford Horne
2017-02-21 19:11 ` [OpenRISC] " Stafford Horne
2017-02-21 19:11 ` [PATCH v3 01/25] openrisc: use SPARSE_IRQ Stafford Horne
2017-02-21 19:11   ` [OpenRISC] " Stafford Horne
2017-02-21 19:11 ` [PATCH v3 02/25] openrisc: add cache way information to cpuinfo Stafford Horne
2017-02-21 19:11   ` [OpenRISC] " Stafford Horne
2017-03-14 12:08   ` Sudeep Holla
2017-03-14 12:08     ` [OpenRISC] " Sudeep Holla
2017-03-14 13:11     ` Stefan Kristiansson
2017-03-14 13:11       ` [OpenRISC] " Stefan Kristiansson
2017-03-14 13:45       ` Sudeep Holla
2017-03-14 13:45         ` [OpenRISC] " Sudeep Holla
2017-03-14 14:09         ` Stafford Horne
2017-03-14 14:09           ` [OpenRISC] " Stafford Horne
2017-03-14 15:55           ` Sudeep Holla
2017-03-14 15:55             ` [OpenRISC] " Sudeep Holla
2017-02-21 19:11 ` [PATCH v3 03/25] openrisc: tlb miss handler optimizations Stafford Horne
2017-02-21 19:11   ` [OpenRISC] " Stafford Horne
2017-02-21 19:11 ` [PATCH v3 04/25] openrisc: head: use THREAD_SIZE instead of magic constant Stafford Horne
2017-02-21 19:11   ` [OpenRISC] " Stafford Horne
2017-02-21 19:11 ` [PATCH v3 05/25] openrisc: head: refactor out tlb flush into it's own function Stafford Horne
2017-02-21 19:11   ` [OpenRISC] " Stafford Horne
2017-02-21 19:11 ` [PATCH v3 06/25] openrisc: add l.lwa/l.swa emulation Stafford Horne
2017-02-21 19:11   ` [OpenRISC] " Stafford Horne
2017-02-21 19:11 ` [PATCH v3 07/25] openrisc: add atomic bitops Stafford Horne
2017-02-21 19:11   ` [OpenRISC] " Stafford Horne
2017-02-21 19:11 ` [PATCH v3 08/25] openrisc: add cmpxchg and xchg implementations Stafford Horne
2017-02-21 19:11   ` [OpenRISC] " Stafford Horne
2017-02-22 11:19   ` Peter Zijlstra
2017-02-22 11:19     ` [OpenRISC] " Peter Zijlstra
2017-02-22 14:20     ` Stafford Horne
2017-02-22 14:20       ` [OpenRISC] " Stafford Horne
2017-02-22 17:30       ` Richard Henderson
2017-02-22 17:30         ` Richard Henderson
2017-02-22 22:43         ` Stafford Horne
2017-02-22 22:43           ` Stafford Horne
2017-02-21 19:11 ` [PATCH v3 09/25] openrisc: add optimized atomic operations Stafford Horne
2017-02-21 19:11   ` [OpenRISC] " Stafford Horne
2017-02-22 11:27   ` Peter Zijlstra
2017-02-22 11:27     ` [OpenRISC] " Peter Zijlstra
2017-02-22 14:22     ` Stafford Horne
2017-02-22 14:22       ` [OpenRISC] " Stafford Horne
2017-02-22 17:31       ` Richard Henderson
2017-02-22 17:31         ` Richard Henderson
2017-02-22 22:42         ` Stafford Horne
2017-02-22 22:42           ` Stafford Horne
2017-02-21 19:11 ` [PATCH v3 10/25] openrisc: add spinlock implementation Stafford Horne
2017-02-21 19:11   ` [OpenRISC] " Stafford Horne
2017-02-22 11:29   ` Peter Zijlstra
2017-02-22 11:29     ` [OpenRISC] " Peter Zijlstra
2017-02-22 11:32   ` Peter Zijlstra
2017-02-22 11:32     ` [OpenRISC] " Peter Zijlstra
2017-02-22 11:37   ` Peter Zijlstra
2017-02-22 11:37     ` [OpenRISC] " Peter Zijlstra
2017-02-22 12:02     ` Peter Zijlstra
2017-02-22 12:02       ` [OpenRISC] " Peter Zijlstra
2017-02-22 11:38   ` Peter Zijlstra
2017-02-22 11:38     ` [OpenRISC] " Peter Zijlstra
2017-02-22 11:41   ` Peter Zijlstra
2017-02-22 11:41     ` [OpenRISC] " Peter Zijlstra
2017-02-22 12:08     ` Peter Zijlstra
2017-02-22 12:08       ` [OpenRISC] " Peter Zijlstra
2017-02-21 19:11 ` [PATCH v3 11/25] openrisc: add futex_atomic_* implementations Stafford Horne
2017-02-21 19:11   ` [OpenRISC] " Stafford Horne
2017-02-21 19:11 ` [PATCH v3 12/25] openrisc: remove unnecessary stddef.h include Stafford Horne
2017-02-21 19:11   ` [OpenRISC] " Stafford Horne
2017-02-21 19:11 ` [PATCH v3 13/25] openrisc: Fix the bitmask for the unit present register Stafford Horne
2017-02-21 19:11   ` [OpenRISC] " Stafford Horne
2017-02-21 19:11 ` [PATCH v3 14/25] openrisc: Initial support for the idle state Stafford Horne
2017-02-21 19:11   ` [OpenRISC] " Stafford Horne
2017-02-21 20:24   ` Joe Perches
2017-02-21 20:24     ` [OpenRISC] " Joe Perches
2017-02-22 14:19     ` Stafford Horne
2017-02-22 14:19       ` [OpenRISC] " Stafford Horne
2017-02-21 19:11 ` Stafford Horne [this message]
2017-02-21 19:11   ` [OpenRISC] [PATCH v3 15/25] openrisc: Add optimized memset Stafford Horne
2017-02-21 19:11 ` [PATCH v3 16/25] openrisc: Add optimized memcpy routine Stafford Horne
2017-02-21 19:11   ` [OpenRISC] " Stafford Horne
2017-02-21 19:11 ` [PATCH v3 17/25] openrisc: Add .gitignore Stafford Horne
2017-02-21 19:11   ` [OpenRISC] " Stafford Horne
2017-02-21 19:11 ` [PATCH v3 18/25] MAINTAINERS: Add the openrisc official repository Stafford Horne
2017-02-21 19:11   ` [OpenRISC] " Stafford Horne
2017-02-21 19:11 ` [PATCH v3 19/25] scripts/checkstack.pl: Add openrisc support Stafford Horne
2017-02-21 19:11   ` [OpenRISC] " Stafford Horne
2017-02-21 19:11 ` [PATCH v3 20/25] openrisc: entry: Whitespace and comment cleanups Stafford Horne
2017-02-21 19:11   ` [OpenRISC] " Stafford Horne
2017-02-21 19:11 ` [PATCH v3 21/25] openrisc: entry: Fix delay slot detection Stafford Horne
2017-02-21 19:11   ` [OpenRISC] " Stafford Horne
2017-02-21 19:11 ` [PATCH v3 22/25] openrisc: head: Move init strings to rodata section Stafford Horne
2017-02-21 19:11   ` [OpenRISC] " Stafford Horne
2017-02-21 19:11 ` [PATCH v3 23/25] arch/openrisc/lib/memcpy.c: use correct OR1200 option Stafford Horne
2017-02-21 19:11   ` [OpenRISC] " Stafford Horne
2017-02-21 19:11 ` [PATCH v3 24/25] openrisc: Export ioremap symbols used by modules Stafford Horne
2017-02-21 19:11   ` [OpenRISC] " Stafford Horne
2017-02-21 19:11 ` [PATCH v3 25/25] openrisc: head: Init r0 to 0 on start Stafford Horne
2017-02-21 19:11   ` [OpenRISC] " Stafford Horne

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=324aa407cd6eaad42232c898c35414b6911aa993.1487702890.git.shorne@gmail.com \
    --to=shorne@gmail.com \
    --cc=jonas@southpole.se \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux@roeck-us.net \
    --cc=olof.kindgren@gmail.com \
    --cc=openrisc@lists.librecores.org \
    --cc=stefan.kristiansson@saunalahti.fi \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.