From: Matthew Wilcox <willy@infradead.org>
To: linux-kernel@vger.kernel.org
Cc: linux-fbdev@vger.kernel.org, linux-arch@vger.kernel.org,
linux-alpha@vger.kernel.org,
linux-arm-kernel@lists.infradead.org, x86@kernel.org,
linux-mips@linux-mips.org, linuxppc-dev@lists.ozlabs.org,
sparclinux@vger.kernel.org, Minchan Kim <minchan@kernel.org>,
Matthew Wilcox <mawilcox@microsoft.com>
Subject: [PATCH v3 2/7] ARM: Implement memset16, memset32 & memset64
Date: Fri, 24 Mar 2017 09:13:13 -0700 [thread overview]
Message-ID: <20170324161318.18718-3-willy@infradead.org> (raw)
In-Reply-To: <20170324161318.18718-1-willy@infradead.org>
From: Matthew Wilcox <mawilcox@microsoft.com>
ARM is only 32-bit, so it doesn't really need a memset64, but it was
essentially free to add it to the existing implementation.
Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
Reviewed-by: Russell King <rmk+kernel@armlinux.org.uk>
---
arch/arm/include/asm/string.h | 21 +++++++++++++++++++++
arch/arm/kernel/armksyms.c | 3 +++
arch/arm/lib/memset.S | 44 ++++++++++++++++++++++++++++++++++---------
3 files changed, 59 insertions(+), 9 deletions(-)
diff --git a/arch/arm/include/asm/string.h b/arch/arm/include/asm/string.h
index cf4f3aad0fc1..bc7a1be7a76a 100644
--- a/arch/arm/include/asm/string.h
+++ b/arch/arm/include/asm/string.h
@@ -24,6 +24,27 @@ extern void * memchr(const void *, int, __kernel_size_t);
#define __HAVE_ARCH_MEMSET
extern void * memset(void *, int, __kernel_size_t);
+#define __HAVE_ARCH_MEMSET16
+extern void *__memset16(uint16_t *, uint16_t v, __kernel_size_t);
+static inline void *memset16(uint16_t *p, uint16_t v, __kernel_size_t n)
+{
+ return __memset16(p, v, n * 2);
+}
+
+#define __HAVE_ARCH_MEMSET32
+extern void *__memset32(uint32_t *, uint32_t v, __kernel_size_t);
+static inline void *memset32(uint32_t *p, uint32_t v, __kernel_size_t n)
+{
+ return __memset32(p, v, n * 4);
+}
+
+#define __HAVE_ARCH_MEMSET64
+extern void *__memset64(uint64_t *, uint32_t low, __kernel_size_t, uint32_t hi);
+static inline void *memset64(uint64_t *p, uint64_t v, __kernel_size_t n)
+{
+ return __memset64(p, v, n * 8, v >> 32);
+}
+
extern void __memzero(void *ptr, __kernel_size_t n);
#define memset(p,v,n) \
diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c
index 8e8d20cdbce7..633341ed0713 100644
--- a/arch/arm/kernel/armksyms.c
+++ b/arch/arm/kernel/armksyms.c
@@ -87,6 +87,9 @@ EXPORT_SYMBOL(__raw_writesl);
EXPORT_SYMBOL(strchr);
EXPORT_SYMBOL(strrchr);
EXPORT_SYMBOL(memset);
+EXPORT_SYMBOL(__memset16);
+EXPORT_SYMBOL(__memset32);
+EXPORT_SYMBOL(__memset64);
EXPORT_SYMBOL(memcpy);
EXPORT_SYMBOL(memmove);
EXPORT_SYMBOL(memchr);
diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S
index 3c65e3bd790f..9adc9bdf3ffb 100644
--- a/arch/arm/lib/memset.S
+++ b/arch/arm/lib/memset.S
@@ -21,14 +21,14 @@ ENTRY(memset)
UNWIND( .fnstart )
ands r3, r0, #3 @ 1 unaligned?
mov ip, r0 @ preserve r0 as return value
+ orr r1, r1, r1, lsl #8
bne 6f @ 1
/*
* we know that the pointer in ip is aligned to a word boundary.
*/
-1: orr r1, r1, r1, lsl #8
- orr r1, r1, r1, lsl #16
+1: orr r1, r1, r1, lsl #16
mov r3, r1
- cmp r2, #16
+7: cmp r2, #16
blt 4f
#if ! CALGN(1)+0
@@ -41,7 +41,7 @@ UNWIND( .fnend )
UNWIND( .fnstart )
UNWIND( .save {r8, lr} )
mov r8, r1
- mov lr, r1
+ mov lr, r3
2: subs r2, r2, #64
stmgeia ip!, {r1, r3, r8, lr} @ 64 bytes at a time.
@@ -73,11 +73,11 @@ UNWIND( .fnend )
UNWIND( .fnstart )
UNWIND( .save {r4-r8, lr} )
mov r4, r1
- mov r5, r1
+ mov r5, r3
mov r6, r1
- mov r7, r1
+ mov r7, r3
mov r8, r1
- mov lr, r1
+ mov lr, r3
cmp r2, #96
tstgt ip, #31
@@ -114,12 +114,13 @@ UNWIND( .fnstart )
tst r2, #4
strne r1, [ip], #4
/*
- * When we get here, we've got less than 4 bytes to zero. We
+ * When we get here, we've got less than 4 bytes to set. We
* may have an unaligned pointer as well.
*/
5: tst r2, #2
+ movne r3, r1, lsr #8 @ the top half of a 16-bit pattern
strneb r1, [ip], #1
- strneb r1, [ip], #1
+ strneb r3, [ip], #1
tst r2, #1
strneb r1, [ip], #1
ret lr
@@ -135,3 +136,28 @@ UNWIND( .fnstart )
UNWIND( .fnend )
ENDPROC(memset)
ENDPROC(mmioset)
+
+ENTRY(__memset16)
+UNWIND( .fnstart )
+ tst r0, #2 @ pointer unaligned?
+ mov ip, r0 @ preserve r0 as return value
+ beq 1b @ jump into the middle of memset
+ subs r2, r2, #2 @ cope with n == 0
+ movge r3, r1, lsr #8 @ r3 = r1 >> 8
+ strgeb r1, [ip], #1 @ *ip = r1
+ strgeb r3, [ip], #1 @ *ip = r3
+ bgt 1b @ back into memset if n > 0
+ ret lr @ otherwise return
+UNWIND( .fnend )
+ENDPROC(__memset16)
+ENTRY(__memset32)
+UNWIND( .fnstart )
+ mov r3, r1 @ copy r1 to r3 and fall into memset64
+UNWIND( .fnend )
+ENDPROC(__memset32)
+ENTRY(__memset64)
+UNWIND( .fnstart )
+ mov ip, r0 @ preserve r0 as return value
+ b 7b @ jump into the middle of memset
+UNWIND( .fnend )
+ENDPROC(__memset64)
--
2.11.0
WARNING: multiple messages have this Message-ID (diff)
From: Matthew Wilcox <willy@infradead.org>
To: linux-kernel@vger.kernel.org
Cc: linux-arch@vger.kernel.org, linux-mips@linux-mips.org,
linux-fbdev@vger.kernel.org,
Matthew Wilcox <mawilcox@microsoft.com>,
x86@kernel.org, Minchan Kim <minchan@kernel.org>,
linux-alpha@vger.kernel.org, sparclinux@vger.kernel.org,
linuxppc-dev@lists.ozlabs.org,
linux-arm-kernel@lists.infradead.org
Subject: [PATCH v3 2/7] ARM: Implement memset16, memset32 & memset64
Date: Fri, 24 Mar 2017 09:13:13 -0700 [thread overview]
Message-ID: <20170324161318.18718-3-willy@infradead.org> (raw)
In-Reply-To: <20170324161318.18718-1-willy@infradead.org>
From: Matthew Wilcox <mawilcox@microsoft.com>
ARM is only 32-bit, so it doesn't really need a memset64, but it was
essentially free to add it to the existing implementation.
Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
Reviewed-by: Russell King <rmk+kernel@armlinux.org.uk>
---
arch/arm/include/asm/string.h | 21 +++++++++++++++++++++
arch/arm/kernel/armksyms.c | 3 +++
arch/arm/lib/memset.S | 44 ++++++++++++++++++++++++++++++++++---------
3 files changed, 59 insertions(+), 9 deletions(-)
diff --git a/arch/arm/include/asm/string.h b/arch/arm/include/asm/string.h
index cf4f3aad0fc1..bc7a1be7a76a 100644
--- a/arch/arm/include/asm/string.h
+++ b/arch/arm/include/asm/string.h
@@ -24,6 +24,27 @@ extern void * memchr(const void *, int, __kernel_size_t);
#define __HAVE_ARCH_MEMSET
extern void * memset(void *, int, __kernel_size_t);
+#define __HAVE_ARCH_MEMSET16
+extern void *__memset16(uint16_t *, uint16_t v, __kernel_size_t);
+static inline void *memset16(uint16_t *p, uint16_t v, __kernel_size_t n)
+{
+ return __memset16(p, v, n * 2);
+}
+
+#define __HAVE_ARCH_MEMSET32
+extern void *__memset32(uint32_t *, uint32_t v, __kernel_size_t);
+static inline void *memset32(uint32_t *p, uint32_t v, __kernel_size_t n)
+{
+ return __memset32(p, v, n * 4);
+}
+
+#define __HAVE_ARCH_MEMSET64
+extern void *__memset64(uint64_t *, uint32_t low, __kernel_size_t, uint32_t hi);
+static inline void *memset64(uint64_t *p, uint64_t v, __kernel_size_t n)
+{
+ return __memset64(p, v, n * 8, v >> 32);
+}
+
extern void __memzero(void *ptr, __kernel_size_t n);
#define memset(p,v,n) \
diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c
index 8e8d20cdbce7..633341ed0713 100644
--- a/arch/arm/kernel/armksyms.c
+++ b/arch/arm/kernel/armksyms.c
@@ -87,6 +87,9 @@ EXPORT_SYMBOL(__raw_writesl);
EXPORT_SYMBOL(strchr);
EXPORT_SYMBOL(strrchr);
EXPORT_SYMBOL(memset);
+EXPORT_SYMBOL(__memset16);
+EXPORT_SYMBOL(__memset32);
+EXPORT_SYMBOL(__memset64);
EXPORT_SYMBOL(memcpy);
EXPORT_SYMBOL(memmove);
EXPORT_SYMBOL(memchr);
diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S
index 3c65e3bd790f..9adc9bdf3ffb 100644
--- a/arch/arm/lib/memset.S
+++ b/arch/arm/lib/memset.S
@@ -21,14 +21,14 @@ ENTRY(memset)
UNWIND( .fnstart )
ands r3, r0, #3 @ 1 unaligned?
mov ip, r0 @ preserve r0 as return value
+ orr r1, r1, r1, lsl #8
bne 6f @ 1
/*
* we know that the pointer in ip is aligned to a word boundary.
*/
-1: orr r1, r1, r1, lsl #8
- orr r1, r1, r1, lsl #16
+1: orr r1, r1, r1, lsl #16
mov r3, r1
- cmp r2, #16
+7: cmp r2, #16
blt 4f
#if ! CALGN(1)+0
@@ -41,7 +41,7 @@ UNWIND( .fnend )
UNWIND( .fnstart )
UNWIND( .save {r8, lr} )
mov r8, r1
- mov lr, r1
+ mov lr, r3
2: subs r2, r2, #64
stmgeia ip!, {r1, r3, r8, lr} @ 64 bytes at a time.
@@ -73,11 +73,11 @@ UNWIND( .fnend )
UNWIND( .fnstart )
UNWIND( .save {r4-r8, lr} )
mov r4, r1
- mov r5, r1
+ mov r5, r3
mov r6, r1
- mov r7, r1
+ mov r7, r3
mov r8, r1
- mov lr, r1
+ mov lr, r3
cmp r2, #96
tstgt ip, #31
@@ -114,12 +114,13 @@ UNWIND( .fnstart )
tst r2, #4
strne r1, [ip], #4
/*
- * When we get here, we've got less than 4 bytes to zero. We
+ * When we get here, we've got less than 4 bytes to set. We
* may have an unaligned pointer as well.
*/
5: tst r2, #2
+ movne r3, r1, lsr #8 @ the top half of a 16-bit pattern
strneb r1, [ip], #1
- strneb r1, [ip], #1
+ strneb r3, [ip], #1
tst r2, #1
strneb r1, [ip], #1
ret lr
@@ -135,3 +136,28 @@ UNWIND( .fnstart )
UNWIND( .fnend )
ENDPROC(memset)
ENDPROC(mmioset)
+
+ENTRY(__memset16)
+UNWIND( .fnstart )
+ tst r0, #2 @ pointer unaligned?
+ mov ip, r0 @ preserve r0 as return value
+ beq 1b @ jump into the middle of memset
+ subs r2, r2, #2 @ cope with n == 0
+ movge r3, r1, lsr #8 @ r3 = r1 >> 8
+ strgeb r1, [ip], #1 @ *ip = r1
+ strgeb r3, [ip], #1 @ *ip = r3
+ bgt 1b @ back into memset if n > 0
+ ret lr @ otherwise return
+UNWIND( .fnend )
+ENDPROC(__memset16)
+ENTRY(__memset32)
+UNWIND( .fnstart )
+ mov r3, r1 @ copy r1 to r3 and fall into memset64
+UNWIND( .fnend )
+ENDPROC(__memset32)
+ENTRY(__memset64)
+UNWIND( .fnstart )
+ mov ip, r0 @ preserve r0 as return value
+ b 7b @ jump into the middle of memset
+UNWIND( .fnend )
+ENDPROC(__memset64)
--
2.11.0
WARNING: multiple messages have this Message-ID (diff)
From: Matthew Wilcox <willy@infradead.org>
To: linux-kernel@vger.kernel.org
Cc: linux-arch@vger.kernel.org, linux-mips@linux-mips.org,
linux-fbdev@vger.kernel.org,
Matthew Wilcox <mawilcox@microsoft.com>,
x86@kernel.org, Minchan Kim <minchan@kernel.org>,
linux-alpha@vger.kernel.org, sparclinux@vger.kernel.org,
linuxppc-dev@lists.ozlabs.org,
linux-arm-kernel@lists.infradead.org
Subject: [PATCH v3 2/7] ARM: Implement memset16, memset32 & memset64
Date: Fri, 24 Mar 2017 16:13:13 +0000 [thread overview]
Message-ID: <20170324161318.18718-3-willy@infradead.org> (raw)
In-Reply-To: <20170324161318.18718-1-willy@infradead.org>
From: Matthew Wilcox <mawilcox@microsoft.com>
ARM is only 32-bit, so it doesn't really need a memset64, but it was
essentially free to add it to the existing implementation.
Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
Reviewed-by: Russell King <rmk+kernel@armlinux.org.uk>
---
arch/arm/include/asm/string.h | 21 +++++++++++++++++++++
arch/arm/kernel/armksyms.c | 3 +++
arch/arm/lib/memset.S | 44 ++++++++++++++++++++++++++++++++++---------
3 files changed, 59 insertions(+), 9 deletions(-)
diff --git a/arch/arm/include/asm/string.h b/arch/arm/include/asm/string.h
index cf4f3aad0fc1..bc7a1be7a76a 100644
--- a/arch/arm/include/asm/string.h
+++ b/arch/arm/include/asm/string.h
@@ -24,6 +24,27 @@ extern void * memchr(const void *, int, __kernel_size_t);
#define __HAVE_ARCH_MEMSET
extern void * memset(void *, int, __kernel_size_t);
+#define __HAVE_ARCH_MEMSET16
+extern void *__memset16(uint16_t *, uint16_t v, __kernel_size_t);
+static inline void *memset16(uint16_t *p, uint16_t v, __kernel_size_t n)
+{
+ return __memset16(p, v, n * 2);
+}
+
+#define __HAVE_ARCH_MEMSET32
+extern void *__memset32(uint32_t *, uint32_t v, __kernel_size_t);
+static inline void *memset32(uint32_t *p, uint32_t v, __kernel_size_t n)
+{
+ return __memset32(p, v, n * 4);
+}
+
+#define __HAVE_ARCH_MEMSET64
+extern void *__memset64(uint64_t *, uint32_t low, __kernel_size_t, uint32_t hi);
+static inline void *memset64(uint64_t *p, uint64_t v, __kernel_size_t n)
+{
+ return __memset64(p, v, n * 8, v >> 32);
+}
+
extern void __memzero(void *ptr, __kernel_size_t n);
#define memset(p,v,n) \
diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c
index 8e8d20cdbce7..633341ed0713 100644
--- a/arch/arm/kernel/armksyms.c
+++ b/arch/arm/kernel/armksyms.c
@@ -87,6 +87,9 @@ EXPORT_SYMBOL(__raw_writesl);
EXPORT_SYMBOL(strchr);
EXPORT_SYMBOL(strrchr);
EXPORT_SYMBOL(memset);
+EXPORT_SYMBOL(__memset16);
+EXPORT_SYMBOL(__memset32);
+EXPORT_SYMBOL(__memset64);
EXPORT_SYMBOL(memcpy);
EXPORT_SYMBOL(memmove);
EXPORT_SYMBOL(memchr);
diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S
index 3c65e3bd790f..9adc9bdf3ffb 100644
--- a/arch/arm/lib/memset.S
+++ b/arch/arm/lib/memset.S
@@ -21,14 +21,14 @@ ENTRY(memset)
UNWIND( .fnstart )
ands r3, r0, #3 @ 1 unaligned?
mov ip, r0 @ preserve r0 as return value
+ orr r1, r1, r1, lsl #8
bne 6f @ 1
/*
* we know that the pointer in ip is aligned to a word boundary.
*/
-1: orr r1, r1, r1, lsl #8
- orr r1, r1, r1, lsl #16
+1: orr r1, r1, r1, lsl #16
mov r3, r1
- cmp r2, #16
+7: cmp r2, #16
blt 4f
#if ! CALGN(1)+0
@@ -41,7 +41,7 @@ UNWIND( .fnend )
UNWIND( .fnstart )
UNWIND( .save {r8, lr} )
mov r8, r1
- mov lr, r1
+ mov lr, r3
2: subs r2, r2, #64
stmgeia ip!, {r1, r3, r8, lr} @ 64 bytes at a time.
@@ -73,11 +73,11 @@ UNWIND( .fnend )
UNWIND( .fnstart )
UNWIND( .save {r4-r8, lr} )
mov r4, r1
- mov r5, r1
+ mov r5, r3
mov r6, r1
- mov r7, r1
+ mov r7, r3
mov r8, r1
- mov lr, r1
+ mov lr, r3
cmp r2, #96
tstgt ip, #31
@@ -114,12 +114,13 @@ UNWIND( .fnstart )
tst r2, #4
strne r1, [ip], #4
/*
- * When we get here, we've got less than 4 bytes to zero. We
+ * When we get here, we've got less than 4 bytes to set. We
* may have an unaligned pointer as well.
*/
5: tst r2, #2
+ movne r3, r1, lsr #8 @ the top half of a 16-bit pattern
strneb r1, [ip], #1
- strneb r1, [ip], #1
+ strneb r3, [ip], #1
tst r2, #1
strneb r1, [ip], #1
ret lr
@@ -135,3 +136,28 @@ UNWIND( .fnstart )
UNWIND( .fnend )
ENDPROC(memset)
ENDPROC(mmioset)
+
+ENTRY(__memset16)
+UNWIND( .fnstart )
+ tst r0, #2 @ pointer unaligned?
+ mov ip, r0 @ preserve r0 as return value
+ beq 1b @ jump into the middle of memset
+ subs r2, r2, #2 @ cope with n == 0
+ movge r3, r1, lsr #8 @ r3 = r1 >> 8
+ strgeb r1, [ip], #1 @ *ip = r1
+ strgeb r3, [ip], #1 @ *ip = r3
+ bgt 1b @ back into memset if n > 0
+ ret lr @ otherwise return
+UNWIND( .fnend )
+ENDPROC(__memset16)
+ENTRY(__memset32)
+UNWIND( .fnstart )
+ mov r3, r1 @ copy r1 to r3 and fall into memset64
+UNWIND( .fnend )
+ENDPROC(__memset32)
+ENTRY(__memset64)
+UNWIND( .fnstart )
+ mov ip, r0 @ preserve r0 as return value
+ b 7b @ jump into the middle of memset
+UNWIND( .fnend )
+ENDPROC(__memset64)
--
2.11.0
WARNING: multiple messages have this Message-ID (diff)
From: willy@infradead.org (Matthew Wilcox)
To: linux-arm-kernel@lists.infradead.org
Subject: [PATCH v3 2/7] ARM: Implement memset16, memset32 & memset64
Date: Fri, 24 Mar 2017 09:13:13 -0700 [thread overview]
Message-ID: <20170324161318.18718-3-willy@infradead.org> (raw)
In-Reply-To: <20170324161318.18718-1-willy@infradead.org>
From: Matthew Wilcox <mawilcox@microsoft.com>
ARM is only 32-bit, so it doesn't really need a memset64, but it was
essentially free to add it to the existing implementation.
Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
Reviewed-by: Russell King <rmk+kernel@armlinux.org.uk>
---
arch/arm/include/asm/string.h | 21 +++++++++++++++++++++
arch/arm/kernel/armksyms.c | 3 +++
arch/arm/lib/memset.S | 44 ++++++++++++++++++++++++++++++++++---------
3 files changed, 59 insertions(+), 9 deletions(-)
diff --git a/arch/arm/include/asm/string.h b/arch/arm/include/asm/string.h
index cf4f3aad0fc1..bc7a1be7a76a 100644
--- a/arch/arm/include/asm/string.h
+++ b/arch/arm/include/asm/string.h
@@ -24,6 +24,27 @@ extern void * memchr(const void *, int, __kernel_size_t);
#define __HAVE_ARCH_MEMSET
extern void * memset(void *, int, __kernel_size_t);
+#define __HAVE_ARCH_MEMSET16
+extern void *__memset16(uint16_t *, uint16_t v, __kernel_size_t);
+static inline void *memset16(uint16_t *p, uint16_t v, __kernel_size_t n)
+{
+ return __memset16(p, v, n * 2);
+}
+
+#define __HAVE_ARCH_MEMSET32
+extern void *__memset32(uint32_t *, uint32_t v, __kernel_size_t);
+static inline void *memset32(uint32_t *p, uint32_t v, __kernel_size_t n)
+{
+ return __memset32(p, v, n * 4);
+}
+
+#define __HAVE_ARCH_MEMSET64
+extern void *__memset64(uint64_t *, uint32_t low, __kernel_size_t, uint32_t hi);
+static inline void *memset64(uint64_t *p, uint64_t v, __kernel_size_t n)
+{
+ return __memset64(p, v, n * 8, v >> 32);
+}
+
extern void __memzero(void *ptr, __kernel_size_t n);
#define memset(p,v,n) \
diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c
index 8e8d20cdbce7..633341ed0713 100644
--- a/arch/arm/kernel/armksyms.c
+++ b/arch/arm/kernel/armksyms.c
@@ -87,6 +87,9 @@ EXPORT_SYMBOL(__raw_writesl);
EXPORT_SYMBOL(strchr);
EXPORT_SYMBOL(strrchr);
EXPORT_SYMBOL(memset);
+EXPORT_SYMBOL(__memset16);
+EXPORT_SYMBOL(__memset32);
+EXPORT_SYMBOL(__memset64);
EXPORT_SYMBOL(memcpy);
EXPORT_SYMBOL(memmove);
EXPORT_SYMBOL(memchr);
diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S
index 3c65e3bd790f..9adc9bdf3ffb 100644
--- a/arch/arm/lib/memset.S
+++ b/arch/arm/lib/memset.S
@@ -21,14 +21,14 @@ ENTRY(memset)
UNWIND( .fnstart )
ands r3, r0, #3 @ 1 unaligned?
mov ip, r0 @ preserve r0 as return value
+ orr r1, r1, r1, lsl #8
bne 6f @ 1
/*
* we know that the pointer in ip is aligned to a word boundary.
*/
-1: orr r1, r1, r1, lsl #8
- orr r1, r1, r1, lsl #16
+1: orr r1, r1, r1, lsl #16
mov r3, r1
- cmp r2, #16
+7: cmp r2, #16
blt 4f
#if ! CALGN(1)+0
@@ -41,7 +41,7 @@ UNWIND( .fnend )
UNWIND( .fnstart )
UNWIND( .save {r8, lr} )
mov r8, r1
- mov lr, r1
+ mov lr, r3
2: subs r2, r2, #64
stmgeia ip!, {r1, r3, r8, lr} @ 64 bytes at a time.
@@ -73,11 +73,11 @@ UNWIND( .fnend )
UNWIND( .fnstart )
UNWIND( .save {r4-r8, lr} )
mov r4, r1
- mov r5, r1
+ mov r5, r3
mov r6, r1
- mov r7, r1
+ mov r7, r3
mov r8, r1
- mov lr, r1
+ mov lr, r3
cmp r2, #96
tstgt ip, #31
@@ -114,12 +114,13 @@ UNWIND( .fnstart )
tst r2, #4
strne r1, [ip], #4
/*
- * When we get here, we've got less than 4 bytes to zero. We
+ * When we get here, we've got less than 4 bytes to set. We
* may have an unaligned pointer as well.
*/
5: tst r2, #2
+ movne r3, r1, lsr #8 @ the top half of a 16-bit pattern
strneb r1, [ip], #1
- strneb r1, [ip], #1
+ strneb r3, [ip], #1
tst r2, #1
strneb r1, [ip], #1
ret lr
@@ -135,3 +136,28 @@ UNWIND( .fnstart )
UNWIND( .fnend )
ENDPROC(memset)
ENDPROC(mmioset)
+
+ENTRY(__memset16)
+UNWIND( .fnstart )
+ tst r0, #2 @ pointer unaligned?
+ mov ip, r0 @ preserve r0 as return value
+ beq 1b @ jump into the middle of memset
+ subs r2, r2, #2 @ cope with n == 0
+ movge r3, r1, lsr #8 @ r3 = r1 >> 8
+ strgeb r1, [ip], #1 @ *ip = r1
+ strgeb r3, [ip], #1 @ *ip = r3
+ bgt 1b @ back into memset if n > 0
+ ret lr @ otherwise return
+UNWIND( .fnend )
+ENDPROC(__memset16)
+ENTRY(__memset32)
+UNWIND( .fnstart )
+ mov r3, r1 @ copy r1 to r3 and fall into memset64
+UNWIND( .fnend )
+ENDPROC(__memset32)
+ENTRY(__memset64)
+UNWIND( .fnstart )
+ mov ip, r0 @ preserve r0 as return value
+ b 7b @ jump into the middle of memset
+UNWIND( .fnend )
+ENDPROC(__memset64)
--
2.11.0
next prev parent reply other threads:[~2017-03-24 16:14 UTC|newest]
Thread overview: 50+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-03-24 16:13 [PATCH v3 0/7] Add memsetN functions Matthew Wilcox
2017-03-24 16:13 ` Matthew Wilcox
2017-03-24 16:13 ` Matthew Wilcox
2017-03-24 16:13 ` [PATCH v3 1/7] Add multibyte memset functions Matthew Wilcox
2017-03-24 16:13 ` Matthew Wilcox
2017-03-24 16:13 ` Matthew Wilcox
2017-03-24 16:13 ` Matthew Wilcox [this message]
2017-03-24 16:13 ` [PATCH v3 2/7] ARM: Implement memset16, memset32 & memset64 Matthew Wilcox
2017-03-24 16:13 ` Matthew Wilcox
2017-03-24 16:13 ` Matthew Wilcox
2017-03-24 16:13 ` [PATCH v3 3/7] x86: " Matthew Wilcox
2017-03-24 16:13 ` Matthew Wilcox
2017-03-24 16:13 ` Matthew Wilcox
2017-03-26 7:44 ` kbuild test robot
2017-03-26 7:44 ` kbuild test robot
2017-03-26 7:44 ` kbuild test robot
2017-03-26 7:44 ` kbuild test robot
2017-03-26 7:44 ` kbuild test robot
2017-03-24 16:13 ` [PATCH v3 4/7] alpha: Add support for memset16 Matthew Wilcox
2017-03-24 16:13 ` Matthew Wilcox
2017-03-24 16:13 ` Matthew Wilcox
2017-03-24 16:13 ` Matthew Wilcox
2017-03-26 7:28 ` kbuild test robot
2017-03-26 7:28 ` kbuild test robot
2017-03-26 7:28 ` kbuild test robot
2017-03-26 7:28 ` kbuild test robot
2017-03-24 16:13 ` [PATCH v3 5/7] zram: Convert to using memset_l Matthew Wilcox
2017-03-24 16:13 ` Matthew Wilcox
2017-03-24 16:13 ` Matthew Wilcox
2017-03-27 5:01 ` Minchan Kim
2017-03-27 5:01 ` Minchan Kim
2017-03-27 5:01 ` Minchan Kim
2017-03-27 5:01 ` Minchan Kim
2017-03-24 16:13 ` [PATCH v3 6/7] sym53c8xx_2: Convert to use memset32 Matthew Wilcox
2017-03-24 16:13 ` Matthew Wilcox
2017-03-24 16:13 ` Matthew Wilcox
2017-03-24 16:13 ` Matthew Wilcox
2017-03-24 16:13 ` [PATCH v3 7/7] vga: Optimise console scrolling Matthew Wilcox
2017-03-24 16:13 ` Matthew Wilcox
2017-03-24 16:13 ` Matthew Wilcox
2017-03-24 16:13 ` Matthew Wilcox
2017-03-26 8:45 ` kbuild test robot
2017-03-26 8:45 ` kbuild test robot
2017-03-26 8:45 ` kbuild test robot
2017-03-26 8:45 ` kbuild test robot
2017-03-26 8:45 ` kbuild test robot
2017-03-26 9:53 ` kbuild test robot
2017-03-26 9:53 ` kbuild test robot
2017-03-26 9:53 ` kbuild test robot
2017-03-26 9:53 ` kbuild test robot
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20170324161318.18718-3-willy@infradead.org \
--to=willy@infradead.org \
--cc=linux-alpha@vger.kernel.org \
--cc=linux-arch@vger.kernel.org \
--cc=linux-arm-kernel@lists.infradead.org \
--cc=linux-fbdev@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mips@linux-mips.org \
--cc=linuxppc-dev@lists.ozlabs.org \
--cc=mawilcox@microsoft.com \
--cc=minchan@kernel.org \
--cc=sparclinux@vger.kernel.org \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.