All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] riscv:lib: optimize memcmp with ld insn
@ 2022-08-31 13:07 Yipeng Zou
  2022-08-31 13:16 ` Conor.Dooley
  0 siblings, 1 reply; 5+ messages in thread
From: Yipeng Zou @ 2022-08-31 13:07 UTC (permalink / raw)
  To: linux-riscv, paul.walmsley, palmer, aou, Conor.Dooley
  Cc: zouyipeng, liaochang1, chris.zjh

Currently memcmp is implemented in C code (lib/string.c), which compares
memory byte by byte.

This patch uses the ld insn to compare memory a word at a time instead.
According to the test results, this is several times faster.

Signed-off-by: Yipeng Zou <zouyipeng@huawei.com>
---
 arch/riscv/include/asm/string.h |  3 ++
 arch/riscv/lib/Makefile         |  1 +
 arch/riscv/lib/memcmp.S         | 59 +++++++++++++++++++++++++++++++++
 3 files changed, 63 insertions(+)
 create mode 100644 arch/riscv/lib/memcmp.S

diff --git a/arch/riscv/include/asm/string.h b/arch/riscv/include/asm/string.h
index 909049366555..3337b43d3803 100644
--- a/arch/riscv/include/asm/string.h
+++ b/arch/riscv/include/asm/string.h
@@ -18,6 +18,9 @@ extern asmlinkage void *__memcpy(void *, const void *, size_t);
 #define __HAVE_ARCH_MEMMOVE
 extern asmlinkage void *memmove(void *, const void *, size_t);
 extern asmlinkage void *__memmove(void *, const void *, size_t);
+#define __HAVE_ARCH_MEMCMP
+extern int memcmp(const void *, const void *, size_t);
+
 /* For those files which don't want to check by kasan. */
 #if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__)
 #define memcpy(dst, src, len) __memcpy(dst, src, len)
diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
index 25d5c9664e57..70773bf0c471 100644
--- a/arch/riscv/lib/Makefile
+++ b/arch/riscv/lib/Makefile
@@ -3,6 +3,7 @@ lib-y			+= delay.o
 lib-y			+= memcpy.o
 lib-y			+= memset.o
 lib-y			+= memmove.o
+lib-y			+= memcmp.o
 lib-$(CONFIG_MMU)	+= uaccess.o
 lib-$(CONFIG_64BIT)	+= tishift.o
 
diff --git a/arch/riscv/lib/memcmp.S b/arch/riscv/lib/memcmp.S
new file mode 100644
index 000000000000..83af1c433e6f
--- /dev/null
+++ b/arch/riscv/lib/memcmp.S
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2022 zouyipeng@huawei.com
+ */
+#include <linux/linkage.h>
+#include <asm-generic/export.h>
+#include <asm/asm.h>
+
+/* arguments:
+* a0: addr0 - first buffer
+* a1: addr1 - second buffer
+* a2: size  - number of bytes to compare
+*/
+#define addr0	a0	/* read cursor into the first buffer */
+#define addr1	a1	/* read cursor into the second buffer */
+#define limit	a2	/* size on entry; memcmp later rewrites it to addr0 + size */
+
+#define data0	a3	/* value loaded from addr0 */
+#define data1	a4	/* value loaded from addr1 */
+#define tmp	t3	/* scratch: loop conditions and the data0 - data1 difference */
+#define aaddr	t4	/* addr0 value at which the word loop stops */
+#define return	a0	/* result register; same register as addr0 */
+
+/* LD_CMP: load one item from each pointer with op, advance both pointers by offset, leave d0 - d1 in tmp */
+.macro LD_CMP op d0 d1 a0 a1 offset
+	\op \d0, 0(\a0)
+	\op \d1, 0(\a1)
+	addi \a0, \a0, \offset
+	addi \a1, \a1, \offset
+	sub tmp, \d0, \d1	/* tmp == 0 exactly when the two loaded values are equal */
+.endm
+
+ENTRY(memcmp)
+	/* tmp = size & (SZREG - 1): trailing bytes that do not fill a whole register */
+	andi tmp, limit, SZREG - 1
+	/* aaddr = addr0 + size - tmp ends the word loop; limit = addr0 + size ends the buffer */
+	add aaddr, addr0, limit
+	sub aaddr, aaddr, tmp
+	add limit, addr0, limit
+
+.LloopWord:
+	sltu tmp, addr0, aaddr	/* tmp = 1 while addr0 is below the word-loop end */
+	beqz tmp, .LloopByte
+	/* NOTE(review): REG_L loads use addr0/addr1 as passed in; no alignment check is visible here - confirm unaligned callers are OK */
+	LD_CMP REG_L data0 data1 addr0 addr1 SZREG
+	beqz tmp, .LloopWord
+	j .Lreturn	/* mismatch: tmp is the full-register difference (NOTE(review): confirm its sign and low 32 bits match byte-wise memcmp order) */
+
+.LloopByte:
+	sltu tmp, addr0, limit	/* tmp = 1 while bytes remain; tmp = 0 falls out as the "equal" result */
+	beqz tmp, .Lreturn
+
+	LD_CMP lbu data0 data1 addr0 addr1 1	/* lbu zero-extends, so tmp is the signed difference of two bytes */
+	beqz tmp, .LloopByte
+.Lreturn:
+	mv return, tmp	/* a0 = 0 if no difference was found, otherwise the last computed difference */
+	ret
+END(memcmp)
+EXPORT_SYMBOL(memcmp);
-- 
2.17.1


_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH] riscv:lib: optimize memcmp with ld insn
  2022-08-31 13:07 [PATCH] riscv:lib: optimize memcmp with ld insn Yipeng Zou
@ 2022-08-31 13:16 ` Conor.Dooley
  2022-09-01 13:53   ` Yipeng Zou
  0 siblings, 1 reply; 5+ messages in thread
From: Conor.Dooley @ 2022-08-31 13:16 UTC (permalink / raw)
  To: zouyipeng, linux-riscv, paul.walmsley, palmer, aou; +Cc: liaochang1, chris.zjh

On 31/08/2022 14:07, Yipeng Zou wrote:
> riscv:lib: optimize memcmp with ld insn

Minor nit: "riscv: lib:"

> EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe
> 
> Currently memcmp was implemented in c code(lib/string.c), which compare
> memory per byte.
> 
> This patch use ld insn compare memory per word to improve. From the test
> Results, this will take several times optimized.

Hey Yipeng,
Could you share some more information about the tests you did?
The test results showing the % improvement would be nice :)
Thanks,
Conor.

> 
> Signed-off-by: Yipeng Zou <zouyipeng@huawei.com>
> ---
>   arch/riscv/include/asm/string.h |  3 ++
>   arch/riscv/lib/Makefile         |  1 +
>   arch/riscv/lib/memcmp.S         | 59 +++++++++++++++++++++++++++++++++
>   3 files changed, 63 insertions(+)
>   create mode 100644 arch/riscv/lib/memcmp.S
> 
> diff --git a/arch/riscv/include/asm/string.h b/arch/riscv/include/asm/string.h
> index 909049366555..3337b43d3803 100644
> --- a/arch/riscv/include/asm/string.h
> +++ b/arch/riscv/include/asm/string.h
> @@ -18,6 +18,9 @@ extern asmlinkage void *__memcpy(void *, const void *, size_t);
>   #define __HAVE_ARCH_MEMMOVE
>   extern asmlinkage void *memmove(void *, const void *, size_t);
>   extern asmlinkage void *__memmove(void *, const void *, size_t);
> +#define __HAVE_ARCH_MEMCMP
> +extern int memcmp(const void *, const void *, size_t);
> +
>   /* For those files which don't want to check by kasan. */
>   #if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__)
>   #define memcpy(dst, src, len) __memcpy(dst, src, len)
> diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
> index 25d5c9664e57..70773bf0c471 100644
> --- a/arch/riscv/lib/Makefile
> +++ b/arch/riscv/lib/Makefile
> @@ -3,6 +3,7 @@ lib-y                   += delay.o
>   lib-y                  += memcpy.o
>   lib-y                  += memset.o
>   lib-y                  += memmove.o
> +lib-y                  += memcmp.o
>   lib-$(CONFIG_MMU)      += uaccess.o
>   lib-$(CONFIG_64BIT)    += tishift.o
> 
> diff --git a/arch/riscv/lib/memcmp.S b/arch/riscv/lib/memcmp.S
> new file mode 100644
> index 000000000000..83af1c433e6f
> --- /dev/null
> +++ b/arch/riscv/lib/memcmp.S
> @@ -0,0 +1,59 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/*
> + * Copyright (C) 2022 zouyipeng@huawei.com
> + */
> +#include <linux/linkage.h>
> +#include <asm-generic/export.h>
> +#include <asm/asm.h>
> +
> +/* argrments:
> +* a0: addr0
> +* a1: addr1
> +* a2: size
> +*/
> +#define addr0  a0
> +#define addr1  a1
> +#define limit  a2
> +
> +#define data0  a3
> +#define data1  a4
> +#define tmp    t3
> +#define aaddr  t4
> +#define return a0
> +
> +/* load and compare */
> +.macro LD_CMP op d0 d1 a0 a1 offset
> +       \op \d0, 0(\a0)
> +       \op \d1, 0(\a1)
> +       addi \a0, \a0, \offset
> +       addi \a1, \a1, \offset
> +       sub tmp, \d0, \d1
> +.endm
> +
> +ENTRY(memcmp)
> +       /* test limit aligend with SZREG */
> +       andi tmp, limit, SZREG - 1
> +       /* load tail */
> +       add aaddr, addr0, limit
> +       sub aaddr, aaddr, tmp
> +       add limit, addr0, limit
> +
> +.LloopWord:
> +       sltu tmp, addr0, aaddr
> +       beqz tmp, .LloopByte
> +
> +       LD_CMP REG_L data0 data1 addr0 addr1 SZREG
> +       beqz tmp, .LloopWord
> +       j .Lreturn
> +
> +.LloopByte:
> +       sltu tmp, addr0, limit
> +       beqz tmp, .Lreturn
> +
> +       LD_CMP lbu data0 data1 addr0 addr1 1
> +       beqz tmp, .LloopByte
> +.Lreturn:
> +       mv return, tmp
> +       ret
> +END(memcmp)
> +EXPORT_SYMBOL(memcmp);
> --
> 2.17.1
> 

_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] riscv:lib: optimize memcmp with ld insn
  2022-08-31 13:16 ` Conor.Dooley
@ 2022-09-01 13:53   ` Yipeng Zou
  2022-09-01 15:40     ` Conor.Dooley
  0 siblings, 1 reply; 5+ messages in thread
From: Yipeng Zou @ 2022-09-01 13:53 UTC (permalink / raw)
  To: Conor.Dooley, linux-riscv, paul.walmsley, palmer, aou
  Cc: liaochang1, chris.zjh


在 2022/8/31 21:16, Conor.Dooley@microchip.com 写道:
> On 31/08/2022 14:07, Yipeng Zou wrote:
>> riscv:lib: optimize memcmp with ld insn
> Minor nit: "riscv: lib:
ok
>> EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe
>>
>> Currently memcmp was implemented in c code(lib/string.c), which compare
>> memory per byte.
>>
>> This patch use ld insn compare memory per word to improve. From the test
>> Results, this will take several times optimized.
> Hey Yipeng,
> Could you share some more information about the tests you did?
> The test results showing the % improvement would be nice :)
> Thanks,
> Conor.

Of course, but my board is not ready yet, so I have only tested this patch
on QEMU (RV64 & RV32).

I allocated 8 KB, 4 KB and 1 KB buffers to compare, each in a loop of 10k iterations.

Size(B)          8k      4k      1k
Min(ns) before   40800   26500   15600
Min(ns) after    16100   14200   12400

Size(B)          8k      4k      1k
AVG(ns) before   46316   32302   17965
AVG(ns) after    21281   16446   14316

>> Signed-off-by: Yipeng Zou <zouyipeng@huawei.com>
>> ---
>>    arch/riscv/include/asm/string.h |  3 ++
>>    arch/riscv/lib/Makefile         |  1 +
>>    arch/riscv/lib/memcmp.S         | 59 +++++++++++++++++++++++++++++++++
>>    3 files changed, 63 insertions(+)
>>    create mode 100644 arch/riscv/lib/memcmp.S
>>
>> diff --git a/arch/riscv/include/asm/string.h b/arch/riscv/include/asm/string.h
>> index 909049366555..3337b43d3803 100644
>> --- a/arch/riscv/include/asm/string.h
>> +++ b/arch/riscv/include/asm/string.h
>> @@ -18,6 +18,9 @@ extern asmlinkage void *__memcpy(void *, const void *, size_t);
>>    #define __HAVE_ARCH_MEMMOVE
>>    extern asmlinkage void *memmove(void *, const void *, size_t);
>>    extern asmlinkage void *__memmove(void *, const void *, size_t);
>> +#define __HAVE_ARCH_MEMCMP
>> +extern int memcmp(const void *, const void *, size_t);
>> +
>>    /* For those files which don't want to check by kasan. */
>>    #if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__)
>>    #define memcpy(dst, src, len) __memcpy(dst, src, len)
>> diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
>> index 25d5c9664e57..70773bf0c471 100644
>> --- a/arch/riscv/lib/Makefile
>> +++ b/arch/riscv/lib/Makefile
>> @@ -3,6 +3,7 @@ lib-y                   += delay.o
>>    lib-y                  += memcpy.o
>>    lib-y                  += memset.o
>>    lib-y                  += memmove.o
>> +lib-y                  += memcmp.o
>>    lib-$(CONFIG_MMU)      += uaccess.o
>>    lib-$(CONFIG_64BIT)    += tishift.o
>>
>> diff --git a/arch/riscv/lib/memcmp.S b/arch/riscv/lib/memcmp.S
>> new file mode 100644
>> index 000000000000..83af1c433e6f
>> --- /dev/null
>> +++ b/arch/riscv/lib/memcmp.S
>> @@ -0,0 +1,59 @@
>> +/* SPDX-License-Identifier: GPL-2.0-only */
>> +/*
>> + * Copyright (C) 2022 zouyipeng@huawei.com
>> + */
>> +#include <linux/linkage.h>
>> +#include <asm-generic/export.h>
>> +#include <asm/asm.h>
>> +
>> +/* argrments:
>> +* a0: addr0
>> +* a1: addr1
>> +* a2: size
>> +*/
>> +#define addr0  a0
>> +#define addr1  a1
>> +#define limit  a2
>> +
>> +#define data0  a3
>> +#define data1  a4
>> +#define tmp    t3
>> +#define aaddr  t4
>> +#define return a0
>> +
>> +/* load and compare */
>> +.macro LD_CMP op d0 d1 a0 a1 offset
>> +       \op \d0, 0(\a0)
>> +       \op \d1, 0(\a1)
>> +       addi \a0, \a0, \offset
>> +       addi \a1, \a1, \offset
>> +       sub tmp, \d0, \d1
>> +.endm
>> +
>> +ENTRY(memcmp)
>> +       /* test limit aligend with SZREG */
>> +       andi tmp, limit, SZREG - 1
>> +       /* load tail */
>> +       add aaddr, addr0, limit
>> +       sub aaddr, aaddr, tmp
>> +       add limit, addr0, limit
>> +
>> +.LloopWord:
>> +       sltu tmp, addr0, aaddr
>> +       beqz tmp, .LloopByte
>> +
>> +       LD_CMP REG_L data0 data1 addr0 addr1 SZREG
>> +       beqz tmp, .LloopWord
>> +       j .Lreturn
>> +
>> +.LloopByte:
>> +       sltu tmp, addr0, limit
>> +       beqz tmp, .Lreturn
>> +
>> +       LD_CMP lbu data0 data1 addr0 addr1 1
>> +       beqz tmp, .LloopByte
>> +.Lreturn:
>> +       mv return, tmp
>> +       ret
>> +END(memcmp)
>> +EXPORT_SYMBOL(memcmp);
>> --
>> 2.17.1
>>
-- 
Regards,
Yipeng Zou


_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] riscv:lib: optimize memcmp with ld insn
  2022-09-01 13:53   ` Yipeng Zou
@ 2022-09-01 15:40     ` Conor.Dooley
  2022-09-02 10:32       ` Yipeng Zou
  0 siblings, 1 reply; 5+ messages in thread
From: Conor.Dooley @ 2022-09-01 15:40 UTC (permalink / raw)
  To: zouyipeng, linux-riscv, paul.walmsley, palmer, aou; +Cc: liaochang1, chris.zjh

On 01/09/2022 14:53, Yipeng Zou wrote:
> EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe
> 
> 在 2022/8/31 21:16, Conor.Dooley@microchip.com 写道:
>> On 31/08/2022 14:07, Yipeng Zou wrote:
>>> riscv:lib: optimize memcmp with ld insn
>> Minor nit: "riscv: lib:
> ok
>>> EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe
>>>
>>> Currently memcmp was implemented in c code(lib/string.c), which compare
>>> memory per byte.
>>>
>>> This patch use ld insn compare memory per word to improve. From the test
>>> Results, this will take several times optimized.
>> Hey Yipeng,
>> Could you share some more information about the tests you did?
>> The test results showing the % improvement would be nice :)
>> Thanks,
>> Conor.
> 
> Ofcourse, But My board was not ready, So i just test this patch on qemu
> RV64 & RV32 .
> 
> Alloc 8,4,1KB buffer to compare, each loop 10k times.

I fixed that up since it was fairly unreadable..

Size(B) Min(ns) AVG(ns) //before

8k      40800   46316
4k      26500   32302
1k      15600   17965

Size(B) Min(ns) AVG(ns) //after

8k      16100   21281
4k      14200   16446
1k      12400   14316


I think putting this into the commit message would be nice.

I am no whizz on these kinds of things, but with the commit message
fixed up:
Reviewed-by: Conor Dooley <conor.dooley@microchip.com>

>>> Signed-off-by: Yipeng Zou <zouyipeng@huawei.com>
>>> ---
>>>    arch/riscv/include/asm/string.h |  3 ++
>>>    arch/riscv/lib/Makefile         |  1 +
>>>    arch/riscv/lib/memcmp.S         | 59 +++++++++++++++++++++++++++++++++
>>>    3 files changed, 63 insertions(+)
>>>    create mode 100644 arch/riscv/lib/memcmp.S
>>>
>>> diff --git a/arch/riscv/include/asm/string.h b/arch/riscv/include/asm/string.h
>>> index 909049366555..3337b43d3803 100644
>>> --- a/arch/riscv/include/asm/string.h
>>> +++ b/arch/riscv/include/asm/string.h
>>> @@ -18,6 +18,9 @@ extern asmlinkage void *__memcpy(void *, const void *, size_t);
>>>    #define __HAVE_ARCH_MEMMOVE
>>>    extern asmlinkage void *memmove(void *, const void *, size_t);
>>>    extern asmlinkage void *__memmove(void *, const void *, size_t);
>>> +#define __HAVE_ARCH_MEMCMP
>>> +extern int memcmp(const void *, const void *, size_t);
>>> +
>>>    /* For those files which don't want to check by kasan. */
>>>    #if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__)
>>>    #define memcpy(dst, src, len) __memcpy(dst, src, len)
>>> diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
>>> index 25d5c9664e57..70773bf0c471 100644
>>> --- a/arch/riscv/lib/Makefile
>>> +++ b/arch/riscv/lib/Makefile
>>> @@ -3,6 +3,7 @@ lib-y                   += delay.o
>>>    lib-y                  += memcpy.o
>>>    lib-y                  += memset.o
>>>    lib-y                  += memmove.o
>>> +lib-y                  += memcmp.o
>>>    lib-$(CONFIG_MMU)      += uaccess.o
>>>    lib-$(CONFIG_64BIT)    += tishift.o
>>>
>>> diff --git a/arch/riscv/lib/memcmp.S b/arch/riscv/lib/memcmp.S
>>> new file mode 100644
>>> index 000000000000..83af1c433e6f
>>> --- /dev/null
>>> +++ b/arch/riscv/lib/memcmp.S
>>> @@ -0,0 +1,59 @@
>>> +/* SPDX-License-Identifier: GPL-2.0-only */
>>> +/*
>>> + * Copyright (C) 2022 zouyipeng@huawei.com
>>> + */
>>> +#include <linux/linkage.h>
>>> +#include <asm-generic/export.h>
>>> +#include <asm/asm.h>
>>> +
>>> +/* argrments:
>>> +* a0: addr0
>>> +* a1: addr1
>>> +* a2: size
>>> +*/
>>> +#define addr0  a0
>>> +#define addr1  a1
>>> +#define limit  a2
>>> +
>>> +#define data0  a3
>>> +#define data1  a4
>>> +#define tmp    t3
>>> +#define aaddr  t4
>>> +#define return a0
>>> +
>>> +/* load and compare */
>>> +.macro LD_CMP op d0 d1 a0 a1 offset
>>> +       \op \d0, 0(\a0)
>>> +       \op \d1, 0(\a1)
>>> +       addi \a0, \a0, \offset
>>> +       addi \a1, \a1, \offset
>>> +       sub tmp, \d0, \d1
>>> +.endm
>>> +
>>> +ENTRY(memcmp)
>>> +       /* test limit aligend with SZREG */
>>> +       andi tmp, limit, SZREG - 1
>>> +       /* load tail */
>>> +       add aaddr, addr0, limit
>>> +       sub aaddr, aaddr, tmp
>>> +       add limit, addr0, limit
>>> +
>>> +.LloopWord:
>>> +       sltu tmp, addr0, aaddr
>>> +       beqz tmp, .LloopByte
>>> +
>>> +       LD_CMP REG_L data0 data1 addr0 addr1 SZREG
>>> +       beqz tmp, .LloopWord
>>> +       j .Lreturn
>>> +
>>> +.LloopByte:
>>> +       sltu tmp, addr0, limit
>>> +       beqz tmp, .Lreturn
>>> +
>>> +       LD_CMP lbu data0 data1 addr0 addr1 1
>>> +       beqz tmp, .LloopByte
>>> +.Lreturn:
>>> +       mv return, tmp
>>> +       ret
>>> +END(memcmp)
>>> +EXPORT_SYMBOL(memcmp);
>>> -- 
>>> 2.17.1
>>>
> -- 
> Regards,
> Yipeng Zou
> 

_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] riscv:lib: optimize memcmp with ld insn
  2022-09-01 15:40     ` Conor.Dooley
@ 2022-09-02 10:32       ` Yipeng Zou
  0 siblings, 0 replies; 5+ messages in thread
From: Yipeng Zou @ 2022-09-02 10:32 UTC (permalink / raw)
  To: Conor.Dooley, linux-riscv, paul.walmsley, palmer, aou
  Cc: liaochang1, chris.zjh


在 2022/9/1 23:40, Conor.Dooley@microchip.com 写道:
> On 01/09/2022 14:53, Yipeng Zou wrote:
>> EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe
>>
>> 在 2022/8/31 21:16, Conor.Dooley@microchip.com 写道:
>>> On 31/08/2022 14:07, Yipeng Zou wrote:
>>>> riscv:lib: optimize memcmp with ld insn
>>> Minor nit: "riscv: lib:
>> ok
>>>> EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe
>>>>
>>>> Currently memcmp was implemented in c code(lib/string.c), which compare
>>>> memory per byte.
>>>>
>>>> This patch use ld insn compare memory per word to improve. From the test
>>>> Results, this will take several times optimized.
>>> Hey Yipeng,
>>> Could you share some more information about the tests you did?
>>> The test results showing the % improvement would be nice :)
>>> Thanks,
>>> Conor.
>> Ofcourse, But My board was not ready, So i just test this patch on qemu
>> RV64 & RV32 .
>>
>> Alloc 8,4,1KB buffer to compare, each loop 10k times.
> I fixed that up since it was fairly unreadable..
>
> Size(B) Min(ns) AVG(ns) //before
>
> 8k      40800   46316
> 4k      26500   32302
> 1k      15600   17965
>
> Size(B) Min(ns) AVG(ns) //after
>
> 8k      16100   21281
> 4k      14200   16446
> 1k      12400   14316
Sorry, the text formatting was broken; thank you very much for fixing that up.
>
> I think putting this into the commit message would be nice.
>
> I am no whizz on these kinds of things, but with the commit message
> fixed up:
> Reviewed-by: Conor Dooley <conor.dooley@microchip.com>
I will put this in v2, and thanks again.
>>>> Signed-off-by: Yipeng Zou <zouyipeng@huawei.com>
>>>> ---
>>>>     arch/riscv/include/asm/string.h |  3 ++
>>>>     arch/riscv/lib/Makefile         |  1 +
>>>>     arch/riscv/lib/memcmp.S         | 59 +++++++++++++++++++++++++++++++++
>>>>     3 files changed, 63 insertions(+)
>>>>     create mode 100644 arch/riscv/lib/memcmp.S
>>>>
>>>> diff --git a/arch/riscv/include/asm/string.h b/arch/riscv/include/asm/string.h
>>>> index 909049366555..3337b43d3803 100644
>>>> --- a/arch/riscv/include/asm/string.h
>>>> +++ b/arch/riscv/include/asm/string.h
>>>> @@ -18,6 +18,9 @@ extern asmlinkage void *__memcpy(void *, const void *, size_t);
>>>>     #define __HAVE_ARCH_MEMMOVE
>>>>     extern asmlinkage void *memmove(void *, const void *, size_t);
>>>>     extern asmlinkage void *__memmove(void *, const void *, size_t);
>>>> +#define __HAVE_ARCH_MEMCMP
>>>> +extern int memcmp(const void *, const void *, size_t);
>>>> +
>>>>     /* For those files which don't want to check by kasan. */
>>>>     #if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__)
>>>>     #define memcpy(dst, src, len) __memcpy(dst, src, len)
>>>> diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
>>>> index 25d5c9664e57..70773bf0c471 100644
>>>> --- a/arch/riscv/lib/Makefile
>>>> +++ b/arch/riscv/lib/Makefile
>>>> @@ -3,6 +3,7 @@ lib-y                   += delay.o
>>>>     lib-y                  += memcpy.o
>>>>     lib-y                  += memset.o
>>>>     lib-y                  += memmove.o
>>>> +lib-y                  += memcmp.o
>>>>     lib-$(CONFIG_MMU)      += uaccess.o
>>>>     lib-$(CONFIG_64BIT)    += tishift.o
>>>>
>>>> diff --git a/arch/riscv/lib/memcmp.S b/arch/riscv/lib/memcmp.S
>>>> new file mode 100644
>>>> index 000000000000..83af1c433e6f
>>>> --- /dev/null
>>>> +++ b/arch/riscv/lib/memcmp.S
>>>> @@ -0,0 +1,59 @@
>>>> +/* SPDX-License-Identifier: GPL-2.0-only */
>>>> +/*
>>>> + * Copyright (C) 2022 zouyipeng@huawei.com
>>>> + */
>>>> +#include <linux/linkage.h>
>>>> +#include <asm-generic/export.h>
>>>> +#include <asm/asm.h>
>>>> +
>>>> +/* argrments:
>>>> +* a0: addr0
>>>> +* a1: addr1
>>>> +* a2: size
>>>> +*/
>>>> +#define addr0  a0
>>>> +#define addr1  a1
>>>> +#define limit  a2
>>>> +
>>>> +#define data0  a3
>>>> +#define data1  a4
>>>> +#define tmp    t3
>>>> +#define aaddr  t4
>>>> +#define return a0
>>>> +
>>>> +/* load and compare */
>>>> +.macro LD_CMP op d0 d1 a0 a1 offset
>>>> +       \op \d0, 0(\a0)
>>>> +       \op \d1, 0(\a1)
>>>> +       addi \a0, \a0, \offset
>>>> +       addi \a1, \a1, \offset
>>>> +       sub tmp, \d0, \d1
>>>> +.endm
>>>> +
>>>> +ENTRY(memcmp)
>>>> +       /* test limit aligend with SZREG */
>>>> +       andi tmp, limit, SZREG - 1
>>>> +       /* load tail */
>>>> +       add aaddr, addr0, limit
>>>> +       sub aaddr, aaddr, tmp
>>>> +       add limit, addr0, limit
>>>> +
>>>> +.LloopWord:
>>>> +       sltu tmp, addr0, aaddr
>>>> +       beqz tmp, .LloopByte
>>>> +
>>>> +       LD_CMP REG_L data0 data1 addr0 addr1 SZREG
>>>> +       beqz tmp, .LloopWord
>>>> +       j .Lreturn
>>>> +
>>>> +.LloopByte:
>>>> +       sltu tmp, addr0, limit
>>>> +       beqz tmp, .Lreturn
>>>> +
>>>> +       LD_CMP lbu data0 data1 addr0 addr1 1
>>>> +       beqz tmp, .LloopByte
>>>> +.Lreturn:
>>>> +       mv return, tmp
>>>> +       ret
>>>> +END(memcmp)
>>>> +EXPORT_SYMBOL(memcmp);
>>>> -- 
>>>> 2.17.1
>>>>
>> -- 
>> Regards,
>> Yipeng Zou
>>
-- 
Regards,
Yipeng Zou


_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2022-09-02 10:32 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-08-31 13:07 [PATCH] riscv:lib: optimize memcmp with ld insn Yipeng Zou
2022-08-31 13:16 ` Conor.Dooley
2022-09-01 13:53   ` Yipeng Zou
2022-09-01 15:40     ` Conor.Dooley
2022-09-02 10:32       ` Yipeng Zou

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.