All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v4 0/5] sparc64: Optimize fls and __fls
@ 2017-10-11 18:50 ` Vijay Kumar
  0 siblings, 0 replies; 16+ messages in thread
From: Vijay Kumar @ 2017-10-11 18:50 UTC (permalink / raw)
  To: davem
  Cc: linux-kernel, sparclinux, babu.moger, rob.gardner, sam, anthony.yznaga

SPARC provides the lzcnt instruction (with VIS3), which can be used to
optimize the fls, __fls and fls64 functions. On systems that support
the lzcnt instruction, we now do boot-time patching to use the
SPARC-optimized fls, __fls and fls64 functions.

v3->v4:
 -  Fixed a typo.
v2->v3:
 -  Using ENTRY(), ENDPROC() for assembler functions.
 -  Removed BITS_PER_LONG from __fls.
 -  Using generic fls64().
 -  Replaced lzcnt instruction with .word directive.
v1->v2:
 - Fixed delay slot issue.

Vijay Kumar (5):
  sparc64: Define SPARC default fls function
  sparc64: Define SPARC default __fls function
  sparc64: SPARC optimized fls function
  sparc64: SPARC optimized __fls function
  sparc64: Use sparc optimized fls and __fls for T4 and above

 arch/sparc/include/asm/bitops_64.h |    5 ++-
 arch/sparc/kernel/head_64.S        |    2 +
 arch/sparc/lib/Makefile            |    3 ++
 arch/sparc/lib/NG4fls.S            |   30 ++++++++++++++++
 arch/sparc/lib/NG4patch.S          |    9 +++++
 arch/sparc/lib/fls.S               |   67 ++++++++++++++++++++++++++++++++++++
 arch/sparc/lib/fls64.S             |   61 ++++++++++++++++++++++++++++++++
 7 files changed, 175 insertions(+), 2 deletions(-)
 create mode 100644 arch/sparc/lib/NG4fls.S
 create mode 100644 arch/sparc/lib/fls.S
 create mode 100644 arch/sparc/lib/fls64.S

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH v4 0/5] sparc64: Optimize fls and __fls
@ 2017-10-11 18:50 ` Vijay Kumar
  0 siblings, 0 replies; 16+ messages in thread
From: Vijay Kumar @ 2017-10-11 18:50 UTC (permalink / raw)
  To: davem
  Cc: linux-kernel, sparclinux, babu.moger, rob.gardner, sam, anthony.yznaga

SPARC provides the lzcnt instruction (with VIS3), which can be used to
optimize the fls, __fls and fls64 functions. On systems that support
the lzcnt instruction, we now do boot-time patching to use the
SPARC-optimized fls, __fls and fls64 functions.

v3->v4:
 -  Fixed a typo.
v2->v3:
 -  Using ENTRY(), ENDPROC() for assembler functions.
 -  Removed BITS_PER_LONG from __fls.
 -  Using generic fls64().
 -  Replaced lzcnt instruction with .word directive.
v1->v2:
 - Fixed delay slot issue.

Vijay Kumar (5):
  sparc64: Define SPARC default fls function
  sparc64: Define SPARC default __fls function
  sparc64: SPARC optimized fls function
  sparc64: SPARC optimized __fls function
  sparc64: Use sparc optimized fls and __fls for T4 and above

 arch/sparc/include/asm/bitops_64.h |    5 ++-
 arch/sparc/kernel/head_64.S        |    2 +
 arch/sparc/lib/Makefile            |    3 ++
 arch/sparc/lib/NG4fls.S            |   30 ++++++++++++++++
 arch/sparc/lib/NG4patch.S          |    9 +++++
 arch/sparc/lib/fls.S               |   67 ++++++++++++++++++++++++++++++++++++
 arch/sparc/lib/fls64.S             |   61 ++++++++++++++++++++++++++++++++
 7 files changed, 175 insertions(+), 2 deletions(-)
 create mode 100644 arch/sparc/lib/NG4fls.S
 create mode 100644 arch/sparc/lib/fls.S
 create mode 100644 arch/sparc/lib/fls64.S


^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH v4 1/5] sparc64: Define SPARC default fls function
  2017-10-11 18:50 ` Vijay Kumar
@ 2017-10-11 18:50   ` Vijay Kumar
  -1 siblings, 0 replies; 16+ messages in thread
From: Vijay Kumar @ 2017-10-11 18:50 UTC (permalink / raw)
  To: davem
  Cc: linux-kernel, sparclinux, babu.moger, rob.gardner, sam, anthony.yznaga

fls will now require boot-time patching on T4 and above, so
redefine it as an out-of-line function under arch/sparc/lib.

Signed-off-by: Vijay Kumar <vijay.ac.kumar@oracle.com>
---
 arch/sparc/include/asm/bitops_64.h |    3 +-
 arch/sparc/lib/Makefile            |    1 +
 arch/sparc/lib/fls.S               |   67 ++++++++++++++++++++++++++++++++++++
 3 files changed, 70 insertions(+), 1 deletions(-)

diff --git a/arch/sparc/include/asm/bitops_64.h b/arch/sparc/include/asm/bitops_64.h
index 2d52240..30aea56 100644
--- a/arch/sparc/include/asm/bitops_64.h
+++ b/arch/sparc/include/asm/bitops_64.h
@@ -22,9 +22,10 @@
 void clear_bit(unsigned long nr, volatile unsigned long *addr);
 void change_bit(unsigned long nr, volatile unsigned long *addr);
 
+int fls(unsigned int word);
+
 #include <asm-generic/bitops/non-atomic.h>
 
-#include <asm-generic/bitops/fls.h>
 #include <asm-generic/bitops/__fls.h>
 #include <asm-generic/bitops/fls64.h>
 
diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
index a1a2d39..3b9f5e0 100644
--- a/arch/sparc/lib/Makefile
+++ b/arch/sparc/lib/Makefile
@@ -16,6 +16,7 @@ lib-$(CONFIG_SPARC64) += atomic_64.o
 lib-$(CONFIG_SPARC32) += lshrdi3.o ashldi3.o
 lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o
 lib-$(CONFIG_SPARC64) += multi3.o
+lib-$(CONFIG_SPARC64) += fls.o
 
 lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o
 lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o
diff --git a/arch/sparc/lib/fls.S b/arch/sparc/lib/fls.S
new file mode 100644
index 0000000..06b8d30
--- /dev/null
+++ b/arch/sparc/lib/fls.S
@@ -0,0 +1,67 @@
+/* fls.S: SPARC default fls definition.
+ *
+ * SPARC default fls definition, which follows the same algorithm as
+ * in generic fls(). This function will be boot time patched on T4
+ * and onward.
+ */
+
+#include <linux/linkage.h>
+#include <asm/export.h>
+
+	.text
+	.register	%g2, #scratch
+	.register	%g3, #scratch
+ENTRY(fls)	/* int fls(unsigned int word): 1-based index of the highest set bit, 0 if word == 0 */
+	brz,pn	%o0, 6f	/* word == 0: return the 0 loaded in the delay slot */
+	 mov	0, %o1	/* delay slot: %o1 = result = 0 */
+	sethi	%hi(0xffff0000), %g3
+	mov	%o0, %g2	/* %g2 = copy of word used for the mask tests */
+	andcc	%o0, %g3, %g0	/* any bit set in the upper 16 bits? */
+	be,pt	%icc, 8f	/* no: shift the word up by 16 at 8: */
+	 mov	32, %o1	/* delay slot: result = 32 (8: overrides it with 16) */
+	sethi	%hi(0xff000000), %g3
+	andcc	%g2, %g3, %g0	/* any bit set in the top byte? */
+	bne,pt	%icc, 3f	/* yes: skip the 8-bit shift */
+	 sethi	%hi(0xf0000000), %g3	/* delay slot: mask for the 4-bit step */
+	sll	%o0, 8, %o0	/* top byte clear: word <<= 8 */
+1:
+	add	%o1, -8, %o1	/* result -= 8 */
+	sra	%o0, 0, %o0	/* sign-extend the low 32 bits of the shifted word */
+	mov	%o0, %g2
+2:
+	sethi	%hi(0xf0000000), %g3
+3:
+	andcc	%g2, %g3, %g0	/* any bit set in the top nibble? */
+	bne,pt	%icc, 4f
+	 sethi	%hi(0xc0000000), %g3	/* delay slot: mask for the 2-bit step */
+	sll	%o0, 4, %o0	/* top nibble clear: word <<= 4, result -= 4 */
+	add	%o1, -4, %o1
+	sra	%o0, 0, %o0
+	mov	%o0, %g2
+4:
+	andcc	%g2, %g3, %g0	/* any bit set in the top two bits? */
+	be,a,pt	%icc, 7f	/* no: finish the 2-bit step at 7: */
+	 sll	%o0, 2, %o0	/* annulled delay slot: runs only when the branch is taken */
+5:
+	xnor	%g0, %o0, %o0	/* %o0 = ~word */
+	srl	%o0, 31, %o0	/* %o0 = 1 if bit 31 of word is clear, else 0 */
+	sub	%o1, %o0, %o1	/* final 1-bit adjustment of the result */
+6:
+	jmp	%o7 + 8	/* return to the caller */
+	 sra	%o1, 0, %o0	/* delay slot: return value = result */
+7:
+	add	%o1, -2, %o1	/* result -= 2 */
+	ba,pt	%xcc, 5b
+	 sra	%o0, 0, %o0
+8:
+	sll	%o0, 16, %o0	/* upper 16 bits clear: word <<= 16 */
+	sethi	%hi(0xff000000), %g3
+	sra	%o0, 0, %o0
+	mov	%o0, %g2
+	andcc	%g2, %g3, %g0	/* any bit set in the top byte now? */
+	bne,pt	%icc, 2b	/* yes: go on to the 4-bit step */
+	 mov	16, %o1	/* delay slot: result = 16 on both paths */
+	ba,pt	%xcc, 1b	/* no: join the 8-bit step */
+	 sll	%o0, 8, %o0	/* delay slot: word <<= 8 */
+ENDPROC(fls)
+EXPORT_SYMBOL(fls)
-- 
1.7.1

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH v4 1/5] sparc64: Define SPARC default fls function
@ 2017-10-11 18:50   ` Vijay Kumar
  0 siblings, 0 replies; 16+ messages in thread
From: Vijay Kumar @ 2017-10-11 18:50 UTC (permalink / raw)
  To: davem
  Cc: linux-kernel, sparclinux, babu.moger, rob.gardner, sam, anthony.yznaga

fls will now require boot-time patching on T4 and above, so
redefine it as an out-of-line function under arch/sparc/lib.

Signed-off-by: Vijay Kumar <vijay.ac.kumar@oracle.com>
---
 arch/sparc/include/asm/bitops_64.h |    3 +-
 arch/sparc/lib/Makefile            |    1 +
 arch/sparc/lib/fls.S               |   67 ++++++++++++++++++++++++++++++++++++
 3 files changed, 70 insertions(+), 1 deletions(-)

diff --git a/arch/sparc/include/asm/bitops_64.h b/arch/sparc/include/asm/bitops_64.h
index 2d52240..30aea56 100644
--- a/arch/sparc/include/asm/bitops_64.h
+++ b/arch/sparc/include/asm/bitops_64.h
@@ -22,9 +22,10 @@
 void clear_bit(unsigned long nr, volatile unsigned long *addr);
 void change_bit(unsigned long nr, volatile unsigned long *addr);
 
+int fls(unsigned int word);
+
 #include <asm-generic/bitops/non-atomic.h>
 
-#include <asm-generic/bitops/fls.h>
 #include <asm-generic/bitops/__fls.h>
 #include <asm-generic/bitops/fls64.h>
 
diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
index a1a2d39..3b9f5e0 100644
--- a/arch/sparc/lib/Makefile
+++ b/arch/sparc/lib/Makefile
@@ -16,6 +16,7 @@ lib-$(CONFIG_SPARC64) += atomic_64.o
 lib-$(CONFIG_SPARC32) += lshrdi3.o ashldi3.o
 lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o
 lib-$(CONFIG_SPARC64) += multi3.o
+lib-$(CONFIG_SPARC64) += fls.o
 
 lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o
 lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o
diff --git a/arch/sparc/lib/fls.S b/arch/sparc/lib/fls.S
new file mode 100644
index 0000000..06b8d30
--- /dev/null
+++ b/arch/sparc/lib/fls.S
@@ -0,0 +1,67 @@
+/* fls.S: SPARC default fls definition.
+ *
+ * SPARC default fls definition, which follows the same algorithm as
+ * in generic fls(). This function will be boot time patched on T4
+ * and onward.
+ */
+
+#include <linux/linkage.h>
+#include <asm/export.h>
+
+	.text
+	.register	%g2, #scratch
+	.register	%g3, #scratch
+ENTRY(fls)	/* int fls(unsigned int word): 1-based index of the highest set bit, 0 if word == 0 */
+	brz,pn	%o0, 6f	/* word == 0: return the 0 loaded in the delay slot */
+	 mov	0, %o1	/* delay slot: %o1 = result = 0 */
+	sethi	%hi(0xffff0000), %g3
+	mov	%o0, %g2	/* %g2 = copy of word used for the mask tests */
+	andcc	%o0, %g3, %g0	/* any bit set in the upper 16 bits? */
+	be,pt	%icc, 8f	/* no: shift the word up by 16 at 8: */
+	 mov	32, %o1	/* delay slot: result = 32 (8: overrides it with 16) */
+	sethi	%hi(0xff000000), %g3
+	andcc	%g2, %g3, %g0	/* any bit set in the top byte? */
+	bne,pt	%icc, 3f	/* yes: skip the 8-bit shift */
+	 sethi	%hi(0xf0000000), %g3	/* delay slot: mask for the 4-bit step */
+	sll	%o0, 8, %o0	/* top byte clear: word <<= 8 */
+1:
+	add	%o1, -8, %o1	/* result -= 8 */
+	sra	%o0, 0, %o0	/* sign-extend the low 32 bits of the shifted word */
+	mov	%o0, %g2
+2:
+	sethi	%hi(0xf0000000), %g3
+3:
+	andcc	%g2, %g3, %g0	/* any bit set in the top nibble? */
+	bne,pt	%icc, 4f
+	 sethi	%hi(0xc0000000), %g3	/* delay slot: mask for the 2-bit step */
+	sll	%o0, 4, %o0	/* top nibble clear: word <<= 4, result -= 4 */
+	add	%o1, -4, %o1
+	sra	%o0, 0, %o0
+	mov	%o0, %g2
+4:
+	andcc	%g2, %g3, %g0	/* any bit set in the top two bits? */
+	be,a,pt	%icc, 7f	/* no: finish the 2-bit step at 7: */
+	 sll	%o0, 2, %o0	/* annulled delay slot: runs only when the branch is taken */
+5:
+	xnor	%g0, %o0, %o0	/* %o0 = ~word */
+	srl	%o0, 31, %o0	/* %o0 = 1 if bit 31 of word is clear, else 0 */
+	sub	%o1, %o0, %o1	/* final 1-bit adjustment of the result */
+6:
+	jmp	%o7 + 8	/* return to the caller */
+	 sra	%o1, 0, %o0	/* delay slot: return value = result */
+7:
+	add	%o1, -2, %o1	/* result -= 2 */
+	ba,pt	%xcc, 5b
+	 sra	%o0, 0, %o0
+8:
+	sll	%o0, 16, %o0	/* upper 16 bits clear: word <<= 16 */
+	sethi	%hi(0xff000000), %g3
+	sra	%o0, 0, %o0
+	mov	%o0, %g2
+	andcc	%g2, %g3, %g0	/* any bit set in the top byte now? */
+	bne,pt	%icc, 2b	/* yes: go on to the 4-bit step */
+	 mov	16, %o1	/* delay slot: result = 16 on both paths */
+	ba,pt	%xcc, 1b	/* no: join the 8-bit step */
+	 sll	%o0, 8, %o0	/* delay slot: word <<= 8 */
+ENDPROC(fls)
+EXPORT_SYMBOL(fls)
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH v4 2/5] sparc64: Define SPARC default __fls function
  2017-10-11 18:50 ` Vijay Kumar
@ 2017-10-11 18:50   ` Vijay Kumar
  -1 siblings, 0 replies; 16+ messages in thread
From: Vijay Kumar @ 2017-10-11 18:50 UTC (permalink / raw)
  To: davem
  Cc: linux-kernel, sparclinux, babu.moger, rob.gardner, sam, anthony.yznaga

__fls will now require boot-time patching on T4 and above, so
redefine it as an out-of-line function under arch/sparc/lib.

Signed-off-by: Vijay Kumar <vijay.ac.kumar@oracle.com>
---
 arch/sparc/include/asm/bitops_64.h |    2 +-
 arch/sparc/lib/Makefile            |    1 +
 arch/sparc/lib/fls64.S             |   61 ++++++++++++++++++++++++++++++++++++
 3 files changed, 63 insertions(+), 1 deletions(-)

diff --git a/arch/sparc/include/asm/bitops_64.h b/arch/sparc/include/asm/bitops_64.h
index 30aea56..d7a46e2 100644
--- a/arch/sparc/include/asm/bitops_64.h
+++ b/arch/sparc/include/asm/bitops_64.h
@@ -23,10 +23,10 @@
 void change_bit(unsigned long nr, volatile unsigned long *addr);
 
 int fls(unsigned int word);
+int __fls(unsigned long word);
 
 #include <asm-generic/bitops/non-atomic.h>
 
-#include <asm-generic/bitops/__fls.h>
 #include <asm-generic/bitops/fls64.h>
 
 #ifdef __KERNEL__
diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
index 3b9f5e0..5380c59 100644
--- a/arch/sparc/lib/Makefile
+++ b/arch/sparc/lib/Makefile
@@ -17,6 +17,7 @@ lib-$(CONFIG_SPARC32) += lshrdi3.o ashldi3.o
 lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o
 lib-$(CONFIG_SPARC64) += multi3.o
 lib-$(CONFIG_SPARC64) += fls.o
+lib-$(CONFIG_SPARC64) += fls64.o
 
 lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o
 lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o
diff --git a/arch/sparc/lib/fls64.S b/arch/sparc/lib/fls64.S
new file mode 100644
index 0000000..c83e22a
--- /dev/null
+++ b/arch/sparc/lib/fls64.S
@@ -0,0 +1,61 @@
+/* fls64.S: SPARC default __fls definition.
+ *
+ * SPARC default __fls definition, which follows the same algorithm as
+ * in generic __fls(). This function will be boot time patched on T4
+ * and onward.
+ */
+
+#include <linux/linkage.h>
+#include <asm/export.h>
+
+	.text
+	.register	%g2, #scratch
+	.register	%g3, #scratch
+ENTRY(__fls)	/* int __fls(unsigned long word): 0-based index of the highest set bit */
+	mov	-1, %g2
+	sllx	%g2, 32, %g2	/* %g2 = mask of the upper 32 bits */
+	and	%o0, %g2, %g2
+	brnz,pt	%g2, 1f	/* some upper-half bit is set: keep word as is */
+	 mov	63, %g1	/* delay slot: %g1 = candidate bit index, starts at 63 */
+	sllx	%o0, 32, %o0	/* upper half clear: word <<= 32 */
+	mov	31, %g1	/* ...and the candidate index drops to 31 */
+1:
+	mov	-1, %g2
+	sllx	%g2, 48, %g2	/* mask of the top 16 bits */
+	and	%o0, %g2, %g2
+	brnz,pt	%g2, 2f
+	 mov	-1, %g2	/* delay slot: %g2 = all-ones (2: loads it again) */
+	sllx	%o0, 16, %o0	/* top 16 bits clear: word <<= 16, index -= 16 */
+	add	%g1, -16, %g1
+2:
+	mov	-1, %g2
+	sllx	%g2, 56, %g2	/* mask of the top 8 bits */
+	and	%o0, %g2, %g2
+	brnz,pt	%g2, 3f
+	 mov	-1, %g2	/* delay slot: all-ones consumed by the sllx at 3: */
+	sllx	%o0, 8, %o0	/* top 8 bits clear: word <<= 8, index -= 8 */
+	add	%g1, -8, %g1
+3:
+	sllx	%g2, 60, %g2	/* mask of the top 4 bits */
+	and	%o0, %g2, %g2
+	brnz,pt	%g2, 4f
+	 mov	-1, %g2	/* delay slot: all-ones consumed by the sllx at 4: */
+	sllx	%o0, 4, %o0	/* top 4 bits clear: word <<= 4, index -= 4 */
+	add	%g1, -4, %g1
+4:
+	sllx	%g2, 62, %g2	/* mask of the top 2 bits */
+	and	%o0, %g2, %g2
+	brnz,pt	%g2, 5f
+	 mov	-1, %g3	/* delay slot: all-ones in %g3 for the bit-63 mask at 5: */
+	sllx	%o0, 2, %o0	/* top 2 bits clear: word <<= 2, index -= 2 */
+	add	%g1, -2, %g1
+5:
+	mov	0, %g2
+	sllx	%g3, 63, %g3	/* %g3 = bit 63 only */
+	and	%o0, %g3, %o0
+	movre	%o0, 1, %g2	/* %g2 = 1 when bit 63 of word is clear, else stays 0 */
+	sub	%g1, %g2, %g1	/* final 1-bit adjustment of the index */
+	jmp	%o7+8	/* return to the caller */
+	 sra	%g1, 0, %o0	/* delay slot: return value = index */
+ENDPROC(__fls)
+EXPORT_SYMBOL(__fls)
-- 
1.7.1

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH v4 2/5] sparc64: Define SPARC default __fls function
@ 2017-10-11 18:50   ` Vijay Kumar
  0 siblings, 0 replies; 16+ messages in thread
From: Vijay Kumar @ 2017-10-11 18:50 UTC (permalink / raw)
  To: davem
  Cc: linux-kernel, sparclinux, babu.moger, rob.gardner, sam, anthony.yznaga

__fls will now require boot-time patching on T4 and above, so
redefine it as an out-of-line function under arch/sparc/lib.

Signed-off-by: Vijay Kumar <vijay.ac.kumar@oracle.com>
---
 arch/sparc/include/asm/bitops_64.h |    2 +-
 arch/sparc/lib/Makefile            |    1 +
 arch/sparc/lib/fls64.S             |   61 ++++++++++++++++++++++++++++++++++++
 3 files changed, 63 insertions(+), 1 deletions(-)

diff --git a/arch/sparc/include/asm/bitops_64.h b/arch/sparc/include/asm/bitops_64.h
index 30aea56..d7a46e2 100644
--- a/arch/sparc/include/asm/bitops_64.h
+++ b/arch/sparc/include/asm/bitops_64.h
@@ -23,10 +23,10 @@
 void change_bit(unsigned long nr, volatile unsigned long *addr);
 
 int fls(unsigned int word);
+int __fls(unsigned long word);
 
 #include <asm-generic/bitops/non-atomic.h>
 
-#include <asm-generic/bitops/__fls.h>
 #include <asm-generic/bitops/fls64.h>
 
 #ifdef __KERNEL__
diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
index 3b9f5e0..5380c59 100644
--- a/arch/sparc/lib/Makefile
+++ b/arch/sparc/lib/Makefile
@@ -17,6 +17,7 @@ lib-$(CONFIG_SPARC32) += lshrdi3.o ashldi3.o
 lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o
 lib-$(CONFIG_SPARC64) += multi3.o
 lib-$(CONFIG_SPARC64) += fls.o
+lib-$(CONFIG_SPARC64) += fls64.o
 
 lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o
 lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o
diff --git a/arch/sparc/lib/fls64.S b/arch/sparc/lib/fls64.S
new file mode 100644
index 0000000..c83e22a
--- /dev/null
+++ b/arch/sparc/lib/fls64.S
@@ -0,0 +1,61 @@
+/* fls64.S: SPARC default __fls definition.
+ *
+ * SPARC default __fls definition, which follows the same algorithm as
+ * in generic __fls(). This function will be boot time patched on T4
+ * and onward.
+ */
+
+#include <linux/linkage.h>
+#include <asm/export.h>
+
+	.text
+	.register	%g2, #scratch
+	.register	%g3, #scratch
+ENTRY(__fls)	/* int __fls(unsigned long word): 0-based index of the highest set bit */
+	mov	-1, %g2
+	sllx	%g2, 32, %g2	/* %g2 = mask of the upper 32 bits */
+	and	%o0, %g2, %g2
+	brnz,pt	%g2, 1f	/* some upper-half bit is set: keep word as is */
+	 mov	63, %g1	/* delay slot: %g1 = candidate bit index, starts at 63 */
+	sllx	%o0, 32, %o0	/* upper half clear: word <<= 32 */
+	mov	31, %g1	/* ...and the candidate index drops to 31 */
+1:
+	mov	-1, %g2
+	sllx	%g2, 48, %g2	/* mask of the top 16 bits */
+	and	%o0, %g2, %g2
+	brnz,pt	%g2, 2f
+	 mov	-1, %g2	/* delay slot: %g2 = all-ones (2: loads it again) */
+	sllx	%o0, 16, %o0	/* top 16 bits clear: word <<= 16, index -= 16 */
+	add	%g1, -16, %g1
+2:
+	mov	-1, %g2
+	sllx	%g2, 56, %g2	/* mask of the top 8 bits */
+	and	%o0, %g2, %g2
+	brnz,pt	%g2, 3f
+	 mov	-1, %g2	/* delay slot: all-ones consumed by the sllx at 3: */
+	sllx	%o0, 8, %o0	/* top 8 bits clear: word <<= 8, index -= 8 */
+	add	%g1, -8, %g1
+3:
+	sllx	%g2, 60, %g2	/* mask of the top 4 bits */
+	and	%o0, %g2, %g2
+	brnz,pt	%g2, 4f
+	 mov	-1, %g2	/* delay slot: all-ones consumed by the sllx at 4: */
+	sllx	%o0, 4, %o0	/* top 4 bits clear: word <<= 4, index -= 4 */
+	add	%g1, -4, %g1
+4:
+	sllx	%g2, 62, %g2	/* mask of the top 2 bits */
+	and	%o0, %g2, %g2
+	brnz,pt	%g2, 5f
+	 mov	-1, %g3	/* delay slot: all-ones in %g3 for the bit-63 mask at 5: */
+	sllx	%o0, 2, %o0	/* top 2 bits clear: word <<= 2, index -= 2 */
+	add	%g1, -2, %g1
+5:
+	mov	0, %g2
+	sllx	%g3, 63, %g3	/* %g3 = bit 63 only */
+	and	%o0, %g3, %o0
+	movre	%o0, 1, %g2	/* %g2 = 1 when bit 63 of word is clear, else stays 0 */
+	sub	%g1, %g2, %g1	/* final 1-bit adjustment of the index */
+	jmp	%o7+8	/* return to the caller */
+	 sra	%g1, 0, %o0	/* delay slot: return value = index */
+ENDPROC(__fls)
+EXPORT_SYMBOL(__fls)
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH v4 3/5] sparc64: SPARC optimized fls function
  2017-10-11 18:50 ` Vijay Kumar
@ 2017-10-11 18:50   ` Vijay Kumar
  -1 siblings, 0 replies; 16+ messages in thread
From: Vijay Kumar @ 2017-10-11 18:50 UTC (permalink / raw)
  To: davem
  Cc: linux-kernel, sparclinux, babu.moger, rob.gardner, sam, anthony.yznaga

Define a SPARC-optimized fls using the lzcnt opcode.

Signed-off-by: Vijay Kumar <vijay.ac.kumar@oracle.com>
---
 arch/sparc/lib/Makefile |    1 +
 arch/sparc/lib/NG4fls.S |   20 ++++++++++++++++++++
 2 files changed, 21 insertions(+), 0 deletions(-)

diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
index 5380c59..2823b8e 100644
--- a/arch/sparc/lib/Makefile
+++ b/arch/sparc/lib/Makefile
@@ -18,6 +18,7 @@ lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o
 lib-$(CONFIG_SPARC64) += multi3.o
 lib-$(CONFIG_SPARC64) += fls.o
 lib-$(CONFIG_SPARC64) += fls64.o
+obj-$(CONFIG_SPARC64) += NG4fls.o
 
 lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o
 lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o
diff --git a/arch/sparc/lib/NG4fls.S b/arch/sparc/lib/NG4fls.S
new file mode 100644
index 0000000..bc17b65
--- /dev/null
+++ b/arch/sparc/lib/NG4fls.S
@@ -0,0 +1,20 @@
+/* NG4fls.S: SPARC optimized fls and __fls for T4 and above.
+ *
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#include <linux/linkage.h>
+
+#define LZCNT_O0_G2	\
+	.word	0x85b002e8
+
+	.text
+	.register	%g2, #scratch
+	.register	%g3, #scratch
+
+ENTRY(NG4fls)	/* lzcnt-based fls: returns 64 minus the leading-zero count of %o0 */
+	LZCNT_O0_G2	!lzcnt	%o0, %g2
+	mov	64, %g3	/* the subtraction 64 - %g2 sits in the delay slot of retl; NOTE(review): fls(0) == 0 presumably relies on lzcnt giving 64 for a zero input - confirm */
+	retl
+	 sub	%g3, %g2, %o0
+ENDPROC(NG4fls)
-- 
1.7.1

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH v4 3/5] sparc64: SPARC optimized fls function
@ 2017-10-11 18:50   ` Vijay Kumar
  0 siblings, 0 replies; 16+ messages in thread
From: Vijay Kumar @ 2017-10-11 18:50 UTC (permalink / raw)
  To: davem
  Cc: linux-kernel, sparclinux, babu.moger, rob.gardner, sam, anthony.yznaga

Define a SPARC-optimized fls using the lzcnt opcode.

Signed-off-by: Vijay Kumar <vijay.ac.kumar@oracle.com>
---
 arch/sparc/lib/Makefile |    1 +
 arch/sparc/lib/NG4fls.S |   20 ++++++++++++++++++++
 2 files changed, 21 insertions(+), 0 deletions(-)

diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
index 5380c59..2823b8e 100644
--- a/arch/sparc/lib/Makefile
+++ b/arch/sparc/lib/Makefile
@@ -18,6 +18,7 @@ lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o
 lib-$(CONFIG_SPARC64) += multi3.o
 lib-$(CONFIG_SPARC64) += fls.o
 lib-$(CONFIG_SPARC64) += fls64.o
+obj-$(CONFIG_SPARC64) += NG4fls.o
 
 lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o
 lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o
diff --git a/arch/sparc/lib/NG4fls.S b/arch/sparc/lib/NG4fls.S
new file mode 100644
index 0000000..bc17b65
--- /dev/null
+++ b/arch/sparc/lib/NG4fls.S
@@ -0,0 +1,20 @@
+/* NG4fls.S: SPARC optimized fls and __fls for T4 and above.
+ *
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#include <linux/linkage.h>
+
+#define LZCNT_O0_G2	\
+	.word	0x85b002e8
+
+	.text
+	.register	%g2, #scratch
+	.register	%g3, #scratch
+
+ENTRY(NG4fls)	/* lzcnt-based fls: returns 64 minus the leading-zero count of %o0 */
+	LZCNT_O0_G2	!lzcnt	%o0, %g2
+	mov	64, %g3	/* the subtraction 64 - %g2 sits in the delay slot of retl; NOTE(review): fls(0) == 0 presumably relies on lzcnt giving 64 for a zero input - confirm */
+	retl
+	 sub	%g3, %g2, %o0
+ENDPROC(NG4fls)
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH v4 4/5] sparc64: SPARC optimized __fls function
  2017-10-11 18:50 ` Vijay Kumar
@ 2017-10-11 18:50   ` Vijay Kumar
  -1 siblings, 0 replies; 16+ messages in thread
From: Vijay Kumar @ 2017-10-11 18:50 UTC (permalink / raw)
  To: davem
  Cc: linux-kernel, sparclinux, babu.moger, rob.gardner, sam, anthony.yznaga

Define a SPARC-optimized __fls using the lzcnt opcode.

Signed-off-by: Vijay Kumar <vijay.ac.kumar@oracle.com>
---
 arch/sparc/lib/NG4fls.S |   10 ++++++++++
 1 files changed, 10 insertions(+), 0 deletions(-)

diff --git a/arch/sparc/lib/NG4fls.S b/arch/sparc/lib/NG4fls.S
index bc17b65..2d0991e 100644
--- a/arch/sparc/lib/NG4fls.S
+++ b/arch/sparc/lib/NG4fls.S
@@ -18,3 +18,13 @@ ENTRY(NG4fls)
 	retl
 	 sub	%g3, %g2, %o0
 ENDPROC(NG4fls)
+
+ENTRY(__NG4fls)	/* lzcnt-based __fls: returns 63 minus the leading-zero count of %o0 */
+	brz,pn	%o0, 1f	/* word == 0: return with %o0 still 0; the lzcnt below is the delay slot and runs on both paths */
+	LZCNT_O0_G2	!lzcnt	%o0, %g2
+	mov	63, %g3
+	sub	%g3, %g2, %o0	/* %o0 = 63 - leading-zero count */
+1:
+	retl	/* return to the caller */
+	 nop	/* delay slot: nothing left to do */
+ENDPROC(__NG4fls)
-- 
1.7.1

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH v4 4/5] sparc64: SPARC optimized __fls function
@ 2017-10-11 18:50   ` Vijay Kumar
  0 siblings, 0 replies; 16+ messages in thread
From: Vijay Kumar @ 2017-10-11 18:50 UTC (permalink / raw)
  To: davem
  Cc: linux-kernel, sparclinux, babu.moger, rob.gardner, sam, anthony.yznaga

Define a SPARC-optimized __fls using the lzcnt opcode.

Signed-off-by: Vijay Kumar <vijay.ac.kumar@oracle.com>
---
 arch/sparc/lib/NG4fls.S |   10 ++++++++++
 1 files changed, 10 insertions(+), 0 deletions(-)

diff --git a/arch/sparc/lib/NG4fls.S b/arch/sparc/lib/NG4fls.S
index bc17b65..2d0991e 100644
--- a/arch/sparc/lib/NG4fls.S
+++ b/arch/sparc/lib/NG4fls.S
@@ -18,3 +18,13 @@ ENTRY(NG4fls)
 	retl
 	 sub	%g3, %g2, %o0
 ENDPROC(NG4fls)
+
+ENTRY(__NG4fls)	/* lzcnt-based __fls: returns 63 minus the leading-zero count of %o0 */
+	brz,pn	%o0, 1f	/* word == 0: return with %o0 still 0; the lzcnt below is the delay slot and runs on both paths */
+	LZCNT_O0_G2	!lzcnt	%o0, %g2
+	mov	63, %g3
+	sub	%g3, %g2, %o0	/* %o0 = 63 - leading-zero count */
+1:
+	retl	/* return to the caller */
+	 nop	/* delay slot: nothing left to do */
+ENDPROC(__NG4fls)
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH v4 5/5] sparc64: Use sparc optimized fls and __fls for T4 and above
  2017-10-11 18:50 ` Vijay Kumar
@ 2017-10-11 18:50   ` Vijay Kumar
  -1 siblings, 0 replies; 16+ messages in thread
From: Vijay Kumar @ 2017-10-11 18:50 UTC (permalink / raw)
  To: davem
  Cc: linux-kernel, sparclinux, babu.moger, rob.gardner, sam, anthony.yznaga

For T4 and above, patch the fls and __fls functions
at boot time to use the lzcnt instruction.

Signed-off-by: Vijay Kumar <vijay.ac.kumar@oracle.com>
---
 arch/sparc/kernel/head_64.S |    2 ++
 arch/sparc/lib/NG4patch.S   |    9 +++++++++
 2 files changed, 11 insertions(+), 0 deletions(-)

diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S
index 4de9fbd..f362ecb 100644
--- a/arch/sparc/kernel/head_64.S
+++ b/arch/sparc/kernel/head_64.S
@@ -640,6 +640,8 @@ niagara4_patch:
 	 nop
 	call	niagara4_patch_pageops
 	 nop
+	call	niagara4_patch_fls
+	 nop
 
 	ba,a,pt	%xcc, 80f
 	 nop
diff --git a/arch/sparc/lib/NG4patch.S b/arch/sparc/lib/NG4patch.S
index 3cc0f8c..da65a3e 100644
--- a/arch/sparc/lib/NG4patch.S
+++ b/arch/sparc/lib/NG4patch.S
@@ -3,6 +3,8 @@
  * Copyright (C) 2012 David S. Miller <davem@davemloft.net>
  */
 
+#include <linux/linkage.h>
+
 #define BRANCH_ALWAYS	0x10680000
 #define NOP		0x01000000
 #define NG_DO_PATCH(OLD, NEW)	\
@@ -52,3 +54,10 @@ niagara4_patch_pageops:
 	retl
 	 nop
 	.size	niagara4_patch_pageops,.-niagara4_patch_pageops
+
+ENTRY(niagara4_patch_fls)	/* boot-time hook, called from niagara4_patch in head_64.S */
+	NG_DO_PATCH(fls, NG4fls)	/* redirect fls to NG4fls; NOTE(review): NG_DO_PATCH body is not shown in this hunk - presumably it overwrites the start of OLD with a branch to NEW, confirm */
+	NG_DO_PATCH(__fls, __NG4fls)	/* likewise redirect __fls to __NG4fls */
+	retl	/* return to the caller */
+	 nop	/* delay slot */
+ENDPROC(niagara4_patch_fls)
-- 
1.7.1

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH v4 5/5] sparc64: Use sparc optimized fls and __fls for T4 and above
@ 2017-10-11 18:50   ` Vijay Kumar
  0 siblings, 0 replies; 16+ messages in thread
From: Vijay Kumar @ 2017-10-11 18:50 UTC (permalink / raw)
  To: davem
  Cc: linux-kernel, sparclinux, babu.moger, rob.gardner, sam, anthony.yznaga

For T4 and above, patch the fls and __fls functions
at boot time to use the lzcnt instruction.

Signed-off-by: Vijay Kumar <vijay.ac.kumar@oracle.com>
---
 arch/sparc/kernel/head_64.S |    2 ++
 arch/sparc/lib/NG4patch.S   |    9 +++++++++
 2 files changed, 11 insertions(+), 0 deletions(-)

diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S
index 4de9fbd..f362ecb 100644
--- a/arch/sparc/kernel/head_64.S
+++ b/arch/sparc/kernel/head_64.S
@@ -640,6 +640,8 @@ niagara4_patch:
 	 nop
 	call	niagara4_patch_pageops
 	 nop
+	call	niagara4_patch_fls
+	 nop
 
 	ba,a,pt	%xcc, 80f
 	 nop
diff --git a/arch/sparc/lib/NG4patch.S b/arch/sparc/lib/NG4patch.S
index 3cc0f8c..da65a3e 100644
--- a/arch/sparc/lib/NG4patch.S
+++ b/arch/sparc/lib/NG4patch.S
@@ -3,6 +3,8 @@
  * Copyright (C) 2012 David S. Miller <davem@davemloft.net>
  */
 
+#include <linux/linkage.h>
+
 #define BRANCH_ALWAYS	0x10680000
 #define NOP		0x01000000
 #define NG_DO_PATCH(OLD, NEW)	\
@@ -52,3 +54,10 @@ niagara4_patch_pageops:
 	retl
 	 nop
 	.size	niagara4_patch_pageops,.-niagara4_patch_pageops
+
+ENTRY(niagara4_patch_fls)	/* boot-time hook, called from niagara4_patch in head_64.S */
+	NG_DO_PATCH(fls, NG4fls)	/* redirect fls to NG4fls; NOTE(review): NG_DO_PATCH body is not shown in this hunk - presumably it overwrites the start of OLD with a branch to NEW, confirm */
+	NG_DO_PATCH(__fls, __NG4fls)	/* likewise redirect __fls to __NG4fls */
+	retl	/* return to the caller */
+	 nop	/* delay slot */
+ENDPROC(niagara4_patch_fls)
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH v4 0/5] sparc64: Optimize fls and __fls
  2017-10-11 18:50 ` Vijay Kumar
@ 2017-10-12 20:29   ` Babu Moger
  -1 siblings, 0 replies; 16+ messages in thread
From: Babu Moger @ 2017-10-12 20:29 UTC (permalink / raw)
  To: Vijay Kumar, davem
  Cc: linux-kernel, sparclinux, rob.gardner, sam, anthony.yznaga

Looked at all the patches. Looks good to me.

Reviewed-by: Babu Moger <babu.moger@oracle.com>

On 10/11/2017 1:50 PM, Vijay Kumar wrote:
> SPARC provides lzcnt instruction (with VIS3) which can be used to
> optimize fls, __fls and fls64 functions. For the systems that supports
> lzcnt instruction, we now do boot time patching to use sparc
> optimized fls, __fls and fls64 functions.
>
> v3->v4:
>   -  Fixed a typo.
> v2->v3:
>   -  Using ENTRY(), ENDPROC() for assembler functions.
>   -  Removed BITS_PER_LONG from __fls.
>   -  Using generic fls64().
>   -  Replaced lzcnt instruction with .word directive.
> v1->v2:
>   - Fixed delay slot issue.
>
> Vijay Kumar (5):
>    sparc64: Define SPARC default fls function
>    sparc64: Define SPARC default __fls function
>    sparc64: SPARC optimized fls function
>    sparc64: SPARC optimized __fls function
>    sparc64: Use sparc optimized fls and __fls for T4 and above
>
>   arch/sparc/include/asm/bitops_64.h |    5 ++-
>   arch/sparc/kernel/head_64.S        |    2 +
>   arch/sparc/lib/Makefile            |    3 ++
>   arch/sparc/lib/NG4fls.S            |   30 ++++++++++++++++
>   arch/sparc/lib/NG4patch.S          |    9 +++++
>   arch/sparc/lib/fls.S               |   67 ++++++++++++++++++++++++++++++++++++
>   arch/sparc/lib/fls64.S             |   61 ++++++++++++++++++++++++++++++++
>   7 files changed, 175 insertions(+), 2 deletions(-)
>   create mode 100644 arch/sparc/lib/NG4fls.S
>   create mode 100644 arch/sparc/lib/fls.S
>   create mode 100644 arch/sparc/lib/fls64.S
>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v4 0/5] sparc64: Optimize fls and __fls
@ 2017-10-12 20:29   ` Babu Moger
  0 siblings, 0 replies; 16+ messages in thread
From: Babu Moger @ 2017-10-12 20:29 UTC (permalink / raw)
  To: Vijay Kumar, davem
  Cc: linux-kernel, sparclinux, rob.gardner, sam, anthony.yznaga

Looked at all the patches. Looks good to me.

Reviewed-by: Babu Moger <babu.moger@oracle.com>

On 10/11/2017 1:50 PM, Vijay Kumar wrote:
> SPARC provides lzcnt instruction (with VIS3) which can be used to
> optimize fls, __fls and fls64 functions. For the systems that supports
> lzcnt instruction, we now do boot time patching to use sparc
> optimized fls, __fls and fls64 functions.
>
> v3->v4:
>   -  Fixed a typo.
> v2->v3:
>   -  Using ENTRY(), ENDPROC() for assembler functions.
>   -  Removed BITS_PER_LONG from __fls.
>   -  Using generic fls64().
>   -  Replaced lzcnt instruction with .word directive.
> v1->v2:
>   - Fixed delay slot issue.
>
> Vijay Kumar (5):
>    sparc64: Define SPARC default fls function
>    sparc64: Define SPARC default __fls function
>    sparc64: SPARC optimized fls function
>    sparc64: SPARC optimized __fls function
>    sparc64: Use sparc optimized fls and __fls for T4 and above
>
>   arch/sparc/include/asm/bitops_64.h |    5 ++-
>   arch/sparc/kernel/head_64.S        |    2 +
>   arch/sparc/lib/Makefile            |    3 ++
>   arch/sparc/lib/NG4fls.S            |   30 ++++++++++++++++
>   arch/sparc/lib/NG4patch.S          |    9 +++++
>   arch/sparc/lib/fls.S               |   67 ++++++++++++++++++++++++++++++++++++
>   arch/sparc/lib/fls64.S             |   61 ++++++++++++++++++++++++++++++++
>   7 files changed, 175 insertions(+), 2 deletions(-)
>   create mode 100644 arch/sparc/lib/NG4fls.S
>   create mode 100644 arch/sparc/lib/fls.S
>   create mode 100644 arch/sparc/lib/fls64.S
>


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v4 0/5] sparc64: Optimize fls and __fls
  2017-10-11 18:50 ` Vijay Kumar
@ 2017-11-15  5:27   ` David Miller
  -1 siblings, 0 replies; 16+ messages in thread
From: David Miller @ 2017-11-15  5:27 UTC (permalink / raw)
  To: vijay.ac.kumar
  Cc: linux-kernel, sparclinux, babu.moger, rob.gardner, sam, anthony.yznaga

From: Vijay Kumar <vijay.ac.kumar@oracle.com>
Date: Wed, 11 Oct 2017 12:50:01 -0600

> SPARC provides lzcnt instruction (with VIS3) which can be used to
> optimize fls, __fls and fls64 functions. For the systems that supports
> lzcnt instruction, we now do boot time patching to use sparc
> optimized fls, __fls and fls64 functions.
> 
> v3->v4:
>  -  Fixed a typo.
> v2->v3:
>  -  Using ENTRY(), ENDPROC() for assembler functions.
>  -  Removed BITS_PER_LONG from __fls.
>  -  Using generic fls64().
>  -  Replaced lzcnt instruction with .word directive.
> v1->v2:
>  - Fixed delay slot issue.

Series applied, thank you.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v4 0/5] sparc64: Optimize fls and __fls
@ 2017-11-15  5:27   ` David Miller
  0 siblings, 0 replies; 16+ messages in thread
From: David Miller @ 2017-11-15  5:27 UTC (permalink / raw)
  To: vijay.ac.kumar
  Cc: linux-kernel, sparclinux, babu.moger, rob.gardner, sam, anthony.yznaga

From: Vijay Kumar <vijay.ac.kumar@oracle.com>
Date: Wed, 11 Oct 2017 12:50:01 -0600

> SPARC provides lzcnt instruction (with VIS3) which can be used to
> optimize fls, __fls and fls64 functions. For the systems that supports
> lzcnt instruction, we now do boot time patching to use sparc
> optimized fls, __fls and fls64 functions.
> 
> v3->v4:
>  -  Fixed a typo.
> v2->v3:
>  -  Using ENTRY(), ENDPROC() for assembler functions.
>  -  Removed BITS_PER_LONG from __fls.
>  -  Using generic fls64().
>  -  Replaced lzcnt instruction with .word directive.
> v1->v2:
>  - Fixed delay slot issue.

Series applied, thank you.

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2017-11-15  5:27 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-10-11 18:50 [PATCH v4 0/5] sparc64: Optimize fls and __fls Vijay Kumar
2017-10-11 18:50 ` Vijay Kumar
2017-10-11 18:50 ` [PATCH v4 1/5] sparc64: Define SPARC default fls function Vijay Kumar
2017-10-11 18:50   ` Vijay Kumar
2017-10-11 18:50 ` [PATCH v4 2/5] sparc64: Define SPARC default __fls function Vijay Kumar
2017-10-11 18:50   ` Vijay Kumar
2017-10-11 18:50 ` [PATCH v4 3/5] sparc64: SPARC optimized fls function Vijay Kumar
2017-10-11 18:50   ` Vijay Kumar
2017-10-11 18:50 ` [PATCH v4 4/5] sparc64: SPARC optimized __fls function Vijay Kumar
2017-10-11 18:50   ` Vijay Kumar
2017-10-11 18:50 ` [PATCH v4 5/5] sparc64: Use sparc optimized fls and __fls for T4 and above Vijay Kumar
2017-10-11 18:50   ` Vijay Kumar
2017-10-12 20:29 ` [PATCH v4 0/5] sparc64: Optimize fls and __fls Babu Moger
2017-10-12 20:29   ` Babu Moger
2017-11-15  5:27 ` David Miller
2017-11-15  5:27   ` David Miller

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.