All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 0/2] sparc64: Optimize fls, fls64 and __fls
@ 2017-09-27 19:25 ` Vijay Kumar
  0 siblings, 0 replies; 20+ messages in thread
From: Vijay Kumar @ 2017-09-27 19:25 UTC (permalink / raw)
  To: davem; +Cc: linux-kernel, sparclinux, babu.moger, rob.gardner

SPARC provides lzcnt instruction (with VIS3) which can be used to
optimize fls, fls64 and __fls functions. For the systems that support
lzcnt instruction, we now do boot time patching to use sparc
optimized fls, fls64 and __fls functions.

v1->v2: 
 - Fixed delay slot issue pointed out by Rob Gardner in patch 2/2.

Vijay Kumar (2):
  sparc64: Define SPARC default fls and __fls
  sparc64: Use lzcnt instruction for fls and __fls

 arch/sparc/Makefile                |    1 +
 arch/sparc/include/asm/bitops_64.h |    7 +-
 arch/sparc/kernel/head_64.S        |    2 +
 arch/sparc/lib/Makefile            |    4 +
 arch/sparc/lib/NG4fls.S            |   31 +++++++++
 arch/sparc/lib/NG4patch.S          |    9 +++
 arch/sparc/lib/fls.S               |  126 ++++++++++++++++++++++++++++++++++++
 7 files changed, 177 insertions(+), 3 deletions(-)
 create mode 100644 arch/sparc/lib/NG4fls.S
 create mode 100644 arch/sparc/lib/fls.S

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH v2 0/2] sparc64: Optimize fls, fls64 and __fls
@ 2017-09-27 19:25 ` Vijay Kumar
  0 siblings, 0 replies; 20+ messages in thread
From: Vijay Kumar @ 2017-09-27 19:25 UTC (permalink / raw)
  To: davem; +Cc: linux-kernel, sparclinux, babu.moger, rob.gardner

SPARC provides lzcnt instruction (with VIS3) which can be used to
optimize fls, fls64 and __fls functions. For the systems that support
lzcnt instruction, we now do boot time patching to use sparc
optimized fls, fls64 and __fls functions.

v1->v2: 
 - Fixed delay slot issue pointed out by Rob Gardner in patch 2/2.

Vijay Kumar (2):
  sparc64: Define SPARC default fls and __fls
  sparc64: Use lzcnt instruction for fls and __fls

 arch/sparc/Makefile                |    1 +
 arch/sparc/include/asm/bitops_64.h |    7 +-
 arch/sparc/kernel/head_64.S        |    2 +
 arch/sparc/lib/Makefile            |    4 +
 arch/sparc/lib/NG4fls.S            |   31 +++++++++
 arch/sparc/lib/NG4patch.S          |    9 +++
 arch/sparc/lib/fls.S               |  126 ++++++++++++++++++++++++++++++++++++
 7 files changed, 177 insertions(+), 3 deletions(-)
 create mode 100644 arch/sparc/lib/NG4fls.S
 create mode 100644 arch/sparc/lib/fls.S


^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH v2 1/2] sparc64: Define SPARC default fls and __fls
  2017-09-27 19:25 ` Vijay Kumar
@ 2017-09-27 19:25   ` Vijay Kumar
  -1 siblings, 0 replies; 20+ messages in thread
From: Vijay Kumar @ 2017-09-27 19:25 UTC (permalink / raw)
  To: davem; +Cc: linux-kernel, sparclinux, babu.moger, rob.gardner

fls and __fls will now require boot time patching on T4
and above. Redefining these functions under arch/sparc/lib.

Signed-off-by: Vijay Kumar <vijay.ac.kumar@oracle.com>
Reviewed-by: Babu Moger <babu.moger@oracle.com>
---
 arch/sparc/include/asm/bitops_64.h |    7 +-
 arch/sparc/lib/Makefile            |    1 +
 arch/sparc/lib/fls.S               |  126 ++++++++++++++++++++++++++++++++++++
 3 files changed, 131 insertions(+), 3 deletions(-)

diff --git a/arch/sparc/include/asm/bitops_64.h b/arch/sparc/include/asm/bitops_64.h
index 2d52240..946c236 100644
--- a/arch/sparc/include/asm/bitops_64.h
+++ b/arch/sparc/include/asm/bitops_64.h
@@ -22,11 +22,12 @@
 void clear_bit(unsigned long nr, volatile unsigned long *addr);
 void change_bit(unsigned long nr, volatile unsigned long *addr);
 
+#define fls64(word)  (((word)?(__fls(word) + 1):0))
+int fls(unsigned int word);
+int __fls(unsigned long word);
+
 #include <asm-generic/bitops/non-atomic.h>
 
-#include <asm-generic/bitops/fls.h>
-#include <asm-generic/bitops/__fls.h>
-#include <asm-generic/bitops/fls64.h>
 
 #ifdef __KERNEL__
 
diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
index 07c03e7..eefbb9c 100644
--- a/arch/sparc/lib/Makefile
+++ b/arch/sparc/lib/Makefile
@@ -16,6 +16,7 @@ lib-$(CONFIG_SPARC64) += atomic_64.o
 lib-$(CONFIG_SPARC32) += lshrdi3.o ashldi3.o
 lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o
 lib-$(CONFIG_SPARC64) += multi3.o
+lib-$(CONFIG_SPARC64) += fls.o
 
 lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o
 lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o
diff --git a/arch/sparc/lib/fls.S b/arch/sparc/lib/fls.S
new file mode 100644
index 0000000..a19bff2
--- /dev/null
+++ b/arch/sparc/lib/fls.S
@@ -0,0 +1,126 @@
+/* fls.S: SPARC default fls and __fls definitions.
+ *
+ * SPARC default fls and __fls definitions, which follows the same
+ * algorithm as in generic fls() and __fls(). These functions will
+ * be boot time patched on T4 and onward.
+ */
+
+#include <asm/bitsperlong.h>
+#include <asm/export.h>
+
+	.text
+	.align	32
+
+	.global	fls, __fls
+	.type	fls,	#function
+	.type	__fls,	#function
+
+	.register	%g2, #scratch
+	.register	%g3, #scratch
+
+EXPORT_SYMBOL(__fls)
+EXPORT_SYMBOL(fls)
+
+fls:
+	brz,pn	%o0, 6f
+	 mov	0, %o1
+	sethi	%hi(0xffff0000), %g3
+	mov	%o0, %g2
+	andcc	%o0, %g3, %g0
+	be,pt	%icc, 8f
+	 mov	32, %o1
+	sethi	%hi(0xff000000), %g3
+	andcc	%g2, %g3, %g0
+	bne,pt	%icc, 3f
+	 sethi	%hi(0xf0000000), %g3
+	sll	%o0, 8, %o0
+1:
+	add	%o1, -8, %o1
+	sra	%o0, 0, %o0
+	mov	%o0, %g2
+2:
+	sethi	%hi(0xf0000000), %g3
+3:
+	andcc	%g2, %g3, %g0
+	bne,pt	%icc, 4f
+	 sethi	%hi(0xc0000000), %g3
+	sll	%o0, 4, %o0
+	add	%o1, -4, %o1
+	sra	%o0, 0, %o0
+	mov	%o0, %g2
+4:
+	andcc	%g2, %g3, %g0
+	be,a,pt	%icc, 7f
+	 sll	%o0, 2, %o0
+5:
+	xnor	%g0, %o0, %o0
+	srl	%o0, 31, %o0
+	sub	%o1, %o0, %o1
+6:
+	jmp	%o7 + 8
+	 sra	%o1, 0, %o0
+7:
+	add	%o1, -2, %o1
+	ba,pt	%xcc, 5b
+	 sra	%o0, 0, %o0
+8:
+	sll	%o0, 16, %o0
+	sethi	%hi(0xff000000), %g3
+	sra	%o0, 0, %o0
+	mov	%o0, %g2
+	andcc	%g2, %g3, %g0
+	bne,pt	%icc, 2b
+	 mov	16, %o1
+	ba,pt	%xcc, 1b
+	 sll	%o0, 8, %o0
+	.size	fls, .-fls
+
+__fls:
+#if BITS_PER_LONG == 64
+	mov	-1, %g2
+	sllx	%g2, 32, %g2
+	and	%o0, %g2, %g2
+	brnz,pt	%g2, 1f
+	 mov	63, %g1
+	sllx	%o0, 32, %o0
+#endif
+	mov	31, %g1
+1:
+	mov	-1, %g2
+	sllx	%g2, (BITS_PER_LONG-16), %g2
+	and	%o0, %g2, %g2
+	brnz,pt	%g2, 2f
+	 mov	-1, %g2
+	sllx	%o0, 16, %o0
+	add	%g1, -16, %g1
+2:
+	mov	-1, %g2
+	sllx	%g2, (BITS_PER_LONG-8), %g2
+	and	%o0, %g2, %g2
+	brnz,pt	%g2, 3f
+	 mov	-1, %g2
+	sllx	%o0, 8, %o0
+	add	%g1, -8, %g1
+3:
+	sllx	%g2, (BITS_PER_LONG-4), %g2
+	and	%o0, %g2, %g2
+	brnz,pt	%g2, 4f
+	 mov	-1, %g2
+	sllx	%o0, 4, %o0
+	add	%g1, -4, %g1
+4:
+	sllx	%g2, (BITS_PER_LONG-2), %g2
+	and	%o0, %g2, %g2
+	brnz,pt	%g2, 5f
+	 mov	-1, %g3
+	sllx	%o0, 2, %o0
+	add	%g1, -2, %g1
+5:
+	mov	0, %g2
+	sllx	%g3, (BITS_PER_LONG-1), %g3
+	and	%o0, %g3, %o0
+	movre	%o0, 1, %g2
+	sub	%g1, %g2, %g1
+	jmp	%o7+8
+	 sra	%g1, 0, %o0
+	.size	__fls, .-__fls
-- 
1.7.1

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH v2 1/2] sparc64: Define SPARC default fls and __fls
@ 2017-09-27 19:25   ` Vijay Kumar
  0 siblings, 0 replies; 20+ messages in thread
From: Vijay Kumar @ 2017-09-27 19:25 UTC (permalink / raw)
  To: davem; +Cc: linux-kernel, sparclinux, babu.moger, rob.gardner

fls and __fls will now require boot time patching on T4
and above. Redefining these functions under arch/sparc/lib.

Signed-off-by: Vijay Kumar <vijay.ac.kumar@oracle.com>
Reviewed-by: Babu Moger <babu.moger@oracle.com>
---
 arch/sparc/include/asm/bitops_64.h |    7 +-
 arch/sparc/lib/Makefile            |    1 +
 arch/sparc/lib/fls.S               |  126 ++++++++++++++++++++++++++++++++++++
 3 files changed, 131 insertions(+), 3 deletions(-)

diff --git a/arch/sparc/include/asm/bitops_64.h b/arch/sparc/include/asm/bitops_64.h
index 2d52240..946c236 100644
--- a/arch/sparc/include/asm/bitops_64.h
+++ b/arch/sparc/include/asm/bitops_64.h
@@ -22,11 +22,12 @@
 void clear_bit(unsigned long nr, volatile unsigned long *addr);
 void change_bit(unsigned long nr, volatile unsigned long *addr);
 
+#define fls64(word)  (((word)?(__fls(word) + 1):0))
+int fls(unsigned int word);
+int __fls(unsigned long word);
+
 #include <asm-generic/bitops/non-atomic.h>
 
-#include <asm-generic/bitops/fls.h>
-#include <asm-generic/bitops/__fls.h>
-#include <asm-generic/bitops/fls64.h>
 
 #ifdef __KERNEL__
 
diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
index 07c03e7..eefbb9c 100644
--- a/arch/sparc/lib/Makefile
+++ b/arch/sparc/lib/Makefile
@@ -16,6 +16,7 @@ lib-$(CONFIG_SPARC64) += atomic_64.o
 lib-$(CONFIG_SPARC32) += lshrdi3.o ashldi3.o
 lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o
 lib-$(CONFIG_SPARC64) += multi3.o
+lib-$(CONFIG_SPARC64) += fls.o
 
 lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o
 lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o
diff --git a/arch/sparc/lib/fls.S b/arch/sparc/lib/fls.S
new file mode 100644
index 0000000..a19bff2
--- /dev/null
+++ b/arch/sparc/lib/fls.S
@@ -0,0 +1,126 @@
+/* fls.S: SPARC default fls and __fls definitions.
+ *
+ * SPARC default fls and __fls definitions, which follows the same
+ * algorithm as in generic fls() and __fls(). These functions will
+ * be boot time patched on T4 and onward.
+ */
+
+#include <asm/bitsperlong.h>
+#include <asm/export.h>
+
+	.text
+	.align	32
+
+	.global	fls, __fls
+	.type	fls,	#function
+	.type	__fls,	#function
+
+	.register	%g2, #scratch
+	.register	%g3, #scratch
+
+EXPORT_SYMBOL(__fls)
+EXPORT_SYMBOL(fls)
+
+fls:
+	brz,pn	%o0, 6f
+	 mov	0, %o1
+	sethi	%hi(0xffff0000), %g3
+	mov	%o0, %g2
+	andcc	%o0, %g3, %g0
+	be,pt	%icc, 8f
+	 mov	32, %o1
+	sethi	%hi(0xff000000), %g3
+	andcc	%g2, %g3, %g0
+	bne,pt	%icc, 3f
+	 sethi	%hi(0xf0000000), %g3
+	sll	%o0, 8, %o0
+1:
+	add	%o1, -8, %o1
+	sra	%o0, 0, %o0
+	mov	%o0, %g2
+2:
+	sethi	%hi(0xf0000000), %g3
+3:
+	andcc	%g2, %g3, %g0
+	bne,pt	%icc, 4f
+	 sethi	%hi(0xc0000000), %g3
+	sll	%o0, 4, %o0
+	add	%o1, -4, %o1
+	sra	%o0, 0, %o0
+	mov	%o0, %g2
+4:
+	andcc	%g2, %g3, %g0
+	be,a,pt	%icc, 7f
+	 sll	%o0, 2, %o0
+5:
+	xnor	%g0, %o0, %o0
+	srl	%o0, 31, %o0
+	sub	%o1, %o0, %o1
+6:
+	jmp	%o7 + 8
+	 sra	%o1, 0, %o0
+7:
+	add	%o1, -2, %o1
+	ba,pt	%xcc, 5b
+	 sra	%o0, 0, %o0
+8:
+	sll	%o0, 16, %o0
+	sethi	%hi(0xff000000), %g3
+	sra	%o0, 0, %o0
+	mov	%o0, %g2
+	andcc	%g2, %g3, %g0
+	bne,pt	%icc, 2b
+	 mov	16, %o1
+	ba,pt	%xcc, 1b
+	 sll	%o0, 8, %o0
+	.size	fls, .-fls
+
+__fls:
+#if BITS_PER_LONG == 64
+	mov	-1, %g2
+	sllx	%g2, 32, %g2
+	and	%o0, %g2, %g2
+	brnz,pt	%g2, 1f
+	 mov	63, %g1
+	sllx	%o0, 32, %o0
+#endif
+	mov	31, %g1
+1:
+	mov	-1, %g2
+	sllx	%g2, (BITS_PER_LONG-16), %g2
+	and	%o0, %g2, %g2
+	brnz,pt	%g2, 2f
+	 mov	-1, %g2
+	sllx	%o0, 16, %o0
+	add	%g1, -16, %g1
+2:
+	mov	-1, %g2
+	sllx	%g2, (BITS_PER_LONG-8), %g2
+	and	%o0, %g2, %g2
+	brnz,pt	%g2, 3f
+	 mov	-1, %g2
+	sllx	%o0, 8, %o0
+	add	%g1, -8, %g1
+3:
+	sllx	%g2, (BITS_PER_LONG-4), %g2
+	and	%o0, %g2, %g2
+	brnz,pt	%g2, 4f
+	 mov	-1, %g2
+	sllx	%o0, 4, %o0
+	add	%g1, -4, %g1
+4:
+	sllx	%g2, (BITS_PER_LONG-2), %g2
+	and	%o0, %g2, %g2
+	brnz,pt	%g2, 5f
+	 mov	-1, %g3
+	sllx	%o0, 2, %o0
+	add	%g1, -2, %g1
+5:
+	mov	0, %g2
+	sllx	%g3, (BITS_PER_LONG-1), %g3
+	and	%o0, %g3, %o0
+	movre	%o0, 1, %g2
+	sub	%g1, %g2, %g1
+	jmp	%o7+8
+	 sra	%g1, 0, %o0
+	.size	__fls, .-__fls
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH v2 2/2] sparc64: Use lzcnt instruction for fls and __fls
  2017-09-27 19:25 ` Vijay Kumar
@ 2017-09-27 19:25   ` Vijay Kumar
  -1 siblings, 0 replies; 20+ messages in thread
From: Vijay Kumar @ 2017-09-27 19:25 UTC (permalink / raw)
  To: davem; +Cc: linux-kernel, sparclinux, babu.moger, rob.gardner

For T4 and above, patch fls and __fls functions
at the boot time to use lzcnt instruction.

Signed-off-by: Vijay Kumar <vijay.ac.kumar@oracle.com>
Reviewed-by: Babu Moger <babu.moger@oracle.com>
---
 arch/sparc/Makefile         |    1 +
 arch/sparc/kernel/head_64.S |    2 ++
 arch/sparc/lib/Makefile     |    3 +++
 arch/sparc/lib/NG4fls.S     |   30 ++++++++++++++++++++++++++++++
 arch/sparc/lib/NG4patch.S   |    9 +++++++++
 5 files changed, 45 insertions(+), 0 deletions(-)

diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile
index 8496a07..0763cd8 100644
--- a/arch/sparc/Makefile
+++ b/arch/sparc/Makefile
@@ -48,6 +48,7 @@ KBUILD_CFLAGS += -ffixed-g4 -ffixed-g5 -fcall-used-g7 -Wno-sign-compare
 KBUILD_CFLAGS += -Wa,--undeclared-regs
 KBUILD_CFLAGS += $(call cc-option,-mtune=ultrasparc3)
 KBUILD_AFLAGS += -m64 -mcpu=ultrasparc -Wa,--undeclared-regs
+KBUILD_AFLAGS += -Wa,-Asparc4
 
 ifeq ($(CONFIG_MCOUNT),y)
   KBUILD_CFLAGS += -pg
diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S
index 78e0211..1165254 100644
--- a/arch/sparc/kernel/head_64.S
+++ b/arch/sparc/kernel/head_64.S
@@ -628,6 +628,8 @@ niagara4_patch:
 	 nop
 	call	niagara4_patch_pageops
 	 nop
+	call	niagara4_patch_fls
+	 nop
 
 	ba,a,pt	%xcc, 80f
 	 nop
diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
index eefbb9c..72d2d8c 100644
--- a/arch/sparc/lib/Makefile
+++ b/arch/sparc/lib/Makefile
@@ -46,3 +46,6 @@ lib-$(CONFIG_SPARC64) += mcount.o ipcsum.o xor.o hweight.o ffs.o
 obj-$(CONFIG_SPARC64) += iomap.o
 obj-$(CONFIG_SPARC32) += atomic32.o ucmpdi2.o
 obj-$(CONFIG_SPARC64) += PeeCeeI.o
+
+obj-$(CONFIG_SPARC64) += fls.o
+obj-$(CONFIG_SPARC64) += NG4fls.o
diff --git a/arch/sparc/lib/NG4fls.S b/arch/sparc/lib/NG4fls.S
new file mode 100644
index 0000000..eb239aa
--- /dev/null
+++ b/arch/sparc/lib/NG4fls.S
@@ -0,0 +1,30 @@
+/* NG4fls.S: SPARC optimized fls and __fls for T4 and above.
+ *
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ */
+
+	.text
+	.align 32
+
+	.globl NG4fls
+	.globl __NG4fls
+	.type  NG4fls, #function
+	.type  __NG4fls, #function
+
+NG4fls:
+	lzcnt   %o0, %o1
+	mov     64, %o2
+	retl
+	 sub     %o2, %o1, %o0
+	.size   NG4fls, .-NG4fls
+
+__NG4fls:
+	brz,pn  %o0, 1f
+        mov	%o0, %o1
+	lzcnt	%o1, %o0
+	mov     63, %o2
+	sub     %o2, %o0, %o0
+1:
+	retl
+	 nop
+	.size   __NG4fls, .-__NG4fls
diff --git a/arch/sparc/lib/NG4patch.S b/arch/sparc/lib/NG4patch.S
index 3cc0f8c..1010d53 100644
--- a/arch/sparc/lib/NG4patch.S
+++ b/arch/sparc/lib/NG4patch.S
@@ -52,3 +52,12 @@ niagara4_patch_pageops:
 	retl
 	 nop
 	.size	niagara4_patch_pageops,.-niagara4_patch_pageops
+
+	.globl	niagara4_patch_fls
+	.type	niagara4_patch_fls,#function
+niagara4_patch_fls:
+	NG_DO_PATCH(fls, NG4fls)
+	NG_DO_PATCH(__fls, __NG4fls)
+	retl
+	 nop
+	.size	niagara4_patch_fls,.-niagara4_patch_fls
-- 
1.7.1

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH v2 2/2] sparc64: Use lzcnt instruction for fls and __fls
@ 2017-09-27 19:25   ` Vijay Kumar
  0 siblings, 0 replies; 20+ messages in thread
From: Vijay Kumar @ 2017-09-27 19:25 UTC (permalink / raw)
  To: davem; +Cc: linux-kernel, sparclinux, babu.moger, rob.gardner

For T4 and above, patch fls and __fls functions
at the boot time to use lzcnt instruction.

Signed-off-by: Vijay Kumar <vijay.ac.kumar@oracle.com>
Reviewed-by: Babu Moger <babu.moger@oracle.com>
---
 arch/sparc/Makefile         |    1 +
 arch/sparc/kernel/head_64.S |    2 ++
 arch/sparc/lib/Makefile     |    3 +++
 arch/sparc/lib/NG4fls.S     |   30 ++++++++++++++++++++++++++++++
 arch/sparc/lib/NG4patch.S   |    9 +++++++++
 5 files changed, 45 insertions(+), 0 deletions(-)

diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile
index 8496a07..0763cd8 100644
--- a/arch/sparc/Makefile
+++ b/arch/sparc/Makefile
@@ -48,6 +48,7 @@ KBUILD_CFLAGS += -ffixed-g4 -ffixed-g5 -fcall-used-g7 -Wno-sign-compare
 KBUILD_CFLAGS += -Wa,--undeclared-regs
 KBUILD_CFLAGS += $(call cc-option,-mtune=ultrasparc3)
 KBUILD_AFLAGS += -m64 -mcpu=ultrasparc -Wa,--undeclared-regs
+KBUILD_AFLAGS += -Wa,-Asparc4
 
 ifeq ($(CONFIG_MCOUNT),y)
   KBUILD_CFLAGS += -pg
diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S
index 78e0211..1165254 100644
--- a/arch/sparc/kernel/head_64.S
+++ b/arch/sparc/kernel/head_64.S
@@ -628,6 +628,8 @@ niagara4_patch:
 	 nop
 	call	niagara4_patch_pageops
 	 nop
+	call	niagara4_patch_fls
+	 nop
 
 	ba,a,pt	%xcc, 80f
 	 nop
diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
index eefbb9c..72d2d8c 100644
--- a/arch/sparc/lib/Makefile
+++ b/arch/sparc/lib/Makefile
@@ -46,3 +46,6 @@ lib-$(CONFIG_SPARC64) += mcount.o ipcsum.o xor.o hweight.o ffs.o
 obj-$(CONFIG_SPARC64) += iomap.o
 obj-$(CONFIG_SPARC32) += atomic32.o ucmpdi2.o
 obj-$(CONFIG_SPARC64) += PeeCeeI.o
+
+obj-$(CONFIG_SPARC64) += fls.o
+obj-$(CONFIG_SPARC64) += NG4fls.o
diff --git a/arch/sparc/lib/NG4fls.S b/arch/sparc/lib/NG4fls.S
new file mode 100644
index 0000000..eb239aa
--- /dev/null
+++ b/arch/sparc/lib/NG4fls.S
@@ -0,0 +1,30 @@
+/* NG4fls.S: SPARC optimized fls and __fls for T4 and above.
+ *
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ */
+
+	.text
+	.align 32
+
+	.globl NG4fls
+	.globl __NG4fls
+	.type  NG4fls, #function
+	.type  __NG4fls, #function
+
+NG4fls:
+	lzcnt   %o0, %o1
+	mov     64, %o2
+	retl
+	 sub     %o2, %o1, %o0
+	.size   NG4fls, .-NG4fls
+
+__NG4fls:
+	brz,pn  %o0, 1f
+        mov	%o0, %o1
+	lzcnt	%o1, %o0
+	mov     63, %o2
+	sub     %o2, %o0, %o0
+1:
+	retl
+	 nop
+	.size   __NG4fls, .-__NG4fls
diff --git a/arch/sparc/lib/NG4patch.S b/arch/sparc/lib/NG4patch.S
index 3cc0f8c..1010d53 100644
--- a/arch/sparc/lib/NG4patch.S
+++ b/arch/sparc/lib/NG4patch.S
@@ -52,3 +52,12 @@ niagara4_patch_pageops:
 	retl
 	 nop
 	.size	niagara4_patch_pageops,.-niagara4_patch_pageops
+
+	.globl	niagara4_patch_fls
+	.type	niagara4_patch_fls,#function
+niagara4_patch_fls:
+	NG_DO_PATCH(fls, NG4fls)
+	NG_DO_PATCH(__fls, __NG4fls)
+	retl
+	 nop
+	.size	niagara4_patch_fls,.-niagara4_patch_fls
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* Re: [PATCH v2 1/2] sparc64: Define SPARC default fls and __fls
  2017-09-27 19:25   ` Vijay Kumar
@ 2017-09-27 19:50     ` Sam Ravnborg
  -1 siblings, 0 replies; 20+ messages in thread
From: Sam Ravnborg @ 2017-09-27 19:50 UTC (permalink / raw)
  To: Vijay Kumar; +Cc: davem, linux-kernel, sparclinux, babu.moger, rob.gardner

Hi Vijay.

Some feedback - see below.
The comment about ENTRY() ENDPROC() is also valid for patch 2/2

	Sam

> 
> diff --git a/arch/sparc/include/asm/bitops_64.h b/arch/sparc/include/asm/bitops_64.h
> index 2d52240..946c236 100644
> --- a/arch/sparc/include/asm/bitops_64.h
> +++ b/arch/sparc/include/asm/bitops_64.h
> @@ -22,11 +22,12 @@
>  void clear_bit(unsigned long nr, volatile unsigned long *addr);
>  void change_bit(unsigned long nr, volatile unsigned long *addr);
>  
> +#define fls64(word)  (((word)?(__fls(word) + 1):0))
This macro could result in unwanted side effects.
If I use:

	fls64(i++)

for some obscure reason, then i will be incremented twice if i != 0.
Using the asm-generic version would be better.

> +int fls(unsigned int word);
> +int __fls(unsigned long word);
> +
>  #include <asm-generic/bitops/non-atomic.h>
>  
> -#include <asm-generic/bitops/fls.h>
> -#include <asm-generic/bitops/__fls.h>
> -#include <asm-generic/bitops/fls64.h>
>  
>  #ifdef __KERNEL__
>  
> diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
> index 07c03e7..eefbb9c 100644
> --- a/arch/sparc/lib/Makefile
> +++ b/arch/sparc/lib/Makefile
> @@ -16,6 +16,7 @@ lib-$(CONFIG_SPARC64) += atomic_64.o
>  lib-$(CONFIG_SPARC32) += lshrdi3.o ashldi3.o
>  lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o
>  lib-$(CONFIG_SPARC64) += multi3.o
> +lib-$(CONFIG_SPARC64) += fls.o
>  
>  lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o
>  lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o
> diff --git a/arch/sparc/lib/fls.S b/arch/sparc/lib/fls.S
> new file mode 100644
> index 0000000..a19bff2
> --- /dev/null
> +++ b/arch/sparc/lib/fls.S
> @@ -0,0 +1,126 @@
> +/* fls.S: SPARC default fls and __fls definitions.
> + *
> + * SPARC default fls and __fls definitions, which follows the same
> + * algorithm as in generic fls() and __fls(). These functions will
> + * be boot time patched on T4 and onward.
> + */
> +
> +#include <asm/bitsperlong.h>
> +#include <asm/export.h>
> +
> +	.text
> +	.align	32
> +
> +	.global	fls, __fls
> +	.type	fls,	#function
> +	.type	__fls,	#function
> +
> +	.register	%g2, #scratch
> +	.register	%g3, #scratch
> +
> +EXPORT_SYMBOL(__fls)
> +EXPORT_SYMBOL(fls)
> +
> +fls:
Use ENTRY(), ENDPROC() for assembler functions.
> +	brz,pn	%o0, 6f
> +	 mov	0, %o1
> +	sethi	%hi(0xffff0000), %g3
> +	mov	%o0, %g2
> +	andcc	%o0, %g3, %g0
> +	be,pt	%icc, 8f
> +	 mov	32, %o1
> +	sethi	%hi(0xff000000), %g3
> +	andcc	%g2, %g3, %g0
> +	bne,pt	%icc, 3f
> +	 sethi	%hi(0xf0000000), %g3
> +	sll	%o0, 8, %o0
> +1:
> +	add	%o1, -8, %o1
> +	sra	%o0, 0, %o0
> +	mov	%o0, %g2
> +2:
> +	sethi	%hi(0xf0000000), %g3
> +3:
> +	andcc	%g2, %g3, %g0
> +	bne,pt	%icc, 4f
> +	 sethi	%hi(0xc0000000), %g3
> +	sll	%o0, 4, %o0
> +	add	%o1, -4, %o1
> +	sra	%o0, 0, %o0
> +	mov	%o0, %g2
> +4:
> +	andcc	%g2, %g3, %g0
> +	be,a,pt	%icc, 7f
> +	 sll	%o0, 2, %o0
> +5:
> +	xnor	%g0, %o0, %o0
> +	srl	%o0, 31, %o0
> +	sub	%o1, %o0, %o1
> +6:
> +	jmp	%o7 + 8
> +	 sra	%o1, 0, %o0
> +7:
> +	add	%o1, -2, %o1
> +	ba,pt	%xcc, 5b
> +	 sra	%o0, 0, %o0
> +8:
> +	sll	%o0, 16, %o0
> +	sethi	%hi(0xff000000), %g3
> +	sra	%o0, 0, %o0
> +	mov	%o0, %g2
> +	andcc	%g2, %g3, %g0
> +	bne,pt	%icc, 2b
> +	 mov	16, %o1
> +	ba,pt	%xcc, 1b
> +	 sll	%o0, 8, %o0
> +	.size	fls, .-fls
> +
> +__fls:
Same here, use ENTRY(), ENDPROC()
> +#if BITS_PER_LONG == 64
> +	mov	-1, %g2
> +	sllx	%g2, 32, %g2
> +	and	%o0, %g2, %g2
> +	brnz,pt	%g2, 1f
> +	 mov	63, %g1
> +	sllx	%o0, 32, %o0
> +#endif

Testing for BITS_PER_LONG seems not necessary as long as this is sparc64 only.
And sparc32 has no optimized bit operations not even LEON
so this would not make sense in sparc32 land anyway.

> +	mov	31, %g1
> +1:
> +	mov	-1, %g2
> +	sllx	%g2, (BITS_PER_LONG-16), %g2
spaces around operators please. It is no excuse that the source did not have so.

> +	and	%o0, %g2, %g2
> +	brnz,pt	%g2, 2f
> +	 mov	-1, %g2
> +	sllx	%o0, 16, %o0
> +	add	%g1, -16, %g1
> +2:
> +	mov	-1, %g2
> +	sllx	%g2, (BITS_PER_LONG-8), %g2
> +	and	%o0, %g2, %g2
> +	brnz,pt	%g2, 3f
> +	 mov	-1, %g2
> +	sllx	%o0, 8, %o0
> +	add	%g1, -8, %g1
> +3:
> +	sllx	%g2, (BITS_PER_LONG-4), %g2
> +	and	%o0, %g2, %g2
> +	brnz,pt	%g2, 4f
> +	 mov	-1, %g2
> +	sllx	%o0, 4, %o0
> +	add	%g1, -4, %g1
> +4:
> +	sllx	%g2, (BITS_PER_LONG-2), %g2
> +	and	%o0, %g2, %g2
> +	brnz,pt	%g2, 5f
> +	 mov	-1, %g3
> +	sllx	%o0, 2, %o0
> +	add	%g1, -2, %g1
> +5:
> +	mov	0, %g2
> +	sllx	%g3, (BITS_PER_LONG-1), %g3
> +	and	%o0, %g3, %o0
> +	movre	%o0, 1, %g2
> +	sub	%g1, %g2, %g1
> +	jmp	%o7+8
> +	 sra	%g1, 0, %o0
> +	.size	__fls, .-__fls

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v2 1/2] sparc64: Define SPARC default fls and __fls
@ 2017-09-27 19:50     ` Sam Ravnborg
  0 siblings, 0 replies; 20+ messages in thread
From: Sam Ravnborg @ 2017-09-27 19:50 UTC (permalink / raw)
  To: Vijay Kumar; +Cc: davem, linux-kernel, sparclinux, babu.moger, rob.gardner

Hi Vijay.

Some feedback - see below.
The comment about ENTRY() ENDPROC() is also valid for patch 2/2

	Sam

> 
> diff --git a/arch/sparc/include/asm/bitops_64.h b/arch/sparc/include/asm/bitops_64.h
> index 2d52240..946c236 100644
> --- a/arch/sparc/include/asm/bitops_64.h
> +++ b/arch/sparc/include/asm/bitops_64.h
> @@ -22,11 +22,12 @@
>  void clear_bit(unsigned long nr, volatile unsigned long *addr);
>  void change_bit(unsigned long nr, volatile unsigned long *addr);
>  
> +#define fls64(word)  (((word)?(__fls(word) + 1):0))
This macro could result in unwanted side effects.
If I use:

	fls64(i++)

for some obscure reason, then i will be incremented twice if i != 0.
Using the asm-generic version would be better.

> +int fls(unsigned int word);
> +int __fls(unsigned long word);
> +
>  #include <asm-generic/bitops/non-atomic.h>
>  
> -#include <asm-generic/bitops/fls.h>
> -#include <asm-generic/bitops/__fls.h>
> -#include <asm-generic/bitops/fls64.h>
>  
>  #ifdef __KERNEL__
>  
> diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
> index 07c03e7..eefbb9c 100644
> --- a/arch/sparc/lib/Makefile
> +++ b/arch/sparc/lib/Makefile
> @@ -16,6 +16,7 @@ lib-$(CONFIG_SPARC64) += atomic_64.o
>  lib-$(CONFIG_SPARC32) += lshrdi3.o ashldi3.o
>  lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o
>  lib-$(CONFIG_SPARC64) += multi3.o
> +lib-$(CONFIG_SPARC64) += fls.o
>  
>  lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o
>  lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o
> diff --git a/arch/sparc/lib/fls.S b/arch/sparc/lib/fls.S
> new file mode 100644
> index 0000000..a19bff2
> --- /dev/null
> +++ b/arch/sparc/lib/fls.S
> @@ -0,0 +1,126 @@
> +/* fls.S: SPARC default fls and __fls definitions.
> + *
> + * SPARC default fls and __fls definitions, which follows the same
> + * algorithm as in generic fls() and __fls(). These functions will
> + * be boot time patched on T4 and onward.
> + */
> +
> +#include <asm/bitsperlong.h>
> +#include <asm/export.h>
> +
> +	.text
> +	.align	32
> +
> +	.global	fls, __fls
> +	.type	fls,	#function
> +	.type	__fls,	#function
> +
> +	.register	%g2, #scratch
> +	.register	%g3, #scratch
> +
> +EXPORT_SYMBOL(__fls)
> +EXPORT_SYMBOL(fls)
> +
> +fls:
Use ENTRY(), ENDPROC() for assembler functions.
> +	brz,pn	%o0, 6f
> +	 mov	0, %o1
> +	sethi	%hi(0xffff0000), %g3
> +	mov	%o0, %g2
> +	andcc	%o0, %g3, %g0
> +	be,pt	%icc, 8f
> +	 mov	32, %o1
> +	sethi	%hi(0xff000000), %g3
> +	andcc	%g2, %g3, %g0
> +	bne,pt	%icc, 3f
> +	 sethi	%hi(0xf0000000), %g3
> +	sll	%o0, 8, %o0
> +1:
> +	add	%o1, -8, %o1
> +	sra	%o0, 0, %o0
> +	mov	%o0, %g2
> +2:
> +	sethi	%hi(0xf0000000), %g3
> +3:
> +	andcc	%g2, %g3, %g0
> +	bne,pt	%icc, 4f
> +	 sethi	%hi(0xc0000000), %g3
> +	sll	%o0, 4, %o0
> +	add	%o1, -4, %o1
> +	sra	%o0, 0, %o0
> +	mov	%o0, %g2
> +4:
> +	andcc	%g2, %g3, %g0
> +	be,a,pt	%icc, 7f
> +	 sll	%o0, 2, %o0
> +5:
> +	xnor	%g0, %o0, %o0
> +	srl	%o0, 31, %o0
> +	sub	%o1, %o0, %o1
> +6:
> +	jmp	%o7 + 8
> +	 sra	%o1, 0, %o0
> +7:
> +	add	%o1, -2, %o1
> +	ba,pt	%xcc, 5b
> +	 sra	%o0, 0, %o0
> +8:
> +	sll	%o0, 16, %o0
> +	sethi	%hi(0xff000000), %g3
> +	sra	%o0, 0, %o0
> +	mov	%o0, %g2
> +	andcc	%g2, %g3, %g0
> +	bne,pt	%icc, 2b
> +	 mov	16, %o1
> +	ba,pt	%xcc, 1b
> +	 sll	%o0, 8, %o0
> +	.size	fls, .-fls
> +
> +__fls:
Same here, use ENTRY(), ENDPROC()
> +#if BITS_PER_LONG == 64
> +	mov	-1, %g2
> +	sllx	%g2, 32, %g2
> +	and	%o0, %g2, %g2
> +	brnz,pt	%g2, 1f
> +	 mov	63, %g1
> +	sllx	%o0, 32, %o0
> +#endif

Testing for BITS_PER_LONG seems not necessary as long as this is sparc64 only.
And sparc32 has no optimized bit operations not even LEON
so this would not make sense in sparc32 land anyway.

> +	mov	31, %g1
> +1:
> +	mov	-1, %g2
> +	sllx	%g2, (BITS_PER_LONG-16), %g2
spaces around operators please. It is no excuse that the source did not have so.

> +	and	%o0, %g2, %g2
> +	brnz,pt	%g2, 2f
> +	 mov	-1, %g2
> +	sllx	%o0, 16, %o0
> +	add	%g1, -16, %g1
> +2:
> +	mov	-1, %g2
> +	sllx	%g2, (BITS_PER_LONG-8), %g2
> +	and	%o0, %g2, %g2
> +	brnz,pt	%g2, 3f
> +	 mov	-1, %g2
> +	sllx	%o0, 8, %o0
> +	add	%g1, -8, %g1
> +3:
> +	sllx	%g2, (BITS_PER_LONG-4), %g2
> +	and	%o0, %g2, %g2
> +	brnz,pt	%g2, 4f
> +	 mov	-1, %g2
> +	sllx	%o0, 4, %o0
> +	add	%g1, -4, %g1
> +4:
> +	sllx	%g2, (BITS_PER_LONG-2), %g2
> +	and	%o0, %g2, %g2
> +	brnz,pt	%g2, 5f
> +	 mov	-1, %g3
> +	sllx	%o0, 2, %o0
> +	add	%g1, -2, %g1
> +5:
> +	mov	0, %g2
> +	sllx	%g3, (BITS_PER_LONG-1), %g3
> +	and	%o0, %g3, %o0
> +	movre	%o0, 1, %g2
> +	sub	%g1, %g2, %g1
> +	jmp	%o7+8
> +	 sra	%g1, 0, %o0
> +	.size	__fls, .-__fls

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v2 2/2] sparc64: Use lzcnt instruction for fls and __fls
  2017-09-27 19:25   ` Vijay Kumar
@ 2017-09-27 19:56     ` Sam Ravnborg
  -1 siblings, 0 replies; 20+ messages in thread
From: Sam Ravnborg @ 2017-09-27 19:56 UTC (permalink / raw)
  To: Vijay Kumar; +Cc: davem, linux-kernel, sparclinux, babu.moger, rob.gardner

Hi Vijay.

On Wed, Sep 27, 2017 at 01:25:26PM -0600, Vijay Kumar wrote:
> For T4 and above, patch fls and __fls functions
> at the boot time to use lzcnt instruction.
> 
> Signed-off-by: Vijay Kumar <vijay.ac.kumar@oracle.com>
> Reviewed-by: Babu Moger <babu.moger@oracle.com>
> ---
>  arch/sparc/Makefile         |    1 +
>  arch/sparc/kernel/head_64.S |    2 ++
>  arch/sparc/lib/Makefile     |    3 +++
>  arch/sparc/lib/NG4fls.S     |   30 ++++++++++++++++++++++++++++++
>  arch/sparc/lib/NG4patch.S   |    9 +++++++++
>  5 files changed, 45 insertions(+), 0 deletions(-)
> 
> diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile
> index 8496a07..0763cd8 100644
> --- a/arch/sparc/Makefile
> +++ b/arch/sparc/Makefile
> @@ -48,6 +48,7 @@ KBUILD_CFLAGS += -ffixed-g4 -ffixed-g5 -fcall-used-g7 -Wno-sign-compare
>  KBUILD_CFLAGS += -Wa,--undeclared-regs
>  KBUILD_CFLAGS += $(call cc-option,-mtune=ultrasparc3)
>  KBUILD_AFLAGS += -m64 -mcpu=ultrasparc -Wa,--undeclared-regs
> +KBUILD_AFLAGS += -Wa,-Asparc4
This change is not justified anywhere??

>  
>  ifeq ($(CONFIG_MCOUNT),y)
>    KBUILD_CFLAGS += -pg
> diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S
> index 78e0211..1165254 100644
> --- a/arch/sparc/kernel/head_64.S
> +++ b/arch/sparc/kernel/head_64.S
> @@ -628,6 +628,8 @@ niagara4_patch:
>  	 nop
>  	call	niagara4_patch_pageops
>  	 nop
> +	call	niagara4_patch_fls
> +	 nop
>  
>  	ba,a,pt	%xcc, 80f
>  	 nop
> diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
> index eefbb9c..72d2d8c 100644
> --- a/arch/sparc/lib/Makefile
> +++ b/arch/sparc/lib/Makefile
> @@ -46,3 +46,6 @@ lib-$(CONFIG_SPARC64) += mcount.o ipcsum.o xor.o hweight.o ffs.o
>  obj-$(CONFIG_SPARC64) += iomap.o
>  obj-$(CONFIG_SPARC32) += atomic32.o ucmpdi2.o
>  obj-$(CONFIG_SPARC64) += PeeCeeI.o
> +
> +obj-$(CONFIG_SPARC64) += fls.o
> +obj-$(CONFIG_SPARC64) += NG4fls.o
> diff --git a/arch/sparc/lib/NG4fls.S b/arch/sparc/lib/NG4fls.S
> new file mode 100644
> index 0000000..eb239aa
> --- /dev/null
> +++ b/arch/sparc/lib/NG4fls.S
> @@ -0,0 +1,30 @@
> +/* NG4fls.S: SPARC optimized fls and __fls for T4 and above.
> + *
> + * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
> + */
> +
> +	.text
> +	.align 32
> +
> +	.globl NG4fls
> +	.globl __NG4fls
> +	.type  NG4fls, #function
> +	.type  __NG4fls, #function

Use ENTRY(), ENDPROC() as already mentioned.
> +NG4fls:
> +	lzcnt   %o0, %o1
> +	mov     64, %o2
> +	retl
> +	 sub     %o2, %o1, %o0
> +	.size   NG4fls, .-NG4fls
> +
> +__NG4fls:
> +	brz,pn  %o0, 1f
> +        mov	%o0, %o1
Use tabs for indent, not spaces. In this case one tab + one space.

> +	lzcnt	%o1, %o0
> +	mov     63, %o2
> +	sub     %o2, %o0, %o0
> +1:
> +	retl
> +	 nop
> +	.size   __NG4fls, .-__NG4fls
> diff --git a/arch/sparc/lib/NG4patch.S b/arch/sparc/lib/NG4patch.S
> index 3cc0f8c..1010d53 100644
> --- a/arch/sparc/lib/NG4patch.S
> +++ b/arch/sparc/lib/NG4patch.S
> @@ -52,3 +52,12 @@ niagara4_patch_pageops:
>  	retl
>  	 nop
>  	.size	niagara4_patch_pageops,.-niagara4_patch_pageops
> +
> +	.globl	niagara4_patch_fls
> +	.type	niagara4_patch_fls,#function
> +niagara4_patch_fls:
> +	NG_DO_PATCH(fls, NG4fls)
> +	NG_DO_PATCH(__fls, __NG4fls)
> +	retl
> +	 nop
> +	.size	niagara4_patch_fls,.-niagara4_patch_fls

Please patch the remaining functions in this file with ENTRY(), ENDPROC() - in a separate patch.

	Sam

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v2 2/2] sparc64: Use lzcnt instruction for fls and __fls
@ 2017-09-27 19:56     ` Sam Ravnborg
  0 siblings, 0 replies; 20+ messages in thread
From: Sam Ravnborg @ 2017-09-27 19:56 UTC (permalink / raw)
  To: Vijay Kumar; +Cc: davem, linux-kernel, sparclinux, babu.moger, rob.gardner

Hi Vijay.

On Wed, Sep 27, 2017 at 01:25:26PM -0600, Vijay Kumar wrote:
> For T4 and above, patch fls and __fls functions
> at the boot time to use lzcnt instruction.
> 
> Signed-off-by: Vijay Kumar <vijay.ac.kumar@oracle.com>
> Reviewed-by: Babu Moger <babu.moger@oracle.com>
> ---
>  arch/sparc/Makefile         |    1 +
>  arch/sparc/kernel/head_64.S |    2 ++
>  arch/sparc/lib/Makefile     |    3 +++
>  arch/sparc/lib/NG4fls.S     |   30 ++++++++++++++++++++++++++++++
>  arch/sparc/lib/NG4patch.S   |    9 +++++++++
>  5 files changed, 45 insertions(+), 0 deletions(-)
> 
> diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile
> index 8496a07..0763cd8 100644
> --- a/arch/sparc/Makefile
> +++ b/arch/sparc/Makefile
> @@ -48,6 +48,7 @@ KBUILD_CFLAGS += -ffixed-g4 -ffixed-g5 -fcall-used-g7 -Wno-sign-compare
>  KBUILD_CFLAGS += -Wa,--undeclared-regs
>  KBUILD_CFLAGS += $(call cc-option,-mtune=ultrasparc3)
>  KBUILD_AFLAGS += -m64 -mcpu=ultrasparc -Wa,--undeclared-regs
> +KBUILD_AFLAGS += -Wa,-Asparc4
This change is not justified anywhere??

>  
>  ifeq ($(CONFIG_MCOUNT),y)
>    KBUILD_CFLAGS += -pg
> diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S
> index 78e0211..1165254 100644
> --- a/arch/sparc/kernel/head_64.S
> +++ b/arch/sparc/kernel/head_64.S
> @@ -628,6 +628,8 @@ niagara4_patch:
>  	 nop
>  	call	niagara4_patch_pageops
>  	 nop
> +	call	niagara4_patch_fls
> +	 nop
>  
>  	ba,a,pt	%xcc, 80f
>  	 nop
> diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
> index eefbb9c..72d2d8c 100644
> --- a/arch/sparc/lib/Makefile
> +++ b/arch/sparc/lib/Makefile
> @@ -46,3 +46,6 @@ lib-$(CONFIG_SPARC64) += mcount.o ipcsum.o xor.o hweight.o ffs.o
>  obj-$(CONFIG_SPARC64) += iomap.o
>  obj-$(CONFIG_SPARC32) += atomic32.o ucmpdi2.o
>  obj-$(CONFIG_SPARC64) += PeeCeeI.o
> +
> +obj-$(CONFIG_SPARC64) += fls.o
> +obj-$(CONFIG_SPARC64) += NG4fls.o
> diff --git a/arch/sparc/lib/NG4fls.S b/arch/sparc/lib/NG4fls.S
> new file mode 100644
> index 0000000..eb239aa
> --- /dev/null
> +++ b/arch/sparc/lib/NG4fls.S
> @@ -0,0 +1,30 @@
> +/* NG4fls.S: SPARC optimized fls and __fls for T4 and above.
> + *
> + * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
> + */
> +
> +	.text
> +	.align 32
> +
> +	.globl NG4fls
> +	.globl __NG4fls
> +	.type  NG4fls, #function
> +	.type  __NG4fls, #function

Use ENTRY(), ENDPROC() as already mentioned.
> +NG4fls:
> +	lzcnt   %o0, %o1
> +	mov     64, %o2
> +	retl
> +	 sub     %o2, %o1, %o0
> +	.size   NG4fls, .-NG4fls
> +
> +__NG4fls:
> +	brz,pn  %o0, 1f
> +        mov	%o0, %o1
Use tabs for indent, not spaces. In this case one tab + one space.

> +	lzcnt	%o1, %o0
> +	mov     63, %o2
> +	sub     %o2, %o0, %o0
> +1:
> +	retl
> +	 nop
> +	.size   __NG4fls, .-__NG4fls
> diff --git a/arch/sparc/lib/NG4patch.S b/arch/sparc/lib/NG4patch.S
> index 3cc0f8c..1010d53 100644
> --- a/arch/sparc/lib/NG4patch.S
> +++ b/arch/sparc/lib/NG4patch.S
> @@ -52,3 +52,12 @@ niagara4_patch_pageops:
>  	retl
>  	 nop
>  	.size	niagara4_patch_pageops,.-niagara4_patch_pageops
> +
> +	.globl	niagara4_patch_fls
> +	.type	niagara4_patch_fls,#function
> +niagara4_patch_fls:
> +	NG_DO_PATCH(fls, NG4fls)
> +	NG_DO_PATCH(__fls, __NG4fls)
> +	retl
> +	 nop
> +	.size	niagara4_patch_fls,.-niagara4_patch_fls

Please patch the remaining functions in this file with ENTRY(), ENDPROC() - in a separate patch.

	Sam

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v2 1/2] sparc64: Define SPARC default fls and __fls
  2017-09-27 19:50     ` Sam Ravnborg
@ 2017-09-27 19:59       ` Sam Ravnborg
  -1 siblings, 0 replies; 20+ messages in thread
From: Sam Ravnborg @ 2017-09-27 19:59 UTC (permalink / raw)
  To: Vijay Kumar; +Cc: davem, linux-kernel, sparclinux, babu.moger, rob.gardner

> > +
> > +EXPORT_SYMBOL(__fls)
> > +EXPORT_SYMBOL(fls)
> > +
> > +fls:
> Use ENTRY(), ENDPROC() for assembler functions.
And locate EXPORT_SYMBOL() right after ENDPROC().

	Sam

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v2 1/2] sparc64: Define SPARC default fls and __fls
@ 2017-09-27 19:59       ` Sam Ravnborg
  0 siblings, 0 replies; 20+ messages in thread
From: Sam Ravnborg @ 2017-09-27 19:59 UTC (permalink / raw)
  To: Vijay Kumar; +Cc: davem, linux-kernel, sparclinux, babu.moger, rob.gardner

> > +
> > +EXPORT_SYMBOL(__fls)
> > +EXPORT_SYMBOL(fls)
> > +
> > +fls:
> Use ENTRY(), ENDPROC() for assembler functions.
And locate EXPORT_SYMBOL() right after ENDPROC().

	Sam

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v2 2/2] sparc64: Use lzcnt instruction for fls and __fls
  2017-09-27 19:56     ` Sam Ravnborg
@ 2017-09-27 20:29       ` Vijay Kumar
  -1 siblings, 0 replies; 20+ messages in thread
From: Vijay Kumar @ 2017-09-27 20:29 UTC (permalink / raw)
  To: Sam Ravnborg; +Cc: davem, linux-kernel, sparclinux, babu.moger, rob.gardner

Hi Sam,

On 9/27/2017 2:56 PM, Sam Ravnborg wrote:
>>   	.size	niagara4_patch_pageops,.-niagara4_patch_pageops
>> +
>> +	.globl	niagara4_patch_fls
>> +	.type	niagara4_patch_fls,#function
>> +niagara4_patch_fls:
>> +	NG_DO_PATCH(fls, NG4fls)
>> +	NG_DO_PATCH(__fls, __NG4fls)
>> +	retl
>> +	 nop
>> +	.size	niagara4_patch_fls,.-niagara4_patch_fls
> Please patch the remaining functions in this file with ENTRY(), ENDPROC() - in a separate patch.

Thanks for your comment. Sure, I will make the changes and address other 
comments as well in my revised version.

- Vijay

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v2 2/2] sparc64: Use lzcnt instruction for fls and __fls
@ 2017-09-27 20:29       ` Vijay Kumar
  0 siblings, 0 replies; 20+ messages in thread
From: Vijay Kumar @ 2017-09-27 20:29 UTC (permalink / raw)
  To: Sam Ravnborg; +Cc: davem, linux-kernel, sparclinux, babu.moger, rob.gardner

Hi Sam,

On 9/27/2017 2:56 PM, Sam Ravnborg wrote:
>>   	.size	niagara4_patch_pageops,.-niagara4_patch_pageops
>> +
>> +	.globl	niagara4_patch_fls
>> +	.type	niagara4_patch_fls,#function
>> +niagara4_patch_fls:
>> +	NG_DO_PATCH(fls, NG4fls)
>> +	NG_DO_PATCH(__fls, __NG4fls)
>> +	retl
>> +	 nop
>> +	.size	niagara4_patch_fls,.-niagara4_patch_fls
> Please patch the remaining functions in this file with ENTRY(), ENDPROC() - in a separate patch.

Thanks for your comment. Sure, I will make the changes and address other 
comments as well in my revised version.

- Vijay

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v2 2/2] sparc64: Use lzcnt instruction for fls and __fls
  2017-09-27 19:56     ` Sam Ravnborg
@ 2017-09-27 21:02       ` David Miller
  -1 siblings, 0 replies; 20+ messages in thread
From: David Miller @ 2017-09-27 21:02 UTC (permalink / raw)
  To: sam; +Cc: vijay.ac.kumar, linux-kernel, sparclinux, babu.moger, rob.gardner

From: Sam Ravnborg <sam@ravnborg.org>
Date: Wed, 27 Sep 2017 21:56:37 +0200

> Hi Vijay.
> 
> On Wed, Sep 27, 2017 at 01:25:26PM -0600, Vijay Kumar wrote:
>> For T4 and above, patch fls and __fls functions
>> at the boot time to use lzcnt instruction.
>> 
>> Signed-off-by: Vijay Kumar <vijay.ac.kumar@oracle.com>
>> Reviewed-by: Babu Moger <babu.moger@oracle.com>
>> ---
>>  arch/sparc/Makefile         |    1 +
>>  arch/sparc/kernel/head_64.S |    2 ++
>>  arch/sparc/lib/Makefile     |    3 +++
>>  arch/sparc/lib/NG4fls.S     |   30 ++++++++++++++++++++++++++++++
>>  arch/sparc/lib/NG4patch.S   |    9 +++++++++
>>  5 files changed, 45 insertions(+), 0 deletions(-)
>> 
>> diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile
>> index 8496a07..0763cd8 100644
>> --- a/arch/sparc/Makefile
>> +++ b/arch/sparc/Makefile
>> @@ -48,6 +48,7 @@ KBUILD_CFLAGS += -ffixed-g4 -ffixed-g5 -fcall-used-g7 -Wno-sign-compare
>>  KBUILD_CFLAGS += -Wa,--undeclared-regs
>>  KBUILD_CFLAGS += $(call cc-option,-mtune=ultrasparc3)
>>  KBUILD_AFLAGS += -m64 -mcpu=ultrasparc -Wa,--undeclared-regs
>> +KBUILD_AFLAGS += -Wa,-Asparc4
> This change is not justified anywhere??

It also will likely break with older tools.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v2 2/2] sparc64: Use lzcnt instruction for fls and __fls
@ 2017-09-27 21:02       ` David Miller
  0 siblings, 0 replies; 20+ messages in thread
From: David Miller @ 2017-09-27 21:02 UTC (permalink / raw)
  To: sam; +Cc: vijay.ac.kumar, linux-kernel, sparclinux, babu.moger, rob.gardner

From: Sam Ravnborg <sam@ravnborg.org>
Date: Wed, 27 Sep 2017 21:56:37 +0200

> Hi Vijay.
> 
> On Wed, Sep 27, 2017 at 01:25:26PM -0600, Vijay Kumar wrote:
>> For T4 and above, patch fls and __fls functions
>> at the boot time to use lzcnt instruction.
>> 
>> Signed-off-by: Vijay Kumar <vijay.ac.kumar@oracle.com>
>> Reviewed-by: Babu Moger <babu.moger@oracle.com>
>> ---
>>  arch/sparc/Makefile         |    1 +
>>  arch/sparc/kernel/head_64.S |    2 ++
>>  arch/sparc/lib/Makefile     |    3 +++
>>  arch/sparc/lib/NG4fls.S     |   30 ++++++++++++++++++++++++++++++
>>  arch/sparc/lib/NG4patch.S   |    9 +++++++++
>>  5 files changed, 45 insertions(+), 0 deletions(-)
>> 
>> diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile
>> index 8496a07..0763cd8 100644
>> --- a/arch/sparc/Makefile
>> +++ b/arch/sparc/Makefile
>> @@ -48,6 +48,7 @@ KBUILD_CFLAGS += -ffixed-g4 -ffixed-g5 -fcall-used-g7 -Wno-sign-compare
>>  KBUILD_CFLAGS += -Wa,--undeclared-regs
>>  KBUILD_CFLAGS += $(call cc-option,-mtune=ultrasparc3)
>>  KBUILD_AFLAGS += -m64 -mcpu=ultrasparc -Wa,--undeclared-regs
>> +KBUILD_AFLAGS += -Wa,-Asparc4
> This change is not justified anywhere??

It also will likely break with older tools.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v2 2/2] sparc64: Use lzcnt instruction for fls and __fls
  2017-09-27 19:56     ` Sam Ravnborg
@ 2017-09-27 21:45       ` Anthony Yznaga
  -1 siblings, 0 replies; 20+ messages in thread
From: Anthony Yznaga @ 2017-09-27 21:45 UTC (permalink / raw)
  To: Sam Ravnborg
  Cc: Vijay Kumar, davem, linux-kernel, sparclinux, babu.moger, rob.gardner


> On Sep 27, 2017, at 12:56 PM, Sam Ravnborg <sam@ravnborg.org> wrote:
> 
> Hi Vijay.
> 
> On Wed, Sep 27, 2017 at 01:25:26PM -0600, Vijay Kumar wrote:
>> For T4 and above, patch fls and __fls functions
>> at the boot time to use lzcnt instruction.
>> 
>> Signed-off-by: Vijay Kumar <vijay.ac.kumar@oracle.com>
>> Reviewed-by: Babu Moger <babu.moger@oracle.com>
>> ---
>> arch/sparc/Makefile         |    1 +
>> arch/sparc/kernel/head_64.S |    2 ++
>> arch/sparc/lib/Makefile     |    3 +++
>> arch/sparc/lib/NG4fls.S     |   30 ++++++++++++++++++++++++++++++
>> arch/sparc/lib/NG4patch.S   |    9 +++++++++
>> 5 files changed, 45 insertions(+), 0 deletions(-)
>> 
>> diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile
>> index 8496a07..0763cd8 100644
>> --- a/arch/sparc/Makefile
>> +++ b/arch/sparc/Makefile
>> @@ -48,6 +48,7 @@ KBUILD_CFLAGS += -ffixed-g4 -ffixed-g5 -fcall-used-g7 -Wno-sign-compare
>> KBUILD_CFLAGS += -Wa,--undeclared-regs
>> KBUILD_CFLAGS += $(call cc-option,-mtune=ultrasparc3)
>> KBUILD_AFLAGS += -m64 -mcpu=ultrasparc -Wa,--undeclared-regs
>> +KBUILD_AFLAGS += -Wa,-Asparc4
> This change is not justified anywhere??

It looks like this is to get the assembler to recognize the lzcnt instruction.

Vijay,
Older assemblers may not support this flag so you’ll need to hardcode the lzcnt instructions using .word directives.

Anthony

> 
>> 
>> ifeq ($(CONFIG_MCOUNT),y)
>>   KBUILD_CFLAGS += -pg
>> diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S
>> index 78e0211..1165254 100644
>> --- a/arch/sparc/kernel/head_64.S
>> +++ b/arch/sparc/kernel/head_64.S
>> @@ -628,6 +628,8 @@ niagara4_patch:
>> 	 nop
>> 	call	niagara4_patch_pageops
>> 	 nop
>> +	call	niagara4_patch_fls
>> +	 nop
>> 
>> 	ba,a,pt	%xcc, 80f
>> 	 nop
>> diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
>> index eefbb9c..72d2d8c 100644
>> --- a/arch/sparc/lib/Makefile
>> +++ b/arch/sparc/lib/Makefile
>> @@ -46,3 +46,6 @@ lib-$(CONFIG_SPARC64) += mcount.o ipcsum.o xor.o hweight.o ffs.o
>> obj-$(CONFIG_SPARC64) += iomap.o
>> obj-$(CONFIG_SPARC32) += atomic32.o ucmpdi2.o
>> obj-$(CONFIG_SPARC64) += PeeCeeI.o
>> +
>> +obj-$(CONFIG_SPARC64) += fls.o
>> +obj-$(CONFIG_SPARC64) += NG4fls.o
>> diff --git a/arch/sparc/lib/NG4fls.S b/arch/sparc/lib/NG4fls.S
>> new file mode 100644
>> index 0000000..eb239aa
>> --- /dev/null
>> +++ b/arch/sparc/lib/NG4fls.S
>> @@ -0,0 +1,30 @@
>> +/* NG4fls.S: SPARC optimized fls and __fls for T4 and above.
>> + *
>> + * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
>> + */
>> +
>> +	.text
>> +	.align 32
>> +
>> +	.globl NG4fls
>> +	.globl __NG4fls
>> +	.type  NG4fls, #function
>> +	.type  __NG4fls, #function
> 
> Use ENTRY(), ENDPROC() as already mentioned.
>> +NG4fls:
>> +	lzcnt   %o0, %o1
>> +	mov     64, %o2
>> +	retl
>> +	 sub     %o2, %o1, %o0
>> +	.size   NG4fls, .-NG4fls
>> +
>> +__NG4fls:
>> +	brz,pn  %o0, 1f
>> +        mov	%o0, %o1
> Use tabs for indent, not spaces. In this case one tab + one space.
> 
>> +	lzcnt	%o1, %o0
>> +	mov     63, %o2
>> +	sub     %o2, %o0, %o0
>> +1:
>> +	retl
>> +	 nop
>> +	.size   __NG4fls, .-__NG4fls
>> diff --git a/arch/sparc/lib/NG4patch.S b/arch/sparc/lib/NG4patch.S
>> index 3cc0f8c..1010d53 100644
>> --- a/arch/sparc/lib/NG4patch.S
>> +++ b/arch/sparc/lib/NG4patch.S
>> @@ -52,3 +52,12 @@ niagara4_patch_pageops:
>> 	retl
>> 	 nop
>> 	.size	niagara4_patch_pageops,.-niagara4_patch_pageops
>> +
>> +	.globl	niagara4_patch_fls
>> +	.type	niagara4_patch_fls,#function
>> +niagara4_patch_fls:
>> +	NG_DO_PATCH(fls, NG4fls)
>> +	NG_DO_PATCH(__fls, __NG4fls)
>> +	retl
>> +	 nop
>> +	.size	niagara4_patch_fls,.-niagara4_patch_fls
> 
> Please patch the remaining functions in this file with ENTRY(), ENDPROC() - in a separate patch.
> 
> 	Sam
> --
> To unsubscribe from this list: send the line "unsubscribe sparclinux" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v2 2/2] sparc64: Use lzcnt instruction for fls and __fls
@ 2017-09-27 21:45       ` Anthony Yznaga
  0 siblings, 0 replies; 20+ messages in thread
From: Anthony Yznaga @ 2017-09-27 21:45 UTC (permalink / raw)
  To: Sam Ravnborg
  Cc: Vijay Kumar, davem, linux-kernel, sparclinux, babu.moger, rob.gardner


> On Sep 27, 2017, at 12:56 PM, Sam Ravnborg <sam@ravnborg.org> wrote:
> 
> Hi Vijay.
> 
> On Wed, Sep 27, 2017 at 01:25:26PM -0600, Vijay Kumar wrote:
>> For T4 and above, patch fls and __fls functions
>> at the boot time to use lzcnt instruction.
>> 
>> Signed-off-by: Vijay Kumar <vijay.ac.kumar@oracle.com>
>> Reviewed-by: Babu Moger <babu.moger@oracle.com>
>> ---
>> arch/sparc/Makefile         |    1 +
>> arch/sparc/kernel/head_64.S |    2 ++
>> arch/sparc/lib/Makefile     |    3 +++
>> arch/sparc/lib/NG4fls.S     |   30 ++++++++++++++++++++++++++++++
>> arch/sparc/lib/NG4patch.S   |    9 +++++++++
>> 5 files changed, 45 insertions(+), 0 deletions(-)
>> 
>> diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile
>> index 8496a07..0763cd8 100644
>> --- a/arch/sparc/Makefile
>> +++ b/arch/sparc/Makefile
>> @@ -48,6 +48,7 @@ KBUILD_CFLAGS += -ffixed-g4 -ffixed-g5 -fcall-used-g7 -Wno-sign-compare
>> KBUILD_CFLAGS += -Wa,--undeclared-regs
>> KBUILD_CFLAGS += $(call cc-option,-mtune=ultrasparc3)
>> KBUILD_AFLAGS += -m64 -mcpu=ultrasparc -Wa,--undeclared-regs
>> +KBUILD_AFLAGS += -Wa,-Asparc4
> This change is not justified anywhere??

It looks like this is to get the assembler to recognize the lzcnt instruction.

Vijay,
Older assemblers may not support this flag so you’ll need to hardcode the lzcnt instructions using .word directives.

Anthony

> 
>> 
>> ifeq ($(CONFIG_MCOUNT),y)
>>   KBUILD_CFLAGS += -pg
>> diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S
>> index 78e0211..1165254 100644
>> --- a/arch/sparc/kernel/head_64.S
>> +++ b/arch/sparc/kernel/head_64.S
>> @@ -628,6 +628,8 @@ niagara4_patch:
>> 	 nop
>> 	call	niagara4_patch_pageops
>> 	 nop
>> +	call	niagara4_patch_fls
>> +	 nop
>> 
>> 	ba,a,pt	%xcc, 80f
>> 	 nop
>> diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
>> index eefbb9c..72d2d8c 100644
>> --- a/arch/sparc/lib/Makefile
>> +++ b/arch/sparc/lib/Makefile
>> @@ -46,3 +46,6 @@ lib-$(CONFIG_SPARC64) += mcount.o ipcsum.o xor.o hweight.o ffs.o
>> obj-$(CONFIG_SPARC64) += iomap.o
>> obj-$(CONFIG_SPARC32) += atomic32.o ucmpdi2.o
>> obj-$(CONFIG_SPARC64) += PeeCeeI.o
>> +
>> +obj-$(CONFIG_SPARC64) += fls.o
>> +obj-$(CONFIG_SPARC64) += NG4fls.o
>> diff --git a/arch/sparc/lib/NG4fls.S b/arch/sparc/lib/NG4fls.S
>> new file mode 100644
>> index 0000000..eb239aa
>> --- /dev/null
>> +++ b/arch/sparc/lib/NG4fls.S
>> @@ -0,0 +1,30 @@
>> +/* NG4fls.S: SPARC optimized fls and __fls for T4 and above.
>> + *
>> + * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
>> + */
>> +
>> +	.text
>> +	.align 32
>> +
>> +	.globl NG4fls
>> +	.globl __NG4fls
>> +	.type  NG4fls, #function
>> +	.type  __NG4fls, #function
> 
> Use ENTRY(), ENDPROC() as already mentioned.
>> +NG4fls:
>> +	lzcnt   %o0, %o1
>> +	mov     64, %o2
>> +	retl
>> +	 sub     %o2, %o1, %o0
>> +	.size   NG4fls, .-NG4fls
>> +
>> +__NG4fls:
>> +	brz,pn  %o0, 1f
>> +        mov	%o0, %o1
> Use tabs for indent, not spaces. In this case one tab + one space.
> 
>> +	lzcnt	%o1, %o0
>> +	mov     63, %o2
>> +	sub     %o2, %o0, %o0
>> +1:
>> +	retl
>> +	 nop
>> +	.size   __NG4fls, .-__NG4fls
>> diff --git a/arch/sparc/lib/NG4patch.S b/arch/sparc/lib/NG4patch.S
>> index 3cc0f8c..1010d53 100644
>> --- a/arch/sparc/lib/NG4patch.S
>> +++ b/arch/sparc/lib/NG4patch.S
>> @@ -52,3 +52,12 @@ niagara4_patch_pageops:
>> 	retl
>> 	 nop
>> 	.size	niagara4_patch_pageops,.-niagara4_patch_pageops
>> +
>> +	.globl	niagara4_patch_fls
>> +	.type	niagara4_patch_fls,#function
>> +niagara4_patch_fls:
>> +	NG_DO_PATCH(fls, NG4fls)
>> +	NG_DO_PATCH(__fls, __NG4fls)
>> +	retl
>> +	 nop
>> +	.size	niagara4_patch_fls,.-niagara4_patch_fls
> 
> Please patch the remaining functions in this file with ENTRY(), ENDPROC() - in a separate patch.
> 
> 	Sam
> --
> To unsubscribe from this list: send the line "unsubscribe sparclinux" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v2 2/2] sparc64: Use lzcnt instruction for fls and __fls
  2017-09-27 21:45       ` Anthony Yznaga
@ 2017-09-27 22:10         ` David Miller
  -1 siblings, 0 replies; 20+ messages in thread
From: David Miller @ 2017-09-27 22:10 UTC (permalink / raw)
  To: anthony.yznaga
  Cc: sam, vijay.ac.kumar, linux-kernel, sparclinux, babu.moger, rob.gardner

From: Anthony Yznaga <anthony.yznaga@oracle.com>
Date: Wed, 27 Sep 2017 14:45:34 -0700

> Vijay,
> Older assemblers may not support this flag so you’ll need to
> hardcode the lzcnt instructions using .word directives.

Right, older binutils do not support the T4 instructions.

This is why we hardcode the opcodes for all of the crypto
instructions used under arch/sparc64/crypto/, for example.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v2 2/2] sparc64: Use lzcnt instruction for fls and __fls
@ 2017-09-27 22:10         ` David Miller
  0 siblings, 0 replies; 20+ messages in thread
From: David Miller @ 2017-09-27 22:10 UTC (permalink / raw)
  To: anthony.yznaga
  Cc: sam, vijay.ac.kumar, linux-kernel, sparclinux, babu.moger, rob.gardner

From: Anthony Yznaga <anthony.yznaga@oracle.com>
Date: Wed, 27 Sep 2017 14:45:34 -0700

> Vijay,
> Older assemblers may not support this flag so you’ll need to
> hardcode the lzcnt instructions using .word directives.

Right, older binutils do not support the T4 instructions.

This is why we hardcode the opcodes for all of the crypto
instructions used under arch/sparc64/crypto/, for example.

^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2017-09-27 22:10 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-09-27 19:25 [PATCH v2 0/2] sparc64: Optimize fls, fls64 and __fls Vijay Kumar
2017-09-27 19:25 ` Vijay Kumar
2017-09-27 19:25 ` [PATCH v2 1/2] sparc64: Define SPARC default fls " Vijay Kumar
2017-09-27 19:25   ` Vijay Kumar
2017-09-27 19:50   ` Sam Ravnborg
2017-09-27 19:50     ` Sam Ravnborg
2017-09-27 19:59     ` Sam Ravnborg
2017-09-27 19:59       ` Sam Ravnborg
2017-09-27 19:25 ` [PATCH v2 2/2] sparc64: Use lzcnt instruction for " Vijay Kumar
2017-09-27 19:25   ` Vijay Kumar
2017-09-27 19:56   ` Sam Ravnborg
2017-09-27 19:56     ` Sam Ravnborg
2017-09-27 20:29     ` Vijay Kumar
2017-09-27 20:29       ` Vijay Kumar
2017-09-27 21:02     ` David Miller
2017-09-27 21:02       ` David Miller
2017-09-27 21:45     ` Anthony Yznaga
2017-09-27 21:45       ` Anthony Yznaga
2017-09-27 22:10       ` David Miller
2017-09-27 22:10         ` David Miller

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.