* [PATCH v4 0/5] sparc64: Optimize fls and __fls
@ 2017-10-11 18:50 ` Vijay Kumar
0 siblings, 0 replies; 16+ messages in thread
From: Vijay Kumar @ 2017-10-11 18:50 UTC (permalink / raw)
To: davem
Cc: linux-kernel, sparclinux, babu.moger, rob.gardner, sam, anthony.yznaga
SPARC provides the lzcnt instruction (with VIS3), which can be used to
optimize the fls, __fls and fls64 functions. On systems that support the
lzcnt instruction, we now do boot-time patching to use the SPARC-optimized
fls, __fls and fls64 functions.
v3->v4:
- Fixed a typo.
v2->v3:
- Using ENTRY(), ENDPROC() for assembler functions.
- Removed BITS_PER_LONG from __fls.
- Using generic fls64().
- Replaced lzcnt instruction with .word directive.
v1->v2:
- Fixed delay slot issue.
Vijay Kumar (5):
sparc64: Define SPARC default fls function
sparc64: Define SPARC default __fls function
sparc64: SPARC optimized fls function
sparc64: SPARC optimized __fls function
sparc64: Use sparc optimized fls and __fls for T4 and above
arch/sparc/include/asm/bitops_64.h | 5 ++-
arch/sparc/kernel/head_64.S | 2 +
arch/sparc/lib/Makefile | 3 ++
arch/sparc/lib/NG4fls.S | 30 ++++++++++++++++
arch/sparc/lib/NG4patch.S | 9 +++++
arch/sparc/lib/fls.S | 67 ++++++++++++++++++++++++++++++++++++
arch/sparc/lib/fls64.S | 61 ++++++++++++++++++++++++++++++++
7 files changed, 175 insertions(+), 2 deletions(-)
create mode 100644 arch/sparc/lib/NG4fls.S
create mode 100644 arch/sparc/lib/fls.S
create mode 100644 arch/sparc/lib/fls64.S
^ permalink raw reply [flat|nested] 16+ messages in thread
* [PATCH v4 0/5] sparc64: Optimize fls and __fls
@ 2017-10-11 18:50 ` Vijay Kumar
0 siblings, 0 replies; 16+ messages in thread
From: Vijay Kumar @ 2017-10-11 18:50 UTC (permalink / raw)
To: davem
Cc: linux-kernel, sparclinux, babu.moger, rob.gardner, sam, anthony.yznaga
SPARC provides the lzcnt instruction (with VIS3), which can be used to
optimize the fls, __fls and fls64 functions. On systems that support the
lzcnt instruction, we now do boot-time patching to use the SPARC-optimized
fls, __fls and fls64 functions.
v3->v4:
- Fixed a typo.
v2->v3:
- Using ENTRY(), ENDPROC() for assembler functions.
- Removed BITS_PER_LONG from __fls.
- Using generic fls64().
- Replaced lzcnt instruction with .word directive.
v1->v2:
- Fixed delay slot issue.
Vijay Kumar (5):
sparc64: Define SPARC default fls function
sparc64: Define SPARC default __fls function
sparc64: SPARC optimized fls function
sparc64: SPARC optimized __fls function
sparc64: Use sparc optimized fls and __fls for T4 and above
arch/sparc/include/asm/bitops_64.h | 5 ++-
arch/sparc/kernel/head_64.S | 2 +
arch/sparc/lib/Makefile | 3 ++
arch/sparc/lib/NG4fls.S | 30 ++++++++++++++++
arch/sparc/lib/NG4patch.S | 9 +++++
arch/sparc/lib/fls.S | 67 ++++++++++++++++++++++++++++++++++++
arch/sparc/lib/fls64.S | 61 ++++++++++++++++++++++++++++++++
7 files changed, 175 insertions(+), 2 deletions(-)
create mode 100644 arch/sparc/lib/NG4fls.S
create mode 100644 arch/sparc/lib/fls.S
create mode 100644 arch/sparc/lib/fls64.S
^ permalink raw reply [flat|nested] 16+ messages in thread
* [PATCH v4 1/5] sparc64: Define SPARC default fls function
2017-10-11 18:50 ` Vijay Kumar
@ 2017-10-11 18:50 ` Vijay Kumar
-1 siblings, 0 replies; 16+ messages in thread
From: Vijay Kumar @ 2017-10-11 18:50 UTC (permalink / raw)
To: davem
Cc: linux-kernel, sparclinux, babu.moger, rob.gardner, sam, anthony.yznaga
fls will now require boot-time patching on T4 and above.
Redefine it under arch/sparc/lib.
Signed-off-by: Vijay Kumar <vijay.ac.kumar@oracle.com>
---
arch/sparc/include/asm/bitops_64.h | 3 +-
arch/sparc/lib/Makefile | 1 +
arch/sparc/lib/fls.S | 67 ++++++++++++++++++++++++++++++++++++
3 files changed, 70 insertions(+), 1 deletions(-)
diff --git a/arch/sparc/include/asm/bitops_64.h b/arch/sparc/include/asm/bitops_64.h
index 2d52240..30aea56 100644
--- a/arch/sparc/include/asm/bitops_64.h
+++ b/arch/sparc/include/asm/bitops_64.h
@@ -22,9 +22,10 @@
void clear_bit(unsigned long nr, volatile unsigned long *addr);
void change_bit(unsigned long nr, volatile unsigned long *addr);
+int fls(unsigned int word);
+
#include <asm-generic/bitops/non-atomic.h>
-#include <asm-generic/bitops/fls.h>
#include <asm-generic/bitops/__fls.h>
#include <asm-generic/bitops/fls64.h>
diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
index a1a2d39..3b9f5e0 100644
--- a/arch/sparc/lib/Makefile
+++ b/arch/sparc/lib/Makefile
@@ -16,6 +16,7 @@ lib-$(CONFIG_SPARC64) += atomic_64.o
lib-$(CONFIG_SPARC32) += lshrdi3.o ashldi3.o
lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o
lib-$(CONFIG_SPARC64) += multi3.o
+lib-$(CONFIG_SPARC64) += fls.o
lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o
lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o
diff --git a/arch/sparc/lib/fls.S b/arch/sparc/lib/fls.S
new file mode 100644
index 0000000..06b8d30
--- /dev/null
+++ b/arch/sparc/lib/fls.S
@@ -0,0 +1,67 @@
+/* fls.S: SPARC default fls definition.
+ *
+ * SPARC default fls definition, which follows the same algorithm as
+ * in generic fls(). This function will be boot time patched on T4
+ * and onward.
+ */
+
+#include <linux/linkage.h>
+#include <asm/export.h>
+
+ .text
+ .register %g2, #scratch
+ .register %g3, #scratch
+ENTRY(fls) /* int fls(unsigned int word): word arrives in %o0, result is returned in %o0 */
+ brz,pn %o0, 6f /* word == 0: go straight to the return at 6 */
+ mov 0, %o1 /* delay slot: %o1 = 0, the result for a zero word */
+ sethi %hi(0xffff0000), %g3 /* %g3 = mask for the top 16 bits */
+ mov %o0, %g2 /* %g2 = working copy used by the mask tests */
+ andcc %o0, %g3, %g0 /* test the top 16 bits; only the condition codes are kept */
+ be,pt %icc, 8f /* top 16 bits all clear: continue at 8 */
+ mov 32, %o1 /* delay slot: %o1 = 32, the starting count */
+ sethi %hi(0xff000000), %g3 /* %g3 = mask for the top 8 bits */
+ andcc %g2, %g3, %g0
+ bne,pt %icc, 3f /* a top-8 bit is set: skip the shift by 8 */
+ sethi %hi(0xf0000000), %g3 /* delay slot: %g3 = top-4-bit mask used at 3 */
+ sll %o0, 8, %o0 /* top 8 bits clear: shift the word up by 8 */
+1: /* tail of the shift-by-8 step; also entered from 8 */
+ add %o1, -8, %o1 /* count -= 8 */
+ sra %o0, 0, %o0 /* sign-extend the low 32 bits of the shifted word */
+ mov %o0, %g2 /* refresh the working copy */
+2:
+ sethi %hi(0xf0000000), %g3 /* %g3 = mask for the top 4 bits */
+3:
+ andcc %g2, %g3, %g0
+ bne,pt %icc, 4f /* a top-4 bit is set: skip the shift by 4 */
+ sethi %hi(0xc0000000), %g3 /* delay slot: %g3 = top-2-bit mask used at 4 */
+ sll %o0, 4, %o0 /* top 4 bits clear: shift the word up by 4 */
+ add %o1, -4, %o1 /* count -= 4 */
+ sra %o0, 0, %o0 /* sign-extend the low 32 bits of the shifted word */
+ mov %o0, %g2 /* refresh the working copy */
+4:
+ andcc %g2, %g3, %g0
+ be,a,pt %icc, 7f /* top 2 bits clear: go to 7; annulled, so the delay slot runs only when taken */
+ sll %o0, 2, %o0 /* delay slot (taken path only): shift the word up by 2 */
+5:
+ xnor %g0, %o0, %o0 /* %o0 = ~%o0 */
+ srl %o0, 31, %o0 /* %o0 = 1 if bit 31 of the shifted word was clear, else 0 */
+ sub %o1, %o0, %o1 /* final adjustment of the count */
+6:
+ jmp %o7 + 8 /* return to the caller */
+ sra %o1, 0, %o0 /* delay slot: return value = count */
+7:
+ add %o1, -2, %o1 /* count -= 2 */
+ ba,pt %xcc, 5b /* rejoin the final step at 5 */
+ sra %o0, 0, %o0 /* delay slot: sign-extend the low 32 bits of the shifted word */
+8:
+ sll %o0, 16, %o0 /* top 16 bits clear: shift the word up by 16 */
+ sethi %hi(0xff000000), %g3 /* %g3 = mask for the top 8 bits */
+ sra %o0, 0, %o0 /* sign-extend the low 32 bits of the shifted word */
+ mov %o0, %g2 /* refresh the working copy */
+ andcc %g2, %g3, %g0
+ bne,pt %icc, 2b /* a top-8 bit is set after the shift: continue at 2 */
+ mov 16, %o1 /* delay slot: count = 16, replacing the 32 set earlier */
+ ba,pt %xcc, 1b /* otherwise join the shift-by-8 tail at 1 */
+ sll %o0, 8, %o0 /* delay slot: shift the word up by 8 */
+ENDPROC(fls)
+EXPORT_SYMBOL(fls)
--
1.7.1
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [PATCH v4 1/5] sparc64: Define SPARC default fls function
@ 2017-10-11 18:50 ` Vijay Kumar
0 siblings, 0 replies; 16+ messages in thread
From: Vijay Kumar @ 2017-10-11 18:50 UTC (permalink / raw)
To: davem
Cc: linux-kernel, sparclinux, babu.moger, rob.gardner, sam, anthony.yznaga
fls will now require boot-time patching on T4 and above.
Redefine it under arch/sparc/lib.
Signed-off-by: Vijay Kumar <vijay.ac.kumar@oracle.com>
---
arch/sparc/include/asm/bitops_64.h | 3 +-
arch/sparc/lib/Makefile | 1 +
arch/sparc/lib/fls.S | 67 ++++++++++++++++++++++++++++++++++++
3 files changed, 70 insertions(+), 1 deletions(-)
diff --git a/arch/sparc/include/asm/bitops_64.h b/arch/sparc/include/asm/bitops_64.h
index 2d52240..30aea56 100644
--- a/arch/sparc/include/asm/bitops_64.h
+++ b/arch/sparc/include/asm/bitops_64.h
@@ -22,9 +22,10 @@
void clear_bit(unsigned long nr, volatile unsigned long *addr);
void change_bit(unsigned long nr, volatile unsigned long *addr);
+int fls(unsigned int word);
+
#include <asm-generic/bitops/non-atomic.h>
-#include <asm-generic/bitops/fls.h>
#include <asm-generic/bitops/__fls.h>
#include <asm-generic/bitops/fls64.h>
diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
index a1a2d39..3b9f5e0 100644
--- a/arch/sparc/lib/Makefile
+++ b/arch/sparc/lib/Makefile
@@ -16,6 +16,7 @@ lib-$(CONFIG_SPARC64) += atomic_64.o
lib-$(CONFIG_SPARC32) += lshrdi3.o ashldi3.o
lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o
lib-$(CONFIG_SPARC64) += multi3.o
+lib-$(CONFIG_SPARC64) += fls.o
lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o
lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o
diff --git a/arch/sparc/lib/fls.S b/arch/sparc/lib/fls.S
new file mode 100644
index 0000000..06b8d30
--- /dev/null
+++ b/arch/sparc/lib/fls.S
@@ -0,0 +1,67 @@
+/* fls.S: SPARC default fls definition.
+ *
+ * SPARC default fls definition, which follows the same algorithm as
+ * in generic fls(). This function will be boot time patched on T4
+ * and onward.
+ */
+
+#include <linux/linkage.h>
+#include <asm/export.h>
+
+ .text
+ .register %g2, #scratch
+ .register %g3, #scratch
+ENTRY(fls) /* int fls(unsigned int word): word arrives in %o0, result is returned in %o0 */
+ brz,pn %o0, 6f /* word == 0: go straight to the return at 6 */
+ mov 0, %o1 /* delay slot: %o1 = 0, the result for a zero word */
+ sethi %hi(0xffff0000), %g3 /* %g3 = mask for the top 16 bits */
+ mov %o0, %g2 /* %g2 = working copy used by the mask tests */
+ andcc %o0, %g3, %g0 /* test the top 16 bits; only the condition codes are kept */
+ be,pt %icc, 8f /* top 16 bits all clear: continue at 8 */
+ mov 32, %o1 /* delay slot: %o1 = 32, the starting count */
+ sethi %hi(0xff000000), %g3 /* %g3 = mask for the top 8 bits */
+ andcc %g2, %g3, %g0
+ bne,pt %icc, 3f /* a top-8 bit is set: skip the shift by 8 */
+ sethi %hi(0xf0000000), %g3 /* delay slot: %g3 = top-4-bit mask used at 3 */
+ sll %o0, 8, %o0 /* top 8 bits clear: shift the word up by 8 */
+1: /* tail of the shift-by-8 step; also entered from 8 */
+ add %o1, -8, %o1 /* count -= 8 */
+ sra %o0, 0, %o0 /* sign-extend the low 32 bits of the shifted word */
+ mov %o0, %g2 /* refresh the working copy */
+2:
+ sethi %hi(0xf0000000), %g3 /* %g3 = mask for the top 4 bits */
+3:
+ andcc %g2, %g3, %g0
+ bne,pt %icc, 4f /* a top-4 bit is set: skip the shift by 4 */
+ sethi %hi(0xc0000000), %g3 /* delay slot: %g3 = top-2-bit mask used at 4 */
+ sll %o0, 4, %o0 /* top 4 bits clear: shift the word up by 4 */
+ add %o1, -4, %o1 /* count -= 4 */
+ sra %o0, 0, %o0 /* sign-extend the low 32 bits of the shifted word */
+ mov %o0, %g2 /* refresh the working copy */
+4:
+ andcc %g2, %g3, %g0
+ be,a,pt %icc, 7f /* top 2 bits clear: go to 7; annulled, so the delay slot runs only when taken */
+ sll %o0, 2, %o0 /* delay slot (taken path only): shift the word up by 2 */
+5:
+ xnor %g0, %o0, %o0 /* %o0 = ~%o0 */
+ srl %o0, 31, %o0 /* %o0 = 1 if bit 31 of the shifted word was clear, else 0 */
+ sub %o1, %o0, %o1 /* final adjustment of the count */
+6:
+ jmp %o7 + 8 /* return to the caller */
+ sra %o1, 0, %o0 /* delay slot: return value = count */
+7:
+ add %o1, -2, %o1 /* count -= 2 */
+ ba,pt %xcc, 5b /* rejoin the final step at 5 */
+ sra %o0, 0, %o0 /* delay slot: sign-extend the low 32 bits of the shifted word */
+8:
+ sll %o0, 16, %o0 /* top 16 bits clear: shift the word up by 16 */
+ sethi %hi(0xff000000), %g3 /* %g3 = mask for the top 8 bits */
+ sra %o0, 0, %o0 /* sign-extend the low 32 bits of the shifted word */
+ mov %o0, %g2 /* refresh the working copy */
+ andcc %g2, %g3, %g0
+ bne,pt %icc, 2b /* a top-8 bit is set after the shift: continue at 2 */
+ mov 16, %o1 /* delay slot: count = 16, replacing the 32 set earlier */
+ ba,pt %xcc, 1b /* otherwise join the shift-by-8 tail at 1 */
+ sll %o0, 8, %o0 /* delay slot: shift the word up by 8 */
+ENDPROC(fls)
+EXPORT_SYMBOL(fls)
--
1.7.1
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [PATCH v4 2/5] sparc64: Define SPARC default __fls function
2017-10-11 18:50 ` Vijay Kumar
@ 2017-10-11 18:50 ` Vijay Kumar
-1 siblings, 0 replies; 16+ messages in thread
From: Vijay Kumar @ 2017-10-11 18:50 UTC (permalink / raw)
To: davem
Cc: linux-kernel, sparclinux, babu.moger, rob.gardner, sam, anthony.yznaga
__fls will now require boot-time patching on T4 and above.
Redefine it under arch/sparc/lib.
Signed-off-by: Vijay Kumar <vijay.ac.kumar@oracle.com>
---
arch/sparc/include/asm/bitops_64.h | 2 +-
arch/sparc/lib/Makefile | 1 +
arch/sparc/lib/fls64.S | 61 ++++++++++++++++++++++++++++++++++++
3 files changed, 63 insertions(+), 1 deletions(-)
diff --git a/arch/sparc/include/asm/bitops_64.h b/arch/sparc/include/asm/bitops_64.h
index 30aea56..d7a46e2 100644
--- a/arch/sparc/include/asm/bitops_64.h
+++ b/arch/sparc/include/asm/bitops_64.h
@@ -23,10 +23,10 @@
void change_bit(unsigned long nr, volatile unsigned long *addr);
int fls(unsigned int word);
+int __fls(unsigned long word);
#include <asm-generic/bitops/non-atomic.h>
-#include <asm-generic/bitops/__fls.h>
#include <asm-generic/bitops/fls64.h>
#ifdef __KERNEL__
diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
index 3b9f5e0..5380c59 100644
--- a/arch/sparc/lib/Makefile
+++ b/arch/sparc/lib/Makefile
@@ -17,6 +17,7 @@ lib-$(CONFIG_SPARC32) += lshrdi3.o ashldi3.o
lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o
lib-$(CONFIG_SPARC64) += multi3.o
lib-$(CONFIG_SPARC64) += fls.o
+lib-$(CONFIG_SPARC64) += fls64.o
lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o
lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o
diff --git a/arch/sparc/lib/fls64.S b/arch/sparc/lib/fls64.S
new file mode 100644
index 0000000..c83e22a
--- /dev/null
+++ b/arch/sparc/lib/fls64.S
@@ -0,0 +1,61 @@
+/* fls64.S: SPARC default __fls definition.
+ *
+ * SPARC default __fls definition, which follows the same algorithm as
+ * in generic __fls(). This function will be boot time patched on T4
+ * and onward.
+ */
+
+#include <linux/linkage.h>
+#include <asm/export.h>
+
+ .text
+ .register %g2, #scratch
+ .register %g3, #scratch
+ENTRY(__fls) /* int __fls(unsigned long word): word arrives in %o0, result is returned in %o0 */
+ mov -1, %g2
+ sllx %g2, 32, %g2 /* %g2 = mask for the upper 32 bits */
+ and %o0, %g2, %g2
+ brnz,pt %g2, 1f /* an upper-32 bit is set: keep the word as it is */
+ mov 63, %g1 /* delay slot: %g1 = 63, the starting bit index */
+ sllx %o0, 32, %o0 /* upper half empty: move the lower half up by 32 */
+ mov 31, %g1 /* and start from index 31 instead */
+1:
+ mov -1, %g2
+ sllx %g2, 48, %g2 /* %g2 = mask for the top 16 bits */
+ and %o0, %g2, %g2
+ brnz,pt %g2, 2f /* a top-16 bit is set: skip the shift by 16 */
+ mov -1, %g2 /* delay slot: reload all-ones for the next mask */
+ sllx %o0, 16, %o0 /* top 16 bits clear: shift the word up by 16 */
+ add %g1, -16, %g1 /* index -= 16 */
+2:
+ mov -1, %g2
+ sllx %g2, 56, %g2 /* %g2 = mask for the top 8 bits */
+ and %o0, %g2, %g2
+ brnz,pt %g2, 3f /* a top-8 bit is set: skip the shift by 8 */
+ mov -1, %g2 /* delay slot: reload all-ones for the mask built at 3 */
+ sllx %o0, 8, %o0 /* top 8 bits clear: shift the word up by 8 */
+ add %g1, -8, %g1 /* index -= 8 */
+3:
+ sllx %g2, 60, %g2 /* %g2 is all-ones from the delay slot above; now the top-4-bit mask */
+ and %o0, %g2, %g2
+ brnz,pt %g2, 4f /* a top-4 bit is set: skip the shift by 4 */
+ mov -1, %g2 /* delay slot: reload all-ones for the mask built at 4 */
+ sllx %o0, 4, %o0 /* top 4 bits clear: shift the word up by 4 */
+ add %g1, -4, %g1 /* index -= 4 */
+4:
+ sllx %g2, 62, %g2 /* %g2 is all-ones from the delay slot above; now the top-2-bit mask */
+ and %o0, %g2, %g2
+ brnz,pt %g2, 5f /* a top-2 bit is set: skip the shift by 2 */
+ mov -1, %g3 /* delay slot: all-ones in %g3 for the bit-63 mask built at 5 */
+ sllx %o0, 2, %o0 /* top 2 bits clear: shift the word up by 2 */
+ add %g1, -2, %g1 /* index -= 2 */
+5:
+ mov 0, %g2 /* %g2 = 0: default final adjustment */
+ sllx %g3, 63, %g3 /* %g3 = mask for bit 63 only */
+ and %o0, %g3, %o0 /* isolate bit 63 of the shifted word */
+ movre %o0, 1, %g2 /* bit 63 clear: the adjustment becomes 1 */
+ sub %g1, %g2, %g1 /* index -= adjustment */
+ jmp %o7+8 /* return to the caller */
+ sra %g1, 0, %o0 /* delay slot: return value = index */
+ENDPROC(__fls)
+EXPORT_SYMBOL(__fls)
--
1.7.1
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [PATCH v4 2/5] sparc64: Define SPARC default __fls function
@ 2017-10-11 18:50 ` Vijay Kumar
0 siblings, 0 replies; 16+ messages in thread
From: Vijay Kumar @ 2017-10-11 18:50 UTC (permalink / raw)
To: davem
Cc: linux-kernel, sparclinux, babu.moger, rob.gardner, sam, anthony.yznaga
__fls will now require boot-time patching on T4 and above.
Redefine it under arch/sparc/lib.
Signed-off-by: Vijay Kumar <vijay.ac.kumar@oracle.com>
---
arch/sparc/include/asm/bitops_64.h | 2 +-
arch/sparc/lib/Makefile | 1 +
arch/sparc/lib/fls64.S | 61 ++++++++++++++++++++++++++++++++++++
3 files changed, 63 insertions(+), 1 deletions(-)
diff --git a/arch/sparc/include/asm/bitops_64.h b/arch/sparc/include/asm/bitops_64.h
index 30aea56..d7a46e2 100644
--- a/arch/sparc/include/asm/bitops_64.h
+++ b/arch/sparc/include/asm/bitops_64.h
@@ -23,10 +23,10 @@
void change_bit(unsigned long nr, volatile unsigned long *addr);
int fls(unsigned int word);
+int __fls(unsigned long word);
#include <asm-generic/bitops/non-atomic.h>
-#include <asm-generic/bitops/__fls.h>
#include <asm-generic/bitops/fls64.h>
#ifdef __KERNEL__
diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
index 3b9f5e0..5380c59 100644
--- a/arch/sparc/lib/Makefile
+++ b/arch/sparc/lib/Makefile
@@ -17,6 +17,7 @@ lib-$(CONFIG_SPARC32) += lshrdi3.o ashldi3.o
lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o
lib-$(CONFIG_SPARC64) += multi3.o
lib-$(CONFIG_SPARC64) += fls.o
+lib-$(CONFIG_SPARC64) += fls64.o
lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o
lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o
diff --git a/arch/sparc/lib/fls64.S b/arch/sparc/lib/fls64.S
new file mode 100644
index 0000000..c83e22a
--- /dev/null
+++ b/arch/sparc/lib/fls64.S
@@ -0,0 +1,61 @@
+/* fls64.S: SPARC default __fls definition.
+ *
+ * SPARC default __fls definition, which follows the same algorithm as
+ * in generic __fls(). This function will be boot time patched on T4
+ * and onward.
+ */
+
+#include <linux/linkage.h>
+#include <asm/export.h>
+
+ .text
+ .register %g2, #scratch
+ .register %g3, #scratch
+ENTRY(__fls) /* int __fls(unsigned long word): word arrives in %o0, result is returned in %o0 */
+ mov -1, %g2
+ sllx %g2, 32, %g2 /* %g2 = mask for the upper 32 bits */
+ and %o0, %g2, %g2
+ brnz,pt %g2, 1f /* an upper-32 bit is set: keep the word as it is */
+ mov 63, %g1 /* delay slot: %g1 = 63, the starting bit index */
+ sllx %o0, 32, %o0 /* upper half empty: move the lower half up by 32 */
+ mov 31, %g1 /* and start from index 31 instead */
+1:
+ mov -1, %g2
+ sllx %g2, 48, %g2 /* %g2 = mask for the top 16 bits */
+ and %o0, %g2, %g2
+ brnz,pt %g2, 2f /* a top-16 bit is set: skip the shift by 16 */
+ mov -1, %g2 /* delay slot: reload all-ones for the next mask */
+ sllx %o0, 16, %o0 /* top 16 bits clear: shift the word up by 16 */
+ add %g1, -16, %g1 /* index -= 16 */
+2:
+ mov -1, %g2
+ sllx %g2, 56, %g2 /* %g2 = mask for the top 8 bits */
+ and %o0, %g2, %g2
+ brnz,pt %g2, 3f /* a top-8 bit is set: skip the shift by 8 */
+ mov -1, %g2 /* delay slot: reload all-ones for the mask built at 3 */
+ sllx %o0, 8, %o0 /* top 8 bits clear: shift the word up by 8 */
+ add %g1, -8, %g1 /* index -= 8 */
+3:
+ sllx %g2, 60, %g2 /* %g2 is all-ones from the delay slot above; now the top-4-bit mask */
+ and %o0, %g2, %g2
+ brnz,pt %g2, 4f /* a top-4 bit is set: skip the shift by 4 */
+ mov -1, %g2 /* delay slot: reload all-ones for the mask built at 4 */
+ sllx %o0, 4, %o0 /* top 4 bits clear: shift the word up by 4 */
+ add %g1, -4, %g1 /* index -= 4 */
+4:
+ sllx %g2, 62, %g2 /* %g2 is all-ones from the delay slot above; now the top-2-bit mask */
+ and %o0, %g2, %g2
+ brnz,pt %g2, 5f /* a top-2 bit is set: skip the shift by 2 */
+ mov -1, %g3 /* delay slot: all-ones in %g3 for the bit-63 mask built at 5 */
+ sllx %o0, 2, %o0 /* top 2 bits clear: shift the word up by 2 */
+ add %g1, -2, %g1 /* index -= 2 */
+5:
+ mov 0, %g2 /* %g2 = 0: default final adjustment */
+ sllx %g3, 63, %g3 /* %g3 = mask for bit 63 only */
+ and %o0, %g3, %o0 /* isolate bit 63 of the shifted word */
+ movre %o0, 1, %g2 /* bit 63 clear: the adjustment becomes 1 */
+ sub %g1, %g2, %g1 /* index -= adjustment */
+ jmp %o7+8 /* return to the caller */
+ sra %g1, 0, %o0 /* delay slot: return value = index */
+ENDPROC(__fls)
+EXPORT_SYMBOL(__fls)
--
1.7.1
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [PATCH v4 3/5] sparc64: SPARC optimized fls function
2017-10-11 18:50 ` Vijay Kumar
@ 2017-10-11 18:50 ` Vijay Kumar
-1 siblings, 0 replies; 16+ messages in thread
From: Vijay Kumar @ 2017-10-11 18:50 UTC (permalink / raw)
To: davem
Cc: linux-kernel, sparclinux, babu.moger, rob.gardner, sam, anthony.yznaga
Defined SPARC optimized fls using lzcnt opcode.
Signed-off-by: Vijay Kumar <vijay.ac.kumar@oracle.com>
---
arch/sparc/lib/Makefile | 1 +
arch/sparc/lib/NG4fls.S | 20 ++++++++++++++++++++
2 files changed, 21 insertions(+), 0 deletions(-)
diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
index 5380c59..2823b8e 100644
--- a/arch/sparc/lib/Makefile
+++ b/arch/sparc/lib/Makefile
@@ -18,6 +18,7 @@ lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o
lib-$(CONFIG_SPARC64) += multi3.o
lib-$(CONFIG_SPARC64) += fls.o
lib-$(CONFIG_SPARC64) += fls64.o
+obj-$(CONFIG_SPARC64) += NG4fls.o
lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o
lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o
diff --git a/arch/sparc/lib/NG4fls.S b/arch/sparc/lib/NG4fls.S
new file mode 100644
index 0000000..bc17b65
--- /dev/null
+++ b/arch/sparc/lib/NG4fls.S
@@ -0,0 +1,20 @@
+/* NG4fls.S: SPARC optimized fls and __fls for T4 and above.
+ *
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#include <linux/linkage.h>
+
+#define LZCNT_O0_G2 \
+ .word 0x85b002e8
+
+ .text
+ .register %g2, #scratch
+ .register %g3, #scratch
+
+ENTRY(NG4fls) /* lzcnt-based fls: word arrives in %o0, result is returned in %o0 */
+ LZCNT_O0_G2 !lzcnt %o0, %g2
+ mov 64, %g3 /* NOTE(review): 64 - lzcnt matches a 32-bit fls only if bits 63:32 of %o0 are zero - confirm callers zero-extend */
+ retl /* return to the caller; the next instruction is the delay slot */
+ sub %g3, %g2, %o0 /* delay slot: result = 64 - %g2 (presumably the leading-zero count) */
+ENDPROC(NG4fls)
--
1.7.1
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [PATCH v4 3/5] sparc64: SPARC optimized fls function
@ 2017-10-11 18:50 ` Vijay Kumar
0 siblings, 0 replies; 16+ messages in thread
From: Vijay Kumar @ 2017-10-11 18:50 UTC (permalink / raw)
To: davem
Cc: linux-kernel, sparclinux, babu.moger, rob.gardner, sam, anthony.yznaga
Defined SPARC optimized fls using lzcnt opcode.
Signed-off-by: Vijay Kumar <vijay.ac.kumar@oracle.com>
---
arch/sparc/lib/Makefile | 1 +
arch/sparc/lib/NG4fls.S | 20 ++++++++++++++++++++
2 files changed, 21 insertions(+), 0 deletions(-)
diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
index 5380c59..2823b8e 100644
--- a/arch/sparc/lib/Makefile
+++ b/arch/sparc/lib/Makefile
@@ -18,6 +18,7 @@ lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o
lib-$(CONFIG_SPARC64) += multi3.o
lib-$(CONFIG_SPARC64) += fls.o
lib-$(CONFIG_SPARC64) += fls64.o
+obj-$(CONFIG_SPARC64) += NG4fls.o
lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o
lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o
diff --git a/arch/sparc/lib/NG4fls.S b/arch/sparc/lib/NG4fls.S
new file mode 100644
index 0000000..bc17b65
--- /dev/null
+++ b/arch/sparc/lib/NG4fls.S
@@ -0,0 +1,20 @@
+/* NG4fls.S: SPARC optimized fls and __fls for T4 and above.
+ *
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#include <linux/linkage.h>
+
+#define LZCNT_O0_G2 \
+ .word 0x85b002e8
+
+ .text
+ .register %g2, #scratch
+ .register %g3, #scratch
+
+ENTRY(NG4fls) /* lzcnt-based fls: word arrives in %o0, result is returned in %o0 */
+ LZCNT_O0_G2 !lzcnt %o0, %g2
+ mov 64, %g3 /* NOTE(review): 64 - lzcnt matches a 32-bit fls only if bits 63:32 of %o0 are zero - confirm callers zero-extend */
+ retl /* return to the caller; the next instruction is the delay slot */
+ sub %g3, %g2, %o0 /* delay slot: result = 64 - %g2 (presumably the leading-zero count) */
+ENDPROC(NG4fls)
--
1.7.1
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [PATCH v4 4/5] sparc64: SPARC optimized __fls function
2017-10-11 18:50 ` Vijay Kumar
@ 2017-10-11 18:50 ` Vijay Kumar
-1 siblings, 0 replies; 16+ messages in thread
From: Vijay Kumar @ 2017-10-11 18:50 UTC (permalink / raw)
To: davem
Cc: linux-kernel, sparclinux, babu.moger, rob.gardner, sam, anthony.yznaga
Defined SPARC optimized __fls using lzcnt opcode.
Signed-off-by: Vijay Kumar <vijay.ac.kumar@oracle.com>
---
arch/sparc/lib/NG4fls.S | 10 ++++++++++
1 files changed, 10 insertions(+), 0 deletions(-)
diff --git a/arch/sparc/lib/NG4fls.S b/arch/sparc/lib/NG4fls.S
index bc17b65..2d0991e 100644
--- a/arch/sparc/lib/NG4fls.S
+++ b/arch/sparc/lib/NG4fls.S
@@ -18,3 +18,13 @@ ENTRY(NG4fls)
retl
sub %g3, %g2, %o0
ENDPROC(NG4fls)
+
+ENTRY(__NG4fls) /* lzcnt-based __fls: word arrives in %o0, result is returned in %o0 */
+ brz,pn %o0, 1f /* word == 0: skip to the return with %o0 still 0; the next line is the delay slot and runs either way */
+ LZCNT_O0_G2 !lzcnt %o0, %g2
+ mov 63, %g3
+ sub %g3, %g2, %o0 /* result = 63 - %g2 (presumably the leading-zero count) */
+1:
+ retl /* return to the caller */
+ nop /* delay slot: nothing left to do */
+ENDPROC(__NG4fls)
--
1.7.1
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [PATCH v4 4/5] sparc64: SPARC optimized __fls function
@ 2017-10-11 18:50 ` Vijay Kumar
0 siblings, 0 replies; 16+ messages in thread
From: Vijay Kumar @ 2017-10-11 18:50 UTC (permalink / raw)
To: davem
Cc: linux-kernel, sparclinux, babu.moger, rob.gardner, sam, anthony.yznaga
Defined SPARC optimized __fls using lzcnt opcode.
Signed-off-by: Vijay Kumar <vijay.ac.kumar@oracle.com>
---
arch/sparc/lib/NG4fls.S | 10 ++++++++++
1 files changed, 10 insertions(+), 0 deletions(-)
diff --git a/arch/sparc/lib/NG4fls.S b/arch/sparc/lib/NG4fls.S
index bc17b65..2d0991e 100644
--- a/arch/sparc/lib/NG4fls.S
+++ b/arch/sparc/lib/NG4fls.S
@@ -18,3 +18,13 @@ ENTRY(NG4fls)
retl
sub %g3, %g2, %o0
ENDPROC(NG4fls)
+
+ENTRY(__NG4fls) /* lzcnt-based __fls: word arrives in %o0, result is returned in %o0 */
+ brz,pn %o0, 1f /* word == 0: skip to the return with %o0 still 0; the next line is the delay slot and runs either way */
+ LZCNT_O0_G2 !lzcnt %o0, %g2
+ mov 63, %g3
+ sub %g3, %g2, %o0 /* result = 63 - %g2 (presumably the leading-zero count) */
+1:
+ retl /* return to the caller */
+ nop /* delay slot: nothing left to do */
+ENDPROC(__NG4fls)
--
1.7.1
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [PATCH v4 5/5] sparc64: Use sparc optimized fls and __fls for T4 and above
2017-10-11 18:50 ` Vijay Kumar
@ 2017-10-11 18:50 ` Vijay Kumar
-1 siblings, 0 replies; 16+ messages in thread
From: Vijay Kumar @ 2017-10-11 18:50 UTC (permalink / raw)
To: davem
Cc: linux-kernel, sparclinux, babu.moger, rob.gardner, sam, anthony.yznaga
For T4 and above, patch the fls and __fls functions
at boot time to use the lzcnt instruction.
Signed-off-by: Vijay Kumar <vijay.ac.kumar@oracle.com>
---
arch/sparc/kernel/head_64.S | 2 ++
arch/sparc/lib/NG4patch.S | 9 +++++++++
2 files changed, 11 insertions(+), 0 deletions(-)
diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S
index 4de9fbd..f362ecb 100644
--- a/arch/sparc/kernel/head_64.S
+++ b/arch/sparc/kernel/head_64.S
@@ -640,6 +640,8 @@ niagara4_patch:
nop
call niagara4_patch_pageops
nop
+ call niagara4_patch_fls
+ nop
ba,a,pt %xcc, 80f
nop
diff --git a/arch/sparc/lib/NG4patch.S b/arch/sparc/lib/NG4patch.S
index 3cc0f8c..da65a3e 100644
--- a/arch/sparc/lib/NG4patch.S
+++ b/arch/sparc/lib/NG4patch.S
@@ -3,6 +3,8 @@
* Copyright (C) 2012 David S. Miller <davem@davemloft.net>
*/
+#include <linux/linkage.h>
+
#define BRANCH_ALWAYS 0x10680000
#define NOP 0x01000000
#define NG_DO_PATCH(OLD, NEW) \
@@ -52,3 +54,10 @@ niagara4_patch_pageops:
retl
nop
.size niagara4_patch_pageops,.-niagara4_patch_pageops
+
+ENTRY(niagara4_patch_fls) /* boot-time hook, called from niagara4_patch in head_64.S */
+ NG_DO_PATCH(fls, NG4fls) /* redirect fls to NG4fls; macro body not visible here, presumably writes a branch at the entry of fls - confirm */
+ NG_DO_PATCH(__fls, __NG4fls) /* same for __fls and __NG4fls */
+ retl /* return to the caller */
+ nop /* delay slot */
+ENDPROC(niagara4_patch_fls)
--
1.7.1
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [PATCH v4 5/5] sparc64: Use sparc optimized fls and __fls for T4 and above
@ 2017-10-11 18:50 ` Vijay Kumar
0 siblings, 0 replies; 16+ messages in thread
From: Vijay Kumar @ 2017-10-11 18:50 UTC (permalink / raw)
To: davem
Cc: linux-kernel, sparclinux, babu.moger, rob.gardner, sam, anthony.yznaga
For T4 and above, patch the fls and __fls functions
at boot time to use the lzcnt instruction.
Signed-off-by: Vijay Kumar <vijay.ac.kumar@oracle.com>
---
arch/sparc/kernel/head_64.S | 2 ++
arch/sparc/lib/NG4patch.S | 9 +++++++++
2 files changed, 11 insertions(+), 0 deletions(-)
diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S
index 4de9fbd..f362ecb 100644
--- a/arch/sparc/kernel/head_64.S
+++ b/arch/sparc/kernel/head_64.S
@@ -640,6 +640,8 @@ niagara4_patch:
nop
call niagara4_patch_pageops
nop
+ call niagara4_patch_fls
+ nop
ba,a,pt %xcc, 80f
nop
diff --git a/arch/sparc/lib/NG4patch.S b/arch/sparc/lib/NG4patch.S
index 3cc0f8c..da65a3e 100644
--- a/arch/sparc/lib/NG4patch.S
+++ b/arch/sparc/lib/NG4patch.S
@@ -3,6 +3,8 @@
* Copyright (C) 2012 David S. Miller <davem@davemloft.net>
*/
+#include <linux/linkage.h>
+
#define BRANCH_ALWAYS 0x10680000
#define NOP 0x01000000
#define NG_DO_PATCH(OLD, NEW) \
@@ -52,3 +54,10 @@ niagara4_patch_pageops:
retl
nop
.size niagara4_patch_pageops,.-niagara4_patch_pageops
+
+ENTRY(niagara4_patch_fls) /* boot-time hook, called from niagara4_patch in head_64.S */
+ NG_DO_PATCH(fls, NG4fls) /* redirect fls to NG4fls; macro body not visible here, presumably writes a branch at the entry of fls - confirm */
+ NG_DO_PATCH(__fls, __NG4fls) /* same for __fls and __NG4fls */
+ retl /* return to the caller */
+ nop /* delay slot */
+ENDPROC(niagara4_patch_fls)
--
1.7.1
^ permalink raw reply related [flat|nested] 16+ messages in thread
* Re: [PATCH v4 0/5] sparc64: Optimize fls and __fls
2017-10-11 18:50 ` Vijay Kumar
@ 2017-10-12 20:29 ` Babu Moger
-1 siblings, 0 replies; 16+ messages in thread
From: Babu Moger @ 2017-10-12 20:29 UTC (permalink / raw)
To: Vijay Kumar, davem
Cc: linux-kernel, sparclinux, rob.gardner, sam, anthony.yznaga
Looked at all the patches. Looks good to me.
Reviewed-by: Babu Moger <babu.moger@oracle.com>
On 10/11/2017 1:50 PM, Vijay Kumar wrote:
> SPARC provides lzcnt instruction (with VIS3) which can be used to
> optimize fls, __fls and fls64 functions. For the systems that supports
> lzcnt instruction, we now do boot time patching to use sparc
> optimized fls, __fls and fls64 functions.
>
> v3->v4:
> - Fixed a typo.
> v2->v3:
> - Using ENTRY(), ENDPROC() for assembler functions.
> - Removed BITS_PER_LONG from __fls.
> - Using generic fls64().
> - Replaced lzcnt instruction with .word directive.
> v1->v2:
> - Fixed delay slot issue.
>
> Vijay Kumar (5):
> sparc64: Define SPARC default fls function
> sparc64: Define SPARC default __fls function
> sparc64: SPARC optimized fls function
> sparc64: SPARC optimized __fls function
> sparc64: Use sparc optimized fls and __fls for T4 and above
>
> arch/sparc/include/asm/bitops_64.h | 5 ++-
> arch/sparc/kernel/head_64.S | 2 +
> arch/sparc/lib/Makefile | 3 ++
> arch/sparc/lib/NG4fls.S | 30 ++++++++++++++++
> arch/sparc/lib/NG4patch.S | 9 +++++
> arch/sparc/lib/fls.S | 67 ++++++++++++++++++++++++++++++++++++
> arch/sparc/lib/fls64.S | 61 ++++++++++++++++++++++++++++++++
> 7 files changed, 175 insertions(+), 2 deletions(-)
> create mode 100644 arch/sparc/lib/NG4fls.S
> create mode 100644 arch/sparc/lib/fls.S
> create mode 100644 arch/sparc/lib/fls64.S
>
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH v4 0/5] sparc64: Optimize fls and __fls
@ 2017-10-12 20:29 ` Babu Moger
0 siblings, 0 replies; 16+ messages in thread
From: Babu Moger @ 2017-10-12 20:29 UTC (permalink / raw)
To: Vijay Kumar, davem
Cc: linux-kernel, sparclinux, rob.gardner, sam, anthony.yznaga
Looked at all the patches. Looks good to me.
Reviewed-by: Babu Moger <babu.moger@oracle.com>
On 10/11/2017 1:50 PM, Vijay Kumar wrote:
> SPARC provides lzcnt instruction (with VIS3) which can be used to
> optimize fls, __fls and fls64 functions. For the systems that supports
> lzcnt instruction, we now do boot time patching to use sparc
> optimized fls, __fls and fls64 functions.
>
> v3->v4:
> - Fixed a typo.
> v2->v3:
> - Using ENTRY(), ENDPROC() for assembler functions.
> - Removed BITS_PER_LONG from __fls.
> - Using generic fls64().
> - Replaced lzcnt instruction with .word directive.
> v1->v2:
> - Fixed delay slot issue.
>
> Vijay Kumar (5):
> sparc64: Define SPARC default fls function
> sparc64: Define SPARC default __fls function
> sparc64: SPARC optimized fls function
> sparc64: SPARC optimized __fls function
> sparc64: Use sparc optimized fls and __fls for T4 and above
>
> arch/sparc/include/asm/bitops_64.h | 5 ++-
> arch/sparc/kernel/head_64.S | 2 +
> arch/sparc/lib/Makefile | 3 ++
> arch/sparc/lib/NG4fls.S | 30 ++++++++++++++++
> arch/sparc/lib/NG4patch.S | 9 +++++
> arch/sparc/lib/fls.S | 67 ++++++++++++++++++++++++++++++++++++
> arch/sparc/lib/fls64.S | 61 ++++++++++++++++++++++++++++++++
> 7 files changed, 175 insertions(+), 2 deletions(-)
> create mode 100644 arch/sparc/lib/NG4fls.S
> create mode 100644 arch/sparc/lib/fls.S
> create mode 100644 arch/sparc/lib/fls64.S
>
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH v4 0/5] sparc64: Optimize fls and __fls
2017-10-11 18:50 ` Vijay Kumar
@ 2017-11-15 5:27 ` David Miller
-1 siblings, 0 replies; 16+ messages in thread
From: David Miller @ 2017-11-15 5:27 UTC (permalink / raw)
To: vijay.ac.kumar
Cc: linux-kernel, sparclinux, babu.moger, rob.gardner, sam, anthony.yznaga
From: Vijay Kumar <vijay.ac.kumar@oracle.com>
Date: Wed, 11 Oct 2017 12:50:01 -0600
> SPARC provides lzcnt instruction (with VIS3) which can be used to
> optimize fls, __fls and fls64 functions. For the systems that supports
> lzcnt instruction, we now do boot time patching to use sparc
> optimized fls, __fls and fls64 functions.
>
> v3->v4:
> - Fixed a typo.
> v2->v3:
> - Using ENTRY(), ENDPROC() for assembler functions.
> - Removed BITS_PER_LONG from __fls.
> - Using generic fls64().
> - Replaced lzcnt instruction with .word directive.
> v1->v2:
> - Fixed delay slot issue.
Series applied, thank you.
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH v4 0/5] sparc64: Optimize fls and __fls
@ 2017-11-15 5:27 ` David Miller
0 siblings, 0 replies; 16+ messages in thread
From: David Miller @ 2017-11-15 5:27 UTC (permalink / raw)
To: vijay.ac.kumar
Cc: linux-kernel, sparclinux, babu.moger, rob.gardner, sam, anthony.yznaga
From: Vijay Kumar <vijay.ac.kumar@oracle.com>
Date: Wed, 11 Oct 2017 12:50:01 -0600
> SPARC provides lzcnt instruction (with VIS3) which can be used to
> optimize fls, __fls and fls64 functions. For the systems that supports
> lzcnt instruction, we now do boot time patching to use sparc
> optimized fls, __fls and fls64 functions.
>
> v3->v4:
> - Fixed a typo.
> v2->v3:
> - Using ENTRY(), ENDPROC() for assembler functions.
> - Removed BITS_PER_LONG from __fls.
> - Using generic fls64().
> - Replaced lzcnt instruction with .word directive.
> v1->v2:
> - Fixed delay slot issue.
Series applied, thank you.
^ permalink raw reply [flat|nested] 16+ messages in thread
end of thread, other threads:[~2017-11-15 5:27 UTC | newest]
Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-10-11 18:50 [PATCH v4 0/5] sparc64: Optimize fls and __fls Vijay Kumar
2017-10-11 18:50 ` Vijay Kumar
2017-10-11 18:50 ` [PATCH v4 1/5] sparc64: Define SPARC default fls function Vijay Kumar
2017-10-11 18:50 ` Vijay Kumar
2017-10-11 18:50 ` [PATCH v4 2/5] sparc64: Define SPARC default __fls function Vijay Kumar
2017-10-11 18:50 ` Vijay Kumar
2017-10-11 18:50 ` [PATCH v4 3/5] sparc64: SPARC optimized fls function Vijay Kumar
2017-10-11 18:50 ` Vijay Kumar
2017-10-11 18:50 ` [PATCH v4 4/5] sparc64: SPARC optimized __fls function Vijay Kumar
2017-10-11 18:50 ` Vijay Kumar
2017-10-11 18:50 ` [PATCH v4 5/5] sparc64: Use sparc optimized fls and __fls for T4 and above Vijay Kumar
2017-10-11 18:50 ` Vijay Kumar
2017-10-12 20:29 ` [PATCH v4 0/5] sparc64: Optimize fls and __fls Babu Moger
2017-10-12 20:29 ` Babu Moger
2017-11-15 5:27 ` David Miller
2017-11-15 5:27 ` David Miller
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.