All of lore.kernel.org
 help / color / mirror / Atom feed
From: Sergey Matyukevich <geomatsi@gmail.com>
To: linux-snps-arc@lists.infradead.org
Cc: Vineet Gupta <vgupta@kernel.org>,
	Vladimir Isaev <isaev@synopsys.com>,
	Sergey Matyukevich <geomatsi@gmail.com>,
	Sergey Matyukevich <sergey.matyukevich@synopsys.com>
Subject: [RFC PATCH 04/13] ARC: uaccess: elide ZOL, use double load/stores
Date: Tue, 22 Feb 2022 17:14:57 +0300	[thread overview]
Message-ID: <20220222141506.4003433-5-geomatsi@gmail.com> (raw)
In-Reply-To: <20220222141506.4003433-1-geomatsi@gmail.com>

From: Vineet Gupta <vgupta@kernel.org>

Upcoming ARCv3 lacks ZOL support, so provide alternative
uaccess implementations based on 64-bit memory operations.

Signed-off-by: Vineet Gupta <vgupta@kernel.org>
---
 arch/arc/include/asm/asm-macro-ll64-emul.h |  28 ++++
 arch/arc/include/asm/asm-macro-ll64.h      |  20 +++
 arch/arc/include/asm/assembler.h           |  12 ++
 arch/arc/include/asm/uaccess.h             |  12 ++
 arch/arc/lib/Makefile                      |   2 +
 arch/arc/lib/uaccess.S                     | 144 +++++++++++++++++++++
 6 files changed, 218 insertions(+)
 create mode 100644 arch/arc/include/asm/asm-macro-ll64-emul.h
 create mode 100644 arch/arc/include/asm/asm-macro-ll64.h
 create mode 100644 arch/arc/lib/uaccess.S

diff --git a/arch/arc/include/asm/asm-macro-ll64-emul.h b/arch/arc/include/asm/asm-macro-ll64-emul.h
new file mode 100644
index 000000000000..886320cc74ad
--- /dev/null
+++ b/arch/arc/include/asm/asm-macro-ll64-emul.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+/*
+ * Emulation of the 64-bit post-increment accessors LD64.ab/ST64.ab:
+ *   - Each access is split into two 32-bit ld.ab/st.ab of incr / 2 bytes.
+ *   - Only the first register is passed; the second access is emitted
+ *     for r4 (uses r5) and r6 (uses r7) only, other registers get one.
+ */
+
+.macro ST64.ab d, s, incr
+	st.ab	\d, [\s, \incr / 2]
+	.ifeqs	"\d", "r4"
+		st.ab	r5, [\s, \incr / 2]
+	.endif
+	.ifeqs	"\d", "r6"
+		st.ab	r7, [\s, \incr / 2]
+	.endif
+.endm
+
+.macro LD64.ab d, s, incr
+	ld.ab	\d, [\s, \incr / 2]
+	.ifeqs	"\d", "r4"
+		ld.ab	r5, [\s, \incr / 2]
+	.endif
+	.ifeqs	"\d", "r6"
+		ld.ab	r7, [\s, \incr / 2]
+	.endif
+.endm
diff --git a/arch/arc/include/asm/asm-macro-ll64.h b/arch/arc/include/asm/asm-macro-ll64.h
new file mode 100644
index 000000000000..89e05c923a26
--- /dev/null
+++ b/arch/arc/include/asm/asm-macro-ll64.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+/*
+ * 64-bit load/store wrappers, one ldd/std instruction each:
+ *   - the .irp lists below define both the plain and the .ab form
+ *   - the access uses register pair r<N>:r<N+1>, callers pass only
+ *     the first register; the offset argument defaults to 0
+ */
+
+.irp xx,,.ab
+.macro ST64\xx d, s, off=0
+	std\xx	\d, [\s, \off]
+.endm
+.endr
+
+.irp xx,,.ab
+.macro LD64\xx d, s, off=0
+	ldd\xx	\d, [\s, \off]
+.endm
+.endr
diff --git a/arch/arc/include/asm/assembler.h b/arch/arc/include/asm/assembler.h
index 426488ef27d4..1d69390c22ba 100644
--- a/arch/arc/include/asm/assembler.h
+++ b/arch/arc/include/asm/assembler.h
@@ -5,6 +5,12 @@
 
 #ifdef __ASSEMBLY__
 
+#ifdef CONFIG_ARC_HAS_LL64
+#include <asm/asm-macro-ll64.h>
+#else
+#include <asm/asm-macro-ll64-emul.h>
+#endif
+
 #ifdef CONFIG_ARC_LACKS_ZOL
 #include <asm/asm-macro-dbnz.h>
 #else
@@ -13,6 +19,12 @@
 
 #else	/* !__ASSEMBLY__ */
 
+#ifdef CONFIG_ARC_HAS_LL64
+asm(".include \"asm/asm-macro-ll64.h\"\n");
+#else
+asm(".include \"asm/asm-macro-ll64-emul.h\"\n");
+#endif
+
 /*
  * ARCv2 cores have both LPcc and DBNZ instructions (starting 3.5a release).
  * But in this context, LP present implies DBNZ not available (ARCompact ISA)
diff --git a/arch/arc/include/asm/uaccess.h b/arch/arc/include/asm/uaccess.h
index 9b009e64e79c..f5b97d977c1b 100644
--- a/arch/arc/include/asm/uaccess.h
+++ b/arch/arc/include/asm/uaccess.h
@@ -163,6 +163,7 @@
 	: "+r" (ret)				\
 	: "r" (src), "r" (dst), "ir" (-EFAULT))
 
+#ifndef CONFIG_ARC_LACKS_ZOL
 
 static inline unsigned long
 raw_copy_from_user(void *to, const void __user *from, unsigned long n)
@@ -660,6 +661,17 @@ static inline unsigned long __clear_user(void __user *to, unsigned long n)
 #define INLINE_COPY_TO_USER
 #define INLINE_COPY_FROM_USER
 
+#else
+/* Out-of-line (lib/uaccess.S); each returns the number of bytes NOT handled */
+extern unsigned long raw_copy_from_user(void *to, const void __user *from,
+					  unsigned long n);
+extern unsigned long raw_copy_to_user(void __user *to, const void *from,
+					unsigned long n);
+
+extern unsigned long __clear_user(void __user *to, unsigned long n);
+
+#endif
+
 #define __clear_user		__clear_user
 
 #include <asm/segment.h>
diff --git a/arch/arc/lib/Makefile b/arch/arc/lib/Makefile
index 30158ae69fd4..87d18f5013dc 100644
--- a/arch/arc/lib/Makefile
+++ b/arch/arc/lib/Makefile
@@ -13,3 +13,5 @@ lib-$(CONFIG_ISA_ARCV2)		+=memcpy-archs-unaligned.o
 else
 lib-$(CONFIG_ISA_ARCV2)		+=memcpy-archs.o
 endif
+
+lib-$(CONFIG_ARC_LACKS_ZOL)	+= uaccess.o
diff --git a/arch/arc/lib/uaccess.S b/arch/arc/lib/uaccess.S
new file mode 100644
index 000000000000..5093160a72d3
--- /dev/null
+++ b/arch/arc/lib/uaccess.S
@@ -0,0 +1,144 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * uaccess for ARCv3: avoids ZOL, uses 64-bit memory ops
+ *   ASSUMES unaligned access
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+#ifndef CONFIG_ARC_USE_UNALIGNED_MEM_ACCESS
+#error "Unaligned access support needed"
+#endif
+
+; Input
+;  - r0: dest, kernel
+;  - r1: src, user
+;  - r2: sz
+; Output
+;  - r0: Num bytes left to copy, 0 on success
+
+ENTRY_CFI(raw_copy_from_user)
+
+	add    r8, r0, r2		; r8 = dest + sz, end of the kernel buffer
+
+	lsr.f  r3, r2, 4		; r3 = number of whole 16-byte chunks
+	bz     .L1dobytes		; none: only the byte tail is left
+
+	; 16 bytes per pass: two 8-byte user loads (10, 11), then two stores
+10:	LD64.ab r4, r1, 8
+11:	LD64.ab r6, r1, 8
+	ST64.ab r4, r0, 8
+	ST64.ab r6, r0, 8
+	DBNZR  r3, 10b
+
+.L1dobytes:
+	; tail: the sz % 16 bytes that remain, copied one byte at a time
+	and.f  r3, r2, 0xf
+	bz     .L1done
+
+12:	ldb.ab r4, [r1, 1]
+	stb.ab r4, [r0, 1]
+	DBNZR  r3, 12b
+
+.L1done:
+	; bytes not copied = orig_dst + sz - curr_dst (r8 - r0); faults resume here
+	j.d    [blink]
+	sub    r0, r8, r0		; return value, set in the j.d delay slot
+END_CFI(raw_copy_from_user)
+
+.section __ex_table, "a"
+	.word 10b, .L1done	; first 8-byte user load of a chunk
+	.word 11b, .L1done	; second 8-byte user load of a chunk
+	.word 12b, .L1done	; byte load in the tail loop
+.previous
+
+; Input
+;  - r0: dest, user
+;  - r1: src, kernel
+;  - r2: sz
+; Output
+;  - r0: Num bytes left to copy, 0 on success
+
+ENTRY_CFI(raw_copy_to_user)
+
+	add    r8, r0, r2		; r8 = dest + sz, end of the user buffer
+
+	lsr.f  r3, r2, 4		; r3 = number of whole 16-byte chunks
+	bz     .L2dobytes		; none: only the byte tail is left
+
+	; 16 bytes per pass: two 8-byte kernel loads, then two user stores (20, 21)
+2:	LD64.ab r4, r1, 8
+	LD64.ab r6, r1, 8
+20:	ST64.ab r4, r0, 8
+21:	ST64.ab r6, r0, 8
+	DBNZR  r3, 2b
+
+.L2dobytes:
+	; tail: the sz % 16 bytes that remain, copied one byte at a time
+	and.f  r3, r2, 0xf
+	bz     .L2done
+
+2:	ldb.ab r4, [r1, 1]
+22:	stb.ab r4, [r0, 1]
+	DBNZR  r3, 2b
+
+.L2done:
+	; not copied = dst_end - curr_dst (src runs ahead when a store faults)
+	j.d    [blink]
+	sub    r0, r8, r0		; return value, set in the j.d delay slot
+
+END_CFI(raw_copy_to_user)
+
+.section __ex_table, "a"
+	.word 20b, .L2done	; first 8-byte user store of a chunk
+	.word 21b, .L2done	; second 8-byte user store of a chunk
+	.word 22b, .L2done	; byte store in the tail loop
+.previous
+
+ENTRY_CFI(__clear_user)
+	add    r8, r0, r1		; r8 = r0 + r1: end of the region (to + n)
+
+	mov    r4, 0			; r4:r5 is the zero pair that ST64.ab r4 writes
+	mov    r5, 0
+
+	lsr.f  r3, r1, 4		; r3 = number of whole 16-byte chunks
+	bz     .L3dobytes		; none: only the byte tail is left
+
+	; 16 bytes per pass: two 8-byte stores of the zero pair
+30:	ST64.ab r4, r0, 8
+31:	ST64.ab r4, r0, 8
+	DBNZR  r3, 30b
+
+.L3dobytes:
+	; tail: the n % 16 bytes that remain, cleared one byte at a time
+	and.f  r3, r1, 0xf
+	bz     .L3done
+
+32:	stb.ab r4, [r0, 1]
+	DBNZR  r3, 32b
+
+.L3done:
+	; bytes not cleared = orig_dst + sz - curr_dst (r8 - r0); faults resume here
+	j.d    [blink]
+	sub    r0, r8, r0		; return value, set in the j.d delay slot
+
+END_CFI(__clear_user)
+
+; Note: the .fixup section is absent here, and that is deliberate.
+;
+; .fixup is a level of indirection that lets the user-fault path do extra work
+; before jumping to a safe instruction (past the faulting LD/ST) in uaccess
+; code, e.g. setting up -EFAULT in the return register for the caller.
+; When no such work is needed (as above, where the .L*done code itself derives
+; the not-copied byte count into r0) and the fault handler only has to resume
+; at a valid PC, that label can go directly into the __ex_table entry.
+; do_page_fault() -> fixup_exception() uses it to set up pt_regs->ret, which the
+; CPU exception handler resumes to. This also makes the handling more efficient
+; by removing a level of indirection.
+
+.section __ex_table, "a"
+	.word 30b, .L3done	; first 8-byte store of a chunk
+	.word 31b, .L3done	; second 8-byte store of a chunk
+	.word 32b, .L3done	; byte store in the tail loop
+.previous
-- 
2.25.1


_______________________________________________
linux-snps-arc mailing list
linux-snps-arc@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-snps-arc

  parent reply	other threads:[~2022-02-22 14:15 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-02-22 14:14 [RFC PATCH 00/13] ARC: handle the lack of ZOL support Sergey Matyukevich
2022-02-22 14:14 ` [RFC PATCH 01/13] ARC: uaccess: elide unaligned handling if hardware supports Sergey Matyukevich
2022-02-22 14:14 ` [RFC PATCH 02/13] ARC: Kconfig: introduce option to disable ZOL Sergey Matyukevich
2022-02-22 14:14 ` [RFC PATCH 03/13] ARC: uaccess: drop CC_OPTIMIZE_FOR_SIZE Sergey Matyukevich
2022-02-22 14:14 ` Sergey Matyukevich [this message]
2022-02-22 14:14 ` [RFC PATCH 05/13] ARCv2: memset: don't prefetch for len == 0 which happens a lot Sergey Matyukevich
2022-02-22 14:14 ` [RFC PATCH 06/13] ARCv2: memset: elide unaligned handling if hardware supports Sergey Matyukevich
2022-02-22 14:15 ` [RFC PATCH 07/13] ARCv2: memset: rewrite using double load/stores Sergey Matyukevich
2022-02-22 14:15 ` [RFC PATCH 08/13] ARC: string: use generic C code if no ZOL support Sergey Matyukevich
2022-02-22 14:15 ` [RFC PATCH 09/13] ARC: delay: elide ZOL Sergey Matyukevich
2022-02-22 14:15 ` [RFC PATCH 10/13] ARC: checksum: " Sergey Matyukevich
2022-02-22 14:15 ` [RFC PATCH 11/13] ARC: head: " Sergey Matyukevich
2022-02-22 14:15 ` [RFC PATCH 12/13] ARC: build: inhibit ZOL generation by compiler Sergey Matyukevich
2022-02-22 14:15 ` [RFC PATCH 13/13] ARC: pt_regs: handle the case when ZOL is not supported Sergey Matyukevich
2022-02-28  2:09 ` [RFC PATCH 00/13] ARC: handle the lack of ZOL support Vineet Gupta
2022-03-03 19:22   ` Sergey Matyukevich
2022-03-23 10:09   ` [RFC PATCH 00/13] ARC: handle the lack of ZOL supporty Sergey Matyukevich

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220222141506.4003433-5-geomatsi@gmail.com \
    --to=geomatsi@gmail.com \
    --cc=isaev@synopsys.com \
    --cc=linux-snps-arc@lists.infradead.org \
    --cc=sergey.matyukevich@synopsys.com \
    --cc=vgupta@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.