All of lore.kernel.org
 help / color / mirror / Atom feed
From: Sasha Levin <sashal@kernel.org>
To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
Cc: Ard Biesheuvel <ardb@kernel.org>,
	Nicolas Pitre <nico@fluxnic.net>,
	Russell King <rmk+kernel@armlinux.org.uk>,
	Sasha Levin <sashal@kernel.org>,
	linux-arm-kernel@lists.infradead.org
Subject: [PATCH AUTOSEL 5.10 01/34] ARM: 9058/1: cache-v7: refactor v7_invalidate_l1 to avoid clobbering r5/r6
Date: Wed, 12 May 2021 14:02:32 -0400	[thread overview]
Message-ID: <20210512180306.664925-1-sashal@kernel.org> (raw)

From: Ard Biesheuvel <ardb@kernel.org>

[ Upstream commit f9e7a99fb6b86aa6a00e53b34ee6973840e005aa ]

The cache invalidation code in v7_invalidate_l1 can be tweaked to
re-read the associativity from CCSIDR, and keep the way identifier
component in a single register that is assigned in the outer loop. This
way, we need 2 registers less.

Given that the number of sets is typically much larger than the
associativity, rearrange the code so that the outer loop has the fewer
number of iterations, ensuring that the re-read of CCSIDR only occurs a
handful of times in practice.

Fix the whitespace while at it, and update the comment to indicate that
this code is no longer a clone of anything else.

Acked-by: Nicolas Pitre <nico@fluxnic.net>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 arch/arm/mm/cache-v7.S | 51 +++++++++++++++++++++---------------------
 1 file changed, 25 insertions(+), 26 deletions(-)

diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S
index dc8f152f3556..e3bc1d6e13d0 100644
--- a/arch/arm/mm/cache-v7.S
+++ b/arch/arm/mm/cache-v7.S
@@ -33,41 +33,40 @@ icache_size:
  * processor.  We fix this by performing an invalidate, rather than a
  * clean + invalidate, before jumping into the kernel.
  *
- * This function is cloned from arch/arm/mach-tegra/headsmp.S, and needs
- * to be called for both secondary cores startup and primary core resume
- * procedures.
+ * This function needs to be called for both secondary cores startup and
+ * primary core resume procedures.
  */
 ENTRY(v7_invalidate_l1)
        mov     r0, #0
        mcr     p15, 2, r0, c0, c0, 0
        mrc     p15, 1, r0, c0, c0, 0
 
-       movw    r1, #0x7fff
-       and     r2, r1, r0, lsr #13
+	movw	r3, #0x3ff
+	and	r3, r3, r0, lsr #3	@ 'Associativity' in CCSIDR[12:3]
+	clz	r1, r3			@ WayShift
+	mov	r2, #1
+	mov	r3, r3, lsl r1		@ NumWays-1 shifted into bits [31:...]
+	movs	r1, r2, lsl r1		@ #1 shifted left by same amount
+	moveq	r1, #1			@ r1 needs value > 0 even if only 1 way
 
-       movw    r1, #0x3ff
+	and	r2, r0, #0x7
+	add	r2, r2, #4		@ SetShift
 
-       and     r3, r1, r0, lsr #3      @ NumWays - 1
-       add     r2, r2, #1              @ NumSets
+1:	movw	r4, #0x7fff
+	and	r0, r4, r0, lsr #13	@ 'NumSets' in CCSIDR[27:13]
 
-       and     r0, r0, #0x7
-       add     r0, r0, #4      @ SetShift
-
-       clz     r1, r3          @ WayShift
-       add     r4, r3, #1      @ NumWays
-1:     sub     r2, r2, #1      @ NumSets--
-       mov     r3, r4          @ Temp = NumWays
-2:     subs    r3, r3, #1      @ Temp--
-       mov     r5, r3, lsl r1
-       mov     r6, r2, lsl r0
-       orr     r5, r5, r6      @ Reg = (Temp<<WayShift)|(NumSets<<SetShift)
-       mcr     p15, 0, r5, c7, c6, 2
-       bgt     2b
-       cmp     r2, #0
-       bgt     1b
-       dsb     st
-       isb
-       ret     lr
+2:	mov	r4, r0, lsl r2		@ NumSet << SetShift
+	orr	r4, r4, r3		@ Reg = (Temp<<WayShift)|(NumSets<<SetShift)
+	mcr	p15, 0, r4, c7, c6, 2
+	subs	r0, r0, #1		@ Set--
+	bpl	2b
+	subs	r3, r3, r1		@ Way--
+	bcc	3f
+	mrc	p15, 1, r0, c0, c0, 0	@ re-read cache geometry from CCSIDR
+	b	1b
+3:	dsb	st
+	isb
+	ret	lr
 ENDPROC(v7_invalidate_l1)
 
 /*
-- 
2.30.2


WARNING: multiple messages have this Message-ID (diff)
From: Sasha Levin <sashal@kernel.org>
To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
Cc: Ard Biesheuvel <ardb@kernel.org>,
	Nicolas Pitre <nico@fluxnic.net>,
	Russell King <rmk+kernel@armlinux.org.uk>,
	Sasha Levin <sashal@kernel.org>,
	linux-arm-kernel@lists.infradead.org
Subject: [PATCH AUTOSEL 5.10 01/34] ARM: 9058/1: cache-v7: refactor v7_invalidate_l1 to avoid clobbering r5/r6
Date: Wed, 12 May 2021 14:02:32 -0400	[thread overview]
Message-ID: <20210512180306.664925-1-sashal@kernel.org> (raw)

From: Ard Biesheuvel <ardb@kernel.org>

[ Upstream commit f9e7a99fb6b86aa6a00e53b34ee6973840e005aa ]

The cache invalidation code in v7_invalidate_l1 can be tweaked to
re-read the associativity from CCSIDR, and keep the way identifier
component in a single register that is assigned in the outer loop. This
way, we need 2 registers less.

Given that the number of sets is typically much larger than the
associativity, rearrange the code so that the outer loop has the fewer
number of iterations, ensuring that the re-read of CCSIDR only occurs a
handful of times in practice.

Fix the whitespace while at it, and update the comment to indicate that
this code is no longer a clone of anything else.

Acked-by: Nicolas Pitre <nico@fluxnic.net>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 arch/arm/mm/cache-v7.S | 51 +++++++++++++++++++++---------------------
 1 file changed, 25 insertions(+), 26 deletions(-)

diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S
index dc8f152f3556..e3bc1d6e13d0 100644
--- a/arch/arm/mm/cache-v7.S
+++ b/arch/arm/mm/cache-v7.S
@@ -33,41 +33,40 @@ icache_size:
  * processor.  We fix this by performing an invalidate, rather than a
  * clean + invalidate, before jumping into the kernel.
  *
- * This function is cloned from arch/arm/mach-tegra/headsmp.S, and needs
- * to be called for both secondary cores startup and primary core resume
- * procedures.
+ * This function needs to be called for both secondary cores startup and
+ * primary core resume procedures.
  */
 ENTRY(v7_invalidate_l1)
        mov     r0, #0
        mcr     p15, 2, r0, c0, c0, 0
        mrc     p15, 1, r0, c0, c0, 0
 
-       movw    r1, #0x7fff
-       and     r2, r1, r0, lsr #13
+	movw	r3, #0x3ff
+	and	r3, r3, r0, lsr #3	@ 'Associativity' in CCSIDR[12:3]
+	clz	r1, r3			@ WayShift
+	mov	r2, #1
+	mov	r3, r3, lsl r1		@ NumWays-1 shifted into bits [31:...]
+	movs	r1, r2, lsl r1		@ #1 shifted left by same amount
+	moveq	r1, #1			@ r1 needs value > 0 even if only 1 way
 
-       movw    r1, #0x3ff
+	and	r2, r0, #0x7
+	add	r2, r2, #4		@ SetShift
 
-       and     r3, r1, r0, lsr #3      @ NumWays - 1
-       add     r2, r2, #1              @ NumSets
+1:	movw	r4, #0x7fff
+	and	r0, r4, r0, lsr #13	@ 'NumSets' in CCSIDR[27:13]
 
-       and     r0, r0, #0x7
-       add     r0, r0, #4      @ SetShift
-
-       clz     r1, r3          @ WayShift
-       add     r4, r3, #1      @ NumWays
-1:     sub     r2, r2, #1      @ NumSets--
-       mov     r3, r4          @ Temp = NumWays
-2:     subs    r3, r3, #1      @ Temp--
-       mov     r5, r3, lsl r1
-       mov     r6, r2, lsl r0
-       orr     r5, r5, r6      @ Reg = (Temp<<WayShift)|(NumSets<<SetShift)
-       mcr     p15, 0, r5, c7, c6, 2
-       bgt     2b
-       cmp     r2, #0
-       bgt     1b
-       dsb     st
-       isb
-       ret     lr
+2:	mov	r4, r0, lsl r2		@ NumSet << SetShift
+	orr	r4, r4, r3		@ Reg = (Temp<<WayShift)|(NumSets<<SetShift)
+	mcr	p15, 0, r4, c7, c6, 2
+	subs	r0, r0, #1		@ Set--
+	bpl	2b
+	subs	r3, r3, r1		@ Way--
+	bcc	3f
+	mrc	p15, 1, r0, c0, c0, 0	@ re-read cache geometry from CCSIDR
+	b	1b
+3:	dsb	st
+	isb
+	ret	lr
 ENDPROC(v7_invalidate_l1)
 
 /*
-- 
2.30.2


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

             reply	other threads:[~2021-05-12 19:53 UTC|newest]

Thread overview: 52+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-05-12 18:02 Sasha Levin [this message]
2021-05-12 18:02 ` [PATCH AUTOSEL 5.10 01/34] ARM: 9058/1: cache-v7: refactor v7_invalidate_l1 to avoid clobbering r5/r6 Sasha Levin
2021-05-12 18:02 ` [PATCH AUTOSEL 5.10 02/34] PCI: thunder: Fix compile testing Sasha Levin
2021-05-12 18:02   ` Sasha Levin
2021-05-12 18:02 ` [PATCH AUTOSEL 5.10 03/34] dmaengine: dw-edma: Fix crash on loading/unloading driver Sasha Levin
2021-05-12 18:02 ` [PATCH AUTOSEL 5.10 04/34] ARM: 9066/1: ftrace: pause/unpause function graph tracer in cpu_suspend() Sasha Levin
2021-05-12 18:02   ` Sasha Levin
2021-05-12 18:02 ` [PATCH AUTOSEL 5.10 05/34] f2fs: fix to avoid out-of-bounds memory access Sasha Levin
2021-05-12 18:02   ` [f2fs-dev] " Sasha Levin
2021-05-12 18:02 ` [PATCH AUTOSEL 5.10 06/34] ACPI / hotplug / PCI: Fix reference count leak in enable_slot() Sasha Levin
2021-05-12 18:02 ` [PATCH AUTOSEL 5.10 07/34] PCI: tegra: Fix runtime PM imbalance in pex_ep_event_pex_rst_deassert() Sasha Levin
2021-05-12 18:02 ` [PATCH AUTOSEL 5.10 08/34] Input: elants_i2c - do not bind to i2c-hid compatible ACPI instantiated devices Sasha Levin
2021-05-12 18:02 ` [PATCH AUTOSEL 5.10 09/34] Input: silead - add workaround for x86 BIOS-es which bring the chip up in a stuck state Sasha Levin
2021-05-12 18:02 ` [PATCH AUTOSEL 5.10 10/34] NFS: NFS_INO_REVAL_PAGECACHE should mark the change attribute invalid Sasha Levin
2021-05-12 18:02 ` [PATCH AUTOSEL 5.10 11/34] um: Mark all kernel symbols as local Sasha Levin
2021-05-12 18:02   ` Sasha Levin
2021-05-12 18:02 ` [PATCH AUTOSEL 5.10 12/34] um: Disable CONFIG_GCOV with MODULES Sasha Levin
2021-05-12 18:02   ` Sasha Levin
2021-05-12 18:02 ` [PATCH AUTOSEL 5.10 13/34] PCI: tegra: Add Tegra194 MCFG quirks for ECAM errata Sasha Levin
2021-05-12 18:02 ` [PATCH AUTOSEL 5.10 14/34] ARM: 9075/1: kernel: Fix interrupted SMC calls Sasha Levin
2021-05-12 18:02   ` Sasha Levin
2021-05-12 18:02 ` [PATCH AUTOSEL 5.10 15/34] platform/chrome: cros_ec_typec: Add DP mode check Sasha Levin
2021-05-12 18:02 ` [PATCH AUTOSEL 5.10 16/34] riscv: Use $(LD) instead of $(CC) to link vDSO Sasha Levin
2021-05-12 18:02   ` Sasha Levin
2021-05-12 18:02 ` [PATCH AUTOSEL 5.10 17/34] scripts/recordmcount.pl: Fix RISC-V regex for clang Sasha Levin
2021-05-12 18:02   ` Sasha Levin
2021-05-12 18:02 ` [PATCH AUTOSEL 5.10 18/34] riscv: Workaround mcount name prior to clang-13 Sasha Levin
2021-05-12 18:02   ` Sasha Levin
2021-05-12 18:02 ` [PATCH AUTOSEL 5.10 19/34] scsi: lpfc: Fix illegal memory access on Abort IOCBs Sasha Levin
2021-05-12 18:02 ` [PATCH AUTOSEL 5.10 20/34] ceph: fix fscache invalidation Sasha Levin
2021-05-12 18:02 ` [PATCH AUTOSEL 5.10 21/34] ceph: don't clobber i_snap_caps on non-I_NEW inode Sasha Levin
2021-05-12 18:02 ` [PATCH AUTOSEL 5.10 22/34] ceph: don't allow access to MDS-private inodes Sasha Levin
2021-05-12 18:02 ` [PATCH AUTOSEL 5.10 23/34] scsi: target: tcmu: Return from tcmu_handle_completions() if cmd_id not found Sasha Levin
2021-05-12 18:02 ` [PATCH AUTOSEL 5.10 24/34] amdgpu/pm: Prevent force of DCEFCLK on NAVI10 and SIENNA_CICHLID Sasha Levin
2021-05-12 18:02   ` Sasha Levin
2021-05-12 18:02   ` Sasha Levin
2021-05-12 18:02 ` [PATCH AUTOSEL 5.10 25/34] bridge: Fix possible races between assigning rx_handler_data and setting IFF_BRIDGE_PORT bit Sasha Levin
2021-05-12 18:02   ` [Bridge] " Sasha Levin
2021-05-12 18:02 ` [PATCH AUTOSEL 5.10 26/34] net: hsr: check skb can contain struct hsr_ethhdr in fill_frame_info Sasha Levin
2021-05-12 18:02 ` [PATCH AUTOSEL 5.10 27/34] nvmet: remove unsupported command noise Sasha Levin
2021-05-12 18:02   ` Sasha Levin
2021-05-12 18:02 ` [PATCH AUTOSEL 5.10 28/34] drm/amd/display: Fix two cursor duplication when using overlay Sasha Levin
2021-05-12 18:02   ` Sasha Levin
2021-05-12 18:02   ` Sasha Levin
2021-05-12 18:03 ` [PATCH AUTOSEL 5.10 29/34] gpiolib: acpi: Add quirk to ignore EC wakeups on Dell Venue 10 Pro 5055 Sasha Levin
2021-05-12 18:03 ` [PATCH AUTOSEL 5.10 30/34] net:CXGB4: fix leak if sk_buff is not used Sasha Levin
2021-05-12 18:03 ` [PATCH AUTOSEL 5.10 31/34] ALSA: hda: generic: change the DAC ctl name for LO+SPK or LO+HP Sasha Levin
2021-05-12 18:03   ` Sasha Levin
2021-05-12 18:03 ` [PATCH AUTOSEL 5.10 32/34] block: reexpand iov_iter after read/write Sasha Levin
2021-05-12 18:03 ` [PATCH AUTOSEL 5.10 33/34] lib: stackdepot: turn depot_lock spinlock to raw_spinlock Sasha Levin
2021-05-12 18:03 ` [PATCH AUTOSEL 5.10 34/34] net: stmmac: Do not enable RX FIFO overflow interrupts Sasha Levin
2021-05-12 18:03   ` Sasha Levin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210512180306.664925-1-sashal@kernel.org \
    --to=sashal@kernel.org \
    --cc=ardb@kernel.org \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=nico@fluxnic.net \
    --cc=rmk+kernel@armlinux.org.uk \
    --cc=stable@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.