* [PATCH] arm64: atomics: Fix the issue on xchg when switch to atomic instruction
@ 2020-05-05  9:02 Shaokun Zhang
  2020-05-05  9:15 ` Will Deacon
From: Shaokun Zhang @ 2020-05-05  9:02 UTC (permalink / raw)
  To: linux-arm-kernel
  Cc: Shaokun Zhang, Yuqi Jin, Andrew Murray, Will Deacon, Catalin Marinas

From: Yuqi Jin <jinyuqi@huawei.com>

Since commit addfc38672c7 ("arm64: atomics: avoid out-of-line ll/sc atomics"),
inline implementations of both the LSE and LL/SC atomics have been provided,
with a static key used to select between them, which allows the compiler to
generate better atomics code.
However, xchg still uses the original method, which would fail to switch to
the atomic instruction correctly. Let's fix this issue.

Fixes: addfc38672c7 ("arm64: atomics: avoid out-of-line ll/sc atomics")
Cc: Will Deacon <will@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Andrew Murray <amurray@thegoodpenguin.co.uk>
Signed-off-by: Yuqi Jin <jinyuqi@huawei.com>
Signed-off-by: Shaokun Zhang <zhangshaokun@hisilicon.com>
---
 arch/arm64/include/asm/atomic_ll_sc.h | 41 ++++++++++++++++++
 arch/arm64/include/asm/atomic_lse.h   | 35 +++++++++++++++
 arch/arm64/include/asm/cmpxchg.h      | 82 ++++++++---------------------------
 3 files changed, 93 insertions(+), 65 deletions(-)

diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h
index 13869b76b58c..73fcb71ccb91 100644
--- a/arch/arm64/include/asm/atomic_ll_sc.h
+++ b/arch/arm64/include/asm/atomic_ll_sc.h
@@ -348,6 +348,47 @@ __CMPXCHG_DBL(   ,        ,  ,         )
 __CMPXCHG_DBL(_mb, dmb ish, l, "memory")
 
 #undef __CMPXCHG_DBL
+
+#define __XCHG_CASE(w, sfx, name, sz, mb, nop_lse, acq, acq_lse, rel, cl)      \
+static inline u##sz __ll_sc__xchg_case_##name##sz(u##sz x, volatile void *ptr) \
+{                                                                              \
+	u##sz ret;                                                              \
+	unsigned long tmp;                                                      \
+										\
+	asm volatile(                                                           \
+	__LL_SC_FALLBACK(                                                       \
+	"       prfm    pstl1strm, %2\n"                                        \
+	"1:     ld" #acq "xr" #sfx "\t%" #w "0, %2\n"                           \
+	"       st" #rel "xr" #sfx "\t%w1, %" #w "3, %2\n"                      \
+	"       cbnz    %w1, 1b\n"                                              \
+	"       " #mb "\n"                                                      \
+	"2:")                                                                   \
+	: "=&r" (ret), "=&r" (tmp), "+Q" (*(u##sz *)ptr)                        \
+	: "r" (x)                                                               \
+	: cl);                                                                  \
+										\
+	return ret;                                                             \
+}
+
+__XCHG_CASE(w, b,     ,  8,        ,    ,  ,  ,  ,         )
+__XCHG_CASE(w, h,     , 16,        ,    ,  ,  ,  ,         )
+__XCHG_CASE(w,  ,     , 32,        ,    ,  ,  ,  ,         )
+__XCHG_CASE( ,  ,     , 64,        ,    ,  ,  ,  ,         )
+__XCHG_CASE(w, b, acq_,  8,        ,    , a, a,  , "memory")
+__XCHG_CASE(w, h, acq_, 16,        ,    , a, a,  , "memory")
+__XCHG_CASE(w,  , acq_, 32,        ,    , a, a,  , "memory")
+__XCHG_CASE( ,  , acq_, 64,        ,    , a, a,  , "memory")
+__XCHG_CASE(w, b, rel_,  8,        ,    ,  ,  , l, "memory")
+__XCHG_CASE(w, h, rel_, 16,        ,    ,  ,  , l, "memory")
+__XCHG_CASE(w,  , rel_, 32,        ,    ,  ,  , l, "memory")
+__XCHG_CASE( ,  , rel_, 64,        ,    ,  ,  , l, "memory")
+__XCHG_CASE(w, b,  mb_,  8, dmb ish, nop,  , a, l, "memory")
+__XCHG_CASE(w, h,  mb_, 16, dmb ish, nop,  , a, l, "memory")
+__XCHG_CASE(w,  ,  mb_, 32, dmb ish, nop,  , a, l, "memory")
+__XCHG_CASE( ,  ,  mb_, 64, dmb ish, nop,  , a, l, "memory")
+
+#undef __XCHG_CASE
+
 #undef K
 
 #endif	/* __ASM_ATOMIC_LL_SC_H */
diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h
index da3280f639cd..ddb2c212faa3 100644
--- a/arch/arm64/include/asm/atomic_lse.h
+++ b/arch/arm64/include/asm/atomic_lse.h
@@ -416,4 +416,39 @@ __CMPXCHG_DBL(_mb, al, "memory")
 
 #undef __CMPXCHG_DBL
 
+#define __XCHG_CASE(w, sfx, name, sz, mb, nop_lse, acq, acq_lse, rel, cl)             \
+static __always_inline u##sz __lse__xchg_case_##name##sz(u##sz x, volatile void *ptr) \
+{                                                                                     \
+	u##sz ret;                                                                     \
+	unsigned long tmp;                                                             \
+										       \
+	asm volatile(                                                                  \
+	__LSE_PREAMBLE                                                                 \
+	"       swp" #acq_lse #rel #sfx "\t%" #w "3, %" #w "0, %2\n"                   \
+	: "=&r" (ret), "=&r" (tmp), "+Q" (*(u##sz *)ptr)                               \
+	: "r" (x)                                                                      \
+	: cl);                                                                         \
+										       \
+	return ret;                                                                    \
+}
+
+__XCHG_CASE(w, b,     ,  8,        ,    ,  ,  ,  ,         )
+__XCHG_CASE(w, h,     , 16,        ,    ,  ,  ,  ,         )
+__XCHG_CASE(w,  ,     , 32,        ,    ,  ,  ,  ,         )
+__XCHG_CASE( ,  ,     , 64,        ,    ,  ,  ,  ,         )
+__XCHG_CASE(w, b, acq_,  8,        ,    , a, a,  , "memory")
+__XCHG_CASE(w, h, acq_, 16,        ,    , a, a,  , "memory")
+__XCHG_CASE(w,  , acq_, 32,        ,    , a, a,  , "memory")
+__XCHG_CASE( ,  , acq_, 64,        ,    , a, a,  , "memory")
+__XCHG_CASE(w, b, rel_,  8,        ,    ,  ,  , l, "memory")
+__XCHG_CASE(w, h, rel_, 16,        ,    ,  ,  , l, "memory")
+__XCHG_CASE(w,  , rel_, 32,        ,    ,  ,  , l, "memory")
+__XCHG_CASE( ,  , rel_, 64,        ,    ,  ,  , l, "memory")
+__XCHG_CASE(w, b,  mb_,  8, dmb ish, nop,  , a, l, "memory")
+__XCHG_CASE(w, h,  mb_, 16, dmb ish, nop,  , a, l, "memory")
+__XCHG_CASE(w,  ,  mb_, 32, dmb ish, nop,  , a, l, "memory")
+__XCHG_CASE( ,  ,  mb_, 64, dmb ish, nop,  , a, l, "memory")
+
+#undef __XCHG_CASE
+
 #endif	/* __ASM_ATOMIC_LSE_H */
diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h
index f9bef42c1411..084028518417 100644
--- a/arch/arm64/include/asm/cmpxchg.h
+++ b/arch/arm64/include/asm/cmpxchg.h
@@ -13,73 +13,25 @@
 #include <asm/barrier.h>
 #include <asm/lse.h>
 
-/*
- * We need separate acquire parameters for ll/sc and lse, since the full
- * barrier case is generated as release+dmb for the former and
- * acquire+release for the latter.
- */
-#define __XCHG_CASE(w, sfx, name, sz, mb, nop_lse, acq, acq_lse, rel, cl)	\
-static inline u##sz __xchg_case_##name##sz(u##sz x, volatile void *ptr)		\
-{										\
-	u##sz ret;								\
-	unsigned long tmp;							\
-										\
-	asm volatile(ARM64_LSE_ATOMIC_INSN(					\
-	/* LL/SC */								\
-	"	prfm	pstl1strm, %2\n"					\
-	"1:	ld" #acq "xr" #sfx "\t%" #w "0, %2\n"				\
-	"	st" #rel "xr" #sfx "\t%w1, %" #w "3, %2\n"			\
-	"	cbnz	%w1, 1b\n"						\
-	"	" #mb,								\
-	/* LSE atomics */							\
-	"	swp" #acq_lse #rel #sfx "\t%" #w "3, %" #w "0, %2\n"		\
-		__nops(3)							\
-	"	" #nop_lse)							\
-	: "=&r" (ret), "=&r" (tmp), "+Q" (*(u##sz *)ptr)			\
-	: "r" (x)								\
-	: cl);									\
-										\
-	return ret;								\
-}
-
-__XCHG_CASE(w, b,     ,  8,        ,    ,  ,  ,  ,         )
-__XCHG_CASE(w, h,     , 16,        ,    ,  ,  ,  ,         )
-__XCHG_CASE(w,  ,     , 32,        ,    ,  ,  ,  ,         )
-__XCHG_CASE( ,  ,     , 64,        ,    ,  ,  ,  ,         )
-__XCHG_CASE(w, b, acq_,  8,        ,    , a, a,  , "memory")
-__XCHG_CASE(w, h, acq_, 16,        ,    , a, a,  , "memory")
-__XCHG_CASE(w,  , acq_, 32,        ,    , a, a,  , "memory")
-__XCHG_CASE( ,  , acq_, 64,        ,    , a, a,  , "memory")
-__XCHG_CASE(w, b, rel_,  8,        ,    ,  ,  , l, "memory")
-__XCHG_CASE(w, h, rel_, 16,        ,    ,  ,  , l, "memory")
-__XCHG_CASE(w,  , rel_, 32,        ,    ,  ,  , l, "memory")
-__XCHG_CASE( ,  , rel_, 64,        ,    ,  ,  , l, "memory")
-__XCHG_CASE(w, b,  mb_,  8, dmb ish, nop,  , a, l, "memory")
-__XCHG_CASE(w, h,  mb_, 16, dmb ish, nop,  , a, l, "memory")
-__XCHG_CASE(w,  ,  mb_, 32, dmb ish, nop,  , a, l, "memory")
-__XCHG_CASE( ,  ,  mb_, 64, dmb ish, nop,  , a, l, "memory")
-
-#undef __XCHG_CASE
-
 #define __XCHG_GEN(sfx)							\
-static __always_inline  unsigned long __xchg##sfx(unsigned long x,	\
-					volatile void *ptr,		\
-					int size)			\
-{									\
-	switch (size) {							\
-	case 1:								\
-		return __xchg_case##sfx##_8(x, ptr);			\
-	case 2:								\
-		return __xchg_case##sfx##_16(x, ptr);			\
-	case 4:								\
-		return __xchg_case##sfx##_32(x, ptr);			\
-	case 8:								\
-		return __xchg_case##sfx##_64(x, ptr);			\
-	default:							\
-		BUILD_BUG();						\
-	}								\
+static __always_inline  unsigned long __xchg##sfx(unsigned long x,     \
+					volatile void *ptr,             \
+					int size)                       \
+{                                                                      \
+	switch (size) {                                                 \
+	case 1:                                                         \
+		return __lse_ll_sc_body(_xchg_case##sfx##_8, x, ptr);   \
+	case 2:                                                         \
+		return __lse_ll_sc_body(_xchg_case##sfx##_16, x, ptr);  \
+	case 4:                                                         \
+		return __lse_ll_sc_body(_xchg_case##sfx##_32, x, ptr);  \
+	case 8:                                                         \
+		return __lse_ll_sc_body(_xchg_case##sfx##_64, x, ptr);  \
+	default:                                                        \
+		BUILD_BUG();                                            \
+	}                                                               \
 									\
-	unreachable();							\
+	unreachable();                                                  \
 }
 
 __XCHG_GEN()
-- 
2.7.4


* Re: [PATCH] arm64: atomics: Fix the issue on xchg when switch to atomic instruction
  2020-05-05  9:02 [PATCH] arm64: atomics: Fix the issue on xchg when switch to atomic instruction Shaokun Zhang
@ 2020-05-05  9:15 ` Will Deacon
  2020-05-06  7:00   ` Shaokun Zhang
From: Will Deacon @ 2020-05-05  9:15 UTC (permalink / raw)
  To: Shaokun Zhang; +Cc: Yuqi Jin, Andrew Murray, linux-arm-kernel, Catalin Marinas

On Tue, May 05, 2020 at 05:02:35PM +0800, Shaokun Zhang wrote:
> From: Yuqi Jin <jinyuqi@huawei.com>
> 
> Since commit addfc38672c7 ("arm64: atomics: avoid out-of-line ll/sc atomics"),
> it has provided inline implementations of both LSE and ll/sc and used a static
> key to select between them, which allows the compiler to generate better
> atomics code.
> However, xchg still uses the original method which would fail to switch to
> the atomic instruction correctly, Let's fix this issue.

Please can you elaborate on the failure mode? The current code looks alright
to me, so I'm clearly missing something. What's broken?

Will

* Re: [PATCH] arm64: atomics: Fix the issue on xchg when switch to atomic instruction
  2020-05-05  9:15 ` Will Deacon
@ 2020-05-06  7:00   ` Shaokun Zhang
  2020-05-06  7:53     ` Will Deacon
From: Shaokun Zhang @ 2020-05-06  7:00 UTC (permalink / raw)
  To: Will Deacon; +Cc: Yuqi Jin, Andrew Murray, linux-arm-kernel, Catalin Marinas

Hi Will,

On 2020/5/5 17:15, Will Deacon wrote:
> On Tue, May 05, 2020 at 05:02:35PM +0800, Shaokun Zhang wrote:
>> From: Yuqi Jin <jinyuqi@huawei.com>
>>
>> Since commit addfc38672c7 ("arm64: atomics: avoid out-of-line ll/sc atomics"),
>> it has provided inline implementations of both LSE and ll/sc and used a static
>> key to select between them, which allows the compiler to generate better
>> atomics code.
>> However, xchg still uses the original method which would fail to switch to
>> the atomic instruction correctly, Let's fix this issue.
> 
> Please can you elaborate on the failure mode? The current code looks alright

When CONFIG_ARM64_LSE_ATOMICS is enabled, xchg fails to switch to the swp
instruction; or at least, the dynamically patched instructions are not visible.

We ran some tests on a copy of xchg_tail:
u32 xchg_tail_my(struct qspinlock *lock, u32 tail)
{
        return (u32)xchg_relaxed(&lock->tail,
                                 tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET;
}
and the asm code is as follows:

ffff80001015b050 <xchg_tail_my>:
ffff80001015b050:       a9be7bfd        stp     x29, x30, [sp, #-32]!
ffff80001015b054:       910003fd        mov     x29, sp
ffff80001015b058:       a90153f3        stp     x19, x20, [sp, #16]
ffff80001015b05c:       2a0103f3        mov     w19, w1
ffff80001015b060:       aa0003f4        mov     x20, x0
ffff80001015b064:       aa1e03e0        mov     x0, x30
ffff80001015b068:       97fd07ee        bl      ffff80001009d020 <_mcount>
ffff80001015b06c:       53107e61        lsr     w1, w19, #16
ffff80001015b070:       91000a83        add     x3, x20, #0x2
ffff80001015b074:       f9800071        prfm    pstl1strm, [x3]
ffff80001015b078:       485f7c60        ldxrh   w0, [x3]
ffff80001015b07c:       48027c61        stxrh   w2, w1, [x3]
ffff80001015b080:       35ffffc2        cbnz    w2, ffff80001015b078 <xchg_tail_my+0x28>
ffff80001015b084:       53103c00        lsl     w0, w0, #16
ffff80001015b088:       a94153f3        ldp     x19, x20, [sp, #16]
ffff80001015b08c:       a8c27bfd        ldp     x29, x30, [sp], #32
ffff80001015b090:       d65f03c0        ret

> to me, so I'm clearly missing something. What's broken?
> 

I'm not sure whether ARM64_LSE_ATOMIC_INSN still works correctly after commit
addfc38672c7. If we implement xchg using __lse_ll_sc_body, as cmpxchg_case does,
xchg works fine.
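
For reference, the __lse_ll_sc_body() dispatch that the cmpxchg_case helpers go
through is roughly the following (a simplified sketch of arch/arm64/include/asm/lse.h
from around that time; the exact capability and static-key names are from memory
and may differ):

/* Both variants are built inline; a static key picks one at runtime. */
static __always_inline bool system_uses_lse_atomics(void)
{
	return static_branch_likely(&arm64_const_caps_ready) &&
	       static_branch_likely(&cpu_hwcap_keys[ARM64_HAS_LSE_ATOMICS]);
}

#define __lse_ll_sc_body(op, ...)					\
({									\
	system_uses_lse_atomics() ?					\
		__lse_##op(__VA_ARGS__) :				\
		__ll_sc_##op(__VA_ARGS__);				\
})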

What's more, I am wondering why xchg still uses the dynamic replacement mode
while cmpxchg uses another mode. ;-)

Thanks,
Shaokun

> Will
> 
> .
> 


* Re: [PATCH] arm64: atomics: Fix the issue on xchg when switch to atomic instruction
  2020-05-06  7:00   ` Shaokun Zhang
@ 2020-05-06  7:53     ` Will Deacon
  2020-05-06 10:39       ` Shaokun Zhang
From: Will Deacon @ 2020-05-06  7:53 UTC (permalink / raw)
  To: Shaokun Zhang; +Cc: Yuqi Jin, Andrew Murray, linux-arm-kernel, Catalin Marinas

On Wed, May 06, 2020 at 03:00:39PM +0800, Shaokun Zhang wrote:
> On 2020/5/5 17:15, Will Deacon wrote:
> > On Tue, May 05, 2020 at 05:02:35PM +0800, Shaokun Zhang wrote:
> >> From: Yuqi Jin <jinyuqi@huawei.com>
> >>
> >> Since commit addfc38672c7 ("arm64: atomics: avoid out-of-line ll/sc atomics"),
> >> it has provided inline implementations of both LSE and ll/sc and used a static
> >> key to select between them, which allows the compiler to generate better
> >> atomics code.
> >> However, xchg still uses the original method which would fail to switch to
> >> the atomic instruction correctly, Let's fix this issue.
> > 
> > Please can you elaborate on the failure mode? The current code looks alright
> 
> When enable CONFIG_ARM64_LSE_ATOMICS, xchg is failed to switch to swp instruction
> or dynamic replacement instructions are not seen.
> 
> We do some tests on the copy of xchg_tail,:
> u32 xchg_tail_my(struct qspinlock *lock, u32 tail)
> {
>         return (u32)xchg_relaxed(&lock->tail,
>                                  tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET;
> }
> and the asm code is as follows:
> 
> ffff80001015b050 <xchg_tail_my>:
> ffff80001015b050:       a9be7bfd        stp     x29, x30, [sp, #-32]!
> ffff80001015b054:       910003fd        mov     x29, sp
> ffff80001015b058:       a90153f3        stp     x19, x20, [sp, #16]
> ffff80001015b05c:       2a0103f3        mov     w19, w1
> ffff80001015b060:       aa0003f4        mov     x20, x0
> ffff80001015b064:       aa1e03e0        mov     x0, x30
> ffff80001015b068:       97fd07ee        bl      ffff80001009d020 <_mcount>
> ffff80001015b06c:       53107e61        lsr     w1, w19, #16
> ffff80001015b070:       91000a83        add     x3, x20, #0x2
> ffff80001015b074:       f9800071        prfm    pstl1strm, [x3]
> ffff80001015b078:       485f7c60        ldxrh   w0, [x3]
> ffff80001015b07c:       48027c61        stxrh   w2, w1, [x3]
> ffff80001015b080:       35ffffc2        cbnz    w2, ffff80001015b078 <xchg_tail_my+0x28>
> ffff80001015b084:       53103c00        lsl     w0, w0, #16
> ffff80001015b088:       a94153f3        ldp     x19, x20, [sp, #16]
> ffff80001015b08c:       a8c27bfd        ldp     x29, x30, [sp], #32
> ffff80001015b090:       d65f03c0        ret

This should get patched at runtime, but you're saying that's not happening?

> > to me, so I'm clearly missing something. What's broken?
> > 
> 
> I'm not sure whether the ARM64_LSE_ATOMIC_INSN could works correctly after the
> commit addfc38672c7. If we implement xchg using __lse_ll_sc_body like cmpxchg_case,
> xchg works ok.
> 
> What's more, I am wondering why xchg still uses the dynamic replacement mode,
> but cmpxchg uses another mode. ;-)

There's a trade-off involving the number of clobbered registers and the
number of instructions, which made a bit more sense when we used to branch
out-of-line. We also do the direct patching for the pcpu atomics.
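
For context, the runtime-patching path that xchg currently takes goes through
ARM64_LSE_ATOMIC_INSN(), which is roughly the following (simplified sketch of
arch/arm64/include/asm/lse.h):

/* Patch the LL/SC sequence into the LSE one via the alternatives framework. */
#define ARM64_LSE_ATOMIC_INSN(llsc, lse)				\
	ALTERNATIVE(llsc, lse, ARM64_HAS_LSE_ATOMICS)

Because both alternatives must occupy the same number of instruction slots, the
shorter LSE sequence in __XCHG_CASE is padded with __nops(3).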

Will

* Re: [PATCH] arm64: atomics: Fix the issue on xchg when switch to atomic instruction
  2020-05-06  7:53     ` Will Deacon
@ 2020-05-06 10:39       ` Shaokun Zhang
  2020-05-06 10:44         ` Will Deacon
From: Shaokun Zhang @ 2020-05-06 10:39 UTC (permalink / raw)
  To: Will Deacon; +Cc: Yuqi Jin, Andrew Murray, linux-arm-kernel, Catalin Marinas

Hi Will,

Apologies for the noise; you are right and it's my mistake.

On 2020/5/6 15:53, Will Deacon wrote:
> On Wed, May 06, 2020 at 03:00:39PM +0800, Shaokun Zhang wrote:
>> On 2020/5/5 17:15, Will Deacon wrote:
>>> On Tue, May 05, 2020 at 05:02:35PM +0800, Shaokun Zhang wrote:
>>>> From: Yuqi Jin <jinyuqi@huawei.com>
>>>>
>>>> Since commit addfc38672c7 ("arm64: atomics: avoid out-of-line ll/sc atomics"),
>>>> it has provided inline implementations of both LSE and ll/sc and used a static
>>>> key to select between them, which allows the compiler to generate better
>>>> atomics code.
>>>> However, xchg still uses the original method which would fail to switch to
>>>> the atomic instruction correctly, Let's fix this issue.
>>>
>>> Please can you elaborate on the failure mode? The current code looks alright
>>
>> When enable CONFIG_ARM64_LSE_ATOMICS, xchg is failed to switch to swp instruction
>> or dynamic replacement instructions are not seen.
>>
>> We do some tests on the copy of xchg_tail,:
>> u32 xchg_tail_my(struct qspinlock *lock, u32 tail)
>> {
>>         return (u32)xchg_relaxed(&lock->tail,
>>                                  tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET;
>> }
>> and the asm code is as follows:
>>
>> ffff80001015b050 <xchg_tail_my>:
>> ffff80001015b050:       a9be7bfd        stp     x29, x30, [sp, #-32]!
>> ffff80001015b054:       910003fd        mov     x29, sp
>> ffff80001015b058:       a90153f3        stp     x19, x20, [sp, #16]
>> ffff80001015b05c:       2a0103f3        mov     w19, w1
>> ffff80001015b060:       aa0003f4        mov     x20, x0
>> ffff80001015b064:       aa1e03e0        mov     x0, x30
>> ffff80001015b068:       97fd07ee        bl      ffff80001009d020 <_mcount>
>> ffff80001015b06c:       53107e61        lsr     w1, w19, #16
>> ffff80001015b070:       91000a83        add     x3, x20, #0x2
>> ffff80001015b074:       f9800071        prfm    pstl1strm, [x3]
>> ffff80001015b078:       485f7c60        ldxrh   w0, [x3]
>> ffff80001015b07c:       48027c61        stxrh   w2, w1, [x3]
>> ffff80001015b080:       35ffffc2        cbnz    w2, ffff80001015b078 <xchg_tail_my+0x28>
>> ffff80001015b084:       53103c00        lsl     w0, w0, #16
>> ffff80001015b088:       a94153f3        ldp     x19, x20, [sp, #16]
>> ffff80001015b08c:       a8c27bfd        ldp     x29, x30, [sp], #32
>> ffff80001015b090:       d65f03c0        ret
> 
> This should get patched at runtime, but you're saying that's not happening?
> 

My mistake, I didn't check the runtime carefully.
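
For what it's worth, one way to see what actually executes (rather than what
objdump shows for the on-disk image) is to dump the instruction words at
runtime. A rough sketch as a throwaway test module, assuming the xchg_tail_my()
copy above is linked in and reachable (the module and symbol names here are
only for illustration):

#include <linux/module.h>
#include <linux/kernel.h>

struct qspinlock;
/* The copy of xchg_tail shown earlier, assumed visible to this module. */
extern u32 xchg_tail_my(struct qspinlock *lock, u32 tail);

static int __init dump_xchg_init(void)
{
	const u32 *insn = (const u32 *)xchg_tail_my;
	int i;

	/* Print the first 16 instruction words of the live kernel text. */
	for (i = 0; i < 16; i++)
		pr_info("%px: %08x\n", insn + i, insn[i]);

	return 0;
}
module_init(dump_xchg_init);
MODULE_LICENSE("GPL");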

>>> to me, so I'm clearly missing something. What's broken?
>>>
>>
>> I'm not sure whether the ARM64_LSE_ATOMIC_INSN could works correctly after the
>> commit addfc38672c7. If we implement xchg using __lse_ll_sc_body like cmpxchg_case,
>> xchg works ok.
>>
>> What's more, I am wondering why xchg still uses the dynamic replacement mode,
>> but cmpxchg uses another mode. ;-)
> 
> There's a trade-off involving the number of clobbered registers and the
> number of instructions, which made a bit more sense when we used to branch
> out-of-line. We also do the direct patching for the pcpu atomics.
> 

Thanks for your explanation, got it, and I did check the pcpu atomics before.

Thanks,
Shaokun

> Will
> 
> .
> 


* Re: [PATCH] arm64: atomics: Fix the issue on xchg when switch to atomic instruction
  2020-05-06 10:39       ` Shaokun Zhang
@ 2020-05-06 10:44         ` Will Deacon
  2020-05-06 11:30           ` Shaokun Zhang
From: Will Deacon @ 2020-05-06 10:44 UTC (permalink / raw)
  To: Shaokun Zhang; +Cc: Yuqi Jin, Andrew Murray, linux-arm-kernel, Catalin Marinas

On Wed, May 06, 2020 at 06:39:16PM +0800, Shaokun Zhang wrote:
> Apologies for my noise, you are right and it's my mistake.

No need to apologise, but thanks for letting me know.

> On 2020/5/6 15:53, Will Deacon wrote:
> > On Wed, May 06, 2020 at 03:00:39PM +0800, Shaokun Zhang wrote:
> >> On 2020/5/5 17:15, Will Deacon wrote:
> >>> On Tue, May 05, 2020 at 05:02:35PM +0800, Shaokun Zhang wrote:
> >>>> From: Yuqi Jin <jinyuqi@huawei.com>
> >>>>
> >>>> Since commit addfc38672c7 ("arm64: atomics: avoid out-of-line ll/sc atomics"),
> >>>> it has provided inline implementations of both LSE and ll/sc and used a static
> >>>> key to select between them, which allows the compiler to generate better
> >>>> atomics code.
> >>>> However, xchg still uses the original method which would fail to switch to
> >>>> the atomic instruction correctly, Let's fix this issue.
> >>>
> >>> Please can you elaborate on the failure mode? The current code looks alright
> >>
> >> When enable CONFIG_ARM64_LSE_ATOMICS, xchg is failed to switch to swp instruction
> >> or dynamic replacement instructions are not seen.
> >>
> >> We do some tests on the copy of xchg_tail,:
> >> u32 xchg_tail_my(struct qspinlock *lock, u32 tail)
> >> {
> >>         return (u32)xchg_relaxed(&lock->tail,
> >>                                  tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET;
> >> }
> >> and the asm code is as follows:
> >>
> >> ffff80001015b050 <xchg_tail_my>:
> >> ffff80001015b050:       a9be7bfd        stp     x29, x30, [sp, #-32]!
> >> ffff80001015b054:       910003fd        mov     x29, sp
> >> ffff80001015b058:       a90153f3        stp     x19, x20, [sp, #16]
> >> ffff80001015b05c:       2a0103f3        mov     w19, w1
> >> ffff80001015b060:       aa0003f4        mov     x20, x0
> >> ffff80001015b064:       aa1e03e0        mov     x0, x30
> >> ffff80001015b068:       97fd07ee        bl      ffff80001009d020 <_mcount>
> >> ffff80001015b06c:       53107e61        lsr     w1, w19, #16
> >> ffff80001015b070:       91000a83        add     x3, x20, #0x2
> >> ffff80001015b074:       f9800071        prfm    pstl1strm, [x3]
> >> ffff80001015b078:       485f7c60        ldxrh   w0, [x3]
> >> ffff80001015b07c:       48027c61        stxrh   w2, w1, [x3]
> >> ffff80001015b080:       35ffffc2        cbnz    w2, ffff80001015b078 <xchg_tail_my+0x28>
> >> ffff80001015b084:       53103c00        lsl     w0, w0, #16
> >> ffff80001015b088:       a94153f3        ldp     x19, x20, [sp, #16]
> >> ffff80001015b08c:       a8c27bfd        ldp     x29, x30, [sp], #32
> >> ffff80001015b090:       d65f03c0        ret
> > 
> > This should get patched at runtime, but you're saying that's not happening?
> > 
> 
> My mistake, I didn't check the runtime carefully.

Good to hear there's not a bug, but if you see a performance benefit from
using the static-key for xchg() then I'd obviously be open to changing it
over as well.

Thanks,

Will

* Re: [PATCH] arm64: atomics: Fix the issue on xchg when switch to atomic instruction
  2020-05-06 10:44         ` Will Deacon
@ 2020-05-06 11:30           ` Shaokun Zhang
  2020-05-07  7:54             ` Shaokun Zhang
From: Shaokun Zhang @ 2020-05-06 11:30 UTC (permalink / raw)
  To: Will Deacon; +Cc: Yuqi Jin, Andrew Murray, linux-arm-kernel, Catalin Marinas

Hi Will,

On 2020/5/6 18:44, Will Deacon wrote:
> On Wed, May 06, 2020 at 06:39:16PM +0800, Shaokun Zhang wrote:
>> Apologies for my noise, you are right and it's my mistake.
> 
> No need to apologise, but thanks for letting me know.
> 
>> On 2020/5/6 15:53, Will Deacon wrote:
>>> On Wed, May 06, 2020 at 03:00:39PM +0800, Shaokun Zhang wrote:
>>>> On 2020/5/5 17:15, Will Deacon wrote:
>>>>> On Tue, May 05, 2020 at 05:02:35PM +0800, Shaokun Zhang wrote:
>>>>>> From: Yuqi Jin <jinyuqi@huawei.com>
>>>>>>
>>>>>> Since commit addfc38672c7 ("arm64: atomics: avoid out-of-line ll/sc atomics"),
>>>>>> it has provided inline implementations of both LSE and ll/sc and used a static
>>>>>> key to select between them, which allows the compiler to generate better
>>>>>> atomics code.
>>>>>> However, xchg still uses the original method which would fail to switch to
>>>>>> the atomic instruction correctly, Let's fix this issue.
>>>>>
>>>>> Please can you elaborate on the failure mode? The current code looks alright
>>>>
>>>> When enable CONFIG_ARM64_LSE_ATOMICS, xchg is failed to switch to swp instruction
>>>> or dynamic replacement instructions are not seen.
>>>>
>>>> We do some tests on the copy of xchg_tail,:
>>>> u32 xchg_tail_my(struct qspinlock *lock, u32 tail)
>>>> {
>>>>         return (u32)xchg_relaxed(&lock->tail,
>>>>                                  tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET;
>>>> }
>>>> and the asm code is as follows:
>>>>
>>>> ffff80001015b050 <xchg_tail_my>:
>>>> ffff80001015b050:       a9be7bfd        stp     x29, x30, [sp, #-32]!
>>>> ffff80001015b054:       910003fd        mov     x29, sp
>>>> ffff80001015b058:       a90153f3        stp     x19, x20, [sp, #16]
>>>> ffff80001015b05c:       2a0103f3        mov     w19, w1
>>>> ffff80001015b060:       aa0003f4        mov     x20, x0
>>>> ffff80001015b064:       aa1e03e0        mov     x0, x30
>>>> ffff80001015b068:       97fd07ee        bl      ffff80001009d020 <_mcount>
>>>> ffff80001015b06c:       53107e61        lsr     w1, w19, #16
>>>> ffff80001015b070:       91000a83        add     x3, x20, #0x2
>>>> ffff80001015b074:       f9800071        prfm    pstl1strm, [x3]
>>>> ffff80001015b078:       485f7c60        ldxrh   w0, [x3]
>>>> ffff80001015b07c:       48027c61        stxrh   w2, w1, [x3]
>>>> ffff80001015b080:       35ffffc2        cbnz    w2, ffff80001015b078 <xchg_tail_my+0x28>
>>>> ffff80001015b084:       53103c00        lsl     w0, w0, #16
>>>> ffff80001015b088:       a94153f3        ldp     x19, x20, [sp, #16]
>>>> ffff80001015b08c:       a8c27bfd        ldp     x29, x30, [sp], #32
>>>> ffff80001015b090:       d65f03c0        ret
>>>
>>> This should get patched at runtime, but you're saying that's not happening?
>>>
>>
>> My mistake, I didn't check the runtime carefully.
> 
> Good to hear there's not a bug, but if you see a performance benefit from
> using the static-key for xchg() then I'd obviously be open to changing it

Thanks for your reply. If I follow the two methods correctly, the static-key
version does not need the '__nops(3)' padding; the rest is the same.

I will run some tests to check the performance  ;-)

Thanks,
Shaokun


> over as well.
> 
> Thanks,
> 
> Will
> 
> .
> 


* Re: [PATCH] arm64: atomics: Fix the issue on xchg when switch to atomic instruction
  2020-05-06 11:30           ` Shaokun Zhang
@ 2020-05-07  7:54             ` Shaokun Zhang
  2020-05-25  9:27               ` Shaokun Zhang
From: Shaokun Zhang @ 2020-05-07  7:54 UTC (permalink / raw)
  To: Will Deacon; +Cc: Yuqi Jin, Andrew Murray, linux-arm-kernel, Catalin Marinas

Hi Will,

On 2020/5/6 19:30, Shaokun Zhang wrote:
> Hi Will,
> 
> On 2020/5/6 18:44, Will Deacon wrote:
>> On Wed, May 06, 2020 at 06:39:16PM +0800, Shaokun Zhang wrote:
>>> Apologies for my noise, you are right and it's my mistake.
>>
>> No need to apologise, but thanks for letting me know.
>>
>>> On 2020/5/6 15:53, Will Deacon wrote:
>>>> On Wed, May 06, 2020 at 03:00:39PM +0800, Shaokun Zhang wrote:
>>>>> On 2020/5/5 17:15, Will Deacon wrote:
>>>>>> On Tue, May 05, 2020 at 05:02:35PM +0800, Shaokun Zhang wrote:
>>>>>>> From: Yuqi Jin <jinyuqi@huawei.com>
>>>>>>>
>>>>>>> Since commit addfc38672c7 ("arm64: atomics: avoid out-of-line ll/sc atomics"),
>>>>>>> it has provided inline implementations of both LSE and ll/sc and used a static
>>>>>>> key to select between them, which allows the compiler to generate better
>>>>>>> atomics code.
>>>>>>> However, xchg still uses the original method which would fail to switch to
>>>>>>> the atomic instruction correctly, Let's fix this issue.
>>>>>>
>>>>>> Please can you elaborate on the failure mode? The current code looks alright
>>>>>
>>>>> When enable CONFIG_ARM64_LSE_ATOMICS, xchg is failed to switch to swp instruction
>>>>> or dynamic replacement instructions are not seen.
>>>>>
>>>>> We do some tests on the copy of xchg_tail,:
>>>>> u32 xchg_tail_my(struct qspinlock *lock, u32 tail)
>>>>> {
>>>>>         return (u32)xchg_relaxed(&lock->tail,
>>>>>                                  tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET;
>>>>> }
>>>>> and the asm code is as follows:
>>>>>
>>>>> ffff80001015b050 <xchg_tail_my>:
>>>>> ffff80001015b050:       a9be7bfd        stp     x29, x30, [sp, #-32]!
>>>>> ffff80001015b054:       910003fd        mov     x29, sp
>>>>> ffff80001015b058:       a90153f3        stp     x19, x20, [sp, #16]
>>>>> ffff80001015b05c:       2a0103f3        mov     w19, w1
>>>>> ffff80001015b060:       aa0003f4        mov     x20, x0
>>>>> ffff80001015b064:       aa1e03e0        mov     x0, x30
>>>>> ffff80001015b068:       97fd07ee        bl      ffff80001009d020 <_mcount>
>>>>> ffff80001015b06c:       53107e61        lsr     w1, w19, #16
>>>>> ffff80001015b070:       91000a83        add     x3, x20, #0x2
>>>>> ffff80001015b074:       f9800071        prfm    pstl1strm, [x3]
>>>>> ffff80001015b078:       485f7c60        ldxrh   w0, [x3]
>>>>> ffff80001015b07c:       48027c61        stxrh   w2, w1, [x3]
>>>>> ffff80001015b080:       35ffffc2        cbnz    w2, ffff80001015b078 <xchg_tail_my+0x28>
>>>>> ffff80001015b084:       53103c00        lsl     w0, w0, #16
>>>>> ffff80001015b088:       a94153f3        ldp     x19, x20, [sp, #16]
>>>>> ffff80001015b08c:       a8c27bfd        ldp     x29, x30, [sp], #32
>>>>> ffff80001015b090:       d65f03c0        ret
>>>>
>>>> This should get patched at runtime, but you're saying that's not happening?
>>>>
>>>
>>> My mistake, I didn't check the runtime carefully.
>>
>> Good to hear there's not a bug, but if you see a performance benefit from
>> using the static-key for xchg() then I'd obviously be open to changing it
> 
> Thanks your reply, if I follow the two methods correctly, static-key will
> not consume '__nops(3)', others are the same.
> 
> I will run some tests to check the performance  ;-)
> 

We compared the two methods on Huawei Kunpeng 920; the throughput per second
is as follows:

one core  |without delay| 200ns delay|
--------------------------------------
static-key| 55294942    | 3937156    |
--------------------------------------
runtime   | 54706282    | 3918188    |
--------------------------------------

If we run this test using 32 cores, the result is almost the same.
The test code is as follows:
if (delay_o) {
	while (get_cycles() <= (time_temp + t_cnt)) {
		(void)atomic64_xchg(&wk_in->num, 1);
		myndelay(delay_o);
		(void)atomic64_xchg(&wk_in->num, 2);
		myndelay(delay_o);
		w_cnt += 2;
	}
} else {
	while (get_cycles() <= (time_temp + t_cnt)) {
		(void)atomic64_xchg(&wk_in->num, 1);
		(void)atomic64_xchg(&wk_in->num, 2);
		w_cnt += 2;
	}
}

Thanks,
Shaokun

> Thanks,
> Shaokun
> 
> 
>> over as well.
>>
>> Thanks,
>>
>> Will
>>
>> .
>>


* Re: [PATCH] arm64: atomics: Fix the issue on xchg when switch to atomic instruction
  2020-05-07  7:54             ` Shaokun Zhang
@ 2020-05-25  9:27               ` Shaokun Zhang
  2020-05-26 19:55                 ` Will Deacon
From: Shaokun Zhang @ 2020-05-25  9:27 UTC (permalink / raw)
  To: Will Deacon; +Cc: Yuqi Jin, Andrew Murray, linux-arm-kernel, Catalin Marinas

Hi Will,

On 2020/5/7 15:54, Shaokun Zhang wrote:
> Hi Will,
> 
> On 2020/5/6 19:30, Shaokun Zhang wrote:
>> Hi Will,
>>
>> On 2020/5/6 18:44, Will Deacon wrote:
>>> On Wed, May 06, 2020 at 06:39:16PM +0800, Shaokun Zhang wrote:
>>>> Apologies for my noise, you are right and it's my mistake.
>>>
>>> No need to apologise, but thanks for letting me know.
>>>
>>>> On 2020/5/6 15:53, Will Deacon wrote:
>>>>> On Wed, May 06, 2020 at 03:00:39PM +0800, Shaokun Zhang wrote:
>>>>>> On 2020/5/5 17:15, Will Deacon wrote:
>>>>>>> On Tue, May 05, 2020 at 05:02:35PM +0800, Shaokun Zhang wrote:
>>>>>>>> From: Yuqi Jin <jinyuqi@huawei.com>
>>>>>>>>
>>>>>>>> Since commit addfc38672c7 ("arm64: atomics: avoid out-of-line ll/sc atomics"),
>>>>>>>> it has provided inline implementations of both LSE and ll/sc and used a static
>>>>>>>> key to select between them, which allows the compiler to generate better
>>>>>>>> atomics code.
>>>>>>>> However, xchg still uses the original method which would fail to switch to
>>>>>>>> the atomic instruction correctly, Let's fix this issue.
>>>>>>>
>>>>>>> Please can you elaborate on the failure mode? The current code looks alright
>>>>>>
>>>>>> When enable CONFIG_ARM64_LSE_ATOMICS, xchg is failed to switch to swp instruction
>>>>>> or dynamic replacement instructions are not seen.
>>>>>>
>>>>>> We do some tests on the copy of xchg_tail,:
>>>>>> u32 xchg_tail_my(struct qspinlock *lock, u32 tail)
>>>>>> {
>>>>>>         return (u32)xchg_relaxed(&lock->tail,
>>>>>>                                  tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET;
>>>>>> }
>>>>>> and the asm code is as follows:
>>>>>>
>>>>>> ffff80001015b050 <xchg_tail_my>:
>>>>>> ffff80001015b050:       a9be7bfd        stp     x29, x30, [sp, #-32]!
>>>>>> ffff80001015b054:       910003fd        mov     x29, sp
>>>>>> ffff80001015b058:       a90153f3        stp     x19, x20, [sp, #16]
>>>>>> ffff80001015b05c:       2a0103f3        mov     w19, w1
>>>>>> ffff80001015b060:       aa0003f4        mov     x20, x0
>>>>>> ffff80001015b064:       aa1e03e0        mov     x0, x30
>>>>>> ffff80001015b068:       97fd07ee        bl      ffff80001009d020 <_mcount>
>>>>>> ffff80001015b06c:       53107e61        lsr     w1, w19, #16
>>>>>> ffff80001015b070:       91000a83        add     x3, x20, #0x2
>>>>>> ffff80001015b074:       f9800071        prfm    pstl1strm, [x3]
>>>>>> ffff80001015b078:       485f7c60        ldxrh   w0, [x3]
>>>>>> ffff80001015b07c:       48027c61        stxrh   w2, w1, [x3]
>>>>>> ffff80001015b080:       35ffffc2        cbnz    w2, ffff80001015b078 <xchg_tail_my+0x28>
>>>>>> ffff80001015b084:       53103c00        lsl     w0, w0, #16
>>>>>> ffff80001015b088:       a94153f3        ldp     x19, x20, [sp, #16]
>>>>>> ffff80001015b08c:       a8c27bfd        ldp     x29, x30, [sp], #32
>>>>>> ffff80001015b090:       d65f03c0        ret
>>>>>
>>>>> This should get patched at runtime, but you're saying that's not happening?
>>>>>
>>>>
>>>> My mistake, I didn't check the runtime carefully.
>>>
>>> Good to hear there's not a bug, but if you see a performance benefit from
>>> using the static-key for xchg() then I'd obviously be open to changing it
>>
>> Thanks your reply, if I follow the two methods correctly, static-key will
>> not consume '__nops(3)', others are the same.
>>
>> I will run some tests to check the performance  ;-)
>>
> 
> We compare the two methods on Huawei Kunpeng920 and the throughput per second
> as follows:
> 
> one core  |without delay| 200ns delay|
> --------------------------------------
> static-key| 55294942    | 3937156    |
> --------------------------------------
> runtime   | 54706282    | 3918188    |
> --------------------------------------
> 

Are you happy to pick up this patch, since it has some benefit for a single core?  ;-)

Thanks,
Shaokun

> If we run this test using 32-cores, the result is almost the same.
> Test code is followed:
> if(delay_o) {
> 	while (get_cycles() <= (time_temp + t_cnt)) {
> 		(void)atomic64_xchg(&wk_in->num, 1);
> 		myndelay(delay_o);
> 		(void)atomic64_xchg(&wk_in->num, 2);
> 		myndelay(delay_o);
> 		w_cnt+=2;
> 	}
> } else {
> 	while (get_cycles() <= (time_temp + t_cnt)){
> 		(void)atomic64_xchg(&wk_in->num, 1);
> 		(void)atomic64_xchg(&wk_in->num, 2);
> 		w_cnt+=2;
> 	}
> }
> 
> Thanks,
> Shaokun
> 
>> Thanks,
>> Shaokun
>>
>>
>>> over as well.
>>>
>>> Thanks,
>>>
>>> Will
>>>
>>> .
>>>


* Re: [PATCH] arm64: atomics: Fix the issue on xchg when switch to atomic instruction
  2020-05-25  9:27               ` Shaokun Zhang
@ 2020-05-26 19:55                 ` Will Deacon
From: Will Deacon @ 2020-05-26 19:55 UTC (permalink / raw)
  To: Shaokun Zhang; +Cc: Yuqi Jin, Andrew Murray, linux-arm-kernel, Catalin Marinas

On Mon, May 25, 2020 at 05:27:30PM +0800, Shaokun Zhang wrote:
> On 2020/5/7 15:54, Shaokun Zhang wrote:
> > On 2020/5/6 19:30, Shaokun Zhang wrote:
> >> On 2020/5/6 18:44, Will Deacon wrote:
> >>> Good to hear there's not a bug, but if you see a performance benefit from
> >>> using the static-key for xchg() then I'd obviously be open to changing it
> >>
> >> Thanks your reply, if I follow the two methods correctly, static-key will
> >> not consume '__nops(3)', others are the same.
> >>
> >> I will run some tests to check the performance  ;-)
> >>
> > 
> > We compare the two methods on Huawei Kunpeng920 and the throughput per second
> > as follows:
> > 
> > one core  |without delay| 200ns delay|
> > --------------------------------------
> > static-key| 55294942    | 3937156    |
> > --------------------------------------
> > runtime   | 54706282    | 3918188    |
> > --------------------------------------
> > 
> 
> Are you happy to pick up this patch since it has some benefits for single core?  ;-)

Is it really worth it? I don't think so.

Will
