* backport of patches 8238b4579866b7c1bb99883cfe102a43db5506ff and d6ffe6067a54972564552ea45d320fb98db1ac5e @ 2022-09-30 15:32 Mikulas Patocka 2022-09-30 15:33 ` [PATCH] wait_on_bit: add an acquire memory barrier Mikulas Patocka ` (10 more replies) 0 siblings, 11 replies; 48+ messages in thread From: Mikulas Patocka @ 2022-09-30 15:32 UTC (permalink / raw) To: gregkh; +Cc: stable Hi Here I'm submitting backport of patches 8238b4579866b7c1bb99883cfe102a43db5506ff and d6ffe6067a54972564552ea45d320fb98db1ac5e to the stable branches. Mikulas ^ permalink raw reply [flat|nested] 48+ messages in thread
* [PATCH] wait_on_bit: add an acquire memory barrier 2022-09-30 15:32 backport of patches 8238b4579866b7c1bb99883cfe102a43db5506ff and d6ffe6067a54972564552ea45d320fb98db1ac5e Mikulas Patocka @ 2022-09-30 15:33 ` Mikulas Patocka 2022-09-30 15:33 ` Mikulas Patocka ` (9 subsequent siblings) 10 siblings, 0 replies; 48+ messages in thread From: Mikulas Patocka @ 2022-09-30 15:33 UTC (permalink / raw) To: gregkh; +Cc: stable This is backport of the upstream patch 8238b4579866b7c1bb99883cfe102a43db5506ff for the stable branches 5.10, 5.15, 5.19 Signed-off-by: Mikulas Patocka <mpatocka@redhat.com> --- arch/x86/include/asm/bitops.h | 21 +++++++++++++++++++ include/asm-generic/bitops/instrumented-non-atomic.h | 12 ++++++++++ include/asm-generic/bitops/non-atomic.h | 14 ++++++++++++ include/linux/buffer_head.h | 2 - include/linux/wait_bit.h | 8 +++---- kernel/sched/wait_bit.c | 2 - 6 files changed, 53 insertions(+), 6 deletions(-) Index: linux-stable/arch/x86/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/x86/include/asm/bitops.h 2022-09-30 13:07:11.000000000 +0200 +++ linux-stable/arch/x86/include/asm/bitops.h 2022-09-30 13:07:11.000000000 +0200 @@ -207,6 +207,20 @@ static __always_inline bool constant_tes (addr[nr >> _BITOPS_LONG_SHIFT])) != 0; } +static __always_inline bool constant_test_bit_acquire(long nr, const volatile unsigned long *addr) +{ + bool oldbit; + + asm volatile("testb %2,%1" + CC_SET(nz) + : CC_OUT(nz) (oldbit) + : "m" (((unsigned char *)addr)[nr >> 3]), + "i" (1 << (nr & 7)) + :"memory"); + + return oldbit; +} + static __always_inline bool variable_test_bit(long nr, volatile const unsigned long *addr) { bool oldbit; @@ -224,6 +238,13 @@ static __always_inline bool variable_tes ? 
constant_test_bit((nr), (addr)) \ : variable_test_bit((nr), (addr))) +static __always_inline bool +arch_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + return __builtin_constant_p(nr) ? constant_test_bit_acquire(nr, addr) : + variable_test_bit(nr, addr); +} + /** * __ffs - find first set bit in word * @word: The word to search Index: linux-stable/include/asm-generic/bitops/instrumented-non-atomic.h =================================================================== --- linux-stable.orig/include/asm-generic/bitops/instrumented-non-atomic.h 2022-09-30 13:07:11.000000000 +0200 +++ linux-stable/include/asm-generic/bitops/instrumented-non-atomic.h 2022-09-30 13:07:11.000000000 +0200 @@ -135,4 +135,16 @@ static __always_inline bool test_bit(lon return arch_test_bit(nr, addr); } +/** + * test_bit_acquire - Determine, with acquire semantics, whether a bit is set + * @nr: bit number to test + * @addr: Address to start counting from + */ +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + instrument_atomic_read(addr + BIT_WORD(nr), sizeof(long)); + return arch_test_bit_acquire(nr, addr); +} + #endif /* _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H */ Index: linux-stable/include/asm-generic/bitops/non-atomic.h =================================================================== --- linux-stable.orig/include/asm-generic/bitops/non-atomic.h 2022-09-30 13:07:11.000000000 +0200 +++ linux-stable/include/asm-generic/bitops/non-atomic.h 2022-09-30 13:07:11.000000000 +0200 @@ -3,6 +3,7 @@ #define _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ #include <asm/types.h> +#include <asm/barrier.h> /** * arch___set_bit - Set a bit in memory @@ -119,4 +120,17 @@ arch_test_bit(unsigned int nr, const vol } #define test_bit arch_test_bit +/** + * arch_test_bit_acquire - Determine, with acquire semantics, whether a bit is set + * @nr: bit number to test + * @addr: Address to start counting from + */ +static __always_inline bool 
+arch_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} +#define test_bit_acquire arch_test_bit_acquire + #endif /* _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ */ Index: linux-stable/include/linux/buffer_head.h =================================================================== --- linux-stable.orig/include/linux/buffer_head.h 2022-09-30 13:07:11.000000000 +0200 +++ linux-stable/include/linux/buffer_head.h 2022-09-30 13:07:11.000000000 +0200 @@ -166,7 +166,7 @@ static __always_inline int buffer_uptoda * make it consistent with folio_test_uptodate * pairs with smp_mb__before_atomic in set_buffer_uptodate */ - return (smp_load_acquire(&bh->b_state) & (1UL << BH_Uptodate)) != 0; + return test_bit_acquire(BH_Uptodate, &bh->b_state); } #define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK) Index: linux-stable/include/linux/wait_bit.h =================================================================== --- linux-stable.orig/include/linux/wait_bit.h 2022-09-30 13:07:11.000000000 +0200 +++ linux-stable/include/linux/wait_bit.h 2022-09-30 13:07:11.000000000 +0200 @@ -71,7 +71,7 @@ static inline int wait_on_bit(unsigned long *word, int bit, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, bit_wait, @@ -96,7 +96,7 @@ static inline int wait_on_bit_io(unsigned long *word, int bit, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, bit_wait_io, @@ -123,7 +123,7 @@ wait_on_bit_timeout(unsigned long *word, unsigned long timeout) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit_timeout(word, bit, bit_wait_timeout, @@ -151,7 +151,7 @@ wait_on_bit_action(unsigned long 
*word, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, action, mode); } Index: linux-stable/kernel/sched/wait_bit.c =================================================================== --- linux-stable.orig/kernel/sched/wait_bit.c 2022-09-30 13:07:11.000000000 +0200 +++ linux-stable/kernel/sched/wait_bit.c 2022-09-30 13:07:11.000000000 +0200 @@ -47,7 +47,7 @@ __wait_on_bit(struct wait_queue_head *wq prepare_to_wait(wq_head, &wbq_entry->wq_entry, mode); if (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags)) ret = (*action)(&wbq_entry->key, mode); - } while (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret); + } while (test_bit_acquire(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret); finish_wait(wq_head, &wbq_entry->wq_entry); ^ permalink raw reply [flat|nested] 48+ messages in thread
* [PATCH] wait_on_bit: add an acquire memory barrier 2022-09-30 15:32 backport of patches 8238b4579866b7c1bb99883cfe102a43db5506ff and d6ffe6067a54972564552ea45d320fb98db1ac5e Mikulas Patocka 2022-09-30 15:33 ` [PATCH] wait_on_bit: add an acquire memory barrier Mikulas Patocka @ 2022-09-30 15:33 ` Mikulas Patocka 2022-09-30 15:34 ` Mikulas Patocka ` (8 subsequent siblings) 10 siblings, 0 replies; 48+ messages in thread From: Mikulas Patocka @ 2022-09-30 15:33 UTC (permalink / raw) To: gregkh; +Cc: stable This is backport of the upstream patch 8238b4579866b7c1bb99883cfe102a43db5506ff for the stable branch 5.4 Signed-off-by: Mikulas Patocka <mpatocka@redhat.com> --- arch/x86/include/asm/bitops.h | 21 +++++++++++++++++++++ include/asm-generic/bitops-instrumented.h | 6 ++++++ include/asm-generic/bitops/non-atomic.h | 14 ++++++++++++++ include/linux/buffer_head.h | 2 +- include/linux/wait_bit.h | 8 ++++---- kernel/sched/wait_bit.c | 2 +- 6 files changed, 47 insertions(+), 6 deletions(-) Index: linux-stable/arch/x86/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/x86/include/asm/bitops.h 2022-09-30 15:38:29.000000000 +0200 +++ linux-stable/arch/x86/include/asm/bitops.h 2022-09-30 15:38:29.000000000 +0200 @@ -207,6 +207,20 @@ static __always_inline bool constant_tes (addr[nr >> _BITOPS_LONG_SHIFT])) != 0; } +static __always_inline bool constant_test_bit_acquire(long nr, const volatile unsigned long *addr) +{ + bool oldbit; + + asm volatile("testb %2,%1" + CC_SET(nz) + : CC_OUT(nz) (oldbit) + : "m" (((unsigned char *)addr)[nr >> 3]), + "i" (1 << (nr & 7)) + :"memory"); + + return oldbit; +} + static __always_inline bool variable_test_bit(long nr, volatile const unsigned long *addr) { bool oldbit; @@ -224,6 +238,13 @@ static __always_inline bool variable_tes ? 
constant_test_bit((nr), (addr)) \ : variable_test_bit((nr), (addr))) +static __always_inline bool +arch_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + return __builtin_constant_p(nr) ? constant_test_bit_acquire(nr, addr) : + variable_test_bit(nr, addr); +} + /** * __ffs - find first set bit in word * @word: The word to search Index: linux-stable/include/asm-generic/bitops/non-atomic.h =================================================================== --- linux-stable.orig/include/asm-generic/bitops/non-atomic.h 2022-09-30 15:38:29.000000000 +0200 +++ linux-stable/include/asm-generic/bitops/non-atomic.h 2022-09-30 15:38:29.000000000 +0200 @@ -3,6 +3,7 @@ #define _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ #include <asm/types.h> +#include <asm/barrier.h> /** * __set_bit - Set a bit in memory @@ -106,4 +107,17 @@ static inline int test_bit(int nr, const return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); } +/** + * arch_test_bit_acquire - Determine, with acquire semantics, whether a bit is set + * @nr: bit number to test + * @addr: Address to start counting from + */ +static __always_inline bool +arch_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} +#define test_bit_acquire arch_test_bit_acquire + #endif /* _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ */ Index: linux-stable/include/linux/buffer_head.h =================================================================== --- linux-stable.orig/include/linux/buffer_head.h 2022-09-30 15:38:29.000000000 +0200 +++ linux-stable/include/linux/buffer_head.h 2022-09-30 15:38:29.000000000 +0200 @@ -166,7 +166,7 @@ static __always_inline int buffer_uptoda * make it consistent with folio_test_uptodate * pairs with smp_mb__before_atomic in set_buffer_uptodate */ - return (smp_load_acquire(&bh->b_state) & (1UL << BH_Uptodate)) != 0; + return 
test_bit_acquire(BH_Uptodate, &bh->b_state); } #define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK) Index: linux-stable/include/linux/wait_bit.h =================================================================== --- linux-stable.orig/include/linux/wait_bit.h 2022-09-30 15:38:29.000000000 +0200 +++ linux-stable/include/linux/wait_bit.h 2022-09-30 15:38:29.000000000 +0200 @@ -71,7 +71,7 @@ static inline int wait_on_bit(unsigned long *word, int bit, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, bit_wait, @@ -96,7 +96,7 @@ static inline int wait_on_bit_io(unsigned long *word, int bit, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, bit_wait_io, @@ -123,7 +123,7 @@ wait_on_bit_timeout(unsigned long *word, unsigned long timeout) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit_timeout(word, bit, bit_wait_timeout, @@ -151,7 +151,7 @@ wait_on_bit_action(unsigned long *word, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, action, mode); } Index: linux-stable/kernel/sched/wait_bit.c =================================================================== --- linux-stable.orig/kernel/sched/wait_bit.c 2022-09-30 15:38:29.000000000 +0200 +++ linux-stable/kernel/sched/wait_bit.c 2022-09-30 15:38:29.000000000 +0200 @@ -47,7 +47,7 @@ __wait_on_bit(struct wait_queue_head *wq prepare_to_wait(wq_head, &wbq_entry->wq_entry, mode); if (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags)) ret = (*action)(&wbq_entry->key, mode); - } while (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret); + } while (test_bit_acquire(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret); finish_wait(wq_head, 
&wbq_entry->wq_entry); Index: linux-stable/include/asm-generic/bitops-instrumented.h =================================================================== --- linux-stable.orig/include/asm-generic/bitops-instrumented.h 2022-09-30 15:37:42.000000000 +0200 +++ linux-stable/include/asm-generic/bitops-instrumented.h 2022-09-30 15:40:55.000000000 +0200 @@ -238,6 +238,12 @@ static inline bool test_bit(long nr, con return arch_test_bit(nr, addr); } +static inline bool test_bit_acquire(long nr, const volatile unsigned long *addr) +{ + kasan_check_read(addr + BIT_WORD(nr), sizeof(long)); + return arch_test_bit_acquire(nr, addr); +} + #if defined(arch_clear_bit_unlock_is_negative_byte) /** * clear_bit_unlock_is_negative_byte - Clear a bit in memory and test if bottom ^ permalink raw reply [flat|nested] 48+ messages in thread
* [PATCH] wait_on_bit: add an acquire memory barrier 2022-09-30 15:32 backport of patches 8238b4579866b7c1bb99883cfe102a43db5506ff and d6ffe6067a54972564552ea45d320fb98db1ac5e Mikulas Patocka 2022-09-30 15:33 ` [PATCH] wait_on_bit: add an acquire memory barrier Mikulas Patocka 2022-09-30 15:33 ` Mikulas Patocka @ 2022-09-30 15:34 ` Mikulas Patocka 2022-09-30 15:34 ` Mikulas Patocka ` (7 subsequent siblings) 10 siblings, 0 replies; 48+ messages in thread From: Mikulas Patocka @ 2022-09-30 15:34 UTC (permalink / raw) To: gregkh; +Cc: stable This is backport of the upstream patch 8238b4579866b7c1bb99883cfe102a43db5506ff for the stable branch 4.19 Signed-off-by: Mikulas Patocka <mpatocka@redhat.com> --- arch/x86/include/asm/bitops.h | 21 +++++++++++++++++++++ include/asm-generic/bitops/non-atomic.h | 14 ++++++++++++++ include/linux/buffer_head.h | 2 +- include/linux/wait_bit.h | 8 ++++---- kernel/sched/wait_bit.c | 2 +- 5 files changed, 41 insertions(+), 6 deletions(-) Index: linux-stable/arch/x86/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/x86/include/asm/bitops.h 2022-09-30 15:44:48.000000000 +0200 +++ linux-stable/arch/x86/include/asm/bitops.h 2022-09-30 15:46:38.000000000 +0200 @@ -317,6 +317,20 @@ static __always_inline bool constant_tes (addr[nr >> _BITOPS_LONG_SHIFT])) != 0; } +static __always_inline bool constant_test_bit_acquire(long nr, const volatile unsigned long *addr) +{ + bool oldbit; + + asm volatile("testb %2,%1" + CC_SET(nz) + : CC_OUT(nz) (oldbit) + : "m" (((unsigned char *)addr)[nr >> 3]), + "i" (1 << (nr & 7)) + :"memory"); + + return oldbit; +} + static __always_inline bool variable_test_bit(long nr, volatile const unsigned long *addr) { bool oldbit; @@ -343,6 +357,13 @@ static bool test_bit(int nr, const volat ? 
constant_test_bit((nr), (addr)) \ : variable_test_bit((nr), (addr))) +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + return __builtin_constant_p(nr) ? constant_test_bit_acquire(nr, addr) : + variable_test_bit(nr, addr); +} + /** * __ffs - find first set bit in word * @word: The word to search Index: linux-stable/include/asm-generic/bitops/non-atomic.h =================================================================== --- linux-stable.orig/include/asm-generic/bitops/non-atomic.h 2022-09-30 15:44:48.000000000 +0200 +++ linux-stable/include/asm-generic/bitops/non-atomic.h 2022-09-30 15:44:48.000000000 +0200 @@ -3,6 +3,7 @@ #define _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ #include <asm/types.h> +#include <asm/barrier.h> /** * __set_bit - Set a bit in memory @@ -106,4 +107,17 @@ static inline int test_bit(int nr, const return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); } +/** + * arch_test_bit_acquire - Determine, with acquire semantics, whether a bit is set + * @nr: bit number to test + * @addr: Address to start counting from + */ +static __always_inline bool +arch_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} +#define test_bit_acquire arch_test_bit_acquire + #endif /* _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ */ Index: linux-stable/include/linux/buffer_head.h =================================================================== --- linux-stable.orig/include/linux/buffer_head.h 2022-09-30 15:44:48.000000000 +0200 +++ linux-stable/include/linux/buffer_head.h 2022-09-30 15:44:48.000000000 +0200 @@ -166,7 +166,7 @@ static __always_inline int buffer_uptoda * make it consistent with folio_test_uptodate * pairs with smp_mb__before_atomic in set_buffer_uptodate */ - return (smp_load_acquire(&bh->b_state) & (1UL << BH_Uptodate)) != 0; + return 
test_bit_acquire(BH_Uptodate, &bh->b_state); } #define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK) Index: linux-stable/include/linux/wait_bit.h =================================================================== --- linux-stable.orig/include/linux/wait_bit.h 2022-09-30 15:44:48.000000000 +0200 +++ linux-stable/include/linux/wait_bit.h 2022-09-30 15:44:48.000000000 +0200 @@ -71,7 +71,7 @@ static inline int wait_on_bit(unsigned long *word, int bit, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, bit_wait, @@ -96,7 +96,7 @@ static inline int wait_on_bit_io(unsigned long *word, int bit, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, bit_wait_io, @@ -123,7 +123,7 @@ wait_on_bit_timeout(unsigned long *word, unsigned long timeout) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit_timeout(word, bit, bit_wait_timeout, @@ -151,7 +151,7 @@ wait_on_bit_action(unsigned long *word, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, action, mode); } Index: linux-stable/kernel/sched/wait_bit.c =================================================================== --- linux-stable.orig/kernel/sched/wait_bit.c 2022-09-30 15:44:48.000000000 +0200 +++ linux-stable/kernel/sched/wait_bit.c 2022-09-30 15:44:48.000000000 +0200 @@ -46,7 +46,7 @@ __wait_on_bit(struct wait_queue_head *wq prepare_to_wait(wq_head, &wbq_entry->wq_entry, mode); if (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags)) ret = (*action)(&wbq_entry->key, mode); - } while (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret); + } while (test_bit_acquire(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret); finish_wait(wq_head, 
&wbq_entry->wq_entry); ^ permalink raw reply [flat|nested] 48+ messages in thread
* [PATCH] wait_on_bit: add an acquire memory barrier 2022-09-30 15:32 backport of patches 8238b4579866b7c1bb99883cfe102a43db5506ff and d6ffe6067a54972564552ea45d320fb98db1ac5e Mikulas Patocka ` (2 preceding siblings ...) 2022-09-30 15:34 ` Mikulas Patocka @ 2022-09-30 15:34 ` Mikulas Patocka 2022-09-30 15:34 ` Mikulas Patocka ` (6 subsequent siblings) 10 siblings, 0 replies; 48+ messages in thread From: Mikulas Patocka @ 2022-09-30 15:34 UTC (permalink / raw) To: gregkh; +Cc: stable This is backport of the upstream patch 8238b4579866b7c1bb99883cfe102a43db5506ff for the stable branch 4.14 Signed-off-by: Mikulas Patocka <mpatocka@redhat.com> --- arch/x86/include/asm/bitops.h | 21 +++++++++++++++++++++ include/asm-generic/bitops/non-atomic.h | 14 ++++++++++++++ include/linux/buffer_head.h | 2 +- include/linux/wait_bit.h | 8 ++++---- kernel/sched/wait_bit.c | 2 +- 5 files changed, 41 insertions(+), 6 deletions(-) Index: linux-stable/arch/x86/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/x86/include/asm/bitops.h 2022-09-30 15:55:01.000000000 +0200 +++ linux-stable/arch/x86/include/asm/bitops.h 2022-09-30 15:55:01.000000000 +0200 @@ -328,6 +328,20 @@ static __always_inline bool constant_tes (addr[nr >> _BITOPS_LONG_SHIFT])) != 0; } +static __always_inline bool constant_test_bit_acquire(long nr, const volatile unsigned long *addr) +{ + bool oldbit; + + asm volatile("testb %2,%1" + CC_SET(nz) + : CC_OUT(nz) (oldbit) + : "m" (((unsigned char *)addr)[nr >> 3]), + "i" (1 << (nr & 7)) + :"memory"); + + return oldbit; +} + static __always_inline bool variable_test_bit(long nr, volatile const unsigned long *addr) { bool oldbit; @@ -354,6 +368,13 @@ static bool test_bit(int nr, const volat ? constant_test_bit((nr), (addr)) \ : variable_test_bit((nr), (addr))) +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + return __builtin_constant_p(nr) ? 
constant_test_bit_acquire(nr, addr) : + variable_test_bit(nr, addr); +} + /** * __ffs - find first set bit in word * @word: The word to search Index: linux-stable/include/asm-generic/bitops/non-atomic.h =================================================================== --- linux-stable.orig/include/asm-generic/bitops/non-atomic.h 2022-09-30 15:55:01.000000000 +0200 +++ linux-stable/include/asm-generic/bitops/non-atomic.h 2022-09-30 15:55:01.000000000 +0200 @@ -3,6 +3,7 @@ #define _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ #include <asm/types.h> +#include <asm/barrier.h> /** * __set_bit - Set a bit in memory @@ -106,4 +107,17 @@ static inline int test_bit(int nr, const return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); } +/** + * arch_test_bit_acquire - Determine, with acquire semantics, whether a bit is set + * @nr: bit number to test + * @addr: Address to start counting from + */ +static __always_inline bool +arch_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} +#define test_bit_acquire arch_test_bit_acquire + #endif /* _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ */ Index: linux-stable/include/linux/buffer_head.h =================================================================== --- linux-stable.orig/include/linux/buffer_head.h 2022-09-30 15:55:01.000000000 +0200 +++ linux-stable/include/linux/buffer_head.h 2022-09-30 15:55:01.000000000 +0200 @@ -163,7 +163,7 @@ static __always_inline int buffer_uptoda * make it consistent with folio_test_uptodate * pairs with smp_mb__before_atomic in set_buffer_uptodate */ - return (smp_load_acquire(&bh->b_state) & (1UL << BH_Uptodate)) != 0; + return test_bit_acquire(BH_Uptodate, &bh->b_state); } #define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK) Index: linux-stable/include/linux/wait_bit.h =================================================================== --- 
linux-stable.orig/include/linux/wait_bit.h 2022-09-30 15:55:01.000000000 +0200 +++ linux-stable/include/linux/wait_bit.h 2022-09-30 15:55:01.000000000 +0200 @@ -76,7 +76,7 @@ static inline int wait_on_bit(unsigned long *word, int bit, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, bit_wait, @@ -101,7 +101,7 @@ static inline int wait_on_bit_io(unsigned long *word, int bit, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, bit_wait_io, @@ -128,7 +128,7 @@ wait_on_bit_timeout(unsigned long *word, unsigned long timeout) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit_timeout(word, bit, bit_wait_timeout, @@ -156,7 +156,7 @@ wait_on_bit_action(unsigned long *word, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, action, mode); } Index: linux-stable/kernel/sched/wait_bit.c =================================================================== --- linux-stable.orig/kernel/sched/wait_bit.c 2022-09-30 15:55:01.000000000 +0200 +++ linux-stable/kernel/sched/wait_bit.c 2022-09-30 15:55:55.000000000 +0200 @@ -49,7 +49,7 @@ __wait_on_bit(struct wait_queue_head *wq prepare_to_wait(wq_head, &wbq_entry->wq_entry, mode); if (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags)) ret = (*action)(&wbq_entry->key, mode); - } while (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret); + } while (test_bit_acquire(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret); finish_wait(wq_head, &wbq_entry->wq_entry); return ret; } ^ permalink raw reply [flat|nested] 48+ messages in thread
* [PATCH] wait_on_bit: add an acquire memory barrier 2022-09-30 15:32 backport of patches 8238b4579866b7c1bb99883cfe102a43db5506ff and d6ffe6067a54972564552ea45d320fb98db1ac5e Mikulas Patocka ` (3 preceding siblings ...) 2022-09-30 15:34 ` Mikulas Patocka @ 2022-09-30 15:34 ` Mikulas Patocka 2022-10-01 7:01 ` Greg KH 2022-09-30 15:35 ` [PATCH] provide arch_test_bit_acquire for architectures that define test_bit Mikulas Patocka ` (5 subsequent siblings) 10 siblings, 1 reply; 48+ messages in thread From: Mikulas Patocka @ 2022-09-30 15:34 UTC (permalink / raw) To: gregkh; +Cc: stable This is backport of the upstream patch 8238b4579866b7c1bb99883cfe102a43db5506ff for the stable branch 4.9 Signed-off-by: Mikulas Patocka <mpatocka@redhat.com> --- arch/x86/include/asm/bitops.h | 21 +++++++++++++++++++++ include/asm-generic/bitops/non-atomic.h | 14 ++++++++++++++ include/linux/buffer_head.h | 2 +- include/linux/wait.h | 8 ++++---- kernel/sched/wait.c | 2 +- 5 files changed, 41 insertions(+), 6 deletions(-) Index: linux-stable/arch/x86/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/x86/include/asm/bitops.h 2022-09-30 16:01:38.000000000 +0200 +++ linux-stable/arch/x86/include/asm/bitops.h 2022-09-30 16:01:38.000000000 +0200 @@ -314,6 +314,20 @@ static __always_inline bool constant_tes (addr[nr >> _BITOPS_LONG_SHIFT])) != 0; } +static __always_inline bool constant_test_bit_acquire(long nr, const volatile unsigned long *addr) +{ + bool oldbit; + + asm volatile("testb %2,%1" + CC_SET(nz) + : CC_OUT(nz) (oldbit) + : "m" (((unsigned char *)addr)[nr >> 3]), + "i" (1 << (nr & 7)) + :"memory"); + + return oldbit; +} + static __always_inline bool variable_test_bit(long nr, volatile const unsigned long *addr) { bool oldbit; @@ -340,6 +354,13 @@ static bool test_bit(int nr, const volat ? 
constant_test_bit((nr), (addr)) \ : variable_test_bit((nr), (addr))) +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + return __builtin_constant_p(nr) ? constant_test_bit_acquire(nr, addr) : + variable_test_bit(nr, addr); +} + /** * __ffs - find first set bit in word * @word: The word to search Index: linux-stable/include/asm-generic/bitops/non-atomic.h =================================================================== --- linux-stable.orig/include/asm-generic/bitops/non-atomic.h 2022-09-30 16:01:38.000000000 +0200 +++ linux-stable/include/asm-generic/bitops/non-atomic.h 2022-09-30 16:01:38.000000000 +0200 @@ -2,6 +2,7 @@ #define _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ #include <asm/types.h> +#include <asm/barrier.h> /** * __set_bit - Set a bit in memory @@ -105,4 +106,17 @@ static inline int test_bit(int nr, const return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); } +/** + * arch_test_bit_acquire - Determine, with acquire semantics, whether a bit is set + * @nr: bit number to test + * @addr: Address to start counting from + */ +static __always_inline bool +arch_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} +#define test_bit_acquire arch_test_bit_acquire + #endif /* _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ */ Index: linux-stable/include/linux/buffer_head.h =================================================================== --- linux-stable.orig/include/linux/buffer_head.h 2022-09-30 16:01:38.000000000 +0200 +++ linux-stable/include/linux/buffer_head.h 2022-09-30 16:01:38.000000000 +0200 @@ -162,7 +162,7 @@ static __always_inline int buffer_uptoda * make it consistent with folio_test_uptodate * pairs with smp_mb__before_atomic in set_buffer_uptodate */ - return (smp_load_acquire(&bh->b_state) & (1UL << BH_Uptodate)) != 0; + return 
test_bit_acquire(BH_Uptodate, &bh->b_state); } #define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK) Index: linux-stable/include/linux/wait.h =================================================================== --- linux-stable.orig/include/linux/wait.h 2022-09-30 16:01:38.000000000 +0200 +++ linux-stable/include/linux/wait.h 2022-09-30 16:01:38.000000000 +0200 @@ -1066,7 +1066,7 @@ static inline int wait_on_bit(unsigned long *word, int bit, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, bit_wait, @@ -1091,7 +1091,7 @@ static inline int wait_on_bit_io(unsigned long *word, int bit, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, bit_wait_io, @@ -1118,7 +1118,7 @@ wait_on_bit_timeout(unsigned long *word, unsigned long timeout) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit_timeout(word, bit, bit_wait_timeout, @@ -1146,7 +1146,7 @@ wait_on_bit_action(unsigned long *word, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, action, mode); } Index: linux-stable/kernel/sched/wait.c =================================================================== --- linux-stable.orig/kernel/sched/wait.c 2022-09-30 16:01:38.000000000 +0200 +++ linux-stable/kernel/sched/wait.c 2022-09-30 16:01:58.000000000 +0200 @@ -389,7 +389,7 @@ __wait_on_bit(wait_queue_head_t *wq, str prepare_to_wait(wq, &q->wait, mode); if (test_bit(q->key.bit_nr, q->key.flags)) ret = (*action)(&q->key, mode); - } while (test_bit(q->key.bit_nr, q->key.flags) && !ret); + } while (test_bit_acquire(q->key.bit_nr, q->key.flags) && !ret); finish_wait(wq, &q->wait); return ret; } ^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH] wait_on_bit: add an acquire memory barrier 2022-09-30 15:34 ` Mikulas Patocka @ 2022-10-01 7:01 ` Greg KH 0 siblings, 0 replies; 48+ messages in thread From: Greg KH @ 2022-10-01 7:01 UTC (permalink / raw) To: Mikulas Patocka; +Cc: stable On Fri, Sep 30, 2022 at 11:34:55AM -0400, Mikulas Patocka wrote: > This is backport of the upstream patch 8238b4579866b7c1bb99883cfe102a43db5506ff > for the stable branch 4.9 > > Signed-off-by: Mikulas Patocka <mpatocka@redhat.com> Again, you lost all of the original changelog and authorship/review information here :( And what order are these to be applied in? Please make a patch series for each stable/LTS tree they are to be backported to. Would you want to try to unwind this if you were the receiver of these emails? thanks, greg k-h ^ permalink raw reply [flat|nested] 48+ messages in thread
* [PATCH] provide arch_test_bit_acquire for architectures that define test_bit 2022-09-30 15:32 backport of patches 8238b4579866b7c1bb99883cfe102a43db5506ff and d6ffe6067a54972564552ea45d320fb98db1ac5e Mikulas Patocka ` (4 preceding siblings ...) 2022-09-30 15:34 ` Mikulas Patocka @ 2022-09-30 15:35 ` Mikulas Patocka 2022-09-30 15:35 ` Mikulas Patocka ` (4 subsequent siblings) 10 siblings, 0 replies; 48+ messages in thread From: Mikulas Patocka @ 2022-09-30 15:35 UTC (permalink / raw) To: gregkh; +Cc: stable This is backport of the upstream patch d6ffe6067a54972564552ea45d320fb98db1ac5e for the stable branches 5.19, 5.15, 5.10 Signed-off-by: Mikulas Patocka <mpatocka@redhat.com> --- arch/alpha/include/asm/bitops.h | 7 +++++++ arch/hexagon/include/asm/bitops.h | 15 +++++++++++++++ arch/ia64/include/asm/bitops.h | 7 +++++++ arch/m68k/include/asm/bitops.h | 6 ++++++ arch/s390/include/asm/bitops.h | 7 +++++++ arch/sh/include/asm/bitops-op32.h | 7 +++++++ 6 files changed, 49 insertions(+) Index: linux-stable/arch/alpha/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/alpha/include/asm/bitops.h 2022-09-30 13:07:14.000000000 +0200 +++ linux-stable/arch/alpha/include/asm/bitops.h 2022-09-30 13:07:14.000000000 +0200 @@ -289,6 +289,13 @@ test_bit(int nr, const volatile void * a return (1UL & (((const int *) addr)[nr >> 5] >> (nr & 31))) != 0UL; } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + /* * ffz = Find First Zero in word. Undefined if no zero exists, * so code should check against ~0UL first.. 
Index: linux-stable/arch/hexagon/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/hexagon/include/asm/bitops.h 2022-09-30 13:07:14.000000000 +0200 +++ linux-stable/arch/hexagon/include/asm/bitops.h 2022-09-30 13:07:14.000000000 +0200 @@ -172,7 +172,22 @@ static inline int __test_bit(int nr, con return retval; } +static inline int __test_bit_acquire(int nr, const volatile unsigned long *addr) +{ + int retval; + + asm volatile( + "{P0 = tstbit(%1,%2); if (P0.new) %0 = #1; if (!P0.new) %0 = #0;}\n" + : "=&r" (retval) + : "r" (addr[BIT_WORD(nr)]), "r" (nr % BITS_PER_LONG) + : "p0", "memory" + ); + + return retval; +} + #define test_bit(nr, addr) __test_bit(nr, addr) +#define test_bit_acquire(nr, addr) __test_bit_acquire(nr, addr) /* * ffz - find first zero in word. Index: linux-stable/arch/ia64/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/ia64/include/asm/bitops.h 2022-09-30 13:07:14.000000000 +0200 +++ linux-stable/arch/ia64/include/asm/bitops.h 2022-09-30 13:07:14.000000000 +0200 @@ -337,6 +337,13 @@ test_bit (int nr, const volatile void *a return 1 & (((const volatile __u32 *) addr)[nr >> 5] >> (nr & 31)); } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + /** * ffz - find the first zero bit in a long word * @x: The long word to find the bit in Index: linux-stable/arch/m68k/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/m68k/include/asm/bitops.h 2022-09-30 13:07:14.000000000 +0200 +++ linux-stable/arch/m68k/include/asm/bitops.h 2022-09-30 13:07:14.000000000 +0200 @@ -153,6 +153,12 @@ static inline int test_bit(int nr, const return (vaddr[nr >> 5] & (1UL << (nr & 31))) != 0; } 
+static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} static inline int bset_reg_test_and_set_bit(int nr, volatile unsigned long *vaddr) Index: linux-stable/arch/s390/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/s390/include/asm/bitops.h 2022-09-30 13:07:14.000000000 +0200 +++ linux-stable/arch/s390/include/asm/bitops.h 2022-09-30 13:07:14.000000000 +0200 @@ -184,6 +184,13 @@ static inline bool arch_test_bit(unsigne return *addr & mask; } +static __always_inline bool +arch_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + static inline bool arch_test_and_set_bit_lock(unsigned long nr, volatile unsigned long *ptr) { Index: linux-stable/arch/sh/include/asm/bitops-op32.h =================================================================== --- linux-stable.orig/arch/sh/include/asm/bitops-op32.h 2022-09-30 13:07:14.000000000 +0200 +++ linux-stable/arch/sh/include/asm/bitops-op32.h 2022-09-30 13:07:14.000000000 +0200 @@ -138,4 +138,11 @@ static inline int test_bit(int nr, const return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + #endif /* __ASM_SH_BITOPS_OP32_H */ ^ permalink raw reply [flat|nested] 48+ messages in thread
* [PATCH] provide arch_test_bit_acquire for architectures that define test_bit 2022-09-30 15:32 backport of patches 8238b4579866b7c1bb99883cfe102a43db5506ff and d6ffe6067a54972564552ea45d320fb98db1ac5e Mikulas Patocka ` (5 preceding siblings ...) 2022-09-30 15:35 ` [PATCH] provide arch_test_bit_acquire for architectures that define test_bit Mikulas Patocka @ 2022-09-30 15:35 ` Mikulas Patocka 2022-09-30 15:36 ` Mikulas Patocka ` (3 subsequent siblings) 10 siblings, 0 replies; 48+ messages in thread From: Mikulas Patocka @ 2022-09-30 15:35 UTC (permalink / raw) To: gregkh; +Cc: stable This is backport of the upstream patch d6ffe6067a54972564552ea45d320fb98db1ac5e for the stable branch 5.4 Signed-off-by: Mikulas Patocka <mpatocka@redhat.com> --- arch/alpha/include/asm/bitops.h | 7 +++++++ arch/hexagon/include/asm/bitops.h | 15 +++++++++++++++ arch/ia64/include/asm/bitops.h | 7 +++++++ arch/m68k/include/asm/bitops.h | 6 ++++++ arch/s390/include/asm/bitops.h | 7 +++++++ arch/sh/include/asm/bitops-op32.h | 7 +++++++ 6 files changed, 49 insertions(+) Index: linux-stable/arch/alpha/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/alpha/include/asm/bitops.h 2022-09-30 15:39:09.000000000 +0200 +++ linux-stable/arch/alpha/include/asm/bitops.h 2022-09-30 15:39:09.000000000 +0200 @@ -289,6 +289,13 @@ test_bit(int nr, const volatile void * a return (1UL & (((const int *) addr)[nr >> 5] >> (nr & 31))) != 0UL; } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + /* * ffz = Find First Zero in word. Undefined if no zero exists, * so code should check against ~0UL first.. 
Index: linux-stable/arch/hexagon/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/hexagon/include/asm/bitops.h 2022-09-30 15:39:09.000000000 +0200 +++ linux-stable/arch/hexagon/include/asm/bitops.h 2022-09-30 15:39:09.000000000 +0200 @@ -172,7 +172,22 @@ static inline int __test_bit(int nr, con return retval; } +static inline int __test_bit_acquire(int nr, const volatile unsigned long *addr) +{ + int retval; + + asm volatile( + "{P0 = tstbit(%1,%2); if (P0.new) %0 = #1; if (!P0.new) %0 = #0;}\n" + : "=&r" (retval) + : "r" (addr[BIT_WORD(nr)]), "r" (nr % BITS_PER_LONG) + : "p0", "memory" + ); + + return retval; +} + #define test_bit(nr, addr) __test_bit(nr, addr) +#define test_bit_acquire(nr, addr) __test_bit_acquire(nr, addr) /* * ffz - find first zero in word. Index: linux-stable/arch/ia64/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/ia64/include/asm/bitops.h 2022-09-30 15:39:09.000000000 +0200 +++ linux-stable/arch/ia64/include/asm/bitops.h 2022-09-30 15:39:09.000000000 +0200 @@ -337,6 +337,13 @@ test_bit (int nr, const volatile void *a return 1 & (((const volatile __u32 *) addr)[nr >> 5] >> (nr & 31)); } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + /** * ffz - find the first zero bit in a long word * @x: The long word to find the bit in Index: linux-stable/arch/m68k/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/m68k/include/asm/bitops.h 2022-09-30 15:39:09.000000000 +0200 +++ linux-stable/arch/m68k/include/asm/bitops.h 2022-09-30 15:39:09.000000000 +0200 @@ -153,6 +153,12 @@ static inline int test_bit(int nr, const return (vaddr[nr >> 5] & (1UL << (nr & 31))) != 0; } 
+static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} static inline int bset_reg_test_and_set_bit(int nr, volatile unsigned long *vaddr) Index: linux-stable/arch/s390/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/s390/include/asm/bitops.h 2022-09-30 15:39:09.000000000 +0200 +++ linux-stable/arch/s390/include/asm/bitops.h 2022-09-30 15:39:09.000000000 +0200 @@ -219,6 +219,13 @@ static inline bool arch_test_bit(unsigne return (*addr >> (nr & 7)) & 1; } +static __always_inline bool +arch_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + static inline bool arch_test_and_set_bit_lock(unsigned long nr, volatile unsigned long *ptr) { Index: linux-stable/arch/sh/include/asm/bitops-op32.h =================================================================== --- linux-stable.orig/arch/sh/include/asm/bitops-op32.h 2022-09-30 15:39:09.000000000 +0200 +++ linux-stable/arch/sh/include/asm/bitops-op32.h 2022-09-30 15:39:09.000000000 +0200 @@ -140,4 +140,11 @@ static inline int test_bit(int nr, const return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + #endif /* __ASM_SH_BITOPS_OP32_H */ ^ permalink raw reply [flat|nested] 48+ messages in thread
* [PATCH] provide arch_test_bit_acquire for architectures that define test_bit 2022-09-30 15:32 backport of patches 8238b4579866b7c1bb99883cfe102a43db5506ff and d6ffe6067a54972564552ea45d320fb98db1ac5e Mikulas Patocka ` (6 preceding siblings ...) 2022-09-30 15:35 ` Mikulas Patocka @ 2022-09-30 15:36 ` Mikulas Patocka 2022-09-30 15:36 ` Mikulas Patocka ` (2 subsequent siblings) 10 siblings, 0 replies; 48+ messages in thread From: Mikulas Patocka @ 2022-09-30 15:36 UTC (permalink / raw) To: gregkh; +Cc: stable This is backport of the upstream patch d6ffe6067a54972564552ea45d320fb98db1ac5e for the stable branch 4.19 Signed-off-by: Mikulas Patocka <mpatocka@redhat.com> --- arch/alpha/include/asm/bitops.h | 7 +++++++ arch/hexagon/include/asm/bitops.h | 15 +++++++++++++++ arch/ia64/include/asm/bitops.h | 7 +++++++ arch/m68k/include/asm/bitops.h | 6 ++++++ arch/s390/include/asm/bitops.h | 7 +++++++ arch/sh/include/asm/bitops-op32.h | 7 +++++++ 6 files changed, 49 insertions(+) Index: linux-stable/arch/alpha/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/alpha/include/asm/bitops.h 2022-09-30 15:48:24.000000000 +0200 +++ linux-stable/arch/alpha/include/asm/bitops.h 2022-09-30 15:48:24.000000000 +0200 @@ -289,6 +289,13 @@ test_bit(int nr, const volatile void * a return (1UL & (((const int *) addr)[nr >> 5] >> (nr & 31))) != 0UL; } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + /* * ffz = Find First Zero in word. Undefined if no zero exists, * so code should check against ~0UL first.. 
Index: linux-stable/arch/hexagon/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/hexagon/include/asm/bitops.h 2022-09-30 15:48:24.000000000 +0200 +++ linux-stable/arch/hexagon/include/asm/bitops.h 2022-09-30 15:48:24.000000000 +0200 @@ -186,7 +186,22 @@ static inline int __test_bit(int nr, con return retval; } +static inline int __test_bit_acquire(int nr, const volatile unsigned long *addr) +{ + int retval; + + asm volatile( + "{P0 = tstbit(%1,%2); if (P0.new) %0 = #1; if (!P0.new) %0 = #0;}\n" + : "=&r" (retval) + : "r" (addr[BIT_WORD(nr)]), "r" (nr % BITS_PER_LONG) + : "p0", "memory" + ); + + return retval; +} + #define test_bit(nr, addr) __test_bit(nr, addr) +#define test_bit_acquire(nr, addr) __test_bit_acquire(nr, addr) /* * ffz - find first zero in word. Index: linux-stable/arch/ia64/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/ia64/include/asm/bitops.h 2022-09-30 15:48:24.000000000 +0200 +++ linux-stable/arch/ia64/include/asm/bitops.h 2022-09-30 15:48:24.000000000 +0200 @@ -337,6 +337,13 @@ test_bit (int nr, const volatile void *a return 1 & (((const volatile __u32 *) addr)[nr >> 5] >> (nr & 31)); } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + /** * ffz - find the first zero bit in a long word * @x: The long word to find the bit in Index: linux-stable/arch/m68k/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/m68k/include/asm/bitops.h 2022-09-30 15:48:24.000000000 +0200 +++ linux-stable/arch/m68k/include/asm/bitops.h 2022-09-30 15:48:24.000000000 +0200 @@ -153,6 +153,12 @@ static inline int test_bit(int nr, const return (vaddr[nr >> 5] & (1UL << (nr & 31))) != 0; } 
+static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} static inline int bset_reg_test_and_set_bit(int nr, volatile unsigned long *vaddr) Index: linux-stable/arch/s390/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/s390/include/asm/bitops.h 2022-09-30 15:48:24.000000000 +0200 +++ linux-stable/arch/s390/include/asm/bitops.h 2022-09-30 15:49:53.000000000 +0200 @@ -215,6 +215,13 @@ static inline int test_bit(unsigned long return (*addr >> (nr & 7)) & 1; } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + static inline int test_and_set_bit_lock(unsigned long nr, volatile unsigned long *ptr) { Index: linux-stable/arch/sh/include/asm/bitops-op32.h =================================================================== --- linux-stable.orig/arch/sh/include/asm/bitops-op32.h 2022-09-30 15:48:24.000000000 +0200 +++ linux-stable/arch/sh/include/asm/bitops-op32.h 2022-09-30 15:48:24.000000000 +0200 @@ -140,4 +140,11 @@ static inline int test_bit(int nr, const return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + #endif /* __ASM_SH_BITOPS_OP32_H */ ^ permalink raw reply [flat|nested] 48+ messages in thread
* [PATCH] provide arch_test_bit_acquire for architectures that define test_bit 2022-09-30 15:32 backport of patches 8238b4579866b7c1bb99883cfe102a43db5506ff and d6ffe6067a54972564552ea45d320fb98db1ac5e Mikulas Patocka ` (7 preceding siblings ...) 2022-09-30 15:36 ` Mikulas Patocka @ 2022-09-30 15:36 ` Mikulas Patocka 2022-09-30 15:36 ` Mikulas Patocka 2022-10-01 6:59 ` backport of patches 8238b4579866b7c1bb99883cfe102a43db5506ff and d6ffe6067a54972564552ea45d320fb98db1ac5e Greg KH 10 siblings, 0 replies; 48+ messages in thread From: Mikulas Patocka @ 2022-09-30 15:36 UTC (permalink / raw) To: gregkh; +Cc: stable This is backport of the upstream patch d6ffe6067a54972564552ea45d320fb98db1ac5e for the stable branch 4.14 Signed-off-by: Mikulas Patocka <mpatocka@redhat.com> --- arch/alpha/include/asm/bitops.h | 7 +++++++ arch/hexagon/include/asm/bitops.h | 15 +++++++++++++++ arch/ia64/include/asm/bitops.h | 7 +++++++ arch/m68k/include/asm/bitops.h | 6 ++++++ arch/s390/include/asm/bitops.h | 7 +++++++ arch/sh/include/asm/bitops-op32.h | 7 +++++++ 6 files changed, 49 insertions(+) Index: linux-stable/arch/alpha/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/alpha/include/asm/bitops.h 2022-09-30 15:48:24.000000000 +0200 +++ linux-stable/arch/alpha/include/asm/bitops.h 2022-09-30 15:48:24.000000000 +0200 @@ -289,6 +289,13 @@ test_bit(int nr, const volatile void * a return (1UL & (((const int *) addr)[nr >> 5] >> (nr & 31))) != 0UL; } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + /* * ffz = Find First Zero in word. Undefined if no zero exists, * so code should check against ~0UL first.. 
Index: linux-stable/arch/hexagon/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/hexagon/include/asm/bitops.h 2022-09-30 15:48:24.000000000 +0200 +++ linux-stable/arch/hexagon/include/asm/bitops.h 2022-09-30 15:48:24.000000000 +0200 @@ -186,7 +186,22 @@ static inline int __test_bit(int nr, con return retval; } +static inline int __test_bit_acquire(int nr, const volatile unsigned long *addr) +{ + int retval; + + asm volatile( + "{P0 = tstbit(%1,%2); if (P0.new) %0 = #1; if (!P0.new) %0 = #0;}\n" + : "=&r" (retval) + : "r" (addr[BIT_WORD(nr)]), "r" (nr % BITS_PER_LONG) + : "p0", "memory" + ); + + return retval; +} + #define test_bit(nr, addr) __test_bit(nr, addr) +#define test_bit_acquire(nr, addr) __test_bit_acquire(nr, addr) /* * ffz - find first zero in word. Index: linux-stable/arch/ia64/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/ia64/include/asm/bitops.h 2022-09-30 15:48:24.000000000 +0200 +++ linux-stable/arch/ia64/include/asm/bitops.h 2022-09-30 15:48:24.000000000 +0200 @@ -337,6 +337,13 @@ test_bit (int nr, const volatile void *a return 1 & (((const volatile __u32 *) addr)[nr >> 5] >> (nr & 31)); } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + /** * ffz - find the first zero bit in a long word * @x: The long word to find the bit in Index: linux-stable/arch/m68k/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/m68k/include/asm/bitops.h 2022-09-30 15:48:24.000000000 +0200 +++ linux-stable/arch/m68k/include/asm/bitops.h 2022-09-30 15:48:24.000000000 +0200 @@ -153,6 +153,12 @@ static inline int test_bit(int nr, const return (vaddr[nr >> 5] & (1UL << (nr & 31))) != 0; } 
+static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} static inline int bset_reg_test_and_set_bit(int nr, volatile unsigned long *vaddr) Index: linux-stable/arch/s390/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/s390/include/asm/bitops.h 2022-09-30 15:48:24.000000000 +0200 +++ linux-stable/arch/s390/include/asm/bitops.h 2022-09-30 15:49:53.000000000 +0200 @@ -215,6 +215,13 @@ static inline int test_bit(unsigned long return (*addr >> (nr & 7)) & 1; } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + static inline int test_and_set_bit_lock(unsigned long nr, volatile unsigned long *ptr) { Index: linux-stable/arch/sh/include/asm/bitops-op32.h =================================================================== --- linux-stable.orig/arch/sh/include/asm/bitops-op32.h 2022-09-30 15:48:24.000000000 +0200 +++ linux-stable/arch/sh/include/asm/bitops-op32.h 2022-09-30 15:48:24.000000000 +0200 @@ -140,4 +140,11 @@ static inline int test_bit(int nr, const return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + #endif /* __ASM_SH_BITOPS_OP32_H */ ^ permalink raw reply [flat|nested] 48+ messages in thread
* [PATCH] provide arch_test_bit_acquire for architectures that define test_bit 2022-09-30 15:32 backport of patches 8238b4579866b7c1bb99883cfe102a43db5506ff and d6ffe6067a54972564552ea45d320fb98db1ac5e Mikulas Patocka ` (8 preceding siblings ...) 2022-09-30 15:36 ` Mikulas Patocka @ 2022-09-30 15:36 ` Mikulas Patocka 2022-10-01 7:00 ` Greg KH 2022-10-01 6:59 ` backport of patches 8238b4579866b7c1bb99883cfe102a43db5506ff and d6ffe6067a54972564552ea45d320fb98db1ac5e Greg KH 10 siblings, 1 reply; 48+ messages in thread From: Mikulas Patocka @ 2022-09-30 15:36 UTC (permalink / raw) To: gregkh; +Cc: stable This is backport of the upstream patch d6ffe6067a54972564552ea45d320fb98db1ac5e for the stable branch 4.9 Signed-off-by: Mikulas Patocka <mpatocka@redhat.com> --- arch/alpha/include/asm/bitops.h | 7 +++++++ arch/hexagon/include/asm/bitops.h | 15 +++++++++++++++ arch/ia64/include/asm/bitops.h | 7 +++++++ arch/m68k/include/asm/bitops.h | 6 ++++++ arch/s390/include/asm/bitops.h | 7 +++++++ arch/sh/include/asm/bitops-op32.h | 7 +++++++ 6 files changed, 49 insertions(+) Index: linux-stable/arch/alpha/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/alpha/include/asm/bitops.h 2022-09-30 16:03:22.000000000 +0200 +++ linux-stable/arch/alpha/include/asm/bitops.h 2022-09-30 16:03:22.000000000 +0200 @@ -288,6 +288,13 @@ test_bit(int nr, const volatile void * a return (1UL & (((const int *) addr)[nr >> 5] >> (nr & 31))) != 0UL; } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + /* * ffz = Find First Zero in word. Undefined if no zero exists, * so code should check against ~0UL first.. 
Index: linux-stable/arch/hexagon/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/hexagon/include/asm/bitops.h 2022-09-30 16:03:22.000000000 +0200 +++ linux-stable/arch/hexagon/include/asm/bitops.h 2022-09-30 16:03:22.000000000 +0200 @@ -186,7 +186,22 @@ static inline int __test_bit(int nr, con return retval; } +static inline int __test_bit_acquire(int nr, const volatile unsigned long *addr) +{ + int retval; + + asm volatile( + "{P0 = tstbit(%1,%2); if (P0.new) %0 = #1; if (!P0.new) %0 = #0;}\n" + : "=&r" (retval) + : "r" (addr[BIT_WORD(nr)]), "r" (nr % BITS_PER_LONG) + : "p0", "memory" + ); + + return retval; +} + #define test_bit(nr, addr) __test_bit(nr, addr) +#define test_bit_acquire(nr, addr) __test_bit_acquire(nr, addr) /* * ffz - find first zero in word. Index: linux-stable/arch/ia64/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/ia64/include/asm/bitops.h 2022-09-30 16:03:22.000000000 +0200 +++ linux-stable/arch/ia64/include/asm/bitops.h 2022-09-30 16:03:22.000000000 +0200 @@ -336,6 +336,13 @@ test_bit (int nr, const volatile void *a return 1 & (((const volatile __u32 *) addr)[nr >> 5] >> (nr & 31)); } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + /** * ffz - find the first zero bit in a long word * @x: The long word to find the bit in Index: linux-stable/arch/m68k/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/m68k/include/asm/bitops.h 2022-09-30 16:03:22.000000000 +0200 +++ linux-stable/arch/m68k/include/asm/bitops.h 2022-09-30 16:03:22.000000000 +0200 @@ -153,6 +153,12 @@ static inline int test_bit(int nr, const return (vaddr[nr >> 5] & (1UL << (nr & 31))) != 0; } 
+static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} static inline int bset_reg_test_and_set_bit(int nr, volatile unsigned long *vaddr) Index: linux-stable/arch/s390/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/s390/include/asm/bitops.h 2022-09-30 16:03:22.000000000 +0200 +++ linux-stable/arch/s390/include/asm/bitops.h 2022-09-30 16:03:22.000000000 +0200 @@ -270,6 +270,13 @@ static inline int test_bit(unsigned long return (*addr >> (nr & 7)) & 1; } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + static inline int test_and_set_bit_lock(unsigned long nr, volatile unsigned long *ptr) { Index: linux-stable/arch/sh/include/asm/bitops-op32.h =================================================================== --- linux-stable.orig/arch/sh/include/asm/bitops-op32.h 2022-09-30 16:03:22.000000000 +0200 +++ linux-stable/arch/sh/include/asm/bitops-op32.h 2022-09-30 16:03:22.000000000 +0200 @@ -139,4 +139,11 @@ static inline int test_bit(int nr, const return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + #endif /* __ASM_SH_BITOPS_OP32_H */ ^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH] provide arch_test_bit_acquire for architectures that define test_bit 2022-09-30 15:36 ` Mikulas Patocka @ 2022-10-01 7:00 ` Greg KH 0 siblings, 0 replies; 48+ messages in thread From: Greg KH @ 2022-10-01 7:00 UTC (permalink / raw) To: Mikulas Patocka; +Cc: stable On Fri, Sep 30, 2022 at 11:36:52AM -0400, Mikulas Patocka wrote: > This is backport of the upstream patch d6ffe6067a54972564552ea45d320fb98db1ac5e > for the stable branch 4.9 > > Signed-off-by: Mikulas Patocka <mpatocka@redhat.com> What happened to the original changelog information and the original signed-off-by information? Please keep that when backporting changes. Fix that up for all of these when you resend. thanks, greg k-h ^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: backport of patches 8238b4579866b7c1bb99883cfe102a43db5506ff and d6ffe6067a54972564552ea45d320fb98db1ac5e 2022-09-30 15:32 backport of patches 8238b4579866b7c1bb99883cfe102a43db5506ff and d6ffe6067a54972564552ea45d320fb98db1ac5e Mikulas Patocka ` (9 preceding siblings ...) 2022-09-30 15:36 ` Mikulas Patocka @ 2022-10-01 6:59 ` Greg KH 2022-10-03 12:28 ` Mikulas Patocka 10 siblings, 1 reply; 48+ messages in thread From: Greg KH @ 2022-10-01 6:59 UTC (permalink / raw) To: Mikulas Patocka; +Cc: stable On Fri, Sep 30, 2022 at 11:32:30AM -0400, Mikulas Patocka wrote: > Hi > > Here I'm submitting backport of patches > 8238b4579866b7c1bb99883cfe102a43db5506ff and > d6ffe6067a54972564552ea45d320fb98db1ac5e to the stable branches. Thanks, but you provide no information as to why these are needed. What needs them? They are just adding new functions to the tree from what I can tell. thanks, greg k-h ^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: backport of patches 8238b4579866b7c1bb99883cfe102a43db5506ff and d6ffe6067a54972564552ea45d320fb98db1ac5e 2022-10-01 6:59 ` backport of patches 8238b4579866b7c1bb99883cfe102a43db5506ff and d6ffe6067a54972564552ea45d320fb98db1ac5e Greg KH @ 2022-10-03 12:28 ` Mikulas Patocka 2022-10-03 12:28 ` [PATCH] wait_on_bit: add an acquire memory barrier Mikulas Patocka ` (10 more replies) 0 siblings, 11 replies; 48+ messages in thread From: Mikulas Patocka @ 2022-10-03 12:28 UTC (permalink / raw) To: Greg KH; +Cc: stable On Sat, 1 Oct 2022, Greg KH wrote: > On Fri, Sep 30, 2022 at 11:32:30AM -0400, Mikulas Patocka wrote: > > Hi > > > > Here I'm submitting backport of patches > > 8238b4579866b7c1bb99883cfe102a43db5506ff and > > d6ffe6067a54972564552ea45d320fb98db1ac5e to the stable branches. > > Thanks, but you provide no information as to why these are needed. > > What needs them? They are just adding new functions to the tree from > what I can tell. > > thanks, > > greg k-h There's a race condition in wait_on_bit. wait_on_bit tests a bit using the "test_bit" function, however this function doesn't do any memory barrier, so the memory accesses that follow wait_on_bit may be reordered before it and return invalid data. Linus didn't want to add a memory barrier to wait_on_bit, he instead wanted to introduce a new function test_bit_acquire that performs the "acquire" memory barrier and use it in wait_on_bit. The patch d6ffe6067a54972564552ea45d320fb98db1ac5e fixes an oversight in the patch 8238b4579866b7c1bb99883cfe102a43db5506ff where the function test_bit_acquire was not defined for some architectures and this caused compile failure. The backport of the patch 8238b4579866b7c1bb99883cfe102a43db5506ff should be applied first and the backport of the patch d6ffe6067a54972564552ea45d320fb98db1ac5e afterwards. Mikulas ^ permalink raw reply [flat|nested] 48+ messages in thread
* [PATCH] wait_on_bit: add an acquire memory barrier 2022-10-03 12:28 ` Mikulas Patocka @ 2022-10-03 12:28 ` Mikulas Patocka 2022-10-03 12:29 ` Mikulas Patocka ` (9 subsequent siblings) 10 siblings, 0 replies; 48+ messages in thread From: Mikulas Patocka @ 2022-10-03 12:28 UTC (permalink / raw) To: Greg KH; +Cc: stable This is backport of the upstream patch 8238b4579866b7c1bb99883cfe102a43db5506ff for the stable branches 5.10, 5.15, 5.19 wait_on_bit: add an acquire memory barrier There are several places in the kernel where wait_on_bit is not followed by a memory barrier (for example, in drivers/md/dm-bufio.c:new_read). On architectures with weak memory ordering, it may happen that memory accesses that follow wait_on_bit are reordered before wait_on_bit and they may return invalid data. Fix this class of bugs by introducing a new function "test_bit_acquire" that works like test_bit, but has acquire memory ordering semantics. Signed-off-by: Mikulas Patocka <mpatocka@redhat.com> Acked-by: Will Deacon <will@kernel.org> Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- arch/x86/include/asm/bitops.h | 21 +++++++++++++++++++ include/asm-generic/bitops/instrumented-non-atomic.h | 12 ++++++++++ include/asm-generic/bitops/non-atomic.h | 14 ++++++++++++ include/linux/buffer_head.h | 2 - include/linux/wait_bit.h | 8 +++---- kernel/sched/wait_bit.c | 2 - 6 files changed, 53 insertions(+), 6 deletions(-) Index: linux-stable/arch/x86/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/x86/include/asm/bitops.h 2022-09-30 13:07:11.000000000 +0200 +++ linux-stable/arch/x86/include/asm/bitops.h 2022-09-30 13:07:11.000000000 +0200 @@ -207,6 +207,20 @@ static __always_inline bool constant_tes (addr[nr >> _BITOPS_LONG_SHIFT])) != 0; } +static __always_inline bool constant_test_bit_acquire(long nr, const volatile unsigned long *addr) +{ + bool oldbit; + + asm volatile("testb %2,%1" 
+ CC_SET(nz) + : CC_OUT(nz) (oldbit) + : "m" (((unsigned char *)addr)[nr >> 3]), + "i" (1 << (nr & 7)) + :"memory"); + + return oldbit; +} + static __always_inline bool variable_test_bit(long nr, volatile const unsigned long *addr) { bool oldbit; @@ -224,6 +238,13 @@ static __always_inline bool variable_tes ? constant_test_bit((nr), (addr)) \ : variable_test_bit((nr), (addr))) +static __always_inline bool +arch_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + return __builtin_constant_p(nr) ? constant_test_bit_acquire(nr, addr) : + variable_test_bit(nr, addr); +} + /** * __ffs - find first set bit in word * @word: The word to search Index: linux-stable/include/asm-generic/bitops/instrumented-non-atomic.h =================================================================== --- linux-stable.orig/include/asm-generic/bitops/instrumented-non-atomic.h 2022-09-30 13:07:11.000000000 +0200 +++ linux-stable/include/asm-generic/bitops/instrumented-non-atomic.h 2022-09-30 13:07:11.000000000 +0200 @@ -135,4 +135,16 @@ static __always_inline bool test_bit(lon return arch_test_bit(nr, addr); } +/** + * _test_bit_acquire - Determine, with acquire semantics, whether a bit is set + * @nr: bit number to test + * @addr: Address to start counting from + */ +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + instrument_atomic_read(addr + BIT_WORD(nr), sizeof(long)); + return arch_test_bit_acquire(nr, addr); +} + #endif /* _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H */ Index: linux-stable/include/asm-generic/bitops/non-atomic.h =================================================================== --- linux-stable.orig/include/asm-generic/bitops/non-atomic.h 2022-09-30 13:07:11.000000000 +0200 +++ linux-stable/include/asm-generic/bitops/non-atomic.h 2022-09-30 13:07:11.000000000 +0200 @@ -3,6 +3,7 @@ #define _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ #include <asm/types.h> +#include <asm/barrier.h> /** * arch___set_bit 
- Set a bit in memory @@ -119,4 +120,17 @@ arch_test_bit(unsigned int nr, const vol } #define test_bit arch_test_bit +/** + * arch_test_bit_acquire - Determine, with acquire semantics, whether a bit is set + * @nr: bit number to test + * @addr: Address to start counting from + */ +static __always_inline bool +arch_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} +#define test_bit_acquire arch_test_bit_acquire + #endif /* _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ */ Index: linux-stable/include/linux/buffer_head.h =================================================================== --- linux-stable.orig/include/linux/buffer_head.h 2022-09-30 13:07:11.000000000 +0200 +++ linux-stable/include/linux/buffer_head.h 2022-09-30 13:07:11.000000000 +0200 @@ -166,7 +166,7 @@ static __always_inline int buffer_uptoda * make it consistent with folio_test_uptodate * pairs with smp_mb__before_atomic in set_buffer_uptodate */ - return (smp_load_acquire(&bh->b_state) & (1UL << BH_Uptodate)) != 0; + return test_bit_acquire(BH_Uptodate, &bh->b_state); } #define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK) Index: linux-stable/include/linux/wait_bit.h =================================================================== --- linux-stable.orig/include/linux/wait_bit.h 2022-09-30 13:07:11.000000000 +0200 +++ linux-stable/include/linux/wait_bit.h 2022-09-30 13:07:11.000000000 +0200 @@ -71,7 +71,7 @@ static inline int wait_on_bit(unsigned long *word, int bit, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, bit_wait, @@ -96,7 +96,7 @@ static inline int wait_on_bit_io(unsigned long *word, int bit, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, 
bit_wait_io, @@ -123,7 +123,7 @@ wait_on_bit_timeout(unsigned long *word, unsigned long timeout) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit_timeout(word, bit, bit_wait_timeout, @@ -151,7 +151,7 @@ wait_on_bit_action(unsigned long *word, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, action, mode); } Index: linux-stable/kernel/sched/wait_bit.c =================================================================== --- linux-stable.orig/kernel/sched/wait_bit.c 2022-09-30 13:07:11.000000000 +0200 +++ linux-stable/kernel/sched/wait_bit.c 2022-09-30 13:07:11.000000000 +0200 @@ -47,7 +47,7 @@ __wait_on_bit(struct wait_queue_head *wq prepare_to_wait(wq_head, &wbq_entry->wq_entry, mode); if (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags)) ret = (*action)(&wbq_entry->key, mode); - } while (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret); + } while (test_bit_acquire(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret); finish_wait(wq_head, &wbq_entry->wq_entry); ^ permalink raw reply [flat|nested] 48+ messages in thread
* [PATCH] wait_on_bit: add an acquire memory barrier 2022-10-03 12:28 ` Mikulas Patocka 2022-10-03 12:28 ` [PATCH] wait_on_bit: add an acquire memory barrier Mikulas Patocka @ 2022-10-03 12:29 ` Mikulas Patocka 2022-10-03 12:29 ` Mikulas Patocka ` (8 subsequent siblings) 10 siblings, 0 replies; 48+ messages in thread From: Mikulas Patocka @ 2022-10-03 12:29 UTC (permalink / raw) To: Greg KH; +Cc: stable This is backport of the upstream patch 8238b4579866b7c1bb99883cfe102a43db5506ff for the stable branch 5.4 wait_on_bit: add an acquire memory barrier There are several places in the kernel where wait_on_bit is not followed by a memory barrier (for example, in drivers/md/dm-bufio.c:new_read). On architectures with weak memory ordering, it may happen that memory accesses that follow wait_on_bit are reordered before wait_on_bit and they may return invalid data. Fix this class of bugs by introducing a new function "test_bit_acquire" that works like test_bit, but has acquire memory ordering semantics. 
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com> Acked-by: Will Deacon <will@kernel.org> Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- arch/x86/include/asm/bitops.h | 21 +++++++++++++++++++++ include/asm-generic/bitops-instrumented.h | 6 ++++++ include/asm-generic/bitops/non-atomic.h | 14 ++++++++++++++ include/linux/buffer_head.h | 2 +- include/linux/wait_bit.h | 8 ++++---- kernel/sched/wait_bit.c | 2 +- 6 files changed, 47 insertions(+), 6 deletions(-) Index: linux-stable/arch/x86/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/x86/include/asm/bitops.h 2022-09-30 15:38:29.000000000 +0200 +++ linux-stable/arch/x86/include/asm/bitops.h 2022-09-30 15:38:29.000000000 +0200 @@ -207,6 +207,20 @@ static __always_inline bool constant_tes (addr[nr >> _BITOPS_LONG_SHIFT])) != 0; } +static __always_inline bool constant_test_bit_acquire(long nr, const volatile unsigned long *addr) +{ + bool oldbit; + + asm volatile("testb %2,%1" + CC_SET(nz) + : CC_OUT(nz) (oldbit) + : "m" (((unsigned char *)addr)[nr >> 3]), + "i" (1 << (nr & 7)) + :"memory"); + + return oldbit; +} + static __always_inline bool variable_test_bit(long nr, volatile const unsigned long *addr) { bool oldbit; @@ -224,6 +238,13 @@ static __always_inline bool variable_tes ? constant_test_bit((nr), (addr)) \ : variable_test_bit((nr), (addr))) +static __always_inline bool +arch_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + return __builtin_constant_p(nr) ? 
constant_test_bit_acquire(nr, addr) : + variable_test_bit(nr, addr); +} + /** * __ffs - find first set bit in word * @word: The word to search Index: linux-stable/include/asm-generic/bitops/non-atomic.h =================================================================== --- linux-stable.orig/include/asm-generic/bitops/non-atomic.h 2022-09-30 15:38:29.000000000 +0200 +++ linux-stable/include/asm-generic/bitops/non-atomic.h 2022-09-30 15:38:29.000000000 +0200 @@ -3,6 +3,7 @@ #define _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ #include <asm/types.h> +#include <asm/barrier.h> /** * __set_bit - Set a bit in memory @@ -106,4 +107,17 @@ static inline int test_bit(int nr, const return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); } +/** + * arch_test_bit_acquire - Determine, with acquire semantics, whether a bit is set + * @nr: bit number to test + * @addr: Address to start counting from + */ +static __always_inline bool +arch_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} +#define test_bit_acquire arch_test_bit_acquire + #endif /* _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ */ Index: linux-stable/include/linux/buffer_head.h =================================================================== --- linux-stable.orig/include/linux/buffer_head.h 2022-09-30 15:38:29.000000000 +0200 +++ linux-stable/include/linux/buffer_head.h 2022-09-30 15:38:29.000000000 +0200 @@ -166,7 +166,7 @@ static __always_inline int buffer_uptoda * make it consistent with folio_test_uptodate * pairs with smp_mb__before_atomic in set_buffer_uptodate */ - return (smp_load_acquire(&bh->b_state) & (1UL << BH_Uptodate)) != 0; + return test_bit_acquire(BH_Uptodate, &bh->b_state); } #define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK) Index: linux-stable/include/linux/wait_bit.h =================================================================== --- 
linux-stable.orig/include/linux/wait_bit.h 2022-09-30 15:38:29.000000000 +0200 +++ linux-stable/include/linux/wait_bit.h 2022-09-30 15:38:29.000000000 +0200 @@ -71,7 +71,7 @@ static inline int wait_on_bit(unsigned long *word, int bit, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, bit_wait, @@ -96,7 +96,7 @@ static inline int wait_on_bit_io(unsigned long *word, int bit, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, bit_wait_io, @@ -123,7 +123,7 @@ wait_on_bit_timeout(unsigned long *word, unsigned long timeout) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit_timeout(word, bit, bit_wait_timeout, @@ -151,7 +151,7 @@ wait_on_bit_action(unsigned long *word, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, action, mode); } Index: linux-stable/kernel/sched/wait_bit.c =================================================================== --- linux-stable.orig/kernel/sched/wait_bit.c 2022-09-30 15:38:29.000000000 +0200 +++ linux-stable/kernel/sched/wait_bit.c 2022-09-30 15:38:29.000000000 +0200 @@ -47,7 +47,7 @@ __wait_on_bit(struct wait_queue_head *wq prepare_to_wait(wq_head, &wbq_entry->wq_entry, mode); if (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags)) ret = (*action)(&wbq_entry->key, mode); - } while (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret); + } while (test_bit_acquire(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret); finish_wait(wq_head, &wbq_entry->wq_entry); Index: linux-stable/include/asm-generic/bitops-instrumented.h =================================================================== --- linux-stable.orig/include/asm-generic/bitops-instrumented.h 2022-09-30 
15:37:42.000000000 +0200 +++ linux-stable/include/asm-generic/bitops-instrumented.h 2022-09-30 15:40:55.000000000 +0200 @@ -238,6 +238,12 @@ static inline bool test_bit(long nr, con return arch_test_bit(nr, addr); } +static inline bool test_bit_acquire(long nr, const volatile unsigned long *addr) +{ + kasan_check_read(addr + BIT_WORD(nr), sizeof(long)); + return arch_test_bit_acquire(nr, addr); +} + #if defined(arch_clear_bit_unlock_is_negative_byte) /** * clear_bit_unlock_is_negative_byte - Clear a bit in memory and test if bottom ^ permalink raw reply [flat|nested] 48+ messages in thread
* [PATCH] wait_on_bit: add an acquire memory barrier 2022-10-03 12:28 ` Mikulas Patocka 2022-10-03 12:28 ` [PATCH] wait_on_bit: add an acquire memory barrier Mikulas Patocka 2022-10-03 12:29 ` Mikulas Patocka @ 2022-10-03 12:29 ` Mikulas Patocka 2022-10-03 12:29 ` Mikulas Patocka ` (7 subsequent siblings) 10 siblings, 0 replies; 48+ messages in thread From: Mikulas Patocka @ 2022-10-03 12:29 UTC (permalink / raw) To: Greg KH; +Cc: stable This is backport of the upstream patch 8238b4579866b7c1bb99883cfe102a43db5506ff for the stable branch 4.19 wait_on_bit: add an acquire memory barrier There are several places in the kernel where wait_on_bit is not followed by a memory barrier (for example, in drivers/md/dm-bufio.c:new_read). On architectures with weak memory ordering, it may happen that memory accesses that follow wait_on_bit are reordered before wait_on_bit and they may return invalid data. Fix this class of bugs by introducing a new function "test_bit_acquire" that works like test_bit, but has acquire memory ordering semantics. 
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com> Acked-by: Will Deacon <will@kernel.org> Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- arch/x86/include/asm/bitops.h | 21 +++++++++++++++++++++ include/asm-generic/bitops/non-atomic.h | 14 ++++++++++++++ include/linux/buffer_head.h | 2 +- include/linux/wait_bit.h | 8 ++++---- kernel/sched/wait_bit.c | 2 +- 5 files changed, 41 insertions(+), 6 deletions(-) Index: linux-stable/arch/x86/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/x86/include/asm/bitops.h 2022-09-30 15:44:48.000000000 +0200 +++ linux-stable/arch/x86/include/asm/bitops.h 2022-09-30 15:46:38.000000000 +0200 @@ -317,6 +317,20 @@ static __always_inline bool constant_tes (addr[nr >> _BITOPS_LONG_SHIFT])) != 0; } +static __always_inline bool constant_test_bit_acquire(long nr, const volatile unsigned long *addr) +{ + bool oldbit; + + asm volatile("testb %2,%1" + CC_SET(nz) + : CC_OUT(nz) (oldbit) + : "m" (((unsigned char *)addr)[nr >> 3]), + "i" (1 << (nr & 7)) + :"memory"); + + return oldbit; +} + static __always_inline bool variable_test_bit(long nr, volatile const unsigned long *addr) { bool oldbit; @@ -343,6 +357,13 @@ static bool test_bit(int nr, const volat ? constant_test_bit((nr), (addr)) \ : variable_test_bit((nr), (addr))) +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + return __builtin_constant_p(nr) ? 
constant_test_bit_acquire(nr, addr) : + variable_test_bit(nr, addr); +} + /** * __ffs - find first set bit in word * @word: The word to search Index: linux-stable/include/asm-generic/bitops/non-atomic.h =================================================================== --- linux-stable.orig/include/asm-generic/bitops/non-atomic.h 2022-09-30 15:44:48.000000000 +0200 +++ linux-stable/include/asm-generic/bitops/non-atomic.h 2022-09-30 15:44:48.000000000 +0200 @@ -3,6 +3,7 @@ #define _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ #include <asm/types.h> +#include <asm/barrier.h> /** * __set_bit - Set a bit in memory @@ -106,4 +107,17 @@ static inline int test_bit(int nr, const return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); } +/** + * arch_test_bit_acquire - Determine, with acquire semantics, whether a bit is set + * @nr: bit number to test + * @addr: Address to start counting from + */ +static __always_inline bool +arch_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} +#define test_bit_acquire arch_test_bit_acquire + #endif /* _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ */ Index: linux-stable/include/linux/buffer_head.h =================================================================== --- linux-stable.orig/include/linux/buffer_head.h 2022-09-30 15:44:48.000000000 +0200 +++ linux-stable/include/linux/buffer_head.h 2022-09-30 15:44:48.000000000 +0200 @@ -166,7 +166,7 @@ static __always_inline int buffer_uptoda * make it consistent with folio_test_uptodate * pairs with smp_mb__before_atomic in set_buffer_uptodate */ - return (smp_load_acquire(&bh->b_state) & (1UL << BH_Uptodate)) != 0; + return test_bit_acquire(BH_Uptodate, &bh->b_state); } #define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK) Index: linux-stable/include/linux/wait_bit.h =================================================================== --- 
linux-stable.orig/include/linux/wait_bit.h 2022-09-30 15:44:48.000000000 +0200 +++ linux-stable/include/linux/wait_bit.h 2022-09-30 15:44:48.000000000 +0200 @@ -71,7 +71,7 @@ static inline int wait_on_bit(unsigned long *word, int bit, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, bit_wait, @@ -96,7 +96,7 @@ static inline int wait_on_bit_io(unsigned long *word, int bit, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, bit_wait_io, @@ -123,7 +123,7 @@ wait_on_bit_timeout(unsigned long *word, unsigned long timeout) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit_timeout(word, bit, bit_wait_timeout, @@ -151,7 +151,7 @@ wait_on_bit_action(unsigned long *word, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, action, mode); } Index: linux-stable/kernel/sched/wait_bit.c =================================================================== --- linux-stable.orig/kernel/sched/wait_bit.c 2022-09-30 15:44:48.000000000 +0200 +++ linux-stable/kernel/sched/wait_bit.c 2022-09-30 15:44:48.000000000 +0200 @@ -46,7 +46,7 @@ __wait_on_bit(struct wait_queue_head *wq prepare_to_wait(wq_head, &wbq_entry->wq_entry, mode); if (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags)) ret = (*action)(&wbq_entry->key, mode); - } while (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret); + } while (test_bit_acquire(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret); finish_wait(wq_head, &wbq_entry->wq_entry); ^ permalink raw reply [flat|nested] 48+ messages in thread
* [PATCH] wait_on_bit: add an acquire memory barrier 2022-10-03 12:28 ` Mikulas Patocka ` (2 preceding siblings ...) 2022-10-03 12:29 ` Mikulas Patocka @ 2022-10-03 12:29 ` Mikulas Patocka 2022-10-03 12:30 ` Mikulas Patocka ` (6 subsequent siblings) 10 siblings, 0 replies; 48+ messages in thread From: Mikulas Patocka @ 2022-10-03 12:29 UTC (permalink / raw) To: Greg KH; +Cc: stable This is backport of the upstream patch 8238b4579866b7c1bb99883cfe102a43db5506ff for the stable branch 4.14 wait_on_bit: add an acquire memory barrier There are several places in the kernel where wait_on_bit is not followed by a memory barrier (for example, in drivers/md/dm-bufio.c:new_read). On architectures with weak memory ordering, it may happen that memory accesses that follow wait_on_bit are reordered before wait_on_bit and they may return invalid data. Fix this class of bugs by introducing a new function "test_bit_acquire" that works like test_bit, but has acquire memory ordering semantics. Signed-off-by: Mikulas Patocka <mpatocka@redhat.com> Acked-by: Will Deacon <will@kernel.org> Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- arch/x86/include/asm/bitops.h | 21 +++++++++++++++++++++ include/asm-generic/bitops/non-atomic.h | 14 ++++++++++++++ include/linux/buffer_head.h | 2 +- include/linux/wait_bit.h | 8 ++++---- kernel/sched/wait_bit.c | 2 +- 5 files changed, 41 insertions(+), 6 deletions(-) Index: linux-stable/arch/x86/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/x86/include/asm/bitops.h 2022-09-30 15:55:01.000000000 +0200 +++ linux-stable/arch/x86/include/asm/bitops.h 2022-09-30 15:55:01.000000000 +0200 @@ -328,6 +328,20 @@ static __always_inline bool constant_tes (addr[nr >> _BITOPS_LONG_SHIFT])) != 0; } +static __always_inline bool constant_test_bit_acquire(long nr, const volatile unsigned long *addr) +{ + bool oldbit; + + asm volatile("testb %2,%1" + 
CC_SET(nz) + : CC_OUT(nz) (oldbit) + : "m" (((unsigned char *)addr)[nr >> 3]), + "i" (1 << (nr & 7)) + :"memory"); + + return oldbit; +} + static __always_inline bool variable_test_bit(long nr, volatile const unsigned long *addr) { bool oldbit; @@ -354,6 +368,13 @@ static bool test_bit(int nr, const volat ? constant_test_bit((nr), (addr)) \ : variable_test_bit((nr), (addr))) +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + return __builtin_constant_p(nr) ? constant_test_bit_acquire(nr, addr) : + variable_test_bit(nr, addr); +} + /** * __ffs - find first set bit in word * @word: The word to search Index: linux-stable/include/asm-generic/bitops/non-atomic.h =================================================================== --- linux-stable.orig/include/asm-generic/bitops/non-atomic.h 2022-09-30 15:55:01.000000000 +0200 +++ linux-stable/include/asm-generic/bitops/non-atomic.h 2022-09-30 15:55:01.000000000 +0200 @@ -3,6 +3,7 @@ #define _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ #include <asm/types.h> +#include <asm/barrier.h> /** * __set_bit - Set a bit in memory @@ -106,4 +107,17 @@ static inline int test_bit(int nr, const return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); } +/** + * arch_test_bit_acquire - Determine, with acquire semantics, whether a bit is set + * @nr: bit number to test + * @addr: Address to start counting from + */ +static __always_inline bool +arch_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} +#define test_bit_acquire arch_test_bit_acquire + #endif /* _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ */ Index: linux-stable/include/linux/buffer_head.h =================================================================== --- linux-stable.orig/include/linux/buffer_head.h 2022-09-30 15:55:01.000000000 +0200 +++ linux-stable/include/linux/buffer_head.h 
2022-09-30 15:55:01.000000000 +0200 @@ -163,7 +163,7 @@ static __always_inline int buffer_uptoda * make it consistent with folio_test_uptodate * pairs with smp_mb__before_atomic in set_buffer_uptodate */ - return (smp_load_acquire(&bh->b_state) & (1UL << BH_Uptodate)) != 0; + return test_bit_acquire(BH_Uptodate, &bh->b_state); } #define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK) Index: linux-stable/include/linux/wait_bit.h =================================================================== --- linux-stable.orig/include/linux/wait_bit.h 2022-09-30 15:55:01.000000000 +0200 +++ linux-stable/include/linux/wait_bit.h 2022-09-30 15:55:01.000000000 +0200 @@ -76,7 +76,7 @@ static inline int wait_on_bit(unsigned long *word, int bit, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, bit_wait, @@ -101,7 +101,7 @@ static inline int wait_on_bit_io(unsigned long *word, int bit, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, bit_wait_io, @@ -128,7 +128,7 @@ wait_on_bit_timeout(unsigned long *word, unsigned long timeout) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit_timeout(word, bit, bit_wait_timeout, @@ -156,7 +156,7 @@ wait_on_bit_action(unsigned long *word, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, action, mode); } Index: linux-stable/kernel/sched/wait_bit.c =================================================================== --- linux-stable.orig/kernel/sched/wait_bit.c 2022-09-30 15:55:01.000000000 +0200 +++ linux-stable/kernel/sched/wait_bit.c 2022-09-30 15:55:55.000000000 +0200 @@ -49,7 +49,7 @@ __wait_on_bit(struct wait_queue_head *wq prepare_to_wait(wq_head, &wbq_entry->wq_entry, 
mode); if (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags)) ret = (*action)(&wbq_entry->key, mode); - } while (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret); + } while (test_bit_acquire(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret); finish_wait(wq_head, &wbq_entry->wq_entry); return ret; } ^ permalink raw reply [flat|nested] 48+ messages in thread
* [PATCH] wait_on_bit: add an acquire memory barrier 2022-10-03 12:28 ` Mikulas Patocka ` (3 preceding siblings ...) 2022-10-03 12:29 ` Mikulas Patocka @ 2022-10-03 12:30 ` Mikulas Patocka 2022-10-03 12:30 ` [PATCH] provide arch_test_bit_acquire for architectures that define test_bit Mikulas Patocka ` (5 subsequent siblings) 10 siblings, 0 replies; 48+ messages in thread From: Mikulas Patocka @ 2022-10-03 12:30 UTC (permalink / raw) To: Greg KH; +Cc: stable This is backport of the upstream patch 8238b4579866b7c1bb99883cfe102a43db5506ff for the stable branch 4.9 wait_on_bit: add an acquire memory barrier There are several places in the kernel where wait_on_bit is not followed by a memory barrier (for example, in drivers/md/dm-bufio.c:new_read). On architectures with weak memory ordering, it may happen that memory accesses that follow wait_on_bit are reordered before wait_on_bit and they may return invalid data. Fix this class of bugs by introducing a new function "test_bit_acquire" that works like test_bit, but has acquire memory ordering semantics. 
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com> Acked-by: Will Deacon <will@kernel.org> Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- arch/x86/include/asm/bitops.h | 21 +++++++++++++++++++++ include/asm-generic/bitops/non-atomic.h | 14 ++++++++++++++ include/linux/buffer_head.h | 2 +- include/linux/wait.h | 8 ++++---- kernel/sched/wait.c | 2 +- 5 files changed, 41 insertions(+), 6 deletions(-) Index: linux-stable/arch/x86/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/x86/include/asm/bitops.h 2022-09-30 16:01:38.000000000 +0200 +++ linux-stable/arch/x86/include/asm/bitops.h 2022-09-30 16:01:38.000000000 +0200 @@ -314,6 +314,20 @@ static __always_inline bool constant_tes (addr[nr >> _BITOPS_LONG_SHIFT])) != 0; } +static __always_inline bool constant_test_bit_acquire(long nr, const volatile unsigned long *addr) +{ + bool oldbit; + + asm volatile("testb %2,%1" + CC_SET(nz) + : CC_OUT(nz) (oldbit) + : "m" (((unsigned char *)addr)[nr >> 3]), + "i" (1 << (nr & 7)) + :"memory"); + + return oldbit; +} + static __always_inline bool variable_test_bit(long nr, volatile const unsigned long *addr) { bool oldbit; @@ -340,6 +354,13 @@ static bool test_bit(int nr, const volat ? constant_test_bit((nr), (addr)) \ : variable_test_bit((nr), (addr))) +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + return __builtin_constant_p(nr) ? 
constant_test_bit_acquire(nr, addr) : + variable_test_bit(nr, addr); +} + /** * __ffs - find first set bit in word * @word: The word to search Index: linux-stable/include/asm-generic/bitops/non-atomic.h =================================================================== --- linux-stable.orig/include/asm-generic/bitops/non-atomic.h 2022-09-30 16:01:38.000000000 +0200 +++ linux-stable/include/asm-generic/bitops/non-atomic.h 2022-09-30 16:01:38.000000000 +0200 @@ -2,6 +2,7 @@ #define _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ #include <asm/types.h> +#include <asm/barrier.h> /** * __set_bit - Set a bit in memory @@ -105,4 +106,17 @@ static inline int test_bit(int nr, const return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); } +/** + * arch_test_bit_acquire - Determine, with acquire semantics, whether a bit is set + * @nr: bit number to test + * @addr: Address to start counting from + */ +static __always_inline bool +arch_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} +#define test_bit_acquire arch_test_bit_acquire + #endif /* _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ */ Index: linux-stable/include/linux/buffer_head.h =================================================================== --- linux-stable.orig/include/linux/buffer_head.h 2022-09-30 16:01:38.000000000 +0200 +++ linux-stable/include/linux/buffer_head.h 2022-09-30 16:01:38.000000000 +0200 @@ -162,7 +162,7 @@ static __always_inline int buffer_uptoda * make it consistent with folio_test_uptodate * pairs with smp_mb__before_atomic in set_buffer_uptodate */ - return (smp_load_acquire(&bh->b_state) & (1UL << BH_Uptodate)) != 0; + return test_bit_acquire(BH_Uptodate, &bh->b_state); } #define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK) Index: linux-stable/include/linux/wait.h =================================================================== --- 
linux-stable.orig/include/linux/wait.h 2022-09-30 16:01:38.000000000 +0200 +++ linux-stable/include/linux/wait.h 2022-09-30 16:01:38.000000000 +0200 @@ -1066,7 +1066,7 @@ static inline int wait_on_bit(unsigned long *word, int bit, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, bit_wait, @@ -1091,7 +1091,7 @@ static inline int wait_on_bit_io(unsigned long *word, int bit, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, bit_wait_io, @@ -1118,7 +1118,7 @@ wait_on_bit_timeout(unsigned long *word, unsigned long timeout) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit_timeout(word, bit, bit_wait_timeout, @@ -1146,7 +1146,7 @@ wait_on_bit_action(unsigned long *word, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, action, mode); } Index: linux-stable/kernel/sched/wait.c =================================================================== --- linux-stable.orig/kernel/sched/wait.c 2022-09-30 16:01:38.000000000 +0200 +++ linux-stable/kernel/sched/wait.c 2022-09-30 16:01:58.000000000 +0200 @@ -389,7 +389,7 @@ __wait_on_bit(wait_queue_head_t *wq, str prepare_to_wait(wq, &q->wait, mode); if (test_bit(q->key.bit_nr, q->key.flags)) ret = (*action)(&q->key, mode); - } while (test_bit(q->key.bit_nr, q->key.flags) && !ret); + } while (test_bit_acquire(q->key.bit_nr, q->key.flags) && !ret); finish_wait(wq, &q->wait); return ret; } ^ permalink raw reply [flat|nested] 48+ messages in thread
* [PATCH] provide arch_test_bit_acquire for architectures that define test_bit 2022-10-03 12:28 ` Mikulas Patocka ` (4 preceding siblings ...) 2022-10-03 12:30 ` Mikulas Patocka @ 2022-10-03 12:30 ` Mikulas Patocka 2022-10-03 12:31 ` Mikulas Patocka ` (4 subsequent siblings) 10 siblings, 0 replies; 48+ messages in thread From: Mikulas Patocka @ 2022-10-03 12:30 UTC (permalink / raw) To: Greg KH; +Cc: stable This is backport of the upstream patch d6ffe6067a54972564552ea45d320fb98db1ac5e for the stable branches 5.19, 5.15, 5.10 provide arch_test_bit_acquire for architectures that define test_bit Some architectures define their own arch_test_bit and they also need arch_test_bit_acquire, otherwise they won't compile. We also clean up the code by using the generic test_bit if that is equivalent to the arch-specific version. Signed-off-by: Mikulas Patocka <mpatocka@redhat.com> Cc: stable@vger.kernel.org Fixes: 8238b4579866 ("wait_on_bit: add an acquire memory barrier") Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- arch/alpha/include/asm/bitops.h | 7 +++++++ arch/hexagon/include/asm/bitops.h | 15 +++++++++++++++ arch/ia64/include/asm/bitops.h | 7 +++++++ arch/m68k/include/asm/bitops.h | 6 ++++++ arch/s390/include/asm/bitops.h | 7 +++++++ arch/sh/include/asm/bitops-op32.h | 7 +++++++ 6 files changed, 49 insertions(+) Index: linux-stable/arch/alpha/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/alpha/include/asm/bitops.h 2022-09-30 13:07:14.000000000 +0200 +++ linux-stable/arch/alpha/include/asm/bitops.h 2022-09-30 13:07:14.000000000 +0200 @@ -289,6 +289,13 @@ test_bit(int nr, const volatile void * a return (1UL & (((const int *) addr)[nr >> 5] >> (nr & 31))) != 0UL; } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & 
(BITS_PER_LONG-1))); +} + /* * ffz = Find First Zero in word. Undefined if no zero exists, * so code should check against ~0UL first.. Index: linux-stable/arch/hexagon/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/hexagon/include/asm/bitops.h 2022-09-30 13:07:14.000000000 +0200 +++ linux-stable/arch/hexagon/include/asm/bitops.h 2022-09-30 13:07:14.000000000 +0200 @@ -172,7 +172,22 @@ static inline int __test_bit(int nr, con return retval; } +static inline int __test_bit_acquire(int nr, const volatile unsigned long *addr) +{ + int retval; + + asm volatile( + "{P0 = tstbit(%1,%2); if (P0.new) %0 = #1; if (!P0.new) %0 = #0;}\n" + : "=&r" (retval) + : "r" (addr[BIT_WORD(nr)]), "r" (nr % BITS_PER_LONG) + : "p0", "memory" + ); + + return retval; +} + #define test_bit(nr, addr) __test_bit(nr, addr) +#define test_bit_acquire(nr, addr) __test_bit_acquire(nr, addr) /* * ffz - find first zero in word. Index: linux-stable/arch/ia64/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/ia64/include/asm/bitops.h 2022-09-30 13:07:14.000000000 +0200 +++ linux-stable/arch/ia64/include/asm/bitops.h 2022-09-30 13:07:14.000000000 +0200 @@ -337,6 +337,13 @@ test_bit (int nr, const volatile void *a return 1 & (((const volatile __u32 *) addr)[nr >> 5] >> (nr & 31)); } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + /** * ffz - find the first zero bit in a long word * @x: The long word to find the bit in Index: linux-stable/arch/m68k/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/m68k/include/asm/bitops.h 2022-09-30 13:07:14.000000000 +0200 +++ linux-stable/arch/m68k/include/asm/bitops.h 2022-09-30 
13:07:14.000000000 +0200 @@ -153,6 +153,12 @@ static inline int test_bit(int nr, const return (vaddr[nr >> 5] & (1UL << (nr & 31))) != 0; } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} static inline int bset_reg_test_and_set_bit(int nr, volatile unsigned long *vaddr) Index: linux-stable/arch/s390/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/s390/include/asm/bitops.h 2022-09-30 13:07:14.000000000 +0200 +++ linux-stable/arch/s390/include/asm/bitops.h 2022-09-30 13:07:14.000000000 +0200 @@ -184,6 +184,13 @@ static inline bool arch_test_bit(unsigne return *addr & mask; } +static __always_inline bool +arch_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + static inline bool arch_test_and_set_bit_lock(unsigned long nr, volatile unsigned long *ptr) { Index: linux-stable/arch/sh/include/asm/bitops-op32.h =================================================================== --- linux-stable.orig/arch/sh/include/asm/bitops-op32.h 2022-09-30 13:07:14.000000000 +0200 +++ linux-stable/arch/sh/include/asm/bitops-op32.h 2022-09-30 13:07:14.000000000 +0200 @@ -138,4 +138,11 @@ static inline int test_bit(int nr, const return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + #endif /* __ASM_SH_BITOPS_OP32_H */ ^ permalink raw reply [flat|nested] 48+ messages in thread
* [PATCH] provide arch_test_bit_acquire for architectures that define test_bit 2022-10-03 12:28 ` Mikulas Patocka ` (5 preceding siblings ...) 2022-10-03 12:30 ` [PATCH] provide arch_test_bit_acquire for architectures that define test_bit Mikulas Patocka @ 2022-10-03 12:31 ` Mikulas Patocka 2022-10-03 12:31 ` Mikulas Patocka ` (3 subsequent siblings) 10 siblings, 0 replies; 48+ messages in thread From: Mikulas Patocka @ 2022-10-03 12:31 UTC (permalink / raw) To: Greg KH; +Cc: stable This is backport of the upstream patch d6ffe6067a54972564552ea45d320fb98db1ac5e for the stable branch 5.4 provide arch_test_bit_acquire for architectures that define test_bit Some architectures define their own arch_test_bit and they also need arch_test_bit_acquire, otherwise they won't compile. We also clean up the code by using the generic test_bit if that is equivalent to the arch-specific version. Signed-off-by: Mikulas Patocka <mpatocka@redhat.com> Cc: stable@vger.kernel.org Fixes: 8238b4579866 ("wait_on_bit: add an acquire memory barrier") Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- arch/alpha/include/asm/bitops.h | 7 +++++++ arch/hexagon/include/asm/bitops.h | 15 +++++++++++++++ arch/ia64/include/asm/bitops.h | 7 +++++++ arch/m68k/include/asm/bitops.h | 6 ++++++ arch/s390/include/asm/bitops.h | 7 +++++++ arch/sh/include/asm/bitops-op32.h | 7 +++++++ 6 files changed, 49 insertions(+) Index: linux-stable/arch/alpha/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/alpha/include/asm/bitops.h 2022-09-30 15:39:09.000000000 +0200 +++ linux-stable/arch/alpha/include/asm/bitops.h 2022-09-30 15:39:09.000000000 +0200 @@ -289,6 +289,13 @@ test_bit(int nr, const volatile void * a return (1UL & (((const int *) addr)[nr >> 5] >> (nr & 31))) != 0UL; } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + 
BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + /* * ffz = Find First Zero in word. Undefined if no zero exists, * so code should check against ~0UL first.. Index: linux-stable/arch/hexagon/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/hexagon/include/asm/bitops.h 2022-09-30 15:39:09.000000000 +0200 +++ linux-stable/arch/hexagon/include/asm/bitops.h 2022-09-30 15:39:09.000000000 +0200 @@ -172,7 +172,22 @@ static inline int __test_bit(int nr, con return retval; } +static inline int __test_bit_acquire(int nr, const volatile unsigned long *addr) +{ + int retval; + + asm volatile( + "{P0 = tstbit(%1,%2); if (P0.new) %0 = #1; if (!P0.new) %0 = #0;}\n" + : "=&r" (retval) + : "r" (addr[BIT_WORD(nr)]), "r" (nr % BITS_PER_LONG) + : "p0", "memory" + ); + + return retval; +} + #define test_bit(nr, addr) __test_bit(nr, addr) +#define test_bit_acquire(nr, addr) __test_bit_acquire(nr, addr) /* * ffz - find first zero in word. 
Index: linux-stable/arch/ia64/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/ia64/include/asm/bitops.h 2022-09-30 15:39:09.000000000 +0200 +++ linux-stable/arch/ia64/include/asm/bitops.h 2022-09-30 15:39:09.000000000 +0200 @@ -337,6 +337,13 @@ test_bit (int nr, const volatile void *a return 1 & (((const volatile __u32 *) addr)[nr >> 5] >> (nr & 31)); } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + /** * ffz - find the first zero bit in a long word * @x: The long word to find the bit in Index: linux-stable/arch/m68k/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/m68k/include/asm/bitops.h 2022-09-30 15:39:09.000000000 +0200 +++ linux-stable/arch/m68k/include/asm/bitops.h 2022-09-30 15:39:09.000000000 +0200 @@ -153,6 +153,12 @@ static inline int test_bit(int nr, const return (vaddr[nr >> 5] & (1UL << (nr & 31))) != 0; } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} static inline int bset_reg_test_and_set_bit(int nr, volatile unsigned long *vaddr) Index: linux-stable/arch/s390/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/s390/include/asm/bitops.h 2022-09-30 15:39:09.000000000 +0200 +++ linux-stable/arch/s390/include/asm/bitops.h 2022-09-30 15:39:09.000000000 +0200 @@ -219,6 +219,13 @@ static inline bool arch_test_bit(unsigne return (*addr >> (nr & 7)) & 1; } +static __always_inline bool +arch_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = 
((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + static inline bool arch_test_and_set_bit_lock(unsigned long nr, volatile unsigned long *ptr) { Index: linux-stable/arch/sh/include/asm/bitops-op32.h =================================================================== --- linux-stable.orig/arch/sh/include/asm/bitops-op32.h 2022-09-30 15:39:09.000000000 +0200 +++ linux-stable/arch/sh/include/asm/bitops-op32.h 2022-09-30 15:39:09.000000000 +0200 @@ -140,4 +140,11 @@ static inline int test_bit(int nr, const return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + #endif /* __ASM_SH_BITOPS_OP32_H */ ^ permalink raw reply [flat|nested] 48+ messages in thread
* [PATCH] provide arch_test_bit_acquire for architectures that define test_bit 2022-10-03 12:28 ` Mikulas Patocka ` (6 preceding siblings ...) 2022-10-03 12:31 ` Mikulas Patocka @ 2022-10-03 12:31 ` Mikulas Patocka 2022-10-03 12:31 ` Mikulas Patocka ` (2 subsequent siblings) 10 siblings, 0 replies; 48+ messages in thread From: Mikulas Patocka @ 2022-10-03 12:31 UTC (permalink / raw) To: Greg KH; +Cc: stable This is backport of the upstream patch d6ffe6067a54972564552ea45d320fb98db1ac5e for the stable branch 4.19 provide arch_test_bit_acquire for architectures that define test_bit Some architectures define their own arch_test_bit and they also need arch_test_bit_acquire, otherwise they won't compile. We also clean up the code by using the generic test_bit if that is equivalent to the arch-specific version. Signed-off-by: Mikulas Patocka <mpatocka@redhat.com> Cc: stable@vger.kernel.org Fixes: 8238b4579866 ("wait_on_bit: add an acquire memory barrier") Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- arch/alpha/include/asm/bitops.h | 7 +++++++ arch/hexagon/include/asm/bitops.h | 15 +++++++++++++++ arch/ia64/include/asm/bitops.h | 7 +++++++ arch/m68k/include/asm/bitops.h | 6 ++++++ arch/s390/include/asm/bitops.h | 7 +++++++ arch/sh/include/asm/bitops-op32.h | 7 +++++++ 6 files changed, 49 insertions(+) Index: linux-stable/arch/alpha/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/alpha/include/asm/bitops.h 2022-09-30 15:48:24.000000000 +0200 +++ linux-stable/arch/alpha/include/asm/bitops.h 2022-09-30 15:48:24.000000000 +0200 @@ -289,6 +289,13 @@ test_bit(int nr, const volatile void * a return (1UL & (((const int *) addr)[nr >> 5] >> (nr & 31))) != 0UL; } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + 
/* * ffz = Find First Zero in word. Undefined if no zero exists, * so code should check against ~0UL first.. Index: linux-stable/arch/hexagon/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/hexagon/include/asm/bitops.h 2022-09-30 15:48:24.000000000 +0200 +++ linux-stable/arch/hexagon/include/asm/bitops.h 2022-09-30 15:48:24.000000000 +0200 @@ -186,7 +186,22 @@ static inline int __test_bit(int nr, con return retval; } +static inline int __test_bit_acquire(int nr, const volatile unsigned long *addr) +{ + int retval; + + asm volatile( + "{P0 = tstbit(%1,%2); if (P0.new) %0 = #1; if (!P0.new) %0 = #0;}\n" + : "=&r" (retval) + : "r" (addr[BIT_WORD(nr)]), "r" (nr % BITS_PER_LONG) + : "p0", "memory" + ); + + return retval; +} + #define test_bit(nr, addr) __test_bit(nr, addr) +#define test_bit_acquire(nr, addr) __test_bit_acquire(nr, addr) /* * ffz - find first zero in word. Index: linux-stable/arch/ia64/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/ia64/include/asm/bitops.h 2022-09-30 15:48:24.000000000 +0200 +++ linux-stable/arch/ia64/include/asm/bitops.h 2022-09-30 15:48:24.000000000 +0200 @@ -337,6 +337,13 @@ test_bit (int nr, const volatile void *a return 1 & (((const volatile __u32 *) addr)[nr >> 5] >> (nr & 31)); } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + /** * ffz - find the first zero bit in a long word * @x: The long word to find the bit in Index: linux-stable/arch/m68k/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/m68k/include/asm/bitops.h 2022-09-30 15:48:24.000000000 +0200 +++ linux-stable/arch/m68k/include/asm/bitops.h 2022-09-30 15:48:24.000000000 +0200 @@ -153,6 
+153,12 @@ static inline int test_bit(int nr, const return (vaddr[nr >> 5] & (1UL << (nr & 31))) != 0; } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} static inline int bset_reg_test_and_set_bit(int nr, volatile unsigned long *vaddr) Index: linux-stable/arch/s390/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/s390/include/asm/bitops.h 2022-09-30 15:48:24.000000000 +0200 +++ linux-stable/arch/s390/include/asm/bitops.h 2022-09-30 15:49:53.000000000 +0200 @@ -215,6 +215,13 @@ static inline int test_bit(unsigned long return (*addr >> (nr & 7)) & 1; } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + static inline int test_and_set_bit_lock(unsigned long nr, volatile unsigned long *ptr) { Index: linux-stable/arch/sh/include/asm/bitops-op32.h =================================================================== --- linux-stable.orig/arch/sh/include/asm/bitops-op32.h 2022-09-30 15:48:24.000000000 +0200 +++ linux-stable/arch/sh/include/asm/bitops-op32.h 2022-09-30 15:48:24.000000000 +0200 @@ -140,4 +140,11 @@ static inline int test_bit(int nr, const return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + #endif /* __ASM_SH_BITOPS_OP32_H */ ^ permalink raw reply [flat|nested] 48+ messages in thread
* [PATCH] provide arch_test_bit_acquire for architectures that define test_bit 2022-10-03 12:28 ` Mikulas Patocka ` (7 preceding siblings ...) 2022-10-03 12:31 ` Mikulas Patocka @ 2022-10-03 12:31 ` Mikulas Patocka 2022-10-03 12:32 ` Mikulas Patocka 2022-10-05 16:48 ` backport of patches 8238b4579866b7c1bb99883cfe102a43db5506ff and d6ffe6067a54972564552ea45d320fb98db1ac5e Greg KH 10 siblings, 0 replies; 48+ messages in thread From: Mikulas Patocka @ 2022-10-03 12:31 UTC (permalink / raw) To: Greg KH; +Cc: stable This is backport of the upstream patch d6ffe6067a54972564552ea45d320fb98db1ac5e for the stable branch 4.14 provide arch_test_bit_acquire for architectures that define test_bit Some architectures define their own arch_test_bit and they also need arch_test_bit_acquire, otherwise they won't compile. We also clean up the code by using the generic test_bit if that is equivalent to the arch-specific version. Signed-off-by: Mikulas Patocka <mpatocka@redhat.com> Cc: stable@vger.kernel.org Fixes: 8238b4579866 ("wait_on_bit: add an acquire memory barrier") Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- arch/alpha/include/asm/bitops.h | 7 +++++++ arch/hexagon/include/asm/bitops.h | 15 +++++++++++++++ arch/ia64/include/asm/bitops.h | 7 +++++++ arch/m68k/include/asm/bitops.h | 6 ++++++ arch/s390/include/asm/bitops.h | 7 +++++++ arch/sh/include/asm/bitops-op32.h | 7 +++++++ 6 files changed, 49 insertions(+) Index: linux-stable/arch/alpha/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/alpha/include/asm/bitops.h 2022-09-30 15:48:24.000000000 +0200 +++ linux-stable/arch/alpha/include/asm/bitops.h 2022-09-30 15:48:24.000000000 +0200 @@ -289,6 +289,13 @@ test_bit(int nr, const volatile void * a return (1UL & (((const int *) addr)[nr >> 5] >> (nr & 31))) != 0UL; } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = 
((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + /* * ffz = Find First Zero in word. Undefined if no zero exists, * so code should check against ~0UL first.. Index: linux-stable/arch/hexagon/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/hexagon/include/asm/bitops.h 2022-09-30 15:48:24.000000000 +0200 +++ linux-stable/arch/hexagon/include/asm/bitops.h 2022-09-30 15:48:24.000000000 +0200 @@ -186,7 +186,22 @@ static inline int __test_bit(int nr, con return retval; } +static inline int __test_bit_acquire(int nr, const volatile unsigned long *addr) +{ + int retval; + + asm volatile( + "{P0 = tstbit(%1,%2); if (P0.new) %0 = #1; if (!P0.new) %0 = #0;}\n" + : "=&r" (retval) + : "r" (addr[BIT_WORD(nr)]), "r" (nr % BITS_PER_LONG) + : "p0", "memory" + ); + + return retval; +} + #define test_bit(nr, addr) __test_bit(nr, addr) +#define test_bit_acquire(nr, addr) __test_bit_acquire(nr, addr) /* * ffz - find first zero in word. 
Index: linux-stable/arch/ia64/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/ia64/include/asm/bitops.h 2022-09-30 15:48:24.000000000 +0200 +++ linux-stable/arch/ia64/include/asm/bitops.h 2022-09-30 15:48:24.000000000 +0200 @@ -337,6 +337,13 @@ test_bit (int nr, const volatile void *a return 1 & (((const volatile __u32 *) addr)[nr >> 5] >> (nr & 31)); } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + /** * ffz - find the first zero bit in a long word * @x: The long word to find the bit in Index: linux-stable/arch/m68k/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/m68k/include/asm/bitops.h 2022-09-30 15:48:24.000000000 +0200 +++ linux-stable/arch/m68k/include/asm/bitops.h 2022-09-30 15:48:24.000000000 +0200 @@ -153,6 +153,12 @@ static inline int test_bit(int nr, const return (vaddr[nr >> 5] & (1UL << (nr & 31))) != 0; } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} static inline int bset_reg_test_and_set_bit(int nr, volatile unsigned long *vaddr) Index: linux-stable/arch/s390/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/s390/include/asm/bitops.h 2022-09-30 15:48:24.000000000 +0200 +++ linux-stable/arch/s390/include/asm/bitops.h 2022-09-30 15:49:53.000000000 +0200 @@ -215,6 +215,13 @@ static inline int test_bit(unsigned long return (*addr >> (nr & 7)) & 1; } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned 
long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + static inline int test_and_set_bit_lock(unsigned long nr, volatile unsigned long *ptr) { Index: linux-stable/arch/sh/include/asm/bitops-op32.h =================================================================== --- linux-stable.orig/arch/sh/include/asm/bitops-op32.h 2022-09-30 15:48:24.000000000 +0200 +++ linux-stable/arch/sh/include/asm/bitops-op32.h 2022-09-30 15:48:24.000000000 +0200 @@ -140,4 +140,11 @@ static inline int test_bit(int nr, const return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + #endif /* __ASM_SH_BITOPS_OP32_H */ ^ permalink raw reply [flat|nested] 48+ messages in thread
* [PATCH] provide arch_test_bit_acquire for architectures that define test_bit 2022-10-03 12:28 ` Mikulas Patocka ` (8 preceding siblings ...) 2022-10-03 12:31 ` Mikulas Patocka @ 2022-10-03 12:32 ` Mikulas Patocka 2022-10-05 16:48 ` backport of patches 8238b4579866b7c1bb99883cfe102a43db5506ff and d6ffe6067a54972564552ea45d320fb98db1ac5e Greg KH 10 siblings, 0 replies; 48+ messages in thread From: Mikulas Patocka @ 2022-10-03 12:32 UTC (permalink / raw) To: Greg KH; +Cc: stable This is backport of the upstream patch d6ffe6067a54972564552ea45d320fb98db1ac5e for the stable branch 4.9 provide arch_test_bit_acquire for architectures that define test_bit Some architectures define their own arch_test_bit and they also need arch_test_bit_acquire, otherwise they won't compile. We also clean up the code by using the generic test_bit if that is equivalent to the arch-specific version. Signed-off-by: Mikulas Patocka <mpatocka@redhat.com> Cc: stable@vger.kernel.org Fixes: 8238b4579866 ("wait_on_bit: add an acquire memory barrier") Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- arch/alpha/include/asm/bitops.h | 7 +++++++ arch/hexagon/include/asm/bitops.h | 15 +++++++++++++++ arch/ia64/include/asm/bitops.h | 7 +++++++ arch/m68k/include/asm/bitops.h | 6 ++++++ arch/s390/include/asm/bitops.h | 7 +++++++ arch/sh/include/asm/bitops-op32.h | 7 +++++++ 6 files changed, 49 insertions(+) Index: linux-stable/arch/alpha/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/alpha/include/asm/bitops.h 2022-09-30 16:03:22.000000000 +0200 +++ linux-stable/arch/alpha/include/asm/bitops.h 2022-09-30 16:03:22.000000000 +0200 @@ -288,6 +288,13 @@ test_bit(int nr, const volatile void * a return (1UL & (((const int *) addr)[nr >> 5] >> (nr & 31))) != 0UL; } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); 
+ return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + /* * ffz = Find First Zero in word. Undefined if no zero exists, * so code should check against ~0UL first.. Index: linux-stable/arch/hexagon/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/hexagon/include/asm/bitops.h 2022-09-30 16:03:22.000000000 +0200 +++ linux-stable/arch/hexagon/include/asm/bitops.h 2022-09-30 16:03:22.000000000 +0200 @@ -186,7 +186,22 @@ static inline int __test_bit(int nr, con return retval; } +static inline int __test_bit_acquire(int nr, const volatile unsigned long *addr) +{ + int retval; + + asm volatile( + "{P0 = tstbit(%1,%2); if (P0.new) %0 = #1; if (!P0.new) %0 = #0;}\n" + : "=&r" (retval) + : "r" (addr[BIT_WORD(nr)]), "r" (nr % BITS_PER_LONG) + : "p0", "memory" + ); + + return retval; +} + #define test_bit(nr, addr) __test_bit(nr, addr) +#define test_bit_acquire(nr, addr) __test_bit_acquire(nr, addr) /* * ffz - find first zero in word. 
Index: linux-stable/arch/ia64/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/ia64/include/asm/bitops.h 2022-09-30 16:03:22.000000000 +0200 +++ linux-stable/arch/ia64/include/asm/bitops.h 2022-09-30 16:03:22.000000000 +0200 @@ -336,6 +336,13 @@ test_bit (int nr, const volatile void *a return 1 & (((const volatile __u32 *) addr)[nr >> 5] >> (nr & 31)); } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + /** * ffz - find the first zero bit in a long word * @x: The long word to find the bit in Index: linux-stable/arch/m68k/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/m68k/include/asm/bitops.h 2022-09-30 16:03:22.000000000 +0200 +++ linux-stable/arch/m68k/include/asm/bitops.h 2022-09-30 16:03:22.000000000 +0200 @@ -153,6 +153,12 @@ static inline int test_bit(int nr, const return (vaddr[nr >> 5] & (1UL << (nr & 31))) != 0; } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} static inline int bset_reg_test_and_set_bit(int nr, volatile unsigned long *vaddr) Index: linux-stable/arch/s390/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/s390/include/asm/bitops.h 2022-09-30 16:03:22.000000000 +0200 +++ linux-stable/arch/s390/include/asm/bitops.h 2022-09-30 16:03:22.000000000 +0200 @@ -270,6 +270,13 @@ static inline int test_bit(unsigned long return (*addr >> (nr & 7)) & 1; } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned 
long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + static inline int test_and_set_bit_lock(unsigned long nr, volatile unsigned long *ptr) { Index: linux-stable/arch/sh/include/asm/bitops-op32.h =================================================================== --- linux-stable.orig/arch/sh/include/asm/bitops-op32.h 2022-09-30 16:03:22.000000000 +0200 +++ linux-stable/arch/sh/include/asm/bitops-op32.h 2022-09-30 16:03:22.000000000 +0200 @@ -139,4 +139,11 @@ static inline int test_bit(int nr, const return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + #endif /* __ASM_SH_BITOPS_OP32_H */ ^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: backport of patches 8238b4579866b7c1bb99883cfe102a43db5506ff and d6ffe6067a54972564552ea45d320fb98db1ac5e 2022-10-03 12:28 ` Mikulas Patocka ` (9 preceding siblings ...) 2022-10-03 12:32 ` Mikulas Patocka @ 2022-10-05 16:48 ` Greg KH 2022-10-10 19:08 ` Greg KH 10 siblings, 1 reply; 48+ messages in thread From: Greg KH @ 2022-10-05 16:48 UTC (permalink / raw) To: Mikulas Patocka; +Cc: stable On Mon, Oct 03, 2022 at 08:28:06AM -0400, Mikulas Patocka wrote: > > > On Sat, 1 Oct 2022, Greg KH wrote: > > > On Fri, Sep 30, 2022 at 11:32:30AM -0400, Mikulas Patocka wrote: > > > Hi > > > > > > Here I'm submitting backport of patches > > > 8238b4579866b7c1bb99883cfe102a43db5506ff and > > > d6ffe6067a54972564552ea45d320fb98db1ac5e to the stable branches. > > > > Thanks, but you provide no information as to why these are needed. > > > > What needs them? They are just adding new functions to the tree from > > what I can tell. > > > > thanks, > > > > greg k-h > > There's a race condition in wait_on_bit. wait_on_bit tests a bit using the > "test_bit" function, however this function doesn't do any memory barrier, > so the memory accesses that follow wait_on_bit may be reordered before it > and return invalid data. > > Linus didn't want to add a memory barrier to wait_on_bit, he instead > wanted to introduce a new function test_bit_acquire that performs the > "acquire" memory barrier and use it in wait_on_bit. > > The patch d6ffe6067a54972564552ea45d320fb98db1ac5e fixes an oversight in > the patch 8238b4579866b7c1bb99883cfe102a43db5506ff where the function > test_bit_acquire was not defined for some architectures and this caused > compile failure. > > The backport of the patch 8238b4579866b7c1bb99883cfe102a43db5506ff should > be applied first and the backport of the patch > d6ffe6067a54972564552ea45d320fb98db1ac5e afterwards. All now queued up, thanks. greg k-h ^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: backport of patches 8238b4579866b7c1bb99883cfe102a43db5506ff and d6ffe6067a54972564552ea45d320fb98db1ac5e 2022-10-05 16:48 ` backport of patches 8238b4579866b7c1bb99883cfe102a43db5506ff and d6ffe6067a54972564552ea45d320fb98db1ac5e Greg KH @ 2022-10-10 19:08 ` Greg KH 2022-10-11 9:48 ` Mikulas Patocka 0 siblings, 1 reply; 48+ messages in thread From: Greg KH @ 2022-10-10 19:08 UTC (permalink / raw) To: Mikulas Patocka; +Cc: stable On Wed, Oct 05, 2022 at 06:48:54PM +0200, Greg KH wrote: > On Mon, Oct 03, 2022 at 08:28:06AM -0400, Mikulas Patocka wrote: > > > > > > On Sat, 1 Oct 2022, Greg KH wrote: > > > > > On Fri, Sep 30, 2022 at 11:32:30AM -0400, Mikulas Patocka wrote: > > > > Hi > > > > > > > > Here I'm submitting backport of patches > > > > 8238b4579866b7c1bb99883cfe102a43db5506ff and > > > > d6ffe6067a54972564552ea45d320fb98db1ac5e to the stable branches. > > > > > > Thanks, but you provide no information as to why these are needed. > > > > > > What needs them? They are just adding new functions to the tree from > > > what I can tell. > > > > > > thanks, > > > > > > greg k-h > > > > There's a race condition in wait_on_bit. wait_on_bit tests a bit using the > > "test_bit" function, however this function doesn't do any memory barrier, > > so the memory accesses that follow wait_on_bit may be reordered before it > > and return invalid data. > > > > Linus didn't want to add a memory barrier to wait_on_bit, he instead > > wanted to introduce a new function test_bit_acquire that performs the > > "acquire" memory barrier and use it in wait_on_bit. > > > > The patch d6ffe6067a54972564552ea45d320fb98db1ac5e fixes an oversight in > > the patch 8238b4579866b7c1bb99883cfe102a43db5506ff where the function > > test_bit_acquire was not defined for some architectures and this caused > > compile failure. 
> > > > The backport of the patch 8238b4579866b7c1bb99883cfe102a43db5506ff should > > be applied first and the backport of the patch > > d6ffe6067a54972564552ea45d320fb98db1ac5e afterwards. > > All now queued up, thanks. Nope, these cause loads of breakages. See https://lore.kernel.org/r/09eca44e-4d91-a060-d48c-d0aa41ac5045@roeck-us.net for one such example, and I know kbuild sent you other build problems. I'll drop all of these from the stable trees now. Please feel free to resend them when you have the build issues worked out. thanks, greg k-h ^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: backport of patches 8238b4579866b7c1bb99883cfe102a43db5506ff and d6ffe6067a54972564552ea45d320fb98db1ac5e 2022-10-10 19:08 ` Greg KH @ 2022-10-11 9:48 ` Mikulas Patocka 2022-10-11 9:58 ` Greg KH 0 siblings, 1 reply; 48+ messages in thread From: Mikulas Patocka @ 2022-10-11 9:48 UTC (permalink / raw) To: Greg KH; +Cc: stable On Mon, 10 Oct 2022, Greg KH wrote: > Nope, these cause loads of breakages. See > https://lore.kernel.org/r/09eca44e-4d91-a060-d48c-d0aa41ac5045@roeck-us.net > for one such example, and I know kbuild sent you other build problems. > I'll drop all of these from the stable trees now. Please feel free to > resend them when you have the build issues worked out. > > thanks, > > greg k-h I don't have cross compilers for all the architectures that Linux supports. Is there some way how to have the patch compile-tested before I send it to you? Or - would you accept this patch instead of the upstream patch? It fixes the same bug as the upstream patch, but it's noticeably smaller and it could be applied to the stable kernels 4.19 to 5.19. Mikulas From: Mikulas Patocka <mpatocka@redhat.com> This fixes a bug that is fixed by the upstream patch 8238b4579866b7c1bb99883cfe102a43db5506ff. This patch differs from the upstream patch because backporting the upstream patch causes many build failures on various architectures. Original commit message: There are several places in the kernel where wait_on_bit is not followed by a memory barrier (for example, in drivers/md/dm-bufio.c:new_read). On architectures with weak memory ordering, it may happen that memory accesses that follow wait_on_bit are reordered before wait_on_bit and they may return invalid data. Fix this class of bugs by introducing a new function "test_bit_acquire" that works like test_bit, but has acquire memory ordering semantics. 
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com> Acked-by: Will Deacon <will@kernel.org> Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- include/linux/wait_bit.h | 16 ++++++++++++---- kernel/sched/wait_bit.c | 2 ++ 2 files changed, 14 insertions(+), 4 deletions(-) Index: linux-stable/include/linux/wait_bit.h =================================================================== --- linux-stable.orig/include/linux/wait_bit.h 2022-10-11 11:23:12.000000000 +0200 +++ linux-stable/include/linux/wait_bit.h 2022-10-11 11:24:33.000000000 +0200 @@ -71,8 +71,10 @@ static inline int wait_on_bit(unsigned long *word, int bit, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit(bit, word)) { + smp_rmb(); return 0; + } return out_of_line_wait_on_bit(word, bit, bit_wait, mode); @@ -96,8 +98,10 @@ static inline int wait_on_bit_io(unsigned long *word, int bit, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit(bit, word)) { + smp_rmb(); return 0; + } return out_of_line_wait_on_bit(word, bit, bit_wait_io, mode); @@ -123,8 +127,10 @@ wait_on_bit_timeout(unsigned long *word, unsigned long timeout) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit(bit, word)) { + smp_rmb(); return 0; + } return out_of_line_wait_on_bit_timeout(word, bit, bit_wait_timeout, mode, timeout); @@ -151,8 +157,10 @@ wait_on_bit_action(unsigned long *word, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit(bit, word)) { + smp_rmb(); return 0; + } return out_of_line_wait_on_bit(word, bit, action, mode); } Index: linux-stable/kernel/sched/wait_bit.c =================================================================== --- linux-stable.orig/kernel/sched/wait_bit.c 2022-10-11 11:23:12.000000000 +0200 +++ linux-stable/kernel/sched/wait_bit.c 2022-10-11 11:25:22.000000000 +0200 @@ -51,6 +51,8 @@ __wait_on_bit(struct wait_queue_head *wq finish_wait(wq_head, &wbq_entry->wq_entry); + smp_rmb(); 
+ return ret; } EXPORT_SYMBOL(__wait_on_bit); ^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: backport of patches 8238b4579866b7c1bb99883cfe102a43db5506ff and d6ffe6067a54972564552ea45d320fb98db1ac5e 2022-10-11 9:48 ` Mikulas Patocka @ 2022-10-11 9:58 ` Greg KH 2022-10-18 11:36 ` Mikulas Patocka 0 siblings, 1 reply; 48+ messages in thread From: Greg KH @ 2022-10-11 9:58 UTC (permalink / raw) To: Mikulas Patocka; +Cc: stable On Tue, Oct 11, 2022 at 05:48:26AM -0400, Mikulas Patocka wrote: > > > On Mon, 10 Oct 2022, Greg KH wrote: > > > Nope, these cause loads of breakages. See > > https://lore.kernel.org/r/09eca44e-4d91-a060-d48c-d0aa41ac5045@roeck-us.net > > for one such example, and I know kbuild sent you other build problems. > > I'll drop all of these from the stable trees now. Please feel free to > > resend them when you have the build issues worked out. > > > > thanks, > > > > greg k-h > > I don't have cross compilers for all the architectures that Linux > supports. Is there some way how to have the patch compile-tested before I > send it to you? You can download those compilers from kernel.org, they are all available there. > Or - would you accept this patch instead of the upstream patch? It fixes > the same bug as the upstream patch, but it's noticeably smaller and it > could be applied to the stable kernels 4.19 to 5.19. We should stick with what is in Linus's tree so as to not cause new bugs, and to make future backports easier. thanks, greg k-h ^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: backport of patches 8238b4579866b7c1bb99883cfe102a43db5506ff and d6ffe6067a54972564552ea45d320fb98db1ac5e 2022-10-11 9:58 ` Greg KH @ 2022-10-18 11:36 ` Mikulas Patocka 2022-10-18 11:37 ` [PATCH] wait_on_bit: add an acquire memory barrier Mikulas Patocka ` (7 more replies) 0 siblings, 8 replies; 48+ messages in thread From: Mikulas Patocka @ 2022-10-18 11:36 UTC (permalink / raw) To: Greg KH; +Cc: stable On Tue, 11 Oct 2022, Greg KH wrote: > On Tue, Oct 11, 2022 at 05:48:26AM -0400, Mikulas Patocka wrote: > > > > > > On Mon, 10 Oct 2022, Greg KH wrote: > > > > > Nope, these cause loads of breakages. See > > > https://lore.kernel.org/r/09eca44e-4d91-a060-d48c-d0aa41ac5045@roeck-us.net > > > for one such example, and I know kbuild sent you other build problems. > > > I'll drop all of these from the stable trees now. Please feel free to > > > resend them when you have the build issues worked out. > > > > > > thanks, > > > > > > greg k-h > > > > I don't have cross compilers for all the architectures that Linux > > supports. Is there some way how to have the patch compile-tested before I > > send it to you? > > You can download those compilers from kernel.org, they are all available > there. OK. I downloaded cross compilers from https://mirrors.edge.kernel.org/pub/tools/crosstool/ and compile-tested the patches with all possible architectures. Here I'm sending new versions. Mikulas ^ permalink raw reply [flat|nested] 48+ messages in thread
* [PATCH] wait_on_bit: add an acquire memory barrier 2022-10-18 11:36 ` Mikulas Patocka @ 2022-10-18 11:37 ` Mikulas Patocka 2022-10-18 11:37 ` Mikulas Patocka ` (6 subsequent siblings) 7 siblings, 0 replies; 48+ messages in thread From: Mikulas Patocka @ 2022-10-18 11:37 UTC (permalink / raw) To: Greg KH; +Cc: stable This is backport of the upstream patch 8238b4579866b7c1bb99883cfe102a43db5506ff for the stable branch 5.19 wait_on_bit: add an acquire memory barrier There are several places in the kernel where wait_on_bit is not followed by a memory barrier (for example, in drivers/md/dm-bufio.c:new_read). On architectures with weak memory ordering, it may happen that memory accesses that follow wait_on_bit are reordered before wait_on_bit and they may return invalid data. Fix this class of bugs by introducing a new function "test_bit_acquire" that works like test_bit, but has acquire memory ordering semantics. Signed-off-by: Mikulas Patocka <mpatocka@redhat.com> Acked-by: Will Deacon <will@kernel.org> Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- arch/alpha/include/asm/bitops.h | 7 ++++++ arch/hexagon/include/asm/bitops.h | 15 +++++++++++++ arch/ia64/include/asm/bitops.h | 7 ++++++ arch/m68k/include/asm/bitops.h | 6 +++++ arch/s390/include/asm/bitops.h | 7 ++++++ arch/sh/include/asm/bitops-op32.h | 7 ++++++ arch/x86/include/asm/bitops.h | 21 +++++++++++++++++++ include/asm-generic/bitops/instrumented-non-atomic.h | 12 ++++++++++ include/asm-generic/bitops/non-atomic.h | 14 ++++++++++++ include/linux/buffer_head.h | 2 - include/linux/wait_bit.h | 8 +++---- kernel/sched/wait_bit.c | 2 - 12 files changed, 102 insertions(+), 6 deletions(-) Index: linux-stable/arch/x86/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/x86/include/asm/bitops.h 2022-10-17 19:02:15.000000000 +0200 +++ linux-stable/arch/x86/include/asm/bitops.h 2022-10-17 19:02:15.000000000 +0200 
@@ -207,6 +207,20 @@ static __always_inline bool constant_tes (addr[nr >> _BITOPS_LONG_SHIFT])) != 0; } +static __always_inline bool constant_test_bit_acquire(long nr, const volatile unsigned long *addr) +{ + bool oldbit; + + asm volatile("testb %2,%1" + CC_SET(nz) + : CC_OUT(nz) (oldbit) + : "m" (((unsigned char *)addr)[nr >> 3]), + "i" (1 << (nr & 7)) + :"memory"); + + return oldbit; +} + static __always_inline bool variable_test_bit(long nr, volatile const unsigned long *addr) { bool oldbit; @@ -224,6 +238,13 @@ static __always_inline bool variable_tes ? constant_test_bit((nr), (addr)) \ : variable_test_bit((nr), (addr))) +static __always_inline bool +arch_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + return __builtin_constant_p(nr) ? constant_test_bit_acquire(nr, addr) : + variable_test_bit(nr, addr); +} + /** * __ffs - find first set bit in word * @word: The word to search Index: linux-stable/include/asm-generic/bitops/instrumented-non-atomic.h =================================================================== --- linux-stable.orig/include/asm-generic/bitops/instrumented-non-atomic.h 2022-10-17 19:02:15.000000000 +0200 +++ linux-stable/include/asm-generic/bitops/instrumented-non-atomic.h 2022-10-17 19:02:15.000000000 +0200 @@ -135,4 +135,16 @@ static __always_inline bool test_bit(lon return arch_test_bit(nr, addr); } +/** + * _test_bit_acquire - Determine, with acquire semantics, whether a bit is set + * @nr: bit number to test + * @addr: Address to start counting from + */ +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + instrument_atomic_read(addr + BIT_WORD(nr), sizeof(long)); + return arch_test_bit_acquire(nr, addr); +} + #endif /* _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H */ Index: linux-stable/include/asm-generic/bitops/non-atomic.h =================================================================== --- linux-stable.orig/include/asm-generic/bitops/non-atomic.h 
2022-10-17 19:02:15.000000000 +0200 +++ linux-stable/include/asm-generic/bitops/non-atomic.h 2022-10-17 19:02:15.000000000 +0200 @@ -3,6 +3,7 @@ #define _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ #include <asm/types.h> +#include <asm/barrier.h> /** * arch___set_bit - Set a bit in memory @@ -119,4 +120,17 @@ arch_test_bit(unsigned int nr, const vol } #define test_bit arch_test_bit +/** + * arch_test_bit_acquire - Determine, with acquire semantics, whether a bit is set + * @nr: bit number to test + * @addr: Address to start counting from + */ +static __always_inline bool +arch_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} +#define test_bit_acquire arch_test_bit_acquire + #endif /* _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ */ Index: linux-stable/include/linux/buffer_head.h =================================================================== --- linux-stable.orig/include/linux/buffer_head.h 2022-10-17 19:02:15.000000000 +0200 +++ linux-stable/include/linux/buffer_head.h 2022-10-17 19:02:15.000000000 +0200 @@ -166,7 +166,7 @@ static __always_inline int buffer_uptoda * make it consistent with folio_test_uptodate * pairs with smp_mb__before_atomic in set_buffer_uptodate */ - return (smp_load_acquire(&bh->b_state) & (1UL << BH_Uptodate)) != 0; + return test_bit_acquire(BH_Uptodate, &bh->b_state); } #define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK) Index: linux-stable/include/linux/wait_bit.h =================================================================== --- linux-stable.orig/include/linux/wait_bit.h 2022-10-17 19:02:15.000000000 +0200 +++ linux-stable/include/linux/wait_bit.h 2022-10-17 19:02:15.000000000 +0200 @@ -71,7 +71,7 @@ static inline int wait_on_bit(unsigned long *word, int bit, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return 
out_of_line_wait_on_bit(word, bit, bit_wait, @@ -96,7 +96,7 @@ static inline int wait_on_bit_io(unsigned long *word, int bit, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, bit_wait_io, @@ -123,7 +123,7 @@ wait_on_bit_timeout(unsigned long *word, unsigned long timeout) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit_timeout(word, bit, bit_wait_timeout, @@ -151,7 +151,7 @@ wait_on_bit_action(unsigned long *word, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, action, mode); } Index: linux-stable/kernel/sched/wait_bit.c =================================================================== --- linux-stable.orig/kernel/sched/wait_bit.c 2022-10-17 19:02:15.000000000 +0200 +++ linux-stable/kernel/sched/wait_bit.c 2022-10-17 19:02:15.000000000 +0200 @@ -47,7 +47,7 @@ __wait_on_bit(struct wait_queue_head *wq prepare_to_wait(wq_head, &wbq_entry->wq_entry, mode); if (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags)) ret = (*action)(&wbq_entry->key, mode); - } while (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret); + } while (test_bit_acquire(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret); finish_wait(wq_head, &wbq_entry->wq_entry); Index: linux-stable/arch/alpha/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/alpha/include/asm/bitops.h 2022-10-17 19:02:15.000000000 +0200 +++ linux-stable/arch/alpha/include/asm/bitops.h 2022-10-17 19:02:15.000000000 +0200 @@ -289,6 +289,13 @@ test_bit(int nr, const volatile void * a return (1UL & (((const int *) addr)[nr >> 5] >> (nr & 31))) != 0UL; } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = 
((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + /* * ffz = Find First Zero in word. Undefined if no zero exists, * so code should check against ~0UL first.. Index: linux-stable/arch/hexagon/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/hexagon/include/asm/bitops.h 2022-10-17 19:02:15.000000000 +0200 +++ linux-stable/arch/hexagon/include/asm/bitops.h 2022-10-17 19:02:15.000000000 +0200 @@ -172,7 +172,22 @@ static inline int __test_bit(int nr, con return retval; } +static inline int __test_bit_acquire(int nr, const volatile unsigned long *addr) +{ + int retval; + + asm volatile( + "{P0 = tstbit(%1,%2); if (P0.new) %0 = #1; if (!P0.new) %0 = #0;}\n" + : "=&r" (retval) + : "r" (addr[BIT_WORD(nr)]), "r" (nr % BITS_PER_LONG) + : "p0", "memory" + ); + + return retval; +} + #define test_bit(nr, addr) __test_bit(nr, addr) +#define test_bit_acquire(nr, addr) __test_bit_acquire(nr, addr) /* * ffz - find first zero in word. 
Index: linux-stable/arch/ia64/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/ia64/include/asm/bitops.h 2022-10-17 19:02:15.000000000 +0200 +++ linux-stable/arch/ia64/include/asm/bitops.h 2022-10-17 19:02:15.000000000 +0200 @@ -337,6 +337,13 @@ test_bit (int nr, const volatile void *a return 1 & (((const volatile __u32 *) addr)[nr >> 5] >> (nr & 31)); } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + /** * ffz - find the first zero bit in a long word * @x: The long word to find the bit in Index: linux-stable/arch/m68k/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/m68k/include/asm/bitops.h 2022-10-17 19:02:15.000000000 +0200 +++ linux-stable/arch/m68k/include/asm/bitops.h 2022-10-17 19:02:15.000000000 +0200 @@ -153,6 +153,12 @@ static inline int test_bit(int nr, const return (vaddr[nr >> 5] & (1UL << (nr & 31))) != 0; } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} static inline int bset_reg_test_and_set_bit(int nr, volatile unsigned long *vaddr) Index: linux-stable/arch/s390/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/s390/include/asm/bitops.h 2022-10-17 19:02:15.000000000 +0200 +++ linux-stable/arch/s390/include/asm/bitops.h 2022-10-17 19:02:15.000000000 +0200 @@ -184,6 +184,13 @@ static inline bool arch_test_bit(unsigne return *addr & mask; } +static __always_inline bool +arch_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long 
*)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + static inline bool arch_test_and_set_bit_lock(unsigned long nr, volatile unsigned long *ptr) { Index: linux-stable/arch/sh/include/asm/bitops-op32.h =================================================================== --- linux-stable.orig/arch/sh/include/asm/bitops-op32.h 2022-10-17 19:02:15.000000000 +0200 +++ linux-stable/arch/sh/include/asm/bitops-op32.h 2022-10-17 19:02:15.000000000 +0200 @@ -138,4 +138,11 @@ static inline int test_bit(int nr, const return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + #endif /* __ASM_SH_BITOPS_OP32_H */ ^ permalink raw reply [flat|nested] 48+ messages in thread
* [PATCH] wait_on_bit: add an acquire memory barrier 2022-10-18 11:36 ` Mikulas Patocka 2022-10-18 11:37 ` [PATCH] wait_on_bit: add an acquire memory barrier Mikulas Patocka @ 2022-10-18 11:37 ` Mikulas Patocka 2022-10-18 11:38 ` Mikulas Patocka ` (5 subsequent siblings) 7 siblings, 0 replies; 48+ messages in thread From: Mikulas Patocka @ 2022-10-18 11:37 UTC (permalink / raw) To: Greg KH; +Cc: stable This is backport of the upstream patch 8238b4579866b7c1bb99883cfe102a43db5506ff for the stable branch 5.15 wait_on_bit: add an acquire memory barrier There are several places in the kernel where wait_on_bit is not followed by a memory barrier (for example, in drivers/md/dm-bufio.c:new_read). On architectures with weak memory ordering, it may happen that memory accesses that follow wait_on_bit are reordered before wait_on_bit and they may return invalid data. Fix this class of bugs by introducing a new function "test_bit_acquire" that works like test_bit, but has acquire memory ordering semantics. 
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com> Acked-by: Will Deacon <will@kernel.org> Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- arch/alpha/include/asm/bitops.h | 7 ++++++ arch/h8300/include/asm/bitops.h | 3 +- arch/hexagon/include/asm/bitops.h | 15 +++++++++++++ arch/ia64/include/asm/bitops.h | 7 ++++++ arch/m68k/include/asm/bitops.h | 6 +++++ arch/s390/include/asm/bitops.h | 7 ++++++ arch/sh/include/asm/bitops-op32.h | 7 ++++++ arch/x86/include/asm/bitops.h | 21 +++++++++++++++++++ include/asm-generic/bitops/instrumented-non-atomic.h | 12 ++++++++++ include/asm-generic/bitops/non-atomic.h | 14 ++++++++++++ include/linux/buffer_head.h | 2 - include/linux/wait_bit.h | 8 +++---- kernel/sched/wait_bit.c | 2 - 13 files changed, 104 insertions(+), 7 deletions(-) Index: linux-stable/arch/x86/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/x86/include/asm/bitops.h 2022-10-18 12:42:09.000000000 +0200 +++ linux-stable/arch/x86/include/asm/bitops.h 2022-10-18 12:42:09.000000000 +0200 @@ -207,6 +207,20 @@ static __always_inline bool constant_tes (addr[nr >> _BITOPS_LONG_SHIFT])) != 0; } +static __always_inline bool constant_test_bit_acquire(long nr, const volatile unsigned long *addr) +{ + bool oldbit; + + asm volatile("testb %2,%1" + CC_SET(nz) + : CC_OUT(nz) (oldbit) + : "m" (((unsigned char *)addr)[nr >> 3]), + "i" (1 << (nr & 7)) + :"memory"); + + return oldbit; +} + static __always_inline bool variable_test_bit(long nr, volatile const unsigned long *addr) { bool oldbit; @@ -224,6 +238,13 @@ static __always_inline bool variable_tes ? constant_test_bit((nr), (addr)) \ : variable_test_bit((nr), (addr))) +static __always_inline bool +arch_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + return __builtin_constant_p(nr) ? 
constant_test_bit_acquire(nr, addr) : + variable_test_bit(nr, addr); +} + /** * __ffs - find first set bit in word * @word: The word to search Index: linux-stable/include/asm-generic/bitops/instrumented-non-atomic.h =================================================================== --- linux-stable.orig/include/asm-generic/bitops/instrumented-non-atomic.h 2022-10-18 12:42:09.000000000 +0200 +++ linux-stable/include/asm-generic/bitops/instrumented-non-atomic.h 2022-10-18 12:42:09.000000000 +0200 @@ -135,4 +135,16 @@ static inline bool test_bit(long nr, con return arch_test_bit(nr, addr); } +/** + * _test_bit_acquire - Determine, with acquire semantics, whether a bit is set + * @nr: bit number to test + * @addr: Address to start counting from + */ +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + instrument_atomic_read(addr + BIT_WORD(nr), sizeof(long)); + return arch_test_bit_acquire(nr, addr); +} + #endif /* _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H */ Index: linux-stable/include/asm-generic/bitops/non-atomic.h =================================================================== --- linux-stable.orig/include/asm-generic/bitops/non-atomic.h 2022-10-18 12:42:09.000000000 +0200 +++ linux-stable/include/asm-generic/bitops/non-atomic.h 2022-10-18 12:42:09.000000000 +0200 @@ -3,6 +3,7 @@ #define _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ #include <asm/types.h> +#include <asm/barrier.h> /** * arch___set_bit - Set a bit in memory @@ -119,4 +120,17 @@ arch_test_bit(unsigned int nr, const vol } #define test_bit arch_test_bit +/** + * arch_test_bit_acquire - Determine, with acquire semantics, whether a bit is set + * @nr: bit number to test + * @addr: Address to start counting from + */ +static __always_inline bool +arch_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} 
+#define test_bit_acquire arch_test_bit_acquire + #endif /* _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ */ Index: linux-stable/include/linux/buffer_head.h =================================================================== --- linux-stable.orig/include/linux/buffer_head.h 2022-10-18 12:42:09.000000000 +0200 +++ linux-stable/include/linux/buffer_head.h 2022-10-18 12:42:09.000000000 +0200 @@ -166,7 +166,7 @@ static __always_inline int buffer_uptoda * make it consistent with folio_test_uptodate * pairs with smp_mb__before_atomic in set_buffer_uptodate */ - return (smp_load_acquire(&bh->b_state) & (1UL << BH_Uptodate)) != 0; + return test_bit_acquire(BH_Uptodate, &bh->b_state); } #define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK) Index: linux-stable/include/linux/wait_bit.h =================================================================== --- linux-stable.orig/include/linux/wait_bit.h 2022-10-18 12:42:09.000000000 +0200 +++ linux-stable/include/linux/wait_bit.h 2022-10-18 12:42:09.000000000 +0200 @@ -71,7 +71,7 @@ static inline int wait_on_bit(unsigned long *word, int bit, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, bit_wait, @@ -96,7 +96,7 @@ static inline int wait_on_bit_io(unsigned long *word, int bit, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, bit_wait_io, @@ -123,7 +123,7 @@ wait_on_bit_timeout(unsigned long *word, unsigned long timeout) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit_timeout(word, bit, bit_wait_timeout, @@ -151,7 +151,7 @@ wait_on_bit_action(unsigned long *word, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, action, mode); } Index: 
linux-stable/kernel/sched/wait_bit.c =================================================================== --- linux-stable.orig/kernel/sched/wait_bit.c 2022-10-18 12:42:09.000000000 +0200 +++ linux-stable/kernel/sched/wait_bit.c 2022-10-18 12:42:09.000000000 +0200 @@ -47,7 +47,7 @@ __wait_on_bit(struct wait_queue_head *wq prepare_to_wait(wq_head, &wbq_entry->wq_entry, mode); if (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags)) ret = (*action)(&wbq_entry->key, mode); - } while (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret); + } while (test_bit_acquire(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret); finish_wait(wq_head, &wbq_entry->wq_entry); Index: linux-stable/arch/alpha/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/alpha/include/asm/bitops.h 2022-10-18 12:42:09.000000000 +0200 +++ linux-stable/arch/alpha/include/asm/bitops.h 2022-10-18 12:42:09.000000000 +0200 @@ -289,6 +289,13 @@ test_bit(int nr, const volatile void * a return (1UL & (((const int *) addr)[nr >> 5] >> (nr & 31))) != 0UL; } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + /* * ffz = Find First Zero in word. Undefined if no zero exists, * so code should check against ~0UL first.. 
Index: linux-stable/arch/hexagon/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/hexagon/include/asm/bitops.h 2022-10-18 12:42:09.000000000 +0200 +++ linux-stable/arch/hexagon/include/asm/bitops.h 2022-10-18 12:42:09.000000000 +0200 @@ -172,7 +172,22 @@ static inline int __test_bit(int nr, con return retval; } +static inline int __test_bit_acquire(int nr, const volatile unsigned long *addr) +{ + int retval; + + asm volatile( + "{P0 = tstbit(%1,%2); if (P0.new) %0 = #1; if (!P0.new) %0 = #0;}\n" + : "=&r" (retval) + : "r" (addr[BIT_WORD(nr)]), "r" (nr % BITS_PER_LONG) + : "p0", "memory" + ); + + return retval; +} + #define test_bit(nr, addr) __test_bit(nr, addr) +#define test_bit_acquire(nr, addr) __test_bit_acquire(nr, addr) /* * ffz - find first zero in word. Index: linux-stable/arch/ia64/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/ia64/include/asm/bitops.h 2022-10-18 12:42:09.000000000 +0200 +++ linux-stable/arch/ia64/include/asm/bitops.h 2022-10-18 12:42:09.000000000 +0200 @@ -337,6 +337,13 @@ test_bit (int nr, const volatile void *a return 1 & (((const volatile __u32 *) addr)[nr >> 5] >> (nr & 31)); } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + /** * ffz - find the first zero bit in a long word * @x: The long word to find the bit in Index: linux-stable/arch/m68k/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/m68k/include/asm/bitops.h 2022-10-18 12:42:09.000000000 +0200 +++ linux-stable/arch/m68k/include/asm/bitops.h 2022-10-18 12:42:09.000000000 +0200 @@ -153,6 +153,12 @@ static inline int test_bit(int nr, const return (vaddr[nr >> 5] & (1UL << (nr & 31))) != 0; } 
+static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} static inline int bset_reg_test_and_set_bit(int nr, volatile unsigned long *vaddr) Index: linux-stable/arch/s390/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/s390/include/asm/bitops.h 2022-10-18 12:42:09.000000000 +0200 +++ linux-stable/arch/s390/include/asm/bitops.h 2022-10-18 12:42:09.000000000 +0200 @@ -184,6 +184,13 @@ static inline bool arch_test_bit(unsigne return *addr & mask; } +static __always_inline bool +arch_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + static inline bool arch_test_and_set_bit_lock(unsigned long nr, volatile unsigned long *ptr) { Index: linux-stable/arch/sh/include/asm/bitops-op32.h =================================================================== --- linux-stable.orig/arch/sh/include/asm/bitops-op32.h 2022-10-18 12:42:09.000000000 +0200 +++ linux-stable/arch/sh/include/asm/bitops-op32.h 2022-10-18 12:42:09.000000000 +0200 @@ -138,4 +138,11 @@ static inline int test_bit(int nr, const return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + #endif /* __ASM_SH_BITOPS_OP32_H */ Index: linux-stable/arch/h8300/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/h8300/include/asm/bitops.h 2022-10-18 12:42:09.000000000 +0200 +++ linux-stable/arch/h8300/include/asm/bitops.h 2022-10-18 
12:42:09.000000000 +0200 @@ -87,7 +87,8 @@ static inline int test_bit(int nr, const return ret; } -#define __test_bit(nr, addr) test_bit(nr, addr) +#define __test_bit(nr, addr) test_bit(nr, addr) +#define test_bit_acquire(nr, addr) test_bit(nr, addr) #define H8300_GEN_TEST_BITOP(FNNAME, OP) \ static inline int FNNAME(int nr, void *addr) \ ^ permalink raw reply [flat|nested] 48+ messages in thread
* [PATCH] wait_on_bit: add an acquire memory barrier 2022-10-18 11:36 ` Mikulas Patocka 2022-10-18 11:37 ` [PATCH] wait_on_bit: add an acquire memory barrier Mikulas Patocka 2022-10-18 11:37 ` Mikulas Patocka @ 2022-10-18 11:38 ` Mikulas Patocka 2022-10-18 11:38 ` Mikulas Patocka ` (4 subsequent siblings) 7 siblings, 0 replies; 48+ messages in thread From: Mikulas Patocka @ 2022-10-18 11:38 UTC (permalink / raw) To: Greg KH; +Cc: stable This is backport of the upstream patch 8238b4579866b7c1bb99883cfe102a43db5506ff for the stable branch 5.10 wait_on_bit: add an acquire memory barrier There are several places in the kernel where wait_on_bit is not followed by a memory barrier (for example, in drivers/md/dm-bufio.c:new_read). On architectures with weak memory ordering, it may happen that memory accesses that follow wait_on_bit are reordered before wait_on_bit and they may return invalid data. Fix this class of bugs by introducing a new function "test_bit_acquire" that works like test_bit, but has acquire memory ordering semantics. 
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com> Acked-by: Will Deacon <will@kernel.org> Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- arch/alpha/include/asm/bitops.h | 7 ++++++ arch/arc/include/asm/bitops.h | 7 ++++++ arch/h8300/include/asm/bitops.h | 3 +- arch/hexagon/include/asm/bitops.h | 15 +++++++++++++ arch/ia64/include/asm/bitops.h | 7 ++++++ arch/m68k/include/asm/bitops.h | 6 +++++ arch/s390/include/asm/bitops.h | 7 ++++++ arch/sh/include/asm/bitops-op32.h | 7 ++++++ arch/x86/include/asm/bitops.h | 21 +++++++++++++++++++ include/asm-generic/bitops/instrumented-non-atomic.h | 12 ++++++++++ include/asm-generic/bitops/non-atomic.h | 14 ++++++++++++ include/linux/buffer_head.h | 2 - include/linux/wait_bit.h | 8 +++---- kernel/sched/wait_bit.c | 2 - 14 files changed, 111 insertions(+), 7 deletions(-) Index: linux-stable/arch/x86/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/x86/include/asm/bitops.h 2022-10-17 19:38:32.000000000 +0200 +++ linux-stable/arch/x86/include/asm/bitops.h 2022-10-17 19:38:32.000000000 +0200 @@ -207,6 +207,20 @@ static __always_inline bool constant_tes (addr[nr >> _BITOPS_LONG_SHIFT])) != 0; } +static __always_inline bool constant_test_bit_acquire(long nr, const volatile unsigned long *addr) +{ + bool oldbit; + + asm volatile("testb %2,%1" + CC_SET(nz) + : CC_OUT(nz) (oldbit) + : "m" (((unsigned char *)addr)[nr >> 3]), + "i" (1 << (nr & 7)) + :"memory"); + + return oldbit; +} + static __always_inline bool variable_test_bit(long nr, volatile const unsigned long *addr) { bool oldbit; @@ -224,6 +238,13 @@ static __always_inline bool variable_tes ? constant_test_bit((nr), (addr)) \ : variable_test_bit((nr), (addr))) +static __always_inline bool +arch_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + return __builtin_constant_p(nr) ? 
constant_test_bit_acquire(nr, addr) : + variable_test_bit(nr, addr); +} + /** * __ffs - find first set bit in word * @word: The word to search Index: linux-stable/include/asm-generic/bitops/instrumented-non-atomic.h =================================================================== --- linux-stable.orig/include/asm-generic/bitops/instrumented-non-atomic.h 2022-10-17 19:38:32.000000000 +0200 +++ linux-stable/include/asm-generic/bitops/instrumented-non-atomic.h 2022-10-17 19:38:32.000000000 +0200 @@ -135,4 +135,16 @@ static inline bool test_bit(long nr, con return arch_test_bit(nr, addr); } +/** + * _test_bit_acquire - Determine, with acquire semantics, whether a bit is set + * @nr: bit number to test + * @addr: Address to start counting from + */ +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + instrument_atomic_read(addr + BIT_WORD(nr), sizeof(long)); + return arch_test_bit_acquire(nr, addr); +} + #endif /* _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H */ Index: linux-stable/include/asm-generic/bitops/non-atomic.h =================================================================== --- linux-stable.orig/include/asm-generic/bitops/non-atomic.h 2022-10-17 19:38:32.000000000 +0200 +++ linux-stable/include/asm-generic/bitops/non-atomic.h 2022-10-17 19:38:32.000000000 +0200 @@ -3,6 +3,7 @@ #define _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ #include <asm/types.h> +#include <asm/barrier.h> /** * __set_bit - Set a bit in memory @@ -106,4 +107,17 @@ static inline int test_bit(int nr, const return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); } +/** + * arch_test_bit_acquire - Determine, with acquire semantics, whether a bit is set + * @nr: bit number to test + * @addr: Address to start counting from + */ +static __always_inline bool +arch_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & 
(BITS_PER_LONG-1))); +} +#define test_bit_acquire arch_test_bit_acquire + #endif /* _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ */ Index: linux-stable/include/linux/buffer_head.h =================================================================== --- linux-stable.orig/include/linux/buffer_head.h 2022-10-17 19:38:32.000000000 +0200 +++ linux-stable/include/linux/buffer_head.h 2022-10-17 19:38:32.000000000 +0200 @@ -166,7 +166,7 @@ static __always_inline int buffer_uptoda * make it consistent with folio_test_uptodate * pairs with smp_mb__before_atomic in set_buffer_uptodate */ - return (smp_load_acquire(&bh->b_state) & (1UL << BH_Uptodate)) != 0; + return test_bit_acquire(BH_Uptodate, &bh->b_state); } #define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK) Index: linux-stable/include/linux/wait_bit.h =================================================================== --- linux-stable.orig/include/linux/wait_bit.h 2022-10-17 19:38:32.000000000 +0200 +++ linux-stable/include/linux/wait_bit.h 2022-10-17 19:38:32.000000000 +0200 @@ -71,7 +71,7 @@ static inline int wait_on_bit(unsigned long *word, int bit, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, bit_wait, @@ -96,7 +96,7 @@ static inline int wait_on_bit_io(unsigned long *word, int bit, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, bit_wait_io, @@ -123,7 +123,7 @@ wait_on_bit_timeout(unsigned long *word, unsigned long timeout) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit_timeout(word, bit, bit_wait_timeout, @@ -151,7 +151,7 @@ wait_on_bit_action(unsigned long *word, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, action, mode); } Index: 
linux-stable/kernel/sched/wait_bit.c =================================================================== --- linux-stable.orig/kernel/sched/wait_bit.c 2022-10-17 19:38:32.000000000 +0200 +++ linux-stable/kernel/sched/wait_bit.c 2022-10-17 19:38:32.000000000 +0200 @@ -47,7 +47,7 @@ __wait_on_bit(struct wait_queue_head *wq prepare_to_wait(wq_head, &wbq_entry->wq_entry, mode); if (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags)) ret = (*action)(&wbq_entry->key, mode); - } while (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret); + } while (test_bit_acquire(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret); finish_wait(wq_head, &wbq_entry->wq_entry); Index: linux-stable/arch/alpha/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/alpha/include/asm/bitops.h 2022-10-17 19:38:32.000000000 +0200 +++ linux-stable/arch/alpha/include/asm/bitops.h 2022-10-17 19:38:32.000000000 +0200 @@ -289,6 +289,13 @@ test_bit(int nr, const volatile void * a return (1UL & (((const int *) addr)[nr >> 5] >> (nr & 31))) != 0UL; } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + /* * ffz = Find First Zero in word. Undefined if no zero exists, * so code should check against ~0UL first.. 
Index: linux-stable/arch/hexagon/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/hexagon/include/asm/bitops.h 2022-10-17 19:38:32.000000000 +0200 +++ linux-stable/arch/hexagon/include/asm/bitops.h 2022-10-17 19:38:32.000000000 +0200 @@ -172,7 +172,22 @@ static inline int __test_bit(int nr, con return retval; } +static inline int __test_bit_acquire(int nr, const volatile unsigned long *addr) +{ + int retval; + + asm volatile( + "{P0 = tstbit(%1,%2); if (P0.new) %0 = #1; if (!P0.new) %0 = #0;}\n" + : "=&r" (retval) + : "r" (addr[BIT_WORD(nr)]), "r" (nr % BITS_PER_LONG) + : "p0", "memory" + ); + + return retval; +} + #define test_bit(nr, addr) __test_bit(nr, addr) +#define test_bit_acquire(nr, addr) __test_bit_acquire(nr, addr) /* * ffz - find first zero in word. Index: linux-stable/arch/ia64/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/ia64/include/asm/bitops.h 2022-10-17 19:38:32.000000000 +0200 +++ linux-stable/arch/ia64/include/asm/bitops.h 2022-10-17 19:38:32.000000000 +0200 @@ -337,6 +337,13 @@ test_bit (int nr, const volatile void *a return 1 & (((const volatile __u32 *) addr)[nr >> 5] >> (nr & 31)); } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + /** * ffz - find the first zero bit in a long word * @x: The long word to find the bit in Index: linux-stable/arch/m68k/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/m68k/include/asm/bitops.h 2022-10-17 19:38:32.000000000 +0200 +++ linux-stable/arch/m68k/include/asm/bitops.h 2022-10-17 19:38:32.000000000 +0200 @@ -153,6 +153,12 @@ static inline int test_bit(int nr, const return (vaddr[nr >> 5] & (1UL << (nr & 31))) != 0; } 
+static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} static inline int bset_reg_test_and_set_bit(int nr, volatile unsigned long *vaddr) Index: linux-stable/arch/s390/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/s390/include/asm/bitops.h 2022-10-17 19:38:32.000000000 +0200 +++ linux-stable/arch/s390/include/asm/bitops.h 2022-10-17 19:38:32.000000000 +0200 @@ -219,6 +219,13 @@ static inline bool arch_test_bit(unsigne return (*addr >> (nr & 7)) & 1; } +static __always_inline bool +arch_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + static inline bool arch_test_and_set_bit_lock(unsigned long nr, volatile unsigned long *ptr) { Index: linux-stable/arch/sh/include/asm/bitops-op32.h =================================================================== --- linux-stable.orig/arch/sh/include/asm/bitops-op32.h 2022-10-17 19:38:32.000000000 +0200 +++ linux-stable/arch/sh/include/asm/bitops-op32.h 2022-10-17 19:38:32.000000000 +0200 @@ -138,4 +138,11 @@ static inline int test_bit(int nr, const return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + #endif /* __ASM_SH_BITOPS_OP32_H */ Index: linux-stable/arch/h8300/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/h8300/include/asm/bitops.h 2022-10-17 19:38:32.000000000 +0200 +++ linux-stable/arch/h8300/include/asm/bitops.h 
2022-10-17 19:38:32.000000000 +0200 @@ -83,7 +83,8 @@ static inline int test_bit(int nr, const return ret; } -#define __test_bit(nr, addr) test_bit(nr, addr) +#define __test_bit(nr, addr) test_bit(nr, addr) +#define test_bit_acquire(nr, addr) test_bit(nr, addr) #define H8300_GEN_TEST_BITOP(FNNAME, OP) \ static inline int FNNAME(int nr, void *addr) \ Index: linux-stable/arch/arc/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/arc/include/asm/bitops.h 2022-10-17 19:35:36.000000000 +0200 +++ linux-stable/arch/arc/include/asm/bitops.h 2022-10-17 19:41:07.000000000 +0200 @@ -197,6 +197,13 @@ test_bit(unsigned int nr, const volatile return ((mask & *addr) != 0); } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + #ifdef CONFIG_ISA_ARCOMPACT /* ^ permalink raw reply [flat|nested] 48+ messages in thread
* [PATCH] wait_on_bit: add an acquire memory barrier 2022-10-18 11:36 ` Mikulas Patocka ` (2 preceding siblings ...) 2022-10-18 11:38 ` Mikulas Patocka @ 2022-10-18 11:38 ` Mikulas Patocka 2022-10-18 11:39 ` Mikulas Patocka ` (3 subsequent siblings) 7 siblings, 0 replies; 48+ messages in thread From: Mikulas Patocka @ 2022-10-18 11:38 UTC (permalink / raw) To: Greg KH; +Cc: stable This is backport of the upstream patch 8238b4579866b7c1bb99883cfe102a43db5506ff for the stable branch 5.4 wait_on_bit: add an acquire memory barrier There are several places in the kernel where wait_on_bit is not followed by a memory barrier (for example, in drivers/md/dm-bufio.c:new_read). On architectures with weak memory ordering, it may happen that memory accesses that follow wait_on_bit are reordered before wait_on_bit and they may return invalid data. Fix this class of bugs by introducing a new function "test_bit_acquire" that works like test_bit, but has acquire memory ordering semantics. Signed-off-by: Mikulas Patocka <mpatocka@redhat.com> Acked-by: Will Deacon <will@kernel.org> Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- arch/alpha/include/asm/bitops.h | 7 +++++++ arch/arc/include/asm/bitops.h | 7 +++++++ arch/h8300/include/asm/bitops.h | 3 ++- arch/hexagon/include/asm/bitops.h | 15 +++++++++++++++ arch/ia64/include/asm/bitops.h | 7 +++++++ arch/m68k/include/asm/bitops.h | 6 ++++++ arch/s390/include/asm/bitops.h | 7 +++++++ arch/sh/include/asm/bitops-op32.h | 7 +++++++ arch/x86/include/asm/bitops.h | 21 +++++++++++++++++++++ include/asm-generic/bitops-instrumented.h | 6 ++++++ include/asm-generic/bitops/non-atomic.h | 14 ++++++++++++++ include/linux/buffer_head.h | 2 +- include/linux/wait_bit.h | 8 ++++---- kernel/sched/wait_bit.c | 2 +- 14 files changed, 105 insertions(+), 7 deletions(-) Index: linux-stable/arch/x86/include/asm/bitops.h =================================================================== --- 
linux-stable.orig/arch/x86/include/asm/bitops.h 2022-10-17 19:50:18.000000000 +0200 +++ linux-stable/arch/x86/include/asm/bitops.h 2022-10-17 19:50:18.000000000 +0200 @@ -207,6 +207,20 @@ static __always_inline bool constant_tes (addr[nr >> _BITOPS_LONG_SHIFT])) != 0; } +static __always_inline bool constant_test_bit_acquire(long nr, const volatile unsigned long *addr) +{ + bool oldbit; + + asm volatile("testb %2,%1" + CC_SET(nz) + : CC_OUT(nz) (oldbit) + : "m" (((unsigned char *)addr)[nr >> 3]), + "i" (1 << (nr & 7)) + :"memory"); + + return oldbit; +} + static __always_inline bool variable_test_bit(long nr, volatile const unsigned long *addr) { bool oldbit; @@ -224,6 +238,13 @@ static __always_inline bool variable_tes ? constant_test_bit((nr), (addr)) \ : variable_test_bit((nr), (addr))) +static __always_inline bool +arch_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + return __builtin_constant_p(nr) ? constant_test_bit_acquire(nr, addr) : + variable_test_bit(nr, addr); +} + /** * __ffs - find first set bit in word * @word: The word to search Index: linux-stable/include/asm-generic/bitops/non-atomic.h =================================================================== --- linux-stable.orig/include/asm-generic/bitops/non-atomic.h 2022-10-17 19:50:18.000000000 +0200 +++ linux-stable/include/asm-generic/bitops/non-atomic.h 2022-10-17 19:50:18.000000000 +0200 @@ -3,6 +3,7 @@ #define _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ #include <asm/types.h> +#include <asm/barrier.h> /** * __set_bit - Set a bit in memory @@ -106,4 +107,17 @@ static inline int test_bit(int nr, const return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); } +/** + * arch_test_bit_acquire - Determine, with acquire semantics, whether a bit is set + * @nr: bit number to test + * @addr: Address to start counting from + */ +static __always_inline bool +arch_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + 
BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} +#define test_bit_acquire arch_test_bit_acquire + #endif /* _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ */ Index: linux-stable/include/linux/buffer_head.h =================================================================== --- linux-stable.orig/include/linux/buffer_head.h 2022-10-17 19:50:18.000000000 +0200 +++ linux-stable/include/linux/buffer_head.h 2022-10-17 19:50:18.000000000 +0200 @@ -166,7 +166,7 @@ static __always_inline int buffer_uptoda * make it consistent with folio_test_uptodate * pairs with smp_mb__before_atomic in set_buffer_uptodate */ - return (smp_load_acquire(&bh->b_state) & (1UL << BH_Uptodate)) != 0; + return test_bit_acquire(BH_Uptodate, &bh->b_state); } #define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK) Index: linux-stable/include/linux/wait_bit.h =================================================================== --- linux-stable.orig/include/linux/wait_bit.h 2022-10-17 19:50:18.000000000 +0200 +++ linux-stable/include/linux/wait_bit.h 2022-10-17 19:50:18.000000000 +0200 @@ -71,7 +71,7 @@ static inline int wait_on_bit(unsigned long *word, int bit, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, bit_wait, @@ -96,7 +96,7 @@ static inline int wait_on_bit_io(unsigned long *word, int bit, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, bit_wait_io, @@ -123,7 +123,7 @@ wait_on_bit_timeout(unsigned long *word, unsigned long timeout) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit_timeout(word, bit, bit_wait_timeout, @@ -151,7 +151,7 @@ wait_on_bit_action(unsigned long *word, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return 
out_of_line_wait_on_bit(word, bit, action, mode); } Index: linux-stable/kernel/sched/wait_bit.c =================================================================== --- linux-stable.orig/kernel/sched/wait_bit.c 2022-10-17 19:50:18.000000000 +0200 +++ linux-stable/kernel/sched/wait_bit.c 2022-10-17 19:50:18.000000000 +0200 @@ -47,7 +47,7 @@ __wait_on_bit(struct wait_queue_head *wq prepare_to_wait(wq_head, &wbq_entry->wq_entry, mode); if (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags)) ret = (*action)(&wbq_entry->key, mode); - } while (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret); + } while (test_bit_acquire(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret); finish_wait(wq_head, &wbq_entry->wq_entry); Index: linux-stable/include/asm-generic/bitops-instrumented.h =================================================================== --- linux-stable.orig/include/asm-generic/bitops-instrumented.h 2022-10-17 19:50:18.000000000 +0200 +++ linux-stable/include/asm-generic/bitops-instrumented.h 2022-10-17 19:50:18.000000000 +0200 @@ -238,6 +238,12 @@ static inline bool test_bit(long nr, con return arch_test_bit(nr, addr); } +static inline bool test_bit_acquire(long nr, const volatile unsigned long *addr) +{ + kasan_check_read(addr + BIT_WORD(nr), sizeof(long)); + return arch_test_bit_acquire(nr, addr); +} + #if defined(arch_clear_bit_unlock_is_negative_byte) /** * clear_bit_unlock_is_negative_byte - Clear a bit in memory and test if bottom Index: linux-stable/arch/alpha/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/alpha/include/asm/bitops.h 2022-10-17 19:50:18.000000000 +0200 +++ linux-stable/arch/alpha/include/asm/bitops.h 2022-10-17 19:50:18.000000000 +0200 @@ -289,6 +289,13 @@ test_bit(int nr, const volatile void * a return (1UL & (((const int *) addr)[nr >> 5] >> (nr & 31))) != 0UL; } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned 
long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + /* * ffz = Find First Zero in word. Undefined if no zero exists, * so code should check against ~0UL first.. Index: linux-stable/arch/hexagon/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/hexagon/include/asm/bitops.h 2022-10-17 19:50:18.000000000 +0200 +++ linux-stable/arch/hexagon/include/asm/bitops.h 2022-10-17 19:50:18.000000000 +0200 @@ -172,7 +172,22 @@ static inline int __test_bit(int nr, con return retval; } +static inline int __test_bit_acquire(int nr, const volatile unsigned long *addr) +{ + int retval; + + asm volatile( + "{P0 = tstbit(%1,%2); if (P0.new) %0 = #1; if (!P0.new) %0 = #0;}\n" + : "=&r" (retval) + : "r" (addr[BIT_WORD(nr)]), "r" (nr % BITS_PER_LONG) + : "p0", "memory" + ); + + return retval; +} + #define test_bit(nr, addr) __test_bit(nr, addr) +#define test_bit_acquire(nr, addr) __test_bit_acquire(nr, addr) /* * ffz - find first zero in word. 
Index: linux-stable/arch/ia64/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/ia64/include/asm/bitops.h 2022-10-17 19:50:18.000000000 +0200 +++ linux-stable/arch/ia64/include/asm/bitops.h 2022-10-17 19:50:18.000000000 +0200 @@ -337,6 +337,13 @@ test_bit (int nr, const volatile void *a return 1 & (((const volatile __u32 *) addr)[nr >> 5] >> (nr & 31)); } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + /** * ffz - find the first zero bit in a long word * @x: The long word to find the bit in Index: linux-stable/arch/m68k/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/m68k/include/asm/bitops.h 2022-10-17 19:50:18.000000000 +0200 +++ linux-stable/arch/m68k/include/asm/bitops.h 2022-10-17 19:50:18.000000000 +0200 @@ -153,6 +153,12 @@ static inline int test_bit(int nr, const return (vaddr[nr >> 5] & (1UL << (nr & 31))) != 0; } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} static inline int bset_reg_test_and_set_bit(int nr, volatile unsigned long *vaddr) Index: linux-stable/arch/s390/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/s390/include/asm/bitops.h 2022-10-17 19:50:18.000000000 +0200 +++ linux-stable/arch/s390/include/asm/bitops.h 2022-10-17 19:50:18.000000000 +0200 @@ -219,6 +219,13 @@ static inline bool arch_test_bit(unsigne return (*addr >> (nr & 7)) & 1; } +static __always_inline bool +arch_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = 
((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + static inline bool arch_test_and_set_bit_lock(unsigned long nr, volatile unsigned long *ptr) { Index: linux-stable/arch/sh/include/asm/bitops-op32.h =================================================================== --- linux-stable.orig/arch/sh/include/asm/bitops-op32.h 2022-10-17 19:50:18.000000000 +0200 +++ linux-stable/arch/sh/include/asm/bitops-op32.h 2022-10-17 19:50:18.000000000 +0200 @@ -140,4 +140,11 @@ static inline int test_bit(int nr, const return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + #endif /* __ASM_SH_BITOPS_OP32_H */ Index: linux-stable/arch/arc/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/arc/include/asm/bitops.h 2022-10-17 19:50:18.000000000 +0200 +++ linux-stable/arch/arc/include/asm/bitops.h 2022-10-17 19:50:18.000000000 +0200 @@ -251,6 +251,13 @@ test_bit(unsigned int nr, const volatile return ((mask & *addr) != 0); } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + #ifdef CONFIG_ISA_ARCOMPACT /* Index: linux-stable/arch/h8300/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/h8300/include/asm/bitops.h 2022-10-17 19:46:50.000000000 +0200 +++ linux-stable/arch/h8300/include/asm/bitops.h 2022-10-17 19:54:41.000000000 +0200 @@ -83,7 +83,8 @@ static inline int test_bit(int nr, const return ret; } -#define __test_bit(nr, addr) test_bit(nr, addr) +#define __test_bit(nr, 
addr) test_bit(nr, addr) +#define test_bit_acquire(nr, addr) test_bit(nr, addr) #define H8300_GEN_TEST_BITOP(FNNAME, OP) \ static inline int FNNAME(int nr, void *addr) \ ^ permalink raw reply [flat|nested] 48+ messages in thread
* [PATCH] wait_on_bit: add an acquire memory barrier 2022-10-18 11:36 ` Mikulas Patocka ` (3 preceding siblings ...) 2022-10-18 11:38 ` Mikulas Patocka @ 2022-10-18 11:39 ` Mikulas Patocka 2022-10-18 11:39 ` Mikulas Patocka ` (2 subsequent siblings) 7 siblings, 0 replies; 48+ messages in thread From: Mikulas Patocka @ 2022-10-18 11:39 UTC (permalink / raw) To: Greg KH; +Cc: stable This is backport of the upstream patch 8238b4579866b7c1bb99883cfe102a43db5506ff for the stable branch 4.19 wait_on_bit: add an acquire memory barrier There are several places in the kernel where wait_on_bit is not followed by a memory barrier (for example, in drivers/md/dm-bufio.c:new_read). On architectures with weak memory ordering, it may happen that memory accesses that follow wait_on_bit are reordered before wait_on_bit and they may return invalid data. Fix this class of bugs by introducing a new function "test_bit_acquire" that works like test_bit, but has acquire memory ordering semantics. Signed-off-by: Mikulas Patocka <mpatocka@redhat.com> Acked-by: Will Deacon <will@kernel.org> Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- arch/alpha/include/asm/bitops.h | 7 +++++++ arch/arc/include/asm/bitops.h | 7 +++++++ arch/h8300/include/asm/bitops.h | 3 ++- arch/hexagon/include/asm/bitops.h | 15 +++++++++++++++ arch/ia64/include/asm/bitops.h | 7 +++++++ arch/m68k/include/asm/bitops.h | 6 ++++++ arch/s390/include/asm/bitops.h | 7 +++++++ arch/sh/include/asm/bitops-op32.h | 7 +++++++ arch/x86/include/asm/bitops.h | 21 +++++++++++++++++++++ include/asm-generic/bitops/non-atomic.h | 14 ++++++++++++++ include/linux/buffer_head.h | 2 +- include/linux/wait_bit.h | 8 ++++---- kernel/sched/wait_bit.c | 2 +- 13 files changed, 99 insertions(+), 7 deletions(-) Index: linux-stable/arch/x86/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/x86/include/asm/bitops.h 2022-10-17 
20:04:40.000000000 +0200 +++ linux-stable/arch/x86/include/asm/bitops.h 2022-10-17 20:04:40.000000000 +0200 @@ -317,6 +317,20 @@ static __always_inline bool constant_tes (addr[nr >> _BITOPS_LONG_SHIFT])) != 0; } +static __always_inline bool constant_test_bit_acquire(long nr, const volatile unsigned long *addr) +{ + bool oldbit; + + asm volatile("testb %2,%1" + CC_SET(nz) + : CC_OUT(nz) (oldbit) + : "m" (((unsigned char *)addr)[nr >> 3]), + "i" (1 << (nr & 7)) + :"memory"); + + return oldbit; +} + static __always_inline bool variable_test_bit(long nr, volatile const unsigned long *addr) { bool oldbit; @@ -343,6 +357,13 @@ static bool test_bit(int nr, const volat ? constant_test_bit((nr), (addr)) \ : variable_test_bit((nr), (addr))) +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + return __builtin_constant_p(nr) ? constant_test_bit_acquire(nr, addr) : + variable_test_bit(nr, addr); +} + /** * __ffs - find first set bit in word * @word: The word to search Index: linux-stable/include/asm-generic/bitops/non-atomic.h =================================================================== --- linux-stable.orig/include/asm-generic/bitops/non-atomic.h 2022-10-17 20:04:40.000000000 +0200 +++ linux-stable/include/asm-generic/bitops/non-atomic.h 2022-10-17 20:04:40.000000000 +0200 @@ -3,6 +3,7 @@ #define _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ #include <asm/types.h> +#include <asm/barrier.h> /** * __set_bit - Set a bit in memory @@ -106,4 +107,17 @@ static inline int test_bit(int nr, const return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); } +/** + * arch_test_bit_acquire - Determine, with acquire semantics, whether a bit is set + * @nr: bit number to test + * @addr: Address to start counting from + */ +static __always_inline bool +arch_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & 
(BITS_PER_LONG-1))); +} +#define test_bit_acquire arch_test_bit_acquire + #endif /* _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ */ Index: linux-stable/include/linux/buffer_head.h =================================================================== --- linux-stable.orig/include/linux/buffer_head.h 2022-10-17 20:04:40.000000000 +0200 +++ linux-stable/include/linux/buffer_head.h 2022-10-17 20:04:40.000000000 +0200 @@ -166,7 +166,7 @@ static __always_inline int buffer_uptoda * make it consistent with folio_test_uptodate * pairs with smp_mb__before_atomic in set_buffer_uptodate */ - return (smp_load_acquire(&bh->b_state) & (1UL << BH_Uptodate)) != 0; + return test_bit_acquire(BH_Uptodate, &bh->b_state); } #define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK) Index: linux-stable/include/linux/wait_bit.h =================================================================== --- linux-stable.orig/include/linux/wait_bit.h 2022-10-17 20:04:40.000000000 +0200 +++ linux-stable/include/linux/wait_bit.h 2022-10-17 20:04:40.000000000 +0200 @@ -71,7 +71,7 @@ static inline int wait_on_bit(unsigned long *word, int bit, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, bit_wait, @@ -96,7 +96,7 @@ static inline int wait_on_bit_io(unsigned long *word, int bit, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, bit_wait_io, @@ -123,7 +123,7 @@ wait_on_bit_timeout(unsigned long *word, unsigned long timeout) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit_timeout(word, bit, bit_wait_timeout, @@ -151,7 +151,7 @@ wait_on_bit_action(unsigned long *word, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, action, mode); } Index: 
linux-stable/kernel/sched/wait_bit.c =================================================================== --- linux-stable.orig/kernel/sched/wait_bit.c 2022-10-17 20:04:40.000000000 +0200 +++ linux-stable/kernel/sched/wait_bit.c 2022-10-17 20:04:40.000000000 +0200 @@ -46,7 +46,7 @@ __wait_on_bit(struct wait_queue_head *wq prepare_to_wait(wq_head, &wbq_entry->wq_entry, mode); if (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags)) ret = (*action)(&wbq_entry->key, mode); - } while (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret); + } while (test_bit_acquire(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret); finish_wait(wq_head, &wbq_entry->wq_entry); Index: linux-stable/arch/alpha/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/alpha/include/asm/bitops.h 2022-10-17 20:04:40.000000000 +0200 +++ linux-stable/arch/alpha/include/asm/bitops.h 2022-10-17 20:04:40.000000000 +0200 @@ -289,6 +289,13 @@ test_bit(int nr, const volatile void * a return (1UL & (((const int *) addr)[nr >> 5] >> (nr & 31))) != 0UL; } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + /* * ffz = Find First Zero in word. Undefined if no zero exists, * so code should check against ~0UL first.. 
Index: linux-stable/arch/hexagon/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/hexagon/include/asm/bitops.h 2022-10-17 20:04:40.000000000 +0200 +++ linux-stable/arch/hexagon/include/asm/bitops.h 2022-10-17 20:04:40.000000000 +0200 @@ -186,7 +186,22 @@ static inline int __test_bit(int nr, con return retval; } +static inline int __test_bit_acquire(int nr, const volatile unsigned long *addr) +{ + int retval; + + asm volatile( + "{P0 = tstbit(%1,%2); if (P0.new) %0 = #1; if (!P0.new) %0 = #0;}\n" + : "=&r" (retval) + : "r" (addr[BIT_WORD(nr)]), "r" (nr % BITS_PER_LONG) + : "p0", "memory" + ); + + return retval; +} + #define test_bit(nr, addr) __test_bit(nr, addr) +#define test_bit_acquire(nr, addr) __test_bit_acquire(nr, addr) /* * ffz - find first zero in word. Index: linux-stable/arch/ia64/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/ia64/include/asm/bitops.h 2022-10-17 20:04:40.000000000 +0200 +++ linux-stable/arch/ia64/include/asm/bitops.h 2022-10-17 20:04:40.000000000 +0200 @@ -337,6 +337,13 @@ test_bit (int nr, const volatile void *a return 1 & (((const volatile __u32 *) addr)[nr >> 5] >> (nr & 31)); } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + /** * ffz - find the first zero bit in a long word * @x: The long word to find the bit in Index: linux-stable/arch/m68k/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/m68k/include/asm/bitops.h 2022-10-17 20:04:40.000000000 +0200 +++ linux-stable/arch/m68k/include/asm/bitops.h 2022-10-17 20:04:40.000000000 +0200 @@ -153,6 +153,12 @@ static inline int test_bit(int nr, const return (vaddr[nr >> 5] & (1UL << (nr & 31))) != 0; } 
+static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} static inline int bset_reg_test_and_set_bit(int nr, volatile unsigned long *vaddr) Index: linux-stable/arch/s390/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/s390/include/asm/bitops.h 2022-10-17 20:04:40.000000000 +0200 +++ linux-stable/arch/s390/include/asm/bitops.h 2022-10-17 20:04:40.000000000 +0200 @@ -215,6 +215,13 @@ static inline int test_bit(unsigned long return (*addr >> (nr & 7)) & 1; } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + static inline int test_and_set_bit_lock(unsigned long nr, volatile unsigned long *ptr) { Index: linux-stable/arch/sh/include/asm/bitops-op32.h =================================================================== --- linux-stable.orig/arch/sh/include/asm/bitops-op32.h 2022-10-17 20:04:40.000000000 +0200 +++ linux-stable/arch/sh/include/asm/bitops-op32.h 2022-10-17 20:04:40.000000000 +0200 @@ -140,4 +140,11 @@ static inline int test_bit(int nr, const return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + #endif /* __ASM_SH_BITOPS_OP32_H */ Index: linux-stable/arch/arc/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/arc/include/asm/bitops.h 2022-10-17 20:04:40.000000000 +0200 +++ linux-stable/arch/arc/include/asm/bitops.h 2022-10-17 
20:04:40.000000000 +0200 @@ -254,6 +254,13 @@ test_bit(unsigned int nr, const volatile return ((mask & *addr) != 0); } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + #ifdef CONFIG_ISA_ARCOMPACT /* Index: linux-stable/arch/h8300/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/h8300/include/asm/bitops.h 2022-10-17 20:04:40.000000000 +0200 +++ linux-stable/arch/h8300/include/asm/bitops.h 2022-10-17 20:04:40.000000000 +0200 @@ -89,7 +89,8 @@ static inline int test_bit(int nr, const return ret; } -#define __test_bit(nr, addr) test_bit(nr, addr) +#define __test_bit(nr, addr) test_bit(nr, addr) +#define test_bit_acquire(nr, addr) test_bit(nr, addr) #define H8300_GEN_TEST_BITOP(FNNAME, OP) \ static inline int FNNAME(int nr, void *addr) \ ^ permalink raw reply [flat|nested] 48+ messages in thread
* [PATCH] wait_on_bit: add an acquire memory barrier 2022-10-18 11:36 ` Mikulas Patocka ` (4 preceding siblings ...) 2022-10-18 11:39 ` Mikulas Patocka @ 2022-10-18 11:39 ` Mikulas Patocka 2022-10-18 11:39 ` Mikulas Patocka 2022-10-26 17:01 ` backport of patches 8238b4579866b7c1bb99883cfe102a43db5506ff and d6ffe6067a54972564552ea45d320fb98db1ac5e Greg KH 7 siblings, 0 replies; 48+ messages in thread From: Mikulas Patocka @ 2022-10-18 11:39 UTC (permalink / raw) To: Greg KH; +Cc: stable This is backport of the upstream patch 8238b4579866b7c1bb99883cfe102a43db5506ff for the stable branch 4.14 wait_on_bit: add an acquire memory barrier There are several places in the kernel where wait_on_bit is not followed by a memory barrier (for example, in drivers/md/dm-bufio.c:new_read). On architectures with weak memory ordering, it may happen that memory accesses that follow wait_on_bit are reordered before wait_on_bit and they may return invalid data. Fix this class of bugs by introducing a new function "test_bit_acquire" that works like test_bit, but has acquire memory ordering semantics. 
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com> Acked-by: Will Deacon <will@kernel.org> Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- arch/alpha/include/asm/bitops.h | 7 +++++++ arch/arc/include/asm/bitops.h | 7 +++++++ arch/frv/include/asm/bitops.h | 7 +++++++ arch/h8300/include/asm/bitops.h | 3 ++- arch/hexagon/include/asm/bitops.h | 15 +++++++++++++++ arch/ia64/include/asm/bitops.h | 7 +++++++ arch/m68k/include/asm/bitops.h | 6 ++++++ arch/mn10300/include/asm/bitops.h | 7 +++++++ arch/s390/include/asm/bitops.h | 7 +++++++ arch/sh/include/asm/bitops-op32.h | 7 +++++++ arch/x86/include/asm/bitops.h | 21 +++++++++++++++++++++ include/asm-generic/bitops/non-atomic.h | 14 ++++++++++++++ include/linux/buffer_head.h | 2 +- include/linux/wait_bit.h | 8 ++++---- kernel/sched/wait_bit.c | 2 +- 15 files changed, 113 insertions(+), 7 deletions(-) Index: linux-stable/arch/x86/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/x86/include/asm/bitops.h 2022-10-17 20:39:55.000000000 +0200 +++ linux-stable/arch/x86/include/asm/bitops.h 2022-10-17 20:39:55.000000000 +0200 @@ -328,6 +328,20 @@ static __always_inline bool constant_tes (addr[nr >> _BITOPS_LONG_SHIFT])) != 0; } +static __always_inline bool constant_test_bit_acquire(long nr, const volatile unsigned long *addr) +{ + bool oldbit; + + asm volatile("testb %2,%1" + CC_SET(nz) + : CC_OUT(nz) (oldbit) + : "m" (((unsigned char *)addr)[nr >> 3]), + "i" (1 << (nr & 7)) + :"memory"); + + return oldbit; +} + static __always_inline bool variable_test_bit(long nr, volatile const unsigned long *addr) { bool oldbit; @@ -354,6 +368,13 @@ static bool test_bit(int nr, const volat ? constant_test_bit((nr), (addr)) \ : variable_test_bit((nr), (addr))) +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + return __builtin_constant_p(nr) ? 
constant_test_bit_acquire(nr, addr) : + variable_test_bit(nr, addr); +} + /** * __ffs - find first set bit in word * @word: The word to search Index: linux-stable/include/asm-generic/bitops/non-atomic.h =================================================================== --- linux-stable.orig/include/asm-generic/bitops/non-atomic.h 2022-10-17 20:39:55.000000000 +0200 +++ linux-stable/include/asm-generic/bitops/non-atomic.h 2022-10-17 20:39:55.000000000 +0200 @@ -3,6 +3,7 @@ #define _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ #include <asm/types.h> +#include <asm/barrier.h> /** * __set_bit - Set a bit in memory @@ -106,4 +107,17 @@ static inline int test_bit(int nr, const return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); } +/** + * arch_test_bit_acquire - Determine, with acquire semantics, whether a bit is set + * @nr: bit number to test + * @addr: Address to start counting from + */ +static __always_inline bool +arch_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} +#define test_bit_acquire arch_test_bit_acquire + #endif /* _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ */ Index: linux-stable/include/linux/buffer_head.h =================================================================== --- linux-stable.orig/include/linux/buffer_head.h 2022-10-17 20:39:55.000000000 +0200 +++ linux-stable/include/linux/buffer_head.h 2022-10-17 20:39:55.000000000 +0200 @@ -163,7 +163,7 @@ static __always_inline int buffer_uptoda * make it consistent with folio_test_uptodate * pairs with smp_mb__before_atomic in set_buffer_uptodate */ - return (smp_load_acquire(&bh->b_state) & (1UL << BH_Uptodate)) != 0; + return test_bit_acquire(BH_Uptodate, &bh->b_state); } #define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK) Index: linux-stable/include/linux/wait_bit.h =================================================================== --- 
linux-stable.orig/include/linux/wait_bit.h 2022-10-17 20:39:55.000000000 +0200 +++ linux-stable/include/linux/wait_bit.h 2022-10-17 20:39:55.000000000 +0200 @@ -76,7 +76,7 @@ static inline int wait_on_bit(unsigned long *word, int bit, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, bit_wait, @@ -101,7 +101,7 @@ static inline int wait_on_bit_io(unsigned long *word, int bit, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, bit_wait_io, @@ -128,7 +128,7 @@ wait_on_bit_timeout(unsigned long *word, unsigned long timeout) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit_timeout(word, bit, bit_wait_timeout, @@ -156,7 +156,7 @@ wait_on_bit_action(unsigned long *word, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, action, mode); } Index: linux-stable/kernel/sched/wait_bit.c =================================================================== --- linux-stable.orig/kernel/sched/wait_bit.c 2022-10-17 20:39:55.000000000 +0200 +++ linux-stable/kernel/sched/wait_bit.c 2022-10-17 20:39:55.000000000 +0200 @@ -49,7 +49,7 @@ __wait_on_bit(struct wait_queue_head *wq prepare_to_wait(wq_head, &wbq_entry->wq_entry, mode); if (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags)) ret = (*action)(&wbq_entry->key, mode); - } while (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret); + } while (test_bit_acquire(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret); finish_wait(wq_head, &wbq_entry->wq_entry); return ret; } Index: linux-stable/arch/alpha/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/alpha/include/asm/bitops.h 2022-10-17 
20:39:55.000000000 +0200 +++ linux-stable/arch/alpha/include/asm/bitops.h 2022-10-17 20:39:55.000000000 +0200 @@ -289,6 +289,13 @@ test_bit(int nr, const volatile void * a return (1UL & (((const int *) addr)[nr >> 5] >> (nr & 31))) != 0UL; } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + /* * ffz = Find First Zero in word. Undefined if no zero exists, * so code should check against ~0UL first.. Index: linux-stable/arch/hexagon/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/hexagon/include/asm/bitops.h 2022-10-17 20:39:55.000000000 +0200 +++ linux-stable/arch/hexagon/include/asm/bitops.h 2022-10-17 20:39:55.000000000 +0200 @@ -186,7 +186,22 @@ static inline int __test_bit(int nr, con return retval; } +static inline int __test_bit_acquire(int nr, const volatile unsigned long *addr) +{ + int retval; + + asm volatile( + "{P0 = tstbit(%1,%2); if (P0.new) %0 = #1; if (!P0.new) %0 = #0;}\n" + : "=&r" (retval) + : "r" (addr[BIT_WORD(nr)]), "r" (nr % BITS_PER_LONG) + : "p0", "memory" + ); + + return retval; +} + #define test_bit(nr, addr) __test_bit(nr, addr) +#define test_bit_acquire(nr, addr) __test_bit_acquire(nr, addr) /* * ffz - find first zero in word. 
Index: linux-stable/arch/ia64/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/ia64/include/asm/bitops.h 2022-10-17 20:39:55.000000000 +0200 +++ linux-stable/arch/ia64/include/asm/bitops.h 2022-10-17 20:39:55.000000000 +0200 @@ -337,6 +337,13 @@ test_bit (int nr, const volatile void *a return 1 & (((const volatile __u32 *) addr)[nr >> 5] >> (nr & 31)); } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + /** * ffz - find the first zero bit in a long word * @x: The long word to find the bit in Index: linux-stable/arch/m68k/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/m68k/include/asm/bitops.h 2022-10-17 20:39:55.000000000 +0200 +++ linux-stable/arch/m68k/include/asm/bitops.h 2022-10-17 20:39:55.000000000 +0200 @@ -153,6 +153,12 @@ static inline int test_bit(int nr, const return (vaddr[nr >> 5] & (1UL << (nr & 31))) != 0; } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} static inline int bset_reg_test_and_set_bit(int nr, volatile unsigned long *vaddr) Index: linux-stable/arch/s390/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/s390/include/asm/bitops.h 2022-10-17 20:39:55.000000000 +0200 +++ linux-stable/arch/s390/include/asm/bitops.h 2022-10-17 20:39:55.000000000 +0200 @@ -215,6 +215,13 @@ static inline int test_bit(unsigned long return (*addr >> (nr & 7)) & 1; } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned 
long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + static inline int test_and_set_bit_lock(unsigned long nr, volatile unsigned long *ptr) { Index: linux-stable/arch/sh/include/asm/bitops-op32.h =================================================================== --- linux-stable.orig/arch/sh/include/asm/bitops-op32.h 2022-10-17 20:39:55.000000000 +0200 +++ linux-stable/arch/sh/include/asm/bitops-op32.h 2022-10-17 20:39:55.000000000 +0200 @@ -140,4 +140,11 @@ static inline int test_bit(int nr, const return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + #endif /* __ASM_SH_BITOPS_OP32_H */ Index: linux-stable/arch/arc/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/arc/include/asm/bitops.h 2022-10-17 20:39:55.000000000 +0200 +++ linux-stable/arch/arc/include/asm/bitops.h 2022-10-17 20:39:55.000000000 +0200 @@ -254,6 +254,13 @@ test_bit(unsigned int nr, const volatile return ((mask & *addr) != 0); } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + #ifdef CONFIG_ISA_ARCOMPACT /* Index: linux-stable/arch/h8300/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/h8300/include/asm/bitops.h 2022-10-17 20:39:55.000000000 +0200 +++ linux-stable/arch/h8300/include/asm/bitops.h 2022-10-17 20:39:55.000000000 +0200 @@ -89,7 +89,8 @@ static inline int test_bit(int nr, const return ret; } -#define __test_bit(nr, addr) test_bit(nr, addr) +#define __test_bit(nr, addr) test_bit(nr, 
addr) +#define test_bit_acquire(nr, addr) test_bit(nr, addr) #define H8300_GEN_TEST_BITOP(FNNAME, OP) \ static inline int FNNAME(int nr, void *addr) \ Index: linux-stable/arch/frv/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/frv/include/asm/bitops.h 2022-10-17 20:39:55.000000000 +0200 +++ linux-stable/arch/frv/include/asm/bitops.h 2022-10-17 20:39:55.000000000 +0200 @@ -156,6 +156,13 @@ static inline int __test_bit(unsigned lo __constant_test_bit((nr),(addr)) : \ __test_bit((nr),(addr))) +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + #include <asm-generic/bitops/find.h> /** Index: linux-stable/arch/mn10300/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/mn10300/include/asm/bitops.h 2022-10-17 20:39:46.000000000 +0200 +++ linux-stable/arch/mn10300/include/asm/bitops.h 2022-10-17 20:30:50.000000000 +0200 @@ -73,6 +73,13 @@ static inline int test_bit(unsigned long return 1UL & (((const volatile unsigned int *) addr)[nr >> 5] >> (nr & 31)); } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + /* * change bit */ ^ permalink raw reply [flat|nested] 48+ messages in thread
* [PATCH] wait_on_bit: add an acquire memory barrier 2022-10-18 11:36 ` Mikulas Patocka ` (5 preceding siblings ...) 2022-10-18 11:39 ` Mikulas Patocka @ 2022-10-18 11:39 ` Mikulas Patocka 2022-10-26 17:01 ` backport of patches 8238b4579866b7c1bb99883cfe102a43db5506ff and d6ffe6067a54972564552ea45d320fb98db1ac5e Greg KH 7 siblings, 0 replies; 48+ messages in thread From: Mikulas Patocka @ 2022-10-18 11:39 UTC (permalink / raw) To: Greg KH; +Cc: stable This is backport of the upstream patch 8238b4579866b7c1bb99883cfe102a43db5506ff for the stable branch 4.9 wait_on_bit: add an acquire memory barrier There are several places in the kernel where wait_on_bit is not followed by a memory barrier (for example, in drivers/md/dm-bufio.c:new_read). On architectures with weak memory ordering, it may happen that memory accesses that follow wait_on_bit are reordered before wait_on_bit and they may return invalid data. Fix this class of bugs by introducing a new function "test_bit_acquire" that works like test_bit, but has acquire memory ordering semantics. 
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com> Acked-by: Will Deacon <will@kernel.org> Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- arch/alpha/include/asm/bitops.h | 7 +++++++ arch/arc/include/asm/bitops.h | 7 +++++++ arch/frv/include/asm/bitops.h | 7 +++++++ arch/h8300/include/asm/bitops.h | 3 ++- arch/hexagon/include/asm/bitops.h | 15 +++++++++++++++ arch/ia64/include/asm/bitops.h | 7 +++++++ arch/m68k/include/asm/bitops.h | 6 ++++++ arch/mn10300/include/asm/bitops.h | 7 +++++++ arch/s390/include/asm/bitops.h | 7 +++++++ arch/sh/include/asm/bitops-op32.h | 7 +++++++ arch/x86/include/asm/bitops.h | 21 +++++++++++++++++++++ include/asm-generic/bitops/non-atomic.h | 14 ++++++++++++++ include/linux/buffer_head.h | 2 +- include/linux/wait.h | 8 ++++---- kernel/sched/wait.c | 2 +- 15 files changed, 113 insertions(+), 7 deletions(-) Index: linux-stable/arch/x86/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/x86/include/asm/bitops.h 2022-10-17 20:43:20.000000000 +0200 +++ linux-stable/arch/x86/include/asm/bitops.h 2022-10-17 20:43:20.000000000 +0200 @@ -314,6 +314,20 @@ static __always_inline bool constant_tes (addr[nr >> _BITOPS_LONG_SHIFT])) != 0; } +static __always_inline bool constant_test_bit_acquire(long nr, const volatile unsigned long *addr) +{ + bool oldbit; + + asm volatile("testb %2,%1" + CC_SET(nz) + : CC_OUT(nz) (oldbit) + : "m" (((unsigned char *)addr)[nr >> 3]), + "i" (1 << (nr & 7)) + :"memory"); + + return oldbit; +} + static __always_inline bool variable_test_bit(long nr, volatile const unsigned long *addr) { bool oldbit; @@ -340,6 +354,13 @@ static bool test_bit(int nr, const volat ? constant_test_bit((nr), (addr)) \ : variable_test_bit((nr), (addr))) +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + return __builtin_constant_p(nr) ? 
constant_test_bit_acquire(nr, addr) : + variable_test_bit(nr, addr); +} + /** * __ffs - find first set bit in word * @word: The word to search Index: linux-stable/include/asm-generic/bitops/non-atomic.h =================================================================== --- linux-stable.orig/include/asm-generic/bitops/non-atomic.h 2022-10-17 20:43:20.000000000 +0200 +++ linux-stable/include/asm-generic/bitops/non-atomic.h 2022-10-17 20:43:20.000000000 +0200 @@ -2,6 +2,7 @@ #define _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ #include <asm/types.h> +#include <asm/barrier.h> /** * __set_bit - Set a bit in memory @@ -105,4 +106,17 @@ static inline int test_bit(int nr, const return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); } +/** + * arch_test_bit_acquire - Determine, with acquire semantics, whether a bit is set + * @nr: bit number to test + * @addr: Address to start counting from + */ +static __always_inline bool +arch_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} +#define test_bit_acquire arch_test_bit_acquire + #endif /* _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ */ Index: linux-stable/include/linux/buffer_head.h =================================================================== --- linux-stable.orig/include/linux/buffer_head.h 2022-10-17 20:43:20.000000000 +0200 +++ linux-stable/include/linux/buffer_head.h 2022-10-17 20:43:20.000000000 +0200 @@ -162,7 +162,7 @@ static __always_inline int buffer_uptoda * make it consistent with folio_test_uptodate * pairs with smp_mb__before_atomic in set_buffer_uptodate */ - return (smp_load_acquire(&bh->b_state) & (1UL << BH_Uptodate)) != 0; + return test_bit_acquire(BH_Uptodate, &bh->b_state); } #define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK) Index: linux-stable/include/linux/wait.h =================================================================== --- 
linux-stable.orig/include/linux/wait.h 2022-10-17 20:43:20.000000000 +0200 +++ linux-stable/include/linux/wait.h 2022-10-17 20:43:20.000000000 +0200 @@ -1066,7 +1066,7 @@ static inline int wait_on_bit(unsigned long *word, int bit, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, bit_wait, @@ -1091,7 +1091,7 @@ static inline int wait_on_bit_io(unsigned long *word, int bit, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, bit_wait_io, @@ -1118,7 +1118,7 @@ wait_on_bit_timeout(unsigned long *word, unsigned long timeout) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit_timeout(word, bit, bit_wait_timeout, @@ -1146,7 +1146,7 @@ wait_on_bit_action(unsigned long *word, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, action, mode); } Index: linux-stable/kernel/sched/wait.c =================================================================== --- linux-stable.orig/kernel/sched/wait.c 2022-10-17 20:43:20.000000000 +0200 +++ linux-stable/kernel/sched/wait.c 2022-10-17 20:43:20.000000000 +0200 @@ -389,7 +389,7 @@ __wait_on_bit(wait_queue_head_t *wq, str prepare_to_wait(wq, &q->wait, mode); if (test_bit(q->key.bit_nr, q->key.flags)) ret = (*action)(&q->key, mode); - } while (test_bit(q->key.bit_nr, q->key.flags) && !ret); + } while (test_bit_acquire(q->key.bit_nr, q->key.flags) && !ret); finish_wait(wq, &q->wait); return ret; } Index: linux-stable/arch/alpha/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/alpha/include/asm/bitops.h 2022-10-17 20:43:20.000000000 +0200 +++ linux-stable/arch/alpha/include/asm/bitops.h 2022-10-17 20:43:20.000000000 
+0200 @@ -288,6 +288,13 @@ test_bit(int nr, const volatile void * a return (1UL & (((const int *) addr)[nr >> 5] >> (nr & 31))) != 0UL; } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + /* * ffz = Find First Zero in word. Undefined if no zero exists, * so code should check against ~0UL first.. Index: linux-stable/arch/hexagon/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/hexagon/include/asm/bitops.h 2022-10-17 20:43:20.000000000 +0200 +++ linux-stable/arch/hexagon/include/asm/bitops.h 2022-10-17 20:43:20.000000000 +0200 @@ -186,7 +186,22 @@ static inline int __test_bit(int nr, con return retval; } +static inline int __test_bit_acquire(int nr, const volatile unsigned long *addr) +{ + int retval; + + asm volatile( + "{P0 = tstbit(%1,%2); if (P0.new) %0 = #1; if (!P0.new) %0 = #0;}\n" + : "=&r" (retval) + : "r" (addr[BIT_WORD(nr)]), "r" (nr % BITS_PER_LONG) + : "p0", "memory" + ); + + return retval; +} + #define test_bit(nr, addr) __test_bit(nr, addr) +#define test_bit_acquire(nr, addr) __test_bit_acquire(nr, addr) /* * ffz - find first zero in word. 
Index: linux-stable/arch/ia64/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/ia64/include/asm/bitops.h 2022-10-17 20:43:20.000000000 +0200 +++ linux-stable/arch/ia64/include/asm/bitops.h 2022-10-17 20:43:20.000000000 +0200 @@ -336,6 +336,13 @@ test_bit (int nr, const volatile void *a return 1 & (((const volatile __u32 *) addr)[nr >> 5] >> (nr & 31)); } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + /** * ffz - find the first zero bit in a long word * @x: The long word to find the bit in Index: linux-stable/arch/m68k/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/m68k/include/asm/bitops.h 2022-10-17 20:43:20.000000000 +0200 +++ linux-stable/arch/m68k/include/asm/bitops.h 2022-10-17 20:43:20.000000000 +0200 @@ -153,6 +153,12 @@ static inline int test_bit(int nr, const return (vaddr[nr >> 5] & (1UL << (nr & 31))) != 0; } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} static inline int bset_reg_test_and_set_bit(int nr, volatile unsigned long *vaddr) Index: linux-stable/arch/s390/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/s390/include/asm/bitops.h 2022-10-17 20:43:20.000000000 +0200 +++ linux-stable/arch/s390/include/asm/bitops.h 2022-10-17 20:43:20.000000000 +0200 @@ -270,6 +270,13 @@ static inline int test_bit(unsigned long return (*addr >> (nr & 7)) & 1; } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned 
long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + static inline int test_and_set_bit_lock(unsigned long nr, volatile unsigned long *ptr) { Index: linux-stable/arch/sh/include/asm/bitops-op32.h =================================================================== --- linux-stable.orig/arch/sh/include/asm/bitops-op32.h 2022-10-17 20:43:20.000000000 +0200 +++ linux-stable/arch/sh/include/asm/bitops-op32.h 2022-10-17 20:43:20.000000000 +0200 @@ -139,4 +139,11 @@ static inline int test_bit(int nr, const return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + #endif /* __ASM_SH_BITOPS_OP32_H */ Index: linux-stable/arch/arc/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/arc/include/asm/bitops.h 2022-10-17 20:43:20.000000000 +0200 +++ linux-stable/arch/arc/include/asm/bitops.h 2022-10-17 20:43:20.000000000 +0200 @@ -254,6 +254,13 @@ test_bit(unsigned int nr, const volatile return ((mask & *addr) != 0); } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + #ifdef CONFIG_ISA_ARCOMPACT /* Index: linux-stable/arch/h8300/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/h8300/include/asm/bitops.h 2022-10-17 20:43:20.000000000 +0200 +++ linux-stable/arch/h8300/include/asm/bitops.h 2022-10-17 20:43:20.000000000 +0200 @@ -88,7 +88,8 @@ static inline int test_bit(int nr, const return ret; } -#define __test_bit(nr, addr) test_bit(nr, addr) +#define __test_bit(nr, addr) test_bit(nr, 
addr) +#define test_bit_acquire(nr, addr) test_bit(nr, addr) #define H8300_GEN_TEST_BITOP(FNNAME, OP) \ static inline int FNNAME(int nr, void *addr) \ Index: linux-stable/arch/frv/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/frv/include/asm/bitops.h 2022-10-17 20:43:20.000000000 +0200 +++ linux-stable/arch/frv/include/asm/bitops.h 2022-10-17 20:43:20.000000000 +0200 @@ -156,6 +156,13 @@ static inline int __test_bit(unsigned lo __constant_test_bit((nr),(addr)) : \ __test_bit((nr),(addr))) +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + #include <asm-generic/bitops/find.h> /** Index: linux-stable/arch/mn10300/include/asm/bitops.h =================================================================== --- linux-stable.orig/arch/mn10300/include/asm/bitops.h 2022-10-17 20:43:20.000000000 +0200 +++ linux-stable/arch/mn10300/include/asm/bitops.h 2022-10-17 20:43:20.000000000 +0200 @@ -73,6 +73,13 @@ static inline int test_bit(unsigned long return 1UL & (((const volatile unsigned int *) addr)[nr >> 5] >> (nr & 31)); } +static __always_inline bool +test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + /* * change bit */ ^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: backport of patches 8238b4579866b7c1bb99883cfe102a43db5506ff and d6ffe6067a54972564552ea45d320fb98db1ac5e 2022-10-18 11:36 ` Mikulas Patocka ` (6 preceding siblings ...) 2022-10-18 11:39 ` Mikulas Patocka @ 2022-10-26 17:01 ` Greg KH 2022-10-27 11:45 ` Mikulas Patocka 7 siblings, 1 reply; 48+ messages in thread From: Greg KH @ 2022-10-26 17:01 UTC (permalink / raw) To: Mikulas Patocka; +Cc: stable On Tue, Oct 18, 2022 at 07:36:22AM -0400, Mikulas Patocka wrote: > > > On Tue, 11 Oct 2022, Greg KH wrote: > > > On Tue, Oct 11, 2022 at 05:48:26AM -0400, Mikulas Patocka wrote: > > > > > > > > > On Mon, 10 Oct 2022, Greg KH wrote: > > > > > > > Nope, these cause loads of breakages. See > > > > https://lore.kernel.org/r/09eca44e-4d91-a060-d48c-d0aa41ac5045@roeck-us.net > > > > for one such example, and I know kbuild sent you other build problems. > > > > I'll drop all of these from the stable trees now. Please feel free to > > > > resend them when you have the build issues worked out. > > > > > > > > thanks, > > > > > > > > greg k-h > > > > > > I don't have cross compilers for all the architectures that Linux > > > supports. Is there some way how to have the patch compile-tested before I > > > send it to you? > > > > You can download those compilers from kernel.org, they are all available > > there. > > OK. I downloaded cross compilers from > https://mirrors.edge.kernel.org/pub/tools/crosstool/ and compile-tested > the patches with all possible architectures. > > Here I'm sending new versions. But don't you need 2 patches, not just 1, to be applied? Please resend a set of series, one series per stable kernel branch, to make it more obvious what to do. Your thread here is very confusing. 
See the stable mailing list archives for lots of examples of how to do this properly, here are 2 good examples: https://lore.kernel.org/r/20221019125303.2845522-1-conor.dooley@microchip.com https://lore.kernel.org/r/20221019125209.2844943-1-conor.dooley@microchip.com thanks, greg k-h ^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: backport of patches 8238b4579866b7c1bb99883cfe102a43db5506ff and d6ffe6067a54972564552ea45d320fb98db1ac5e 2022-10-26 17:01 ` backport of patches 8238b4579866b7c1bb99883cfe102a43db5506ff and d6ffe6067a54972564552ea45d320fb98db1ac5e Greg KH @ 2022-10-27 11:45 ` Mikulas Patocka 2022-10-27 11:53 ` Greg KH 0 siblings, 1 reply; 48+ messages in thread From: Mikulas Patocka @ 2022-10-27 11:45 UTC (permalink / raw) To: Greg KH; +Cc: stable On Wed, 26 Oct 2022, Greg KH wrote: > On Tue, Oct 18, 2022 at 07:36:22AM -0400, Mikulas Patocka wrote: > > > > > > On Tue, 11 Oct 2022, Greg KH wrote: > > > > > On Tue, Oct 11, 2022 at 05:48:26AM -0400, Mikulas Patocka wrote: > > > > > > > > > > > > On Mon, 10 Oct 2022, Greg KH wrote: > > > > > > > > > Nope, these cause loads of breakages. See > > > > > https://lore.kernel.org/r/09eca44e-4d91-a060-d48c-d0aa41ac5045@roeck-us.net > > > > > for one such example, and I know kbuild sent you other build problems. > > > > > I'll drop all of these from the stable trees now. Please feel free to > > > > > resend them when you have the build issues worked out. > > > > > > > > > > thanks, > > > > > > > > > > greg k-h > > > > > > > > I don't have cross compilers for all the architectures that Linux > > > > supports. Is there some way how to have the patch compile-tested before I > > > > send it to you? > > > > > > You can download those compilers from kernel.org, they are all available > > > there. > > > > OK. I downloaded cross compilers from > > https://mirrors.edge.kernel.org/pub/tools/crosstool/ and compile-tested > > the patches with all possible architectures. > > > > Here I'm sending new versions. > > But don't you need 2 patches, not just 1, to be applied? Just one patch is sufficient. The upstream patch 8238b4579866b7c1bb99883cfe102a43db5506ff fixes a bug and the patch d6ffe6067a54972564552ea45d320fb98db1ac5e fixes compile failures triggered by 8238b4579866b7c1bb99883cfe102a43db5506ff on some architectures. 
For simplicity of making and testing the stable branch patches I folded these changes into just one patch - that fixes the bug and fixes compile failures as well. > Please resend a set of series, one series per stable kernel branch, to > make it more obvious what to do. Your thread here is very confusing. I'll resend it, but except for the subject line I don't know what I have done wrong. Mikulas > See the stable mailing list archives for lots of examples of how to do this properly, here are 2 good examples: > https://lore.kernel.org/r/20221019125303.2845522-1-conor.dooley@microchip.com > https://lore.kernel.org/r/20221019125209.2844943-1-conor.dooley@microchip.com > > thanks, > > greg k-h > ^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: backport of patches 8238b4579866b7c1bb99883cfe102a43db5506ff and d6ffe6067a54972564552ea45d320fb98db1ac5e 2022-10-27 11:45 ` Mikulas Patocka @ 2022-10-27 11:53 ` Greg KH 2022-10-27 12:48 ` Mikulas Patocka 0 siblings, 1 reply; 48+ messages in thread From: Greg KH @ 2022-10-27 11:53 UTC (permalink / raw) To: Mikulas Patocka; +Cc: stable On Thu, Oct 27, 2022 at 07:45:52AM -0400, Mikulas Patocka wrote: > > > On Wed, 26 Oct 2022, Greg KH wrote: > > > On Tue, Oct 18, 2022 at 07:36:22AM -0400, Mikulas Patocka wrote: > > > > > > > > > On Tue, 11 Oct 2022, Greg KH wrote: > > > > > > > On Tue, Oct 11, 2022 at 05:48:26AM -0400, Mikulas Patocka wrote: > > > > > > > > > > > > > > > On Mon, 10 Oct 2022, Greg KH wrote: > > > > > > > > > > > Nope, these cause loads of breakages. See > > > > > > https://lore.kernel.org/r/09eca44e-4d91-a060-d48c-d0aa41ac5045@roeck-us.net > > > > > > for one such example, and I know kbuild sent you other build problems. > > > > > > I'll drop all of these from the stable trees now. Please feel free to > > > > > > resend them when you have the build issues worked out. > > > > > > > > > > > > thanks, > > > > > > > > > > > > greg k-h > > > > > > > > > > I don't have cross compilers for all the architectures that Linux > > > > > supports. Is there some way how to have the patch compile-tested before I > > > > > send it to you? > > > > > > > > You can download those compilers from kernel.org, they are all available > > > > there. > > > > > > OK. I downloaded cross compilers from > > > https://mirrors.edge.kernel.org/pub/tools/crosstool/ and compile-tested > > > the patches with all possible architectures. > > > > > > Here I'm sending new versions. > > > > But don't you need 2 patches, not just 1, to be applied? > > Just one patch is sufficient. 
> > The upstream patch 8238b4579866b7c1bb99883cfe102a43db5506ff fixes a bug > and the patch d6ffe6067a54972564552ea45d320fb98db1ac5e fixes compile > failures triggered by 8238b4579866b7c1bb99883cfe102a43db5506ff on some > architectures. > > For simplicity of making and testing the stable branch patches I folded > these changes into just one patch - that fixes the bug and fixes compile > failures as well. No, please do not do that. We want both commits at once, not a "fixed up" change, right? Otherwise our tools will want to apply the second one as it is insisting that a fix is still needed. > > Please resend a set of series, one series per stable kernel branch, to > > make it more obvious what to do. Your thread here is very confusing. > > I'll resend it, but except for the subject line I don't know what have I > done wrong. Subject line is everything :) As is the text in the body, I would have had to remove that from your last one. See the examples on the list for how to make this easy for us to apply. thanks, greg k-h ^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: backport of patches 8238b4579866b7c1bb99883cfe102a43db5506ff and d6ffe6067a54972564552ea45d320fb98db1ac5e 2022-10-27 11:53 ` Greg KH @ 2022-10-27 12:48 ` Mikulas Patocka 0 siblings, 0 replies; 48+ messages in thread From: Mikulas Patocka @ 2022-10-27 12:48 UTC (permalink / raw) To: Greg KH; +Cc: stable On Thu, 27 Oct 2022, Greg KH wrote: > > > For simplicity of making and testing the stable branch patches I folded > > these changes into just one patch - that fixes the bug and fixes compile > > failures as well. > > No, please do not do that. We want both commits at once, not a "fixed > up" change, right? Otherwise our tools will want to apply the second > one as it is insisting that a fix is still needed. OK - so I split the patches in two and resent them. Mikulas ^ permalink raw reply [flat|nested] 48+ messages in thread
* [PATCH] wait_on_bit: add an acquire memory barrier @ 2022-08-22 9:38 Mikulas Patocka 2022-08-22 17:08 ` Linus Torvalds 0 siblings, 1 reply; 48+ messages in thread From: Mikulas Patocka @ 2022-08-22 9:38 UTC (permalink / raw) To: Linus Torvalds Cc: Alan Stern, Andrea Parri, Will Deacon, Peter Zijlstra, Boqun Feng, Nicholas Piggin, David Howells, Jade Alglave, Luc Maranget, Paul E. McKenney, Akira Yokosawa, Daniel Lustig, Joel Fernandes, linux-kernel, linux-arch Hi I'd like to ask what do you think about this patch? Do you want to commit it - or do you think that the barrier should be added to the callers of wait_on_bit? Mikulas From: Mikulas Patocka <mpatocka@redhat.com> There are several places in the kernel where wait_on_bit is not followed by a memory barrier (for example, in drivers/md/dm-bufio.c:new_read). On architectures with weak memory ordering, it may happen that memory accesses that follow wait_on_bit are reordered before wait_on_bit and they may return invalid data. Fix this class of bugs by adding an acquire memory barrier to wait_on_bit, wait_on_bit_io, wait_on_bit_timeout and wait_on_bit_action. The code that uses these functions should clear the bit using the function clear_bit_unlock. 
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com> Cc: stable@vger.kernel.org --- include/linux/wait_bit.h | 16 ++++++++++++---- kernel/sched/wait_bit.c | 2 ++ 2 files changed, 14 insertions(+), 4 deletions(-) Index: linux-2.6/include/linux/wait_bit.h =================================================================== --- linux-2.6.orig/include/linux/wait_bit.h 2022-08-20 14:33:44.000000000 +0200 +++ linux-2.6/include/linux/wait_bit.h 2022-08-20 15:41:43.000000000 +0200 @@ -71,8 +71,10 @@ static inline int wait_on_bit(unsigned long *word, int bit, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit(bit, word)) { + smp_acquire__after_ctrl_dep(); /* should pair with clear_bit_unlock */ return 0; + } return out_of_line_wait_on_bit(word, bit, bit_wait, mode); @@ -96,8 +98,10 @@ static inline int wait_on_bit_io(unsigned long *word, int bit, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit(bit, word)) { + smp_acquire__after_ctrl_dep(); /* should pair with clear_bit_unlock */ return 0; + } return out_of_line_wait_on_bit(word, bit, bit_wait_io, mode); @@ -123,8 +127,10 @@ wait_on_bit_timeout(unsigned long *word, unsigned long timeout) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit(bit, word)) { + smp_acquire__after_ctrl_dep(); /* should pair with clear_bit_unlock */ return 0; + } return out_of_line_wait_on_bit_timeout(word, bit, bit_wait_timeout, mode, timeout); @@ -151,8 +157,10 @@ wait_on_bit_action(unsigned long *word, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit(bit, word)) { + smp_acquire__after_ctrl_dep(); /* should pair with clear_bit_unlock */ return 0; + } return out_of_line_wait_on_bit(word, bit, action, mode); } Index: linux-2.6/kernel/sched/wait_bit.c =================================================================== --- linux-2.6.orig/kernel/sched/wait_bit.c 2022-08-20 14:33:44.000000000 +0200 +++ linux-2.6/kernel/sched/wait_bit.c 2022-08-20 15:41:39.000000000 
+0200 @@ -49,6 +49,8 @@ __wait_on_bit(struct wait_queue_head *wq ret = (*action)(&wbq_entry->key, mode); } while (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret); + smp_acquire__after_ctrl_dep(); /* should pair with clear_bit_unlock */ + finish_wait(wq_head, &wbq_entry->wq_entry); return ret; ^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH] wait_on_bit: add an acquire memory barrier 2022-08-22 9:38 [PATCH] wait_on_bit: add an acquire memory barrier Mikulas Patocka @ 2022-08-22 17:08 ` Linus Torvalds 2022-08-22 17:39 ` Linus Torvalds 0 siblings, 1 reply; 48+ messages in thread From: Linus Torvalds @ 2022-08-22 17:08 UTC (permalink / raw) To: Mikulas Patocka Cc: Alan Stern, Andrea Parri, Will Deacon, Peter Zijlstra, Boqun Feng, Nicholas Piggin, David Howells, Jade Alglave, Luc Maranget, Paul E. McKenney, Akira Yokosawa, Daniel Lustig, Joel Fernandes, linux-kernel, linux-arch On Mon, Aug 22, 2022 at 2:39 AM Mikulas Patocka <mpatocka@redhat.com> wrote: > > I'd like to ask what do you think about this patch? I really don't like it. It adds a pointless read barrier only because you didn't want to do it properly. On x86, it doesn't matter, since rmb is a no-op and only a scheduling barrier (and not noticeable in this case anyway). On other architectures, it might. But on all architectures it's just ugly. I suggested in an earlier thread that you just do it right with an explicit smp_load_acquire() and a manual bit test. So why don't we just create a "test_bit_acquire()" and be done with it? We literally created clear_bit_unlock() for the opposite reason, and your comments about the new barrier hack even point to it. Why is "clear_bit_unlock()" worthy of a real helper, but "test_bit_acquire()" is not and people who want it have to use this horrendous hack? Please stop adding random barriers already. Just do it right. I've said this before, why do you then keep doing this and asking for comments? My reply will remain the same: JUST DO IT RIGHT. Linus ^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH] wait_on_bit: add an acquire memory barrier 2022-08-22 17:08 ` Linus Torvalds @ 2022-08-22 17:39 ` Linus Torvalds 2022-08-25 21:03 ` Mikulas Patocka 0 siblings, 1 reply; 48+ messages in thread From: Linus Torvalds @ 2022-08-22 17:39 UTC (permalink / raw) To: Mikulas Patocka Cc: Alan Stern, Andrea Parri, Will Deacon, Peter Zijlstra, Boqun Feng, Nicholas Piggin, David Howells, Jade Alglave, Luc Maranget, Paul E. McKenney, Akira Yokosawa, Daniel Lustig, Joel Fernandes, linux-kernel, linux-arch [-- Attachment #1: Type: text/plain, Size: 1471 bytes --] On Mon, Aug 22, 2022 at 10:08 AM Linus Torvalds <torvalds@linux-foundation.org> wrote: > > So why don't we just create a "test_bit_acquire()" and be done with > it? We literally created clear_bit_unlock() for the opposite reason, > and your comments about the new barrier hack even point to it. Here's a patch that is (a) almost entirely untested (I checked that one single case builds and seems to generate the expected code) (b) needs some more loving but seems to superficially work. At a minimum this needs to be split into two (so the bitop and the wait_on_bit parts split up), and that whole placement of <asm/barrier.h> and generic_bit_test_acquire() need at least some thinking about, but on the whole it seems reasonable. For example, it would make more sense to have this in <asm-generic/bitops/lock.h>, but not all architectures include that, and some do their own version. I didn't want to mess with architecture-specific headers, so this illogically just uses generic-non-atomic.h. Maybe just put it in <linux/bitops.h> directly? So I'm not at all claiming that this is a great patch. It definitely needs more work, and a lot more testing. But I think this is at least the right _direction_ to take here. And yes, I think it also would have been better if "clear_bit_unlock()" would have been called "clear_bit_release()", and we'd have more consistent naming with our ordered atomics. 
But it's probably not worth changing. Linus [-- Attachment #2: patch.diff --] [-- Type: text/x-patch, Size: 4197 bytes --] include/asm-generic/bitops/generic-non-atomic.h | 9 +++++++++ include/asm-generic/bitops/non-atomic.h | 1 + include/linux/bitops.h | 2 +- include/linux/wait_bit.h | 8 ++++---- kernel/sched/wait_bit.c | 2 +- 5 files changed, 16 insertions(+), 6 deletions(-) diff --git a/include/asm-generic/bitops/generic-non-atomic.h b/include/asm-generic/bitops/generic-non-atomic.h index 3d5ebd24652b..f56a252db9e8 100644 --- a/include/asm-generic/bitops/generic-non-atomic.h +++ b/include/asm-generic/bitops/generic-non-atomic.h @@ -4,6 +4,7 @@ #define __ASM_GENERIC_BITOPS_GENERIC_NON_ATOMIC_H #include <linux/bits.h> +#include <asm/barrier.h> #ifndef _LINUX_BITOPS_H #error only <linux/bitops.h> can be included directly @@ -158,4 +159,12 @@ const_test_bit(unsigned long nr, const volatile unsigned long *addr) return !!(val & mask); } +static __always_inline bool +generic_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long mask = BIT_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return smp_load_acquire(p) & mask; +} + #endif /* __ASM_GENERIC_BITOPS_GENERIC_NON_ATOMIC_H */ diff --git a/include/asm-generic/bitops/non-atomic.h b/include/asm-generic/bitops/non-atomic.h index 5c37ced343ae..71f8d54a5195 100644 --- a/include/asm-generic/bitops/non-atomic.h +++ b/include/asm-generic/bitops/non-atomic.h @@ -13,6 +13,7 @@ #define arch___test_and_change_bit generic___test_and_change_bit #define arch_test_bit generic_test_bit +#define arch_test_bit_acquire generic_test_bit_acquire #include <asm-generic/bitops/non-instrumented-non-atomic.h> diff --git a/include/linux/bitops.h b/include/linux/bitops.h index cf9bf65039f2..22adf74d5c25 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -59,7 +59,7 @@ extern unsigned long __sw_hweight64(__u64 w); #define __test_and_clear_bit(nr, addr) 
bitop(___test_and_clear_bit, nr, addr) #define __test_and_change_bit(nr, addr) bitop(___test_and_change_bit, nr, addr) #define test_bit(nr, addr) bitop(_test_bit, nr, addr) - +#define test_bit_acquire(nr, addr) generic_test_bit_acquire(nr, addr) /* * Include this here because some architectures need generic_ffs/fls in * scope diff --git a/include/linux/wait_bit.h b/include/linux/wait_bit.h index 7dec36aecbd9..7725b7579b78 100644 --- a/include/linux/wait_bit.h +++ b/include/linux/wait_bit.h @@ -71,7 +71,7 @@ static inline int wait_on_bit(unsigned long *word, int bit, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, bit_wait, @@ -96,7 +96,7 @@ static inline int wait_on_bit_io(unsigned long *word, int bit, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, bit_wait_io, @@ -123,7 +123,7 @@ wait_on_bit_timeout(unsigned long *word, int bit, unsigned mode, unsigned long timeout) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit_timeout(word, bit, bit_wait_timeout, @@ -151,7 +151,7 @@ wait_on_bit_action(unsigned long *word, int bit, wait_bit_action_f *action, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, action, mode); } diff --git a/kernel/sched/wait_bit.c b/kernel/sched/wait_bit.c index d4788f810b55..0b1cd985dc27 100644 --- a/kernel/sched/wait_bit.c +++ b/kernel/sched/wait_bit.c @@ -47,7 +47,7 @@ __wait_on_bit(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_ prepare_to_wait(wq_head, &wbq_entry->wq_entry, mode); if (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags)) ret = (*action)(&wbq_entry->key, mode); - } while (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret); + } 
while (test_bit_acquire(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret); finish_wait(wq_head, &wbq_entry->wq_entry); ^ permalink raw reply related [flat|nested] 48+ messages in thread
* Re: [PATCH] wait_on_bit: add an acquire memory barrier 2022-08-22 17:39 ` Linus Torvalds @ 2022-08-25 21:03 ` Mikulas Patocka 2022-08-25 21:54 ` Linus Torvalds 0 siblings, 1 reply; 48+ messages in thread From: Mikulas Patocka @ 2022-08-25 21:03 UTC (permalink / raw) To: Linus Torvalds Cc: Alan Stern, Andrea Parri, Will Deacon, Peter Zijlstra, Boqun Feng, Nicholas Piggin, David Howells, Jade Alglave, Luc Maranget, Paul E. McKenney, Akira Yokosawa, Daniel Lustig, Joel Fernandes, linux-kernel, linux-arch On Mon, 22 Aug 2022, Linus Torvalds wrote: > On Mon, Aug 22, 2022 at 10:08 AM Linus Torvalds > <torvalds@linux-foundation.org> wrote: > > > > So why don't we just create a "test_bit_acquire()" and be done with > > it? We literally created clear_bit_unlock() for the opposite reason, > > and your comments about the new barrier hack even point to it. > > Here's a patch that is > > (a) almost entirely untested (I checked that one single case builds > and seems to generate the expected code) > > (b) needs some more loving > > but seems to superficially work. > > At a minimum this needs to be split into two (so the bitop and the > wait_on_bit parts split up), and that whole placement of > <asm/barrier.h> and generic_bit_test_acquire() need at least some > thinking about, but on the whole it seems reasonable. > > For example, it would make more sense to have this in > <asm-generic/bitops/lock.h>, but not all architectures include that, > and some do their own version. I didn't want to mess with > architecture-specific headers, so this illogically just uses > generic-non-atomic.h. > > Maybe just put it in <linux/bitops.h> directly? > > So I'm not at all claiming that this is a great patch. It definitely > needs more work, and a lot more testing. > > But I think this is at least the right _direction_ to take here. 
> > And yes, I think it also would have been better if > "clear_bit_unlock()" would have been called "clear_bit_release()", and > we'd have more consistent naming with our ordered atomics. But it's > probably not worth changing. > > Linus Hi Here I reworked your patch, so that test_bit_acquire is defined just like test_bit. There's some code duplication (in include/asm-generic/bitops/generic-non-atomic.h and in arch/x86/include/asm/bitops.h), but that duplication exists in the test_bit function too. I tested it on x86-64 and arm64. On x86-64 it generates the "bt" instruction for variable-bit test and "shr; and $1" for constant bit test. On arm64 it generates the "ldar" instruction for both constant and variable bit test. For me, the kernel 6.0-rc2 doesn't boot in an arm64 virtual machine at all (with or without this patch), so I only compile-tested it on arm64. I have to bisect it. Mikulas From: Mikulas Patocka <mpatocka@redhat.com> There are several places in the kernel where wait_on_bit is not followed by a memory barrier (for example, in drivers/md/dm-bufio.c:new_read). On architectures with weak memory ordering, it may happen that memory accesses that follow wait_on_bit are reordered before wait_on_bit and they may return invalid data. Fix this class of bugs by introducing a new function "test_bit_acquire" that works like test_bit, but has acquire memory ordering semantics. 
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com> Cc: stable@vger.kernel.org arch/x86/include/asm/bitops.h | 13 +++++++++++++ include/asm-generic/bitops/generic-non-atomic.h | 14 ++++++++++++++ include/asm-generic/bitops/instrumented-non-atomic.h | 12 ++++++++++++ include/asm-generic/bitops/non-atomic.h | 1 + include/asm-generic/bitops/non-instrumented-non-atomic.h | 1 + include/linux/bitops.h | 1 + include/linux/buffer_head.h | 2 +- include/linux/wait_bit.h | 8 ++++---- kernel/sched/wait_bit.c | 2 +- 9 files changed, 48 insertions(+), 6 deletions(-) Index: linux-2.6/include/asm-generic/bitops/generic-non-atomic.h =================================================================== --- linux-2.6.orig/include/asm-generic/bitops/generic-non-atomic.h +++ linux-2.6/include/asm-generic/bitops/generic-non-atomic.h @@ -4,6 +4,7 @@ #define __ASM_GENERIC_BITOPS_GENERIC_NON_ATOMIC_H #include <linux/bits.h> +#include <asm/barrier.h> #ifndef _LINUX_BITOPS_H #error only <linux/bitops.h> can be included directly @@ -127,6 +128,18 @@ generic_test_bit(unsigned long nr, const return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); } +/** + * generic_test_bit_acquire - Determine whether a bit is set with acquire semantics + * @nr: bit number to test + * @addr: Address to start counting from + */ +static __always_inline bool +generic_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + /* * const_*() definitions provide good compile-time optimizations when * the passed arguments can be resolved at compile time.
@@ -137,6 +150,7 @@ generic_test_bit(unsigned long nr, const #define const___test_and_set_bit generic___test_and_set_bit #define const___test_and_clear_bit generic___test_and_clear_bit #define const___test_and_change_bit generic___test_and_change_bit +#define const_test_bit_acquire generic_test_bit_acquire /** * const_test_bit - Determine whether a bit is set Index: linux-2.6/include/asm-generic/bitops/non-atomic.h =================================================================== --- linux-2.6.orig/include/asm-generic/bitops/non-atomic.h +++ linux-2.6/include/asm-generic/bitops/non-atomic.h @@ -13,6 +13,7 @@ #define arch___test_and_change_bit generic___test_and_change_bit #define arch_test_bit generic_test_bit +#define arch_test_bit_acquire generic_test_bit_acquire #include <asm-generic/bitops/non-instrumented-non-atomic.h> Index: linux-2.6/include/linux/bitops.h =================================================================== --- linux-2.6.orig/include/linux/bitops.h +++ linux-2.6/include/linux/bitops.h @@ -59,6 +59,7 @@ extern unsigned long __sw_hweight64(__u6 #define __test_and_clear_bit(nr, addr) bitop(___test_and_clear_bit, nr, addr) #define __test_and_change_bit(nr, addr) bitop(___test_and_change_bit, nr, addr) #define test_bit(nr, addr) bitop(_test_bit, nr, addr) +#define test_bit_acquire(nr, addr) bitop(_test_bit_acquire, nr, addr) /* * Include this here because some architectures need generic_ffs/fls in Index: linux-2.6/include/linux/wait_bit.h =================================================================== --- linux-2.6.orig/include/linux/wait_bit.h +++ linux-2.6/include/linux/wait_bit.h @@ -71,7 +71,7 @@ static inline int wait_on_bit(unsigned long *word, int bit, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, bit_wait, @@ -96,7 +96,7 @@ static inline int wait_on_bit_io(unsigned long *word, int bit, unsigned mode) { might_sleep(); - if 
(!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, bit_wait_io, @@ -123,7 +123,7 @@ wait_on_bit_timeout(unsigned long *word, unsigned long timeout) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit_timeout(word, bit, bit_wait_timeout, @@ -151,7 +151,7 @@ wait_on_bit_action(unsigned long *word, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, action, mode); } Index: linux-2.6/kernel/sched/wait_bit.c =================================================================== --- linux-2.6.orig/kernel/sched/wait_bit.c +++ linux-2.6/kernel/sched/wait_bit.c @@ -47,7 +47,7 @@ __wait_on_bit(struct wait_queue_head *wq prepare_to_wait(wq_head, &wbq_entry->wq_entry, mode); if (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags)) ret = (*action)(&wbq_entry->key, mode); - } while (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret); + } while (test_bit_acquire(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret); finish_wait(wq_head, &wbq_entry->wq_entry); Index: linux-2.6/include/asm-generic/bitops/non-instrumented-non-atomic.h =================================================================== --- linux-2.6.orig/include/asm-generic/bitops/non-instrumented-non-atomic.h +++ linux-2.6/include/asm-generic/bitops/non-instrumented-non-atomic.h @@ -12,5 +12,6 @@ #define ___test_and_change_bit arch___test_and_change_bit #define _test_bit arch_test_bit +#define _test_bit_acquire arch_test_bit_acquire #endif /* __ASM_GENERIC_BITOPS_NON_INSTRUMENTED_NON_ATOMIC_H */ Index: linux-2.6/arch/x86/include/asm/bitops.h =================================================================== --- linux-2.6.orig/arch/x86/include/asm/bitops.h +++ linux-2.6/arch/x86/include/asm/bitops.h @@ -207,6 +207,12 @@ static __always_inline bool constant_tes (addr[nr >> 
_BITOPS_LONG_SHIFT])) != 0; } +static __always_inline bool constant_test_bit_acquire(long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + static __always_inline bool variable_test_bit(long nr, volatile const unsigned long *addr) { bool oldbit; @@ -226,6 +232,13 @@ arch_test_bit(unsigned long nr, const vo variable_test_bit(nr, addr); } +static __always_inline bool +arch_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + return __builtin_constant_p(nr) ? constant_test_bit_acquire(nr, addr) : + variable_test_bit(nr, addr); +} + /** * __ffs - find first set bit in word * @word: The word to search Index: linux-2.6/include/asm-generic/bitops/instrumented-non-atomic.h =================================================================== --- linux-2.6.orig/include/asm-generic/bitops/instrumented-non-atomic.h +++ linux-2.6/include/asm-generic/bitops/instrumented-non-atomic.h @@ -142,4 +142,16 @@ _test_bit(unsigned long nr, const volati return arch_test_bit(nr, addr); } +/** + * _test_bit_acquire - Determine whether a bit is set with acquire semantics + * @nr: bit number to test + * @addr: Address to start counting from + */ +static __always_inline bool +_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + instrument_atomic_read(addr + BIT_WORD(nr), sizeof(long)); + return arch_test_bit_acquire(nr, addr); +} + #endif /* _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H */ Index: linux-2.6/include/linux/buffer_head.h =================================================================== --- linux-2.6.orig/include/linux/buffer_head.h +++ linux-2.6/include/linux/buffer_head.h @@ -156,7 +156,7 @@ static __always_inline int buffer_uptoda * make it consistent with folio_test_uptodate * pairs with smp_mb__before_atomic in set_buffer_uptodate */ - return (smp_load_acquire(&bh->b_state) & (1UL << BH_Uptodate)) != 0; + 
return test_bit_acquire(BH_Uptodate, &bh->b_state); } #define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK) ^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH] wait_on_bit: add an acquire memory barrier 2022-08-25 21:03 ` Mikulas Patocka @ 2022-08-25 21:54 ` Linus Torvalds 2022-08-26 13:17 ` [PATCH v3] " Mikulas Patocka 0 siblings, 1 reply; 48+ messages in thread From: Linus Torvalds @ 2022-08-25 21:54 UTC (permalink / raw) To: Mikulas Patocka Cc: Alan Stern, Andrea Parri, Will Deacon, Peter Zijlstra, Boqun Feng, Nicholas Piggin, David Howells, Jade Alglave, Luc Maranget, Paul E. McKenney, Akira Yokosawa, Daniel Lustig, Joel Fernandes, linux-kernel, linux-arch On Thu, Aug 25, 2022 at 2:03 PM Mikulas Patocka <mpatocka@redhat.com> wrote: > > Here I reworked your patch, so that test_bit_acquire is defined just like > test_bit. There's some code duplication (in > include/asm-generic/bitops/generic-non-atomic.h and in > arch/x86/include/asm/bitops.h), but that duplication exists in the > test_bit function too. This looks fine to me, and I like how you fixed up buffer_uptodate() while at it. > I tested it on x86-64 and arm64. On x86-64 it generates the "bt" > instruction for variable-bit test and "shr; and $1" for constant bit test. That shr/and is almost certainly pessimal for small constant values at least, and it's better done as "movq %rax" followed by "test %rax". But I guess it depends on the bit value (and thus the constant size). Doing a "testb $imm8" would likely be optimal, but you'll never get that with smp_load_acquire() on x86 unless you use inline asm, because of how we're doing it with a volatile pointer. Anyway, you could try something like this: static __always_inline bool constant_test_bit(long nr, const volatile unsigned long *addr) { bool oldbit; asm volatile("testb %2,%1" CC_SET(nz) : CC_OUT(nz) (oldbit) : "m" (((unsigned char *)addr)[nr >> 3]), "Ir" (1 << (nr & 7)) :"memory"); return oldbit; } for both the regular test_bit() and for the acquire (since all loads are acquires on x86, and using an asm basically forces a memory load so it just does that "volatile" part. 
But that's a separate optimization and independent of the acquire thing. > For me, the kernel 6.0-rc2 doesn't boot in an arm64 virtual machine at all > (with or without this patch), so I only compile-tested it on arm64. I have > to bisect it. Hmm. I'm running it on real arm64 hardware (rc2+ - not your patch), so I wonder what's up.. Linus ^ permalink raw reply [flat|nested] 48+ messages in thread
* [PATCH v3] wait_on_bit: add an acquire memory barrier 2022-08-25 21:54 ` Linus Torvalds @ 2022-08-26 13:17 ` Mikulas Patocka 2022-08-26 19:23 ` Geert Uytterhoeven 0 siblings, 1 reply; 48+ messages in thread From: Mikulas Patocka @ 2022-08-26 13:17 UTC (permalink / raw) To: Linus Torvalds Cc: Alan Stern, Andrea Parri, Will Deacon, Peter Zijlstra, Boqun Feng, Nicholas Piggin, David Howells, Jade Alglave, Luc Maranget, Paul E. McKenney, Akira Yokosawa, Daniel Lustig, Joel Fernandes, linux-kernel, linux-arch On Thu, 25 Aug 2022, Linus Torvalds wrote: > On Thu, Aug 25, 2022 at 2:03 PM Mikulas Patocka <mpatocka@redhat.com> wrote: > > > > Here I reworked your patch, so that test_bit_acquire is defined just like > > test_bit. There's some code duplication (in > > include/asm-generic/bitops/generic-non-atomic.h and in > > arch/x86/include/asm/bitops.h), but that duplication exists in the > > test_bit function too. > > This looks fine to me, and I like how you fixed up buffer_uptodate() > while at it. > > > I tested it on x86-64 and arm64. On x86-64 it generates the "bt" > > instruction for variable-bit test and "shr; and $1" for constant bit test. > > That shr/and is almost certainly pessimal for small constant values at > least, and it's better done as "movq %rax" followed by "test %rax". > But I guess it depends on the bit value (and thus the constant size). > > Doing a "testb $imm8" would likely be optimal, but you'll never get > that with smp_load_acquire() on x86 unless you use inline asm, because > of how we're doing it with a volatile pointer. 
> > Anyway, you could try something like this: > > static __always_inline bool constant_test_bit(long nr, const > volatile unsigned long *addr) > { > bool oldbit; > > asm volatile("testb %2,%1" > CC_SET(nz) > : CC_OUT(nz) (oldbit) > : "m" (((unsigned char *)addr)[nr >> 3]), > "Ir" (1 << (nr & 7)) > :"memory"); > return oldbit; > } > > for both the regular test_bit() and for the acquire (since all loads > are acquires on x86, and using an asm basically forces a memory load > so it just does that "volatile" part. I wouldn't do this for regular test_bit because if you read memory with different size/alignment from what you wrote, various CPUs suffer from store->load forwarding penalties. But for test_bit_acquire this optimization is likely harmless because the bit will not be tested a few instructions after writing it. > But that's a separate optimization and independent of the acquire thing. > > > For me, the kernel 6.0-rc2 doesn't boot in an arm64 virtual machine at all > > (with or without this patch), so I only compile-tested it on arm64. I have > > to bisect it. > > Hmm. I'm running it on real arm64 hardware (rc2+ - not your patch), so > I wonder what's up.. > > Linus > This is version 3 of the patch. Changes: * use assembler "testb" in constant_test_bit_acquire * fix some comments as suggested by Alan Stern * fix Documentation/atomic_bitops.txt (note that since the commit 415d832497098030241605c52ea83d4e2cfa7879, test_and_set/clear_bit is always ordered, so fix this claim as well) Mikulas From: Mikulas Patocka <mpatocka@redhat.com> There are several places in the kernel where wait_on_bit is not followed by a memory barrier (for example, in drivers/md/dm-bufio.c:new_read). On architectures with weak memory ordering, it may happen that memory accesses that follow wait_on_bit are reordered before wait_on_bit and they may return invalid data. 
Fix this class of bugs by introducing a new function "test_bit_acquire" that works like test_bit, but has acquire memory ordering semantics. Signed-off-by: Mikulas Patocka <mpatocka@redhat.com> Cc: stable@vger.kernel.org Documentation/atomic_bitops.txt | 10 ++----- arch/x86/include/asm/bitops.h | 21 +++++++++++++++ include/asm-generic/bitops/generic-non-atomic.h | 14 ++++++++++ include/asm-generic/bitops/instrumented-non-atomic.h | 12 ++++++++ include/asm-generic/bitops/non-atomic.h | 1 include/asm-generic/bitops/non-instrumented-non-atomic.h | 1 include/linux/bitops.h | 1 include/linux/buffer_head.h | 2 - include/linux/wait_bit.h | 8 ++--- kernel/sched/wait_bit.c | 2 - 10 files changed, 60 insertions(+), 12 deletions(-) Index: linux-2.6/include/asm-generic/bitops/generic-non-atomic.h =================================================================== --- linux-2.6.orig/include/asm-generic/bitops/generic-non-atomic.h +++ linux-2.6/include/asm-generic/bitops/generic-non-atomic.h @@ -4,6 +4,7 @@ #define __ASM_GENERIC_BITOPS_GENERIC_NON_ATOMIC_H #include <linux/bits.h> +#include <asm/barrier.h> #ifndef _LINUX_BITOPS_H #error only <linux/bitops.h> can be included directly @@ -127,6 +128,18 @@ generic_test_bit(unsigned long nr, const return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); } +/** + * generic_test_bit_acquire - Determine, with acquire semantics, whether a bit is set + * @nr: bit number to test + * @addr: Address to start counting from + */ +static __always_inline bool +generic_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + /* * const_*() definitions provide good compile-time optimizations when * the passed arguments can be resolved at compile time. 
@@ -137,6 +150,7 @@ generic_test_bit(unsigned long nr, const #define const___test_and_set_bit generic___test_and_set_bit #define const___test_and_clear_bit generic___test_and_clear_bit #define const___test_and_change_bit generic___test_and_change_bit +#define const_test_bit_acquire generic_test_bit_acquire /** * const_test_bit - Determine whether a bit is set Index: linux-2.6/include/asm-generic/bitops/non-atomic.h =================================================================== --- linux-2.6.orig/include/asm-generic/bitops/non-atomic.h +++ linux-2.6/include/asm-generic/bitops/non-atomic.h @@ -13,6 +13,7 @@ #define arch___test_and_change_bit generic___test_and_change_bit #define arch_test_bit generic_test_bit +#define arch_test_bit_acquire generic_test_bit_acquire #include <asm-generic/bitops/non-instrumented-non-atomic.h> Index: linux-2.6/include/linux/bitops.h =================================================================== --- linux-2.6.orig/include/linux/bitops.h +++ linux-2.6/include/linux/bitops.h @@ -59,6 +59,7 @@ extern unsigned long __sw_hweight64(__u6 #define __test_and_clear_bit(nr, addr) bitop(___test_and_clear_bit, nr, addr) #define __test_and_change_bit(nr, addr) bitop(___test_and_change_bit, nr, addr) #define test_bit(nr, addr) bitop(_test_bit, nr, addr) +#define test_bit_acquire(nr, addr) bitop(_test_bit_acquire, nr, addr) /* * Include this here because some architectures need generic_ffs/fls in Index: linux-2.6/include/linux/wait_bit.h =================================================================== --- linux-2.6.orig/include/linux/wait_bit.h +++ linux-2.6/include/linux/wait_bit.h @@ -71,7 +71,7 @@ static inline int wait_on_bit(unsigned long *word, int bit, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, bit_wait, @@ -96,7 +96,7 @@ static inline int wait_on_bit_io(unsigned long *word, int bit, unsigned mode) { might_sleep(); - if 
(!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, bit_wait_io, @@ -123,7 +123,7 @@ wait_on_bit_timeout(unsigned long *word, unsigned long timeout) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit_timeout(word, bit, bit_wait_timeout, @@ -151,7 +151,7 @@ wait_on_bit_action(unsigned long *word, unsigned mode) { might_sleep(); - if (!test_bit(bit, word)) + if (!test_bit_acquire(bit, word)) return 0; return out_of_line_wait_on_bit(word, bit, action, mode); } Index: linux-2.6/kernel/sched/wait_bit.c =================================================================== --- linux-2.6.orig/kernel/sched/wait_bit.c +++ linux-2.6/kernel/sched/wait_bit.c @@ -47,7 +47,7 @@ __wait_on_bit(struct wait_queue_head *wq prepare_to_wait(wq_head, &wbq_entry->wq_entry, mode); if (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags)) ret = (*action)(&wbq_entry->key, mode); - } while (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret); + } while (test_bit_acquire(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret); finish_wait(wq_head, &wbq_entry->wq_entry); Index: linux-2.6/include/asm-generic/bitops/non-instrumented-non-atomic.h =================================================================== --- linux-2.6.orig/include/asm-generic/bitops/non-instrumented-non-atomic.h +++ linux-2.6/include/asm-generic/bitops/non-instrumented-non-atomic.h @@ -12,5 +12,6 @@ #define ___test_and_change_bit arch___test_and_change_bit #define _test_bit arch_test_bit +#define _test_bit_acquire arch_test_bit_acquire #endif /* __ASM_GENERIC_BITOPS_NON_INSTRUMENTED_NON_ATOMIC_H */ Index: linux-2.6/arch/x86/include/asm/bitops.h =================================================================== --- linux-2.6.orig/arch/x86/include/asm/bitops.h +++ linux-2.6/arch/x86/include/asm/bitops.h @@ -207,6 +207,20 @@ static __always_inline bool constant_tes (addr[nr >> 
_BITOPS_LONG_SHIFT])) != 0; } +static __always_inline bool constant_test_bit_acquire(long nr, const volatile unsigned long *addr) +{ + bool oldbit; + + asm volatile("testb %2,%1" + CC_SET(nz) + : CC_OUT(nz) (oldbit) + : "m" (((unsigned char *)addr)[nr >> 3]), + "i" (1 << (nr & 7)) + :"memory"); + + return oldbit; +} + static __always_inline bool variable_test_bit(long nr, volatile const unsigned long *addr) { bool oldbit; @@ -226,6 +240,13 @@ arch_test_bit(unsigned long nr, const vo variable_test_bit(nr, addr); } +static __always_inline bool +arch_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + return __builtin_constant_p(nr) ? constant_test_bit_acquire(nr, addr) : + variable_test_bit(nr, addr); +} + /** * __ffs - find first set bit in word * @word: The word to search Index: linux-2.6/include/asm-generic/bitops/instrumented-non-atomic.h =================================================================== --- linux-2.6.orig/include/asm-generic/bitops/instrumented-non-atomic.h +++ linux-2.6/include/asm-generic/bitops/instrumented-non-atomic.h @@ -142,4 +142,16 @@ _test_bit(unsigned long nr, const volati return arch_test_bit(nr, addr); } +/** + * _test_bit_acquire - Determine, with acquire semantics, whether a bit is set + * @nr: bit number to test + * @addr: Address to start counting from + */ +static __always_inline bool +_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + instrument_atomic_read(addr + BIT_WORD(nr), sizeof(long)); + return arch_test_bit_acquire(nr, addr); +} + #endif /* _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H */ Index: linux-2.6/include/linux/buffer_head.h =================================================================== --- linux-2.6.orig/include/linux/buffer_head.h +++ linux-2.6/include/linux/buffer_head.h @@ -156,7 +156,7 @@ static __always_inline int buffer_uptoda * make it consistent with folio_test_uptodate * pairs with smp_mb__before_atomic in set_buffer_uptodate */ - return 
(smp_load_acquire(&bh->b_state) & (1UL << BH_Uptodate)) != 0; + return test_bit_acquire(BH_Uptodate, &bh->b_state); } #define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK) Index: linux-2.6/Documentation/atomic_bitops.txt =================================================================== --- linux-2.6.orig/Documentation/atomic_bitops.txt +++ linux-2.6/Documentation/atomic_bitops.txt @@ -58,13 +58,11 @@ Like with atomic_t, the rule of thumb is - RMW operations that have a return value are fully ordered. - - RMW operations that are conditional are unordered on FAILURE, - otherwise the above rules apply. In the case of test_and_set_bit_lock(), - if the bit in memory is unchanged by the operation then it is deemed to have - failed. + - RMW operations that are conditional are fully ordered. -Except for a successful test_and_set_bit_lock() which has ACQUIRE semantics and -clear_bit_unlock() which has RELEASE semantics. +Except for a successful test_and_set_bit_lock() which has ACQUIRE semantics, +clear_bit_unlock() which has RELEASE semantics and test_bit_acquire which has +ACQUIRE semantics. Since a platform only has a single means of achieving atomic operations the same barriers as for atomic_t are used, see atomic_t.txt. ^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH v3] wait_on_bit: add an acquire memory barrier 2022-08-26 13:17 ` [PATCH v3] " Mikulas Patocka @ 2022-08-26 19:23 ` Geert Uytterhoeven 2022-08-26 20:03 ` [PATCH] provide arch_test_bit_acquire for architectures that define test_bit Mikulas Patocka 2022-08-26 20:03 ` [PATCH v3] wait_on_bit: add an acquire memory barrier Linus Torvalds 0 siblings, 2 replies; 48+ messages in thread From: Geert Uytterhoeven @ 2022-08-26 19:23 UTC (permalink / raw) To: Mikulas Patocka Cc: Linus Torvalds, Alan Stern, Andrea Parri, Will Deacon, Peter Zijlstra, Boqun Feng, Nicholas Piggin, David Howells, Jade Alglave, Luc Maranget, Paul E. McKenney, Akira Yokosawa, Daniel Lustig, Joel Fernandes, Linux Kernel Mailing List, Linux-Arch Hi Mikulas, On Fri, Aug 26, 2022 at 3:18 PM Mikulas Patocka <mpatocka@redhat.com> wrote: > On Thu, 25 Aug 2022, Linus Torvalds wrote: > > On Thu, Aug 25, 2022 at 2:03 PM Mikulas Patocka <mpatocka@redhat.com> wrote: > > > Here I reworked your patch, so that test_bit_acquire is defined just like > > > test_bit. There's some code duplication (in > > > include/asm-generic/bitops/generic-non-atomic.h and in > > > arch/x86/include/asm/bitops.h), but that duplication exists in the > > > test_bit function too. > > > > This looks fine to me, and I like how you fixed up buffer_uptodate() > > while at it. > > > > > I tested it on x86-64 and arm64. On x86-64 it generates the "bt" > > > instruction for variable-bit test and "shr; and $1" for constant bit test. > > > > That shr/and is almost certainly pessimal for small constant values at > > least, and it's better done as "movq %rax" followed by "test %rax". > > But I guess it depends on the bit value (and thus the constant size). > > > > Doing a "testb $imm8" would likely be optimal, but you'll never get > > that with smp_load_acquire() on x86 unless you use inline asm, because > > of how we're doing it with a volatile pointer. 
> > > > Anyway, you could try something like this: > > > > static __always_inline bool constant_test_bit(long nr, const > > volatile unsigned long *addr) > > { > > bool oldbit; > > > > asm volatile("testb %2,%1" > > CC_SET(nz) > > : CC_OUT(nz) (oldbit) > > : "m" (((unsigned char *)addr)[nr >> 3]), > > "Ir" (1 << (nr & 7)) > > :"memory"); > > return oldbit; > > } > > > > for both the regular test_bit() and for the acquire (since all loads > > are acquires on x86, and using an asm basically forces a memory load > > so it just does that "volatile" part. > > I wouldn't do this for regular test_bit because if you read memory with > different size/alignment from what you wrote, various CPUs suffer from > store->load forwarding penalties. > > But for test_bit_acqure this optimization is likely harmless because the > bit will not be tested a few instructions after writing it. > > > But that's a separate optimization and independent of the acquire thing. > > > > > For me, the kernel 6.0-rc2 doesn't boot in an arm64 virtual machine at all > > > (with or without this patch), so I only compile-tested it on arm64. I have > > > to bisect it. > > > > Hmm. I'm running it on real arm64 hardware (rc2+ - not your patch), so > > I wonder what's up.. > > > > Linus > > > > This is version 3 of the patch. Changes: > * use assembler "testb" in constant_test_bit_acquire > * fix some comments as suggeste by Alan Stern > * fix Documentation/atomic_bitops.txt (note that since the commit > 415d832497098030241605c52ea83d4e2cfa7879, test_and_set/clear_bit is > always ordered, so fix this claim as well) > > Mikulas > > > > > From: Mikulas Patocka <mpatocka@redhat.com> > > There are several places in the kernel where wait_on_bit is not followed > by a memory barrier (for example, in drivers/md/dm-bufio.c:new_read). On > architectures with weak memory ordering, it may happen that memory > accesses that follow wait_on_bit are reordered before wait_on_bit and they > may return invalid data. 
> > Fix this class of bugs by introducing a new function "test_bit_acquire" > that works like test_bit, but has acquire memory ordering semantics. > > Signed-off-by: Mikulas Patocka <mpatocka@redhat.com> Thanks for your patch, which is now commit 8238b4579866b7c1 ("wait_on_bit: add an acquire memory barrier"). noreply@ellerman.id.au reports lots of build failures on m68k: include/asm-generic/bitops/non-instrumented-non-atomic.h:15:33: error: implicit declaration of function 'arch_test_bit_acquire'; did you mean '_test_bit_acquire'? [-Werror=implicit-function-declaration] which I've bisected to this commit. http://kisskb.ellerman.id.au/kisskb/head/3e5c673f0d75bc22b3c26eade87e4db4f374cd34 > Cc: stable@vger.kernel.org > > Documentation/atomic_bitops.txt | 10 ++----- > arch/x86/include/asm/bitops.h | 21 +++++++++++++++ > include/asm-generic/bitops/generic-non-atomic.h | 14 ++++++++++ > include/asm-generic/bitops/instrumented-non-atomic.h | 12 ++++++++ > include/asm-generic/bitops/non-atomic.h | 1 > include/asm-generic/bitops/non-instrumented-non-atomic.h | 1 > include/linux/bitops.h | 1 > include/linux/buffer_head.h | 2 - > include/linux/wait_bit.h | 8 ++--- > kernel/sched/wait_bit.c | 2 - > 10 files changed, 60 insertions(+), 12 deletions(-) > > Index: linux-2.6/include/asm-generic/bitops/generic-non-atomic.h > =================================================================== > --- linux-2.6.orig/include/asm-generic/bitops/generic-non-atomic.h > +++ linux-2.6/include/asm-generic/bitops/generic-non-atomic.h > @@ -4,6 +4,7 @@ > #define __ASM_GENERIC_BITOPS_GENERIC_NON_ATOMIC_H > > #include <linux/bits.h> > +#include <asm/barrier.h> > > #ifndef _LINUX_BITOPS_H > #error only <linux/bitops.h> can be included directly > @@ -127,6 +128,18 @@ generic_test_bit(unsigned long nr, const > return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); > } > > +/** > + * generic_test_bit_acquire - Determine, with acquire semantics, whether a bit is set > + * @nr: bit number to 
test > + * @addr: Address to start counting from > + */ > +static __always_inline bool > +generic_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) > +{ > + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); > + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); > +} > + > /* > * const_*() definitions provide good compile-time optimizations when > * the passed arguments can be resolved at compile time. > @@ -137,6 +150,7 @@ generic_test_bit(unsigned long nr, const > #define const___test_and_set_bit generic___test_and_set_bit > #define const___test_and_clear_bit generic___test_and_clear_bit > #define const___test_and_change_bit generic___test_and_change_bit > +#define const_test_bit_acquire generic_test_bit_acquire > > /** > * const_test_bit - Determine whether a bit is set > Index: linux-2.6/include/asm-generic/bitops/non-atomic.h > =================================================================== > --- linux-2.6.orig/include/asm-generic/bitops/non-atomic.h > +++ linux-2.6/include/asm-generic/bitops/non-atomic.h > @@ -13,6 +13,7 @@ > #define arch___test_and_change_bit generic___test_and_change_bit > > #define arch_test_bit generic_test_bit > +#define arch_test_bit_acquire generic_test_bit_acquire > > #include <asm-generic/bitops/non-instrumented-non-atomic.h> > > Index: linux-2.6/include/linux/bitops.h > =================================================================== > --- linux-2.6.orig/include/linux/bitops.h > +++ linux-2.6/include/linux/bitops.h > @@ -59,6 +59,7 @@ extern unsigned long __sw_hweight64(__u6 > #define __test_and_clear_bit(nr, addr) bitop(___test_and_clear_bit, nr, addr) > #define __test_and_change_bit(nr, addr) bitop(___test_and_change_bit, nr, addr) > #define test_bit(nr, addr) bitop(_test_bit, nr, addr) > +#define test_bit_acquire(nr, addr) bitop(_test_bit_acquire, nr, addr) > > /* > * Include this here because some architectures need generic_ffs/fls in > Index: linux-2.6/include/linux/wait_bit.h > 
=================================================================== > --- linux-2.6.orig/include/linux/wait_bit.h > +++ linux-2.6/include/linux/wait_bit.h > @@ -71,7 +71,7 @@ static inline int > wait_on_bit(unsigned long *word, int bit, unsigned mode) > { > might_sleep(); > - if (!test_bit(bit, word)) > + if (!test_bit_acquire(bit, word)) > return 0; > return out_of_line_wait_on_bit(word, bit, > bit_wait, > @@ -96,7 +96,7 @@ static inline int > wait_on_bit_io(unsigned long *word, int bit, unsigned mode) > { > might_sleep(); > - if (!test_bit(bit, word)) > + if (!test_bit_acquire(bit, word)) > return 0; > return out_of_line_wait_on_bit(word, bit, > bit_wait_io, > @@ -123,7 +123,7 @@ wait_on_bit_timeout(unsigned long *word, > unsigned long timeout) > { > might_sleep(); > - if (!test_bit(bit, word)) > + if (!test_bit_acquire(bit, word)) > return 0; > return out_of_line_wait_on_bit_timeout(word, bit, > bit_wait_timeout, > @@ -151,7 +151,7 @@ wait_on_bit_action(unsigned long *word, > unsigned mode) > { > might_sleep(); > - if (!test_bit(bit, word)) > + if (!test_bit_acquire(bit, word)) > return 0; > return out_of_line_wait_on_bit(word, bit, action, mode); > } > Index: linux-2.6/kernel/sched/wait_bit.c > =================================================================== > --- linux-2.6.orig/kernel/sched/wait_bit.c > +++ linux-2.6/kernel/sched/wait_bit.c > @@ -47,7 +47,7 @@ __wait_on_bit(struct wait_queue_head *wq > prepare_to_wait(wq_head, &wbq_entry->wq_entry, mode); > if (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags)) > ret = (*action)(&wbq_entry->key, mode); > - } while (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret); > + } while (test_bit_acquire(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret); > > finish_wait(wq_head, &wbq_entry->wq_entry); > > Index: linux-2.6/include/asm-generic/bitops/non-instrumented-non-atomic.h > =================================================================== > --- 
linux-2.6.orig/include/asm-generic/bitops/non-instrumented-non-atomic.h > +++ linux-2.6/include/asm-generic/bitops/non-instrumented-non-atomic.h > @@ -12,5 +12,6 @@ > #define ___test_and_change_bit arch___test_and_change_bit > > #define _test_bit arch_test_bit > +#define _test_bit_acquire arch_test_bit_acquire > > #endif /* __ASM_GENERIC_BITOPS_NON_INSTRUMENTED_NON_ATOMIC_H */ > Index: linux-2.6/arch/x86/include/asm/bitops.h > =================================================================== > --- linux-2.6.orig/arch/x86/include/asm/bitops.h > +++ linux-2.6/arch/x86/include/asm/bitops.h > @@ -207,6 +207,20 @@ static __always_inline bool constant_tes > (addr[nr >> _BITOPS_LONG_SHIFT])) != 0; > } > > +static __always_inline bool constant_test_bit_acquire(long nr, const volatile unsigned long *addr) > +{ > + bool oldbit; > + > + asm volatile("testb %2,%1" > + CC_SET(nz) > + : CC_OUT(nz) (oldbit) > + : "m" (((unsigned char *)addr)[nr >> 3]), > + "i" (1 << (nr & 7)) > + :"memory"); > + > + return oldbit; > +} > + > static __always_inline bool variable_test_bit(long nr, volatile const unsigned long *addr) > { > bool oldbit; > @@ -226,6 +240,13 @@ arch_test_bit(unsigned long nr, const vo > variable_test_bit(nr, addr); > } > > +static __always_inline bool > +arch_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) > +{ > + return __builtin_constant_p(nr) ? 
constant_test_bit_acquire(nr, addr) : > + variable_test_bit(nr, addr); > +} > + > /** > * __ffs - find first set bit in word > * @word: The word to search > Index: linux-2.6/include/asm-generic/bitops/instrumented-non-atomic.h > =================================================================== > --- linux-2.6.orig/include/asm-generic/bitops/instrumented-non-atomic.h > +++ linux-2.6/include/asm-generic/bitops/instrumented-non-atomic.h > @@ -142,4 +142,16 @@ _test_bit(unsigned long nr, const volati > return arch_test_bit(nr, addr); > } > > +/** > + * _test_bit_acquire - Determine, with acquire semantics, whether a bit is set > + * @nr: bit number to test > + * @addr: Address to start counting from > + */ > +static __always_inline bool > +_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) > +{ > + instrument_atomic_read(addr + BIT_WORD(nr), sizeof(long)); > + return arch_test_bit_acquire(nr, addr); > +} > + > #endif /* _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H */ > Index: linux-2.6/include/linux/buffer_head.h > =================================================================== > --- linux-2.6.orig/include/linux/buffer_head.h > +++ linux-2.6/include/linux/buffer_head.h > @@ -156,7 +156,7 @@ static __always_inline int buffer_uptoda > * make it consistent with folio_test_uptodate > * pairs with smp_mb__before_atomic in set_buffer_uptodate > */ > - return (smp_load_acquire(&bh->b_state) & (1UL << BH_Uptodate)) != 0; > + return test_bit_acquire(BH_Uptodate, &bh->b_state); > } > > #define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK) > Index: linux-2.6/Documentation/atomic_bitops.txt > =================================================================== > --- linux-2.6.orig/Documentation/atomic_bitops.txt > +++ linux-2.6/Documentation/atomic_bitops.txt > @@ -58,13 +58,11 @@ Like with atomic_t, the rule of thumb is > > - RMW operations that have a return value are fully ordered. 
> > - - RMW operations that are conditional are unordered on FAILURE, > - otherwise the above rules apply. In the case of test_and_set_bit_lock(), > - if the bit in memory is unchanged by the operation then it is deemed to have > - failed. > + - RMW operations that are conditional are fully ordered. > > -Except for a successful test_and_set_bit_lock() which has ACQUIRE semantics and > -clear_bit_unlock() which has RELEASE semantics. > +Except for a successful test_and_set_bit_lock() which has ACQUIRE semantics, > +clear_bit_unlock() which has RELEASE semantics and test_bit_acquire which has > +ACQUIRE semantics. > > Since a platform only has a single means of achieving atomic operations > the same barriers as for atomic_t are used, see atomic_t.txt. Gr{oetje,eeting}s, Geert -- Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org In personal conversations with technical people, I call myself a hacker. But when I'm talking to journalists I just say "programmer" or something like that. -- Linus Torvalds ^ permalink raw reply [flat|nested] 48+ messages in thread
* [PATCH] provide arch_test_bit_acquire for architectures that define test_bit 2022-08-26 19:23 ` Geert Uytterhoeven @ 2022-08-26 20:03 ` Mikulas Patocka 2022-08-26 20:07 ` Linus Torvalds 2022-08-26 20:03 ` [PATCH v3] wait_on_bit: add an acquire memory barrier Linus Torvalds 1 sibling, 1 reply; 48+ messages in thread From: Mikulas Patocka @ 2022-08-26 20:03 UTC (permalink / raw) To: Geert Uytterhoeven, Brian Cain, linux-hexagon Cc: Linus Torvalds, Alan Stern, Andrea Parri, Will Deacon, Peter Zijlstra, Boqun Feng, Nicholas Piggin, David Howells, Jade Alglave, Luc Maranget, Paul E. McKenney, Akira Yokosawa, Daniel Lustig, Joel Fernandes, Linux Kernel Mailing List, Linux-Arch On Fri, 26 Aug 2022, Geert Uytterhoeven wrote: > Hi Mikulas, > > noreply@ellerman.id.au reports lots of build failures on m68k: > > include/asm-generic/bitops/non-instrumented-non-atomic.h:15:33: > error: implicit declaration of function 'arch_test_bit_acquire'; did > you mean '_test_bit_acquire'? [-Werror=implicit-function-declaration] > > which I've bisected to this commit. > > http://kisskb.ellerman.id.au/kisskb/head/3e5c673f0d75bc22b3c26eade87e4db4f374cd34 Does this patch fix it? It is untested. I'm not sure about the hexagon architecture, it is presumably in-order so that test_bit and test_bit_acquire are equivalent, but I am not sure about that - I'm adding hexagon maintainer to the recipient field. Mikulas provide arch_test_bit_acquire for architectures that define test_bit Some architectures define their own arch_test_bit and they also need arch_test_bit_acquire, otherwise they won't compile. 
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com> Cc: stable@vger.kernel.org Fixes: 8238b4579866 ("wait_on_bit: add an acquire memory barrier") --- arch/alpha/include/asm/bitops.h | 7 +++++++ arch/hexagon/include/asm/bitops.h | 15 +++++++++++++++ arch/ia64/include/asm/bitops.h | 7 +++++++ arch/m68k/include/asm/bitops.h | 7 +++++++ arch/s390/include/asm/bitops.h | 7 +++++++ 5 files changed, 43 insertions(+) Index: linux-2.6/arch/m68k/include/asm/bitops.h =================================================================== --- linux-2.6.orig/arch/m68k/include/asm/bitops.h +++ linux-2.6/arch/m68k/include/asm/bitops.h @@ -163,6 +163,13 @@ arch_test_bit(unsigned long nr, const vo return (addr[nr >> 5] & (1UL << (nr & 31))) != 0; } +static __always_inline bool +arch_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + static inline int bset_reg_test_and_set_bit(int nr, volatile unsigned long *vaddr) { Index: linux-2.6/arch/alpha/include/asm/bitops.h =================================================================== --- linux-2.6.orig/arch/alpha/include/asm/bitops.h +++ linux-2.6/arch/alpha/include/asm/bitops.h @@ -289,6 +289,13 @@ arch_test_bit(unsigned long nr, const vo return (1UL & (((const int *) addr)[nr >> 5] >> (nr & 31))) != 0UL; } +static __always_inline bool +arch_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + /* * ffz = Find First Zero in word. Undefined if no zero exists, * so code should check against ~0UL first.. 
Index: linux-2.6/arch/hexagon/include/asm/bitops.h =================================================================== --- linux-2.6.orig/arch/hexagon/include/asm/bitops.h +++ linux-2.6/arch/hexagon/include/asm/bitops.h @@ -179,6 +179,21 @@ arch_test_bit(unsigned long nr, const vo return retval; } +static __always_inline bool +arch_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + int retval; + + asm volatile( + "{P0 = tstbit(%1,%2); if (P0.new) %0 = #1; if (!P0.new) %0 = #0;}\n" + : "=&r" (retval) + : "r" (addr[BIT_WORD(nr)]), "r" (nr % BITS_PER_LONG) + : "p0", "memory" + ); + + return retval; +} + /* * ffz - find first zero in word. * @word: The word to search Index: linux-2.6/arch/ia64/include/asm/bitops.h =================================================================== --- linux-2.6.orig/arch/ia64/include/asm/bitops.h +++ linux-2.6/arch/ia64/include/asm/bitops.h @@ -337,6 +337,13 @@ arch_test_bit(unsigned long nr, const vo return 1 & (((const volatile __u32 *) addr)[nr >> 5] >> (nr & 31)); } +static __always_inline bool +arch_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + /** * ffz - find the first zero bit in a long word * @x: The long word to find the bit in Index: linux-2.6/arch/s390/include/asm/bitops.h =================================================================== --- linux-2.6.orig/arch/s390/include/asm/bitops.h +++ linux-2.6/arch/s390/include/asm/bitops.h @@ -185,6 +185,13 @@ arch_test_bit(unsigned long nr, const vo return *p & mask; } +static __always_inline bool +arch_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); +} + static inline bool arch_test_and_set_bit_lock(unsigned long nr, volatile unsigned long *ptr) { 
^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH] provide arch_test_bit_acquire for architectures that define test_bit 2022-08-26 20:03 ` [PATCH] provide arch_test_bit_acquire for architectures that define test_bit Mikulas Patocka @ 2022-08-26 20:07 ` Linus Torvalds 0 siblings, 0 replies; 48+ messages in thread From: Linus Torvalds @ 2022-08-26 20:07 UTC (permalink / raw) To: Mikulas Patocka Cc: Geert Uytterhoeven, Brian Cain, linux-hexagon, Alan Stern, Andrea Parri, Will Deacon, Peter Zijlstra, Boqun Feng, Nicholas Piggin, David Howells, Jade Alglave, Luc Maranget, Paul E. McKenney, Akira Yokosawa, Daniel Lustig, Joel Fernandes, Linux Kernel Mailing List, Linux-Arch [ Crossed emails ] On Fri, Aug 26, 2022 at 1:03 PM Mikulas Patocka <mpatocka@redhat.com> wrote: > > +static __always_inline bool > +arch_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) > +{ > + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); > + return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); > +} > + I'd suggest you just do #define arch_test_bit_acquire generic_test_bit_acquire in each of these files, instead of duplicating that function over and over again.. Because the generic version always exists, since it comes from generic-non-atomic.h, which is included directly from <linux/bitops.h>. Linus ^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH v3] wait_on_bit: add an acquire memory barrier 2022-08-26 19:23 ` Geert Uytterhoeven 2022-08-26 20:03 ` [PATCH] provide arch_test_bit_acquire for architectures that define test_bit Mikulas Patocka @ 2022-08-26 20:03 ` Linus Torvalds 2022-08-26 20:43 ` [PATCH] provide arch_test_bit_acquire for architectures that define test_bit Mikulas Patocka 1 sibling, 1 reply; 48+ messages in thread From: Linus Torvalds @ 2022-08-26 20:03 UTC (permalink / raw) To: Geert Uytterhoeven Cc: Mikulas Patocka, Alan Stern, Andrea Parri, Will Deacon, Peter Zijlstra, Boqun Feng, Nicholas Piggin, David Howells, Jade Alglave, Luc Maranget, Paul E. McKenney, Akira Yokosawa, Daniel Lustig, Joel Fernandes, Linux Kernel Mailing List, Linux-Arch [-- Attachment #1: Type: text/plain, Size: 2079 bytes --] On Fri, Aug 26, 2022 at 12:23 PM Geert Uytterhoeven <geert@linux-m68k.org> wrote: > > include/asm-generic/bitops/non-instrumented-non-atomic.h:15:33: > error: implicit declaration of function 'arch_test_bit_acquire'; did > you mean '_test_bit_acquire'? [-Werror=implicit-function-declaration] > Ahh. m68k isn't using any of the generic bitops headers. *Most* architectures have that #include <asm-generic/bitops/non-atomic.h> and get it that way, but while it's common, it's most definitely not universal: [torvalds@ryzen linux]$ git grep -L bitops/non-atomic.h arch/*/include/asm/bitops.h arch/alpha/include/asm/bitops.h arch/hexagon/include/asm/bitops.h arch/ia64/include/asm/bitops.h arch/m68k/include/asm/bitops.h arch/s390/include/asm/bitops.h arch/sparc/include/asm/bitops.h arch/x86/include/asm/bitops.h and of that list only x86 has the new arch_test_bit_acquire(). So I assume it's not just m68k, but also alpha, hexagon, ia64, s390 and sparc that have this issue (unless they maybe have some other path that includes the gerneric ones, I didn't check). 
This was actually why my original suggested patch used the 'generic-non-atomic.h' header for it, because that is actually included regardless of any architecture headers directly from <linux/bitops.h>. And it never triggered for me that Mikulas' updated patch then had this arch_test_bit_acquire() issue. Something like the attached patch *MAY* fix it, but I really haven't thought about it a lot, and it's pretty ugly. Maybe it would be better to just add the #define arch_test_bit_acquire generic_test_bit_acquire to the affected <asm/bitops.h> files instead, and then let those architectures decide on their own that maybe they want to use their own test_bit() function because it is _already_ an acquire one. Mikulas? Geert - any opinions on that "maybe the arch should just do that #define itself"? I don't think it actually matters for m68k, you end up with pretty much the same thing anyway, because "smp_load_acquire()" is just a load anyway.. Linus [-- Attachment #2: patch.diff --] [-- Type: text/x-patch, Size: 1111 bytes --] arch/x86/include/asm/bitops.h | 1 + include/linux/bitops.h | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index 0fe9de58af31..b82006138c60 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -246,6 +246,7 @@ arch_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) return __builtin_constant_p(nr) ? 
constant_test_bit_acquire(nr, addr) : variable_test_bit(nr, addr); } +#define arch_test_bit_acquire arch_test_bit_acquire /** * __ffs - find first set bit in word diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 3b89c64bcfd8..a046b9c45fdb 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -67,6 +67,10 @@ extern unsigned long __sw_hweight64(__u64 w); */ #include <asm/bitops.h> +#ifndef arch_test_bit_acquire +#define arch_test_bit_acquire generic_test_bit_acquire +#endif + /* Check that the bitops prototypes are sane */ #define __check_bitop_pr(name) \ static_assert(__same_type(arch_##name, generic_##name) && \ ^ permalink raw reply related [flat|nested] 48+ messages in thread
* [PATCH] provide arch_test_bit_acquire for architectures that define test_bit 2022-08-26 20:03 ` [PATCH v3] wait_on_bit: add an acquire memory barrier Linus Torvalds @ 2022-08-26 20:43 ` Mikulas Patocka 2022-08-26 23:10 ` Linus Torvalds 0 siblings, 1 reply; 48+ messages in thread From: Mikulas Patocka @ 2022-08-26 20:43 UTC (permalink / raw) To: Linus Torvalds, Brian Cain, linux-hexagon Cc: Geert Uytterhoeven, Alan Stern, Andrea Parri, Will Deacon, Peter Zijlstra, Boqun Feng, Nicholas Piggin, David Howells, Jade Alglave, Luc Maranget, Paul E. McKenney, Akira Yokosawa, Daniel Lustig, Joel Fernandes, Linux Kernel Mailing List, Linux-Arch On Fri, 26 Aug 2022, Linus Torvalds wrote: > On Fri, Aug 26, 2022 at 12:23 PM Geert Uytterhoeven > <geert@linux-m68k.org> wrote: > > > > include/asm-generic/bitops/non-instrumented-non-atomic.h:15:33: > > error: implicit declaration of function 'arch_test_bit_acquire'; did > > you mean '_test_bit_acquire'? [-Werror=implicit-function-declaration] > > > > Ahh. m68k isn't using any of the generic bitops headers. > > *Most* architectures have that > > #include <asm-generic/bitops/non-atomic.h> > > and get it that way, but while it's common, it's most definitely not universal: > > [torvalds@ryzen linux]$ git grep -L bitops/non-atomic.h > arch/*/include/asm/bitops.h > arch/alpha/include/asm/bitops.h > arch/hexagon/include/asm/bitops.h > arch/ia64/include/asm/bitops.h > arch/m68k/include/asm/bitops.h > arch/s390/include/asm/bitops.h > arch/sparc/include/asm/bitops.h > arch/x86/include/asm/bitops.h > > and of that list only x86 has the new arch_test_bit_acquire(). > > So I assume it's not just m68k, but also alpha, hexagon, ia64, s390 > and sparc that have this issue (unless they maybe have some other path > that includes the gerneric ones, I didn't check). 
For sparc, there is arch/sparc/include/asm/bitops_32.h and arch/sparc/include/asm/bitops_64.h that include asm-generic/bitops/non-atomic.h For the others, the generic version is not included. I'm wondering why do the architectures redefine test_bit, if their definition is equivalent to the generic one? We could just delete arch_test_bit and use "#define arch_test_bit generic_test_bit" as well. > This was actually why my original suggested patch used the > 'generic-non-atomic.h' header for it, because that is actually > included regardless of any architecture headers directly from > <linux/bitops.h>. > > And it never triggered for me that Mikulas' updated patch then had > this arch_test_bit_acquire() issue. > > Something like the attached patch *MAY* fix it, but I really haven't > thought about it a lot, and it's pretty ugly. Maybe it would be better > to just add the > > #define arch_test_bit_acquire generic_test_bit_acquire > > to the affected <asm/bitops.h> files instead, and then let those > architectures decide on their own that maybe they want to use their > own test_bit() function because it is _already_ an acquire one. > > Mikulas? > > Geert - any opinions on that "maybe the arch should just do that > #define itself"? I don't think it actually matters for m68k, you end > up with pretty much the same thing anyway, because > "smp_load_acquire()" is just a load anyway.. > > Linus Another untested patch ... tomorrow, I'll try to compile it, at least for architectures where Debian provides cross-compiling gcc. From: Mikulas Patocka <mpatocka@redhat.com> Some architectures define their own arch_test_bit and they also need arch_test_bit_acquire, otherwise they won't compile. We also clean up the code by using the generic test_bit if that is equivalent to the arch-specific version. 
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com> Cc: stable@vger.kernel.org Fixes: 8238b4579866 ("wait_on_bit: add an acquire memory barrier") --- arch/alpha/include/asm/bitops.h | 7 ++----- arch/hexagon/include/asm/bitops.h | 15 +++++++++++++++ arch/ia64/include/asm/bitops.h | 7 ++----- arch/m68k/include/asm/bitops.h | 7 ++----- arch/s390/include/asm/bitops.h | 10 ++-------- arch/sh/include/asm/bitops-op32.h | 12 ++---------- 6 files changed, 25 insertions(+), 33 deletions(-) Index: linux-2.6/arch/alpha/include/asm/bitops.h =================================================================== --- linux-2.6.orig/arch/alpha/include/asm/bitops.h +++ linux-2.6/arch/alpha/include/asm/bitops.h @@ -283,11 +283,8 @@ arch___test_and_change_bit(unsigned long return (old & mask) != 0; } -static __always_inline bool -arch_test_bit(unsigned long nr, const volatile unsigned long *addr) -{ - return (1UL & (((const int *) addr)[nr >> 5] >> (nr & 31))) != 0UL; -} +#define arch_test_bit generic_test_bit +#define arch_test_bit_acquire generic_test_bit_acquire /* * ffz = Find First Zero in word. Undefined if no zero exists, Index: linux-2.6/arch/hexagon/include/asm/bitops.h =================================================================== --- linux-2.6.orig/arch/hexagon/include/asm/bitops.h +++ linux-2.6/arch/hexagon/include/asm/bitops.h @@ -179,6 +179,21 @@ arch_test_bit(unsigned long nr, const vo return retval; } +static __always_inline bool +arch_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) +{ + int retval; + + asm volatile( + "{P0 = tstbit(%1,%2); if (P0.new) %0 = #1; if (!P0.new) %0 = #0;}\n" + : "=&r" (retval) + : "r" (addr[BIT_WORD(nr)]), "r" (nr % BITS_PER_LONG) + : "p0", "memory" + ); + + return retval; +} + /* * ffz - find first zero in word. 
* @word: The word to search Index: linux-2.6/arch/ia64/include/asm/bitops.h =================================================================== --- linux-2.6.orig/arch/ia64/include/asm/bitops.h +++ linux-2.6/arch/ia64/include/asm/bitops.h @@ -331,11 +331,8 @@ arch___test_and_change_bit(unsigned long return (old & bit) != 0; } -static __always_inline bool -arch_test_bit(unsigned long nr, const volatile unsigned long *addr) -{ - return 1 & (((const volatile __u32 *) addr)[nr >> 5] >> (nr & 31)); -} +#define arch_test_bit generic_test_bit +#define arch_test_bit_acquire generic_test_bit_acquire /** * ffz - find the first zero bit in a long word Index: linux-2.6/arch/s390/include/asm/bitops.h =================================================================== --- linux-2.6.orig/arch/s390/include/asm/bitops.h +++ linux-2.6/arch/s390/include/asm/bitops.h @@ -176,14 +176,8 @@ arch___test_and_change_bit(unsigned long return old & mask; } -static __always_inline bool -arch_test_bit(unsigned long nr, const volatile unsigned long *addr) -{ - const volatile unsigned long *p = __bitops_word(nr, addr); - unsigned long mask = __bitops_mask(nr); - - return *p & mask; -} +#define arch_test_bit generic_test_bit +#define arch_test_bit_acquire generic_test_bit_acquire static inline bool arch_test_and_set_bit_lock(unsigned long nr, volatile unsigned long *ptr) Index: linux-2.6/arch/m68k/include/asm/bitops.h =================================================================== --- linux-2.6.orig/arch/m68k/include/asm/bitops.h +++ linux-2.6/arch/m68k/include/asm/bitops.h @@ -157,11 +157,8 @@ arch___change_bit(unsigned long nr, vola change_bit(nr, addr); } -static __always_inline bool -arch_test_bit(unsigned long nr, const volatile unsigned long *addr) -{ - return (addr[nr >> 5] & (1UL << (nr & 31))) != 0; -} +#define arch_test_bit generic_test_bit +#define arch_test_bit_acquire generic_test_bit_acquire static inline int bset_reg_test_and_set_bit(int nr, volatile unsigned long *vaddr) Index: 
linux-2.6/arch/sh/include/asm/bitops-op32.h =================================================================== --- linux-2.6.orig/arch/sh/include/asm/bitops-op32.h +++ linux-2.6/arch/sh/include/asm/bitops-op32.h @@ -135,16 +135,8 @@ arch___test_and_change_bit(unsigned long return (old & mask) != 0; } -/** - * arch_test_bit - Determine whether a bit is set - * @nr: bit number to test - * @addr: Address to start counting from - */ -static __always_inline bool -arch_test_bit(unsigned long nr, const volatile unsigned long *addr) -{ - return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); -} +#define arch_test_bit generic_test_bit +#define arch_test_bit_acquire generic_test_bit_acquire #include <asm-generic/bitops/non-instrumented-non-atomic.h> ^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH] provide arch_test_bit_acquire for architectures that define test_bit 2022-08-26 20:43 ` [PATCH] provide arch_test_bit_acquire for architectures that define test_bit Mikulas Patocka @ 2022-08-26 23:10 ` Linus Torvalds 2022-08-26 23:18 ` Linus Torvalds 2022-08-27 11:38 ` Mikulas Patocka 0 siblings, 2 replies; 48+ messages in thread From: Linus Torvalds @ 2022-08-26 23:10 UTC (permalink / raw) To: Mikulas Patocka Cc: Brian Cain, linux-hexagon, Geert Uytterhoeven, Alan Stern, Andrea Parri, Will Deacon, Peter Zijlstra, Boqun Feng, Nicholas Piggin, David Howells, Jade Alglave, Luc Maranget, Paul E. McKenney, Akira Yokosawa, Daniel Lustig, Joel Fernandes, Linux Kernel Mailing List, Linux-Arch On Fri, Aug 26, 2022 at 1:43 PM Mikulas Patocka <mpatocka@redhat.com> wrote: > > I'm wondering why do the architectures redefine test_bit, if their > definition is equivalent to the generic one? We could just delete > arch_test_bit and use "#define arch_test_bit generic_test_bit" as well. I think generic_test_bit() came after many of them, and when it didn't, people copied earlier architectures where they had already done their own. > Another untested patch ... tomorrow, I'll try to compile it, at least for > architectures where Debian provides cross-compiling gcc. Looks good to me, except I'd just do #define arch_test_bit_acquire arch_test_bit on hexagon rather than duplicate that function. From my reading, Hexagon doesn't have any fancy memory ordering, it's just the usual UP with barriers basically for instruction cache coherence etc. Linus ^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH] provide arch_test_bit_acquire for architectures that define test_bit 2022-08-26 23:10 ` Linus Torvalds @ 2022-08-26 23:18 ` Linus Torvalds 2022-08-27 11:38 ` Mikulas Patocka 1 sibling, 0 replies; 48+ messages in thread From: Linus Torvalds @ 2022-08-26 23:18 UTC (permalink / raw) To: Mikulas Patocka Cc: Brian Cain, linux-hexagon, Geert Uytterhoeven, Alan Stern, Andrea Parri, Will Deacon, Peter Zijlstra, Boqun Feng, Nicholas Piggin, David Howells, Jade Alglave, Luc Maranget, Paul E. McKenney, Akira Yokosawa, Daniel Lustig, Joel Fernandes, Linux Kernel Mailing List, Linux-Arch On Fri, Aug 26, 2022 at 4:10 PM Linus Torvalds <torvalds@linux-foundation.org> wrote: > > Looks good to me, except I'd just do > > #define arch_test_bit_acquire arch_test_bit > > on hexagon rather than duplicate that function. Oh, except you didn't quite duplicate it, you added the "memory" clober to it to make sure it's ordered. Which looks correct to me, even if the "almost entirely duplicated" is a bit annoying. Linus ^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH] provide arch_test_bit_acquire for architectures that define test_bit 2022-08-26 23:10 ` Linus Torvalds 2022-08-26 23:18 ` Linus Torvalds @ 2022-08-27 11:38 ` Mikulas Patocka 2022-08-27 16:50 ` Linus Torvalds 1 sibling, 1 reply; 48+ messages in thread From: Mikulas Patocka @ 2022-08-27 11:38 UTC (permalink / raw) To: Linus Torvalds Cc: Brian Cain, linux-hexagon, Geert Uytterhoeven, Alan Stern, Andrea Parri, Will Deacon, Peter Zijlstra, Boqun Feng, Nicholas Piggin, David Howells, Jade Alglave, Luc Maranget, Paul E. McKenney, Akira Yokosawa, Daniel Lustig, Joel Fernandes, Linux Kernel Mailing List, Linux-Arch On Fri, 26 Aug 2022, Linus Torvalds wrote: > On Fri, Aug 26, 2022 at 1:43 PM Mikulas Patocka <mpatocka@redhat.com> wrote: > > > > I'm wondering why do the architectures redefine test_bit, if their > > definition is equivalent to the generic one? We could just delete > > arch_test_bit and use "#define arch_test_bit generic_test_bit" as well. > > I think generic_test_bit() came after many of them, and when it > didn't, people copied earlier architectures where they had already > done their own. > > > Another untested patch ... tomorrow, I'll try to compile it, at least for > > architectures where Debian provides cross-compiling gcc. I compile-tested this patch on alpha, s390x, m68k, sh, sparc32, sparc64. So, you can commit it to close these uncompilable-kernel reports. Mikulas ^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH] provide arch_test_bit_acquire for architectures that define test_bit 2022-08-27 11:38 ` Mikulas Patocka @ 2022-08-27 16:50 ` Linus Torvalds 0 siblings, 0 replies; 48+ messages in thread From: Linus Torvalds @ 2022-08-27 16:50 UTC (permalink / raw) To: Mikulas Patocka Cc: Brian Cain, linux-hexagon, Geert Uytterhoeven, Alan Stern, Andrea Parri, Will Deacon, Peter Zijlstra, Boqun Feng, Nicholas Piggin, David Howells, Jade Alglave, Luc Maranget, Paul E. McKenney, Akira Yokosawa, Daniel Lustig, Joel Fernandes, Linux Kernel Mailing List, Linux-Arch On Sat, Aug 27, 2022 at 4:38 AM Mikulas Patocka <mpatocka@redhat.com> wrote: > > I compile-tested this patch on alpha, s390x, m68k, sh, sparc32, sparc64. > So, you can commit it to close these uncompilable-kernel reports. Thanks, done. Linus ^ permalink raw reply [flat|nested] 48+ messages in thread
end of thread, other threads:[~2022-10-27 12:48 UTC | newest] Thread overview: 48+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2022-09-30 15:32 backport of patches 8238b4579866b7c1bb99883cfe102a43db5506ff and d6ffe6067a54972564552ea45d320fb98db1ac5e Mikulas Patocka 2022-09-30 15:33 ` [PATCH] wait_on_bit: add an acquire memory barrier Mikulas Patocka 2022-09-30 15:33 ` Mikulas Patocka 2022-09-30 15:34 ` Mikulas Patocka 2022-09-30 15:34 ` Mikulas Patocka 2022-09-30 15:34 ` Mikulas Patocka 2022-10-01 7:01 ` Greg KH 2022-09-30 15:35 ` [PATCH] provide arch_test_bit_acquire for architectures that define test_bit Mikulas Patocka 2022-09-30 15:35 ` Mikulas Patocka 2022-09-30 15:36 ` Mikulas Patocka 2022-09-30 15:36 ` Mikulas Patocka 2022-09-30 15:36 ` Mikulas Patocka 2022-10-01 7:00 ` Greg KH 2022-10-01 6:59 ` backport of patches 8238b4579866b7c1bb99883cfe102a43db5506ff and d6ffe6067a54972564552ea45d320fb98db1ac5e Greg KH 2022-10-03 12:28 ` Mikulas Patocka 2022-10-03 12:28 ` [PATCH] wait_on_bit: add an acquire memory barrier Mikulas Patocka 2022-10-03 12:29 ` Mikulas Patocka 2022-10-03 12:29 ` Mikulas Patocka 2022-10-03 12:29 ` Mikulas Patocka 2022-10-03 12:30 ` Mikulas Patocka 2022-10-03 12:30 ` [PATCH] provide arch_test_bit_acquire for architectures that define test_bit Mikulas Patocka 2022-10-03 12:31 ` Mikulas Patocka 2022-10-03 12:31 ` Mikulas Patocka 2022-10-03 12:31 ` Mikulas Patocka 2022-10-03 12:32 ` Mikulas Patocka 2022-10-05 16:48 ` backport of patches 8238b4579866b7c1bb99883cfe102a43db5506ff and d6ffe6067a54972564552ea45d320fb98db1ac5e Greg KH 2022-10-10 19:08 ` Greg KH 2022-10-11 9:48 ` Mikulas Patocka 2022-10-11 9:58 ` Greg KH 2022-10-18 11:36 ` Mikulas Patocka 2022-10-18 11:37 ` [PATCH] wait_on_bit: add an acquire memory barrier Mikulas Patocka 2022-10-18 11:37 ` Mikulas Patocka 2022-10-18 11:38 ` Mikulas Patocka 2022-10-18 11:38 ` Mikulas Patocka 2022-10-18 11:39 ` Mikulas Patocka 2022-10-18 11:39 ` 
Mikulas Patocka 2022-10-18 11:39 ` Mikulas Patocka 2022-10-26 17:01 ` backport of patches 8238b4579866b7c1bb99883cfe102a43db5506ff and d6ffe6067a54972564552ea45d320fb98db1ac5e Greg KH 2022-10-27 11:45 ` Mikulas Patocka 2022-10-27 11:53 ` Greg KH 2022-10-27 12:48 ` Mikulas Patocka -- strict thread matches above, loose matches on Subject: below -- 2022-08-22 9:38 [PATCH] wait_on_bit: add an acquire memory barrier Mikulas Patocka 2022-08-22 17:08 ` Linus Torvalds 2022-08-22 17:39 ` Linus Torvalds 2022-08-25 21:03 ` Mikulas Patocka 2022-08-25 21:54 ` Linus Torvalds 2022-08-26 13:17 ` [PATCH v3] " Mikulas Patocka 2022-08-26 19:23 ` Geert Uytterhoeven 2022-08-26 20:03 ` [PATCH] provide arch_test_bit_acquire for architectures that define test_bit Mikulas Patocka 2022-08-26 20:07 ` Linus Torvalds 2022-08-26 20:03 ` [PATCH v3] wait_on_bit: add an acquire memory barrier Linus Torvalds 2022-08-26 20:43 ` [PATCH] provide arch_test_bit_acquire for architectures that define test_bit Mikulas Patocka 2022-08-26 23:10 ` Linus Torvalds 2022-08-26 23:18 ` Linus Torvalds 2022-08-27 11:38 ` Mikulas Patocka 2022-08-27 16:50 ` Linus Torvalds
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.