* [GIT pull] locking/urgent for v5.15-rc2
@ 2021-09-19 18:28 Thomas Gleixner
  2021-09-19 18:28 ` [GIT pull] perf/urgent " Thomas Gleixner
                   ` (3 more replies)
  0 siblings, 4 replies; 11+ messages in thread
From: Thomas Gleixner @ 2021-09-19 18:28 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: linux-kernel, x86

Linus,

please pull the latest locking/urgent branch from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking-urgent-2021-09-19

up to:  81121524f1c7: locking/rwbase: Take care of ordering guarantee for fastpath reader


A set of updates for the RT-specific reader/writer locking base code:

  - Make the fast-path reader ordering guarantees correct (a short
    standalone sketch of the pairing follows the list).

  - Code reshuffling to make the fix simpler.
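As a rough illustration of what that pairing means, here is a standalone C11
sketch. The counter encoding (sign convention, WRITER_BIAS value) is
deliberately simplified and is not the kernel's; only the placement of the
ACQUIRE and RELEASE mirrors the fix in the patch below.

#include <stdatomic.h>

#define WRITER_BIAS	(-1000)	/* toy sentinel, not the kernel's encoding */

static atomic_int readers;	/* >= 0: reader count, negative: writer owns it */

/*
 * Reader fast-path lock: the successful CAS needs ACQUIRE so the reader's
 * critical section cannot be reordered before the lock is taken.
 */
static int reader_trylock(void)
{
	int r = atomic_load_explicit(&readers, memory_order_relaxed);

	while (r >= 0) {
		if (atomic_compare_exchange_weak_explicit(&readers, &r, r + 1,
				memory_order_acquire, memory_order_relaxed))
			return 1;
	}
	return 0;	/* a writer holds the lock, fall back to the slow path */
}

/*
 * Writer unlock: a writer that owns the lock has previously set readers to
 * WRITER_BIAS. Restoring the bias needs RELEASE so the writer's stores are
 * visible to the next fast-path reader; pairs with the ACQUIRE above.
 */
static void writer_unlock(void)
{
	atomic_fetch_add_explicit(&readers, -WRITER_BIAS, memory_order_release);
}

The second pair in the actual patch works the same way in the other
direction: the reader's unlock (a fully ordered dec_and_test) provides the
RELEASE that the atomic_read_acquire() in __rwbase_write_trylock() pairs
with.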

Thanks,

	tglx

------------------>
Boqun Feng (1):
      locking/rwbase: Take care of ordering guarantee for fastpath reader

Peter Zijlstra (2):
      locking/rwbase: Properly match set_and_save_state() to restore_state()
      locking/rwbase: Extract __rwbase_write_trylock()


 kernel/locking/rwbase_rt.c | 65 ++++++++++++++++++++++++++++++++--------------
 1 file changed, 45 insertions(+), 20 deletions(-)

diff --git a/kernel/locking/rwbase_rt.c b/kernel/locking/rwbase_rt.c
index 4ba15088e640..88191f6e252c 100644
--- a/kernel/locking/rwbase_rt.c
+++ b/kernel/locking/rwbase_rt.c
@@ -41,6 +41,12 @@
  * The risk of writer starvation is there, but the pathological use cases
  * which trigger it are not necessarily the typical RT workloads.
  *
+ * Fast-path orderings:
+ * The lock/unlock of readers can run in fast paths: lock and unlock are only
+ * atomic ops, and there is no inner lock to provide ACQUIRE and RELEASE
+ * semantics of rwbase_rt. Atomic ops should thus provide _acquire()
+ * and _release() (or stronger).
+ *
  * Common code shared between RT rw_semaphore and rwlock
  */
 
@@ -53,6 +59,7 @@ static __always_inline int rwbase_read_trylock(struct rwbase_rt *rwb)
 	 * set.
 	 */
 	for (r = atomic_read(&rwb->readers); r < 0;) {
+		/* Fully-ordered if cmpxchg() succeeds, provides ACQUIRE */
 		if (likely(atomic_try_cmpxchg(&rwb->readers, &r, r + 1)))
 			return 1;
 	}
@@ -162,6 +169,8 @@ static __always_inline void rwbase_read_unlock(struct rwbase_rt *rwb,
 	/*
 	 * rwb->readers can only hit 0 when a writer is waiting for the
 	 * active readers to leave the critical section.
+	 *
+	 * dec_and_test() is fully ordered, provides RELEASE.
 	 */
 	if (unlikely(atomic_dec_and_test(&rwb->readers)))
 		__rwbase_read_unlock(rwb, state);
@@ -172,7 +181,11 @@ static inline void __rwbase_write_unlock(struct rwbase_rt *rwb, int bias,
 {
 	struct rt_mutex_base *rtm = &rwb->rtmutex;
 
-	atomic_add(READER_BIAS - bias, &rwb->readers);
+	/*
+	 * _release() is needed in case that reader is in fast path, pairing
+	 * with atomic_try_cmpxchg() in rwbase_read_trylock(), provides RELEASE
+	 */
+	(void)atomic_add_return_release(READER_BIAS - bias, &rwb->readers);
 	raw_spin_unlock_irqrestore(&rtm->wait_lock, flags);
 	rwbase_rtmutex_unlock(rtm);
 }
@@ -196,6 +209,23 @@ static inline void rwbase_write_downgrade(struct rwbase_rt *rwb)
 	__rwbase_write_unlock(rwb, WRITER_BIAS - 1, flags);
 }
 
+static inline bool __rwbase_write_trylock(struct rwbase_rt *rwb)
+{
+	/* Can do without CAS because we're serialized by wait_lock. */
+	lockdep_assert_held(&rwb->rtmutex.wait_lock);
+
+	/*
+	 * _acquire is needed in case the reader is in the fast path, pairing
+	 * with rwbase_read_unlock(), provides ACQUIRE.
+	 */
+	if (!atomic_read_acquire(&rwb->readers)) {
+		atomic_set(&rwb->readers, WRITER_BIAS);
+		return 1;
+	}
+
+	return 0;
+}
+
 static int __sched rwbase_write_lock(struct rwbase_rt *rwb,
 				     unsigned int state)
 {
@@ -210,34 +240,30 @@ static int __sched rwbase_write_lock(struct rwbase_rt *rwb,
 	atomic_sub(READER_BIAS, &rwb->readers);
 
 	raw_spin_lock_irqsave(&rtm->wait_lock, flags);
-	/*
-	 * set_current_state() for rw_semaphore
-	 * current_save_and_set_rtlock_wait_state() for rwlock
-	 */
-	rwbase_set_and_save_current_state(state);
+	if (__rwbase_write_trylock(rwb))
+		goto out_unlock;
 
-	/* Block until all readers have left the critical section. */
-	for (; atomic_read(&rwb->readers);) {
+	rwbase_set_and_save_current_state(state);
+	for (;;) {
 		/* Optimized out for rwlocks */
 		if (rwbase_signal_pending_state(state, current)) {
-			__set_current_state(TASK_RUNNING);
+			rwbase_restore_current_state();
 			__rwbase_write_unlock(rwb, 0, flags);
 			return -EINTR;
 		}
+
+		if (__rwbase_write_trylock(rwb))
+			break;
+
 		raw_spin_unlock_irqrestore(&rtm->wait_lock, flags);
+		rwbase_schedule();
+		raw_spin_lock_irqsave(&rtm->wait_lock, flags);
 
-		/*
-		 * Schedule and wait for the readers to leave the critical
-		 * section. The last reader leaving it wakes the waiter.
-		 */
-		if (atomic_read(&rwb->readers) != 0)
-			rwbase_schedule();
 		set_current_state(state);
-		raw_spin_lock_irqsave(&rtm->wait_lock, flags);
 	}
-
-	atomic_set(&rwb->readers, WRITER_BIAS);
 	rwbase_restore_current_state();
+
+out_unlock:
 	raw_spin_unlock_irqrestore(&rtm->wait_lock, flags);
 	return 0;
 }
@@ -253,8 +279,7 @@ static inline int rwbase_write_trylock(struct rwbase_rt *rwb)
 	atomic_sub(READER_BIAS, &rwb->readers);
 
 	raw_spin_lock_irqsave(&rtm->wait_lock, flags);
-	if (!atomic_read(&rwb->readers)) {
-		atomic_set(&rwb->readers, WRITER_BIAS);
+	if (__rwbase_write_trylock(rwb)) {
 		raw_spin_unlock_irqrestore(&rtm->wait_lock, flags);
 		return 1;
 	}



* [GIT pull] perf/urgent for v5.15-rc2
  2021-09-19 18:28 [GIT pull] locking/urgent for v5.15-rc2 Thomas Gleixner
@ 2021-09-19 18:28 ` Thomas Gleixner
  2021-09-19 20:36   ` pr-tracker-bot
  2021-09-19 18:28 ` [GIT pull] x86/urgent " Thomas Gleixner
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 11+ messages in thread
From: Thomas Gleixner @ 2021-09-19 18:28 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: linux-kernel, x86

Linus,

please pull the latest perf/urgent branch from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf-urgent-2021-09-19

up to:  b89a05b21f46: events: Reuse value read using READ_ONCE instead of re-reading it


A single fix for the perf core where a value read with READ_ONCE() was
checked and then reread, which makes all the checks invalid. Reuse the
already-read value instead.
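Schematically, the pattern being fixed looks like the fragment below. The
validity check is a stand-in for illustration; this is not the literal
kernel/events/core.c code, just the shape of the bug and of the fix:

	struct task_struct *task = READ_ONCE(event->ctx->task);

	if (!task || !task_is_usable(task))	/* checks apply to the sample */
		return;

	/* Before: a fresh read; may no longer be the task that was checked */
	mm = get_task_mm(event->ctx->task);

	/* After: reuse the value that was already read and validated */
	mm = get_task_mm(task);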

Thanks,

	tglx

------------------>
Baptiste Lepers (1):
      events: Reuse value read using READ_ONCE instead of re-reading it


 kernel/events/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 744e8726c5b2..0c000cb01eeb 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -10193,7 +10193,7 @@ static void perf_event_addr_filters_apply(struct perf_event *event)
 		return;
 
 	if (ifh->nr_file_filters) {
-		mm = get_task_mm(event->ctx->task);
+		mm = get_task_mm(task);
 		if (!mm)
 			goto restart;
 



* [GIT pull] x86/urgent for v5.15-rc2
  2021-09-19 18:28 [GIT pull] locking/urgent for v5.15-rc2 Thomas Gleixner
  2021-09-19 18:28 ` [GIT pull] perf/urgent " Thomas Gleixner
@ 2021-09-19 18:28 ` Thomas Gleixner
  2021-09-19 18:39   ` Borislav Petkov
  2021-09-19 20:10 ` [GIT pull] locking/urgent " Linus Torvalds
  2021-09-19 20:36 ` pr-tracker-bot
  3 siblings, 1 reply; 11+ messages in thread
From: Thomas Gleixner @ 2021-09-19 18:28 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: linux-kernel, x86

Linus,

please pull the latest x86/urgent branch from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-urgent-2021-09-19

up to:  1c1046581f1a: x86/setup: Call early_reserve_memory() earlier


A set of x86 fixes:

  - Prevent an infinite loop in the MCE recovery on return to user space,
    which was caused by a second MCE queueing work for the same page and
    thereby creating a circular work list.

  - Make kern_addr_valid() correctly handle existing PMD entries that are
    marked not present in the higher-level page table, instead of blindly
    dereferencing them.

  - Invoke early_reserve_memory() earlier as the current place is too late
    for Xen dom0.

  - Pass a valid address to sanitize_phys(). This was caused by the mixture
    of inclusive and exclusive ranges: memtype_reserve() expects 'end' to be
    exclusive, but sanitize_phys() wants it inclusive. This worked so far,
    but with 'end' being the end of the physical address space the failure
    is exposed (a small standalone model follows this list).

  - Increase the maximum supported GPIO numbers for 64-bit. Newer SoCs exceed
    the previous maximum.
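As a toy model of that inclusive/exclusive mismatch, the snippet below
compiles on its own; the 52-bit mask and the helper body are assumptions for
illustration only and are not the real sanitize_phys():

#include <stdint.h>
#include <stdio.h>

#define PHYS_MASK	((1ULL << 52) - 1)	/* assumed physical address mask */

static uint64_t sanitize_phys(uint64_t addr)	/* expects an inclusive address */
{
	return addr & PHYS_MASK;
}

int main(void)
{
	uint64_t start = (1ULL << 52) - 4096;	/* last page of physical space */
	uint64_t end   = 1ULL << 52;		/* exclusive end, one past the top */

	/* Old behaviour: the exclusive end wraps to 0 under the mask, so a
	   start >= end sanity check fires although the range is valid. */
	uint64_t old_end = sanitize_phys(end);

	/* Fixed behaviour: convert to inclusive for the call, back after. */
	uint64_t new_end = sanitize_phys(end - 1) + 1;

	printf("start=%#llx old_end=%#llx new_end=%#llx\n",
	       (unsigned long long)start,
	       (unsigned long long)old_end,
	       (unsigned long long)new_end);
	return 0;
}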

Thanks,

	tglx

------------------>
Andy Shevchenko (1):
      x86/platform: Increase maximum GPIO number for X86_64

Jeff Moyer (1):
      x86/pat: Pass valid address to sanitize_phys()

Juergen Gross (1):
      x86/setup: Call early_reserve_memory() earlier

Mike Rapoport (1):
      x86/mm: Fix kern_addr_valid() to cope with existing but not present entries

Tony Luck (1):
      x86/mce: Avoid infinite loop for copy from user recovery


 arch/x86/Kconfig               |  5 +++++
 arch/x86/kernel/cpu/mce/core.c | 43 +++++++++++++++++++++++++++++++-----------
 arch/x86/kernel/setup.c        | 26 +++++++++++++------------
 arch/x86/mm/init_64.c          |  6 +++---
 arch/x86/mm/pat/memtype.c      |  7 ++++++-
 include/linux/sched.h          |  1 +
 6 files changed, 61 insertions(+), 27 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 421fa9e38c60..10163887c5eb 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -340,6 +340,11 @@ config NEED_PER_CPU_PAGE_FIRST_CHUNK
 config ARCH_HIBERNATION_POSSIBLE
 	def_bool y
 
+config ARCH_NR_GPIO
+	int
+	default 1024 if X86_64
+	default 512
+
 config ARCH_SUSPEND_POSSIBLE
 	def_bool y
 
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 8cb7816d03b4..193204aee880 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -1253,6 +1253,9 @@ static void __mc_scan_banks(struct mce *m, struct pt_regs *regs, struct mce *fin
 
 static void kill_me_now(struct callback_head *ch)
 {
+	struct task_struct *p = container_of(ch, struct task_struct, mce_kill_me);
+
+	p->mce_count = 0;
 	force_sig(SIGBUS);
 }
 
@@ -1262,6 +1265,7 @@ static void kill_me_maybe(struct callback_head *cb)
 	int flags = MF_ACTION_REQUIRED;
 	int ret;
 
+	p->mce_count = 0;
 	pr_err("Uncorrected hardware memory error in user-access at %llx", p->mce_addr);
 
 	if (!p->mce_ripv)
@@ -1290,17 +1294,34 @@ static void kill_me_maybe(struct callback_head *cb)
 	}
 }
 
-static void queue_task_work(struct mce *m, int kill_current_task)
+static void queue_task_work(struct mce *m, char *msg, int kill_current_task)
 {
-	current->mce_addr = m->addr;
-	current->mce_kflags = m->kflags;
-	current->mce_ripv = !!(m->mcgstatus & MCG_STATUS_RIPV);
-	current->mce_whole_page = whole_page(m);
+	int count = ++current->mce_count;
 
-	if (kill_current_task)
-		current->mce_kill_me.func = kill_me_now;
-	else
-		current->mce_kill_me.func = kill_me_maybe;
+	/* First call, save all the details */
+	if (count == 1) {
+		current->mce_addr = m->addr;
+		current->mce_kflags = m->kflags;
+		current->mce_ripv = !!(m->mcgstatus & MCG_STATUS_RIPV);
+		current->mce_whole_page = whole_page(m);
+
+		if (kill_current_task)
+			current->mce_kill_me.func = kill_me_now;
+		else
+			current->mce_kill_me.func = kill_me_maybe;
+	}
+
+	/* Ten is likely overkill. Don't expect more than two faults before task_work() */
+	if (count > 10)
+		mce_panic("Too many consecutive machine checks while accessing user data", m, msg);
+
+	/* Second or later call, make sure page address matches the one from first call */
+	if (count > 1 && (current->mce_addr >> PAGE_SHIFT) != (m->addr >> PAGE_SHIFT))
+		mce_panic("Consecutive machine checks to different user pages", m, msg);
+
+	/* Do not call task_work_add() more than once */
+	if (count > 1)
+		return;
 
 	task_work_add(current, &current->mce_kill_me, TWA_RESUME);
 }
@@ -1438,7 +1459,7 @@ noinstr void do_machine_check(struct pt_regs *regs)
 		/* If this triggers there is no way to recover. Die hard. */
 		BUG_ON(!on_thread_stack() || !user_mode(regs));
 
-		queue_task_work(&m, kill_current_task);
+		queue_task_work(&m, msg, kill_current_task);
 
 	} else {
 		/*
@@ -1456,7 +1477,7 @@ noinstr void do_machine_check(struct pt_regs *regs)
 		}
 
 		if (m.kflags & MCE_IN_KERNEL_COPYIN)
-			queue_task_work(&m, kill_current_task);
+			queue_task_work(&m, msg, kill_current_task);
 	}
 out:
 	mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index bff3a784aec5..90951587a247 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -766,6 +766,20 @@ dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p)
 
 void __init setup_arch(char **cmdline_p)
 {
+	/*
+	 * Do some memory reservations *before* memory is added to memblock, so
+	 * memblock allocations won't overwrite it.
+	 *
+	 * After this point, everything still needed from the boot loader or
+	 * firmware or kernel text should be early reserved or marked not RAM in
+	 * e820. All other memory is free game.
+	 *
+	 * This call needs to happen before e820__memory_setup() which calls
+	 * xen_memory_setup() on Xen dom0 which relies on the fact that those
+	 * early reservations have happened already.
+	 */
+	early_reserve_memory();
+
 #ifdef CONFIG_X86_32
 	memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
 
@@ -885,18 +899,6 @@ void __init setup_arch(char **cmdline_p)
 
 	parse_early_param();
 
-	/*
-	 * Do some memory reservations *before* memory is added to
-	 * memblock, so memblock allocations won't overwrite it.
-	 * Do it after early param, so we could get (unlikely) panic from
-	 * serial.
-	 *
-	 * After this point everything still needed from the boot loader or
-	 * firmware or kernel text should be early reserved or marked not
-	 * RAM in e820. All other memory is free game.
-	 */
-	early_reserve_memory();
-
 #ifdef CONFIG_MEMORY_HOTPLUG
 	/*
 	 * Memory used by the kernel cannot be hot-removed because Linux
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index ddeaba947eb3..879886c6cc53 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1433,18 +1433,18 @@ int kern_addr_valid(unsigned long addr)
 		return 0;
 
 	p4d = p4d_offset(pgd, addr);
-	if (p4d_none(*p4d))
+	if (!p4d_present(*p4d))
 		return 0;
 
 	pud = pud_offset(p4d, addr);
-	if (pud_none(*pud))
+	if (!pud_present(*pud))
 		return 0;
 
 	if (pud_large(*pud))
 		return pfn_valid(pud_pfn(*pud));
 
 	pmd = pmd_offset(pud, addr);
-	if (pmd_none(*pmd))
+	if (!pmd_present(*pmd))
 		return 0;
 
 	if (pmd_large(*pmd))
diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c
index 3112ca7786ed..4ba2a3ee4bce 100644
--- a/arch/x86/mm/pat/memtype.c
+++ b/arch/x86/mm/pat/memtype.c
@@ -583,7 +583,12 @@ int memtype_reserve(u64 start, u64 end, enum page_cache_mode req_type,
 	int err = 0;
 
 	start = sanitize_phys(start);
-	end = sanitize_phys(end);
+
+	/*
+	 * The end address passed into this function is exclusive, but
+	 * sanitize_phys() expects an inclusive address.
+	 */
+	end = sanitize_phys(end - 1) + 1;
 	if (start >= end) {
 		WARN(1, "%s failed: [mem %#010Lx-%#010Lx], req %s\n", __func__,
 				start, end - 1, cattr_name(req_type));
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1780260f237b..361c7bc72cbb 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1468,6 +1468,7 @@ struct task_struct {
 					mce_whole_page : 1,
 					__mce_reserved : 62;
 	struct callback_head		mce_kill_me;
+	int				mce_count;
 #endif
 
 #ifdef CONFIG_KRETPROBES



* Re: [GIT pull] x86/urgent for v5.15-rc2
  2021-09-19 18:28 ` [GIT pull] x86/urgent " Thomas Gleixner
@ 2021-09-19 18:39   ` Borislav Petkov
  2021-09-19 18:47     ` [GIT PULL] Updated " Borislav Petkov
  0 siblings, 1 reply; 11+ messages in thread
From: Borislav Petkov @ 2021-09-19 18:39 UTC (permalink / raw)
  To: Thomas Gleixner, Linus Torvalds; +Cc: linux-kernel, x86

Hi Linus,

On Sun, Sep 19, 2021 at 08:28:10PM +0200, Thomas Gleixner wrote:
> Juergen Gross (1):
>       x86/setup: Call early_reserve_memory() earlier

this one has been reported as failing on some machines:

https://lkml.kernel.org/r/4422257385dbee913eb5270bda5fded7fbb993ab.camel@gmx.de

please hold off on merging this.

I'm preparing a new pull without it.

Thx.

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette


* [GIT PULL] Updated x86/urgent for v5.15-rc2
  2021-09-19 18:39   ` Borislav Petkov
@ 2021-09-19 18:47     ` Borislav Petkov
  2021-09-19 20:36       ` pr-tracker-bot
  0 siblings, 1 reply; 11+ messages in thread
From: Borislav Petkov @ 2021-09-19 18:47 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Thomas Gleixner, linux-kernel, x86

Hi Linus,

here's the updated x86/urgent set of changes for 5.15-rc2.

Please merge this one instead with the broken patch removed.

Thx.

---

The following changes since commit 8596e589b787732c8346f0482919e83cc9362db1:

  Merge tag 'timers-core-2021-08-30' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip (2021-08-30 15:31:33 -0700)

are available in the Git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git tags/x86_urgent_for_v5.15_rc2

for you to fetch changes up to 81065b35e2486c024c7aa86caed452e1f01a59d4:

  x86/mce: Avoid infinite loop for copy from user recovery (2021-09-14 10:27:03 +0200)

----------------------------------------------------------------
A set of x86 fixes:

  - Prevent an infinite loop in the MCE recovery on return to user space,
    which was caused by a second MCE queueing work for the same page and
    thereby creating a circular work list.

  - Make kern_addr_valid() correctly handle existing PMD entries that are
    marked not present in the higher-level page table, instead of blindly
    dereferencing them.

  - Pass a valid address to sanitize_phys(). This was caused by the mixture
    of inclusive and exclusive ranges: memtype_reserve() expects 'end' to be
    exclusive, but sanitize_phys() wants it inclusive. This worked so far,
    but with 'end' being the end of the physical address space the failure
    is exposed.

  - Increase the maximum supported GPIO numbers for 64-bit. Newer SoCs exceed
    the previous maximum.

----------------------------------------------------------------
Andy Shevchenko (1):
      x86/platform: Increase maximum GPIO number for X86_64

Jeff Moyer (1):
      x86/pat: Pass valid address to sanitize_phys()

Mike Rapoport (1):
      x86/mm: Fix kern_addr_valid() to cope with existing but not present entries

Tony Luck (1):
      x86/mce: Avoid infinite loop for copy from user recovery

 arch/x86/Kconfig               |  5 +++++
 arch/x86/kernel/cpu/mce/core.c | 43 +++++++++++++++++++++++++++++++-----------
 arch/x86/mm/init_64.c          |  6 +++---
 arch/x86/mm/pat/memtype.c      |  7 ++++++-
 include/linux/sched.h          |  1 +
 5 files changed, 47 insertions(+), 15 deletions(-)
-- 
Regards/Gruss,
    Boris.

SUSE Software Solutions Germany GmbH, GF: Felix Imendörffer, HRB 36809, AG Nürnberg
-- 


* Re: [GIT pull] locking/urgent for v5.15-rc2
  2021-09-19 18:28 [GIT pull] locking/urgent for v5.15-rc2 Thomas Gleixner
  2021-09-19 18:28 ` [GIT pull] perf/urgent " Thomas Gleixner
  2021-09-19 18:28 ` [GIT pull] x86/urgent " Thomas Gleixner
@ 2021-09-19 20:10 ` Linus Torvalds
  2021-09-20 11:27   ` Peter Zijlstra
  2021-09-19 20:36 ` pr-tracker-bot
  3 siblings, 1 reply; 11+ messages in thread
From: Linus Torvalds @ 2021-09-19 20:10 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: Linux Kernel Mailing List, the arch/x86 maintainers

[-- Attachment #1: Type: text/plain, Size: 850 bytes --]

On Sun, Sep 19, 2021 at 11:28 AM Thomas Gleixner <tglx@linutronix.de> wrote:
>
>
> -       atomic_add(READER_BIAS - bias, &rwb->readers);
> +       /*
> +        * _release() is needed in case that reader is in fast path, pairing
> +        * with atomic_try_cmpxchg() in rwbase_read_trylock(), provides RELEASE
> +        */
> +       (void)atomic_add_return_release(READER_BIAS - bias, &rwb->readers);

Ugh. This really needs fixing.

atomic_add() is already much more than release-ordered on x86, and
atomic_add_return_release() is much more expensive on some uarchs.

I think it should be easy to add an atomic_add_release() function, and
it might be as simple as the attached patch, allowing architectures to
add their own arch_atomic_add_release() as needed.

I've pulled this, but please don't do things like the above hack.

            Linus

[-- Attachment #2: patch.diff --]
[-- Type: text/x-patch, Size: 1855 bytes --]

 arch/x86/include/asm/atomic.h               | 2 ++
 include/linux/atomic/atomic-arch-fallback.h | 4 ++++
 include/linux/atomic/atomic-instrumented.h  | 7 +++++++
 3 files changed, 13 insertions(+)

diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index 5e754e895767..6c5814177c73 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -55,6 +55,8 @@ static __always_inline void arch_atomic_add(int i, atomic_t *v)
 		     : "ir" (i) : "memory");
 }
 
+#define arch_atomic_add_release arch_atomic_add
+
 /**
  * arch_atomic_sub - subtract integer from atomic variable
  * @i: integer value to subtract
diff --git a/include/linux/atomic/atomic-arch-fallback.h b/include/linux/atomic/atomic-arch-fallback.h
index a3dba31df01e..ae437d961bd1 100644
--- a/include/linux/atomic/atomic-arch-fallback.h
+++ b/include/linux/atomic/atomic-arch-fallback.h
@@ -165,6 +165,10 @@ arch_atomic_set_release(atomic_t *v, int i)
 #define arch_atomic_set_release arch_atomic_set_release
 #endif
 
+#ifndef arch_atomic_add_release
+#define arch_atomic_add_release (void)arch_atomic_add_return_release
+#endif
+
 #ifndef arch_atomic_add_return_relaxed
 #define arch_atomic_add_return_acquire arch_atomic_add_return
 #define arch_atomic_add_return_release arch_atomic_add_return
diff --git a/include/linux/atomic/atomic-instrumented.h b/include/linux/atomic/atomic-instrumented.h
index a0f654370da3..485b89804b9d 100644
--- a/include/linux/atomic/atomic-instrumented.h
+++ b/include/linux/atomic/atomic-instrumented.h
@@ -56,6 +56,13 @@ atomic_add(int i, atomic_t *v)
 	arch_atomic_add(i, v);
 }
 
+static __always_inline void
+atomic_add_release(int i, atomic_t *v)
+{
+	instrument_atomic_read_write(v, sizeof(*v));
+	arch_atomic_add_release(i, v);
+}
+
 static __always_inline int
 atomic_add_return(int i, atomic_t *v)
 {


* Re: [GIT pull] perf/urgent for v5.15-rc2
  2021-09-19 18:28 ` [GIT pull] perf/urgent " Thomas Gleixner
@ 2021-09-19 20:36   ` pr-tracker-bot
  0 siblings, 0 replies; 11+ messages in thread
From: pr-tracker-bot @ 2021-09-19 20:36 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: Linus Torvalds, linux-kernel, x86

The pull request you sent on Sun, 19 Sep 2021 20:28:09 +0200 (CEST):

> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf-urgent-2021-09-19

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/fec3036200b7d9df32c94eb2616447eb2f6e09ac

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/prtracker.html


* Re: [GIT pull] locking/urgent for v5.15-rc2
  2021-09-19 18:28 [GIT pull] locking/urgent for v5.15-rc2 Thomas Gleixner
                   ` (2 preceding siblings ...)
  2021-09-19 20:10 ` [GIT pull] locking/urgent " Linus Torvalds
@ 2021-09-19 20:36 ` pr-tracker-bot
  3 siblings, 0 replies; 11+ messages in thread
From: pr-tracker-bot @ 2021-09-19 20:36 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: Linus Torvalds, linux-kernel, x86

The pull request you sent on Sun, 19 Sep 2021 20:28:07 +0200 (CEST):

> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking-urgent-2021-09-19

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/f5e29a26c42b2c1b149118319a03bf937f168298

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/prtracker.html


* Re: [GIT PULL] Updated x86/urgent for v5.15-rc2
  2021-09-19 18:47     ` [GIT PULL] Updated " Borislav Petkov
@ 2021-09-19 20:36       ` pr-tracker-bot
  0 siblings, 0 replies; 11+ messages in thread
From: pr-tracker-bot @ 2021-09-19 20:36 UTC (permalink / raw)
  To: Borislav Petkov; +Cc: Linus Torvalds, Thomas Gleixner, linux-kernel, x86

The pull request you sent on Sun, 19 Sep 2021 20:47:09 +0200:

> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git tags/x86_urgent_for_v5.15_rc2

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/20621d2f27a0163b81dc2b74fd4c0b3e6aa5fa12

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/prtracker.html


* Re: [GIT pull] locking/urgent for v5.15-rc2
  2021-09-19 20:10 ` [GIT pull] locking/urgent " Linus Torvalds
@ 2021-09-20 11:27   ` Peter Zijlstra
  2021-09-20 15:23     ` Linus Torvalds
  0 siblings, 1 reply; 11+ messages in thread
From: Peter Zijlstra @ 2021-09-20 11:27 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Thomas Gleixner, Linux Kernel Mailing List, the arch/x86 maintainers

On Sun, Sep 19, 2021 at 01:10:45PM -0700, Linus Torvalds wrote:
> On Sun, Sep 19, 2021 at 11:28 AM Thomas Gleixner <tglx@linutronix.de> wrote:
> >
> >
> > -       atomic_add(READER_BIAS - bias, &rwb->readers);
> > +       /*
> > +        * _release() is needed in case that reader is in fast path, pairing
> > +        * with atomic_try_cmpxchg() in rwbase_read_trylock(), provides RELEASE
> > +        */
> > +       (void)atomic_add_return_release(READER_BIAS - bias, &rwb->readers);
> 
> Ugh. This really needs fixing.
> 
> atomic_add() is already much more than release-ordered on x86, and
> atomic_add_return_release() is much more expensive on some uarchs.

Is that XADD really more expensive? I don't think I've ever seen that.
Anyway, sure, we can go create atomic_{add,sub}_release() and replace
the few occurrences we have of this (a quick grep shows this is the 3rd
such).


* Re: [GIT pull] locking/urgent for v5.15-rc2
  2021-09-20 11:27   ` Peter Zijlstra
@ 2021-09-20 15:23     ` Linus Torvalds
  0 siblings, 0 replies; 11+ messages in thread
From: Linus Torvalds @ 2021-09-20 15:23 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Thomas Gleixner, Linux Kernel Mailing List, the arch/x86 maintainers

On Mon, Sep 20, 2021 at 4:27 AM Peter Zijlstra <peterz@infradead.org> wrote:
>
> Is that XADD really more expensive? I don't think I've ever seen that.

Hmm. Looking at newer tables from Agner Fog, it doesn't seem to be the
big difference it used to be, so maybe not a big deal. It shows up as
many more uops, but the lock synchronization ends up dominating.

I just remember from when xadd was new, and it was noticeably slower.

                 Linus


