* [PATCH v3 0/6] smp: irq_work / smp_call_function rework
@ 2020-10-28 11:07 Peter Zijlstra
  2020-10-28 11:07 ` [PATCH v3 1/6] irq_work: Cleanup Peter Zijlstra
                   ` (5 more replies)
  0 siblings, 6 replies; 24+ messages in thread
From: Peter Zijlstra @ 2020-10-28 11:07 UTC (permalink / raw)
  To: mingo
  Cc: linux-kernel, will, paulmck, hch, axboe, chris, davem, kuba,
	fweisbec, oleg, vincent.guittot, peterz

Hi,

These are the non-controversial irq_work / smp_call_function integration /
cleanup patches.

There's one RFC patch to use the new irq_work_queue_remote(), which seems to
survive rcutorture.

Please consider.



* [PATCH v3 1/6] irq_work: Cleanup
  2020-10-28 11:07 [PATCH v3 0/6] smp: irq_work / smp_call_function rework Peter Zijlstra
@ 2020-10-28 11:07 ` Peter Zijlstra
  2020-10-28 13:01   ` Frederic Weisbecker
  2020-10-28 11:07 ` [PATCH v3 2/6] smp: Cleanup smp_call_function*() Peter Zijlstra
                   ` (4 subsequent siblings)
  5 siblings, 1 reply; 24+ messages in thread
From: Peter Zijlstra @ 2020-10-28 11:07 UTC (permalink / raw)
  To: mingo
  Cc: linux-kernel, will, paulmck, hch, axboe, chris, davem, kuba,
	fweisbec, oleg, vincent.guittot, peterz

Get rid of the __call_single_node union and clean up the API a little
to avoid external code relying on the structure layout as much.
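
To illustrate the intended usage (not part of this patch; the handler and
work item names below are made up), external code is expected to go through
the new initializers and accessors instead of poking at the flags word:

#include <linux/irq_work.h>

static void my_handler(struct irq_work *work)
{
        /* invoked from hard IRQ context (or from the tick for lazy work) */
}

static DEFINE_IRQ_WORK(my_work, my_handler);

static void kick(void)
{
        /* was: atomic_read(&my_work.flags) & IRQ_WORK_BUSY */
        if (irq_work_is_busy(&my_work))
                return;

        irq_work_queue(&my_work);
}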

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 drivers/gpu/drm/i915/i915_request.c |    4 ++--
 include/linux/irq_work.h            |   33 +++++++++++++++++++++------------
 include/linux/irqflags.h            |    4 ++--
 kernel/bpf/stackmap.c               |    2 +-
 kernel/irq_work.c                   |   18 +++++++++---------
 kernel/printk/printk.c              |    6 ++----
 kernel/rcu/tree.c                   |    3 +--
 kernel/time/tick-sched.c            |    6 ++----
 kernel/trace/bpf_trace.c            |    2 +-
 9 files changed, 41 insertions(+), 37 deletions(-)

--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -197,7 +197,7 @@ __notify_execute_cb(struct i915_request
 
 	llist_for_each_entry_safe(cb, cn,
 				  llist_del_all(&rq->execute_cb),
-				  work.llnode)
+				  work.node.llist)
 		fn(&cb->work);
 }
 
@@ -460,7 +460,7 @@ __await_execution(struct i915_request *r
 	 * callback first, then checking the ACTIVE bit, we serialise with
 	 * the completed/retired request.
 	 */
-	if (llist_add(&cb->work.llnode, &signal->execute_cb)) {
+	if (llist_add(&cb->work.node.llist, &signal->execute_cb)) {
 		if (i915_request_is_active(signal) ||
 		    __request_in_flight(signal))
 			__notify_execute_cb_imm(signal);
--- a/include/linux/irq_work.h
+++ b/include/linux/irq_work.h
@@ -14,28 +14,37 @@
  */
 
 struct irq_work {
-	union {
-		struct __call_single_node node;
-		struct {
-			struct llist_node llnode;
-			atomic_t flags;
-		};
-	};
+	struct __call_single_node node;
 	void (*func)(struct irq_work *);
 };
 
+#define __IRQ_WORK_INIT(_func, _flags) (struct irq_work){	\
+	.node = { .u_flags = (_flags), },			\
+	.func = (_func),					\
+}
+
+#define IRQ_WORK_INIT(_func) __IRQ_WORK_INIT(_func, 0)
+#define IRQ_WORK_INIT_LAZY(_func) __IRQ_WORK_INIT(_func, IRQ_WORK_LAZY)
+#define IRQ_WORK_INIT_HARD(_func) __IRQ_WORK_INIT(_func, IRQ_WORK_HARD_IRQ)
+
+#define DEFINE_IRQ_WORK(name, _f)				\
+	struct irq_work name = IRQ_WORK_INIT(_f)
+
 static inline
 void init_irq_work(struct irq_work *work, void (*func)(struct irq_work *))
 {
-	atomic_set(&work->flags, 0);
-	work->func = func;
+	*work = IRQ_WORK_INIT(func);
 }
 
-#define DEFINE_IRQ_WORK(name, _f) struct irq_work name = {	\
-		.flags = ATOMIC_INIT(0),			\
-		.func  = (_f)					\
+static inline bool irq_work_is_pending(struct irq_work *work)
+{
+	return atomic_read(&work->node.a_flags) & IRQ_WORK_PENDING;
 }
 
+static inline bool irq_work_is_busy(struct irq_work *work)
+{
+	return atomic_read(&work->node.a_flags) & IRQ_WORK_BUSY;
+}
 
 bool irq_work_queue(struct irq_work *work);
 bool irq_work_queue_on(struct irq_work *work, int cpu);
--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -109,12 +109,12 @@ do {						\
 
 # define lockdep_irq_work_enter(__work)					\
 	  do {								\
-		  if (!(atomic_read(&__work->flags) & IRQ_WORK_HARD_IRQ))\
+		  if (!(atomic_read(&__work->node.a_flags) & IRQ_WORK_HARD_IRQ))\
 			current->irq_config = 1;			\
 	  } while (0)
 # define lockdep_irq_work_exit(__work)					\
 	  do {								\
-		  if (!(atomic_read(&__work->flags) & IRQ_WORK_HARD_IRQ))\
+		  if (!(atomic_read(&__work->node.a_flags) & IRQ_WORK_HARD_IRQ))\
 			current->irq_config = 0;			\
 	  } while (0)
 
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -298,7 +298,7 @@ static void stack_map_get_build_id_offse
 	if (irqs_disabled()) {
 		if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
 			work = this_cpu_ptr(&up_read_work);
-			if (atomic_read(&work->irq_work.flags) & IRQ_WORK_BUSY) {
+			if (irq_work_is_busy(&work->irq_work)) {
 				/* cannot queue more up_read, fallback */
 				irq_work_busy = true;
 			}
--- a/kernel/irq_work.c
+++ b/kernel/irq_work.c
@@ -31,7 +31,7 @@ static bool irq_work_claim(struct irq_wo
 {
 	int oflags;
 
-	oflags = atomic_fetch_or(IRQ_WORK_CLAIMED | CSD_TYPE_IRQ_WORK, &work->flags);
+	oflags = atomic_fetch_or(IRQ_WORK_CLAIMED | CSD_TYPE_IRQ_WORK, &work->node.a_flags);
 	/*
 	 * If the work is already pending, no need to raise the IPI.
 	 * The pairing atomic_fetch_andnot() in irq_work_run() makes sure
@@ -53,12 +53,12 @@ void __weak arch_irq_work_raise(void)
 static void __irq_work_queue_local(struct irq_work *work)
 {
 	/* If the work is "lazy", handle it from next tick if any */
-	if (atomic_read(&work->flags) & IRQ_WORK_LAZY) {
-		if (llist_add(&work->llnode, this_cpu_ptr(&lazy_list)) &&
+	if (atomic_read(&work->node.a_flags) & IRQ_WORK_LAZY) {
+		if (llist_add(&work->node.llist, this_cpu_ptr(&lazy_list)) &&
 		    tick_nohz_tick_stopped())
 			arch_irq_work_raise();
 	} else {
-		if (llist_add(&work->llnode, this_cpu_ptr(&raised_list)))
+		if (llist_add(&work->node.llist, this_cpu_ptr(&raised_list)))
 			arch_irq_work_raise();
 	}
 }
@@ -102,7 +102,7 @@ bool irq_work_queue_on(struct irq_work *
 	if (cpu != smp_processor_id()) {
 		/* Arch remote IPI send/receive backend aren't NMI safe */
 		WARN_ON_ONCE(in_nmi());
-		__smp_call_single_queue(cpu, &work->llnode);
+		__smp_call_single_queue(cpu, &work->node.llist);
 	} else {
 		__irq_work_queue_local(work);
 	}
@@ -142,7 +142,7 @@ void irq_work_single(void *arg)
 	 * to claim that work don't rely on us to handle their data
 	 * while we are in the middle of the func.
 	 */
-	flags = atomic_fetch_andnot(IRQ_WORK_PENDING, &work->flags);
+	flags = atomic_fetch_andnot(IRQ_WORK_PENDING, &work->node.a_flags);
 
 	lockdep_irq_work_enter(work);
 	work->func(work);
@@ -152,7 +152,7 @@ void irq_work_single(void *arg)
 	 * no-one else claimed it meanwhile.
 	 */
 	flags &= ~IRQ_WORK_PENDING;
-	(void)atomic_cmpxchg(&work->flags, flags, flags & ~IRQ_WORK_BUSY);
+	(void)atomic_cmpxchg(&work->node.a_flags, flags, flags & ~IRQ_WORK_BUSY);
 }
 
 static void irq_work_run_list(struct llist_head *list)
@@ -166,7 +166,7 @@ static void irq_work_run_list(struct lli
 		return;
 
 	llnode = llist_del_all(list);
-	llist_for_each_entry_safe(work, tmp, llnode, llnode)
+	llist_for_each_entry_safe(work, tmp, llnode, node.llist)
 		irq_work_single(work);
 }
 
@@ -198,7 +198,7 @@ void irq_work_sync(struct irq_work *work
 {
 	lockdep_assert_irqs_enabled();
 
-	while (atomic_read(&work->flags) & IRQ_WORK_BUSY)
+	while (irq_work_is_busy(work))
 		cpu_relax();
 }
 EXPORT_SYMBOL_GPL(irq_work_sync);
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -3025,10 +3025,8 @@ static void wake_up_klogd_work_func(stru
 		wake_up_interruptible(&log_wait);
 }
 
-static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) = {
-	.func = wake_up_klogd_work_func,
-	.flags = ATOMIC_INIT(IRQ_WORK_LAZY),
-};
+static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) =
+	IRQ_WORK_INIT_LAZY(wake_up_klogd_work_func);
 
 void wake_up_klogd(void)
 {
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1311,8 +1311,6 @@ static int rcu_implicit_dynticks_qs(stru
 		if (IS_ENABLED(CONFIG_IRQ_WORK) &&
 		    !rdp->rcu_iw_pending && rdp->rcu_iw_gp_seq != rnp->gp_seq &&
 		    (rnp->ffmask & rdp->grpmask)) {
-			init_irq_work(&rdp->rcu_iw, rcu_iw_handler);
-			atomic_set(&rdp->rcu_iw.flags, IRQ_WORK_HARD_IRQ);
 			rdp->rcu_iw_pending = true;
 			rdp->rcu_iw_gp_seq = rnp->gp_seq;
 			irq_work_queue_on(&rdp->rcu_iw, rdp->cpu);
@@ -3964,6 +3962,7 @@ int rcutree_prepare_cpu(unsigned int cpu
 	rdp->cpu_no_qs.b.norm = true;
 	rdp->core_needs_qs = false;
 	rdp->rcu_iw_pending = false;
+	rdp->rcu_iw = IRQ_WORK_INIT_HARD(rcu_iw_handler);
 	rdp->rcu_iw_gp_seq = rdp->gp_seq - 1;
 	trace_rcu_grace_period(rcu_state.name, rdp->gp_seq, TPS("cpuonl"));
 	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -243,10 +243,8 @@ static void nohz_full_kick_func(struct i
 	/* Empty, the tick restart happens on tick_nohz_irq_exit() */
 }
 
-static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
-	.func = nohz_full_kick_func,
-	.flags = ATOMIC_INIT(IRQ_WORK_HARD_IRQ),
-};
+static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) =
+	IRQ_WORK_INIT_HARD(nohz_full_kick_func);
 
 /*
  * Kick this CPU if it's full dynticks in order to force it to
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1086,7 +1086,7 @@ static int bpf_send_signal_common(u32 si
 			return -EINVAL;
 
 		work = this_cpu_ptr(&send_signal_work);
-		if (atomic_read(&work->irq_work.flags) & IRQ_WORK_BUSY)
+		if (irq_work_is_busy(&work->irq_work))
 			return -EBUSY;
 
 		/* Add the current task, which is the target of sending signal,




* [PATCH v3 2/6] smp: Cleanup smp_call_function*()
  2020-10-28 11:07 [PATCH v3 0/6] smp: irq_work / smp_call_function rework Peter Zijlstra
  2020-10-28 11:07 ` [PATCH v3 1/6] irq_work: Cleanup Peter Zijlstra
@ 2020-10-28 11:07 ` Peter Zijlstra
  2020-10-28 13:25   ` Frederic Weisbecker
  2020-10-28 11:07 ` [PATCH v3 3/6] irq_work: Optimize irq_work_single() Peter Zijlstra
                   ` (3 subsequent siblings)
  5 siblings, 1 reply; 24+ messages in thread
From: Peter Zijlstra @ 2020-10-28 11:07 UTC (permalink / raw)
  To: mingo
  Cc: linux-kernel, will, paulmck, hch, axboe, chris, davem, kuba,
	fweisbec, oleg, vincent.guittot, peterz

Get rid of the __call_single_node union and clean up the API a little
to avoid external code relying on the structure layout as much.
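
For illustration only (hypothetical caller and names, not part of this
patch), a csd is now set up through INIT_CSD() instead of assigning
->func, ->info and ->flags by hand:

#include <linux/smp.h>

static void remote_func(void *info)
{
        /* runs on the target CPU */
}

static int kick_cpu(int cpu, call_single_data_t *csd, void *info)
{
        /* was: csd->func = ...; csd->info = ...; csd->flags = 0; */
        INIT_CSD(csd, remote_func, info);

        /* returns -EBUSY if @csd is still locked from a previous use */
        return smp_call_function_single_async(cpu, csd);
}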

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/mips/kernel/process.c                      |    5 +-
 arch/mips/kernel/smp.c                          |   25 ++----------
 arch/s390/pci/pci_irq.c                         |    4 -
 arch/x86/kernel/cpuid.c                         |    7 +--
 arch/x86/lib/msr-smp.c                          |    7 +--
 block/blk-mq.c                                  |    4 -
 drivers/cpuidle/coupled.c                       |    3 -
 drivers/net/ethernet/cavium/liquidio/lio_core.c |    9 ----
 include/linux/smp.h                             |   19 ++++-----
 kernel/debug/debug_core.c                       |    6 +-
 kernel/sched/core.c                             |   12 -----
 kernel/smp.c                                    |   50 ++++++++++++------------
 net/core/dev.c                                  |    3 -
 13 files changed, 60 insertions(+), 94 deletions(-)

--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -702,7 +702,6 @@ unsigned long arch_align_stack(unsigned
 	return sp & ALMASK;
 }
 
-static DEFINE_PER_CPU(call_single_data_t, backtrace_csd);
 static struct cpumask backtrace_csd_busy;
 
 static void handle_backtrace(void *info)
@@ -711,6 +710,9 @@ static void handle_backtrace(void *info)
 	cpumask_clear_cpu(smp_processor_id(), &backtrace_csd_busy);
 }
 
+static DEFINE_PER_CPU(call_single_data_t, backtrace_csd) =
+	CSD_INIT(handle_backtrace, NULL);
+
 static void raise_backtrace(cpumask_t *mask)
 {
 	call_single_data_t *csd;
@@ -730,7 +732,6 @@ static void raise_backtrace(cpumask_t *m
 		}
 
 		csd = &per_cpu(backtrace_csd, cpu);
-		csd->func = handle_backtrace;
 		smp_call_function_single_async(cpu, csd);
 	}
 }
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -687,36 +687,23 @@ EXPORT_SYMBOL(flush_tlb_one);
 
 #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
 
-static DEFINE_PER_CPU(call_single_data_t, tick_broadcast_csd);
-
-void tick_broadcast(const struct cpumask *mask)
-{
-	call_single_data_t *csd;
-	int cpu;
-
-	for_each_cpu(cpu, mask) {
-		csd = &per_cpu(tick_broadcast_csd, cpu);
-		smp_call_function_single_async(cpu, csd);
-	}
-}
-
 static void tick_broadcast_callee(void *info)
 {
 	tick_receive_broadcast();
 }
 
-static int __init tick_broadcast_init(void)
+static DEFINE_PER_CPU(call_single_data_t, tick_broadcast_csd) =
+	CSD_INIT(tick_broadcast_callee, NULL);
+
+void tick_broadcast(const struct cpumask *mask)
 {
 	call_single_data_t *csd;
 	int cpu;
 
-	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+	for_each_cpu(cpu, mask) {
 		csd = &per_cpu(tick_broadcast_csd, cpu);
-		csd->func = tick_broadcast_callee;
+		smp_call_function_single_async(cpu, csd);
 	}
-
-	return 0;
 }
-early_initcall(tick_broadcast_init);
 
 #endif /* CONFIG_GENERIC_CLOCKEVENTS_BROADCAST */
--- a/arch/s390/pci/pci_irq.c
+++ b/arch/s390/pci/pci_irq.c
@@ -178,9 +178,7 @@ static void zpci_handle_fallback_irq(voi
 		if (atomic_inc_return(&cpu_data->scheduled) > 1)
 			continue;
 
-		cpu_data->csd.func = zpci_handle_remote_irq;
-		cpu_data->csd.info = &cpu_data->scheduled;
-		cpu_data->csd.flags = 0;
+		INIT_CSD(&cpu_data->csd, zpci_handle_remote_irq, &cpu_data->scheduled);
 		smp_call_function_single_async(cpu, &cpu_data->csd);
 	}
 }
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -74,10 +74,9 @@ static ssize_t cpuid_read(struct file *f
 
 	init_completion(&cmd.done);
 	for (; count; count -= 16) {
-		call_single_data_t csd = {
-			.func = cpuid_smp_cpuid,
-			.info = &cmd,
-		};
+		call_single_data_t csd;
+
+		INIT_CSD(&csd, cpuid_smp_cpuid, &cmd);
 
 		cmd.regs.eax = pos;
 		cmd.regs.ecx = pos >> 32;
--- a/arch/x86/lib/msr-smp.c
+++ b/arch/x86/lib/msr-smp.c
@@ -169,12 +169,11 @@ static void __wrmsr_safe_on_cpu(void *in
 int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
 {
 	struct msr_info_completion rv;
-	call_single_data_t csd = {
-		.func	= __rdmsr_safe_on_cpu,
-		.info	= &rv,
-	};
+	call_single_data_t csd;
 	int err;
 
+	INIT_CSD(&csd, __rdmsr_safe_on_cpu, &rv);
+
 	memset(&rv, 0, sizeof(rv));
 	init_completion(&rv.done);
 	rv.msr.msr_no = msr_no;
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -671,9 +671,7 @@ bool blk_mq_complete_request_remote(stru
 		return false;
 
 	if (blk_mq_complete_need_ipi(rq)) {
-		rq->csd.func = __blk_mq_complete_request_remote;
-		rq->csd.info = rq;
-		rq->csd.flags = 0;
+		INIT_CSD(&rq->csd, __blk_mq_complete_request_remote, rq);
 		smp_call_function_single_async(rq->mq_ctx->cpu, &rq->csd);
 	} else {
 		if (rq->q->nr_hw_queues > 1)
--- a/drivers/cpuidle/coupled.c
+++ b/drivers/cpuidle/coupled.c
@@ -674,8 +674,7 @@ int cpuidle_coupled_register_device(stru
 	coupled->refcnt++;
 
 	csd = &per_cpu(cpuidle_coupled_poke_cb, dev->cpu);
-	csd->func = cpuidle_coupled_handle_poke;
-	csd->info = (void *)(unsigned long)dev->cpu;
+	INIT_CSD(csd, cpuidle_coupled_handle_poke, (void *)(unsigned long)dev->cpu);
 
 	return 0;
 }
--- a/drivers/net/ethernet/cavium/liquidio/lio_core.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_core.c
@@ -729,13 +729,8 @@ static void liquidio_napi_drv_callback(v
 	    droq->cpu_id == this_cpu) {
 		napi_schedule_irqoff(&droq->napi);
 	} else {
-		call_single_data_t *csd = &droq->csd;
-
-		csd->func = napi_schedule_wrapper;
-		csd->info = &droq->napi;
-		csd->flags = 0;
-
-		smp_call_function_single_async(droq->cpu_id, csd);
+		INIT_CSD(&droq->csd, napi_schedule_wrapper, &droq->napi);
+		smp_call_function_single_async(droq->cpu_id, &droq->csd);
 	}
 }
 
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -21,24 +21,23 @@ typedef bool (*smp_cond_func_t)(int cpu,
  * structure shares (partial) layout with struct irq_work
  */
 struct __call_single_data {
-	union {
-		struct __call_single_node node;
-		struct {
-			struct llist_node llist;
-			unsigned int flags;
-#ifdef CONFIG_64BIT
-			u16 src, dst;
-#endif
-		};
-	};
+	struct __call_single_node node;
 	smp_call_func_t func;
 	void *info;
 };
 
+#define CSD_INIT(_func, _info) \
+	(struct __call_single_data){ .func = (_func), .info = (_info), }
+
 /* Use __aligned() to avoid to use 2 cache lines for 1 csd */
 typedef struct __call_single_data call_single_data_t
 	__aligned(sizeof(struct __call_single_data));
 
+#define INIT_CSD(_csd, _func, _info)		\
+do {						\
+	*(_csd) = CSD_INIT((_func), (_info));	\
+} while (0)
+
 /*
  * Enqueue a llist_node on the call_single_queue; be very careful, read
  * flush_smp_call_function_queue() in detail.
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -225,8 +225,6 @@ NOKPROBE_SYMBOL(kgdb_skipexception);
  * Default (weak) implementation for kgdb_roundup_cpus
  */
 
-static DEFINE_PER_CPU(call_single_data_t, kgdb_roundup_csd);
-
 void __weak kgdb_call_nmi_hook(void *ignored)
 {
 	/*
@@ -241,6 +239,9 @@ void __weak kgdb_call_nmi_hook(void *ign
 }
 NOKPROBE_SYMBOL(kgdb_call_nmi_hook);
 
+static DEFINE_PER_CPU(call_single_data_t, kgdb_roundup_csd) =
+	CSD_INIT(kgdb_call_nmi_hook, NULL);
+
 void __weak kgdb_roundup_cpus(void)
 {
 	call_single_data_t *csd;
@@ -267,7 +268,6 @@ void __weak kgdb_roundup_cpus(void)
 			continue;
 		kgdb_info[cpu].rounding_up = true;
 
-		csd->func = kgdb_call_nmi_hook;
 		ret = smp_call_function_single_async(cpu, csd);
 		if (ret)
 			kgdb_info[cpu].rounding_up = false;
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -320,14 +320,6 @@ void update_rq_clock(struct rq *rq)
 	update_rq_clock_task(rq, delta);
 }
 
-static inline void
-rq_csd_init(struct rq *rq, call_single_data_t *csd, smp_call_func_t func)
-{
-	csd->flags = 0;
-	csd->func = func;
-	csd->info = rq;
-}
-
 #ifdef CONFIG_SCHED_HRTICK
 /*
  * Use HR-timers to deliver accurate preemption points.
@@ -428,7 +420,7 @@ void hrtick_start(struct rq *rq, u64 del
 static void hrtick_rq_init(struct rq *rq)
 {
 #ifdef CONFIG_SMP
-	rq_csd_init(rq, &rq->hrtick_csd, __hrtick_start);
+	INIT_CSD(&rq->hrtick_csd, __hrtick_start, rq);
 #endif
 	hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
 	rq->hrtick_timer.function = hrtick;
@@ -7188,7 +7180,7 @@ void __init sched_init(void)
 		rq->last_blocked_load_update_tick = jiffies;
 		atomic_set(&rq->nohz_flags, 0);
 
-		rq_csd_init(rq, &rq->nohz_csd, nohz_csd_func);
+		INIT_CSD(&rq->nohz_csd, nohz_csd_func, rq);
 #endif
 #endif /* CONFIG_SMP */
 		hrtick_rq_init(rq);
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -27,7 +27,7 @@
 #include "smpboot.h"
 #include "sched/smp.h"
 
-#define CSD_TYPE(_csd)	((_csd)->flags & CSD_FLAG_TYPE_MASK)
+#define CSD_TYPE(_csd)	((_csd)->node.u_flags & CSD_FLAG_TYPE_MASK)
 
 struct call_function_data {
 	call_single_data_t	__percpu *csd;
@@ -146,7 +146,7 @@ static __always_inline bool csd_lock_wai
 	bool firsttime;
 	u64 ts2, ts_delta;
 	call_single_data_t *cpu_cur_csd;
-	unsigned int flags = READ_ONCE(csd->flags);
+	unsigned int flags = READ_ONCE(csd->node.u_flags);
 
 	if (!(flags & CSD_FLAG_LOCK)) {
 		if (!unlikely(*bug_id))
@@ -224,14 +224,14 @@ static void csd_lock_record(call_single_
 
 static __always_inline void csd_lock_wait(call_single_data_t *csd)
 {
-	smp_cond_load_acquire(&csd->flags, !(VAL & CSD_FLAG_LOCK));
+	smp_cond_load_acquire(&csd->node.u_flags, !(VAL & CSD_FLAG_LOCK));
 }
 #endif
 
 static __always_inline void csd_lock(call_single_data_t *csd)
 {
 	csd_lock_wait(csd);
-	csd->flags |= CSD_FLAG_LOCK;
+	csd->node.u_flags |= CSD_FLAG_LOCK;
 
 	/*
 	 * prevent CPU from reordering the above assignment
@@ -243,12 +243,12 @@ static __always_inline void csd_lock(cal
 
 static __always_inline void csd_unlock(call_single_data_t *csd)
 {
-	WARN_ON(!(csd->flags & CSD_FLAG_LOCK));
+	WARN_ON(!(csd->node.u_flags & CSD_FLAG_LOCK));
 
 	/*
 	 * ensure we're all done before releasing data:
 	 */
-	smp_store_release(&csd->flags, 0);
+	smp_store_release(&csd->node.u_flags, 0);
 }
 
 static DEFINE_PER_CPU_SHARED_ALIGNED(call_single_data_t, csd_data);
@@ -300,7 +300,7 @@ static int generic_exec_single(int cpu,
 		return -ENXIO;
 	}
 
-	__smp_call_single_queue(cpu, &csd->llist);
+	__smp_call_single_queue(cpu, &csd->node.llist);
 
 	return 0;
 }
@@ -353,7 +353,7 @@ static void flush_smp_call_function_queu
 		 * We don't have to use the _safe() variant here
 		 * because we are not invoking the IPI handlers yet.
 		 */
-		llist_for_each_entry(csd, entry, llist) {
+		llist_for_each_entry(csd, entry, node.llist) {
 			switch (CSD_TYPE(csd)) {
 			case CSD_TYPE_ASYNC:
 			case CSD_TYPE_SYNC:
@@ -378,16 +378,16 @@ static void flush_smp_call_function_queu
 	 * First; run all SYNC callbacks, people are waiting for us.
 	 */
 	prev = NULL;
-	llist_for_each_entry_safe(csd, csd_next, entry, llist) {
+	llist_for_each_entry_safe(csd, csd_next, entry, node.llist) {
 		/* Do we wait until *after* callback? */
 		if (CSD_TYPE(csd) == CSD_TYPE_SYNC) {
 			smp_call_func_t func = csd->func;
 			void *info = csd->info;
 
 			if (prev) {
-				prev->next = &csd_next->llist;
+				prev->next = &csd_next->node.llist;
 			} else {
-				entry = &csd_next->llist;
+				entry = &csd_next->node.llist;
 			}
 
 			csd_lock_record(csd);
@@ -395,7 +395,7 @@ static void flush_smp_call_function_queu
 			csd_unlock(csd);
 			csd_lock_record(NULL);
 		} else {
-			prev = &csd->llist;
+			prev = &csd->node.llist;
 		}
 	}
 
@@ -406,14 +406,14 @@ static void flush_smp_call_function_queu
 	 * Second; run all !SYNC callbacks.
 	 */
 	prev = NULL;
-	llist_for_each_entry_safe(csd, csd_next, entry, llist) {
+	llist_for_each_entry_safe(csd, csd_next, entry, node.llist) {
 		int type = CSD_TYPE(csd);
 
 		if (type != CSD_TYPE_TTWU) {
 			if (prev) {
-				prev->next = &csd_next->llist;
+				prev->next = &csd_next->node.llist;
 			} else {
-				entry = &csd_next->llist;
+				entry = &csd_next->node.llist;
 			}
 
 			if (type == CSD_TYPE_ASYNC) {
@@ -429,7 +429,7 @@ static void flush_smp_call_function_queu
 			}
 
 		} else {
-			prev = &csd->llist;
+			prev = &csd->node.llist;
 		}
 	}
 
@@ -465,7 +465,7 @@ int smp_call_function_single(int cpu, sm
 {
 	call_single_data_t *csd;
 	call_single_data_t csd_stack = {
-		.flags = CSD_FLAG_LOCK | CSD_TYPE_SYNC,
+		.node = { .u_flags = CSD_FLAG_LOCK | CSD_TYPE_SYNC, },
 	};
 	int this_cpu;
 	int err;
@@ -502,8 +502,8 @@ int smp_call_function_single(int cpu, sm
 	csd->func = func;
 	csd->info = info;
 #ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
-	csd->src = smp_processor_id();
-	csd->dst = cpu;
+	csd->node.src = smp_processor_id();
+	csd->node.dst = cpu;
 #endif
 
 	err = generic_exec_single(cpu, csd);
@@ -544,12 +544,12 @@ int smp_call_function_single_async(int c
 
 	preempt_disable();
 
-	if (csd->flags & CSD_FLAG_LOCK) {
+	if (csd->node.u_flags & CSD_FLAG_LOCK) {
 		err = -EBUSY;
 		goto out;
 	}
 
-	csd->flags = CSD_FLAG_LOCK;
+	csd->node.u_flags = CSD_FLAG_LOCK;
 	smp_wmb();
 
 	err = generic_exec_single(cpu, csd);
@@ -667,14 +667,14 @@ static void smp_call_function_many_cond(
 
 		csd_lock(csd);
 		if (wait)
-			csd->flags |= CSD_TYPE_SYNC;
+			csd->node.u_flags |= CSD_TYPE_SYNC;
 		csd->func = func;
 		csd->info = info;
 #ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
-		csd->src = smp_processor_id();
-		csd->dst = cpu;
+		csd->node.src = smp_processor_id();
+		csd->node.dst = cpu;
 #endif
-		if (llist_add(&csd->llist, &per_cpu(call_single_queue, cpu)))
+		if (llist_add(&csd->node.llist, &per_cpu(call_single_queue, cpu)))
 			__cpumask_set_cpu(cpu, cfd->cpumask_ipi);
 	}
 
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -11165,8 +11165,7 @@ static int __init net_dev_init(void)
 		INIT_LIST_HEAD(&sd->poll_list);
 		sd->output_queue_tailp = &sd->output_queue;
 #ifdef CONFIG_RPS
-		sd->csd.func = rps_trigger_softirq;
-		sd->csd.info = sd;
+		INIT_CSD(&sd->csd, rps_trigger_softirq, sd);
 		sd->cpu = i;
 #endif
 




* [PATCH v3 3/6] irq_work: Optimize irq_work_single()
  2020-10-28 11:07 [PATCH v3 0/6] smp: irq_work / smp_call_function rework Peter Zijlstra
  2020-10-28 11:07 ` [PATCH v3 1/6] irq_work: Cleanup Peter Zijlstra
  2020-10-28 11:07 ` [PATCH v3 2/6] smp: Cleanup smp_call_function*() Peter Zijlstra
@ 2020-10-28 11:07 ` Peter Zijlstra
  2020-10-28 12:06   ` Frederic Weisbecker
  2020-10-28 11:07 ` [PATCH v3 4/6] irq_work: Unconditionally build on SMP Peter Zijlstra
                   ` (2 subsequent siblings)
  5 siblings, 1 reply; 24+ messages in thread
From: Peter Zijlstra @ 2020-10-28 11:07 UTC (permalink / raw)
  To: mingo
  Cc: linux-kernel, will, paulmck, hch, axboe, chris, davem, kuba,
	fweisbec, oleg, vincent.guittot, peterz

Trade one atomic op for a full memory barrier.
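
Condensed, the change to irq_work_single() is (both fragments appear in the
diff below, comments trimmed):

        /* before: one atomic RMW, fully ordered */
        flags = atomic_fetch_andnot(IRQ_WORK_PENDING, &work->node.a_flags);

        /* after: plain read/write of a bit we own, plus an explicit
         * barrier pairing with atomic_fetch_or() in irq_work_claim() */
        flags = atomic_read(&work->node.a_flags);
        flags &= ~IRQ_WORK_PENDING;
        atomic_set(&work->node.a_flags, flags);
        smp_mb();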

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 include/linux/irqflags.h |    8 ++++----
 kernel/irq_work.c        |   29 +++++++++++++++++------------
 2 files changed, 21 insertions(+), 16 deletions(-)

--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -88,14 +88,14 @@ do {						\
 		  current->irq_config = 0;			\
 	  } while (0)
 
-# define lockdep_irq_work_enter(__work)					\
+# define lockdep_irq_work_enter(_flags)					\
 	  do {								\
-		  if (!(atomic_read(&__work->node.a_flags) & IRQ_WORK_HARD_IRQ))\
+		  if (!((_flags) & IRQ_WORK_HARD_IRQ))			\
 			current->irq_config = 1;			\
 	  } while (0)
-# define lockdep_irq_work_exit(__work)					\
+# define lockdep_irq_work_exit(_flags)					\
 	  do {								\
-		  if (!(atomic_read(&__work->node.a_flags) & IRQ_WORK_HARD_IRQ))\
+		  if (!((_flags) & IRQ_WORK_HARD_IRQ))			\
 			current->irq_config = 0;			\
 	  } while (0)
 
--- a/kernel/irq_work.c
+++ b/kernel/irq_work.c
@@ -34,7 +34,7 @@ static bool irq_work_claim(struct irq_wo
 	oflags = atomic_fetch_or(IRQ_WORK_CLAIMED | CSD_TYPE_IRQ_WORK, &work->node.a_flags);
 	/*
 	 * If the work is already pending, no need to raise the IPI.
-	 * The pairing atomic_fetch_andnot() in irq_work_run() makes sure
+	 * The pairing smp_mb() in irq_work_single() makes sure
 	 * everything we did before is visible.
 	 */
 	if (oflags & IRQ_WORK_PENDING)
@@ -136,22 +136,27 @@ void irq_work_single(void *arg)
 	int flags;
 
 	/*
-	 * Clear the PENDING bit, after this point the @work
-	 * can be re-used.
-	 * Make it immediately visible so that other CPUs trying
-	 * to claim that work don't rely on us to handle their data
-	 * while we are in the middle of the func.
+	 * Clear the PENDING bit, after this point the @work can be re-used.
+	 * The PENDING bit acts as a lock, and we own it, so we can clear it
+	 * without atomic ops.
 	 */
-	flags = atomic_fetch_andnot(IRQ_WORK_PENDING, &work->node.a_flags);
+	flags = atomic_read(&work->node.a_flags);
+	flags &= ~IRQ_WORK_PENDING;
+	atomic_set(&work->node.a_flags, flags);
+
+	/*
+	 * See irq_work_claim().
+	 */
+	smp_mb();
 
-	lockdep_irq_work_enter(work);
+	lockdep_irq_work_enter(flags);
 	work->func(work);
-	lockdep_irq_work_exit(work);
+	lockdep_irq_work_exit(flags);
+
 	/*
-	 * Clear the BUSY bit and return to the free state if
-	 * no-one else claimed it meanwhile.
+	 * Clear the BUSY bit, if set, and return to the free state if no-one
+	 * else claimed it meanwhile.
 	 */
-	flags &= ~IRQ_WORK_PENDING;
 	(void)atomic_cmpxchg(&work->node.a_flags, flags, flags & ~IRQ_WORK_BUSY);
 }
 




* [PATCH v3 4/6] irq_work: Unconditionally build on SMP
  2020-10-28 11:07 [PATCH v3 0/6] smp: irq_work / smp_call_function rework Peter Zijlstra
                   ` (2 preceding siblings ...)
  2020-10-28 11:07 ` [PATCH v3 3/6] irq_work: Optimize irq_work_single() Peter Zijlstra
@ 2020-10-28 11:07 ` Peter Zijlstra
  2020-10-28 13:34   ` Frederic Weisbecker
  2020-10-28 11:07 ` [PATCH v3 5/6] irq_work: Provide irq_work_queue_remote() Peter Zijlstra
  2020-10-28 11:07 ` [RFC][PATCH v3 6/6] rcu/tree: Use irq_work_queue_remote() Peter Zijlstra
  5 siblings, 1 reply; 24+ messages in thread
From: Peter Zijlstra @ 2020-10-28 11:07 UTC (permalink / raw)
  To: mingo
  Cc: linux-kernel, will, paulmck, hch, axboe, chris, davem, kuba,
	fweisbec, oleg, vincent.guittot, peterz


Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 kernel/Makefile   |    1 +
 kernel/irq_work.c |    3 +++
 2 files changed, 4 insertions(+)

--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -105,6 +105,7 @@ obj-$(CONFIG_TRACE_CLOCK) += trace/
 obj-$(CONFIG_RING_BUFFER) += trace/
 obj-$(CONFIG_TRACEPOINTS) += trace/
 obj-$(CONFIG_IRQ_WORK) += irq_work.o
+obj-$(CONFIG_SMP) += irq_work.o
 obj-$(CONFIG_CPU_PM) += cpu_pm.o
 obj-$(CONFIG_BPF) += bpf/
 obj-$(CONFIG_KCSAN) += kcsan/
--- a/kernel/irq_work.c
+++ b/kernel/irq_work.c
@@ -20,6 +20,7 @@
 #include <linux/smp.h>
 #include <asm/processor.h>
 
+#ifdef CONFIG_IRQ_WORK
 
 static DEFINE_PER_CPU(struct llist_head, raised_list);
 static DEFINE_PER_CPU(struct llist_head, lazy_list);
@@ -212,3 +213,5 @@ void irq_work_sync(struct irq_work *work
 		cpu_relax();
 }
 EXPORT_SYMBOL_GPL(irq_work_sync);
+
+#endif /* CONFIG_IRQ_WORK */




* [PATCH v3 5/6] irq_work: Provide irq_work_queue_remote()
  2020-10-28 11:07 [PATCH v3 0/6] smp: irq_work / smp_call_function rework Peter Zijlstra
                   ` (3 preceding siblings ...)
  2020-10-28 11:07 ` [PATCH v3 4/6] irq_work: Unconditionally build on SMP Peter Zijlstra
@ 2020-10-28 11:07 ` Peter Zijlstra
  2020-10-28 13:40   ` Frederic Weisbecker
  2020-10-28 11:07 ` [RFC][PATCH v3 6/6] rcu/tree: Use irq_work_queue_remote() Peter Zijlstra
  5 siblings, 1 reply; 24+ messages in thread
From: Peter Zijlstra @ 2020-10-28 11:07 UTC (permalink / raw)
  To: mingo
  Cc: linux-kernel, will, paulmck, hch, axboe, chris, davem, kuba,
	fweisbec, oleg, vincent.guittot, peterz

While the traditional irq_work relies on the ability to self-IPI, it
makes sense to provide an unconditional irq_work_queue_remote()
interface.
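
A minimal usage sketch (hypothetical caller, not from this patch): the work
must target a remote CPU, the caller is expected to have preemption
disabled, and -EBUSY is returned while the work is still claimed:

#include <linux/irq_work.h>

static int poke(int cpu, struct irq_work *work)
{
        int ret = -EINVAL;      /* local CPU; error code is this sketch's choice */

        preempt_disable();
        if (cpu != smp_processor_id())
                ret = irq_work_queue_remote(cpu, work);
        preempt_enable();

        return ret;
}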

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 include/linux/irq_work.h |   17 ++++--
 kernel/irq_work.c        |  129 ++++++++++++++++++++++++++++-------------------
 kernel/rcu/tree.c        |    5 +
 3 files changed, 94 insertions(+), 57 deletions(-)

--- a/include/linux/irq_work.h
+++ b/include/linux/irq_work.h
@@ -46,22 +46,29 @@ static inline bool irq_work_is_busy(stru
 	return atomic_read(&work->node.a_flags) & IRQ_WORK_BUSY;
 }
 
+#ifdef CONFIG_IRQ_WORK
+
 bool irq_work_queue(struct irq_work *work);
 bool irq_work_queue_on(struct irq_work *work, int cpu);
 
 void irq_work_tick(void);
 void irq_work_sync(struct irq_work *work);
 
-#ifdef CONFIG_IRQ_WORK
 #include <asm/irq_work.h>
 
 void irq_work_run(void);
 bool irq_work_needs_cpu(void);
-void irq_work_single(void *arg);
-#else
-static inline bool irq_work_needs_cpu(void) { return false; }
+
+#else /* !CONFIG_IRQ_WORK */
+
 static inline void irq_work_run(void) { }
-static inline void irq_work_single(void *arg) { }
+static inline bool irq_work_needs_cpu(void) { return false; }
+
+#endif /* CONFIG_IRQ_WORK */
+
+#ifdef CONFIG_SMP
+extern int irq_work_queue_remote(int cpu, struct irq_work *work);
+extern void irq_work_single(void *arg);
 #endif
 
 #endif /* _LINUX_IRQ_WORK_H */
--- a/kernel/irq_work.c
+++ b/kernel/irq_work.c
@@ -20,10 +20,7 @@
 #include <linux/smp.h>
 #include <asm/processor.h>
 
-#ifdef CONFIG_IRQ_WORK
-
-static DEFINE_PER_CPU(struct llist_head, raised_list);
-static DEFINE_PER_CPU(struct llist_head, lazy_list);
+#if defined(CONFIG_IRQ_WORK) || defined(CONFIG_SMP)
 
 /*
  * Claim the entry so that no one else will poke at it.
@@ -43,6 +40,82 @@ static bool irq_work_claim(struct irq_wo
 	return true;
 }
 
+void irq_work_single(void *arg)
+{
+	struct irq_work *work = arg;
+	int flags;
+
+	/*
+	 * Clear the PENDING bit, after this point the @work can be re-used.
+	 * The PENDING bit acts as a lock, and we own it, so we can clear it
+	 * without atomic ops.
+	 */
+	flags = atomic_read(&work->node.a_flags);
+	flags &= ~IRQ_WORK_PENDING;
+	atomic_set(&work->node.a_flags, flags);
+
+	/*
+	 * See irq_work_claim().
+	 */
+	smp_mb();
+
+	lockdep_irq_work_enter(flags);
+	work->func(work);
+	lockdep_irq_work_exit(flags);
+
+	/*
+	 * Clear the BUSY bit, if set, and return to the free state if no-one
+	 * else claimed it meanwhile.
+	 */
+	(void)atomic_cmpxchg(&work->node.a_flags, flags, flags & ~IRQ_WORK_BUSY);
+}
+
+/*
+ * Synchronize against the irq_work @entry, ensures the entry is not
+ * currently in use.
+ */
+void irq_work_sync(struct irq_work *work)
+{
+	lockdep_assert_irqs_enabled();
+
+	while (irq_work_is_busy(work))
+		cpu_relax();
+}
+EXPORT_SYMBOL_GPL(irq_work_sync);
+
+#endif /* CONFIG_IRQ_WORK || CONFIG_SMP */
+
+#ifdef CONFIG_SMP
+
+static void __irq_work_queue_remote(int cpu, struct irq_work *work)
+{
+	/* Arch remote IPI send/receive backend aren't NMI safe */
+	WARN_ON_ONCE(in_nmi());
+	__smp_call_single_queue(cpu, &work->node.llist);
+}
+
+int irq_work_queue_remote(int cpu, struct irq_work *work)
+{
+	/*
+	 * Ensures preemption is disabled in the caller.
+	 */
+	WARN_ON_ONCE(cpu == smp_processor_id());
+
+	if (!irq_work_claim(work))
+		return -EBUSY;
+
+	__irq_work_queue_remote(cpu, work);
+
+	return 0;
+}
+
+#endif /* CONFIG_SMP */
+
+#ifdef CONFIG_IRQ_WORK
+
+static DEFINE_PER_CPU(struct llist_head, raised_list);
+static DEFINE_PER_CPU(struct llist_head, lazy_list);
+
 void __weak arch_irq_work_raise(void)
 {
 	/*
@@ -101,9 +174,7 @@ bool irq_work_queue_on(struct irq_work *
 
 	preempt_disable();
 	if (cpu != smp_processor_id()) {
-		/* Arch remote IPI send/receive backend aren't NMI safe */
-		WARN_ON_ONCE(in_nmi());
-		__smp_call_single_queue(cpu, &work->node.llist);
+		__irq_work_queue_remote(cpu, work);
 	} else {
 		__irq_work_queue_local(work);
 	}
@@ -131,36 +202,6 @@ bool irq_work_needs_cpu(void)
 	return true;
 }
 
-void irq_work_single(void *arg)
-{
-	struct irq_work *work = arg;
-	int flags;
-
-	/*
-	 * Clear the PENDING bit, after this point the @work can be re-used.
-	 * The PENDING bit acts as a lock, and we own it, so we can clear it
-	 * without atomic ops.
-	 */
-	flags = atomic_read(&work->node.a_flags);
-	flags &= ~IRQ_WORK_PENDING;
-	atomic_set(&work->node.a_flags, flags);
-
-	/*
-	 * See irq_work_claim().
-	 */
-	smp_mb();
-
-	lockdep_irq_work_enter(flags);
-	work->func(work);
-	lockdep_irq_work_exit(flags);
-
-	/*
-	 * Clear the BUSY bit, if set, and return to the free state if no-one
-	 * else claimed it meanwhile.
-	 */
-	(void)atomic_cmpxchg(&work->node.a_flags, flags, flags & ~IRQ_WORK_BUSY);
-}
-
 static void irq_work_run_list(struct llist_head *list)
 {
 	struct irq_work *work, *tmp;
@@ -196,17 +237,5 @@ void irq_work_tick(void)
 	irq_work_run_list(this_cpu_ptr(&lazy_list));
 }
 
-/*
- * Synchronize against the irq_work @entry, ensures the entry is not
- * currently in use.
- */
-void irq_work_sync(struct irq_work *work)
-{
-	lockdep_assert_irqs_enabled();
-
-	while (irq_work_is_busy(work))
-		cpu_relax();
-}
-EXPORT_SYMBOL_GPL(irq_work_sync);
-
 #endif /* CONFIG_IRQ_WORK */
+
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1308,13 +1308,14 @@ static int rcu_implicit_dynticks_qs(stru
 			resched_cpu(rdp->cpu);
 			WRITE_ONCE(rdp->last_fqs_resched, jiffies);
 		}
-		if (IS_ENABLED(CONFIG_IRQ_WORK) &&
-		    !rdp->rcu_iw_pending && rdp->rcu_iw_gp_seq != rnp->gp_seq &&
+#ifdef CONFIG_IRQ_WORK
+		if (!rdp->rcu_iw_pending && rdp->rcu_iw_gp_seq != rnp->gp_seq &&
 		    (rnp->ffmask & rdp->grpmask)) {
 			rdp->rcu_iw_pending = true;
 			rdp->rcu_iw_gp_seq = rnp->gp_seq;
 			irq_work_queue_on(&rdp->rcu_iw, rdp->cpu);
 		}
+#endif
 	}
 
 	return 0;




* [RFC][PATCH v3 6/6] rcu/tree: Use irq_work_queue_remote()
  2020-10-28 11:07 [PATCH v3 0/6] smp: irq_work / smp_call_function rework Peter Zijlstra
                   ` (4 preceding siblings ...)
  2020-10-28 11:07 ` [PATCH v3 5/6] irq_work: Provide irq_work_queue_remote() Peter Zijlstra
@ 2020-10-28 11:07 ` Peter Zijlstra
  2020-10-28 14:54   ` Peter Zijlstra
  5 siblings, 1 reply; 24+ messages in thread
From: Peter Zijlstra @ 2020-10-28 11:07 UTC (permalink / raw)
  To: mingo
  Cc: linux-kernel, will, paulmck, hch, axboe, chris, davem, kuba,
	fweisbec, oleg, vincent.guittot, peterz

AFAICT we only need/use irq_work_queue_on() on remote CPUs, since we
can directly access local state.  So avoid the IRQ_WORK dependency and
use the unconditionally available irq_work_queue_remote().

This survives a number of TREE01 runs.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 kernel/rcu/tree.c |    4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1308,14 +1308,12 @@ static int rcu_implicit_dynticks_qs(stru
 			resched_cpu(rdp->cpu);
 			WRITE_ONCE(rdp->last_fqs_resched, jiffies);
 		}
-#ifdef CONFIG_IRQ_WORK
 		if (!rdp->rcu_iw_pending && rdp->rcu_iw_gp_seq != rnp->gp_seq &&
 		    (rnp->ffmask & rdp->grpmask)) {
 			rdp->rcu_iw_pending = true;
 			rdp->rcu_iw_gp_seq = rnp->gp_seq;
-			irq_work_queue_on(&rdp->rcu_iw, rdp->cpu);
+			irq_work_queue_remote(rdp->cpu, &rdp->rcu_iw);
 		}
-#endif
 	}
 
 	return 0;




* Re: [PATCH v3 3/6] irq_work: Optimize irq_work_single()
  2020-10-28 11:07 ` [PATCH v3 3/6] irq_work: Optimize irq_work_single() Peter Zijlstra
@ 2020-10-28 12:06   ` Frederic Weisbecker
  0 siblings, 0 replies; 24+ messages in thread
From: Frederic Weisbecker @ 2020-10-28 12:06 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: mingo, linux-kernel, will, paulmck, hch, axboe, chris, davem,
	kuba, fweisbec, oleg, vincent.guittot

On Wed, Oct 28, 2020 at 12:07:10PM +0100, Peter Zijlstra wrote:
> Trade one atomic op for a full memory barrier.
> 
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>

Reviewed-by: Frederic Weisbecker <frederic@kernel.org>


* Re: [PATCH v3 1/6] irq_work: Cleanup
  2020-10-28 11:07 ` [PATCH v3 1/6] irq_work: Cleanup Peter Zijlstra
@ 2020-10-28 13:01   ` Frederic Weisbecker
  0 siblings, 0 replies; 24+ messages in thread
From: Frederic Weisbecker @ 2020-10-28 13:01 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: mingo, linux-kernel, will, paulmck, hch, axboe, chris, davem,
	kuba, fweisbec, oleg, vincent.guittot

On Wed, Oct 28, 2020 at 12:07:08PM +0100, Peter Zijlstra wrote:
> +#define __IRQ_WORK_INIT(_func, _flags) (struct irq_work){	\
> +	.node = { .u_flags = (_flags), },			\

I guess, just for the sake of being conservative:

 +	.node = { .a_flags = ATOMIC_INIT(_flags), },


> +	.func = (_func),					\
> +}

Reviewed-by: Frederic Weisbecker <frederic@kernel.org>

Thanks.


* Re: [PATCH v3 2/6] smp: Cleanup smp_call_function*()
  2020-10-28 11:07 ` [PATCH v3 2/6] smp: Cleanup smp_call_function*() Peter Zijlstra
@ 2020-10-28 13:25   ` Frederic Weisbecker
  0 siblings, 0 replies; 24+ messages in thread
From: Frederic Weisbecker @ 2020-10-28 13:25 UTC (permalink / raw)
  To: Peter Zijlstra, g
  Cc: mingo, linux-kernel, will, paulmck, hch, axboe, chris, davem,
	kuba, fweisbec, oleg, vincent.guittot

On Wed, Oct 28, 2020 at 12:07:09PM +0100, Peter Zijlstra wrote:
> Get rid of the __call_single_node union and clean up the API a little
> to avoid external code relying on the structure layout as much.
> 
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>

Reviewed-by: Frederic Weisbecker <frederic@kernel.org>


* Re: [PATCH v3 4/6] irq_work: Unconditionally build on SMP
  2020-10-28 11:07 ` [PATCH v3 4/6] irq_work: Unconditionally build on SMP Peter Zijlstra
@ 2020-10-28 13:34   ` Frederic Weisbecker
  2020-10-28 14:52     ` Peter Zijlstra
  0 siblings, 1 reply; 24+ messages in thread
From: Frederic Weisbecker @ 2020-10-28 13:34 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: mingo, linux-kernel, will, paulmck, hch, axboe, chris, davem,
	kuba, fweisbec, oleg, vincent.guittot

On Wed, Oct 28, 2020 at 12:07:11PM +0100, Peter Zijlstra wrote:

This may need a changelog :-)

> 
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> ---
>  kernel/Makefile   |    1 +
>  kernel/irq_work.c |    3 +++
>  2 files changed, 4 insertions(+)
> 
> --- a/kernel/Makefile
> +++ b/kernel/Makefile
> @@ -105,6 +105,7 @@ obj-$(CONFIG_TRACE_CLOCK) += trace/
>  obj-$(CONFIG_RING_BUFFER) += trace/
>  obj-$(CONFIG_TRACEPOINTS) += trace/
>  obj-$(CONFIG_IRQ_WORK) += irq_work.o
> +obj-$(CONFIG_SMP) += irq_work.o
>  obj-$(CONFIG_CPU_PM) += cpu_pm.o
>  obj-$(CONFIG_BPF) += bpf/
>  obj-$(CONFIG_KCSAN) += kcsan/
> --- a/kernel/irq_work.c
> +++ b/kernel/irq_work.c
> @@ -20,6 +20,7 @@
>  #include <linux/smp.h>
>  #include <asm/processor.h>
>  
> +#ifdef CONFIG_IRQ_WORK
>  
>  static DEFINE_PER_CPU(struct llist_head, raised_list);
>  static DEFINE_PER_CPU(struct llist_head, lazy_list);
> @@ -212,3 +213,5 @@ void irq_work_sync(struct irq_work *work
>  		cpu_relax();
>  }
>  EXPORT_SYMBOL_GPL(irq_work_sync);
> +
> +#endif /* CONFIG_IRQ_WORK */
> 
> 


* Re: [PATCH v3 5/6] irq_work: Provide irq_work_queue_remote()
  2020-10-28 11:07 ` [PATCH v3 5/6] irq_work: Provide irq_work_queue_remote() Peter Zijlstra
@ 2020-10-28 13:40   ` Frederic Weisbecker
  2020-10-28 14:38     ` Peter Zijlstra
  2020-10-28 14:53     ` Peter Zijlstra
  0 siblings, 2 replies; 24+ messages in thread
From: Frederic Weisbecker @ 2020-10-28 13:40 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: mingo, linux-kernel, will, paulmck, hch, axboe, chris, davem,
	kuba, fweisbec, oleg, vincent.guittot

On Wed, Oct 28, 2020 at 12:07:12PM +0100, Peter Zijlstra wrote:
> While the traditional irq_work relies on the ability to self-IPI, it
> makes sense to provide an unconditional irq_work_queue_remote()
> interface.

We may need a reason as well here.

> --- a/kernel/rcu/tree.c
> +++ b/kernel/rcu/tree.c
> @@ -1308,13 +1308,14 @@ static int rcu_implicit_dynticks_qs(stru
>  			resched_cpu(rdp->cpu);
>  			WRITE_ONCE(rdp->last_fqs_resched, jiffies);
>  		}
> -		if (IS_ENABLED(CONFIG_IRQ_WORK) &&
> -		    !rdp->rcu_iw_pending && rdp->rcu_iw_gp_seq != rnp->gp_seq &&
> +#ifdef CONFIG_IRQ_WORK
> +		if (!rdp->rcu_iw_pending && rdp->rcu_iw_gp_seq != rnp->gp_seq &&

If it's unconditional on SMP, I expect it to be unconditional on rcutree.

Also this chunk seems unrelated to this patch.

>  		    (rnp->ffmask & rdp->grpmask)) {
>  			rdp->rcu_iw_pending = true;
>  			rdp->rcu_iw_gp_seq = rnp->gp_seq;
>  			irq_work_queue_on(&rdp->rcu_iw, rdp->cpu);
>  		}
> +#endif
>  	}
>  
>  	return 0;
> 
> 


* Re: [PATCH v3 5/6] irq_work: Provide irq_work_queue_remote()
  2020-10-28 13:40   ` Frederic Weisbecker
@ 2020-10-28 14:38     ` Peter Zijlstra
  2020-10-28 14:53     ` Peter Zijlstra
  1 sibling, 0 replies; 24+ messages in thread
From: Peter Zijlstra @ 2020-10-28 14:38 UTC (permalink / raw)
  To: Frederic Weisbecker
  Cc: mingo, linux-kernel, will, paulmck, hch, axboe, chris, davem,
	kuba, fweisbec, oleg, vincent.guittot

On Wed, Oct 28, 2020 at 02:40:46PM +0100, Frederic Weisbecker wrote:
> On Wed, Oct 28, 2020 at 12:07:12PM +0100, Peter Zijlstra wrote:
> > While the traditional irq_work relies on the ability to self-IPI, it
> > makes sense to provide an unconditional irq_work_queue_remote()
> > interface.
> 
> We may need a reason as well here.

Well, it doesn't rely on arch self-IPI code. The remote irq_work bits
are generic SMP code.

> > --- a/kernel/rcu/tree.c
> > +++ b/kernel/rcu/tree.c
> > @@ -1308,13 +1308,14 @@ static int rcu_implicit_dynticks_qs(stru
> >  			resched_cpu(rdp->cpu);
> >  			WRITE_ONCE(rdp->last_fqs_resched, jiffies);
> >  		}
> > -		if (IS_ENABLED(CONFIG_IRQ_WORK) &&
> > -		    !rdp->rcu_iw_pending && rdp->rcu_iw_gp_seq != rnp->gp_seq &&
> > +#ifdef CONFIG_IRQ_WORK
> > +		if (!rdp->rcu_iw_pending && rdp->rcu_iw_gp_seq != rnp->gp_seq &&
> 
> If it's unconditional on SMP, I expect it to be unconditional on rcutree.
> 
> Also this chunk seems unrelated to this patch.

This hunk is due to irq_work_queue_on() no longer existing for
!CONFIG_IRQ_WORK and hence breaking the compile with that IS_ENABLED()
crud.

That is, this changes IS_ENABLED() for a proper #ifdef.

> >  		    (rnp->ffmask & rdp->grpmask)) {
> >  			rdp->rcu_iw_pending = true;
> >  			rdp->rcu_iw_gp_seq = rnp->gp_seq;
> >  			irq_work_queue_on(&rdp->rcu_iw, rdp->cpu);
> >  		}
> > +#endif
> >  	}
> >  
> >  	return 0;
> > 
> > 


* Re: [PATCH v3 4/6] irq_work: Unconditionally build on SMP
  2020-10-28 13:34   ` Frederic Weisbecker
@ 2020-10-28 14:52     ` Peter Zijlstra
  0 siblings, 0 replies; 24+ messages in thread
From: Peter Zijlstra @ 2020-10-28 14:52 UTC (permalink / raw)
  To: Frederic Weisbecker
  Cc: mingo, linux-kernel, will, paulmck, hch, axboe, chris, davem,
	kuba, fweisbec, oleg, vincent.guittot

On Wed, Oct 28, 2020 at 02:34:20PM +0100, Frederic Weisbecker wrote:
> On Wed, Oct 28, 2020 at 12:07:11PM +0100, Peter Zijlstra wrote:
> 
> This may need a changelog :-)

Like so then?
---

Only (the local) irq_work_queue() relies on the arch self-IPI bits;
prepare to have the remote queueing available on all SMP builds.



* Re: [PATCH v3 5/6] irq_work: Provide irq_work_queue_remote()
  2020-10-28 13:40   ` Frederic Weisbecker
  2020-10-28 14:38     ` Peter Zijlstra
@ 2020-10-28 14:53     ` Peter Zijlstra
  2020-10-28 14:56       ` Frederic Weisbecker
  1 sibling, 1 reply; 24+ messages in thread
From: Peter Zijlstra @ 2020-10-28 14:53 UTC (permalink / raw)
  To: Frederic Weisbecker
  Cc: mingo, linux-kernel, will, paulmck, hch, axboe, chris, davem,
	kuba, fweisbec, oleg, vincent.guittot

On Wed, Oct 28, 2020 at 02:40:46PM +0100, Frederic Weisbecker wrote:
> On Wed, Oct 28, 2020 at 12:07:12PM +0100, Peter Zijlstra wrote:
> > While the traditional irq_work relies on the ability to self-IPI, it
> > makes sense to provide an unconditional irq_work_queue_remote()
> > interface.
> 
> We may need a reason as well here.

Rewrote it like so.
---

While the traditional irq_work relies on the ability to self-IPI, the
remote irq_work only requires generic SMP bits and can thus be made
available unconditionally.


* Re: [RFC][PATCH v3 6/6] rcu/tree: Use irq_work_queue_remote()
  2020-10-28 11:07 ` [RFC][PATCH v3 6/6] rcu/tree: Use irq_work_queue_remote() Peter Zijlstra
@ 2020-10-28 14:54   ` Peter Zijlstra
  2020-10-28 20:02     ` Peter Zijlstra
  0 siblings, 1 reply; 24+ messages in thread
From: Peter Zijlstra @ 2020-10-28 14:54 UTC (permalink / raw)
  To: mingo
  Cc: linux-kernel, will, paulmck, hch, axboe, chris, davem, kuba,
	fweisbec, oleg, vincent.guittot

On Wed, Oct 28, 2020 at 12:07:13PM +0100, Peter Zijlstra wrote:
> AFAICT we only need/use irq_work_queue_on() on remote CPUs, since we
> can directly access local state.  So avoid the IRQ_WORK dependency and
> use the unconditionally available irq_work_queue_remote().
> 
> This survives a number of TREE01 runs.

OK, Paul mentioned on IRC that while it is extremely unlikely, this code
does indeed not guarantee it will not try to IPI self.

I'll try again.


* Re: [PATCH v3 5/6] irq_work: Provide irq_work_queue_remote()
  2020-10-28 14:53     ` Peter Zijlstra
@ 2020-10-28 14:56       ` Frederic Weisbecker
  0 siblings, 0 replies; 24+ messages in thread
From: Frederic Weisbecker @ 2020-10-28 14:56 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: mingo, linux-kernel, will, paulmck, hch, axboe, chris, davem,
	kuba, fweisbec, oleg, vincent.guittot

On Wed, Oct 28, 2020 at 03:53:24PM +0100, Peter Zijlstra wrote:
> On Wed, Oct 28, 2020 at 02:40:46PM +0100, Frederic Weisbecker wrote:
> > On Wed, Oct 28, 2020 at 12:07:12PM +0100, Peter Zijlstra wrote:
> > > While the traditional irq_work relies on the ability to self-IPI, it
> > > makes sense to provide an unconditional irq_work_queue_remote()
> > > interface.
> > 
> > We may need a reason as well here.
> 
> Rewrote it like so.
> ---
> 
> While the traditional irq_work relies on the ability to self-IPI, the
> remote irq_work only requires generic SMP bits and can thus be made
> available unconditionally.

Thanks!


* Re: [RFC][PATCH v3 6/6] rcu/tree: Use irq_work_queue_remote()
  2020-10-28 14:54   ` Peter Zijlstra
@ 2020-10-28 20:02     ` Peter Zijlstra
  2020-10-28 20:15       ` Paul E. McKenney
  0 siblings, 1 reply; 24+ messages in thread
From: Peter Zijlstra @ 2020-10-28 20:02 UTC (permalink / raw)
  To: mingo
  Cc: linux-kernel, will, paulmck, hch, axboe, chris, davem, kuba,
	fweisbec, oleg, vincent.guittot

On Wed, Oct 28, 2020 at 03:54:28PM +0100, Peter Zijlstra wrote:
> On Wed, Oct 28, 2020 at 12:07:13PM +0100, Peter Zijlstra wrote:
> > AFAICT we only need/use irq_work_queue_on() on remote CPUs, since we
> > can directly access local state.  So avoid the IRQ_WORK dependency and
> > use the unconditionally available irq_work_queue_remote().
> > 
> > This survives a number of TREE01 runs.
> 
> OK, Paul mentioned on IRC that while it is extremely unlikely, this code
> does indeed not guarantee it will not try to IPI self.
> 
> I'll try again.

This is the best I could come up with.. :/

---
Subject: rcu/tree: Use irq_work_queue_remote()
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed Oct 28 11:53:40 CET 2020

All sites that consume rcu_iw_gp_seq seem to have rcu_node lock held,
so setting it probably should too. Also the effect of self-IPI here
would be setting rcu_iw_gp_seq to the value we just set it to
(pointless) and clearing rcu_iw_pending, which we just set, so don't
set it.

Passes TREE01.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 kernel/rcu/tree.c |   10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1308,14 +1308,16 @@ static int rcu_implicit_dynticks_qs(stru
 			resched_cpu(rdp->cpu);
 			WRITE_ONCE(rdp->last_fqs_resched, jiffies);
 		}
-#ifdef CONFIG_IRQ_WORK
+		raw_spin_lock_rcu_node(rnp);
 		if (!rdp->rcu_iw_pending && rdp->rcu_iw_gp_seq != rnp->gp_seq &&
 		    (rnp->ffmask & rdp->grpmask)) {
-			rdp->rcu_iw_pending = true;
 			rdp->rcu_iw_gp_seq = rnp->gp_seq;
-			irq_work_queue_on(&rdp->rcu_iw, rdp->cpu);
+			if (likely(rdp->cpu != smp_processor_id())) {
+				rdp->rcu_iw_pending = true;
+				irq_work_queue_remote(rdp->cpu, &rdp->rcu_iw);
+			}
 		}
-#endif
+		raw_spin_unlock_rcu_node(rnp);
 	}
 
 	return 0;


* Re: [RFC][PATCH v3 6/6] rcu/tree: Use irq_work_queue_remote()
  2020-10-28 20:02     ` Peter Zijlstra
@ 2020-10-28 20:15       ` Paul E. McKenney
  2020-10-29  9:10         ` Peter Zijlstra
  2020-10-29  9:15         ` Peter Zijlstra
  0 siblings, 2 replies; 24+ messages in thread
From: Paul E. McKenney @ 2020-10-28 20:15 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: mingo, linux-kernel, will, hch, axboe, chris, davem, kuba,
	fweisbec, oleg, vincent.guittot

On Wed, Oct 28, 2020 at 09:02:43PM +0100, Peter Zijlstra wrote:
> On Wed, Oct 28, 2020 at 03:54:28PM +0100, Peter Zijlstra wrote:
> > On Wed, Oct 28, 2020 at 12:07:13PM +0100, Peter Zijlstra wrote:
> > > AFAICT we only need/use irq_work_queue_on() on remote CPUs, since we
> > > can directly access local state.  So avoid the IRQ_WORK dependency and
> > > use the unconditionally available irq_work_queue_remote().
> > > 
> > > This survives a number of TREE01 runs.
> > 
> > OK, Paul mentioned on IRC that while it is extremely unlikely, this code
> > does indeed not guarantee it will not try to IPI self.
> > 
> > I'll try again.
> 
> This is the best I could come up with.. :/
> 
> ---
> Subject: rcu/tree: Use irq_work_queue_remote()
> From: Peter Zijlstra <peterz@infradead.org>
> Date: Wed Oct 28 11:53:40 CET 2020
> 
> All sites that consume rcu_iw_gp_seq seem to have rcu_node lock held,
> so setting it probably should too. Also the effect of self-IPI here
> would be setting rcu_iw_gp_seq to the value we just set it to
> (pointless) and clearing rcu_iw_pending, which we just set, so don't
> set it.
> 
> Passes TREE01.
> 
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> ---
>  kernel/rcu/tree.c |   10 ++++++----
>  1 file changed, 6 insertions(+), 4 deletions(-)
> 
> --- a/kernel/rcu/tree.c
> +++ b/kernel/rcu/tree.c
> @@ -1308,14 +1308,16 @@ static int rcu_implicit_dynticks_qs(stru
>  			resched_cpu(rdp->cpu);
>  			WRITE_ONCE(rdp->last_fqs_resched, jiffies);
>  		}
> -#ifdef CONFIG_IRQ_WORK
> +		raw_spin_lock_rcu_node(rnp);

The caller of rcu_implicit_dynticks_qs() already holds this lock.
Please see the force_qs_rnp() function and its second call site,
to which rcu_implicit_dynticks_qs() is passed as an argument.

But other than that, this does look plausible.  And getting rid of
that #ifdef is worth something.  ;-)

							Thanx, Paul

>  		if (!rdp->rcu_iw_pending && rdp->rcu_iw_gp_seq != rnp->gp_seq &&
>  		    (rnp->ffmask & rdp->grpmask)) {
> -			rdp->rcu_iw_pending = true;
>  			rdp->rcu_iw_gp_seq = rnp->gp_seq;
> -			irq_work_queue_on(&rdp->rcu_iw, rdp->cpu);
> +			if (likely(rdp->cpu != smp_processor_id())) {
> +				rdp->rcu_iw_pending = true;
> +				irq_work_queue_remote(rdp->cpu, &rdp->rcu_iw);
> +			}
>  		}
> -#endif
> +		raw_spin_unlock_rcu_node(rnp);
>  	}
>  
>  	return 0;


* Re: [RFC][PATCH v3 6/6] rcu/tree: Use irq_work_queue_remote()
  2020-10-28 20:15       ` Paul E. McKenney
@ 2020-10-29  9:10         ` Peter Zijlstra
  2020-10-29 16:04           ` Paul E. McKenney
  2020-10-29  9:15         ` Peter Zijlstra
  1 sibling, 1 reply; 24+ messages in thread
From: Peter Zijlstra @ 2020-10-29  9:10 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: mingo, linux-kernel, will, hch, axboe, chris, davem, kuba,
	fweisbec, oleg, vincent.guittot

On Wed, Oct 28, 2020 at 01:15:54PM -0700, Paul E. McKenney wrote:
> On Wed, Oct 28, 2020 at 09:02:43PM +0100, Peter Zijlstra wrote:

> > Subject: rcu/tree: Use irq_work_queue_remote()
> > From: Peter Zijlstra <peterz@infradead.org>
> > Date: Wed Oct 28 11:53:40 CET 2020
> > 
> > All sites that consume rcu_iw_gp_seq seem to have rcu_node lock held,
> > so setting it probably should too. Also the effect of self-IPI here
> > would be setting rcu_iw_gp_seq to the value we just set it to
> > (pointless) and clearing rcu_iw_pending, which we just set, so don't
> > set it.
> > 
> > Passes TREE01.
> > 
> > Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> > ---
> >  kernel/rcu/tree.c |   10 ++++++----
> >  1 file changed, 6 insertions(+), 4 deletions(-)
> > 
> > --- a/kernel/rcu/tree.c
> > +++ b/kernel/rcu/tree.c
> > @@ -1308,14 +1308,16 @@ static int rcu_implicit_dynticks_qs(stru
> >  			resched_cpu(rdp->cpu);
> >  			WRITE_ONCE(rdp->last_fqs_resched, jiffies);
> >  		}
> > -#ifdef CONFIG_IRQ_WORK
> > +		raw_spin_lock_rcu_node(rnp);
> 
> The caller of rcu_implicit_dynticks_qs() already holds this lock.
> Please see the force_qs_rnp() function and its second call site,
> to which rcu_implicit_dynticks_qs() is passed as an argument.
> 
> But other than that, this does look plausible.  And getting rid of
> that #ifdef is worth something.  ;-)

Dang, clearly TREE01 didn't actually hit any of this code :/ Is there
another test I should be running?


* Re: [RFC][PATCH v3 6/6] rcu/tree: Use irq_work_queue_remote()
  2020-10-28 20:15       ` Paul E. McKenney
  2020-10-29  9:10         ` Peter Zijlstra
@ 2020-10-29  9:15         ` Peter Zijlstra
  2020-10-29 16:06           ` Paul E. McKenney
  1 sibling, 1 reply; 24+ messages in thread
From: Peter Zijlstra @ 2020-10-29  9:15 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: mingo, linux-kernel, will, hch, axboe, chris, davem, kuba,
	fweisbec, oleg, vincent.guittot

On Wed, Oct 28, 2020 at 01:15:54PM -0700, Paul E. McKenney wrote:
> On Wed, Oct 28, 2020 at 09:02:43PM +0100, Peter Zijlstra wrote:
> > -#ifdef CONFIG_IRQ_WORK
> > +		raw_spin_lock_rcu_node(rnp);
> 
> The caller of rcu_implicit_dynticks_qs() already holds this lock.
> Please see the force_qs_rnp() function and its second call site,
> to which rcu_implicit_dynticks_qs() is passed as an argument.

Like this then.

---
Subject: rcu/tree: Use irq_work_queue_remote()
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed Oct 28 11:53:40 CET 2020

The effect of a self-IPI here would be setting rcu_iw_gp_seq to the
value we just set it to (pointless) and clearing rcu_iw_pending, which
we just set, so don't set it.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 kernel/rcu/tree.c |   10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1204,6 +1204,8 @@ static int rcu_implicit_dynticks_qs(stru
 	bool *ruqp;
 	struct rcu_node *rnp = rdp->mynode;
 
+	raw_lockdep_assert_held_rcu_node(rnp);
+
 	/*
 	 * If the CPU passed through or entered a dynticks idle phase with
 	 * no active irq/NMI handlers, then we can safely pretend that the CPU
@@ -1308,14 +1310,14 @@ static int rcu_implicit_dynticks_qs(stru
 			resched_cpu(rdp->cpu);
 			WRITE_ONCE(rdp->last_fqs_resched, jiffies);
 		}
-#ifdef CONFIG_IRQ_WORK
 		if (!rdp->rcu_iw_pending && rdp->rcu_iw_gp_seq != rnp->gp_seq &&
 		    (rnp->ffmask & rdp->grpmask)) {
-			rdp->rcu_iw_pending = true;
 			rdp->rcu_iw_gp_seq = rnp->gp_seq;
-			irq_work_queue_on(&rdp->rcu_iw, rdp->cpu);
+			if (likely(rdp->cpu != smp_processor_id())) {
+				rdp->rcu_iw_pending = true;
+				irq_work_queue_remote(rdp->cpu, &rdp->rcu_iw);
+			}
 		}
-#endif
 	}
 
 	return 0;
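
For context, a paraphrased sketch of the irq_work handler behind rdp->rcu_iw
(reconstructed from RCU's tree code of that era, not a verbatim excerpt)
shows why the self-IPI case can simply be skipped: run on the local CPU, the
handler would only redo what rcu_implicit_dynticks_qs() itself just did.

	/* Paraphrased sketch of the rcu_iw irq_work handler. */
	static void rcu_iw_handler(struct irq_work *iwp)
	{
		struct rcu_data *rdp = container_of(iwp, struct rcu_data, rcu_iw);
		struct rcu_node *rnp = rdp->mynode;

		raw_spin_lock_rcu_node(rnp);
		if (rdp->rcu_iw_pending) {
			/* Re-store the gp_seq the requester just stored ... */
			rdp->rcu_iw_gp_seq = rnp->gp_seq;
			/* ... and clear the flag the requester just set. */
			rdp->rcu_iw_pending = false;
		}
		raw_spin_unlock_rcu_node(rnp);
	}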

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC][PATCH v3 6/6] rcu/tree: Use irq_work_queue_remote()
  2020-10-29  9:10         ` Peter Zijlstra
@ 2020-10-29 16:04           ` Paul E. McKenney
  2020-10-29 16:14             ` Peter Zijlstra
  0 siblings, 1 reply; 24+ messages in thread
From: Paul E. McKenney @ 2020-10-29 16:04 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: mingo, linux-kernel, will, hch, axboe, chris, davem, kuba,
	fweisbec, oleg, vincent.guittot

On Thu, Oct 29, 2020 at 10:10:53AM +0100, Peter Zijlstra wrote:
> On Wed, Oct 28, 2020 at 01:15:54PM -0700, Paul E. McKenney wrote:
> > On Wed, Oct 28, 2020 at 09:02:43PM +0100, Peter Zijlstra wrote:
> 
> > > Subject: rcu/tree: Use irq_work_queue_remote()
> > > From: Peter Zijlstra <peterz@infradead.org>
> > > Date: Wed Oct 28 11:53:40 CET 2020
> > > 
> > > All sites that consume rcu_iw_gp_seq seem to have rcu_node lock held,
> > > so setting it probably should too. Also the effect of self-IPI here
> > > would be setting rcu_iw_gp_seq to the value we just set it to
> > > (pointless) and clearing rcu_iw_pending, which we just set, so don't
> > > set it.
> > > 
> > > Passes TREE01.
> > > 
> > > Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> > > ---
> > >  kernel/rcu/tree.c |   10 ++++++----
> > >  1 file changed, 6 insertions(+), 4 deletions(-)
> > > 
> > > --- a/kernel/rcu/tree.c
> > > +++ b/kernel/rcu/tree.c
> > > @@ -1308,14 +1308,16 @@ static int rcu_implicit_dynticks_qs(stru
> > >  			resched_cpu(rdp->cpu);
> > >  			WRITE_ONCE(rdp->last_fqs_resched, jiffies);
> > >  		}
> > > -#ifdef CONFIG_IRQ_WORK
> > > +		raw_spin_lock_rcu_node(rnp);
> > 
> > The caller of rcu_implicit_dynticks_qs() already holds this lock.
> > Please see the force_qs_rnp() function and its second call site,
> > to which rcu_implicit_dynticks_qs() is passed as an argument.
> > 
> > But other than that, this does look plausible.  And getting rid of
> > that #ifdef is worth something.  ;-)
> 
> Dang, clearly TREE01 didn't actually hit any of this code :/ Is there
> another test I should be running?

TREE01 is fine, but you have to tell rcutorture to actually generate an
RCU CPU stall warning.  Like this for a 25-second stall with interrupts
disabled:

tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration 3 --configs "10*TREE04" --bootargs "rcutorture.stall_cpu=25 rcutorture.stall_cpu_irqsoff=1" --trust-make

And like this to stall with interrupts enabled:

tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration 3 --configs "10*TREE04" --bootargs "rcutorture.stall_cpu=25" --trust-make

I use TREE04 instead of TREE01 because I have recently seen the pattern
you are looking for in a similar configuration.  But any of the TREE
scenarios should work.

And you need a bit of luck.  The CPU that rcutorture forces to stall
needs to be blocking the current grace period at the beginning of that
stall, or, alternatively, another grace period needs to start while
the CPU is stalled.  But the first command (with stall_cpu_irqsoff=1)
will sometimes get you something like this:

rcu: INFO: rcu_sched detected stalls on CPUs/tasks:
rcu:     3-...0: (1 GPs behind) idle=156/1/0x4000000000000000 softirq=493/494 fqs=4570 last_accelerate: 0000/eaad dyntick_enabled: 0
 (detected by 7, t=21003 jiffies, g=693, q=35687)

The zero in "...0" on the second line indicates that the RCU CPU stall
warning found that the stalled CPU had its interrupts disabled for the
last portion of that stall.

This is probabilistic.  Much depends on the state of both RCU and
rcutorture at the time that the stall starts.  I got the above results
once in 20 attempts, with other attempts having variously prevented
RCU's grace-period kthread from running, gotten the irq-work executed
just before the stall was reported, and so on.

Of course, to test your change, you also need the grace-period kthread to
migrate to the stalled CPU just after interrupts are enabled.  For this,
you need something like an 11-second stall plus something to move the
grace-period kthread at the right (wrong?) time.  Or just run the above
commands in a loop on a system with ample storage overnight or some such.
I see about 70MB of storage per run, so disk size shouldn't be too much
of a problem.

If you do wish to muck with RCU and rcutorture to migrate
RCU's grace-period kthread, look for uses of stall_cpu_irqsoff in
kernel/rcu/rcutorture.c on the one hand and rcu_gp_kthread() along with
the functions that it calls in kernel/rcu/tree.c on the other.
For the latter, maybe force_qs_rnp() would be a good choice.

						Thanx, Paul

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC][PATCH v3 6/6] rcu/tree: Use irq_work_queue_remote()
  2020-10-29  9:15         ` Peter Zijlstra
@ 2020-10-29 16:06           ` Paul E. McKenney
  0 siblings, 0 replies; 24+ messages in thread
From: Paul E. McKenney @ 2020-10-29 16:06 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: mingo, linux-kernel, will, hch, axboe, chris, davem, kuba,
	fweisbec, oleg, vincent.guittot

On Thu, Oct 29, 2020 at 10:15:34AM +0100, Peter Zijlstra wrote:
> On Wed, Oct 28, 2020 at 01:15:54PM -0700, Paul E. McKenney wrote:
> > On Wed, Oct 28, 2020 at 09:02:43PM +0100, Peter Zijlstra wrote:
> > > -#ifdef CONFIG_IRQ_WORK
> > > +		raw_spin_lock_rcu_node(rnp);
> > 
> > The caller of rcu_implicit_dynticks_qs() already holds this lock.
> > Please see the force_qs_rnp() function and its second call site,
> > to which rcu_implicit_dynticks_qs() is passed as an argument.
> 
> Like this then.

This does look plausible!  But I am sure that rcutorture will also
have an opinion.  ;-)

							Thanx, Paul

> ---
> Subject: rcu/tree: Use irq_work_queue_remote()
> From: Peter Zijlstra <peterz@infradead.org>
> Date: Wed Oct 28 11:53:40 CET 2020
> 
> The effect of a self-IPI here would be setting rcu_iw_gp_seq to the
> value we just set it to (pointless) and clearing rcu_iw_pending, which
> we just set, so don't set it.
> 
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> ---
>  kernel/rcu/tree.c |   10 ++++++----
>  1 file changed, 6 insertions(+), 4 deletions(-)
> 
> --- a/kernel/rcu/tree.c
> +++ b/kernel/rcu/tree.c
> @@ -1204,6 +1204,8 @@ static int rcu_implicit_dynticks_qs(stru
>  	bool *ruqp;
>  	struct rcu_node *rnp = rdp->mynode;
>  
> +	raw_lockdep_assert_held_rcu_node(rnp);
> +
>  	/*
>  	 * If the CPU passed through or entered a dynticks idle phase with
>  	 * no active irq/NMI handlers, then we can safely pretend that the CPU
> @@ -1308,14 +1310,14 @@ static int rcu_implicit_dynticks_qs(stru
>  			resched_cpu(rdp->cpu);
>  			WRITE_ONCE(rdp->last_fqs_resched, jiffies);
>  		}
> -#ifdef CONFIG_IRQ_WORK
>  		if (!rdp->rcu_iw_pending && rdp->rcu_iw_gp_seq != rnp->gp_seq &&
>  		    (rnp->ffmask & rdp->grpmask)) {
> -			rdp->rcu_iw_pending = true;
>  			rdp->rcu_iw_gp_seq = rnp->gp_seq;
> -			irq_work_queue_on(&rdp->rcu_iw, rdp->cpu);
> +			if (likely(rdp->cpu != smp_processor_id())) {
> +				rdp->rcu_iw_pending = true;
> +				irq_work_queue_remote(rdp->cpu, &rdp->rcu_iw);
> +			}
>  		}
> -#endif
>  	}
>  
>  	return 0;

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [RFC][PATCH v3 6/6] rcu/tree: Use irq_work_queue_remote()
  2020-10-29 16:04           ` Paul E. McKenney
@ 2020-10-29 16:14             ` Peter Zijlstra
  0 siblings, 0 replies; 24+ messages in thread
From: Peter Zijlstra @ 2020-10-29 16:14 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: mingo, linux-kernel, will, hch, axboe, chris, davem, kuba,
	fweisbec, oleg, vincent.guittot

On Thu, Oct 29, 2020 at 09:04:48AM -0700, Paul E. McKenney wrote:
> On Thu, Oct 29, 2020 at 10:10:53AM +0100, Peter Zijlstra wrote:

> > Dang, clearly TREE01 didn't actually hit any of this code :/ Is there
> > another test I should be running?
> 
> TREE01 is fine, but you have to tell rcutorture to actually generate an
> RCU CPU stall warning.  Like this for a 25-second stall with interrupts
> disabled:
> 
> tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration 3 --configs "10*TREE04" --bootargs "rcutorture.stall_cpu=25 rcutorture.stall_cpu_irqsoff=1" --trust-make

> Of course, to test your change, you also need the grace-period kthread to
> migrate to the stalled CPU just after interrupts are enabled.  For this,
> you need something like an 11-second stall plus something to move the
> grace-period kthread at the right (wrong?) time.  Or just run the above
> commands in a loop on a system with ample storage over night or some such.
> I see about 70MB of storage per run, so disk size shouldn't be too much
> of a problem.

Thanks! I'll make the above run overnight in a loop.

^ permalink raw reply	[flat|nested] 24+ messages in thread

end of thread, other threads:[~2020-10-29 16:14 UTC | newest]

Thread overview: 24+ messages
2020-10-28 11:07 [PATCH v3 0/6] smp: irq_work / smp_call_function rework Peter Zijlstra
2020-10-28 11:07 ` [PATCH v3 1/6] irq_work: Cleanup Peter Zijlstra
2020-10-28 13:01   ` Frederic Weisbecker
2020-10-28 11:07 ` [PATCH v3 2/6] smp: Cleanup smp_call_function*() Peter Zijlstra
2020-10-28 13:25   ` Frederic Weisbecker
2020-10-28 11:07 ` [PATCH v3 3/6] irq_work: Optimize irq_work_single() Peter Zijlstra
2020-10-28 12:06   ` Frederic Weisbecker
2020-10-28 11:07 ` [PATCH v3 4/6] irq_work: Unconditionally build on SMP Peter Zijlstra
2020-10-28 13:34   ` Frederic Weisbecker
2020-10-28 14:52     ` Peter Zijlstra
2020-10-28 11:07 ` [PATCH v3 5/6] irq_work: Provide irq_work_queue_remote() Peter Zijlstra
2020-10-28 13:40   ` Frederic Weisbecker
2020-10-28 14:38     ` Peter Zijlstra
2020-10-28 14:53     ` Peter Zijlstra
2020-10-28 14:56       ` Frederic Weisbecker
2020-10-28 11:07 ` [RFC][PATCH v3 6/6] rcu/tree: Use irq_work_queue_remote() Peter Zijlstra
2020-10-28 14:54   ` Peter Zijlstra
2020-10-28 20:02     ` Peter Zijlstra
2020-10-28 20:15       ` Paul E. McKenney
2020-10-29  9:10         ` Peter Zijlstra
2020-10-29 16:04           ` Paul E. McKenney
2020-10-29 16:14             ` Peter Zijlstra
2020-10-29  9:15         ` Peter Zijlstra
2020-10-29 16:06           ` Paul E. McKenney
