All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] smp: add a best_effort version of smp_call_function_many()
@ 2021-04-19 18:44 Luigi Rizzo
  2021-04-19 18:52 ` Randy Dunlap
                   ` (3 more replies)
  0 siblings, 4 replies; 12+ messages in thread
From: Luigi Rizzo @ 2021-04-19 18:44 UTC (permalink / raw)
  To: linux-kernel, peterz, axboe, paulmck; +Cc: Luigi Rizzo

Regardless of the 'wait' argument, smp_call_function_many() must spin
if any of the target CPUs have their csd busy waiting to be processed
for a previous call. This may cause high tail latencies e.g. when some
of the target CPUs are running functions that disable interrupts for a
long time; getrusage() is one possible culprit.

Here we introduce a variant, __smp_call_function_many(), that adds
a third 'best_effort' mode to the two existing ones (nowait, wait).
In best effort mode, the call will skip CPUs whose csd is busy, and if
any CPU is skipped it returns -EBUSY and the set of busy CPUs in the
mask. This allows the caller to decide how to proceed, e.g. it might
retry at a later time, or use a private csd, etc.

The new function is a compromise to avoid touching existing callers of
smp_call_function_many(). If the feature is considered interesting, we
could even replace the 'wait' argument with a ternary 'mode' in all
smp_call_function_*() and derived methods.

Signed-off-by: Luigi Rizzo <lrizzo@google.com>
---
 include/linux/smp.h | 10 ++++++
 kernel/smp.c        | 75 +++++++++++++++++++++++++++++++++++++--------
 2 files changed, 72 insertions(+), 13 deletions(-)

diff --git a/include/linux/smp.h b/include/linux/smp.h
index 70c6f6284dcf..5c6c7d3e1f19 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -75,6 +75,11 @@ void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func,
 
 int smp_call_function_single_async(int cpu, call_single_data_t *csd);
 
+/* Modes for __smp_call_function_many() */
+#define SMP_CFM_NOWAIT		0
+#define SMP_CFM_WAIT		1
+#define SMP_CFM_BEST_EFFORT	2
+
 #ifdef CONFIG_SMP
 
 #include <linux/preempt.h>
@@ -120,6 +125,8 @@ extern void smp_cpus_done(unsigned int max_cpus);
 void smp_call_function(smp_call_func_t func, void *info, int wait);
 void smp_call_function_many(const struct cpumask *mask,
 			    smp_call_func_t func, void *info, bool wait);
+int __smp_call_function_many(struct cpumask *mask, smp_call_func_t func,
+			     void *info, int mode);
 
 int smp_call_function_any(const struct cpumask *mask,
 			  smp_call_func_t func, void *info, int wait);
@@ -170,6 +177,9 @@ static inline void smp_send_reschedule(int cpu) { }
 #define smp_prepare_boot_cpu()			do {} while (0)
 #define smp_call_function_many(mask, func, info, wait) \
 			(up_smp_call_function(func, info))
+#define __smp_call_function_many(mask, func, info, mode) \
+		(up_smp_call_function(func, info), 0)
+
 static inline void call_function_init(void) { }
 
 static inline int
diff --git a/kernel/smp.c b/kernel/smp.c
index aeb0adfa0606..75155875fadc 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -242,6 +242,18 @@ static __always_inline void csd_lock(call_single_data_t *csd)
 	smp_wmb();
 }
 
+static __always_inline bool csd_trylock(call_single_data_t *csd)
+{
+	unsigned int flags = READ_ONCE(csd->node.u_flags);
+
+	if (flags & CSD_FLAG_LOCK)
+		return false;
+	csd->node.u_flags |= CSD_FLAG_LOCK;
+	/* See the smp_wmb() comment in csd_lock() */
+	smp_wmb();
+	return true;
+}
+
 static __always_inline void csd_unlock(call_single_data_t *csd)
 {
 	WARN_ON(!(csd->node.u_flags & CSD_FLAG_LOCK));
@@ -608,12 +620,14 @@ int smp_call_function_any(const struct cpumask *mask,
 }
 EXPORT_SYMBOL_GPL(smp_call_function_any);
 
-static void smp_call_function_many_cond(const struct cpumask *mask,
-					smp_call_func_t func, void *info,
-					bool wait, smp_cond_func_t cond_func)
+static struct cpumask *smp_call_function_many_cond(const struct cpumask *mask,
+						   smp_call_func_t func,
+						   void *info, int mode,
+						   smp_cond_func_t cond_func)
 {
 	struct call_function_data *cfd;
 	int cpu, next_cpu, this_cpu = smp_processor_id();
+	bool busy = false, wait = (mode == SMP_CFM_WAIT);
 
 	/*
 	 * Can deadlock when called with interrupts disabled.
@@ -639,18 +653,18 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
 
 	/* No online cpus?  We're done. */
 	if (cpu >= nr_cpu_ids)
-		return;
+		return NULL;
 
 	/* Do we have another CPU which isn't us? */
 	next_cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
 	if (next_cpu == this_cpu)
 		next_cpu = cpumask_next_and(next_cpu, mask, cpu_online_mask);
 
-	/* Fastpath: do that cpu by itself. */
-	if (next_cpu >= nr_cpu_ids) {
+	/* Fastpath: if not best-effort do that cpu by itself. */
+	if (next_cpu >= nr_cpu_ids && mode != SMP_CFM_BEST_EFFORT) {
 		if (!cond_func || cond_func(cpu, info))
 			smp_call_function_single(cpu, func, info, wait);
-		return;
+		return NULL;
 	}
 
 	cfd = this_cpu_ptr(&cfd_data);
@@ -660,7 +674,7 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
 
 	/* Some callers race with other cpus changing the passed mask */
 	if (unlikely(!cpumask_weight(cfd->cpumask)))
-		return;
+		return NULL;
 
 	cpumask_clear(cfd->cpumask_ipi);
 	for_each_cpu(cpu, cfd->cpumask) {
@@ -669,9 +683,17 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
 		if (cond_func && !cond_func(cpu, info))
 			continue;
 
-		csd_lock(csd);
-		if (wait)
-			csd->node.u_flags |= CSD_TYPE_SYNC;
+		if (mode == SMP_CFM_BEST_EFFORT) {
+			if (!csd_trylock(csd)) {
+				cpumask_clear_cpu(cpu, cfd->cpumask);
+				busy = true;
+				continue;
+			}
+		} else {
+			csd_lock(csd);
+			if (wait)
+				csd->node.u_flags |= CSD_TYPE_SYNC;
+		}
 		csd->func = func;
 		csd->info = info;
 #ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
@@ -693,8 +715,32 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
 			csd_lock_wait(csd);
 		}
 	}
+	return busy ? cfd->cpumask : NULL;
 }
 
+/**
+ * __smp_call_function_many - smp_call_function_many() with a mode argument
+ * @mask: set of target CPUs; on -EBUSY, overwritten with the skipped CPUs
+ * @func: the function to run on each target CPU
+ * @info: arbitrary pointer passed to @func
+ * @mode: SMP_CFM_NOWAIT, SMP_CFM_WAIT, or SMP_CFM_BEST_EFFORT (skip busy
+ *        CPUs and return -EBUSY if any were skipped; 0 otherwise)
+ */
+int __smp_call_function_many(struct cpumask *mask, smp_call_func_t func,
+			     void *info, int mode)
+{
+	struct cpumask *ret = smp_call_function_many_cond(mask, func, info,
+							  mode, NULL);
+
+	if (!ret)
+		return 0;
+	cpumask_andnot(mask, mask, ret);
+	cpumask_and(mask, mask, cpu_online_mask);
+	cpumask_clear_cpu(smp_processor_id(), mask);
+	return -EBUSY;
+}
+EXPORT_SYMBOL(__smp_call_function_many);
+
 /**
  * smp_call_function_many(): Run a function on a set of other CPUs.
  * @mask: The set of cpus to run on (only runs on online subset).
@@ -712,7 +758,9 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
 void smp_call_function_many(const struct cpumask *mask,
 			    smp_call_func_t func, void *info, bool wait)
 {
-	smp_call_function_many_cond(mask, func, info, wait, NULL);
+	const int mode = wait ? SMP_CFM_WAIT : SMP_CFM_NOWAIT;
+
+	smp_call_function_many_cond(mask, func, info, mode, NULL);
 }
 EXPORT_SYMBOL(smp_call_function_many);
 
@@ -898,9 +946,10 @@ EXPORT_SYMBOL(on_each_cpu_mask);
 void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func,
 			   void *info, bool wait, const struct cpumask *mask)
 {
+	const int mode = wait ? SMP_CFM_WAIT : SMP_CFM_NOWAIT;
 	int cpu = get_cpu();
 
-	smp_call_function_many_cond(mask, func, info, wait, cond_func);
+	smp_call_function_many_cond(mask, func, info, mode, cond_func);
 	if (cpumask_test_cpu(cpu, mask) && cond_func(cpu, info)) {
 		unsigned long flags;
 
-- 
2.31.1.368.gbe11c130af-goog


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH] smp: add a best_effort version of smp_call_function_many()
  2021-04-19 18:44 [PATCH] smp: add a best_effort version of smp_call_function_many() Luigi Rizzo
@ 2021-04-19 18:52 ` Randy Dunlap
  2021-04-19 19:17 ` Peter Zijlstra
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 12+ messages in thread
From: Randy Dunlap @ 2021-04-19 18:52 UTC (permalink / raw)
  To: Luigi Rizzo, linux-kernel, peterz, axboe, paulmck

Hi--

On 4/19/21 11:44 AM, Luigi Rizzo wrote:

> 
> Signed-off-by: Luigi Rizzo <lrizzo@google.com>
> ---
>  include/linux/smp.h | 10 ++++++
>  kernel/smp.c        | 75 +++++++++++++++++++++++++++++++++++++--------
>  2 files changed, 72 insertions(+), 13 deletions(-)
> 

> diff --git a/kernel/smp.c b/kernel/smp.c
> index aeb0adfa0606..75155875fadc 100644
> --- a/kernel/smp.c
> +++ b/kernel/smp.c

> @@ -693,8 +715,32 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
>  			csd_lock_wait(csd);
>  		}
>  	}
> +	return busy ? cfd->cpumask : NULL;
>  }
>  
> +/**
> + * Extended version of smp_call_function_many(). Same constraints.
> + * @mode == 0 same as wait = false, returns 0;
> + * @mode == 1 same as wait = true, returns 0;
> + * @mode = SMP_CFM_BEST_EFFORT: skips CPUs with previous pending requests,
> + *     returns 0 and *mask unmodified if no CPUs are skipped,
> + *     -EBUSY if CPUs are skipped, and *mask is the set of skipped CPUs

Please convert those comments to real kernel-doc format.
(Documentation/doc-guide/kernel-doc.rst)

> + */
> +int __smp_call_function_many(struct cpumask *mask, smp_call_func_t func,
> +			     void *info, int mode)
> +{

thanks.
-- 
~Randy


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] smp: add a best_effort version of smp_call_function_many()
  2021-04-19 18:44 [PATCH] smp: add a best_effort version of smp_call_function_many() Luigi Rizzo
  2021-04-19 18:52 ` Randy Dunlap
@ 2021-04-19 19:17 ` Peter Zijlstra
  2021-04-19 21:07   ` Luigi Rizzo
  2021-04-19 22:22   ` kernel test robot
  2021-04-20  1:38   ` kernel test robot
  3 siblings, 1 reply; 12+ messages in thread
From: Peter Zijlstra @ 2021-04-19 19:17 UTC (permalink / raw)
  To: Luigi Rizzo; +Cc: linux-kernel, axboe, paulmck

On Mon, Apr 19, 2021 at 11:44:55AM -0700, Luigi Rizzo wrote:
> Regardless of the 'wait' argument, smp_call_function_many() must spin
> if any of the target CPUs have their csd busy waiting to be processed
> for a previous call. This may cause high tail latencies e.g. when some
> of the target CPUs are running functions that disable interrupts for a
> long time; getrusage() is one possible culprit.
> 
> Here we introduce a variant, __smp_call_function_many(), that adds
> a third 'best_effort' mode to the two existing ones (nowait, wait).
> In best effort mode, the call will skip CPUs whose csd is busy, and if
> any CPU is skipped it returns -EBUSY and the set of busy in the mask.
> This allows the caller to decide how to proceed, e.g. it might retry at
> a later time, or use a private csd, etc..
> 
> The new function is a compromise to avoid touching existing callers of
> smp_call_function_many(). If the feature is considered interesting, we
> could even replace the 'wait' argument with a ternary 'mode' in all
> smp_call_function_*() and derived methods.

I don't see a user of this... 

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] smp: add a best_effort version of smp_call_function_many()
  2021-04-19 19:17 ` Peter Zijlstra
@ 2021-04-19 21:07   ` Luigi Rizzo
  2021-04-20  9:13     ` Peter Zijlstra
  0 siblings, 1 reply; 12+ messages in thread
From: Luigi Rizzo @ 2021-04-19 21:07 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: linux-kernel, axboe, paulmck

On Mon, Apr 19, 2021 at 9:17 PM Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Mon, Apr 19, 2021 at 11:44:55AM -0700, Luigi Rizzo wrote:
> > Regardless of the 'wait' argument, smp_call_function_many() must spin
> > if any of the target CPUs have their csd busy waiting to be processed
> > for a previous call. This may cause high tail latencies e.g. when some
> > of the target CPUs are running functions that disable interrupts for a
> > long time; getrusage() is one possible culprit.
> >
> > Here we introduce a variant, __smp_call_function_many(), that adds
> > a third 'best_effort' mode to the two existing ones (nowait, wait).
> > In best effort mode, the call will skip CPUs whose csd is busy, and if
> > any CPU is skipped it returns -EBUSY and the set of busy in the mask.
> > This allows the caller to decide how to proceed, e.g. it might retry at
> > a later time, or use a private csd, etc..
> >
> > The new function is a compromise to avoid touching existing callers of
> > smp_call_function_many(). If the feature is considered interesting, we
> > could even replace the 'wait' argument with a ternary 'mode' in all
> > smp_call_function_*() and derived methods.
>
> I don't see a user of this...

This is actually something for which I was looking for feedback:

my use case is similar to a periodic garbage collect request:
the caller tells targets that it may be time to do some work,
but it does not matter if the request is dropped because the
caller knows who was busy and will reissue pending requests later.

I would expect something like the above could be useful e.g.
in various kinds of resource manager.

However, a grep for on_each_cpu_*() and smp_call_function_*()
mostly returns synchronous calls (wait=1).

Any possible candidates that people can think of ?

thanks
luigi

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] smp: add a best_effort version of smp_call_function_many()
  2021-04-19 18:44 [PATCH] smp: add a best_effort version of smp_call_function_many() Luigi Rizzo
@ 2021-04-19 22:22   ` kernel test robot
  2021-04-19 19:17 ` Peter Zijlstra
                     ` (2 subsequent siblings)
  3 siblings, 0 replies; 12+ messages in thread
From: kernel test robot @ 2021-04-19 22:22 UTC (permalink / raw)
  To: Luigi Rizzo, linux-kernel, peterz, axboe, paulmck; +Cc: kbuild-all, Luigi Rizzo

[-- Attachment #1: Type: text/plain, Size: 2788 bytes --]

Hi Luigi,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on linux/master]
[also build test WARNING on linus/master v5.12-rc8]
[cannot apply to next-20210419]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Luigi-Rizzo/smp-add-a-best_effort-version-of-smp_call_function_many/20210420-024713
base:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 5e46d1b78a03d52306f21f77a4e4a144b6d31486
config: i386-randconfig-s002-20210420 (attached as .config)
compiler: gcc-9 (Debian 9.3.0-22) 9.3.0
reproduce:
        # apt-get install sparse
        # sparse version: v0.6.3-330-g09ec74f6-dirty
        # https://github.com/0day-ci/linux/commit/9b290e2d29303b7c5bae4a0eddc5bb15c01e72f7
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Luigi-Rizzo/smp-add-a-best_effort-version-of-smp_call_function_many/20210420-024713
        git checkout 9b290e2d29303b7c5bae4a0eddc5bb15c01e72f7
        # save the attached .config to linux build tree
        make W=1 C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' W=1 ARCH=i386 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

>> kernel/smp.c:722: warning: wrong kernel-doc identifier on line:
    * Extended version of smp_call_function_many(). Same constraints.
   kernel/smp.c:1022: warning: cannot understand function prototype: 'struct smp_call_on_cpu_struct '


vim +722 kernel/smp.c

   720	
   721	/**
 > 722	 * Extended version of smp_call_function_many(). Same constraints.
   723	 * @mode == 0 same as wait = false, returns 0;
   724	 * @mode == 1 same as wait = true, returns 0;
   725	 * @mode = SMP_CFM_BEST_EFFORT: skips CPUs with previous pending requests,
   726	 *     returns 0 and *mask unmodified if no CPUs are skipped,
   727	 *     -EBUSY if CPUs are skipped, and *mask is the set of skipped CPUs
   728	 */
   729	int __smp_call_function_many(struct cpumask *mask, smp_call_func_t func,
   730				     void *info, int mode)
   731	{
   732		struct cpumask *ret = smp_call_function_many_cond(mask, func, info,
   733								  mode, NULL);
   734	
   735		if (!ret)
   736			return 0;
   737		cpumask_andnot(mask, mask, ret);
   738		cpumask_and(mask, mask, cpu_online_mask);
   739		cpumask_clear_cpu(smp_processor_id(), mask);
   740		return -EBUSY;
   741	}
   742	EXPORT_SYMBOL(__smp_call_function_many);
   743	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 31079 bytes --]

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] smp: add a best_effort version of smp_call_function_many()
@ 2021-04-19 22:22   ` kernel test robot
  0 siblings, 0 replies; 12+ messages in thread
From: kernel test robot @ 2021-04-19 22:22 UTC (permalink / raw)
  To: kbuild-all

[-- Attachment #1: Type: text/plain, Size: 2855 bytes --]

Hi Luigi,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on linux/master]
[also build test WARNING on linus/master v5.12-rc8]
[cannot apply to next-20210419]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Luigi-Rizzo/smp-add-a-best_effort-version-of-smp_call_function_many/20210420-024713
base:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 5e46d1b78a03d52306f21f77a4e4a144b6d31486
config: i386-randconfig-s002-20210420 (attached as .config)
compiler: gcc-9 (Debian 9.3.0-22) 9.3.0
reproduce:
        # apt-get install sparse
        # sparse version: v0.6.3-330-g09ec74f6-dirty
        # https://github.com/0day-ci/linux/commit/9b290e2d29303b7c5bae4a0eddc5bb15c01e72f7
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Luigi-Rizzo/smp-add-a-best_effort-version-of-smp_call_function_many/20210420-024713
        git checkout 9b290e2d29303b7c5bae4a0eddc5bb15c01e72f7
        # save the attached .config to linux build tree
        make W=1 C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' W=1 ARCH=i386 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

>> kernel/smp.c:722: warning: wrong kernel-doc identifier on line:
    * Extended version of smp_call_function_many(). Same constraints.
   kernel/smp.c:1022: warning: cannot understand function prototype: 'struct smp_call_on_cpu_struct '


vim +722 kernel/smp.c

   720	
   721	/**
 > 722	 * Extended version of smp_call_function_many(). Same constraints.
   723	 * @mode == 0 same as wait = false, returns 0;
   724	 * @mode == 1 same as wait = true, returns 0;
   725	 * @mode = SMP_CFM_BEST_EFFORT: skips CPUs with previous pending requests,
   726	 *     returns 0 and *mask unmodified if no CPUs are skipped,
   727	 *     -EBUSY if CPUs are skipped, and *mask is the set of skipped CPUs
   728	 */
   729	int __smp_call_function_many(struct cpumask *mask, smp_call_func_t func,
   730				     void *info, int mode)
   731	{
   732		struct cpumask *ret = smp_call_function_many_cond(mask, func, info,
   733								  mode, NULL);
   734	
   735		if (!ret)
   736			return 0;
   737		cpumask_andnot(mask, mask, ret);
   738		cpumask_and(mask, mask, cpu_online_mask);
   739		cpumask_clear_cpu(smp_processor_id(), mask);
   740		return -EBUSY;
   741	}
   742	EXPORT_SYMBOL(__smp_call_function_many);
   743	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all(a)lists.01.org

[-- Attachment #2: config.gz --]
[-- Type: application/gzip, Size: 31079 bytes --]

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] smp: add a best_effort version of smp_call_function_many()
  2021-04-19 18:44 [PATCH] smp: add a best_effort version of smp_call_function_many() Luigi Rizzo
@ 2021-04-20  1:38   ` kernel test robot
  2021-04-19 19:17 ` Peter Zijlstra
                     ` (2 subsequent siblings)
  3 siblings, 0 replies; 12+ messages in thread
From: kernel test robot @ 2021-04-20  1:38 UTC (permalink / raw)
  To: Luigi Rizzo, linux-kernel, peterz, axboe, paulmck
  Cc: kbuild-all, clang-built-linux, Luigi Rizzo

[-- Attachment #1: Type: text/plain, Size: 3043 bytes --]

Hi Luigi,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on linux/master]
[also build test WARNING on linus/master]
[cannot apply to next-20210419]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Luigi-Rizzo/smp-add-a-best_effort-version-of-smp_call_function_many/20210420-024713
base:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 5e46d1b78a03d52306f21f77a4e4a144b6d31486
config: riscv-randconfig-r015-20210419 (attached as .config)
compiler: clang version 13.0.0 (https://github.com/llvm/llvm-project 2b50f5a4343f8fb06acaa5c36355bcf58092c9cd)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install riscv cross compiling tool for clang build
        # apt-get install binutils-riscv64-linux-gnu
        # https://github.com/0day-ci/linux/commit/9b290e2d29303b7c5bae4a0eddc5bb15c01e72f7
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Luigi-Rizzo/smp-add-a-best_effort-version-of-smp_call_function_many/20210420-024713
        git checkout 9b290e2d29303b7c5bae4a0eddc5bb15c01e72f7
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 ARCH=riscv 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

>> kernel/smp.c:722: warning: wrong kernel-doc identifier on line:
    * Extended version of smp_call_function_many(). Same constraints.
   kernel/smp.c:1022: warning: cannot understand function prototype: 'struct smp_call_on_cpu_struct '


vim +722 kernel/smp.c

   720	
   721	/**
 > 722	 * Extended version of smp_call_function_many(). Same constraints.
   723	 * @mode == 0 same as wait = false, returns 0;
   724	 * @mode == 1 same as wait = true, returns 0;
   725	 * @mode = SMP_CFM_BEST_EFFORT: skips CPUs with previous pending requests,
   726	 *     returns 0 and *mask unmodified if no CPUs are skipped,
   727	 *     -EBUSY if CPUs are skipped, and *mask is the set of skipped CPUs
   728	 */
   729	int __smp_call_function_many(struct cpumask *mask, smp_call_func_t func,
   730				     void *info, int mode)
   731	{
   732		struct cpumask *ret = smp_call_function_many_cond(mask, func, info,
   733								  mode, NULL);
   734	
   735		if (!ret)
   736			return 0;
   737		cpumask_andnot(mask, mask, ret);
   738		cpumask_and(mask, mask, cpu_online_mask);
   739		cpumask_clear_cpu(smp_processor_id(), mask);
   740		return -EBUSY;
   741	}
   742	EXPORT_SYMBOL(__smp_call_function_many);
   743	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 27252 bytes --]

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] smp: add a best_effort version of smp_call_function_many()
@ 2021-04-20  1:38   ` kernel test robot
  0 siblings, 0 replies; 12+ messages in thread
From: kernel test robot @ 2021-04-20  1:38 UTC (permalink / raw)
  To: kbuild-all

[-- Attachment #1: Type: text/plain, Size: 3112 bytes --]

Hi Luigi,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on linux/master]
[also build test WARNING on linus/master]
[cannot apply to next-20210419]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Luigi-Rizzo/smp-add-a-best_effort-version-of-smp_call_function_many/20210420-024713
base:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 5e46d1b78a03d52306f21f77a4e4a144b6d31486
config: riscv-randconfig-r015-20210419 (attached as .config)
compiler: clang version 13.0.0 (https://github.com/llvm/llvm-project 2b50f5a4343f8fb06acaa5c36355bcf58092c9cd)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install riscv cross compiling tool for clang build
        # apt-get install binutils-riscv64-linux-gnu
        # https://github.com/0day-ci/linux/commit/9b290e2d29303b7c5bae4a0eddc5bb15c01e72f7
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Luigi-Rizzo/smp-add-a-best_effort-version-of-smp_call_function_many/20210420-024713
        git checkout 9b290e2d29303b7c5bae4a0eddc5bb15c01e72f7
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 ARCH=riscv 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

>> kernel/smp.c:722: warning: wrong kernel-doc identifier on line:
    * Extended version of smp_call_function_many(). Same constraints.
   kernel/smp.c:1022: warning: cannot understand function prototype: 'struct smp_call_on_cpu_struct '


vim +722 kernel/smp.c

   720	
   721	/**
 > 722	 * Extended version of smp_call_function_many(). Same constraints.
   723	 * @mode == 0 same as wait = false, returns 0;
   724	 * @mode == 1 same as wait = true, returns 0;
   725	 * @mode = SMP_CFM_BEST_EFFORT: skips CPUs with previous pending requests,
   726	 *     returns 0 and *mask unmodified if no CPUs are skipped,
   727	 *     -EBUSY if CPUs are skipped, and *mask is the set of skipped CPUs
   728	 */
   729	int __smp_call_function_many(struct cpumask *mask, smp_call_func_t func,
   730				     void *info, int mode)
   731	{
   732		struct cpumask *ret = smp_call_function_many_cond(mask, func, info,
   733								  mode, NULL);
   734	
   735		if (!ret)
   736			return 0;
   737		cpumask_andnot(mask, mask, ret);
   738		cpumask_and(mask, mask, cpu_online_mask);
   739		cpumask_clear_cpu(smp_processor_id(), mask);
   740		return -EBUSY;
   741	}
   742	EXPORT_SYMBOL(__smp_call_function_many);
   743	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all(a)lists.01.org

[-- Attachment #2: config.gz --]
[-- Type: application/gzip, Size: 27252 bytes --]

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] smp: add a best_effort version of smp_call_function_many()
  2021-04-19 21:07   ` Luigi Rizzo
@ 2021-04-20  9:13     ` Peter Zijlstra
  2021-04-20 10:41       ` Luigi Rizzo
  0 siblings, 1 reply; 12+ messages in thread
From: Peter Zijlstra @ 2021-04-20  9:13 UTC (permalink / raw)
  To: Luigi Rizzo; +Cc: linux-kernel, axboe, paulmck

On Mon, Apr 19, 2021 at 11:07:08PM +0200, Luigi Rizzo wrote:
> On Mon, Apr 19, 2021 at 9:17 PM Peter Zijlstra <peterz@infradead.org> wrote:
> >
> > On Mon, Apr 19, 2021 at 11:44:55AM -0700, Luigi Rizzo wrote:
> > > Regardless of the 'wait' argument, smp_call_function_many() must spin
> > > if any of the target CPUs have their csd busy waiting to be processed
> > > for a previous call. This may cause high tail latencies e.g. when some
> > > of the target CPUs are running functions that disable interrupts for a
> > > long time; getrusage() is one possible culprit.
> > >
> > > Here we introduce a variant, __smp_call_function_many(), that adds
> > > a third 'best_effort' mode to the two existing ones (nowait, wait).
> > > In best effort mode, the call will skip CPUs whose csd is busy, and if
> > > any CPU is skipped it returns -EBUSY and the set of busy in the mask.
> > > This allows the caller to decide how to proceed, e.g. it might retry at
> > > a later time, or use a private csd, etc..
> > >
> > > The new function is a compromise to avoid touching existing callers of
> > > smp_call_function_many(). If the feature is considered interesting, we
> > > could even replace the 'wait' argument with a ternary 'mode' in all
> > > smp_call_function_*() and derived methods.
> >
> > I don't see a user of this...
> 
> This is actually something for which I was looking for feedback:
> 
> my use case is similar to a periodic garbage collect request:
> the caller tells targets that it may be time to do some work,
> but it does not matter if the request is dropped because the
> caller knows who was busy and will reissue pending requests later.
> 
> I would expect something like the above could be useful e.g.
> in various kinds of resource manager.
> 
> However, a grep for on_each_cpu_*() and smp_call_function_*()
> mostly returns synchronous calls (wait=1).
> 
> Any possible candidates that people can think of ?

We mostly try and avoid using this stuff wherever possible. Only when
no other choice is left do we send IPIs.

NOHZ_FULL already relies on this and gets massively unhappy when a new
user comes and starts to spray IPIs.

So no; mostly we send an IPI because we _HAVE_ to, not because giggles.

That said; there's still some places left where we can avoid sending
IPIs, but in all those cases correctness mandates we actually handle
things and not randomly not do anything.

For example, look at arch/x86/kernel/alternative.c:text_poke_sync(). The
purpose of that is to ensure all CPUs observe modified *kernel* code.
Now, if that CPU is currently running userspace, it doesn't much care
kernel code is changed, however that does mean it needs to call
sync_core() upon entering kernel, *BEFORE* hitting any code that's
possibly modified (and self modifying code is everywhere today,
ironically also very much in the NOHZ_FULL entry paths).

So untangling all that should be possible, but is something that
requires quite a bit of care and doesn't benefit from anything like the
proposed.

Mostly it sounds like you shouldn't be using IPIs either.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] smp: add a best_effort version of smp_call_function_many()
  2021-04-20  9:13     ` Peter Zijlstra
@ 2021-04-20 10:41       ` Luigi Rizzo
  2021-04-20 13:31         ` Peter Zijlstra
  0 siblings, 1 reply; 12+ messages in thread
From: Luigi Rizzo @ 2021-04-20 10:41 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: linux-kernel, axboe, paulmck

On Tue, Apr 20, 2021 at 11:14 AM Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Mon, Apr 19, 2021 at 11:07:08PM +0200, Luigi Rizzo wrote:
> > On Mon, Apr 19, 2021 at 9:17 PM Peter Zijlstra <peterz@infradead.org> wrote:
> > >
> > > On Mon, Apr 19, 2021 at 11:44:55AM -0700, Luigi Rizzo wrote:
> > > > Regardless of the 'wait' argument, smp_call_function_many() must spin
> > > > if any of the target CPUs have their csd busy waiting to be processed
> > > > for a previous call. This may cause high tail latencies e.g. when some
> > > > of the target CPUs are running functions that disable interrupts for a
> > > > long time; getrusage() is one possible culprit.
> > > >
> > > > Here we introduce a variant, __smp_call_function_many(), that adds
> > > > a third 'best_effort' mode to the two existing ones (nowait, wait).
> > > > In best effort mode, the call will skip CPUs whose csd is busy, and if
> > > > any CPU is skipped it returns -EBUSY and the set of busy CPUs in the mask.
> > > > This allows the caller to decide how to proceed, e.g. it might retry at
> > > > a later time, or use a private csd, etc..
> > > >
> > > > The new function is a compromise to avoid touching existing callers of
> > > > smp_call_function_many(). If the feature is considered interesting, we
> > > > could even replace the 'wait' argument with a ternary 'mode' in all
> > > > smp_call_function_*() and derived methods.
> > >
> > > I don't see a user of this...
> >
> > This is actually something for which I was looking for feedback:
> >
> > my use case is similar to a periodic garbage collect request:
> > the caller tells targets that it may be time to do some work,
> > but it does not matter if the request is dropped because the
> > caller knows who was busy and will reissue pending requests later.
...
> > Any possible candidates that people can think of ?
>
> We mostly try and avoid using this stuff wherever possible. Only when
> no other choice is left do we send IPIs.
>
> NOHZ_FULL already relies on this and gets massively unhappy when a new
> user comes and starts to spray IPIs.

I am curious, why is that -- is it because the new user is stealing
the shared csd's in cfd_data (see below), or some other reason ?

>
> So no; mostly we send an IPI because we _HAVE_ to, not because giggles.
>
> That said; there's still some places left where we can avoid sending
> IPIs, but in all those cases correctness mandates we actually handle
> things and not randomly not do anything.

My case too requires that the request is eventually handled, but with
this non-blocking IPI the caller has a better option than blocking:
it can either retry the multicast IPI at a later time if conditions allow,
or it can post a dedicated CSD (with the advantage that, since my
requests are idempotent, if the CSD is locked there is no need to retry
because it means the handler has not started yet).

In fact, if we had the option to use dedicated CSDs for multicast IPI,
we wouldn't even need to retry because we'd know that the posted CSD
is for our call back and not someone else's.

cheers
luigi

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] smp: add a best_effort version of smp_call_function_many()
  2021-04-20 10:41       ` Luigi Rizzo
@ 2021-04-20 13:31         ` Peter Zijlstra
  2021-04-20 14:40           ` Luigi Rizzo
  0 siblings, 1 reply; 12+ messages in thread
From: Peter Zijlstra @ 2021-04-20 13:31 UTC (permalink / raw)
  To: Luigi Rizzo; +Cc: linux-kernel, axboe, paulmck

On Tue, Apr 20, 2021 at 12:41:08PM +0200, Luigi Rizzo wrote:
> On Tue, Apr 20, 2021 at 11:14 AM Peter Zijlstra <peterz@infradead.org> wrote:

> > We mostly try and avoid using this stuff wherever possible. Only when
> > no other choice is left do we send IPIs.
> >
> > NOHZ_FULL already relies on this and gets massively unhappy when a new
> > user comes and starts to spray IPIs.
> 
> I am curious, why is that -- is it because the new user is stealing
> the shared csd's in cfd_data (see below), or some other reason ?

The premise of NOHZ_FULL is that it will not be interrupted. There are
users who are working on a mode where any interruption will cause a
(fatal) signal.

> > So no; mostly we send an IPI because we _HAVE_ to, not because giggles.
> >
> > That said; there's still some places left where we can avoid sending
> > IPIs, but in all those cases correctness mandates we actually handle
> > things and not randomly not do anything.
> 
> My case too requires that the request is eventually handled, but with
> this non-blocking IPI the caller has a better option than blocking:
> it can either retry the multicast IPI at a later time if conditions allow,
> or it can post a dedicated CSD (with the advantage that, since my
> requests are idempotent, if the CSD is locked there is no need to retry
> because it means the handler has not started yet).
> 
> In fact, if we had the option to use dedicated CSDs for multicast IPI,
> we wouldn't even need to retry because we'd know that the posted CSD
> is for our call back and not someone else's.

What are you doing that CSD contention is such a problem?

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] smp: add a best_effort version of smp_call_function_many()
  2021-04-20 13:31         ` Peter Zijlstra
@ 2021-04-20 14:40           ` Luigi Rizzo
  0 siblings, 0 replies; 12+ messages in thread
From: Luigi Rizzo @ 2021-04-20 14:40 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: linux-kernel, axboe, paulmck

On Tue, Apr 20, 2021 at 3:33 PM Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Tue, Apr 20, 2021 at 12:41:08PM +0200, Luigi Rizzo wrote:
> > On Tue, Apr 20, 2021 at 11:14 AM Peter Zijlstra <peterz@infradead.org> wrote:
...
> > My case too requires that the request is eventually handled, but with
> > this non-blocking IPI the caller has a better option than blocking:
> > it can either retry the multicast IPI at a later time if conditions allow,
> > or it can post a dedicated CSD (with the advantage that, since my
> > requests are idempotent, if the CSD is locked there is no need to retry
> > because it means the handler has not started yet).
> >
> > In fact, if we had the option to use dedicated CSDs for multicast IPI,
> > we wouldn't even need to retry because we'd know that the posted CSD
> > is for our call back and not someone else's.
>
> What are you doing that CSD contention is such a problem?

Basically what I said in a previous email: send a targeted interrupt to a
subset of the CPUs (large enough that the multicast IPI makes sense) so
they can start doing some work that has been posted for them.
Not too different from RFS, in a way.

The sender doesn't need (or want, obviously) to block, but occasional
O(100+us) stalls were clearly visible, and trivial to reproduce in tests
(e.g. when the process on the target CPU runs getrusage() and has
a very large number of threads, even if idle ones).

Even the _cond() version is not sufficient to avoid the stall:
I could in principle use the callback to skip CPUs for which I
have a request posted and not processed yet, but if the csd
is in use by another pending IPI I have no alternative but spin.

cheers
luigi

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2021-04-20 14:40 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-04-19 18:44 [PATCH] smp: add a best_effort version of smp_call_function_many() Luigi Rizzo
2021-04-19 18:52 ` Randy Dunlap
2021-04-19 19:17 ` Peter Zijlstra
2021-04-19 21:07   ` Luigi Rizzo
2021-04-20  9:13     ` Peter Zijlstra
2021-04-20 10:41       ` Luigi Rizzo
2021-04-20 13:31         ` Peter Zijlstra
2021-04-20 14:40           ` Luigi Rizzo
2021-04-19 22:22 ` kernel test robot
2021-04-19 22:22   ` kernel test robot
2021-04-20  1:38 ` kernel test robot
2021-04-20  1:38   ` kernel test robot

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.