From: Linus Torvalds <torvalds@linux-foundation.org>
To: Rafael David Tinoco <inaddy@ubuntu.com>
Cc: LKML <linux-kernel@vger.kernel.org>,
Thomas Gleixner <tglx@linutronix.de>,
Jens Axboe <axboe@kernel.dk>,
Frederic Weisbecker <fweisbec@gmail.com>
Subject: Re: smp_call_function_single lockups
Date: Wed, 11 Feb 2015 12:42:10 -0800
Message-ID: <CA+55aFz492bzLFhdbKN-Hygjcreup7CjMEYk3nTSfRWjppz-OA@mail.gmail.com>
In-Reply-To: <CA+55aFyGkOCVGD3Ds7Wt1z9Dw7cmk_yXw7YwruACQh5QAXOQvQ@mail.gmail.com>
[-- Attachment #1: Type: text/plain, Size: 1260 bytes --]
[ Added Frederic to the cc, since he's touched this file/area most ]
On Wed, Feb 11, 2015 at 11:59 AM, Linus Torvalds
<torvalds@linux-foundation.org> wrote:
>
> So the caller has a really hard time guaranteeing that CSD_LOCK isn't
> set. And if the call is done in interrupt context, for all we know it
> is interrupting the code that is going to clear CSD_LOCK, so CSD_LOCK
> will never be cleared at all, and csd_lock() will wait forever.
>
> So I actually think that for the async case, we really *should* unlock
> before doing the callback (which is what Thomas' old patch did).
>
> And we might well be better off doing something like
>
> WARN_ON_ONCE(csd->flags & CSD_LOCK);
>
> in smp_call_function_single_async(), because that really is a hard requirement.
>
> And it strikes me that hrtick_csd is one of these cases that do this
> with interrupts disabled, and use the callback for serialization. So I
> really wonder if this is part of the problem..
>
> Thomas? Am I missing something?
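To make the lockup scenario above concrete, here's a minimal sketch of
it (simplified model, not the real kernel/smp.c): the interrupt that
wants to reuse the csd has interrupted the very store that would clear
CSD_LOCK on the same CPU, so the wait can never finish:

	static unsigned int csd_flags = 0x01;	/* model of csd->flags, CSD_LOCK set */

	static void ipi_flush_model(void)	/* runs the queued callback */
	{
		/* csd->func(csd->info) just ran ... */
		/* <-- IRQ arrives here, before the unlock below */
		csd_flags &= ~0x01;		/* never reached while the IRQ spins */
	}

	static void irq_handler_model(void)	/* wants to reuse the same csd */
	{
		while (csd_flags & 0x01)	/* csd_lock(): spins forever */
			;			/* cpu_relax() in the real code */
	}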
Ok, this is a more involved patch than I'd like, but making the
*caller* do all the CSD maintenance actually cleans things up.
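The core of the change is the reordering in the IPI flush path. As a
stub sketch of just that logic (the real thing is
flush_smp_call_function_queue() in the patch below):

	enum { CSD_FLAG_LOCK = 0x01, CSD_FLAG_SYNCHRONOUS = 0x02 };

	/* Sketch: unlock *after* the callback for sync, *before* for async */
	static void flush_one_model(unsigned int *flags,
				    void (*func)(void *), void *info)
	{
		if (*flags & CSD_FLAG_SYNCHRONOUS) {
			/* a waiter is spinning on the flags: run first, unlock last */
			func(info);
			*flags = 0;
		} else {
			/* async: the owner may reuse the csd as soon as we unlock */
			*flags = 0;
			func(info);
		}
	}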
And this is still completely untested, and may be entirely buggy. What
do you guys think?
Linus
[-- Attachment #2: patch.diff --]
[-- Type: text/plain, Size: 4698 bytes --]
 kernel/smp.c | 78 ++++++++++++++++++++++++++++++++++++------------------------
 1 file changed, 47 insertions(+), 31 deletions(-)

diff --git a/kernel/smp.c b/kernel/smp.c
index f38a1e692259..2aaac2c47683 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -19,7 +19,7 @@
 
 enum {
 	CSD_FLAG_LOCK		= 0x01,
-	CSD_FLAG_WAIT		= 0x02,
+	CSD_FLAG_SYNCHRONOUS	= 0x02,
 };
 
 struct call_function_data {
@@ -107,7 +107,7 @@ void __init call_function_init(void)
  */
 static void csd_lock_wait(struct call_single_data *csd)
 {
-	while (csd->flags & CSD_FLAG_LOCK)
+	while (smp_load_acquire(&csd->flags) & CSD_FLAG_LOCK)
 		cpu_relax();
 }
 
@@ -121,19 +121,17 @@ static void csd_lock(struct call_single_data *csd)
 	 * to ->flags with any subsequent assignments to other
 	 * fields of the specified call_single_data structure:
	 */
-	smp_mb();
+	smp_wmb();
 }
 
 static void csd_unlock(struct call_single_data *csd)
 {
-	WARN_ON((csd->flags & CSD_FLAG_WAIT) && !(csd->flags & CSD_FLAG_LOCK));
+	WARN_ON(!(csd->flags & CSD_FLAG_LOCK));
 
 	/*
 	 * ensure we're all done before releasing data:
 	 */
-	smp_mb();
-
-	csd->flags &= ~CSD_FLAG_LOCK;
+	smp_store_release(&csd->flags, 0);
 }
 
 static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_single_data, csd_data);
@@ -144,13 +142,16 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_single_data, csd_data);
  * ->func, ->info, and ->flags set.
  */
 static int generic_exec_single(int cpu, struct call_single_data *csd,
-			       smp_call_func_t func, void *info, int wait)
+			       smp_call_func_t func, void *info)
 {
-	struct call_single_data csd_stack = { .flags = 0 };
-	unsigned long flags;
-
-
 	if (cpu == smp_processor_id()) {
+		unsigned long flags;
+
+		/*
+		 * We can unlock early even for the synchronous on-stack case,
+		 * since we're doing this from the same CPU..
+		 */
+		csd_unlock(csd);
 		local_irq_save(flags);
 		func(info);
 		local_irq_restore(flags);
@@ -161,21 +162,9 @@ static int generic_exec_single(int cpu, struct call_single_data *csd,
 	if ((unsigned)cpu >= nr_cpu_ids || !cpu_online(cpu))
 		return -ENXIO;
 
-
-	if (!csd) {
-		csd = &csd_stack;
-		if (!wait)
-			csd = this_cpu_ptr(&csd_data);
-	}
-
-	csd_lock(csd);
-
 	csd->func = func;
 	csd->info = info;
 
-	if (wait)
-		csd->flags |= CSD_FLAG_WAIT;
-
 	/*
 	 * The list addition should be visible before sending the IPI
 	 * handler locks the list to pull the entry off it because of
@@ -190,9 +179,6 @@ static int generic_exec_single(int cpu, struct call_single_data *csd,
 	if (llist_add(&csd->llist, &per_cpu(call_single_queue, cpu)))
 		arch_send_call_function_single_ipi(cpu);
 
-	if (wait)
-		csd_lock_wait(csd);
-
 	return 0;
 }
 
@@ -250,8 +236,17 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline)
 	}
 
 	llist_for_each_entry_safe(csd, csd_next, entry, llist) {
-		csd->func(csd->info);
-		csd_unlock(csd);
+		smp_call_func_t func = csd->func;
+		void *info = csd->info;
+
+		/* Do we wait until *after* callback? */
+		if (csd->flags & CSD_FLAG_SYNCHRONOUS) {
+			func(info);
+			csd_unlock(csd);
+		} else {
+			csd_unlock(csd);
+			func(info);
+		}
 	}
 
 	/*
@@ -274,6 +269,8 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline)
 int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
 			     int wait)
 {
+	struct call_single_data *csd;
+	struct call_single_data csd_stack = { .flags = CSD_FLAG_LOCK | CSD_FLAG_SYNCHRONOUS };
 	int this_cpu;
 	int err;
 
@@ -292,7 +289,16 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
 	WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled()
 		     && !oops_in_progress);
 
-	err = generic_exec_single(cpu, NULL, func, info, wait);
+	csd = &csd_stack;
+	if (!wait) {
+		csd = this_cpu_ptr(&csd_data);
+		csd_lock(csd);
+	}
+
+	err = generic_exec_single(cpu, csd, func, info);
+
+	if (wait)
+		csd_lock_wait(csd);
 
 	put_cpu();
 
@@ -321,7 +327,15 @@ int smp_call_function_single_async(int cpu, struct call_single_data *csd)
 	int err = 0;
 
 	preempt_disable();
-	err = generic_exec_single(cpu, csd, csd->func, csd->info, 0);
+
+	/* We could deadlock if we have to wait here with interrupts disabled! */
+	if (WARN_ON_ONCE(csd->flags & CSD_FLAG_LOCK))
+		csd_lock_wait(csd);
+
+	csd->flags = CSD_FLAG_LOCK;
+	smp_wmb();
+
+	err = generic_exec_single(cpu, csd, csd->func, csd->info);
 	preempt_enable();
 
 	return err;
@@ -433,6 +447,8 @@ void smp_call_function_many(const struct cpumask *mask,
 		struct call_single_data *csd = per_cpu_ptr(cfd->csd, cpu);
 
 		csd_lock(csd);
+		if (wait)
+			csd->flags |= CSD_FLAG_SYNCHRONOUS;
 		csd->func = func;
 		csd->info = info;
 		llist_add(&csd->llist, &per_cpu(call_single_queue, cpu));
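As an aside: the smp_load_acquire()/smp_store_release() pair above maps
onto C11 acquire/release atomics, so the ordering argument can be
checked in plain userspace too. A rough analogue (my mapping, not
kernel code):

	#include <stdatomic.h>

	struct csd_model {
		_Atomic unsigned int flags;	/* bit 0x01 models CSD_FLAG_LOCK */
	};

	/*
	 * Analogue of csd_lock_wait(): the acquire load pairs with the
	 * release store below, so everything the IPI side wrote before
	 * unlocking is visible once this loop exits.
	 */
	static void csd_lock_wait_model(struct csd_model *csd)
	{
		while (atomic_load_explicit(&csd->flags, memory_order_acquire) & 0x01)
			;	/* cpu_relax() in the kernel */
	}

	/* Analogue of csd_unlock(): release ordering replaces the old smp_mb() */
	static void csd_unlock_model(struct csd_model *csd)
	{
		atomic_store_explicit(&csd->flags, 0, memory_order_release);
	}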