All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] pstore: Convert buf_lock to semaphore
@ 2018-11-30 22:47 Kees Cook
  2018-11-30 22:51 ` Arnd Bergmann
  2018-12-04 15:40 ` Sebastian Andrzej Siewior
  0 siblings, 2 replies; 12+ messages in thread
From: Kees Cook @ 2018-11-30 22:47 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: Thomas Gleixner, Borislav Petkov, linux-efi, Ard Biesheuvel,
	Anton Vorontsov, Colin Cross, Tony Luck, linux-kernel

Instead of running with interrupts disabled, use a semaphore. This should
make it easier for backends that may need to sleep (e.g. EFI) when
performing a write:

|BUG: sleeping function called from invalid context at kernel/sched/completion.c:99
|in_atomic(): 1, irqs_disabled(): 1, pid: 2236, name: sig-xstate-bum
|Preemption disabled at:
|[<ffffffff99d60512>] pstore_dump+0x72/0x330
|CPU: 26 PID: 2236 Comm: sig-xstate-bum Tainted: G      D           4.20.0-rc3 #45
|Call Trace:
| dump_stack+0x4f/0x6a
| ___might_sleep.cold.91+0xd3/0xe4
| __might_sleep+0x50/0x90
| wait_for_completion+0x32/0x130
| virt_efi_query_variable_info+0x14e/0x160
| efi_query_variable_store+0x51/0x1a0
| efivar_entry_set_safe+0xa3/0x1b0
| efi_pstore_write+0x109/0x140
| pstore_dump+0x11c/0x330
| kmsg_dump+0xa4/0xd0
| oops_exit+0x22/0x30
...

Reported-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Fixes: 21b3ddd39fee ("efi: Don't use spinlocks for efi vars")
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 arch/powerpc/kernel/nvram_64.c    |  2 --
 drivers/acpi/apei/erst.c          |  1 -
 drivers/firmware/efi/efi-pstore.c |  4 +--
 fs/pstore/platform.c              | 41 +++++++++++++++----------------
 fs/pstore/ram.c                   |  1 -
 include/linux/pstore.h            |  7 +++---
 6 files changed, 24 insertions(+), 32 deletions(-)

diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c
index 22e9d281324d..e7d4ce6964ae 100644
--- a/arch/powerpc/kernel/nvram_64.c
+++ b/arch/powerpc/kernel/nvram_64.c
@@ -563,8 +563,6 @@ static int nvram_pstore_init(void)
 	nvram_pstore_info.buf = oops_data;
 	nvram_pstore_info.bufsize = oops_data_sz;
 
-	spin_lock_init(&nvram_pstore_info.buf_lock);
-
 	rc = pstore_register(&nvram_pstore_info);
 	if (rc && (rc != -EPERM))
 		/* Print error only when pstore.backend == nvram */
diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c
index a5e1d963208e..9953e50667ec 100644
--- a/drivers/acpi/apei/erst.c
+++ b/drivers/acpi/apei/erst.c
@@ -1176,7 +1176,6 @@ static int __init erst_init(void)
 	"Error Record Serialization Table (ERST) support is initialized.\n");
 
 	buf = kmalloc(erst_erange.size, GFP_KERNEL);
-	spin_lock_init(&erst_info.buf_lock);
 	if (buf) {
 		erst_info.buf = buf + sizeof(struct cper_pstore_record);
 		erst_info.bufsize = erst_erange.size -
diff --git a/drivers/firmware/efi/efi-pstore.c b/drivers/firmware/efi/efi-pstore.c
index cfe87b465819..0f7d97917197 100644
--- a/drivers/firmware/efi/efi-pstore.c
+++ b/drivers/firmware/efi/efi-pstore.c
@@ -259,8 +259,7 @@ static int efi_pstore_write(struct pstore_record *record)
 		efi_name[i] = name[i];
 
 	ret = efivar_entry_set_safe(efi_name, vendor, PSTORE_EFI_ATTRIBUTES,
-			      !pstore_cannot_block_path(record->reason),
-			      record->size, record->psi->buf);
+			      preemptible(), record->size, record->psi->buf);
 
 	if (record->reason == KMSG_DUMP_OOPS)
 		efivar_run_worker();
@@ -369,7 +368,6 @@ static __init int efivars_pstore_init(void)
 		return -ENOMEM;
 
 	efi_pstore_info.bufsize = 1024;
-	spin_lock_init(&efi_pstore_info.buf_lock);
 
 	if (pstore_register(&efi_pstore_info)) {
 		kfree(efi_pstore_info.buf);
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index 2387cb74f729..afdfd3687f94 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -161,26 +161,27 @@ static const char *get_reason_str(enum kmsg_dump_reason reason)
 	}
 }
 
-bool pstore_cannot_block_path(enum kmsg_dump_reason reason)
+/*
+ * Should pstore_dump() wait for a concurrent pstore_dump()? If
+ * not, the current pstore_dump() will report a failure to dump
+ * and return.
+ */
+static bool pstore_cannot_wait(enum kmsg_dump_reason reason)
 {
-	/*
-	 * In case of NMI path, pstore shouldn't be blocked
-	 * regardless of reason.
-	 */
+	/* In NMI path, pstore shouldn't block regardless of reason. */
 	if (in_nmi())
 		return true;
 
 	switch (reason) {
 	/* In panic case, other cpus are stopped by smp_send_stop(). */
 	case KMSG_DUMP_PANIC:
-	/* Emergency restart shouldn't be blocked by spin lock. */
+	/* Emergency restart shouldn't be blocked. */
 	case KMSG_DUMP_EMERG:
 		return true;
 	default:
 		return false;
 	}
 }
-EXPORT_SYMBOL_GPL(pstore_cannot_block_path);
 
 #if IS_ENABLED(CONFIG_PSTORE_DEFLATE_COMPRESS)
 static int zbufsize_deflate(size_t size)
@@ -400,23 +401,20 @@ static void pstore_dump(struct kmsg_dumper *dumper,
 	unsigned long	total = 0;
 	const char	*why;
 	unsigned int	part = 1;
-	unsigned long	flags = 0;
-	int		is_locked;
 	int		ret;
 
 	why = get_reason_str(reason);
 
-	if (pstore_cannot_block_path(reason)) {
-		is_locked = spin_trylock_irqsave(&psinfo->buf_lock, flags);
-		if (!is_locked) {
-			pr_err("pstore dump routine blocked in %s path, may corrupt error record\n"
-				       , in_nmi() ? "NMI" : why);
+	if (down_trylock(&psinfo->buf_lock)) {
+		/* Failed to acquire lock: give up if we cannot wait. */
+		if (pstore_cannot_wait(reason)) {
+			pr_err("dump skipped in %s path: may corrupt error record\n",
+				in_nmi() ? "NMI" : why);
 			return;
 		}
-	} else {
-		spin_lock_irqsave(&psinfo->buf_lock, flags);
-		is_locked = 1;
+		down_interruptible(&psinfo->buf_lock);
 	}
+
 	oopscount++;
 	while (total < kmsg_bytes) {
 		char *dst;
@@ -433,7 +431,7 @@ static void pstore_dump(struct kmsg_dumper *dumper,
 		record.part = part;
 		record.buf = psinfo->buf;
 
-		if (big_oops_buf && is_locked) {
+		if (big_oops_buf) {
 			dst = big_oops_buf;
 			dst_size = big_oops_buf_sz;
 		} else {
@@ -451,7 +449,7 @@ static void pstore_dump(struct kmsg_dumper *dumper,
 					  dst_size, &dump_size))
 			break;
 
-		if (big_oops_buf && is_locked) {
+		if (big_oops_buf) {
 			zipped_len = pstore_compress(dst, psinfo->buf,
 						header_size + dump_size,
 						psinfo->bufsize);
@@ -474,8 +472,8 @@ static void pstore_dump(struct kmsg_dumper *dumper,
 		total += record.size;
 		part++;
 	}
-	if (is_locked)
-		spin_unlock_irqrestore(&psinfo->buf_lock, flags);
+
+	up(&psinfo->buf_lock);
 }
 
 static struct kmsg_dumper pstore_dumper = {
@@ -594,6 +592,7 @@ int pstore_register(struct pstore_info *psi)
 		psi->write_user = pstore_write_user_compat;
 	psinfo = psi;
 	mutex_init(&psinfo->read_mutex);
+	sema_init(&psinfo->buf_lock, 1);
 	spin_unlock(&pstore_lock);
 
 	if (owner && !try_module_get(owner)) {
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 202eaa82bcc6..e6d9560ea455 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -815,7 +815,6 @@ static int ramoops_probe(struct platform_device *pdev)
 		err = -ENOMEM;
 		goto fail_clear;
 	}
-	spin_lock_init(&cxt->pstore.buf_lock);
 
 	cxt->pstore.flags = PSTORE_FLAGS_DMESG;
 	if (cxt->console_size)
diff --git a/include/linux/pstore.h b/include/linux/pstore.h
index a9ec285d85d1..b146181e8709 100644
--- a/include/linux/pstore.h
+++ b/include/linux/pstore.h
@@ -26,7 +26,7 @@
 #include <linux/errno.h>
 #include <linux/kmsg_dump.h>
 #include <linux/mutex.h>
-#include <linux/spinlock.h>
+#include <linux/semaphore.h>
 #include <linux/time.h>
 #include <linux/types.h>
 
@@ -99,7 +99,7 @@ struct pstore_record {
  * @owner:	module which is responsible for this backend driver
  * @name:	name of the backend driver
  *
- * @buf_lock:	spinlock to serialize access to @buf
+ * @buf_lock:	semaphore to serialize access to @buf
  * @buf:	preallocated crash dump buffer
  * @bufsize:	size of @buf available for crash dump bytes (must match
  *		smallest number of bytes available for writing to a
@@ -184,7 +184,7 @@ struct pstore_info {
 	struct module	*owner;
 	char		*name;
 
-	spinlock_t	buf_lock;
+	struct semaphore buf_lock;
 	char		*buf;
 	size_t		bufsize;
 
@@ -210,7 +210,6 @@ struct pstore_info {
 
 extern int pstore_register(struct pstore_info *);
 extern void pstore_unregister(struct pstore_info *);
-extern bool pstore_cannot_block_path(enum kmsg_dump_reason reason);
 
 struct pstore_ftrace_record {
 	unsigned long ip;
-- 
2.17.1


-- 
Kees Cook

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH] pstore: Convert buf_lock to semaphore
  2018-11-30 22:47 [PATCH] pstore: Convert buf_lock to semaphore Kees Cook
@ 2018-11-30 22:51 ` Arnd Bergmann
  2018-12-01  2:42   ` Kees Cook
  2018-12-04 15:40 ` Sebastian Andrzej Siewior
  1 sibling, 1 reply; 12+ messages in thread
From: Arnd Bergmann @ 2018-11-30 22:51 UTC (permalink / raw)
  To: Kees Cook
  Cc: Sebastian Andrzej Siewior, Thomas Gleixner, Borislav Petkov,
	linux-efi, Ard Biesheuvel, Anton Vorontsov, Colin Cross,
	Tony Luck, Linux Kernel Mailing List

On Fri, Nov 30, 2018 at 11:48 PM Kees Cook <keescook@chromium.org> wrote:
>
> Instead of running with interrupts disabled, use a semaphore. This should
> make it easier for backends that may need to sleep (e.g. EFI) when
> performing a write:
>
> |BUG: sleeping function called from invalid context at kernel/sched/completion.c:99
> |in_atomic(): 1, irqs_disabled(): 1, pid: 2236, name: sig-xstate-bum
> |Preemption disabled at:
> |[<ffffffff99d60512>] pstore_dump+0x72/0x330
> |CPU: 26 PID: 2236 Comm: sig-xstate-bum Tainted: G      D           4.20.0-rc3 #45
> |Call Trace:
> | dump_stack+0x4f/0x6a
> | ___might_sleep.cold.91+0xd3/0xe4
> | __might_sleep+0x50/0x90
> | wait_for_completion+0x32/0x130
> | virt_efi_query_variable_info+0x14e/0x160
> | efi_query_variable_store+0x51/0x1a0
> | efivar_entry_set_safe+0xa3/0x1b0
> | efi_pstore_write+0x109/0x140
> | pstore_dump+0x11c/0x330
> | kmsg_dump+0xa4/0xd0
> | oops_exit+0x22/0x30
> ...
>
> Reported-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> Fixes: 21b3ddd39fee ("efi: Don't use spinlocks for efi vars")
> Signed-off-by: Kees Cook <keescook@chromium.org>

Hmm, I've actually been working on a patch set recently to deprecate
all semaphores from the kernel and replace them with something
else as much as possible.

Why can't this be a mutex instead?

      Arnd

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] pstore: Convert buf_lock to semaphore
  2018-11-30 22:51 ` Arnd Bergmann
@ 2018-12-01  2:42   ` Kees Cook
  2018-12-01  8:42     ` Arnd Bergmann
  0 siblings, 1 reply; 12+ messages in thread
From: Kees Cook @ 2018-12-01  2:42 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: Sebastian Andrzej Siewior, Thomas Gleixner, Borislav Petkov,
	linux-efi, Ard Biesheuvel, Anton Vorontsov, Colin Cross,
	Tony Luck, LKML

On Fri, Nov 30, 2018 at 2:52 PM Arnd Bergmann <arnd@arndb.de> wrote:
>
> On Fri, Nov 30, 2018 at 11:48 PM Kees Cook <keescook@chromium.org> wrote:
> >
> > Instead of running with interrupts disabled, use a semaphore. This should
> > make it easier for backends that may need to sleep (e.g. EFI) when
> > performing a write:
> >
> > |BUG: sleeping function called from invalid context at kernel/sched/completion.c:99
> > |in_atomic(): 1, irqs_disabled(): 1, pid: 2236, name: sig-xstate-bum
> > |Preemption disabled at:
> > |[<ffffffff99d60512>] pstore_dump+0x72/0x330
> > |CPU: 26 PID: 2236 Comm: sig-xstate-bum Tainted: G      D           4.20.0-rc3 #45
> > |Call Trace:
> > | dump_stack+0x4f/0x6a
> > | ___might_sleep.cold.91+0xd3/0xe4
> > | __might_sleep+0x50/0x90
> > | wait_for_completion+0x32/0x130
> > | virt_efi_query_variable_info+0x14e/0x160
> > | efi_query_variable_store+0x51/0x1a0
> > | efivar_entry_set_safe+0xa3/0x1b0
> > | efi_pstore_write+0x109/0x140
> > | pstore_dump+0x11c/0x330
> > | kmsg_dump+0xa4/0xd0
> > | oops_exit+0x22/0x30
> > ...
> >
> > Reported-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> > Fixes: 21b3ddd39fee ("efi: Don't use spinlocks for efi vars")
> > Signed-off-by: Kees Cook <keescook@chromium.org>
>
> Hmm, I've actually been working on a patch set recently to deprecate
> all semaphores from the kernel and replace them with something
> else as much as possible.
>
> Why can't this be a mutex instead?

My understanding is that I can't use a mutex in interrupt context
(Documentation/kernel-hacking/locking.rst) and pstore_dump() needs to
handle being called from anywhere. I'm surprised it's managed to get
away with using a spinlock for this long. :P

-Kees

-- 
Kees Cook

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] pstore: Convert buf_lock to semaphore
  2018-12-01  2:42   ` Kees Cook
@ 2018-12-01  8:42     ` Arnd Bergmann
  2018-12-03 11:18       ` Sebastian Andrzej Siewior
  0 siblings, 1 reply; 12+ messages in thread
From: Arnd Bergmann @ 2018-12-01  8:42 UTC (permalink / raw)
  To: Kees Cook
  Cc: Sebastian Andrzej Siewior, Thomas Gleixner, Borislav Petkov,
	linux-efi, Ard Biesheuvel, Anton Vorontsov, Colin Cross,
	Tony Luck, Linux Kernel Mailing List

On Sat, Dec 1, 2018 at 3:42 AM Kees Cook <keescook@chromium.org> wrote:
> On Fri, Nov 30, 2018 at 2:52 PM Arnd Bergmann <arnd@arndb.de> wrote:
> > On Fri, Nov 30, 2018 at 11:48 PM Kees Cook <keescook@chromium.org> wrote:
> > >
> > > |BUG: sleeping function called from invalid context at kernel/sched/completion.c:99
> > > |in_atomic(): 1, irqs_disabled(): 1, pid: 2236, name: sig-xstate-bum
> > > |Preemption disabled at:
> > > |[<ffffffff99d60512>] pstore_dump+0x72/0x330
> > > |CPU: 26 PID: 2236 Comm: sig-xstate-bum Tainted: G      D           4.20.0-rc3 #45
> > > |Call Trace:
> > > | dump_stack+0x4f/0x6a
> > > | ___might_sleep.cold.91+0xd3/0xe4
> > > | __might_sleep+0x50/0x90
> > > | wait_for_completion+0x32/0x130
> > > | virt_efi_query_variable_info+0x14e/0x160
> > > | efi_query_variable_store+0x51/0x1a0
> > > | efivar_entry_set_safe+0xa3/0x1b0
> > > | efi_pstore_write+0x109/0x140
> > > | pstore_dump+0x11c/0x330
> > > | kmsg_dump+0xa4/0xd0
> > > | oops_exit+0x22/0x30
> > > ...
> > >
> > > Reported-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> > > Fixes: 21b3ddd39fee ("efi: Don't use spinlocks for efi vars")
> > > Signed-off-by: Kees Cook <keescook@chromium.org>
> >
> > Hmm, I've actually been working on a patch set recently to deprecate
> > all semaphores from the kernel and replace them with something
> > else as much as possible.
> >
> > Why can't this be a mutex instead?
>
> My understanding is that I can't use a mutex in interrupt context
> (Documentation/kernel-hacking/locking.rst) and pstore_dump() needs to
> handle being called from anywhere. I'm surprised it's managed to get
> away with using a spinlock for this long. :P

You are right that you can't take (or release) a mutex from interrupt
context. However, I don't think converting a spinlock to a semaphore
is going to help here either.

spinlock (or raw_spinlock) is generally the only thing that can be the
innermost lock that you take in any atomic context, and using
down_trylock doesn't make the context less atomic than it already is.

virt_efi_query_variable_info() however waits for a completion
and a semaphore, so that must not be called in atomic context.
Holding a semaphore instead of a spinlock is not going to help you
here, since the interrupt context means you might already be holding
arbitrary locks.

        Arnd

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] pstore: Convert buf_lock to semaphore
  2018-12-01  8:42     ` Arnd Bergmann
@ 2018-12-03 11:18       ` Sebastian Andrzej Siewior
  2018-12-03 16:26         ` Arnd Bergmann
  0 siblings, 1 reply; 12+ messages in thread
From: Sebastian Andrzej Siewior @ 2018-12-03 11:18 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: Kees Cook, Thomas Gleixner, Borislav Petkov, linux-efi,
	Ard Biesheuvel, Anton Vorontsov, Colin Cross, Tony Luck,
	Linux Kernel Mailing List

On 2018-12-01 09:42:38 [+0100], Arnd Bergmann wrote:
> You are right that you can't take (or release) a mutex from interrupt
> context. However, I don't think converting a spinlock to a semaphore
> is going to help here either.

you can acquire a semaphore with a try_lock from interrupts context but
you can't do that with a mutex. You can also a acquire a semaphore in
one context and release in another. Not that I'm a fan of those things
but those two are often the reasons why people stick with a semaphore.

I haven't looked a general picture yet, will try to do so later today or
tomorrow.

>         Arnd

Sebastian

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] pstore: Convert buf_lock to semaphore
  2018-12-03 11:18       ` Sebastian Andrzej Siewior
@ 2018-12-03 16:26         ` Arnd Bergmann
  2018-12-04  0:28           ` Kees Cook
  0 siblings, 1 reply; 12+ messages in thread
From: Arnd Bergmann @ 2018-12-03 16:26 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: Kees Cook, Thomas Gleixner, Borislav Petkov, linux-efi,
	Ard Biesheuvel, Anton Vorontsov, Colin Cross, Tony Luck,
	Linux Kernel Mailing List

On Mon, Dec 3, 2018 at 12:18 PM Sebastian Andrzej Siewior
<bigeasy@linutronix.de> wrote:
>
> On 2018-12-01 09:42:38 [+0100], Arnd Bergmann wrote:
> > You are right that you can't take (or release) a mutex from interrupt
> > context. However, I don't think converting a spinlock to a semaphore
> > is going to help here either.
>
> you can acquire a semaphore with a try_lock from interrupts context but
> you can't do that with a mutex. You can also a acquire a semaphore in
> one context and release in another.

Right, that is the obvious part.

> I haven't looked a general picture yet, will try to do so later today or
> tomorrow.

To speed this up, the problem I'm referring to is in
virt_efi_query_variable_info() and efi_queue_work(),
as in the original BUG_ON() that Kees quoted:

> BUG: sleeping function called from invalid context at kernel/sched/completion.c:99
> |in_atomic(): 1, irqs_disabled(): 1, pid: 2236, name: sig-xstate-bum
> |Preemption disabled at:
> |[<ffffffff99d60512>] pstore_dump+0x72/0x330
> |CPU: 26 PID: 2236 Comm: sig-xstate-bum Tainted: G      D           4.20.0-rc3 #45
> |Call Trace:
> | dump_stack+0x4f/0x6a
> | ___might_sleep.cold.91+0xd3/0xe4
> | __might_sleep+0x50/0x90
> | wait_for_completion+0x32/0x130
> | virt_efi_query_variable_info+0x14e/0x160
> | efi_query_variable_store+0x51/0x1a0
> | efivar_entry_set_safe+0xa3/0x1b0
> | efi_pstore_write+0x109/0x140
> | pstore_dump+0x11c/0x330
> | kmsg_dump+0xa4/0xd0
> | oops_exit+0x22/0x30

This will no longer happen when pstore is called from process
context with his patch, but we still get the same thing if we call
pstore from interrupt context, unless both the down_interruptible
and wait_for_completion in there are also changed to
nonblocking calls. However, once they are no longer blocking,
we don't need the outer lock to be changed from spinlock
to semaphore any more either.

        Arnd

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] pstore: Convert buf_lock to semaphore
  2018-12-03 16:26         ` Arnd Bergmann
@ 2018-12-04  0:28           ` Kees Cook
  0 siblings, 0 replies; 12+ messages in thread
From: Kees Cook @ 2018-12-04  0:28 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: Sebastian Andrzej Siewior, Thomas Gleixner, Borislav Petkov,
	linux-efi, Ard Biesheuvel, Anton Vorontsov, Colin Cross,
	Tony Luck, LKML

On Mon, Dec 3, 2018 at 8:26 AM Arnd Bergmann <arnd@arndb.de> wrote:
>
> On Mon, Dec 3, 2018 at 12:18 PM Sebastian Andrzej Siewior
> <bigeasy@linutronix.de> wrote:
> >
> > On 2018-12-01 09:42:38 [+0100], Arnd Bergmann wrote:
> > > You are right that you can't take (or release) a mutex from interrupt
> > > context. However, I don't think converting a spinlock to a semaphore
> > > is going to help here either.
> >
> > you can acquire a semaphore with a try_lock from interrupts context but
> > you can't do that with a mutex. You can also a acquire a semaphore in
> > one context and release in another.
>
> Right, that is the obvious part.
>
> > I haven't looked a general picture yet, will try to do so later today or
> > tomorrow.
>
> To speed this up, the problem I'm referring to is in
> virt_efi_query_variable_info() and efi_queue_work(),
> as in the original BUG_ON() that Kees quoted:
>
> > BUG: sleeping function called from invalid context at kernel/sched/completion.c:99
> > |in_atomic(): 1, irqs_disabled(): 1, pid: 2236, name: sig-xstate-bum
> > |Preemption disabled at:
> > |[<ffffffff99d60512>] pstore_dump+0x72/0x330
> > |CPU: 26 PID: 2236 Comm: sig-xstate-bum Tainted: G      D           4.20.0-rc3 #45
> > |Call Trace:
> > | dump_stack+0x4f/0x6a
> > | ___might_sleep.cold.91+0xd3/0xe4
> > | __might_sleep+0x50/0x90
> > | wait_for_completion+0x32/0x130
> > | virt_efi_query_variable_info+0x14e/0x160
> > | efi_query_variable_store+0x51/0x1a0
> > | efivar_entry_set_safe+0xa3/0x1b0
> > | efi_pstore_write+0x109/0x140
> > | pstore_dump+0x11c/0x330
> > | kmsg_dump+0xa4/0xd0
> > | oops_exit+0x22/0x30
>
> This will no longer happen when pstore is called from process
> context with his patch, but we still get the same thing if we call
> pstore from interrupt context, unless both the down_interruptible
> and wait_for_completion in there are also changed to
> nonblocking calls. However, once they are no longer blocking,
> we don't need the outer lock to be changed from spinlock
> to semaphore any more either.

My proposed patch was trying to do two things:

- have pstore not make things _worse_ (do not hold a spin lock)
- use preemptible() in efi pstore backend to get it right:

        ret = efivar_entry_set_safe(efi_name, vendor, PSTORE_EFI_ATTRIBUTES,
-                             !pstore_cannot_block_path(record->reason),
-                             record->size, record->psi->buf);
+                             preemptible(), record->size, record->psi->buf);

IIUC, that would make efi-vars take the nonblocking path so we don't
trip over the might_sleep().

-Kees

-- 
Kees Cook

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] pstore: Convert buf_lock to semaphore
  2018-11-30 22:47 [PATCH] pstore: Convert buf_lock to semaphore Kees Cook
  2018-11-30 22:51 ` Arnd Bergmann
@ 2018-12-04 15:40 ` Sebastian Andrzej Siewior
  2018-12-04 16:17   ` Kees Cook
  2018-12-04 17:23   ` Kees Cook
  1 sibling, 2 replies; 12+ messages in thread
From: Sebastian Andrzej Siewior @ 2018-12-04 15:40 UTC (permalink / raw)
  To: Kees Cook
  Cc: Thomas Gleixner, Borislav Petkov, linux-efi, Ard Biesheuvel,
	Anton Vorontsov, Colin Cross, Tony Luck, linux-kernel

On 2018-11-30 14:47:36 [-0800], Kees Cook wrote:
> diff --git a/drivers/firmware/efi/efi-pstore.c b/drivers/firmware/efi/efi-pstore.c
> index cfe87b465819..0f7d97917197 100644
> --- a/drivers/firmware/efi/efi-pstore.c
> +++ b/drivers/firmware/efi/efi-pstore.c
> @@ -259,8 +259,7 @@ static int efi_pstore_write(struct pstore_record *record)
>  		efi_name[i] = name[i];
>  
>  	ret = efivar_entry_set_safe(efi_name, vendor, PSTORE_EFI_ATTRIBUTES,
> -			      !pstore_cannot_block_path(record->reason),
> -			      record->size, record->psi->buf);
> +			      preemptible(), record->size, record->psi->buf);

Well. Better I think.
might_sleep() / preempt_count_equals() checks for preemptible() + rcu_preempt_depth().
kmsg_dump() starts with rcu_read_lock() which means with this patch applied I
got:

| BUG: sleeping function called from invalid context at kernel/sched/completion.c:99
| in_atomic(): 0, irqs_disabled(): 0, pid: 2286, name: sig-xstate-bum PC: 0 RCU: 1
| Preemption disabled at:
| [<ffffffff9b959085>] __queue_work+0x95/0x440
| CPU: 30 PID: 2286 Comm: sig-xstate-bum Tainted: G      D           4.20.0-rc3+ #90
| Call Trace:
|  dump_stack+0x4f/0x6a
|  ___might_sleep.cold.91+0xef/0x100
|  __might_sleep+0x50/0x90
|  wait_for_completion+0x32/0x130
|  virt_efi_query_variable_info+0x14e/0x160
|  efi_query_variable_store+0x51/0x1a0
|  efivar_entry_set_safe+0xa3/0x1b0
|  efi_pstore_write+0x110/0x140
|  pstore_dump+0x114/0x320
|  kmsg_dump+0xa4/0xd0
|  oops_exit+0x7f/0x90
|  oops_end+0x67/0xd0
|  die+0x41/0x4a
|  do_general_protection+0xc1/0x150
|  general_protection+0x1e/0x30
| RIP: 0010:__fpu__restore_sig+0x1c1/0x540

just in case you wonder why both counter are zero and it still creates
this backtrace.

>  	if (record->reason == KMSG_DUMP_OOPS)
>  		efivar_run_worker();
> diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
> index 2387cb74f729..afdfd3687f94 100644
> --- a/fs/pstore/platform.c
> +++ b/fs/pstore/platform.c
> @@ -400,23 +401,20 @@ static void pstore_dump(struct kmsg_dumper *dumper,
>  	unsigned long	total = 0;
>  	const char	*why;
>  	unsigned int	part = 1;
> -	unsigned long	flags = 0;
> -	int		is_locked;
>  	int		ret;
>  
>  	why = get_reason_str(reason);
>  
> -	if (pstore_cannot_block_path(reason)) {
> -		is_locked = spin_trylock_irqsave(&psinfo->buf_lock, flags);
> -		if (!is_locked) {
> -			pr_err("pstore dump routine blocked in %s path, may corrupt error record\n"
> -				       , in_nmi() ? "NMI" : why);
> +	if (down_trylock(&psinfo->buf_lock)) {
> +		/* Failed to acquire lock: give up if we cannot wait. */
> +		if (pstore_cannot_wait(reason)) {
> +			pr_err("dump skipped in %s path: may corrupt error record\n",
> +				in_nmi() ? "NMI" : why);
>  			return;
>  		}
> -	} else {
> -		spin_lock_irqsave(&psinfo->buf_lock, flags);
> -		is_locked = 1;
> +		down_interruptible(&psinfo->buf_lock);

 In function ‘pstore_dump’:
fs/pstore/platform.c:393:3: warning: ignoring return value of ‘down_interruptible’, declared with attribute warn_unused_result [-Wunused-result]
   down_interruptible(&psinfo->buf_lock);
   ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

>  	}

Sebastian

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] pstore: Convert buf_lock to semaphore
  2018-12-04 15:40 ` Sebastian Andrzej Siewior
@ 2018-12-04 16:17   ` Kees Cook
  2018-12-04 17:23   ` Kees Cook
  1 sibling, 0 replies; 12+ messages in thread
From: Kees Cook @ 2018-12-04 16:17 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: Thomas Gleixner, Borislav Petkov, linux-efi, Ard Biesheuvel,
	Anton Vorontsov, Colin Cross, Tony Luck, LKML

On Tue, Dec 4, 2018 at 7:41 AM Sebastian Andrzej Siewior
<bigeasy@linutronix.de> wrote:
>
> On 2018-11-30 14:47:36 [-0800], Kees Cook wrote:
> > diff --git a/drivers/firmware/efi/efi-pstore.c b/drivers/firmware/efi/efi-pstore.c
> > index cfe87b465819..0f7d97917197 100644
> > --- a/drivers/firmware/efi/efi-pstore.c
> > +++ b/drivers/firmware/efi/efi-pstore.c
> > @@ -259,8 +259,7 @@ static int efi_pstore_write(struct pstore_record *record)
> >               efi_name[i] = name[i];
> >
> >       ret = efivar_entry_set_safe(efi_name, vendor, PSTORE_EFI_ATTRIBUTES,
> > -                           !pstore_cannot_block_path(record->reason),
> > -                           record->size, record->psi->buf);
> > +                           preemptible(), record->size, record->psi->buf);
>
> Well. Better I think.
> might_sleep() / preempt_count_equals() checks for preemptible() + rcu_preempt_depth().
> kmsg_dump() starts with rcu_read_lock() which means with this patch applied I
> got:
>
> | BUG: sleeping function called from invalid context at kernel/sched/completion.c:99
> | in_atomic(): 0, irqs_disabled(): 0, pid: 2286, name: sig-xstate-bum PC: 0 RCU: 1
> | Preemption disabled at:
> | [<ffffffff9b959085>] __queue_work+0x95/0x440
> | CPU: 30 PID: 2286 Comm: sig-xstate-bum Tainted: G      D           4.20.0-rc3+ #90
> | Call Trace:
> |  dump_stack+0x4f/0x6a
> |  ___might_sleep.cold.91+0xef/0x100
> |  __might_sleep+0x50/0x90
> |  wait_for_completion+0x32/0x130
> |  virt_efi_query_variable_info+0x14e/0x160
> |  efi_query_variable_store+0x51/0x1a0
> |  efivar_entry_set_safe+0xa3/0x1b0
> |  efi_pstore_write+0x110/0x140
> |  pstore_dump+0x114/0x320
> |  kmsg_dump+0xa4/0xd0
> |  oops_exit+0x7f/0x90
> |  oops_end+0x67/0xd0
> |  die+0x41/0x4a
> |  do_general_protection+0xc1/0x150
> |  general_protection+0x1e/0x30
> | RIP: 0010:__fpu__restore_sig+0x1c1/0x540
>
> just in case you wonder why both counter are zero and it still creates
> this backtrace.

Oh, hmm. That didn't show up in my testing. Any thoughts on a solution?

>
> >       if (record->reason == KMSG_DUMP_OOPS)
> >               efivar_run_worker();
> > diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
> > index 2387cb74f729..afdfd3687f94 100644
> > --- a/fs/pstore/platform.c
> > +++ b/fs/pstore/platform.c
> > @@ -400,23 +401,20 @@ static void pstore_dump(struct kmsg_dumper *dumper,
> >       unsigned long   total = 0;
> >       const char      *why;
> >       unsigned int    part = 1;
> > -     unsigned long   flags = 0;
> > -     int             is_locked;
> >       int             ret;
> >
> >       why = get_reason_str(reason);
> >
> > -     if (pstore_cannot_block_path(reason)) {
> > -             is_locked = spin_trylock_irqsave(&psinfo->buf_lock, flags);
> > -             if (!is_locked) {
> > -                     pr_err("pstore dump routine blocked in %s path, may corrupt error record\n"
> > -                                    , in_nmi() ? "NMI" : why);
> > +     if (down_trylock(&psinfo->buf_lock)) {
> > +             /* Failed to acquire lock: give up if we cannot wait. */
> > +             if (pstore_cannot_wait(reason)) {
> > +                     pr_err("dump skipped in %s path: may corrupt error record\n",
> > +                             in_nmi() ? "NMI" : why);
> >                       return;
> >               }
> > -     } else {
> > -             spin_lock_irqsave(&psinfo->buf_lock, flags);
> > -             is_locked = 1;
> > +             down_interruptible(&psinfo->buf_lock);
>
>  In function ‘pstore_dump’:
> fs/pstore/platform.c:393:3: warning: ignoring return value of ‘down_interruptible’, declared with attribute warn_unused_result [-Wunused-result]
>    down_interruptible(&psinfo->buf_lock);
>    ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
>

Thanks, yes. I've fixed this in the version in -next.

-Kees

-- 
Kees Cook

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] pstore: Convert buf_lock to semaphore
  2018-12-04 15:40 ` Sebastian Andrzej Siewior
  2018-12-04 16:17   ` Kees Cook
@ 2018-12-04 17:23   ` Kees Cook
  2018-12-04 18:06     ` Sebastian Andrzej Siewior
  1 sibling, 1 reply; 12+ messages in thread
From: Kees Cook @ 2018-12-04 17:23 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: Thomas Gleixner, Borislav Petkov, linux-efi, Ard Biesheuvel,
	Anton Vorontsov, Colin Cross, Tony Luck, LKML

On Tue, Dec 4, 2018 at 7:41 AM Sebastian Andrzej Siewior
<bigeasy@linutronix.de> wrote:
>
> On 2018-11-30 14:47:36 [-0800], Kees Cook wrote:
> > diff --git a/drivers/firmware/efi/efi-pstore.c b/drivers/firmware/efi/efi-pstore.c
> > index cfe87b465819..0f7d97917197 100644
> > --- a/drivers/firmware/efi/efi-pstore.c
> > +++ b/drivers/firmware/efi/efi-pstore.c
> > @@ -259,8 +259,7 @@ static int efi_pstore_write(struct pstore_record *record)
> >               efi_name[i] = name[i];
> >
> >       ret = efivar_entry_set_safe(efi_name, vendor, PSTORE_EFI_ATTRIBUTES,
> > -                           !pstore_cannot_block_path(record->reason),
> > -                           record->size, record->psi->buf);
> > +                           preemptible(), record->size, record->psi->buf);
>
> Well. Better I think.
> might_sleep() / preempt_count_equals() checks for preemptible() + rcu_preempt_depth().
> kmsg_dump() starts with rcu_read_lock() which means with this patch applied I
> got:

Okay, so, if kmsg_dump() uses rcu_read_lock(), that means efi-pstore
can _never_ sleep, and it's nothing to do with pstore internals. :( I
guess we just hard-code it, then? And efi-pstore should probably only
attach to pstore if it has a nonblock implementation (and warn if one
isn't available).

-Kees

-- 
Kees Cook

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] pstore: Convert buf_lock to semaphore
  2018-12-04 17:23   ` Kees Cook
@ 2018-12-04 18:06     ` Sebastian Andrzej Siewior
  2018-12-21 15:37       ` Ard Biesheuvel
  0 siblings, 1 reply; 12+ messages in thread
From: Sebastian Andrzej Siewior @ 2018-12-04 18:06 UTC (permalink / raw)
  To: Kees Cook
  Cc: Thomas Gleixner, Borislav Petkov, linux-efi, Ard Biesheuvel,
	Anton Vorontsov, Colin Cross, Tony Luck, LKML

On 2018-12-04 09:23:13 [-0800], Kees Cook wrote:
> Okay, so, if kmsg_dump() uses rcu_read_lock(), that means efi-pstore
> can _never_ sleep, and it's nothing to do with pstore internals. :( I
> guess we just hard-code it, then? And efi-pstore should probably only
> attach to pstore if it has a nonblock implementation (and warn if one
> isn't available).

I was about to suggest that. I am not aware if anything else could use
efi_pstore_write() use that but otherwise you could hardcode it.

> -Kees
> 
Sebastian

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] pstore: Convert buf_lock to semaphore
  2018-12-04 18:06     ` Sebastian Andrzej Siewior
@ 2018-12-21 15:37       ` Ard Biesheuvel
  0 siblings, 0 replies; 12+ messages in thread
From: Ard Biesheuvel @ 2018-12-21 15:37 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: Kees Cook, Thomas Gleixner, Borislav Petkov, linux-efi,
	Anton Vorontsov, Colin Cross, Tony Luck, LKML

On Tue, 4 Dec 2018 at 19:06, Sebastian Andrzej Siewior
<bigeasy@linutronix.de> wrote:
>
> On 2018-12-04 09:23:13 [-0800], Kees Cook wrote:
> > Okay, so, if kmsg_dump() uses rcu_read_lock(), that means efi-pstore
> > can _never_ sleep, and it's nothing to do with pstore internals. :( I
> > guess we just hard-code it, then? And efi-pstore should probably only
> > attach to pstore if it has a nonblock implementation (and warn if one
> > isn't available).
>
> I was about to suggest that. I am not aware if anything else could use
> efi_pstore_write() use that but otherwise you could hardcode it.
>

efivar_entry_set_safe() will only use the default backend if no
non-blocking variant is provided, in which case it assumes that the
default one is non-blocking. Perhaps we should just assign both
function pointers to the same function in this case.

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2018-12-21 15:37 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-11-30 22:47 [PATCH] pstore: Convert buf_lock to semaphore Kees Cook
2018-11-30 22:51 ` Arnd Bergmann
2018-12-01  2:42   ` Kees Cook
2018-12-01  8:42     ` Arnd Bergmann
2018-12-03 11:18       ` Sebastian Andrzej Siewior
2018-12-03 16:26         ` Arnd Bergmann
2018-12-04  0:28           ` Kees Cook
2018-12-04 15:40 ` Sebastian Andrzej Siewior
2018-12-04 16:17   ` Kees Cook
2018-12-04 17:23   ` Kees Cook
2018-12-04 18:06     ` Sebastian Andrzej Siewior
2018-12-21 15:37       ` Ard Biesheuvel

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.