All of lore.kernel.org
 help / color / mirror / Atom feed
From: Joerg Vehlow <lkml@jv-coder.de>
To: peterz@infradead.org
Cc: Steven Rostedt <rostedt@goodmis.org>,
	Andrew Morton <akpm@linux-foundation.org>,
	Thomas Gleixner <tglx@linutronix.de>,
	Sebastian Andrzej Siewior <bigeasy@linutronix.de>,
	Huang Ying <ying.huang@intel.com>,
	linux-kernel@vger.kernel.org,
	Joerg Vehlow <joerg.vehlow@aox-tech.de>
Subject: Re: [BUG RT] dump-capture kernel not executed for panic in interrupt context
Date: Wed, 9 Sep 2020 07:46:07 +0200	[thread overview]
Message-ID: <51f3b288-260b-a800-6a47-51d93f892c3d@jv-coder.de> (raw)
In-Reply-To: <5600c9f8-2c9d-7776-161a-5f5c1be62c10@jv-coder.de>

Hi,

Here is the new version of the patch, based on Peter's suggestion.
It looks like it works fine. I added the BUG_ON() to __crash_kexec()
because it is a precondition that panic_cpu is set correctly; otherwise
the whole locking logic fails.

mutex_trylock() can still be used, because it is only called in syscall
context and never in interrupt context.

Jörg

---
  kernel/kexec.c          |  8 ++--
  kernel/kexec_core.c     | 86 +++++++++++++++++++++++++++--------------
  kernel/kexec_file.c     |  4 +-
  kernel/kexec_internal.h |  6 ++-
  4 files changed, 69 insertions(+), 35 deletions(-)

diff --git a/kernel/kexec.c b/kernel/kexec.c
index f977786fe498..118a012aeac2 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -255,12 +255,12 @@ SYSCALL_DEFINE4(kexec_load, unsigned long, entry, 
unsigned long, nr_segments,
       *
       * KISS: always take the mutex.
       */
-    if (!mutex_trylock(&kexec_mutex))
+    if (!kexec_trylock())
          return -EBUSY;

      result = do_kexec_load(entry, nr_segments, segments, flags);

-    mutex_unlock(&kexec_mutex);
+    kexec_unlock();

      return result;
  }
@@ -309,12 +309,12 @@ COMPAT_SYSCALL_DEFINE4(kexec_load, compat_ulong_t, 
entry,
       *
       * KISS: always take the mutex.
       */
-    if (!mutex_trylock(&kexec_mutex))
+    if (!kexec_trylock())
          return -EBUSY;

      result = do_kexec_load(entry, nr_segments, ksegments, flags);

-    mutex_unlock(&kexec_mutex);
+    kexec_unlock();

      return result;
  }
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index c19c0dad1ebe..71682a33b1ba 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -45,7 +45,7 @@
  #include <crypto/sha.h>
  #include "kexec_internal.h"

-DEFINE_MUTEX(kexec_mutex);
+static DEFINE_MUTEX(kexec_mutex);

  /* Per cpu memory for storing cpu states in case of system crash. */
  note_buf_t __percpu *crash_notes;
@@ -70,6 +70,43 @@ struct resource crashk_low_res = {
      .desc  = IORES_DESC_CRASH_KERNEL
  };

+void kexec_lock(void)
+{
+    /*
+     * LOCK kexec_mutex        cmpxchg(&panic_cpu, INVALID, cpu)
+     *   MB                  MB
+     * panic_cpu == INVALID        kexec_mutex == LOCKED
+     *
+     * Ensures either we observe the cmpxchg, or crash_kernel() observes
+     * our lock acquisition.
+     */
+    mutex_lock(&kexec_mutex);
+    smp_mb();
+    atomic_cond_read_acquire(&panic_cpu, VAL == PANIC_CPU_INVALID);
+}
+
+int kexec_trylock(void) {
+    if (!mutex_trylock(&kexec_mutex)) {
+        return 0;
+    }
+    smp_mb();
+    if (atomic_read(&panic_cpu) != PANIC_CPU_INVALID) {
+         mutex_unlock(&kexec_mutex);
+         return 0;
+    }
+    return 1;
+}
+
+void kexec_unlock(void)
+{
+    mutex_unlock(&kexec_mutex);
+}
+
+int kexec_is_locked(void)
+{
+    return mutex_is_locked(&kexec_mutex);
+}
+
  int kexec_should_crash(struct task_struct *p)
  {
      /*
@@ -943,24 +980,15 @@ int kexec_load_disabled;
   */
  void __noclone __crash_kexec(struct pt_regs *regs)
  {
-    /* Take the kexec_mutex here to prevent sys_kexec_load
-     * running on one cpu from replacing the crash kernel
-     * we are using after a panic on a different cpu.
-     *
-     * If the crash kernel was not located in a fixed area
-     * of memory the xchg(&kexec_crash_image) would be
-     * sufficient.  But since I reuse the memory...
-     */
-    if (mutex_trylock(&kexec_mutex)) {
-        if (kexec_crash_image) {
-            struct pt_regs fixed_regs;
-
-            crash_setup_regs(&fixed_regs, regs);
-            crash_save_vmcoreinfo();
-            machine_crash_shutdown(&fixed_regs);
-            machine_kexec(kexec_crash_image);
-        }
-        mutex_unlock(&kexec_mutex);
+    BUG_ON(atomic_read(&panic_cpu) != raw_smp_processor_id());
+
+    if (!kexec_is_locked() && kexec_crash_image) {
+        struct pt_regs fixed_regs;
+
+        crash_setup_regs(&fixed_regs, regs);
+        crash_save_vmcoreinfo();
+        machine_crash_shutdown(&fixed_regs);
+        machine_kexec(kexec_crash_image);
      }
  }
  STACK_FRAME_NON_STANDARD(__crash_kexec);
@@ -977,9 +1005,11 @@ void crash_kexec(struct pt_regs *regs)
      this_cpu = raw_smp_processor_id();
      old_cpu = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, this_cpu);
      if (old_cpu == PANIC_CPU_INVALID) {
-        /* This is the 1st CPU which comes here, so go ahead. */
-        printk_safe_flush_on_panic();
-        __crash_kexec(regs);
+        if (!kexec_is_locked()) {
+            /* This is the 1st CPU which comes here, so go ahead. */
+            printk_safe_flush_on_panic();
+            __crash_kexec(regs);
+        }

          /*
           * Reset panic_cpu to allow another panic()/crash_kexec()
@@ -993,10 +1023,10 @@ size_t crash_get_memory_size(void)
  {
      size_t size = 0;

-    mutex_lock(&kexec_mutex);
+    kexec_lock();
      if (crashk_res.end != crashk_res.start)
          size = resource_size(&crashk_res);
-    mutex_unlock(&kexec_mutex);
+    kexec_unlock();
      return size;
  }

@@ -1016,7 +1046,7 @@ int crash_shrink_memory(unsigned long new_size)
      unsigned long old_size;
      struct resource *ram_res;

-    mutex_lock(&kexec_mutex);
+    kexec_lock();

      if (kexec_crash_image) {
          ret = -ENOENT;
@@ -1054,7 +1084,7 @@ int crash_shrink_memory(unsigned long new_size)
      insert_resource(&iomem_resource, ram_res);

  unlock:
-    mutex_unlock(&kexec_mutex);
+    kexec_unlock();
      return ret;
  }

@@ -1126,7 +1156,7 @@ int kernel_kexec(void)
  {
      int error = 0;

-    if (!mutex_trylock(&kexec_mutex))
+    if (!kexec_trylock())
          return -EBUSY;
      if (!kexec_image) {
          error = -EINVAL;
@@ -1203,7 +1233,7 @@ int kernel_kexec(void)
  #endif

   Unlock:
-    mutex_unlock(&kexec_mutex);
+    kexec_unlock();
      return error;
  }

diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index ca40bef75a61..d40b0aedc187 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -362,7 +362,7 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, 
int, initrd_fd,

      image = NULL;

-    if (!mutex_trylock(&kexec_mutex))
+    if (!kexec_trylock())
          return -EBUSY;

      dest_image = &kexec_image;
@@ -434,7 +434,7 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, 
int, initrd_fd,
      if ((flags & KEXEC_FILE_ON_CRASH) && kexec_crash_image)
          arch_kexec_protect_crashkres();

-    mutex_unlock(&kexec_mutex);
+    kexec_unlock();
      kimage_free(image);
      return ret;
  }
diff --git a/kernel/kexec_internal.h b/kernel/kexec_internal.h
index 39d30ccf8d87..2c1683cb1082 100644
--- a/kernel/kexec_internal.h
+++ b/kernel/kexec_internal.h
@@ -15,7 +15,11 @@ int kimage_is_destination_range(struct kimage *image,

  int machine_kexec_post_load(struct kimage *image);

-extern struct mutex kexec_mutex;
+void kexec_lock(void);
+int kexec_trylock(void);
+void kexec_unlock(void);
+int kexec_is_locked(void);
+

  #ifdef CONFIG_KEXEC_FILE
  #include <linux/purgatory.h>
-- 
2.25.1



  reply	other threads:[~2020-09-09  5:46 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-05-28 11:41 [BUG RT] dump-capture kernel not executed for panic in interrupt context Joerg Vehlow
2020-05-28 12:46 ` Steven Rostedt
2020-07-22  4:30   ` Joerg Vehlow
2020-07-22 20:51     ` Steven Rostedt
2020-07-27 23:36     ` Andrew Morton
2020-08-21 10:25       ` Joerg Vehlow
2020-08-21 15:08         ` Steven Rostedt
2020-08-21 20:47           ` Andrew Morton
2020-08-21 21:03             ` Steven Rostedt
2020-08-22 12:32               ` peterz
2020-08-22 23:49                 ` Steven Rostedt
2020-09-07 11:41                   ` peterz
2020-09-07 12:49                     ` Valentin Schneider
2020-09-14 19:00                     ` Steven Rostedt
2020-09-07 10:51               ` Joerg Vehlow
2020-09-07 11:46                 ` peterz
2020-09-07 12:03                   ` Joerg Vehlow
2020-09-07 16:23                     ` peterz
2020-09-08  5:48                       ` Joerg Vehlow
2020-09-09  5:46                         ` Joerg Vehlow [this message]
2020-09-11 22:48                           ` Eric W. Biederman
2020-09-14  6:03                             ` Joerg Vehlow
2020-09-14 16:46                               ` Eric W. Biederman
2020-09-14 16:46                                 ` Eric W. Biederman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=51f3b288-260b-a800-6a47-51d93f892c3d@jv-coder.de \
    --to=lkml@jv-coder.de \
    --cc=akpm@linux-foundation.org \
    --cc=bigeasy@linutronix.de \
    --cc=joerg.vehlow@aox-tech.de \
    --cc=linux-kernel@vger.kernel.org \
    --cc=peterz@infradead.org \
    --cc=rostedt@goodmis.org \
    --cc=tglx@linutronix.de \
    --cc=ying.huang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.