linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH AUTOSEL 6.5 1/7] x86/reboot: VMCLEAR active VMCSes before emergency reboot
@ 2023-09-14  1:54 Sasha Levin
  2023-09-14  1:54 ` [PATCH AUTOSEL 6.5 2/7] ceph: drop messages from MDS when unmounting Sasha Levin
                   ` (6 more replies)
  0 siblings, 7 replies; 8+ messages in thread
From: Sasha Levin @ 2023-09-14  1:54 UTC (permalink / raw)
  To: linux-kernel, stable
  Cc: Sean Christopherson, Andrew Cooper, Sasha Levin, tglx, mingo, bp,
	dave.hansen, x86, pbonzini, akpm, bhe, eric.devolder, hbathini,
	sourabhjain, bhelgaas, kai.huang, peterz, jpoimboe, tiwai, kvm

From: Sean Christopherson <seanjc@google.com>

[ Upstream commit b23c83ad2c638420ec0608a9de354507c41bec29 ]

VMCLEAR active VMCSes before any emergency reboot, not just if the kernel
may kexec into a new kernel after a crash.  Per Intel's SDM, the VMX
architecture doesn't require the CPU to flush the VMCS cache on INIT.  If
an emergency reboot doesn't RESET CPUs, cached VMCSes could theoretically
be kept and only be written back to memory after the new kernel is booted,
i.e. could effectively corrupt memory after reboot.

Opportunistically remove the setting of the global pointer to NULL to make
checkpatch happy.

Cc: Andrew Cooper <Andrew.Cooper3@citrix.com>
Link: https://lore.kernel.org/r/20230721201859.2307736-2-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 arch/x86/include/asm/kexec.h  |  2 --
 arch/x86/include/asm/reboot.h |  2 ++
 arch/x86/kernel/crash.c       | 31 -------------------------------
 arch/x86/kernel/reboot.c      | 22 ++++++++++++++++++++++
 arch/x86/kvm/vmx/vmx.c        | 10 +++-------
 5 files changed, 27 insertions(+), 40 deletions(-)

diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index 5b77bbc28f969..819046974b997 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -205,8 +205,6 @@ int arch_kimage_file_post_load_cleanup(struct kimage *image);
 #endif
 #endif
 
-typedef void crash_vmclear_fn(void);
-extern crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss;
 extern void kdump_nmi_shootdown_cpus(void);
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/reboot.h b/arch/x86/include/asm/reboot.h
index 9177b4354c3f5..dc201724a6433 100644
--- a/arch/x86/include/asm/reboot.h
+++ b/arch/x86/include/asm/reboot.h
@@ -25,6 +25,8 @@ void __noreturn machine_real_restart(unsigned int type);
 #define MRR_BIOS	0
 #define MRR_APM		1
 
+typedef void crash_vmclear_fn(void);
+extern crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss;
 void cpu_emergency_disable_virtualization(void);
 
 typedef void (*nmi_shootdown_cb)(int, struct pt_regs*);
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index cdd92ab43cda4..54cd959cb3160 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -48,38 +48,12 @@ struct crash_memmap_data {
 	unsigned int type;
 };
 
-/*
- * This is used to VMCLEAR all VMCSs loaded on the
- * processor. And when loading kvm_intel module, the
- * callback function pointer will be assigned.
- *
- * protected by rcu.
- */
-crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss = NULL;
-EXPORT_SYMBOL_GPL(crash_vmclear_loaded_vmcss);
-
-static inline void cpu_crash_vmclear_loaded_vmcss(void)
-{
-	crash_vmclear_fn *do_vmclear_operation = NULL;
-
-	rcu_read_lock();
-	do_vmclear_operation = rcu_dereference(crash_vmclear_loaded_vmcss);
-	if (do_vmclear_operation)
-		do_vmclear_operation();
-	rcu_read_unlock();
-}
-
 #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)
 
 static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
 {
 	crash_save_cpu(regs, cpu);
 
-	/*
-	 * VMCLEAR VMCSs loaded on all cpus if needed.
-	 */
-	cpu_crash_vmclear_loaded_vmcss();
-
 	/*
 	 * Disable Intel PT to stop its logging
 	 */
@@ -133,11 +107,6 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
 
 	crash_smp_send_stop();
 
-	/*
-	 * VMCLEAR VMCSs loaded on this cpu if needed.
-	 */
-	cpu_crash_vmclear_loaded_vmcss();
-
 	cpu_emergency_disable_virtualization();
 
 	/*
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 3adbe97015c13..3fa4c6717a1db 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -787,6 +787,26 @@ void machine_crash_shutdown(struct pt_regs *regs)
 }
 #endif
 
+/*
+ * This is used to VMCLEAR all VMCSs loaded on the
+ * processor. And when loading kvm_intel module, the
+ * callback function pointer will be assigned.
+ *
+ * protected by rcu.
+ */
+crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss;
+EXPORT_SYMBOL_GPL(crash_vmclear_loaded_vmcss);
+
+static inline void cpu_crash_vmclear_loaded_vmcss(void)
+{
+	crash_vmclear_fn *do_vmclear_operation = NULL;
+
+	rcu_read_lock();
+	do_vmclear_operation = rcu_dereference(crash_vmclear_loaded_vmcss);
+	if (do_vmclear_operation)
+		do_vmclear_operation();
+	rcu_read_unlock();
+}
 
 /* This is the CPU performing the emergency shutdown work. */
 int crashing_cpu = -1;
@@ -798,6 +818,8 @@ int crashing_cpu = -1;
  */
 void cpu_emergency_disable_virtualization(void)
 {
+	cpu_crash_vmclear_loaded_vmcss();
+
 	cpu_emergency_vmxoff();
 	cpu_emergency_svm_disable();
 }
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index df461f387e20d..f60fb79fea881 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -41,7 +41,7 @@
 #include <asm/idtentry.h>
 #include <asm/io.h>
 #include <asm/irq_remapping.h>
-#include <asm/kexec.h>
+#include <asm/reboot.h>
 #include <asm/perf_event.h>
 #include <asm/mmu_context.h>
 #include <asm/mshyperv.h>
@@ -754,7 +754,6 @@ static int vmx_set_guest_uret_msr(struct vcpu_vmx *vmx,
 	return ret;
 }
 
-#ifdef CONFIG_KEXEC_CORE
 static void crash_vmclear_local_loaded_vmcss(void)
 {
 	int cpu = raw_smp_processor_id();
@@ -764,7 +763,6 @@ static void crash_vmclear_local_loaded_vmcss(void)
 			    loaded_vmcss_on_cpu_link)
 		vmcs_clear(v->vmcs);
 }
-#endif /* CONFIG_KEXEC_CORE */
 
 static void __loaded_vmcs_clear(void *arg)
 {
@@ -8622,10 +8620,9 @@ static void __vmx_exit(void)
 {
 	allow_smaller_maxphyaddr = false;
 
-#ifdef CONFIG_KEXEC_CORE
 	RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL);
 	synchronize_rcu();
-#endif
+
 	vmx_cleanup_l1d_flush();
 }
 
@@ -8674,10 +8671,9 @@ static int __init vmx_init(void)
 		pi_init_cpu(cpu);
 	}
 
-#ifdef CONFIG_KEXEC_CORE
 	rcu_assign_pointer(crash_vmclear_loaded_vmcss,
 			   crash_vmclear_local_loaded_vmcss);
-#endif
+
 	vmx_check_vmcs12_offsets();
 
 	/*
-- 
2.40.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH AUTOSEL 6.5 2/7] ceph: drop messages from MDS when unmounting
  2023-09-14  1:54 [PATCH AUTOSEL 6.5 1/7] x86/reboot: VMCLEAR active VMCSes before emergency reboot Sasha Levin
@ 2023-09-14  1:54 ` Sasha Levin
  2023-09-14  1:54 ` [PATCH AUTOSEL 6.5 3/7] dma-debug: don't call __dma_entry_alloc_check_leak() under free_entries_lock Sasha Levin
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Sasha Levin @ 2023-09-14  1:54 UTC (permalink / raw)
  To: linux-kernel, stable
  Cc: Xiubo Li, Luís Henriques, Milind Changire, Ilya Dryomov,
	Sasha Levin, ceph-devel

From: Xiubo Li <xiubli@redhat.com>

[ Upstream commit e3dfcab2080dc1f9a4b09cc1327361bc2845bfcd ]

When unmounting, all the dirty buffers will be flushed, and after
the last osd request is finished the last reference of the i_count
will be released. Then it will flush the dirty cap/snap to MDSs,
but the unmounting won't wait for the possible acks. Handling those
acks will ihold the inodes when updating the metadata locally, which
makes no sense any more at this point. This will make evict_inodes()
skip these inodes.

If encryption is enabled, the kernel generates a warning when removing
the encryption keys while the skipped inodes still hold the keyring:

WARNING: CPU: 4 PID: 168846 at fs/crypto/keyring.c:242 fscrypt_destroy_keyring+0x7e/0xd0
CPU: 4 PID: 168846 Comm: umount Tainted: G S  6.1.0-rc5-ceph-g72ead199864c #1
Hardware name: Supermicro SYS-5018R-WR/X10SRW-F, BIOS 2.0 12/17/2015
RIP: 0010:fscrypt_destroy_keyring+0x7e/0xd0
RSP: 0018:ffffc9000b277e28 EFLAGS: 00010202
RAX: 0000000000000002 RBX: ffff88810d52ac00 RCX: ffff88810b56aa00
RDX: 0000000080000000 RSI: ffffffff822f3a09 RDI: ffff888108f59000
RBP: ffff8881d394fb88 R08: 0000000000000028 R09: 0000000000000000
R10: 0000000000000001 R11: 11ff4fe6834fcd91 R12: ffff8881d394fc40
R13: ffff888108f59000 R14: ffff8881d394f800 R15: 0000000000000000
FS:  00007fd83f6f1080(0000) GS:ffff88885fd00000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007f918d417000 CR3: 000000017f89a005 CR4: 00000000003706e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
<TASK>
generic_shutdown_super+0x47/0x120
kill_anon_super+0x14/0x30
ceph_kill_sb+0x36/0x90 [ceph]
deactivate_locked_super+0x29/0x60
cleanup_mnt+0xb8/0x140
task_work_run+0x67/0xb0
exit_to_user_mode_prepare+0x23d/0x240
syscall_exit_to_user_mode+0x25/0x60
do_syscall_64+0x40/0x80
entry_SYSCALL_64_after_hwframe+0x63/0xcd
RIP: 0033:0x7fd83dc39e9b

Later the kernel will crash when iput() is called on the inodes and
dereferences "sb->s_master_keys", which has already been released by
generic_shutdown_super().

Link: https://tracker.ceph.com/issues/59162
Signed-off-by: Xiubo Li <xiubli@redhat.com>
Reviewed-and-tested-by: Luís Henriques <lhenriques@suse.de>
Reviewed-by: Milind Changire <mchangir@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 fs/ceph/caps.c       |  6 +++-
 fs/ceph/mds_client.c | 12 +++++--
 fs/ceph/mds_client.h | 11 +++++--
 fs/ceph/quota.c      | 14 ++++-----
 fs/ceph/snap.c       | 10 +++---
 fs/ceph/super.c      | 75 +++++++++++++++++++++++++++++++++++++++++---
 fs/ceph/super.h      |  3 ++
 7 files changed, 109 insertions(+), 22 deletions(-)

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index e2bb0d0072da5..c268bd07e7ddd 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -4105,6 +4105,9 @@ void ceph_handle_caps(struct ceph_mds_session *session,
 
 	dout("handle_caps from mds%d\n", session->s_mds);
 
+	if (!ceph_inc_mds_stopping_blocker(mdsc, session))
+		return;
+
 	/* decode */
 	end = msg->front.iov_base + msg->front.iov_len;
 	if (msg->front.iov_len < sizeof(*h))
@@ -4201,7 +4204,6 @@ void ceph_handle_caps(struct ceph_mds_session *session,
 	     vino.snap, inode);
 
 	mutex_lock(&session->s_mutex);
-	inc_session_sequence(session);
 	dout(" mds%d seq %lld cap seq %u\n", session->s_mds, session->s_seq,
 	     (unsigned)seq);
 
@@ -4309,6 +4311,8 @@ void ceph_handle_caps(struct ceph_mds_session *session,
 done_unlocked:
 	iput(inode);
 out:
+	ceph_dec_mds_stopping_blocker(mdsc);
+
 	ceph_put_string(extra_info.pool_ns);
 
 	/* Defer closing the sessions after s_mutex lock being released */
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 5fb367b1d4b06..4b0ba067e9c93 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -4550,6 +4550,9 @@ static void handle_lease(struct ceph_mds_client *mdsc,
 
 	dout("handle_lease from mds%d\n", mds);
 
+	if (!ceph_inc_mds_stopping_blocker(mdsc, session))
+		return;
+
 	/* decode */
 	if (msg->front.iov_len < sizeof(*h) + sizeof(u32))
 		goto bad;
@@ -4568,8 +4571,6 @@ static void handle_lease(struct ceph_mds_client *mdsc,
 	     dname.len, dname.name);
 
 	mutex_lock(&session->s_mutex);
-	inc_session_sequence(session);
-
 	if (!inode) {
 		dout("handle_lease no inode %llx\n", vino.ino);
 		goto release;
@@ -4631,9 +4632,13 @@ static void handle_lease(struct ceph_mds_client *mdsc,
 out:
 	mutex_unlock(&session->s_mutex);
 	iput(inode);
+
+	ceph_dec_mds_stopping_blocker(mdsc);
 	return;
 
 bad:
+	ceph_dec_mds_stopping_blocker(mdsc);
+
 	pr_err("corrupt lease message\n");
 	ceph_msg_dump(msg);
 }
@@ -4829,6 +4834,9 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
 	}
 
 	init_completion(&mdsc->safe_umount_waiters);
+	spin_lock_init(&mdsc->stopping_lock);
+	atomic_set(&mdsc->stopping_blockers, 0);
+	init_completion(&mdsc->stopping_waiter);
 	init_waitqueue_head(&mdsc->session_close_wq);
 	INIT_LIST_HEAD(&mdsc->waiting_for_map);
 	mdsc->quotarealms_inodes = RB_ROOT;
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 86d2965e68a1f..cff7392809032 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -381,8 +381,9 @@ struct cap_wait {
 };
 
 enum {
-       CEPH_MDSC_STOPPING_BEGIN = 1,
-       CEPH_MDSC_STOPPING_FLUSHED = 2,
+	CEPH_MDSC_STOPPING_BEGIN = 1,
+	CEPH_MDSC_STOPPING_FLUSHING = 2,
+	CEPH_MDSC_STOPPING_FLUSHED = 3,
 };
 
 /*
@@ -401,7 +402,11 @@ struct ceph_mds_client {
 	struct ceph_mds_session **sessions;    /* NULL for mds if no session */
 	atomic_t		num_sessions;
 	int                     max_sessions;  /* len of sessions array */
-	int                     stopping;      /* true if shutting down */
+
+	spinlock_t              stopping_lock;  /* guards stopping stage vs. stopping_blockers */
+	int                     stopping;      /* the stage of shutting down */
+	atomic_t                stopping_blockers;
+	struct completion	stopping_waiter;
 
 	atomic64_t		quotarealms_count; /* # realms with quota */
 	/*
diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c
index 64592adfe48fb..f7fcf7f08ec64 100644
--- a/fs/ceph/quota.c
+++ b/fs/ceph/quota.c
@@ -47,25 +47,23 @@ void ceph_handle_quota(struct ceph_mds_client *mdsc,
 	struct inode *inode;
 	struct ceph_inode_info *ci;
 
+	if (!ceph_inc_mds_stopping_blocker(mdsc, session))
+		return;
+
 	if (msg->front.iov_len < sizeof(*h)) {
 		pr_err("%s corrupt message mds%d len %d\n", __func__,
 		       session->s_mds, (int)msg->front.iov_len);
 		ceph_msg_dump(msg);
-		return;
+		goto out;
 	}
 
-	/* increment msg sequence number */
-	mutex_lock(&session->s_mutex);
-	inc_session_sequence(session);
-	mutex_unlock(&session->s_mutex);
-
 	/* lookup inode */
 	vino.ino = le64_to_cpu(h->ino);
 	vino.snap = CEPH_NOSNAP;
 	inode = ceph_find_inode(sb, vino);
 	if (!inode) {
 		pr_warn("Failed to find inode %llu\n", vino.ino);
-		return;
+		goto out;
 	}
 	ci = ceph_inode(inode);
 
@@ -78,6 +76,8 @@ void ceph_handle_quota(struct ceph_mds_client *mdsc,
 	spin_unlock(&ci->i_ceph_lock);
 
 	iput(inode);
+out:
+	ceph_dec_mds_stopping_blocker(mdsc);
 }
 
 static struct ceph_quotarealm_inode *
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 343d738448dcd..7ddc6bad77ef3 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -1015,6 +1015,9 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
 	int locked_rwsem = 0;
 	bool close_sessions = false;
 
+	if (!ceph_inc_mds_stopping_blocker(mdsc, session))
+		return;
+
 	/* decode */
 	if (msg->front.iov_len < sizeof(*h))
 		goto bad;
@@ -1030,10 +1033,6 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
 	dout("%s from mds%d op %s split %llx tracelen %d\n", __func__,
 	     mds, ceph_snap_op_name(op), split, trace_len);
 
-	mutex_lock(&session->s_mutex);
-	inc_session_sequence(session);
-	mutex_unlock(&session->s_mutex);
-
 	down_write(&mdsc->snap_rwsem);
 	locked_rwsem = 1;
 
@@ -1151,6 +1150,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
 	up_write(&mdsc->snap_rwsem);
 
 	flush_snaps(mdsc);
+	ceph_dec_mds_stopping_blocker(mdsc);
 	return;
 
 bad:
@@ -1160,6 +1160,8 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
 	if (locked_rwsem)
 		up_write(&mdsc->snap_rwsem);
 
+	ceph_dec_mds_stopping_blocker(mdsc);
+
 	if (close_sessions)
 		ceph_mdsc_close_sessions(mdsc);
 	return;
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index a5f52013314d6..281b493fdac8e 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -1365,25 +1365,90 @@ static int ceph_init_fs_context(struct fs_context *fc)
 	return -ENOMEM;
 }
 
+/*
+ * Return true if it successfully increases the blocker counter,
+ * or false if the mdsc is in stopping and flushed state.
+ */
+static bool __inc_stopping_blocker(struct ceph_mds_client *mdsc)
+{
+	spin_lock(&mdsc->stopping_lock);
+	if (mdsc->stopping >= CEPH_MDSC_STOPPING_FLUSHING) {
+		spin_unlock(&mdsc->stopping_lock);
+		return false;
+	}
+	atomic_inc(&mdsc->stopping_blockers);
+	spin_unlock(&mdsc->stopping_lock);
+	return true;
+}
+
+static void __dec_stopping_blocker(struct ceph_mds_client *mdsc)
+{
+	spin_lock(&mdsc->stopping_lock);
+	if (!atomic_dec_return(&mdsc->stopping_blockers) &&
+	    mdsc->stopping >= CEPH_MDSC_STOPPING_FLUSHING)
+		complete_all(&mdsc->stopping_waiter);
+	spin_unlock(&mdsc->stopping_lock);
+}
+
+/* For metadata IO requests */
+bool ceph_inc_mds_stopping_blocker(struct ceph_mds_client *mdsc,
+				   struct ceph_mds_session *session)
+{
+	mutex_lock(&session->s_mutex);
+	inc_session_sequence(session);
+	mutex_unlock(&session->s_mutex);
+
+	return __inc_stopping_blocker(mdsc);
+}
+
+void ceph_dec_mds_stopping_blocker(struct ceph_mds_client *mdsc)
+{
+	__dec_stopping_blocker(mdsc);
+}
+
 static void ceph_kill_sb(struct super_block *s)
 {
 	struct ceph_fs_client *fsc = ceph_sb_to_client(s);
+	struct ceph_mds_client *mdsc = fsc->mdsc;
+	bool wait;
 
 	dout("kill_sb %p\n", s);
 
-	ceph_mdsc_pre_umount(fsc->mdsc);
+	ceph_mdsc_pre_umount(mdsc);
 	flush_fs_workqueues(fsc);
 
 	/*
 	 * Though the kill_anon_super() will finally trigger the
-	 * sync_filesystem() anyway, we still need to do it here
-	 * and then bump the stage of shutdown to stop the work
-	 * queue as earlier as possible.
+	 * sync_filesystem() anyway, we still need to do it here and
+	 * then bump the stage of shutdown. This will allow us to
+	 * drop any further message, which will increase the inodes'
+	 * i_count reference counters but makes no sense any more,
+	 * from MDSs.
+	 *
+	 * Without this when evicting the inodes it may fail in the
+	 * kill_anon_super(), which will trigger a warning when
+	 * destroying the fscrypt keyring and then possibly trigger
+	 * a further crash in ceph module when the iput() tries to
+	 * evict the inodes later.
 	 */
 	sync_filesystem(s);
 
-	fsc->mdsc->stopping = CEPH_MDSC_STOPPING_FLUSHED;
+	spin_lock(&mdsc->stopping_lock);
+	mdsc->stopping = CEPH_MDSC_STOPPING_FLUSHING;
+	wait = !!atomic_read(&mdsc->stopping_blockers);
+	spin_unlock(&mdsc->stopping_lock);
+
+	if (wait && atomic_read(&mdsc->stopping_blockers)) {
+		long timeleft = wait_for_completion_killable_timeout(
+					&mdsc->stopping_waiter,
+					fsc->client->options->mount_timeout);
+		if (!timeleft) /* timed out */
+			pr_warn("umount timed out, %ld\n", timeleft);
+		else if (timeleft < 0) /* killed */
+			pr_warn("umount was killed, %ld\n", timeleft);
+	}
 
+	mdsc->stopping = CEPH_MDSC_STOPPING_FLUSHED;
 	kill_anon_super(s);
 
 	fsc->client->extra_mon_dispatch = NULL;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 3bfddf34d488b..e6c1edf9e12b0 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -1375,4 +1375,7 @@ extern bool ceph_quota_update_statfs(struct ceph_fs_client *fsc,
 				     struct kstatfs *buf);
 extern void ceph_cleanup_quotarealms_inodes(struct ceph_mds_client *mdsc);
 
+bool ceph_inc_mds_stopping_blocker(struct ceph_mds_client *mdsc,
+			       struct ceph_mds_session *session);
+void ceph_dec_mds_stopping_blocker(struct ceph_mds_client *mdsc);
 #endif /* _FS_CEPH_SUPER_H */
-- 
2.40.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH AUTOSEL 6.5 3/7] dma-debug: don't call __dma_entry_alloc_check_leak() under free_entries_lock
  2023-09-14  1:54 [PATCH AUTOSEL 6.5 1/7] x86/reboot: VMCLEAR active VMCSes before emergency reboot Sasha Levin
  2023-09-14  1:54 ` [PATCH AUTOSEL 6.5 2/7] ceph: drop messages from MDS when unmounting Sasha Levin
@ 2023-09-14  1:54 ` Sasha Levin
  2023-09-14  1:54 ` [PATCH AUTOSEL 6.5 4/7] bpf: Annotate bpf_long_memcpy with data_race Sasha Levin
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Sasha Levin @ 2023-09-14  1:54 UTC (permalink / raw)
  To: linux-kernel, stable
  Cc: Sergey Senozhatsky, Rob Clark, Robin Murphy, Christoph Hellwig,
	Sasha Levin, m.szyprowski, iommu

From: Sergey Senozhatsky <senozhatsky@chromium.org>

[ Upstream commit fb5a4315591dae307a65fc246ca80b5159d296e1 ]

__dma_entry_alloc_check_leak() calls into printk -> serial console
output (qcom geni) and grabs port->lock under free_entries_lock
spin lock, which is a reverse locking dependency chain as qcom_geni
IRQ handler can call into dma-debug code and grab free_entries_lock
under port->lock.

Move __dma_entry_alloc_check_leak() call out of free_entries_lock
scope so that we don't acquire serial console's port->lock under it.

Trimmed-down lockdep splat:

 The existing dependency chain (in reverse order) is:

               -> #2 (free_entries_lock){-.-.}-{2:2}:
        _raw_spin_lock_irqsave+0x60/0x80
        dma_entry_alloc+0x38/0x110
        debug_dma_map_page+0x60/0xf8
        dma_map_page_attrs+0x1e0/0x230
        dma_map_single_attrs.constprop.0+0x6c/0xc8
        geni_se_rx_dma_prep+0x40/0xcc
        qcom_geni_serial_isr+0x310/0x510
        __handle_irq_event_percpu+0x110/0x244
        handle_irq_event_percpu+0x20/0x54
        handle_irq_event+0x50/0x88
        handle_fasteoi_irq+0xa4/0xcc
        handle_irq_desc+0x28/0x40
        generic_handle_domain_irq+0x24/0x30
        gic_handle_irq+0xc4/0x148
        do_interrupt_handler+0xa4/0xb0
        el1_interrupt+0x34/0x64
        el1h_64_irq_handler+0x18/0x24
        el1h_64_irq+0x64/0x68
        arch_local_irq_enable+0x4/0x8
        ____do_softirq+0x18/0x24
        ...

               -> #1 (&port_lock_key){-.-.}-{2:2}:
        _raw_spin_lock_irqsave+0x60/0x80
        qcom_geni_serial_console_write+0x184/0x1dc
        console_flush_all+0x344/0x454
        console_unlock+0x94/0xf0
        vprintk_emit+0x238/0x24c
        vprintk_default+0x3c/0x48
        vprintk+0xb4/0xbc
        _printk+0x68/0x90
        register_console+0x230/0x38c
        uart_add_one_port+0x338/0x494
        qcom_geni_serial_probe+0x390/0x424
        platform_probe+0x70/0xc0
        really_probe+0x148/0x280
        __driver_probe_device+0xfc/0x114
        driver_probe_device+0x44/0x100
        __device_attach_driver+0x64/0xdc
        bus_for_each_drv+0xb0/0xd8
        __device_attach+0xe4/0x140
        device_initial_probe+0x1c/0x28
        bus_probe_device+0x44/0xb0
        device_add+0x538/0x668
        of_device_add+0x44/0x50
        of_platform_device_create_pdata+0x94/0xc8
        of_platform_bus_create+0x270/0x304
        of_platform_populate+0xac/0xc4
        devm_of_platform_populate+0x60/0xac
        geni_se_probe+0x154/0x160
        platform_probe+0x70/0xc0
        ...

               -> #0 (console_owner){-...}-{0:0}:
        __lock_acquire+0xdf8/0x109c
        lock_acquire+0x234/0x284
        console_flush_all+0x330/0x454
        console_unlock+0x94/0xf0
        vprintk_emit+0x238/0x24c
        vprintk_default+0x3c/0x48
        vprintk+0xb4/0xbc
        _printk+0x68/0x90
        dma_entry_alloc+0xb4/0x110
        debug_dma_map_sg+0xdc/0x2f8
        __dma_map_sg_attrs+0xac/0xe4
        dma_map_sgtable+0x30/0x4c
        get_pages+0x1d4/0x1e4 [msm]
        msm_gem_pin_pages_locked+0x38/0xac [msm]
        msm_gem_pin_vma_locked+0x58/0x88 [msm]
        msm_ioctl_gem_submit+0xde4/0x13ac [msm]
        drm_ioctl_kernel+0xe0/0x15c
        drm_ioctl+0x2e8/0x3f4
        vfs_ioctl+0x30/0x50
        ...

 Chain exists of:
   console_owner --> &port_lock_key --> free_entries_lock

  Possible unsafe locking scenario:

        CPU0                    CPU1
        ----                    ----
   lock(free_entries_lock);
                                lock(&port_lock_key);
                                lock(free_entries_lock);
   lock(console_owner);

                *** DEADLOCK ***

 Call trace:
  dump_backtrace+0xb4/0xf0
  show_stack+0x20/0x30
  dump_stack_lvl+0x60/0x84
  dump_stack+0x18/0x24
  print_circular_bug+0x1cc/0x234
  check_noncircular+0x78/0xac
  __lock_acquire+0xdf8/0x109c
  lock_acquire+0x234/0x284
  console_flush_all+0x330/0x454
  console_unlock+0x94/0xf0
  vprintk_emit+0x238/0x24c
  vprintk_default+0x3c/0x48
  vprintk+0xb4/0xbc
  _printk+0x68/0x90
  dma_entry_alloc+0xb4/0x110
  debug_dma_map_sg+0xdc/0x2f8
  __dma_map_sg_attrs+0xac/0xe4
  dma_map_sgtable+0x30/0x4c
  get_pages+0x1d4/0x1e4 [msm]
  msm_gem_pin_pages_locked+0x38/0xac [msm]
  msm_gem_pin_vma_locked+0x58/0x88 [msm]
  msm_ioctl_gem_submit+0xde4/0x13ac [msm]
  drm_ioctl_kernel+0xe0/0x15c
  drm_ioctl+0x2e8/0x3f4
  vfs_ioctl+0x30/0x50
  ...

Reported-by: Rob Clark <robdclark@chromium.org>
Signed-off-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Acked-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 kernel/dma/debug.c | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c
index f190651bcaddc..06366acd27b08 100644
--- a/kernel/dma/debug.c
+++ b/kernel/dma/debug.c
@@ -637,15 +637,19 @@ static struct dma_debug_entry *__dma_entry_alloc(void)
 	return entry;
 }
 
-static void __dma_entry_alloc_check_leak(void)
+/*
+ * This should be called outside of free_entries_lock scope to avoid potential
+ * deadlocks with serial consoles that use DMA.
+ */
+static void __dma_entry_alloc_check_leak(u32 nr_entries)
 {
-	u32 tmp = nr_total_entries % nr_prealloc_entries;
+	u32 tmp = nr_entries % nr_prealloc_entries;
 
 	/* Shout each time we tick over some multiple of the initial pool */
 	if (tmp < DMA_DEBUG_DYNAMIC_ENTRIES) {
 		pr_info("dma_debug_entry pool grown to %u (%u00%%)\n",
-			nr_total_entries,
-			(nr_total_entries / nr_prealloc_entries));
+			nr_entries,
+			(nr_entries / nr_prealloc_entries));
 	}
 }
 
@@ -656,8 +660,10 @@ static void __dma_entry_alloc_check_leak(void)
  */
 static struct dma_debug_entry *dma_entry_alloc(void)
 {
+	bool alloc_check_leak = false;
 	struct dma_debug_entry *entry;
 	unsigned long flags;
+	u32 nr_entries;
 
 	spin_lock_irqsave(&free_entries_lock, flags);
 	if (num_free_entries == 0) {
@@ -667,13 +673,17 @@ static struct dma_debug_entry *dma_entry_alloc(void)
 			pr_err("debugging out of memory - disabling\n");
 			return NULL;
 		}
-		__dma_entry_alloc_check_leak();
+		alloc_check_leak = true;
+		nr_entries = nr_total_entries;
 	}
 
 	entry = __dma_entry_alloc();
 
 	spin_unlock_irqrestore(&free_entries_lock, flags);
 
+	if (alloc_check_leak)
+		__dma_entry_alloc_check_leak(nr_entries);
+
 #ifdef CONFIG_STACKTRACE
 	entry->stack_len = stack_trace_save(entry->stack_entries,
 					    ARRAY_SIZE(entry->stack_entries),
-- 
2.40.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH AUTOSEL 6.5 4/7] bpf: Annotate bpf_long_memcpy with data_race
  2023-09-14  1:54 [PATCH AUTOSEL 6.5 1/7] x86/reboot: VMCLEAR active VMCSes before emergency reboot Sasha Levin
  2023-09-14  1:54 ` [PATCH AUTOSEL 6.5 2/7] ceph: drop messages from MDS when unmounting Sasha Levin
  2023-09-14  1:54 ` [PATCH AUTOSEL 6.5 3/7] dma-debug: don't call __dma_entry_alloc_check_leak() under free_entries_lock Sasha Levin
@ 2023-09-14  1:54 ` Sasha Levin
  2023-09-14  1:54 ` [PATCH AUTOSEL 6.5 5/7] ASoC: amd: yc: Add DMI entries to support Victus by HP Gaming Laptop 15-fb0xxx (8A3E) Sasha Levin
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Sasha Levin @ 2023-09-14  1:54 UTC (permalink / raw)
  To: linux-kernel, stable
  Cc: Daniel Borkmann, syzbot+97522333291430dd277f, Marco Elver,
	Sasha Levin, ast, andrii, bpf

From: Daniel Borkmann <daniel@iogearbox.net>

[ Upstream commit 6a86b5b5cd76d2734304a0173f5f01aa8aa2025e ]

syzbot reported a data race splat between two processes trying to
update the same BPF map value via syscall on different CPUs:

  BUG: KCSAN: data-race in bpf_percpu_array_update / bpf_percpu_array_update

  write to 0xffffe8fffe7425d8 of 8 bytes by task 8257 on cpu 1:
   bpf_long_memcpy include/linux/bpf.h:428 [inline]
   bpf_obj_memcpy include/linux/bpf.h:441 [inline]
   copy_map_value_long include/linux/bpf.h:464 [inline]
   bpf_percpu_array_update+0x3bb/0x500 kernel/bpf/arraymap.c:380
   bpf_map_update_value+0x190/0x370 kernel/bpf/syscall.c:175
   generic_map_update_batch+0x3ae/0x4f0 kernel/bpf/syscall.c:1749
   bpf_map_do_batch+0x2df/0x3d0 kernel/bpf/syscall.c:4648
   __sys_bpf+0x28a/0x780
   __do_sys_bpf kernel/bpf/syscall.c:5241 [inline]
   __se_sys_bpf kernel/bpf/syscall.c:5239 [inline]
   __x64_sys_bpf+0x43/0x50 kernel/bpf/syscall.c:5239
   do_syscall_x64 arch/x86/entry/common.c:50 [inline]
   do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80
   entry_SYSCALL_64_after_hwframe+0x63/0xcd

  write to 0xffffe8fffe7425d8 of 8 bytes by task 8268 on cpu 0:
   bpf_long_memcpy include/linux/bpf.h:428 [inline]
   bpf_obj_memcpy include/linux/bpf.h:441 [inline]
   copy_map_value_long include/linux/bpf.h:464 [inline]
   bpf_percpu_array_update+0x3bb/0x500 kernel/bpf/arraymap.c:380
   bpf_map_update_value+0x190/0x370 kernel/bpf/syscall.c:175
   generic_map_update_batch+0x3ae/0x4f0 kernel/bpf/syscall.c:1749
   bpf_map_do_batch+0x2df/0x3d0 kernel/bpf/syscall.c:4648
   __sys_bpf+0x28a/0x780
   __do_sys_bpf kernel/bpf/syscall.c:5241 [inline]
   __se_sys_bpf kernel/bpf/syscall.c:5239 [inline]
   __x64_sys_bpf+0x43/0x50 kernel/bpf/syscall.c:5239
   do_syscall_x64 arch/x86/entry/common.c:50 [inline]
   do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80
   entry_SYSCALL_64_after_hwframe+0x63/0xcd

  value changed: 0x0000000000000000 -> 0xfffffff000002788

bpf_long_memcpy() is used with 8-byte aligned pointers and a size that is
a multiple of 8, and is forced to use long reads/writes to try to
atomically copy long counters.
It is best-effort only and no barriers are here since it _will_ race with
concurrent updates from BPF programs. The bpf_long_memcpy() is called from
bpf(2) syscall. Marco suggested that the best way to make this known to
KCSAN would be to use data_race() annotation.

Reported-by: syzbot+97522333291430dd277f@syzkaller.appspotmail.com
Suggested-by: Marco Elver <elver@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Marco Elver <elver@google.com>
Link: https://lore.kernel.org/bpf/000000000000d87a7f06040c970c@google.com
Link: https://lore.kernel.org/bpf/57628f7a15e20d502247c3b55fceb1cb2b31f266.1693342186.git.daniel@iogearbox.net
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 include/linux/bpf.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index f58895830adae..eb1bb76e87f8b 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -425,7 +425,7 @@ static inline void bpf_long_memcpy(void *dst, const void *src, u32 size)
 
 	size /= sizeof(long);
 	while (size--)
-		*ldst++ = *lsrc++;
+		data_race(*ldst++ = *lsrc++);
 }
 
 /* copy everything but bpf_spin_lock, bpf_timer, and kptrs. There could be one of each. */
-- 
2.40.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH AUTOSEL 6.5 5/7] ASoC: amd: yc: Add DMI entries to support Victus by HP Gaming Laptop 15-fb0xxx (8A3E)
  2023-09-14  1:54 [PATCH AUTOSEL 6.5 1/7] x86/reboot: VMCLEAR active VMCSes before emergency reboot Sasha Levin
                   ` (2 preceding siblings ...)
  2023-09-14  1:54 ` [PATCH AUTOSEL 6.5 4/7] bpf: Annotate bpf_long_memcpy with data_race Sasha Levin
@ 2023-09-14  1:54 ` Sasha Levin
  2023-09-14  1:54 ` [PATCH AUTOSEL 6.5 6/7] spi: sun6i: reduce DMA RX transfer width to single byte Sasha Levin
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Sasha Levin @ 2023-09-14  1:54 UTC (permalink / raw)
  To: linux-kernel, stable
  Cc: Shubh, Mark Brown, Sasha Levin, lgirdwood, perex, tiwai,
	mario.limonciello, xazrael, Syed.SabaKareem, alsa-devel

From: Shubh <shubhisroking@gmail.com>

[ Upstream commit d1cf5d30b43f1a331032ebf3e11d9e366ab0f885 ]

This model requires an additional detection quirk to
enable the internal microphone.

Signed-off-by: Shubh <shubhisroking@gmail.com>
Link: https://lore.kernel.org/r/20230902150807.133523-1-shubhisroking@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 sound/soc/amd/yc/acp6x-mach.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c
index b304b3562c82b..f7ee792bd1be9 100644
--- a/sound/soc/amd/yc/acp6x-mach.c
+++ b/sound/soc/amd/yc/acp6x-mach.c
@@ -325,6 +325,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = {
 			DMI_MATCH(DMI_BOARD_NAME, "8A22"),
 		}
 	},
+	{
+		.driver_data = &acp6x_card,
+		.matches = {
+			DMI_MATCH(DMI_BOARD_VENDOR, "HP"),
+			DMI_MATCH(DMI_BOARD_NAME, "8A3E"),
+		}
+	},
 	{
 		.driver_data = &acp6x_card,
 		.matches = {
-- 
2.40.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH AUTOSEL 6.5 6/7] spi: sun6i: reduce DMA RX transfer width to single byte
  2023-09-14  1:54 [PATCH AUTOSEL 6.5 1/7] x86/reboot: VMCLEAR active VMCSes before emergency reboot Sasha Levin
                   ` (3 preceding siblings ...)
  2023-09-14  1:54 ` [PATCH AUTOSEL 6.5 5/7] ASoC: amd: yc: Add DMI entries to support Victus by HP Gaming Laptop 15-fb0xxx (8A3E) Sasha Levin
@ 2023-09-14  1:54 ` Sasha Levin
  2023-09-14  1:54 ` [PATCH AUTOSEL 6.5 7/7] spi: sun6i: fix race between DMA RX transfer completion and RX FIFO drain Sasha Levin
  2023-10-15 11:59 ` [PATCH AUTOSEL 6.5 1/7] x86/reboot: VMCLEAR active VMCSes before emergency reboot Paolo Bonzini
  6 siblings, 0 replies; 8+ messages in thread
From: Sasha Levin @ 2023-09-14  1:54 UTC (permalink / raw)
  To: linux-kernel, stable
  Cc: Tobias Schramm, Mark Brown, Sasha Levin, wens, jernej.skrabec,
	samuel, linux-spi, linux-arm-kernel, linux-sunxi

From: Tobias Schramm <t.schramm@manjaro.org>

[ Upstream commit 171f8a49f212e87a8b04087568e1b3d132e36a18 ]

Through empirical testing it has been determined that sometimes RX SPI
transfers with DMA enabled return corrupted data. This is down to single
or even multiple bytes lost during DMA transfer from SPI peripheral to
memory. It seems the RX FIFO within the SPI peripheral can become
confused when performing bus read accesses wider than a single byte to it
during an active SPI transfer.

This patch reduces the width of individual DMA read accesses to the
RX FIFO to a single byte to mitigate that issue.

Signed-off-by: Tobias Schramm <t.schramm@manjaro.org>
Link: https://lore.kernel.org/r/20230827152558.5368-2-t.schramm@manjaro.org
Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/spi/spi-sun6i.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/spi/spi-sun6i.c b/drivers/spi/spi-sun6i.c
index 30d541612253e..8fcb2696ec099 100644
--- a/drivers/spi/spi-sun6i.c
+++ b/drivers/spi/spi-sun6i.c
@@ -207,7 +207,7 @@ static int sun6i_spi_prepare_dma(struct sun6i_spi *sspi,
 		struct dma_slave_config rxconf = {
 			.direction = DMA_DEV_TO_MEM,
 			.src_addr = sspi->dma_addr_rx,
-			.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES,
+			.src_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE,
 			.src_maxburst = 8,
 		};
 
-- 
2.40.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH AUTOSEL 6.5 7/7] spi: sun6i: fix race between DMA RX transfer completion and RX FIFO drain
  2023-09-14  1:54 [PATCH AUTOSEL 6.5 1/7] x86/reboot: VMCLEAR active VMCSes before emergency reboot Sasha Levin
                   ` (4 preceding siblings ...)
  2023-09-14  1:54 ` [PATCH AUTOSEL 6.5 6/7] spi: sun6i: reduce DMA RX transfer width to single byte Sasha Levin
@ 2023-09-14  1:54 ` Sasha Levin
  2023-10-15 11:59 ` [PATCH AUTOSEL 6.5 1/7] x86/reboot: VMCLEAR active VMCSes before emergency reboot Paolo Bonzini
  6 siblings, 0 replies; 8+ messages in thread
From: Sasha Levin @ 2023-09-14  1:54 UTC (permalink / raw)
  To: linux-kernel, stable
  Cc: Tobias Schramm, Mark Brown, Sasha Levin, wens, jernej.skrabec,
	samuel, linux-spi, linux-arm-kernel, linux-sunxi

From: Tobias Schramm <t.schramm@manjaro.org>

[ Upstream commit 1f11f4202caf5710204d334fe63392052783876d ]

Previously the transfer complete IRQ immediately drained the RX FIFO to
read any data remaining in the FIFO into the RX buffer. This behaviour is
correct when dealing with SPI in interrupt mode. However in DMA mode the
transfer complete interrupt still fires as soon as all bytes to be
transferred have been stored in the FIFO. At that point data in the FIFO
still needs to be picked up by the DMA engine. Thus the drain procedure
and DMA engine end up racing to read from RX FIFO, corrupting any data
read. Additionally the RX buffer pointer is never adjusted according to
DMA progress in DMA mode, thus calling the RX FIFO drain procedure in DMA
mode is a bug.
Fix corruptions in DMA RX mode by draining RX FIFO only in interrupt mode.
Also wait for completion of RX DMA when in DMA mode before returning to
ensure all data has been copied to the supplied memory buffer.

Signed-off-by: Tobias Schramm <t.schramm@manjaro.org>
Link: https://lore.kernel.org/r/20230827152558.5368-3-t.schramm@manjaro.org
Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/spi/spi-sun6i.c | 29 ++++++++++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

diff --git a/drivers/spi/spi-sun6i.c b/drivers/spi/spi-sun6i.c
index 8fcb2696ec099..57c828e73c446 100644
--- a/drivers/spi/spi-sun6i.c
+++ b/drivers/spi/spi-sun6i.c
@@ -102,6 +102,7 @@ struct sun6i_spi {
 	struct reset_control	*rstc;
 
 	struct completion	done;
+	struct completion	dma_rx_done;
 
 	const u8		*tx_buf;
 	u8			*rx_buf;
@@ -196,6 +197,13 @@ static size_t sun6i_spi_max_transfer_size(struct spi_device *spi)
 	return SUN6I_MAX_XFER_SIZE - 1;
 }
 
+static void sun6i_spi_dma_rx_cb(void *param)
+{
+	struct sun6i_spi *sspi = param;
+
+	complete(&sspi->dma_rx_done);
+}
+
 static int sun6i_spi_prepare_dma(struct sun6i_spi *sspi,
 				 struct spi_transfer *tfr)
 {
@@ -220,6 +228,8 @@ static int sun6i_spi_prepare_dma(struct sun6i_spi *sspi,
 						 DMA_PREP_INTERRUPT);
 		if (!rxdesc)
 			return -EINVAL;
+		rxdesc->callback_param = sspi;
+		rxdesc->callback = sun6i_spi_dma_rx_cb;
 	}
 
 	txdesc = NULL;
@@ -275,6 +285,7 @@ static int sun6i_spi_transfer_one(struct spi_master *master,
 		return -EINVAL;
 
 	reinit_completion(&sspi->done);
+	reinit_completion(&sspi->dma_rx_done);
 	sspi->tx_buf = tfr->tx_buf;
 	sspi->rx_buf = tfr->rx_buf;
 	sspi->len = tfr->len;
@@ -459,6 +470,22 @@ static int sun6i_spi_transfer_one(struct spi_master *master,
 	start = jiffies;
 	timeout = wait_for_completion_timeout(&sspi->done,
 					      msecs_to_jiffies(tx_time));
+
+	if (!use_dma) {
+		sun6i_spi_drain_fifo(sspi);
+	} else {
+		if (timeout && rx_len) {
+			/*
+			 * Even though RX on the peripheral side has finished
+			 * RX DMA might still be in flight
+			 */
+			timeout = wait_for_completion_timeout(&sspi->dma_rx_done,
+							      timeout);
+			if (!timeout)
+				dev_warn(&master->dev, "RX DMA timeout\n");
+		}
+	}
+
 	end = jiffies;
 	if (!timeout) {
 		dev_warn(&master->dev,
@@ -486,7 +513,6 @@ static irqreturn_t sun6i_spi_handler(int irq, void *dev_id)
 	/* Transfer complete */
 	if (status & SUN6I_INT_CTL_TC) {
 		sun6i_spi_write(sspi, SUN6I_INT_STA_REG, SUN6I_INT_CTL_TC);
-		sun6i_spi_drain_fifo(sspi);
 		complete(&sspi->done);
 		return IRQ_HANDLED;
 	}
@@ -644,6 +670,7 @@ static int sun6i_spi_probe(struct platform_device *pdev)
 	}
 
 	init_completion(&sspi->done);
+	init_completion(&sspi->dma_rx_done);
 
 	sspi->rstc = devm_reset_control_get_exclusive(&pdev->dev, NULL);
 	if (IS_ERR(sspi->rstc)) {
-- 
2.40.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH AUTOSEL 6.5 1/7] x86/reboot: VMCLEAR active VMCSes before emergency reboot
  2023-09-14  1:54 [PATCH AUTOSEL 6.5 1/7] x86/reboot: VMCLEAR active VMCSes before emergency reboot Sasha Levin
                   ` (5 preceding siblings ...)
  2023-09-14  1:54 ` [PATCH AUTOSEL 6.5 7/7] spi: sun6i: fix race between DMA RX transfer completion and RX FIFO drain Sasha Levin
@ 2023-10-15 11:59 ` Paolo Bonzini
  6 siblings, 0 replies; 8+ messages in thread
From: Paolo Bonzini @ 2023-10-15 11:59 UTC (permalink / raw)
  To: Sasha Levin
  Cc: linux-kernel, stable, Sean Christopherson, Andrew Cooper, tglx,
	mingo, bp, dave.hansen, x86, akpm, bhe, eric.devolder, hbathini,
	sourabhjain, bhelgaas, kai.huang, peterz, jpoimboe, tiwai, kvm

On Thu, Sep 14, 2023 at 3:55 AM Sasha Levin <sashal@kernel.org> wrote:
>
> From: Sean Christopherson <seanjc@google.com>
>
> [ Upstream commit b23c83ad2c638420ec0608a9de354507c41bec29 ]
>
> VMCLEAR active VMCSes before any emergency reboot, not just if the kernel
> may kexec into a new kernel after a crash.  Per Intel's SDM, the VMX
> architecture doesn't require the CPU to flush the VMCS cache on INIT.  If
> an emergency reboot doesn't RESET CPUs, cached VMCSes could theoretically
> be kept and only be written back to memory after the new kernel is booted,
> i.e. could effectively corrupt memory after reboot.
>
> Opportunistically remove the setting of the global pointer to NULL to make
> checkpatch happy.

Intended as a cleanup but I guess it does not hurt, since it was the first patch
in the large series that included it.

Acked-by: Paolo Bonzini <pbonzini@redhat.com>

Paolo


> Cc: Andrew Cooper <Andrew.Cooper3@citrix.com>
> Link: https://lore.kernel.org/r/20230721201859.2307736-2-seanjc@google.com
> Signed-off-by: Sean Christopherson <seanjc@google.com>
> Signed-off-by: Sasha Levin <sashal@kernel.org>
> ---
>  arch/x86/include/asm/kexec.h  |  2 --
>  arch/x86/include/asm/reboot.h |  2 ++
>  arch/x86/kernel/crash.c       | 31 -------------------------------
>  arch/x86/kernel/reboot.c      | 22 ++++++++++++++++++++++
>  arch/x86/kvm/vmx/vmx.c        | 10 +++-------
>  5 files changed, 27 insertions(+), 40 deletions(-)
>
> diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
> index 5b77bbc28f969..819046974b997 100644
> --- a/arch/x86/include/asm/kexec.h
> +++ b/arch/x86/include/asm/kexec.h
> @@ -205,8 +205,6 @@ int arch_kimage_file_post_load_cleanup(struct kimage *image);
>  #endif
>  #endif
>
> -typedef void crash_vmclear_fn(void);
> -extern crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss;
>  extern void kdump_nmi_shootdown_cpus(void);
>
>  #endif /* __ASSEMBLY__ */
> diff --git a/arch/x86/include/asm/reboot.h b/arch/x86/include/asm/reboot.h
> index 9177b4354c3f5..dc201724a6433 100644
> --- a/arch/x86/include/asm/reboot.h
> +++ b/arch/x86/include/asm/reboot.h
> @@ -25,6 +25,8 @@ void __noreturn machine_real_restart(unsigned int type);
>  #define MRR_BIOS       0
>  #define MRR_APM                1
>
> +typedef void crash_vmclear_fn(void);
> +extern crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss;
>  void cpu_emergency_disable_virtualization(void);
>
>  typedef void (*nmi_shootdown_cb)(int, struct pt_regs*);
> diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
> index cdd92ab43cda4..54cd959cb3160 100644
> --- a/arch/x86/kernel/crash.c
> +++ b/arch/x86/kernel/crash.c
> @@ -48,38 +48,12 @@ struct crash_memmap_data {
>         unsigned int type;
>  };
>
> -/*
> - * This is used to VMCLEAR all VMCSs loaded on the
> - * processor. And when loading kvm_intel module, the
> - * callback function pointer will be assigned.
> - *
> - * protected by rcu.
> - */
> -crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss = NULL;
> -EXPORT_SYMBOL_GPL(crash_vmclear_loaded_vmcss);
> -
> -static inline void cpu_crash_vmclear_loaded_vmcss(void)
> -{
> -       crash_vmclear_fn *do_vmclear_operation = NULL;
> -
> -       rcu_read_lock();
> -       do_vmclear_operation = rcu_dereference(crash_vmclear_loaded_vmcss);
> -       if (do_vmclear_operation)
> -               do_vmclear_operation();
> -       rcu_read_unlock();
> -}
> -
>  #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)
>
>  static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
>  {
>         crash_save_cpu(regs, cpu);
>
> -       /*
> -        * VMCLEAR VMCSs loaded on all cpus if needed.
> -        */
> -       cpu_crash_vmclear_loaded_vmcss();
> -
>         /*
>          * Disable Intel PT to stop its logging
>          */
> @@ -133,11 +107,6 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
>
>         crash_smp_send_stop();
>
> -       /*
> -        * VMCLEAR VMCSs loaded on this cpu if needed.
> -        */
> -       cpu_crash_vmclear_loaded_vmcss();
> -
>         cpu_emergency_disable_virtualization();
>
>         /*
> diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
> index 3adbe97015c13..3fa4c6717a1db 100644
> --- a/arch/x86/kernel/reboot.c
> +++ b/arch/x86/kernel/reboot.c
> @@ -787,6 +787,26 @@ void machine_crash_shutdown(struct pt_regs *regs)
>  }
>  #endif
>
> +/*
> + * This is used to VMCLEAR all VMCSs loaded on the
> + * processor. And when loading kvm_intel module, the
> + * callback function pointer will be assigned.
> + *
> + * protected by rcu.
> + */
> +crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss;
> +EXPORT_SYMBOL_GPL(crash_vmclear_loaded_vmcss);
> +
> +static inline void cpu_crash_vmclear_loaded_vmcss(void)
> +{
> +       crash_vmclear_fn *do_vmclear_operation = NULL;
> +
> +       rcu_read_lock();
> +       do_vmclear_operation = rcu_dereference(crash_vmclear_loaded_vmcss);
> +       if (do_vmclear_operation)
> +               do_vmclear_operation();
> +       rcu_read_unlock();
> +}
>
>  /* This is the CPU performing the emergency shutdown work. */
>  int crashing_cpu = -1;
> @@ -798,6 +818,8 @@ int crashing_cpu = -1;
>   */
>  void cpu_emergency_disable_virtualization(void)
>  {
> +       cpu_crash_vmclear_loaded_vmcss();
> +
>         cpu_emergency_vmxoff();
>         cpu_emergency_svm_disable();
>  }
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index df461f387e20d..f60fb79fea881 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -41,7 +41,7 @@
>  #include <asm/idtentry.h>
>  #include <asm/io.h>
>  #include <asm/irq_remapping.h>
> -#include <asm/kexec.h>
> +#include <asm/reboot.h>
>  #include <asm/perf_event.h>
>  #include <asm/mmu_context.h>
>  #include <asm/mshyperv.h>
> @@ -754,7 +754,6 @@ static int vmx_set_guest_uret_msr(struct vcpu_vmx *vmx,
>         return ret;
>  }
>
> -#ifdef CONFIG_KEXEC_CORE
>  static void crash_vmclear_local_loaded_vmcss(void)
>  {
>         int cpu = raw_smp_processor_id();
> @@ -764,7 +763,6 @@ static void crash_vmclear_local_loaded_vmcss(void)
>                             loaded_vmcss_on_cpu_link)
>                 vmcs_clear(v->vmcs);
>  }
> -#endif /* CONFIG_KEXEC_CORE */
>
>  static void __loaded_vmcs_clear(void *arg)
>  {
> @@ -8622,10 +8620,9 @@ static void __vmx_exit(void)
>  {
>         allow_smaller_maxphyaddr = false;
>
> -#ifdef CONFIG_KEXEC_CORE
>         RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL);
>         synchronize_rcu();
> -#endif
> +
>         vmx_cleanup_l1d_flush();
>  }
>
> @@ -8674,10 +8671,9 @@ static int __init vmx_init(void)
>                 pi_init_cpu(cpu);
>         }
>
> -#ifdef CONFIG_KEXEC_CORE
>         rcu_assign_pointer(crash_vmclear_loaded_vmcss,
>                            crash_vmclear_local_loaded_vmcss);
> -#endif
> +
>         vmx_check_vmcs12_offsets();
>
>         /*
> --
> 2.40.1
>


^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2023-10-15 12:00 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-09-14  1:54 [PATCH AUTOSEL 6.5 1/7] x86/reboot: VMCLEAR active VMCSes before emergency reboot Sasha Levin
2023-09-14  1:54 ` [PATCH AUTOSEL 6.5 2/7] ceph: drop messages from MDS when unmounting Sasha Levin
2023-09-14  1:54 ` [PATCH AUTOSEL 6.5 3/7] dma-debug: don't call __dma_entry_alloc_check_leak() under free_entries_lock Sasha Levin
2023-09-14  1:54 ` [PATCH AUTOSEL 6.5 4/7] bpf: Annotate bpf_long_memcpy with data_race Sasha Levin
2023-09-14  1:54 ` [PATCH AUTOSEL 6.5 5/7] ASoC: amd: yc: Add DMI entries to support Victus by HP Gaming Laptop 15-fb0xxx (8A3E) Sasha Levin
2023-09-14  1:54 ` [PATCH AUTOSEL 6.5 6/7] spi: sun6i: reduce DMA RX transfer width to single byte Sasha Levin
2023-09-14  1:54 ` [PATCH AUTOSEL 6.5 7/7] spi: sun6i: fix race between DMA RX transfer completion and RX FIFO drain Sasha Levin
2023-10-15 11:59 ` [PATCH AUTOSEL 6.5 1/7] x86/reboot: VMCLEAR active VMCSes before emergency reboot Paolo Bonzini

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).