All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH for-4.14.y 1/4] cgroup/cpuset: remove circular dependency deadlock
@ 2018-10-10  9:29 Amit Pundir
  2018-10-10  9:29 ` [PATCH for-4.14.y 2/4] ath10k: fix use-after-free in ath10k_wmi_cmd_send_nowait Amit Pundir
                   ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: Amit Pundir @ 2018-10-10  9:29 UTC (permalink / raw)
  To: Greg KH; +Cc: Stable, Prateek Sood, Tejun Heo

From: Prateek Sood <prsood@codeaurora.org>

commit aa24163b2ee5c92120e32e99b5a93143a0f4258e upstream.

Remove a circular dependency deadlock in the scenario where a CPU is being
hotplugged while cgroup and cpuset updates are triggered from
userspace.

Process A => kthreadd => Process B => Process C => Process A

Process A
cpu_subsys_offline();
  cpu_down();
    _cpu_down();
      percpu_down_write(&cpu_hotplug_lock); //held
      cpuhp_invoke_callback();
	     workqueue_offline_cpu();
            queue_work_on(); // unbind_work on system_highpri_wq
               __queue_work();
                 insert_work();
                    wake_up_worker();
            flush_work();
               wait_for_completion();

worker_thread();
   manage_workers();
      create_worker();
	     kthread_create_on_node();
		    wake_up_process(kthreadd_task);

kthreadd
kthreadd();
  kernel_thread();
    do_fork();
      copy_process();
        percpu_down_read(&cgroup_threadgroup_rwsem);
          __rwsem_down_read_failed_common(); //waiting

Process B
kernfs_fop_write();
  cgroup_file_write();
    cgroup_procs_write();
      percpu_down_write(&cgroup_threadgroup_rwsem); //held
      cgroup_attach_task();
        cgroup_migrate();
          cgroup_migrate_execute();
            cpuset_can_attach();
              mutex_lock(&cpuset_mutex); //waiting

Process C
kernfs_fop_write();
  cgroup_file_write();
    cpuset_write_resmask();
      mutex_lock(&cpuset_mutex); //held
      update_cpumask();
        update_cpumasks_hier();
          rebuild_sched_domains_locked();
            get_online_cpus();
              percpu_down_read(&cpu_hotplug_lock); //waiting

Eliminate the deadlock by reversing the locking order for cpuset_mutex and
cpu_hotplug_lock: cpu_hotplug_lock is now taken before cpuset_mutex.

Signed-off-by: Prateek Sood <prsood@codeaurora.org>
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Amit Pundir <amit.pundir@linaro.org>
---
Build tested on 4.14.74 for ARCH=arm/arm64 allmodconfig.

 kernel/cgroup/cpuset.c | 53 ++++++++++++++++++++++++++++----------------------
 1 file changed, 30 insertions(+), 23 deletions(-)

diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 4657e2924ecb..54f4855b92fa 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -817,6 +817,18 @@ static int generate_sched_domains(cpumask_var_t **domains,
 	return ndoms;
 }
 
+static void cpuset_sched_change_begin(void)
+{
+	cpus_read_lock();
+	mutex_lock(&cpuset_mutex);
+}
+
+static void cpuset_sched_change_end(void)
+{
+	mutex_unlock(&cpuset_mutex);
+	cpus_read_unlock();
+}
+
 /*
  * Rebuild scheduler domains.
  *
@@ -826,16 +838,14 @@ static int generate_sched_domains(cpumask_var_t **domains,
  * 'cpus' is removed, then call this routine to rebuild the
  * scheduler's dynamic sched domains.
  *
- * Call with cpuset_mutex held.  Takes get_online_cpus().
  */
-static void rebuild_sched_domains_locked(void)
+static void rebuild_sched_domains_cpuslocked(void)
 {
 	struct sched_domain_attr *attr;
 	cpumask_var_t *doms;
 	int ndoms;
 
 	lockdep_assert_held(&cpuset_mutex);
-	get_online_cpus();
 
 	/*
 	 * We have raced with CPU hotplug. Don't do anything to avoid
@@ -843,27 +853,25 @@ static void rebuild_sched_domains_locked(void)
 	 * Anyways, hotplug work item will rebuild sched domains.
 	 */
 	if (!cpumask_equal(top_cpuset.effective_cpus, cpu_active_mask))
-		goto out;
+		return;
 
 	/* Generate domain masks and attrs */
 	ndoms = generate_sched_domains(&doms, &attr);
 
 	/* Have scheduler rebuild the domains */
 	partition_sched_domains(ndoms, doms, attr);
-out:
-	put_online_cpus();
 }
 #else /* !CONFIG_SMP */
-static void rebuild_sched_domains_locked(void)
+static void rebuild_sched_domains_cpuslocked(void)
 {
 }
 #endif /* CONFIG_SMP */
 
 void rebuild_sched_domains(void)
 {
-	mutex_lock(&cpuset_mutex);
-	rebuild_sched_domains_locked();
-	mutex_unlock(&cpuset_mutex);
+	cpuset_sched_change_begin();
+	rebuild_sched_domains_cpuslocked();
+	cpuset_sched_change_end();
 }
 
 /**
@@ -949,7 +957,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct cpumask *new_cpus)
 	rcu_read_unlock();
 
 	if (need_rebuild_sched_domains)
-		rebuild_sched_domains_locked();
+		rebuild_sched_domains_cpuslocked();
 }
 
 /**
@@ -1281,7 +1289,7 @@ static int update_relax_domain_level(struct cpuset *cs, s64 val)
 		cs->relax_domain_level = val;
 		if (!cpumask_empty(cs->cpus_allowed) &&
 		    is_sched_load_balance(cs))
-			rebuild_sched_domains_locked();
+			rebuild_sched_domains_cpuslocked();
 	}
 
 	return 0;
@@ -1314,7 +1322,6 @@ static void update_tasks_flags(struct cpuset *cs)
  *
  * Call with cpuset_mutex held.
  */
-
 static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
 		       int turning_on)
 {
@@ -1347,7 +1354,7 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
 	spin_unlock_irq(&callback_lock);
 
 	if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed)
-		rebuild_sched_domains_locked();
+		rebuild_sched_domains_cpuslocked();
 
 	if (spread_flag_changed)
 		update_tasks_flags(cs);
@@ -1615,7 +1622,7 @@ static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft,
 	cpuset_filetype_t type = cft->private;
 	int retval = 0;
 
-	mutex_lock(&cpuset_mutex);
+	cpuset_sched_change_begin();
 	if (!is_cpuset_online(cs)) {
 		retval = -ENODEV;
 		goto out_unlock;
@@ -1651,7 +1658,7 @@ static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft,
 		break;
 	}
 out_unlock:
-	mutex_unlock(&cpuset_mutex);
+	cpuset_sched_change_end();
 	return retval;
 }
 
@@ -1662,7 +1669,7 @@ static int cpuset_write_s64(struct cgroup_subsys_state *css, struct cftype *cft,
 	cpuset_filetype_t type = cft->private;
 	int retval = -ENODEV;
 
-	mutex_lock(&cpuset_mutex);
+	cpuset_sched_change_begin();
 	if (!is_cpuset_online(cs))
 		goto out_unlock;
 
@@ -1675,7 +1682,7 @@ static int cpuset_write_s64(struct cgroup_subsys_state *css, struct cftype *cft,
 		break;
 	}
 out_unlock:
-	mutex_unlock(&cpuset_mutex);
+	cpuset_sched_change_end();
 	return retval;
 }
 
@@ -1714,7 +1721,7 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
 	kernfs_break_active_protection(of->kn);
 	flush_work(&cpuset_hotplug_work);
 
-	mutex_lock(&cpuset_mutex);
+	cpuset_sched_change_begin();
 	if (!is_cpuset_online(cs))
 		goto out_unlock;
 
@@ -1738,7 +1745,7 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
 
 	free_trial_cpuset(trialcs);
 out_unlock:
-	mutex_unlock(&cpuset_mutex);
+	cpuset_sched_change_end();
 	kernfs_unbreak_active_protection(of->kn);
 	css_put(&cs->css);
 	flush_workqueue(cpuset_migrate_mm_wq);
@@ -2039,14 +2046,14 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
 /*
  * If the cpuset being removed has its flag 'sched_load_balance'
  * enabled, then simulate turning sched_load_balance off, which
- * will call rebuild_sched_domains_locked().
+ * will call rebuild_sched_domains_cpuslocked().
  */
 
 static void cpuset_css_offline(struct cgroup_subsys_state *css)
 {
 	struct cpuset *cs = css_cs(css);
 
-	mutex_lock(&cpuset_mutex);
+	cpuset_sched_change_begin();
 
 	if (is_sched_load_balance(cs))
 		update_flag(CS_SCHED_LOAD_BALANCE, cs, 0);
@@ -2054,7 +2061,7 @@ static void cpuset_css_offline(struct cgroup_subsys_state *css)
 	cpuset_dec();
 	clear_bit(CS_ONLINE, &cs->flags);
 
-	mutex_unlock(&cpuset_mutex);
+	cpuset_sched_change_end();
 }
 
 static void cpuset_css_free(struct cgroup_subsys_state *css)
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH for-4.14.y 2/4] ath10k: fix use-after-free in ath10k_wmi_cmd_send_nowait
  2018-10-10  9:29 [PATCH for-4.14.y 1/4] cgroup/cpuset: remove circular dependency deadlock Amit Pundir
@ 2018-10-10  9:29 ` Amit Pundir
  2018-10-11  9:25   ` Greg KH
  2018-10-10  9:29 ` [PATCH for-4.14.y 3/4] ath10k: fix kernel panic issue during pci probe Amit Pundir
  2018-10-10  9:29 ` [PATCH for-4.14.y 4/4] nvme_fc: fix ctrl create failures racing with workq items Amit Pundir
  2 siblings, 1 reply; 7+ messages in thread
From: Amit Pundir @ 2018-10-10  9:29 UTC (permalink / raw)
  To: Greg KH; +Cc: Stable, Carl Huang, Kalle Valo

From: Carl Huang <cjhuang@codeaurora.org>

commit 9ef0f58ed7b4a55da4a64641d538e0d9e46579ac upstream.

The skb may be freed in tx completion context before
trace_ath10k_wmi_cmd is called. This can be easily captured when
KASAN (Kernel Address Sanitizer) is enabled. The fix is to move
trace_ath10k_wmi_cmd before the send operation. As ret then has no
meaning in trace_ath10k_wmi_cmd, remove this parameter too.

Signed-off-by: Carl Huang <cjhuang@codeaurora.org>
Tested-by: Brian Norris <briannorris@chromium.org>
Reviewed-by: Brian Norris <briannorris@chromium.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Signed-off-by: Amit Pundir <amit.pundir@linaro.org>
---
To be applied on 4.9.y and 4.4.y as well.
Build tested on 4.14.74, 4.9.131 and 4.4.159 for ARCH=arm/arm64 allmodconfig.

 drivers/net/wireless/ath/ath10k/trace.h | 12 ++++--------
 drivers/net/wireless/ath/ath10k/wmi.c   |  2 +-
 2 files changed, 5 insertions(+), 9 deletions(-)

diff --git a/drivers/net/wireless/ath/ath10k/trace.h b/drivers/net/wireless/ath/ath10k/trace.h
index e0d00cef0bd8..5b974bb76e6c 100644
--- a/drivers/net/wireless/ath/ath10k/trace.h
+++ b/drivers/net/wireless/ath/ath10k/trace.h
@@ -152,10 +152,9 @@ TRACE_EVENT(ath10k_log_dbg_dump,
 );
 
 TRACE_EVENT(ath10k_wmi_cmd,
-	TP_PROTO(struct ath10k *ar, int id, const void *buf, size_t buf_len,
-		 int ret),
+	TP_PROTO(struct ath10k *ar, int id, const void *buf, size_t buf_len),
 
-	TP_ARGS(ar, id, buf, buf_len, ret),
+	TP_ARGS(ar, id, buf, buf_len),
 
 	TP_STRUCT__entry(
 		__string(device, dev_name(ar->dev))
@@ -163,7 +162,6 @@ TRACE_EVENT(ath10k_wmi_cmd,
 		__field(unsigned int, id)
 		__field(size_t, buf_len)
 		__dynamic_array(u8, buf, buf_len)
-		__field(int, ret)
 	),
 
 	TP_fast_assign(
@@ -171,17 +169,15 @@ TRACE_EVENT(ath10k_wmi_cmd,
 		__assign_str(driver, dev_driver_string(ar->dev));
 		__entry->id = id;
 		__entry->buf_len = buf_len;
-		__entry->ret = ret;
 		memcpy(__get_dynamic_array(buf), buf, buf_len);
 	),
 
 	TP_printk(
-		"%s %s id %d len %zu ret %d",
+		"%s %s id %d len %zu",
 		__get_str(driver),
 		__get_str(device),
 		__entry->id,
-		__entry->buf_len,
-		__entry->ret
+		__entry->buf_len
 	)
 );
 
diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c
index 38a97086708b..2ab5311659ea 100644
--- a/drivers/net/wireless/ath/ath10k/wmi.c
+++ b/drivers/net/wireless/ath/ath10k/wmi.c
@@ -1741,8 +1741,8 @@ int ath10k_wmi_cmd_send_nowait(struct ath10k *ar, struct sk_buff *skb,
 	cmd_hdr->cmd_id = __cpu_to_le32(cmd);
 
 	memset(skb_cb, 0, sizeof(*skb_cb));
+	trace_ath10k_wmi_cmd(ar, cmd_id, skb->data, skb->len);
 	ret = ath10k_htc_send(&ar->htc, ar->wmi.eid, skb);
-	trace_ath10k_wmi_cmd(ar, cmd_id, skb->data, skb->len, ret);
 
 	if (ret)
 		goto err_pull;
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH for-4.14.y 3/4] ath10k: fix kernel panic issue during pci probe
  2018-10-10  9:29 [PATCH for-4.14.y 1/4] cgroup/cpuset: remove circular dependency deadlock Amit Pundir
  2018-10-10  9:29 ` [PATCH for-4.14.y 2/4] ath10k: fix use-after-free in ath10k_wmi_cmd_send_nowait Amit Pundir
@ 2018-10-10  9:29 ` Amit Pundir
  2018-10-11  9:28   ` Greg KH
  2018-10-10  9:29 ` [PATCH for-4.14.y 4/4] nvme_fc: fix ctrl create failures racing with workq items Amit Pundir
  2 siblings, 1 reply; 7+ messages in thread
From: Amit Pundir @ 2018-10-10  9:29 UTC (permalink / raw)
  To: Greg KH; +Cc: Stable, Yu Wang, Kalle Valo

From: Yu Wang <yyuwang@codeaurora.org>

commit 50e79e25250bf928369996277e85b00536b380c7 upstream.

If the device is gone during chip reset, ar->normal_mode_fw.board is not
initialized, but ath10k_debug_print_board_info() will try to access its
members, which causes a kernel NULL pointer dereference. This was found
using a faulty device (PCI link went down sometimes) in a random
insmod/rmmod/other-op test.
To fix it, check ar->normal_mode_fw.board before accessing its members.

pci 0000:02:00.0: BAR 0: assigned [mem 0xf7400000-0xf75fffff 64bit]
ath10k_pci 0000:02:00.0: enabling device (0000 -> 0002)
ath10k_pci 0000:02:00.0: pci irq msi oper_irq_mode 2 irq_mode 0 reset_mode 0
ath10k_pci 0000:02:00.0: failed to read device register, device is gone
ath10k_pci 0000:02:00.0: failed to wait for target init: -5
ath10k_pci 0000:02:00.0: failed to warm reset: -5
ath10k_pci 0000:02:00.0: firmware crashed during chip reset
ath10k_pci 0000:02:00.0: firmware crashed! (uuid 5d018951-b8e1-404a-8fde-923078b4423a)
ath10k_pci 0000:02:00.0: (null) target 0x00000000 chip_id 0x00340aff sub 0000:0000
ath10k_pci 0000:02:00.0: kconfig debug 1 debugfs 1 tracing 1 dfs 1 testmode 1
ath10k_pci 0000:02:00.0: firmware ver  api 0 features  crc32 00000000
...
BUG: unable to handle kernel NULL pointer dereference at 00000004
...
Call Trace:
 [<fb4e7882>] ath10k_print_driver_info+0x12/0x20 [ath10k_core]
 [<fb62b7dd>] ath10k_pci_fw_crashed_dump+0x6d/0x4d0 [ath10k_pci]
 [<fb629f07>] ? ath10k_pci_sleep.part.19+0x57/0xc0 [ath10k_pci]
 [<fb62c8ee>] ath10k_pci_hif_power_up+0x14e/0x1b0 [ath10k_pci]
 [<c10477fb>] ? do_page_fault+0xb/0x10
 [<fb4eb934>] ath10k_core_register_work+0x24/0x840 [ath10k_core]
 [<c18a00d8>] ? netlbl_unlhsh_remove+0x178/0x410
 [<c10477f0>] ? __do_page_fault+0x480/0x480
 [<c1068e44>] process_one_work+0x114/0x3e0
 [<c1069d07>] worker_thread+0x37/0x4a0
 [<c106e294>] kthread+0xa4/0xc0
 [<c1069cd0>] ? create_worker+0x180/0x180
 [<c106e1f0>] ? kthread_park+0x50/0x50
 [<c18ab4f7>] ret_from_fork+0x1b/0x28
 Code: 78 80 b8 50 09 00 00 00 75 5d 8d 75 94 c7 44 24 08 aa d7 52 fb c7 44 24 04 64 00 00 00
 89 34 24 e8 82 52 e2 c5 8b 83 dc 08 00 00 <8b> 50 04 8b 08 31 c0 e8 20 57 e3 c5 89 44 24 10 8b 83 58 09 00
 EIP: [<fb4e7754>]-
 ath10k_debug_print_board_info+0x34/0xb0 [ath10k_core]
 SS:ESP 0068:f4921d90
 CR2: 0000000000000004

Signed-off-by: Yu Wang <yyuwang@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
[AmitP: Minor rebasing for 4.14.y and 4.9.y]
Signed-off-by: Amit Pundir <amit.pundir@linaro.org>
---
To be applied on 4.9.y as well.
Build tested on 4.14.74 and 4.9.131 for ARCH=arm/arm64 allmodconfig.

 drivers/net/wireless/ath/ath10k/debug.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/ath/ath10k/debug.c b/drivers/net/wireless/ath/ath10k/debug.c
index df514507d3f1..22003895f854 100644
--- a/drivers/net/wireless/ath/ath10k/debug.c
+++ b/drivers/net/wireless/ath/ath10k/debug.c
@@ -1,6 +1,7 @@
 /*
  * Copyright (c) 2005-2011 Atheros Communications Inc.
  * Copyright (c) 2011-2013 Qualcomm Atheros, Inc.
+ * Copyright (c) 2018, The Linux Foundation. All rights reserved.
  *
  * Permission to use, copy, modify, and/or distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -163,6 +164,8 @@ void ath10k_debug_print_hwfw_info(struct ath10k *ar)
 void ath10k_debug_print_board_info(struct ath10k *ar)
 {
 	char boardinfo[100];
+	const struct firmware *board;
+	u32 crc;
 
 	if (ar->id.bmi_ids_valid)
 		scnprintf(boardinfo, sizeof(boardinfo), "%d:%d",
@@ -170,11 +173,16 @@ void ath10k_debug_print_board_info(struct ath10k *ar)
 	else
 		scnprintf(boardinfo, sizeof(boardinfo), "N/A");
 
+	board = ar->normal_mode_fw.board;
+	if (!IS_ERR_OR_NULL(board))
+		crc = crc32_le(0, board->data, board->size);
+	else
+		crc = 0;
+
 	ath10k_info(ar, "board_file api %d bmi_id %s crc32 %08x",
 		    ar->bd_api,
 		    boardinfo,
-		    crc32_le(0, ar->normal_mode_fw.board->data,
-			     ar->normal_mode_fw.board->size));
+		    crc);
 }
 
 void ath10k_debug_print_boot_info(struct ath10k *ar)
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH for-4.14.y 4/4] nvme_fc: fix ctrl create failures racing with workq items
  2018-10-10  9:29 [PATCH for-4.14.y 1/4] cgroup/cpuset: remove circular dependency deadlock Amit Pundir
  2018-10-10  9:29 ` [PATCH for-4.14.y 2/4] ath10k: fix use-after-free in ath10k_wmi_cmd_send_nowait Amit Pundir
  2018-10-10  9:29 ` [PATCH for-4.14.y 3/4] ath10k: fix kernel panic issue during pci probe Amit Pundir
@ 2018-10-10  9:29 ` Amit Pundir
  2018-10-11  9:28   ` Greg KH
  2 siblings, 1 reply; 7+ messages in thread
From: Amit Pundir @ 2018-10-10  9:29 UTC (permalink / raw)
  To: Greg KH; +Cc: Stable, James Smart, James Smart, Keith Busch, Jens Axboe

From: James Smart <jsmart2021@gmail.com>

commit cf25809bec2c7df4b45df5b2196845d9a4a3c89b upstream.

If there are errors during initial controller create, the transport
will tear down the partially initialized controller struct and free
the ctrl memory.  The trouble is that most of those errors can occur due
to asynchronous events such as I/O timeouts and subsystem
connectivity failures. Those failures invoke async workq items to
reset the controller and attempt reconnect.  Those may be in progress
as the main thread frees the ctrl memory, resulting in a NULL ptr oops.

Prevent this from happening by having the main ctrl failure thread
change state to DELETING and then synchronously cancel any
pending queued work items. The change of state will prevent the
scheduling of resets or reconnect events.

Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Amit Pundir <amit.pundir@linaro.org>
---
Build tested on 4.14.74 for ARCH=arm/arm64 allmodconfig.

 drivers/nvme/host/fc.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 7deb7b5d8683..058d542647dd 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -2868,6 +2868,10 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 	}
 
 	if (ret) {
+		nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING);
+		cancel_work_sync(&ctrl->ctrl.reset_work);
+		cancel_delayed_work_sync(&ctrl->connect_work);
+
 		/* couldn't schedule retry - fail out */
 		dev_err(ctrl->ctrl.device,
 			"NVME-FC{%d}: Connect retry failed\n", ctrl->cnum);
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH for-4.14.y 2/4] ath10k: fix use-after-free in ath10k_wmi_cmd_send_nowait
  2018-10-10  9:29 ` [PATCH for-4.14.y 2/4] ath10k: fix use-after-free in ath10k_wmi_cmd_send_nowait Amit Pundir
@ 2018-10-11  9:25   ` Greg KH
  0 siblings, 0 replies; 7+ messages in thread
From: Greg KH @ 2018-10-11  9:25 UTC (permalink / raw)
  To: Amit Pundir; +Cc: Stable, Carl Huang, Kalle Valo

On Wed, Oct 10, 2018 at 02:59:47PM +0530, Amit Pundir wrote:
> From: Carl Huang <cjhuang@codeaurora.org>
> 
> commit 9ef0f58ed7b4a55da4a64641d538e0d9e46579ac upstream.
> 
> The skb may be freed in tx completion context before
> trace_ath10k_wmi_cmd is called. This can be easily captured when
> KASAN(Kernel Address Sanitizer) is enabled. The fix is to move
> trace_ath10k_wmi_cmd before the send operation. As the ret has no
> meaning in trace_ath10k_wmi_cmd then, so remove this parameter too.
> 
> Signed-off-by: Carl Huang <cjhuang@codeaurora.org>
> Tested-by: Brian Norris <briannorris@chromium.org>
> Reviewed-by: Brian Norris <briannorris@chromium.org>
> Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
> Signed-off-by: Amit Pundir <amit.pundir@linaro.org>
> ---
> To be applied on 4.9.y and 4.4.y as well.
> Build tested on 4.14.74, 4.9.131 and 4.4.159 for ARCH=arm/arm64 allmodconfig.

Applied, thanks.

greg k-h

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH for-4.14.y 3/4] ath10k: fix kernel panic issue during pci probe
  2018-10-10  9:29 ` [PATCH for-4.14.y 3/4] ath10k: fix kernel panic issue during pci probe Amit Pundir
@ 2018-10-11  9:28   ` Greg KH
  0 siblings, 0 replies; 7+ messages in thread
From: Greg KH @ 2018-10-11  9:28 UTC (permalink / raw)
  To: Amit Pundir; +Cc: Stable, Yu Wang, Kalle Valo

On Wed, Oct 10, 2018 at 02:59:48PM +0530, Amit Pundir wrote:
> From: Yu Wang <yyuwang@codeaurora.org>
> 
> commit 50e79e25250bf928369996277e85b00536b380c7 upstream.
> 
> If device gone during chip reset, ar->normal_mode_fw.board is not
> initialized, but ath10k_debug_print_hwfw_info() will try to access its
> member, which will cause 'kernel NULL pointer' issue. This was found
> using a faulty device (pci link went down sometimes) in a random
> insmod/rmmod/other-op test.
> To fix it, check ar->normal_mode_fw.board before accessing the member.
> 
> pci 0000:02:00.0: BAR 0: assigned [mem 0xf7400000-0xf75fffff 64bit]
> ath10k_pci 0000:02:00.0: enabling device (0000 -> 0002)
> ath10k_pci 0000:02:00.0: pci irq msi oper_irq_mode 2 irq_mode 0 reset_mode 0
> ath10k_pci 0000:02:00.0: failed to read device register, device is gone
> ath10k_pci 0000:02:00.0: failed to wait for target init: -5
> ath10k_pci 0000:02:00.0: failed to warm reset: -5
> ath10k_pci 0000:02:00.0: firmware crashed during chip reset
> ath10k_pci 0000:02:00.0: firmware crashed! (uuid 5d018951-b8e1-404a-8fde-923078b4423a)
> ath10k_pci 0000:02:00.0: (null) target 0x00000000 chip_id 0x00340aff sub 0000:0000
> ath10k_pci 0000:02:00.0: kconfig debug 1 debugfs 1 tracing 1 dfs 1 testmode 1
> ath10k_pci 0000:02:00.0: firmware ver  api 0 features  crc32 00000000
> ...
> BUG: unable to handle kernel NULL pointer dereference at 00000004
> ...
> Call Trace:
>  [<fb4e7882>] ath10k_print_driver_info+0x12/0x20 [ath10k_core]
>  [<fb62b7dd>] ath10k_pci_fw_crashed_dump+0x6d/0x4d0 [ath10k_pci]
>  [<fb629f07>] ? ath10k_pci_sleep.part.19+0x57/0xc0 [ath10k_pci]
>  [<fb62c8ee>] ath10k_pci_hif_power_up+0x14e/0x1b0 [ath10k_pci]
>  [<c10477fb>] ? do_page_fault+0xb/0x10
>  [<fb4eb934>] ath10k_core_register_work+0x24/0x840 [ath10k_core]
>  [<c18a00d8>] ? netlbl_unlhsh_remove+0x178/0x410
>  [<c10477f0>] ? __do_page_fault+0x480/0x480
>  [<c1068e44>] process_one_work+0x114/0x3e0
>  [<c1069d07>] worker_thread+0x37/0x4a0
>  [<c106e294>] kthread+0xa4/0xc0
>  [<c1069cd0>] ? create_worker+0x180/0x180
>  [<c106e1f0>] ? kthread_park+0x50/0x50
>  [<c18ab4f7>] ret_from_fork+0x1b/0x28
>  Code: 78 80 b8 50 09 00 00 00 75 5d 8d 75 94 c7 44 24 08 aa d7 52 fb c7 44 24 04 64 00 00 00
>  89 34 24 e8 82 52 e2 c5 8b 83 dc 08 00 00 <8b> 50 04 8b 08 31 c0 e8 20 57 e3 c5 89 44 24 10 8b 83 58 09 00
>  EIP: [<fb4e7754>]-
>  ath10k_debug_print_board_info+0x34/0xb0 [ath10k_core]
>  SS:ESP 0068:f4921d90
>  CR2: 0000000000000004
> 
> Signed-off-by: Yu Wang <yyuwang@codeaurora.org>
> Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
> [AmitP: Minor rebasing for 4.14.y and 4.9.y]
> Signed-off-by: Amit Pundir <amit.pundir@linaro.org>
> ---
> To be applied on 4.9.y as well.
> Build tested on 4.14.74 and 4.9.131 for ARCH=arm/arm64 allmodconfig.

Now applied, thanks.

greg k-h

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH for-4.14.y 4/4] nvme_fc: fix ctrl create failures racing with workq items
  2018-10-10  9:29 ` [PATCH for-4.14.y 4/4] nvme_fc: fix ctrl create failures racing with workq items Amit Pundir
@ 2018-10-11  9:28   ` Greg KH
  0 siblings, 0 replies; 7+ messages in thread
From: Greg KH @ 2018-10-11  9:28 UTC (permalink / raw)
  To: Amit Pundir; +Cc: Stable, James Smart, James Smart, Keith Busch, Jens Axboe

On Wed, Oct 10, 2018 at 02:59:49PM +0530, Amit Pundir wrote:
> From: James Smart <jsmart2021@gmail.com>
> 
> commit cf25809bec2c7df4b45df5b2196845d9a4a3c89b upstream.
> 
> If there are errors during initial controller create, the transport
> will teardown the partially initialized controller struct and free
> the ctlr memory.  Trouble is - most of those errors can occur due
> to asynchronous events happening such io timeouts and subsystem
> connectivity failures. Those failures invoke async workq items to
> reset the controller and attempt reconnect.  Those may be in progress
> as the main thread frees the ctrl memory, resulting in NULL ptr oops.
> 
> Prevent this from happening by having the main ctrl failure thread
> changing state to DELETING followed by synchronously cancelling any
> pending queued work item. The change of state will prevent the
> scheduling of resets or reconnect events.
> 
> Signed-off-by: James Smart <james.smart@broadcom.com>
> Signed-off-by: Keith Busch <keith.busch@intel.com>
> Signed-off-by: Jens Axboe <axboe@kernel.dk>
> Signed-off-by: Amit Pundir <amit.pundir@linaro.org>
> ---
> Build tested on 4.14.74 for ARCH=arm/arm64 allmodconfig.

Now applied, thanks.

greg k-h

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2018-10-11 16:55 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-10-10  9:29 [PATCH for-4.14.y 1/4] cgroup/cpuset: remove circular dependency deadlock Amit Pundir
2018-10-10  9:29 ` [PATCH for-4.14.y 2/4] ath10k: fix use-after-free in ath10k_wmi_cmd_send_nowait Amit Pundir
2018-10-11  9:25   ` Greg KH
2018-10-10  9:29 ` [PATCH for-4.14.y 3/4] ath10k: fix kernel panic issue during pci probe Amit Pundir
2018-10-11  9:28   ` Greg KH
2018-10-10  9:29 ` [PATCH for-4.14.y 4/4] nvme_fc: fix ctrl create failures racing with workq items Amit Pundir
2018-10-11  9:28   ` Greg KH

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.