All of lore.kernel.org
 help / color / mirror / Atom feed
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: stable@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	patches@lists.linux.dev, Yanjun Zhang <zhangyanjun@cestc.cn>,
	Sagi Grimberg <sagi@grimberg.me>,
	Yanjun Zhang <zhangyanjun@cestc.com>,
	Christoph Hellwig <hch@lst.de>, Sasha Levin <sashal@kernel.org>
Subject: [PATCH 5.4 20/39] nvme-tcp: fix a possible UAF when failing to allocate an io queue
Date: Mon, 24 Apr 2023 15:17:23 +0200	[thread overview]
Message-ID: <20230424131123.804550969@linuxfoundation.org> (raw)
In-Reply-To: <20230424131123.040556994@linuxfoundation.org>

From: Sagi Grimberg <sagi@grimberg.me>

[ Upstream commit 88eaba80328b31ef81813a1207b4056efd7006a6 ]

When we allocate a nvme-tcp queue, we set the data_ready callback before
we actually need to use it. This creates the potential that if a stray
controller sends us data on the socket before we connect, we can trigger
the io_work and start consuming the socket.

In this case reported: we failed to allocate one of the io queues, and
as we start releasing the queues that we already allocated, we get
a UAF [1] from the io_work which is running before it should really.

Fix this by setting the socket ops callbacks only before we start the
queue, so that we can't accidentally schedule the io_work in the
initialization phase before the queue started. While we are at it,
rename nvme_tcp_restore_sock_calls to pair with nvme_tcp_setup_sock_ops.

[1]:
[16802.107284] nvme nvme4: starting error recovery
[16802.109166] nvme nvme4: Reconnecting in 10 seconds...
[16812.173535] nvme nvme4: failed to connect socket: -111
[16812.173745] nvme nvme4: Failed reconnect attempt 1
[16812.173747] nvme nvme4: Reconnecting in 10 seconds...
[16822.413555] nvme nvme4: failed to connect socket: -111
[16822.413762] nvme nvme4: Failed reconnect attempt 2
[16822.413765] nvme nvme4: Reconnecting in 10 seconds...
[16832.661274] nvme nvme4: creating 32 I/O queues.
[16833.919887] BUG: kernel NULL pointer dereference, address: 0000000000000088
[16833.920068] nvme nvme4: Failed reconnect attempt 3
[16833.920094] #PF: supervisor write access in kernel mode
[16833.920261] nvme nvme4: Reconnecting in 10 seconds...
[16833.920368] #PF: error_code(0x0002) - not-present page
[16833.921086] Workqueue: nvme_tcp_wq nvme_tcp_io_work [nvme_tcp]
[16833.921191] RIP: 0010:_raw_spin_lock_bh+0x17/0x30
...
[16833.923138] Call Trace:
[16833.923271]  <TASK>
[16833.923402]  lock_sock_nested+0x1e/0x50
[16833.923545]  nvme_tcp_try_recv+0x40/0xa0 [nvme_tcp]
[16833.923685]  nvme_tcp_io_work+0x68/0xa0 [nvme_tcp]
[16833.923824]  process_one_work+0x1e8/0x390
[16833.923969]  worker_thread+0x53/0x3d0
[16833.924104]  ? process_one_work+0x390/0x390
[16833.924240]  kthread+0x124/0x150
[16833.924376]  ? set_kthread_struct+0x50/0x50
[16833.924518]  ret_from_fork+0x1f/0x30
[16833.924655]  </TASK>

Reported-by: Yanjun Zhang <zhangyanjun@cestc.cn>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Tested-by: Yanjun Zhang <zhangyanjun@cestc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/nvme/host/tcp.c | 46 +++++++++++++++++++++++------------------
 1 file changed, 26 insertions(+), 20 deletions(-)

diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 3169859cd3906..4250081595c14 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -1387,22 +1387,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
 	if (ret)
 		goto err_init_connect;
 
-	queue->rd_enabled = true;
 	set_bit(NVME_TCP_Q_ALLOCATED, &queue->flags);
-	nvme_tcp_init_recv_ctx(queue);
-
-	write_lock_bh(&queue->sock->sk->sk_callback_lock);
-	queue->sock->sk->sk_user_data = queue;
-	queue->state_change = queue->sock->sk->sk_state_change;
-	queue->data_ready = queue->sock->sk->sk_data_ready;
-	queue->write_space = queue->sock->sk->sk_write_space;
-	queue->sock->sk->sk_data_ready = nvme_tcp_data_ready;
-	queue->sock->sk->sk_state_change = nvme_tcp_state_change;
-	queue->sock->sk->sk_write_space = nvme_tcp_write_space;
-#ifdef CONFIG_NET_RX_BUSY_POLL
-	queue->sock->sk->sk_ll_usec = 1;
-#endif
-	write_unlock_bh(&queue->sock->sk->sk_callback_lock);
 
 	return 0;
 
@@ -1419,7 +1404,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
 	return ret;
 }
 
-static void nvme_tcp_restore_sock_calls(struct nvme_tcp_queue *queue)
+static void nvme_tcp_restore_sock_ops(struct nvme_tcp_queue *queue)
 {
 	struct socket *sock = queue->sock;
 
@@ -1434,7 +1419,7 @@ static void nvme_tcp_restore_sock_calls(struct nvme_tcp_queue *queue)
 static void __nvme_tcp_stop_queue(struct nvme_tcp_queue *queue)
 {
 	kernel_sock_shutdown(queue->sock, SHUT_RDWR);
-	nvme_tcp_restore_sock_calls(queue);
+	nvme_tcp_restore_sock_ops(queue);
 	cancel_work_sync(&queue->io_work);
 }
 
@@ -1448,21 +1433,42 @@ static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid)
 	__nvme_tcp_stop_queue(queue);
 }
 
+static void nvme_tcp_setup_sock_ops(struct nvme_tcp_queue *queue)
+{
+	write_lock_bh(&queue->sock->sk->sk_callback_lock);
+	queue->sock->sk->sk_user_data = queue;
+	queue->state_change = queue->sock->sk->sk_state_change;
+	queue->data_ready = queue->sock->sk->sk_data_ready;
+	queue->write_space = queue->sock->sk->sk_write_space;
+	queue->sock->sk->sk_data_ready = nvme_tcp_data_ready;
+	queue->sock->sk->sk_state_change = nvme_tcp_state_change;
+	queue->sock->sk->sk_write_space = nvme_tcp_write_space;
+#ifdef CONFIG_NET_RX_BUSY_POLL
+	queue->sock->sk->sk_ll_usec = 1;
+#endif
+	write_unlock_bh(&queue->sock->sk->sk_callback_lock);
+}
+
 static int nvme_tcp_start_queue(struct nvme_ctrl *nctrl, int idx)
 {
 	struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
+	struct nvme_tcp_queue *queue = &ctrl->queues[idx];
 	int ret;
 
+	queue->rd_enabled = true;
+	nvme_tcp_init_recv_ctx(queue);
+	nvme_tcp_setup_sock_ops(queue);
+
 	if (idx)
 		ret = nvmf_connect_io_queue(nctrl, idx, false);
 	else
 		ret = nvmf_connect_admin_queue(nctrl);
 
 	if (!ret) {
-		set_bit(NVME_TCP_Q_LIVE, &ctrl->queues[idx].flags);
+		set_bit(NVME_TCP_Q_LIVE, &queue->flags);
 	} else {
-		if (test_bit(NVME_TCP_Q_ALLOCATED, &ctrl->queues[idx].flags))
-			__nvme_tcp_stop_queue(&ctrl->queues[idx]);
+		if (test_bit(NVME_TCP_Q_ALLOCATED, &queue->flags))
+			__nvme_tcp_stop_queue(queue);
 		dev_err(nctrl->device,
 			"failed to connect queue: %d ret=%d\n", idx, ret);
 	}
-- 
2.39.2




  parent reply	other threads:[~2023-04-24 13:23 UTC|newest]

Thread overview: 46+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-04-24 13:17 [PATCH 5.4 00/39] 5.4.242-rc1 review Greg Kroah-Hartman
2023-04-24 13:17 ` [PATCH 5.4 01/39] ARM: dts: rockchip: fix a typo error for rk3288 spdif node Greg Kroah-Hartman
2023-04-24 13:17 ` [PATCH 5.4 02/39] arm64: dts: meson-g12-common: specify full DMC range Greg Kroah-Hartman
2023-04-24 13:17 ` [PATCH 5.4 03/39] netfilter: br_netfilter: fix recent physdev match breakage Greg Kroah-Hartman
2023-04-24 13:17 ` [PATCH 5.4 04/39] regulator: fan53555: Explicitly include bits header Greg Kroah-Hartman
2023-04-24 13:17 ` [PATCH 5.4 05/39] net: sched: sch_qfq: prevent slab-out-of-bounds in qfq_activate_agg Greg Kroah-Hartman
2023-04-24 13:17 ` [PATCH 5.4 06/39] virtio_net: bugfix overflow inside xdp_linearize_page() Greg Kroah-Hartman
2023-04-24 13:17 ` [PATCH 5.4 07/39] netfilter: nf_tables: fix ifdef to also consider nf_tables=m Greg Kroah-Hartman
2023-04-24 13:17 ` [PATCH 5.4 08/39] i40e: fix accessing vsi->active_filters without holding lock Greg Kroah-Hartman
2023-04-24 13:17 ` [PATCH 5.4 09/39] i40e: fix i40e_setup_misc_vector() error handling Greg Kroah-Hartman
2023-04-24 13:17 ` [PATCH 5.4 10/39] mlxfw: fix null-ptr-deref in mlxfw_mfa2_tlv_next() Greg Kroah-Hartman
2023-04-24 13:17 ` [PATCH 5.4 11/39] bpf: Fix incorrect verifier pruning due to missing register precision taints Greg Kroah-Hartman
2023-04-24 13:17 ` [PATCH 5.4 12/39] e1000e: Disable TSO on i219-LM card to increase speed Greg Kroah-Hartman
2023-04-24 13:17 ` [PATCH 5.4 13/39] f2fs: Fix f2fs_truncate_partial_nodes ftrace event Greg Kroah-Hartman
2023-04-24 13:17 ` [PATCH 5.4 14/39] Input: i8042 - add quirk for Fujitsu Lifebook A574/H Greg Kroah-Hartman
2023-04-24 13:17 ` [PATCH 5.4 15/39] selftests: sigaltstack: fix -Wuninitialized Greg Kroah-Hartman
2023-04-24 13:17 ` [PATCH 5.4 16/39] scsi: megaraid_sas: Fix fw_crash_buffer_show() Greg Kroah-Hartman
2023-04-24 13:17 ` [PATCH 5.4 17/39] scsi: core: Improve scsi_vpd_inquiry() checks Greg Kroah-Hartman
2023-04-24 13:17 ` [PATCH 5.4 18/39] net: dsa: b53: mmap: add phy ops Greg Kroah-Hartman
2023-04-24 13:17 ` [PATCH 5.4 19/39] s390/ptrace: fix PTRACE_GET_LAST_BREAK error handling Greg Kroah-Hartman
2023-04-24 13:17 ` Greg Kroah-Hartman [this message]
2023-04-24 13:17 ` [PATCH 5.4 21/39] xen/netback: use same error messages for same errors Greg Kroah-Hartman
2023-04-24 13:17 ` [PATCH 5.4 22/39] iio: light: tsl2772: fix reading proximity-diodes from device tree Greg Kroah-Hartman
2023-04-24 13:17 ` [PATCH 5.4 23/39] nilfs2: initialize unused bytes in segment summary blocks Greg Kroah-Hartman
2023-04-24 13:17 ` [PATCH 5.4 24/39] memstick: fix memory leak if card device is never registered Greg Kroah-Hartman
2023-04-24 13:17 ` [PATCH 5.4 25/39] mmc: sdhci_am654: Set HIGH_SPEED_ENA for SDR12 and SDR25 Greg Kroah-Hartman
2023-04-24 13:17 ` [PATCH 5.4 26/39] MIPS: Define RUNTIME_DISCARD_EXIT in LD script Greg Kroah-Hartman
2023-04-24 13:17 ` [PATCH 5.4 27/39] x86/purgatory: Dont generate debug info for purgatory.ro Greg Kroah-Hartman
2023-04-24 13:17 ` [PATCH 5.4 28/39] Revert "ext4: fix use-after-free in ext4_xattr_set_entry" Greg Kroah-Hartman
2023-04-24 13:17 ` [PATCH 5.4 29/39] ext4: remove duplicate definition of ext4_xattr_ibody_inline_set() Greg Kroah-Hartman
2023-04-24 13:17 ` [PATCH 5.4 30/39] ext4: fix use-after-free in ext4_xattr_set_entry Greg Kroah-Hartman
2023-04-24 13:17 ` [PATCH 5.4 31/39] udp: Call inet6_destroy_sock() in setsockopt(IPV6_ADDRFORM) Greg Kroah-Hartman
2023-04-24 13:17 ` [PATCH 5.4 32/39] tcp/udp: Call inet6_destroy_sock() in IPv6 sk->sk_destruct() Greg Kroah-Hartman
2023-04-24 13:17 ` [PATCH 5.4 33/39] inet6: Remove inet6_destroy_sock() in sk->sk_prot->destroy() Greg Kroah-Hartman
2023-04-24 13:17 ` [PATCH 5.4 34/39] dccp: Call inet6_destroy_sock() via sk->sk_destruct() Greg Kroah-Hartman
2023-04-24 13:17 ` [PATCH 5.4 35/39] sctp: " Greg Kroah-Hartman
2023-04-24 13:17 ` [PATCH 5.4 36/39] xfs: fix forkoff miscalculation related to XFS_LITINO(mp) Greg Kroah-Hartman
2023-04-24 13:17 ` [PATCH 5.4 37/39] pwm: meson: Explicitly set .polarity in .get_state() Greg Kroah-Hartman
2023-04-24 13:17 ` [PATCH 5.4 38/39] iio: adc: at91-sama5d2_adc: fix an error code in at91_adc_allocate_trigger() Greg Kroah-Hartman
2023-04-24 13:17 ` [PATCH 5.4 39/39] ASN.1: Fix check for strdup() success Greg Kroah-Hartman
2023-04-25  1:04 ` [PATCH 5.4 00/39] 5.4.242-rc1 review Guenter Roeck
2023-04-25 10:44 ` Jon Hunter
2023-04-25 11:13 ` Naresh Kamboju
2023-04-25 13:59 ` Harshit Mogalapalli
2023-04-25 17:39 ` Florian Fainelli
2023-04-26  0:30 ` Shuah Khan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230424131123.804550969@linuxfoundation.org \
    --to=gregkh@linuxfoundation.org \
    --cc=hch@lst.de \
    --cc=patches@lists.linux.dev \
    --cc=sagi@grimberg.me \
    --cc=sashal@kernel.org \
    --cc=stable@vger.kernel.org \
    --cc=zhangyanjun@cestc.cn \
    --cc=zhangyanjun@cestc.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.