linux-rdma.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH AUTOSEL 5.3 19/33] net/rds: Whitelist rdma_cookie and rx_tstamp for usercopy
       [not found] <20191025135505.24762-1-sashal@kernel.org>
@ 2019-10-25 13:54 ` Sasha Levin
  2019-10-25 13:54 ` [PATCH AUTOSEL 5.3 27/33] RDMA/siw: Fix serialization issue in write_space() Sasha Levin
  2019-10-25 13:55 ` [PATCH AUTOSEL 5.3 28/33] RDMA/iwcm: Fix a lock inversion issue Sasha Levin
  2 siblings, 0 replies; 3+ messages in thread
From: Sasha Levin @ 2019-10-25 13:54 UTC (permalink / raw)
  To: linux-kernel, stable
  Cc: Dag Moxnes, Jenny, Santosh Shilimkar, David S . Miller,
	Sasha Levin, netdev, linux-rdma

From: Dag Moxnes <dag.moxnes@oracle.com>

[ Upstream commit bf1867db9b850fff2dd54a1a117a684a10b8cd90 ]

Add the RDMA cookie and RX timestamp to the usercopy whitelist.

After the introduction of hardened usercopy whitelisting
(https://lwn.net/Articles/727322/), a warning is displayed when the
RDMA cookie or RX timestamp is copied to userspace:

kernel: WARNING: CPU: 3 PID: 5750 at
mm/usercopy.c:81 usercopy_warn+0x8e/0xa6
[...]
kernel: Call Trace:
kernel: __check_heap_object+0xb8/0x11b
kernel: __check_object_size+0xe3/0x1bc
kernel: put_cmsg+0x95/0x115
kernel: rds_recvmsg+0x43d/0x620 [rds]
kernel: sock_recvmsg+0x43/0x4a
kernel: ___sys_recvmsg+0xda/0x1e6
kernel: ? __handle_mm_fault+0xcae/0xf79
kernel: __sys_recvmsg+0x51/0x8a
kernel: SyS_recvmsg+0x12/0x1c
kernel: do_syscall_64+0x79/0x1ae

When the whitelisting feature was introduced, the memory for the RDMA
cookie and RX timestamp in RDS was not added to the whitelist, causing
the warning above.

Signed-off-by: Dag Moxnes <dag.moxnes@oracle.com>
Tested-by: Jenny <jenny.x.xu@oracle.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 net/rds/ib_recv.c | 11 ++++++++---
 net/rds/rds.h     |  9 +++++++--
 net/rds/recv.c    | 22 ++++++++++++----------
 3 files changed, 27 insertions(+), 15 deletions(-)

diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index 3cae88cbdaa02..fecd0abdc7e8e 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -1038,9 +1038,14 @@ int rds_ib_recv_init(void)
 	si_meminfo(&si);
 	rds_ib_sysctl_max_recv_allocation = si.totalram / 3 * PAGE_SIZE / RDS_FRAG_SIZE;
 
-	rds_ib_incoming_slab = kmem_cache_create("rds_ib_incoming",
-					sizeof(struct rds_ib_incoming),
-					0, SLAB_HWCACHE_ALIGN, NULL);
+	rds_ib_incoming_slab =
+		kmem_cache_create_usercopy("rds_ib_incoming",
+					   sizeof(struct rds_ib_incoming),
+					   0, SLAB_HWCACHE_ALIGN,
+					   offsetof(struct rds_ib_incoming,
+						    ii_inc.i_usercopy),
+					   sizeof(struct rds_inc_usercopy),
+					   NULL);
 	if (!rds_ib_incoming_slab)
 		goto out;
 
diff --git a/net/rds/rds.h b/net/rds/rds.h
index f0066d1684993..e792a67dd5788 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -271,6 +271,12 @@ struct rds_ext_header_rdma_dest {
 #define	RDS_MSG_RX_END		2
 #define	RDS_MSG_RX_CMSG		3
 
+/* The following values are whitelisted for usercopy */
+struct rds_inc_usercopy {
+	rds_rdma_cookie_t	rdma_cookie;
+	ktime_t			rx_tstamp;
+};
+
 struct rds_incoming {
 	refcount_t		i_refcount;
 	struct list_head	i_item;
@@ -280,8 +286,7 @@ struct rds_incoming {
 	unsigned long		i_rx_jiffies;
 	struct in6_addr		i_saddr;
 
-	rds_rdma_cookie_t	i_rdma_cookie;
-	ktime_t			i_rx_tstamp;
+	struct rds_inc_usercopy i_usercopy;
 	u64			i_rx_lat_trace[RDS_RX_MAX_TRACES];
 };
 
diff --git a/net/rds/recv.c b/net/rds/recv.c
index a42ba7fa06d5d..c8404971d5ab3 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -47,8 +47,8 @@ void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn,
 	INIT_LIST_HEAD(&inc->i_item);
 	inc->i_conn = conn;
 	inc->i_saddr = *saddr;
-	inc->i_rdma_cookie = 0;
-	inc->i_rx_tstamp = ktime_set(0, 0);
+	inc->i_usercopy.rdma_cookie = 0;
+	inc->i_usercopy.rx_tstamp = ktime_set(0, 0);
 
 	memset(inc->i_rx_lat_trace, 0, sizeof(inc->i_rx_lat_trace));
 }
@@ -62,8 +62,8 @@ void rds_inc_path_init(struct rds_incoming *inc, struct rds_conn_path *cp,
 	inc->i_conn = cp->cp_conn;
 	inc->i_conn_path = cp;
 	inc->i_saddr = *saddr;
-	inc->i_rdma_cookie = 0;
-	inc->i_rx_tstamp = ktime_set(0, 0);
+	inc->i_usercopy.rdma_cookie = 0;
+	inc->i_usercopy.rx_tstamp = ktime_set(0, 0);
 }
 EXPORT_SYMBOL_GPL(rds_inc_path_init);
 
@@ -186,7 +186,7 @@ static void rds_recv_incoming_exthdrs(struct rds_incoming *inc, struct rds_sock
 		case RDS_EXTHDR_RDMA_DEST:
 			/* We ignore the size for now. We could stash it
 			 * somewhere and use it for error checking. */
-			inc->i_rdma_cookie = rds_rdma_make_cookie(
+			inc->i_usercopy.rdma_cookie = rds_rdma_make_cookie(
 					be32_to_cpu(buffer.rdma_dest.h_rdma_rkey),
 					be32_to_cpu(buffer.rdma_dest.h_rdma_offset));
 
@@ -380,7 +380,7 @@ void rds_recv_incoming(struct rds_connection *conn, struct in6_addr *saddr,
 				      be32_to_cpu(inc->i_hdr.h_len),
 				      inc->i_hdr.h_dport);
 		if (sock_flag(sk, SOCK_RCVTSTAMP))
-			inc->i_rx_tstamp = ktime_get_real();
+			inc->i_usercopy.rx_tstamp = ktime_get_real();
 		rds_inc_addref(inc);
 		inc->i_rx_lat_trace[RDS_MSG_RX_END] = local_clock();
 		list_add_tail(&inc->i_item, &rs->rs_recv_queue);
@@ -540,16 +540,18 @@ static int rds_cmsg_recv(struct rds_incoming *inc, struct msghdr *msg,
 {
 	int ret = 0;
 
-	if (inc->i_rdma_cookie) {
+	if (inc->i_usercopy.rdma_cookie) {
 		ret = put_cmsg(msg, SOL_RDS, RDS_CMSG_RDMA_DEST,
-				sizeof(inc->i_rdma_cookie), &inc->i_rdma_cookie);
+				sizeof(inc->i_usercopy.rdma_cookie),
+				&inc->i_usercopy.rdma_cookie);
 		if (ret)
 			goto out;
 	}
 
-	if ((inc->i_rx_tstamp != 0) &&
+	if ((inc->i_usercopy.rx_tstamp != 0) &&
 	    sock_flag(rds_rs_to_sk(rs), SOCK_RCVTSTAMP)) {
-		struct __kernel_old_timeval tv = ns_to_kernel_old_timeval(inc->i_rx_tstamp);
+		struct __kernel_old_timeval tv =
+			ns_to_kernel_old_timeval(inc->i_usercopy.rx_tstamp);
 
 		if (!sock_flag(rds_rs_to_sk(rs), SOCK_TSTAMP_NEW)) {
 			ret = put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD,
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [PATCH AUTOSEL 5.3 27/33] RDMA/siw: Fix serialization issue in write_space()
       [not found] <20191025135505.24762-1-sashal@kernel.org>
  2019-10-25 13:54 ` [PATCH AUTOSEL 5.3 19/33] net/rds: Whitelist rdma_cookie and rx_tstamp for usercopy Sasha Levin
@ 2019-10-25 13:54 ` Sasha Levin
  2019-10-25 13:55 ` [PATCH AUTOSEL 5.3 28/33] RDMA/iwcm: Fix a lock inversion issue Sasha Levin
  2 siblings, 0 replies; 3+ messages in thread
From: Sasha Levin @ 2019-10-25 13:54 UTC (permalink / raw)
  To: linux-kernel, stable
  Cc: Krishnamraju Eraparaju, Bernard Metzler, Jason Gunthorpe,
	Sasha Levin, linux-rdma

From: Krishnamraju Eraparaju <krishna2@chelsio.com>

[ Upstream commit df791c54d627bae53c9be3be40a69594c55de487 ]

In siw_qp_llp_write_space(), 'sock' members should be accessed with
sk_callback_lock held, otherwise, it could race with
siw_sk_restore_upcalls(). And this could cause "NULL deref" panic.  Below
panic is due to the NULL cep returned from sk_to_cep(sk):

  Call Trace:
   <IRQ>    siw_qp_llp_write_space+0x11/0x40 [siw]
   tcp_check_space+0x4c/0xf0
   tcp_rcv_established+0x52b/0x630
   tcp_v4_do_rcv+0xf4/0x1e0
   tcp_v4_rcv+0x9b8/0xab0
   ip_protocol_deliver_rcu+0x2c/0x1c0
   ip_local_deliver_finish+0x44/0x50
   ip_local_deliver+0x6b/0xf0
   ? ip_protocol_deliver_rcu+0x1c0/0x1c0
   ip_rcv+0x52/0xd0
   ? ip_rcv_finish_core.isra.14+0x390/0x390
   __netif_receive_skb_one_core+0x83/0xa0
   netif_receive_skb_internal+0x73/0xb0
   napi_gro_frags+0x1ff/0x2b0
   t4_ethrx_handler+0x4a7/0x740 [cxgb4]
   process_responses+0x2c9/0x590 [cxgb4]
   ? t4_sge_intr_msix+0x1d/0x30 [cxgb4]
   ? handle_irq_event_percpu+0x51/0x70
   ? handle_irq_event+0x41/0x60
   ? handle_edge_irq+0x97/0x1a0
   napi_rx_handler+0x14/0xe0 [cxgb4]
   net_rx_action+0x2af/0x410
   __do_softirq+0xda/0x2a8
   do_softirq_own_stack+0x2a/0x40
   </IRQ>
   do_softirq+0x50/0x60
   __local_bh_enable_ip+0x50/0x60
   ip_finish_output2+0x18f/0x520
   ip_output+0x6e/0xf0
   ? __ip_finish_output+0x1f0/0x1f0
   __ip_queue_xmit+0x14f/0x3d0
   ? __slab_alloc+0x4b/0x58
   __tcp_transmit_skb+0x57d/0xa60
   tcp_write_xmit+0x23b/0xfd0
   __tcp_push_pending_frames+0x2e/0xf0
   tcp_sendmsg_locked+0x939/0xd50
   tcp_sendmsg+0x27/0x40
   sock_sendmsg+0x57/0x80
   siw_tx_hdt+0x894/0xb20 [siw]
   ? find_busiest_group+0x3e/0x5b0
   ? common_interrupt+0xa/0xf
   ? common_interrupt+0xa/0xf
   ? common_interrupt+0xa/0xf
   siw_qp_sq_process+0xf1/0xe60 [siw]
   ? __wake_up_common_lock+0x87/0xc0
   siw_sq_resume+0x33/0xe0 [siw]
   siw_run_sq+0xac/0x190 [siw]
   ? remove_wait_queue+0x60/0x60
   kthread+0xf8/0x130
   ? siw_sq_resume+0xe0/0xe0 [siw]
   ? kthread_bind+0x10/0x10
   ret_from_fork+0x35/0x40

Fixes: f29dd55b0236 ("rdma/siw: queue pair methods")
Link: https://lore.kernel.org/r/20190923101112.32685-1-krishna2@chelsio.com
Signed-off-by: Krishnamraju Eraparaju <krishna2@chelsio.com>
Reviewed-by: Bernard Metzler <bmt@zurich.ibm.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/infiniband/sw/siw/siw_qp.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/drivers/infiniband/sw/siw/siw_qp.c b/drivers/infiniband/sw/siw/siw_qp.c
index 430314c8abd94..52d402f39df93 100644
--- a/drivers/infiniband/sw/siw/siw_qp.c
+++ b/drivers/infiniband/sw/siw/siw_qp.c
@@ -182,12 +182,19 @@ void siw_qp_llp_close(struct siw_qp *qp)
  */
 void siw_qp_llp_write_space(struct sock *sk)
 {
-	struct siw_cep *cep = sk_to_cep(sk);
+	struct siw_cep *cep;
 
-	cep->sk_write_space(sk);
+	read_lock(&sk->sk_callback_lock);
+
+	cep  = sk_to_cep(sk);
+	if (cep) {
+		cep->sk_write_space(sk);
 
-	if (!test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))
-		(void)siw_sq_start(cep->qp);
+		if (!test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))
+			(void)siw_sq_start(cep->qp);
+	}
+
+	read_unlock(&sk->sk_callback_lock);
 }
 
 static int siw_qp_readq_init(struct siw_qp *qp, int irq_size, int orq_size)
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [PATCH AUTOSEL 5.3 28/33] RDMA/iwcm: Fix a lock inversion issue
       [not found] <20191025135505.24762-1-sashal@kernel.org>
  2019-10-25 13:54 ` [PATCH AUTOSEL 5.3 19/33] net/rds: Whitelist rdma_cookie and rx_tstamp for usercopy Sasha Levin
  2019-10-25 13:54 ` [PATCH AUTOSEL 5.3 27/33] RDMA/siw: Fix serialization issue in write_space() Sasha Levin
@ 2019-10-25 13:55 ` Sasha Levin
  2 siblings, 0 replies; 3+ messages in thread
From: Sasha Levin @ 2019-10-25 13:55 UTC (permalink / raw)
  To: linux-kernel, stable
  Cc: Bart Van Assche, Jason Gunthorpe, Sasha Levin, linux-rdma

From: Bart Van Assche <bvanassche@acm.org>

[ Upstream commit b66f31efbdad95ec274345721d99d1d835e6de01 ]

This patch fixes the lock inversion complaint:

============================================
WARNING: possible recursive locking detected
5.3.0-rc7-dbg+ #1 Not tainted
--------------------------------------------
kworker/u16:6/171 is trying to acquire lock:
00000000035c6e6c (&id_priv->handler_mutex){+.+.}, at: rdma_destroy_id+0x78/0x4a0 [rdma_cm]

but task is already holding lock:
00000000bc7c307d (&id_priv->handler_mutex){+.+.}, at: iw_conn_req_handler+0x151/0x680 [rdma_cm]

other info that might help us debug this:
 Possible unsafe locking scenario:

       CPU0
       ----
  lock(&id_priv->handler_mutex);
  lock(&id_priv->handler_mutex);

 *** DEADLOCK ***

 May be due to missing lock nesting notation

3 locks held by kworker/u16:6/171:
 #0: 00000000e2eaa773 ((wq_completion)iw_cm_wq){+.+.}, at: process_one_work+0x472/0xac0
 #1: 000000001efd357b ((work_completion)(&work->work)#3){+.+.}, at: process_one_work+0x476/0xac0
 #2: 00000000bc7c307d (&id_priv->handler_mutex){+.+.}, at: iw_conn_req_handler+0x151/0x680 [rdma_cm]

stack backtrace:
CPU: 3 PID: 171 Comm: kworker/u16:6 Not tainted 5.3.0-rc7-dbg+ #1
Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
Workqueue: iw_cm_wq cm_work_handler [iw_cm]
Call Trace:
 dump_stack+0x8a/0xd6
 __lock_acquire.cold+0xe1/0x24d
 lock_acquire+0x106/0x240
 __mutex_lock+0x12e/0xcb0
 mutex_lock_nested+0x1f/0x30
 rdma_destroy_id+0x78/0x4a0 [rdma_cm]
 iw_conn_req_handler+0x5c9/0x680 [rdma_cm]
 cm_work_handler+0xe62/0x1100 [iw_cm]
 process_one_work+0x56d/0xac0
 worker_thread+0x7a/0x5d0
 kthread+0x1bc/0x210
 ret_from_fork+0x24/0x30

This is not a bug as there are actually two lock classes here.

Link: https://lore.kernel.org/r/20190930231707.48259-3-bvanassche@acm.org
Fixes: de910bd92137 ("RDMA/cma: Simplify locking needed for serialization of callbacks")
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Reviewed-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/infiniband/core/cma.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index a68d0ccf67a43..2e48b59926c19 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -2396,9 +2396,10 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
 		conn_id->cm_id.iw = NULL;
 		cma_exch(conn_id, RDMA_CM_DESTROYING);
 		mutex_unlock(&conn_id->handler_mutex);
+		mutex_unlock(&listen_id->handler_mutex);
 		cma_deref_id(conn_id);
 		rdma_destroy_id(&conn_id->id);
-		goto out;
+		return ret;
 	}
 
 	mutex_unlock(&conn_id->handler_mutex);
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2019-10-25 14:06 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <20191025135505.24762-1-sashal@kernel.org>
2019-10-25 13:54 ` [PATCH AUTOSEL 5.3 19/33] net/rds: Whitelist rdma_cookie and rx_tstamp for usercopy Sasha Levin
2019-10-25 13:54 ` [PATCH AUTOSEL 5.3 27/33] RDMA/siw: Fix serialization issue in write_space() Sasha Levin
2019-10-25 13:55 ` [PATCH AUTOSEL 5.3 28/33] RDMA/iwcm: Fix a lock inversion issue Sasha Levin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).