netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH net 1/2] net/rds: Replace direct refcount_inc() by inline function
@ 2020-04-07 16:08 Ka-Cheong Poon
  2020-04-07 16:08 ` [PATCH net 2/2] net/rds: Fix MR reference counting problem Ka-Cheong Poon
  2020-04-07 18:48 ` [PATCH net 1/2] net/rds: Replace direct refcount_inc() by inline function Leon Romanovsky
  0 siblings, 2 replies; 9+ messages in thread
From: Ka-Cheong Poon @ 2020-04-07 16:08 UTC (permalink / raw)
  To: netdev; +Cc: santosh.shilimkar, davem, rds-devel, sironhide0null

Added rds_ib_dev_get() and rds_mr_get() to improve code readability.

Signed-off-by: Ka-Cheong Poon <ka-cheong.poon@oracle.com>
---
 net/rds/ib.c      | 8 ++++----
 net/rds/ib.h      | 5 +++++
 net/rds/ib_rdma.c | 6 +++---
 net/rds/rdma.c    | 8 ++++----
 net/rds/rds.h     | 5 +++++
 5 files changed, 21 insertions(+), 11 deletions(-)

diff --git a/net/rds/ib.c b/net/rds/ib.c
index a792d8a..c16cb1a 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2006, 2020 Oracle and/or its affiliates. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -224,10 +224,10 @@ static void rds_ib_add_one(struct ib_device *device)
 	down_write(&rds_ib_devices_lock);
 	list_add_tail_rcu(&rds_ibdev->list, &rds_ib_devices);
 	up_write(&rds_ib_devices_lock);
-	refcount_inc(&rds_ibdev->refcount);
+	rds_ib_dev_get(rds_ibdev);
 
 	ib_set_client_data(device, &rds_ib_client, rds_ibdev);
-	refcount_inc(&rds_ibdev->refcount);
+	rds_ib_dev_get(rds_ibdev);
 
 	rds_ib_nodev_connect();
 
@@ -258,7 +258,7 @@ struct rds_ib_device *rds_ib_get_client_data(struct ib_device *device)
 	rcu_read_lock();
 	rds_ibdev = ib_get_client_data(device, &rds_ib_client);
 	if (rds_ibdev)
-		refcount_inc(&rds_ibdev->refcount);
+		rds_ib_dev_get(rds_ibdev);
 	rcu_read_unlock();
 	return rds_ibdev;
 }
diff --git a/net/rds/ib.h b/net/rds/ib.h
index 0296f1f..fe7ea4e 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -361,6 +361,11 @@ static inline void rds_ib_dma_sync_sg_for_device(struct ib_device *dev,
 extern struct rds_transport rds_ib_transport;
 struct rds_ib_device *rds_ib_get_client_data(struct ib_device *device);
 void rds_ib_dev_put(struct rds_ib_device *rds_ibdev);
+static inline void rds_ib_dev_get(struct rds_ib_device *rds_ibdev)
+{
+	refcount_inc(&rds_ibdev->refcount);
+}
+
 extern struct ib_client rds_ib_client;
 
 extern unsigned int rds_ib_retry_count;
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index b34b24e..1b942d80 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2006, 2020 Oracle and/or its affiliates. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -56,7 +56,7 @@ static struct rds_ib_device *rds_ib_get_device(__be32 ipaddr)
 	list_for_each_entry_rcu(rds_ibdev, &rds_ib_devices, list) {
 		list_for_each_entry_rcu(i_ipaddr, &rds_ibdev->ipaddr_list, list) {
 			if (i_ipaddr->ipaddr == ipaddr) {
-				refcount_inc(&rds_ibdev->refcount);
+				rds_ib_dev_get(rds_ibdev);
 				rcu_read_unlock();
 				return rds_ibdev;
 			}
@@ -139,7 +139,7 @@ void rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *con
 	spin_unlock_irq(&ib_nodev_conns_lock);
 
 	ic->rds_ibdev = rds_ibdev;
-	refcount_inc(&rds_ibdev->refcount);
+	rds_ib_dev_get(rds_ibdev);
 }
 
 void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn)
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index 585e6b3..d5abe0e 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007, 2017 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2007, 2020 Oracle and/or its affiliates. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -84,7 +84,7 @@ static struct rds_mr *rds_mr_tree_walk(struct rb_root *root, u64 key,
 	if (insert) {
 		rb_link_node(&insert->r_rb_node, parent, p);
 		rb_insert_color(&insert->r_rb_node, root);
-		refcount_inc(&insert->r_refcount);
+		rds_mr_get(insert);
 	}
 	return NULL;
 }
@@ -343,7 +343,7 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
 
 	rdsdebug("RDS: get_mr key is %x\n", mr->r_key);
 	if (mr_ret) {
-		refcount_inc(&mr->r_refcount);
+		rds_mr_get(mr);
 		*mr_ret = mr;
 	}
 
@@ -827,7 +827,7 @@ int rds_cmsg_rdma_dest(struct rds_sock *rs, struct rds_message *rm,
 	if (!mr)
 		err = -EINVAL;	/* invalid r_key */
 	else
-		refcount_inc(&mr->r_refcount);
+		rds_mr_get(mr);
 	spin_unlock_irqrestore(&rs->rs_rdma_lock, flags);
 
 	if (mr) {
diff --git a/net/rds/rds.h b/net/rds/rds.h
index e4a6035..6a665fa 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -953,6 +953,11 @@ static inline void rds_mr_put(struct rds_mr *mr)
 		__rds_put_mr_final(mr);
 }
 
+static inline void rds_mr_get(struct rds_mr *mr)
+{
+	refcount_inc(&mr->r_refcount);
+}
+
 static inline bool rds_destroy_pending(struct rds_connection *conn)
 {
 	return !check_net(rds_conn_net(conn)) ||
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH net 2/2] net/rds: Fix MR reference counting problem
  2020-04-07 16:08 [PATCH net 1/2] net/rds: Replace direct refcount_inc() by inline function Ka-Cheong Poon
@ 2020-04-07 16:08 ` Ka-Cheong Poon
  2020-04-07 19:30   ` santosh.shilimkar
  2020-04-07 18:48 ` [PATCH net 1/2] net/rds: Replace direct refcount_inc() by inline function Leon Romanovsky
  1 sibling, 1 reply; 9+ messages in thread
From: Ka-Cheong Poon @ 2020-04-07 16:08 UTC (permalink / raw)
  To: netdev; +Cc: santosh.shilimkar, davem, rds-devel, sironhide0null

In rds_free_mr(), it calls rds_destroy_mr(mr) directly.  But this
defeats the purpose of reference counting and makes MR free handling
impossible.  It means that holding a reference does not guarantee that
it is safe to access some fields.  For example, In
rds_cmsg_rdma_dest(), it increases the ref count, unlocks and then
calls mr->r_trans->sync_mr().  But if rds_free_mr() (and
rds_destroy_mr()) is called in between (there is no lock preventing
this to happen), r_trans_private is set to NULL, causing a panic.
Similar issue is in rds_rdma_unuse().

Reported-by: zerons <sironhide0null@gmail.com>
Signed-off-by: Ka-Cheong Poon <ka-cheong.poon@oracle.com>
---
 net/rds/rdma.c | 25 ++++++++++++-------------
 net/rds/rds.h  |  8 --------
 2 files changed, 12 insertions(+), 21 deletions(-)

diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index d5abe0e..4cb5485 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -101,9 +101,6 @@ static void rds_destroy_mr(struct rds_mr *mr)
 	rdsdebug("RDS: destroy mr key is %x refcnt %u\n",
 			mr->r_key, refcount_read(&mr->r_refcount));
 
-	if (test_and_set_bit(RDS_MR_DEAD, &mr->r_state))
-		return;
-
 	spin_lock_irqsave(&rs->rs_rdma_lock, flags);
 	if (!RB_EMPTY_NODE(&mr->r_rb_node))
 		rb_erase(&mr->r_rb_node, &rs->rs_rdma_keys);
@@ -140,7 +137,6 @@ void rds_rdma_drop_keys(struct rds_sock *rs)
 		rb_erase(&mr->r_rb_node, &rs->rs_rdma_keys);
 		RB_CLEAR_NODE(&mr->r_rb_node);
 		spin_unlock_irqrestore(&rs->rs_rdma_lock, flags);
-		rds_destroy_mr(mr);
 		rds_mr_put(mr);
 		spin_lock_irqsave(&rs->rs_rdma_lock, flags);
 	}
@@ -434,12 +430,6 @@ int rds_free_mr(struct rds_sock *rs, char __user *optval, int optlen)
 	if (!mr)
 		return -EINVAL;
 
-	/*
-	 * call rds_destroy_mr() ourselves so that we're sure it's done by the time
-	 * we return.  If we let rds_mr_put() do it it might not happen until
-	 * someone else drops their ref.
-	 */
-	rds_destroy_mr(mr);
 	rds_mr_put(mr);
 	return 0;
 }
@@ -464,6 +454,14 @@ void rds_rdma_unuse(struct rds_sock *rs, u32 r_key, int force)
 		return;
 	}
 
+	/* Get a reference so that the MR won't go away before calling
+	 * sync_mr() below.
+	 */
+	rds_mr_get(mr);
+
+	/* If it is going to be freed, remove it from the tree now so
+	 * that no other thread can find it and free it.
+	 */
 	if (mr->r_use_once || force) {
 		rb_erase(&mr->r_rb_node, &rs->rs_rdma_keys);
 		RB_CLEAR_NODE(&mr->r_rb_node);
@@ -477,12 +475,13 @@ void rds_rdma_unuse(struct rds_sock *rs, u32 r_key, int force)
 	if (mr->r_trans->sync_mr)
 		mr->r_trans->sync_mr(mr->r_trans_private, DMA_FROM_DEVICE);
 
+	/* Release the reference held above. */
+	rds_mr_put(mr);
+
 	/* If the MR was marked as invalidate, this will
 	 * trigger an async flush. */
-	if (zot_me) {
-		rds_destroy_mr(mr);
+	if (zot_me)
 		rds_mr_put(mr);
-	}
 }
 
 void rds_rdma_free_op(struct rm_rdma_op *ro)
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 6a665fa..e889ef8 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -299,19 +299,11 @@ struct rds_mr {
 	unsigned int		r_invalidate:1;
 	unsigned int		r_write:1;
 
-	/* This is for RDS_MR_DEAD.
-	 * It would be nice & consistent to make this part of the above
-	 * bit field here, but we need to use test_and_set_bit.
-	 */
-	unsigned long		r_state;
 	struct rds_sock		*r_sock; /* back pointer to the socket that owns us */
 	struct rds_transport	*r_trans;
 	void			*r_trans_private;
 };
 
-/* Flags for mr->r_state */
-#define RDS_MR_DEAD		0
-
 static inline rds_rdma_cookie_t rds_rdma_make_cookie(u32 r_key, u32 offset)
 {
 	return r_key | (((u64) offset) << 32);
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH net 1/2] net/rds: Replace direct refcount_inc() by inline function
  2020-04-07 16:08 [PATCH net 1/2] net/rds: Replace direct refcount_inc() by inline function Ka-Cheong Poon
  2020-04-07 16:08 ` [PATCH net 2/2] net/rds: Fix MR reference counting problem Ka-Cheong Poon
@ 2020-04-07 18:48 ` Leon Romanovsky
  2020-04-07 20:19   ` David Miller
  2020-04-08  4:15   ` Ka-Cheong Poon
  1 sibling, 2 replies; 9+ messages in thread
From: Leon Romanovsky @ 2020-04-07 18:48 UTC (permalink / raw)
  To: Ka-Cheong Poon
  Cc: netdev, santosh.shilimkar, davem, rds-devel, sironhide0null

On Tue, Apr 07, 2020 at 09:08:01AM -0700, Ka-Cheong Poon wrote:
> Added rds_ib_dev_get() and rds_mr_get() to improve code readability.

It is very hard to agree with this sentence.
Hiding basic kernel primitives is very rare will improve code readability.
It is definitely not the case here.

Thanks

>
> Signed-off-by: Ka-Cheong Poon <ka-cheong.poon@oracle.com>
> ---
>  net/rds/ib.c      | 8 ++++----
>  net/rds/ib.h      | 5 +++++
>  net/rds/ib_rdma.c | 6 +++---
>  net/rds/rdma.c    | 8 ++++----
>  net/rds/rds.h     | 5 +++++
>  5 files changed, 21 insertions(+), 11 deletions(-)
>
> diff --git a/net/rds/ib.c b/net/rds/ib.c
> index a792d8a..c16cb1a 100644
> --- a/net/rds/ib.c
> +++ b/net/rds/ib.c
> @@ -1,5 +1,5 @@
>  /*
> - * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved.
> + * Copyright (c) 2006, 2020 Oracle and/or its affiliates. All rights reserved.
>   *
>   * This software is available to you under a choice of one of two
>   * licenses.  You may choose to be licensed under the terms of the GNU
> @@ -224,10 +224,10 @@ static void rds_ib_add_one(struct ib_device *device)
>  	down_write(&rds_ib_devices_lock);
>  	list_add_tail_rcu(&rds_ibdev->list, &rds_ib_devices);
>  	up_write(&rds_ib_devices_lock);
> -	refcount_inc(&rds_ibdev->refcount);
> +	rds_ib_dev_get(rds_ibdev);
>
>  	ib_set_client_data(device, &rds_ib_client, rds_ibdev);
> -	refcount_inc(&rds_ibdev->refcount);
> +	rds_ib_dev_get(rds_ibdev);
>
>  	rds_ib_nodev_connect();
>
> @@ -258,7 +258,7 @@ struct rds_ib_device *rds_ib_get_client_data(struct ib_device *device)
>  	rcu_read_lock();
>  	rds_ibdev = ib_get_client_data(device, &rds_ib_client);
>  	if (rds_ibdev)
> -		refcount_inc(&rds_ibdev->refcount);
> +		rds_ib_dev_get(rds_ibdev);
>  	rcu_read_unlock();
>  	return rds_ibdev;
>  }
> diff --git a/net/rds/ib.h b/net/rds/ib.h
> index 0296f1f..fe7ea4e 100644
> --- a/net/rds/ib.h
> +++ b/net/rds/ib.h
> @@ -361,6 +361,11 @@ static inline void rds_ib_dma_sync_sg_for_device(struct ib_device *dev,
>  extern struct rds_transport rds_ib_transport;
>  struct rds_ib_device *rds_ib_get_client_data(struct ib_device *device);
>  void rds_ib_dev_put(struct rds_ib_device *rds_ibdev);
> +static inline void rds_ib_dev_get(struct rds_ib_device *rds_ibdev)
> +{
> +	refcount_inc(&rds_ibdev->refcount);
> +}
> +
>  extern struct ib_client rds_ib_client;
>
>  extern unsigned int rds_ib_retry_count;
> diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
> index b34b24e..1b942d80 100644
> --- a/net/rds/ib_rdma.c
> +++ b/net/rds/ib_rdma.c
> @@ -1,5 +1,5 @@
>  /*
> - * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved.
> + * Copyright (c) 2006, 2020 Oracle and/or its affiliates. All rights reserved.
>   *
>   * This software is available to you under a choice of one of two
>   * licenses.  You may choose to be licensed under the terms of the GNU
> @@ -56,7 +56,7 @@ static struct rds_ib_device *rds_ib_get_device(__be32 ipaddr)
>  	list_for_each_entry_rcu(rds_ibdev, &rds_ib_devices, list) {
>  		list_for_each_entry_rcu(i_ipaddr, &rds_ibdev->ipaddr_list, list) {
>  			if (i_ipaddr->ipaddr == ipaddr) {
> -				refcount_inc(&rds_ibdev->refcount);
> +				rds_ib_dev_get(rds_ibdev);
>  				rcu_read_unlock();
>  				return rds_ibdev;
>  			}
> @@ -139,7 +139,7 @@ void rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *con
>  	spin_unlock_irq(&ib_nodev_conns_lock);
>
>  	ic->rds_ibdev = rds_ibdev;
> -	refcount_inc(&rds_ibdev->refcount);
> +	rds_ib_dev_get(rds_ibdev);
>  }
>
>  void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn)
> diff --git a/net/rds/rdma.c b/net/rds/rdma.c
> index 585e6b3..d5abe0e 100644
> --- a/net/rds/rdma.c
> +++ b/net/rds/rdma.c
> @@ -1,5 +1,5 @@
>  /*
> - * Copyright (c) 2007, 2017 Oracle and/or its affiliates. All rights reserved.
> + * Copyright (c) 2007, 2020 Oracle and/or its affiliates. All rights reserved.
>   *
>   * This software is available to you under a choice of one of two
>   * licenses.  You may choose to be licensed under the terms of the GNU
> @@ -84,7 +84,7 @@ static struct rds_mr *rds_mr_tree_walk(struct rb_root *root, u64 key,
>  	if (insert) {
>  		rb_link_node(&insert->r_rb_node, parent, p);
>  		rb_insert_color(&insert->r_rb_node, root);
> -		refcount_inc(&insert->r_refcount);
> +		rds_mr_get(insert);
>  	}
>  	return NULL;
>  }
> @@ -343,7 +343,7 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
>
>  	rdsdebug("RDS: get_mr key is %x\n", mr->r_key);
>  	if (mr_ret) {
> -		refcount_inc(&mr->r_refcount);
> +		rds_mr_get(mr);
>  		*mr_ret = mr;
>  	}
>
> @@ -827,7 +827,7 @@ int rds_cmsg_rdma_dest(struct rds_sock *rs, struct rds_message *rm,
>  	if (!mr)
>  		err = -EINVAL;	/* invalid r_key */
>  	else
> -		refcount_inc(&mr->r_refcount);
> +		rds_mr_get(mr);
>  	spin_unlock_irqrestore(&rs->rs_rdma_lock, flags);
>
>  	if (mr) {
> diff --git a/net/rds/rds.h b/net/rds/rds.h
> index e4a6035..6a665fa 100644
> --- a/net/rds/rds.h
> +++ b/net/rds/rds.h
> @@ -953,6 +953,11 @@ static inline void rds_mr_put(struct rds_mr *mr)
>  		__rds_put_mr_final(mr);
>  }
>
> +static inline void rds_mr_get(struct rds_mr *mr)
> +{
> +	refcount_inc(&mr->r_refcount);
> +}
> +
>  static inline bool rds_destroy_pending(struct rds_connection *conn)
>  {
>  	return !check_net(rds_conn_net(conn)) ||
> --
> 1.8.3.1
>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH net 2/2] net/rds: Fix MR reference counting problem
  2020-04-07 16:08 ` [PATCH net 2/2] net/rds: Fix MR reference counting problem Ka-Cheong Poon
@ 2020-04-07 19:30   ` santosh.shilimkar
  2020-04-08  2:25     ` zerons
  0 siblings, 1 reply; 9+ messages in thread
From: santosh.shilimkar @ 2020-04-07 19:30 UTC (permalink / raw)
  To: Ka-Cheong Poon, netdev, sironhide0null; +Cc: davem, rds-devel

On 4/7/20 9:08 AM, Ka-Cheong Poon wrote:
> In rds_free_mr(), it calls rds_destroy_mr(mr) directly.  But this
> defeats the purpose of reference counting and makes MR free handling
> impossible.  It means that holding a reference does not guarantee that
> it is safe to access some fields.  For example, In
> rds_cmsg_rdma_dest(), it increases the ref count, unlocks and then
> calls mr->r_trans->sync_mr().  But if rds_free_mr() (and
> rds_destroy_mr()) is called in between (there is no lock preventing
> this to happen), r_trans_private is set to NULL, causing a panic.
> Similar issue is in rds_rdma_unuse().
> 
> Reported-by: zerons <sironhide0null@gmail.com>
> Signed-off-by: Ka-Cheong Poon <ka-cheong.poon@oracle.com>
> ---
Thanks for getting this out on the list.

Hi zerons,
Can you please review it and see it addresses your concern ?

Regards,
Santosh

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH net 1/2] net/rds: Replace direct refcount_inc() by inline function
  2020-04-07 18:48 ` [PATCH net 1/2] net/rds: Replace direct refcount_inc() by inline function Leon Romanovsky
@ 2020-04-07 20:19   ` David Miller
  2020-04-08  4:15   ` Ka-Cheong Poon
  1 sibling, 0 replies; 9+ messages in thread
From: David Miller @ 2020-04-07 20:19 UTC (permalink / raw)
  To: leon; +Cc: ka-cheong.poon, netdev, santosh.shilimkar, rds-devel, sironhide0null

From: Leon Romanovsky <leon@kernel.org>
Date: Tue, 7 Apr 2020 21:48:09 +0300

> On Tue, Apr 07, 2020 at 09:08:01AM -0700, Ka-Cheong Poon wrote:
>> Added rds_ib_dev_get() and rds_mr_get() to improve code readability.
> 
> It is very hard to agree with this sentence.
> Hiding basic kernel primitives is very rare will improve code readability.
> It is definitely not the case here.

I completely agree.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH net 2/2] net/rds: Fix MR reference counting problem
  2020-04-07 19:30   ` santosh.shilimkar
@ 2020-04-08  2:25     ` zerons
  2020-04-08  2:52       ` santosh.shilimkar
  0 siblings, 1 reply; 9+ messages in thread
From: zerons @ 2020-04-08  2:25 UTC (permalink / raw)
  To: santosh.shilimkar, Ka-Cheong Poon, netdev; +Cc: davem, rds-devel

On 4/8/20 03:30, santosh.shilimkar@oracle.com wrote:
> On 4/7/20 9:08 AM, Ka-Cheong Poon wrote:
>> In rds_free_mr(), it calls rds_destroy_mr(mr) directly.  But this
>> defeats the purpose of reference counting and makes MR free handling
>> impossible.  It means that holding a reference does not guarantee that
>> it is safe to access some fields.  For example, In
>> rds_cmsg_rdma_dest(), it increases the ref count, unlocks and then
>> calls mr->r_trans->sync_mr().  But if rds_free_mr() (and
>> rds_destroy_mr()) is called in between (there is no lock preventing
>> this to happen), r_trans_private is set to NULL, causing a panic.
>> Similar issue is in rds_rdma_unuse().
>>
>> Reported-by: zerons <sironhide0null@gmail.com>
>> Signed-off-by: Ka-Cheong Poon <ka-cheong.poon@oracle.com>
>> ---
> Thanks for getting this out on the list.
> 
> Hi zerons,
> Can you please review it and see it addresses your concern ?
> 

Yes, the MR gets freed only when the ref count decreases to zero does
address my concern. I think it make the logic cleaner as well. Fantastic!

Regards,
zerons

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH net 2/2] net/rds: Fix MR reference counting problem
  2020-04-08  2:25     ` zerons
@ 2020-04-08  2:52       ` santosh.shilimkar
  0 siblings, 0 replies; 9+ messages in thread
From: santosh.shilimkar @ 2020-04-08  2:52 UTC (permalink / raw)
  To: zerons, Ka-Cheong Poon, netdev; +Cc: davem, rds-devel

On 4/7/20 7:25 PM, zerons wrote:
> On 4/8/20 03:30, santosh.shilimkar@oracle.com wrote:
>> On 4/7/20 9:08 AM, Ka-Cheong Poon wrote:
>>> In rds_free_mr(), it calls rds_destroy_mr(mr) directly.  But this
>>> defeats the purpose of reference counting and makes MR free handling
>>> impossible.  It means that holding a reference does not guarantee that
>>> it is safe to access some fields.  For example, In
>>> rds_cmsg_rdma_dest(), it increases the ref count, unlocks and then
>>> calls mr->r_trans->sync_mr().  But if rds_free_mr() (and
>>> rds_destroy_mr()) is called in between (there is no lock preventing
>>> this to happen), r_trans_private is set to NULL, causing a panic.
>>> Similar issue is in rds_rdma_unuse().
>>>
>>> Reported-by: zerons <sironhide0null@gmail.com>
>>> Signed-off-by: Ka-Cheong Poon <ka-cheong.poon@oracle.com>
>>> ---
>> Thanks for getting this out on the list.
>>
>> Hi zerons,
>> Can you please review it and see it addresses your concern ?
>>
> 
> Yes, the MR gets freed only when the ref count decreases to zero does
> address my concern. I think it make the logic cleaner as well. Fantastic!
> 
Thanks for confirmation.

FWIW,
Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH net 1/2] net/rds: Replace direct refcount_inc() by inline function
  2020-04-07 18:48 ` [PATCH net 1/2] net/rds: Replace direct refcount_inc() by inline function Leon Romanovsky
  2020-04-07 20:19   ` David Miller
@ 2020-04-08  4:15   ` Ka-Cheong Poon
  2020-04-08  6:44     ` Leon Romanovsky
  1 sibling, 1 reply; 9+ messages in thread
From: Ka-Cheong Poon @ 2020-04-08  4:15 UTC (permalink / raw)
  To: Leon Romanovsky
  Cc: netdev, santosh.shilimkar, davem, rds-devel, sironhide0null

On 4/8/20 2:48 AM, Leon Romanovsky wrote:
> On Tue, Apr 07, 2020 at 09:08:01AM -0700, Ka-Cheong Poon wrote:
>> Added rds_ib_dev_get() and rds_mr_get() to improve code readability.
> 
> It is very hard to agree with this sentence.
> Hiding basic kernel primitives is very rare will improve code readability.
> It is definitely not the case here.


This is to match the rds_ib_dev_put() and rds_mr_put() functions.
Isn't it natural to have a pair of *_put()/*_get() functions?

Thanks.


> Thanks
> 
>>
>> Signed-off-by: Ka-Cheong Poon <ka-cheong.poon@oracle.com>
>> ---
>>   net/rds/ib.c      | 8 ++++----
>>   net/rds/ib.h      | 5 +++++
>>   net/rds/ib_rdma.c | 6 +++---
>>   net/rds/rdma.c    | 8 ++++----
>>   net/rds/rds.h     | 5 +++++
>>   5 files changed, 21 insertions(+), 11 deletions(-)
>>
>> diff --git a/net/rds/ib.c b/net/rds/ib.c
>> index a792d8a..c16cb1a 100644
>> --- a/net/rds/ib.c
>> +++ b/net/rds/ib.c
>> @@ -1,5 +1,5 @@
>>   /*
>> - * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved.
>> + * Copyright (c) 2006, 2020 Oracle and/or its affiliates. All rights reserved.
>>    *
>>    * This software is available to you under a choice of one of two
>>    * licenses.  You may choose to be licensed under the terms of the GNU
>> @@ -224,10 +224,10 @@ static void rds_ib_add_one(struct ib_device *device)
>>   	down_write(&rds_ib_devices_lock);
>>   	list_add_tail_rcu(&rds_ibdev->list, &rds_ib_devices);
>>   	up_write(&rds_ib_devices_lock);
>> -	refcount_inc(&rds_ibdev->refcount);
>> +	rds_ib_dev_get(rds_ibdev);
>>
>>   	ib_set_client_data(device, &rds_ib_client, rds_ibdev);
>> -	refcount_inc(&rds_ibdev->refcount);
>> +	rds_ib_dev_get(rds_ibdev);
>>
>>   	rds_ib_nodev_connect();
>>
>> @@ -258,7 +258,7 @@ struct rds_ib_device *rds_ib_get_client_data(struct ib_device *device)
>>   	rcu_read_lock();
>>   	rds_ibdev = ib_get_client_data(device, &rds_ib_client);
>>   	if (rds_ibdev)
>> -		refcount_inc(&rds_ibdev->refcount);
>> +		rds_ib_dev_get(rds_ibdev);
>>   	rcu_read_unlock();
>>   	return rds_ibdev;
>>   }
>> diff --git a/net/rds/ib.h b/net/rds/ib.h
>> index 0296f1f..fe7ea4e 100644
>> --- a/net/rds/ib.h
>> +++ b/net/rds/ib.h
>> @@ -361,6 +361,11 @@ static inline void rds_ib_dma_sync_sg_for_device(struct ib_device *dev,
>>   extern struct rds_transport rds_ib_transport;
>>   struct rds_ib_device *rds_ib_get_client_data(struct ib_device *device);
>>   void rds_ib_dev_put(struct rds_ib_device *rds_ibdev);
>> +static inline void rds_ib_dev_get(struct rds_ib_device *rds_ibdev)
>> +{
>> +	refcount_inc(&rds_ibdev->refcount);
>> +}
>> +
>>   extern struct ib_client rds_ib_client;
>>
>>   extern unsigned int rds_ib_retry_count;
>> diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
>> index b34b24e..1b942d80 100644
>> --- a/net/rds/ib_rdma.c
>> +++ b/net/rds/ib_rdma.c
>> @@ -1,5 +1,5 @@
>>   /*
>> - * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved.
>> + * Copyright (c) 2006, 2020 Oracle and/or its affiliates. All rights reserved.
>>    *
>>    * This software is available to you under a choice of one of two
>>    * licenses.  You may choose to be licensed under the terms of the GNU
>> @@ -56,7 +56,7 @@ static struct rds_ib_device *rds_ib_get_device(__be32 ipaddr)
>>   	list_for_each_entry_rcu(rds_ibdev, &rds_ib_devices, list) {
>>   		list_for_each_entry_rcu(i_ipaddr, &rds_ibdev->ipaddr_list, list) {
>>   			if (i_ipaddr->ipaddr == ipaddr) {
>> -				refcount_inc(&rds_ibdev->refcount);
>> +				rds_ib_dev_get(rds_ibdev);
>>   				rcu_read_unlock();
>>   				return rds_ibdev;
>>   			}
>> @@ -139,7 +139,7 @@ void rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *con
>>   	spin_unlock_irq(&ib_nodev_conns_lock);
>>
>>   	ic->rds_ibdev = rds_ibdev;
>> -	refcount_inc(&rds_ibdev->refcount);
>> +	rds_ib_dev_get(rds_ibdev);
>>   }
>>
>>   void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn)
>> diff --git a/net/rds/rdma.c b/net/rds/rdma.c
>> index 585e6b3..d5abe0e 100644
>> --- a/net/rds/rdma.c
>> +++ b/net/rds/rdma.c
>> @@ -1,5 +1,5 @@
>>   /*
>> - * Copyright (c) 2007, 2017 Oracle and/or its affiliates. All rights reserved.
>> + * Copyright (c) 2007, 2020 Oracle and/or its affiliates. All rights reserved.
>>    *
>>    * This software is available to you under a choice of one of two
>>    * licenses.  You may choose to be licensed under the terms of the GNU
>> @@ -84,7 +84,7 @@ static struct rds_mr *rds_mr_tree_walk(struct rb_root *root, u64 key,
>>   	if (insert) {
>>   		rb_link_node(&insert->r_rb_node, parent, p);
>>   		rb_insert_color(&insert->r_rb_node, root);
>> -		refcount_inc(&insert->r_refcount);
>> +		rds_mr_get(insert);
>>   	}
>>   	return NULL;
>>   }
>> @@ -343,7 +343,7 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
>>
>>   	rdsdebug("RDS: get_mr key is %x\n", mr->r_key);
>>   	if (mr_ret) {
>> -		refcount_inc(&mr->r_refcount);
>> +		rds_mr_get(mr);
>>   		*mr_ret = mr;
>>   	}
>>
>> @@ -827,7 +827,7 @@ int rds_cmsg_rdma_dest(struct rds_sock *rs, struct rds_message *rm,
>>   	if (!mr)
>>   		err = -EINVAL;	/* invalid r_key */
>>   	else
>> -		refcount_inc(&mr->r_refcount);
>> +		rds_mr_get(mr);
>>   	spin_unlock_irqrestore(&rs->rs_rdma_lock, flags);
>>
>>   	if (mr) {
>> diff --git a/net/rds/rds.h b/net/rds/rds.h
>> index e4a6035..6a665fa 100644
>> --- a/net/rds/rds.h
>> +++ b/net/rds/rds.h
>> @@ -953,6 +953,11 @@ static inline void rds_mr_put(struct rds_mr *mr)
>>   		__rds_put_mr_final(mr);
>>   }
>>
>> +static inline void rds_mr_get(struct rds_mr *mr)
>> +{
>> +	refcount_inc(&mr->r_refcount);
>> +}
>> +
>>   static inline bool rds_destroy_pending(struct rds_connection *conn)
>>   {
>>   	return !check_net(rds_conn_net(conn)) ||
>> --
>> 1.8.3.1
>>


-- 
K. Poon
ka-cheong.poon@oracle.com



^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH net 1/2] net/rds: Replace direct refcount_inc() by inline function
  2020-04-08  4:15   ` Ka-Cheong Poon
@ 2020-04-08  6:44     ` Leon Romanovsky
  0 siblings, 0 replies; 9+ messages in thread
From: Leon Romanovsky @ 2020-04-08  6:44 UTC (permalink / raw)
  To: Ka-Cheong Poon
  Cc: netdev, santosh.shilimkar, davem, rds-devel, sironhide0null

On Wed, Apr 08, 2020 at 12:15:51PM +0800, Ka-Cheong Poon wrote:
> On 4/8/20 2:48 AM, Leon Romanovsky wrote:
> > On Tue, Apr 07, 2020 at 09:08:01AM -0700, Ka-Cheong Poon wrote:
> > > Added rds_ib_dev_get() and rds_mr_get() to improve code readability.
> >
> > It is very hard to agree with this sentence.
> > Hiding basic kernel primitives is very rare will improve code readability.
> > It is definitely not the case here.
>
>
> This is to match the rds_ib_dev_put() and rds_mr_put() functions.
> Isn't it natural to have a pair of *_put()/*_get() functions?

Ohhh, thank you for pointing that. It is great example why hiding basic
primitives is really bad idea.

123 void rds_ib_dev_put(struct rds_ib_device *rds_ibdev)
124 {
125         BUG_ON(refcount_read(&rds_ibdev->refcount) == 0);
            ^^^^^^ no to this
126         if (refcount_dec_and_test(&rds_ibdev->refcount))
127                 queue_work(rds_wq, &rds_ibdev->free_work);
128 }

....

300         rds_ib_dev_put(rds_ibdev);
301         rds_ib_dev_put(rds_ibdev);

Double put -> you wrongly initialized/used refcount.

So instead of hiding refcount_inc(), I would say delete your *_put() variants,
fix reference counting and convert rds_mr_put() to be normal kref object
instead of current implementation. Which does exactly the same like your
custom *_put()/_get(), but better and with less errors.

Thanks

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2020-04-08  6:44 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-04-07 16:08 [PATCH net 1/2] net/rds: Replace direct refcount_inc() by inline function Ka-Cheong Poon
2020-04-07 16:08 ` [PATCH net 2/2] net/rds: Fix MR reference counting problem Ka-Cheong Poon
2020-04-07 19:30   ` santosh.shilimkar
2020-04-08  2:25     ` zerons
2020-04-08  2:52       ` santosh.shilimkar
2020-04-07 18:48 ` [PATCH net 1/2] net/rds: Replace direct refcount_inc() by inline function Leon Romanovsky
2020-04-07 20:19   ` David Miller
2020-04-08  4:15   ` Ka-Cheong Poon
2020-04-08  6:44     ` Leon Romanovsky

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).