* [PATCH 1/1] nvme-rdma: Add association between ctrl and transport dev
@ 2019-05-21 13:19 Max Gurtovoy
  2019-05-23  8:13 ` [Suspected-Phishing][PATCH " Max Gurtovoy
                   ` (2 more replies)
  0 siblings, 3 replies; 11+ messages in thread
From: Max Gurtovoy @ 2019-05-21 13:19 UTC (permalink / raw)


RDMA transport ctrl holds a reference to its underlying transport
device, so we need to make sure that this reference is valid. Use a
kref object to enforce that.

This commit fixes a possible segmentation fault that may happen during
the reconnection + device removal flow, which was caused by removing
the refcount between the block layer tagsets and the transport device.

Fixes: 87fd125344d6 ("nvme-rdma: remove redundant reference between ib_device and tagset")

Signed-off-by: Max Gurtovoy <maxg@mellanox.com>
---
 drivers/nvme/host/rdma.c | 51 ++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 41 insertions(+), 10 deletions(-)

diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index f383146..07eddfb 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -354,6 +354,21 @@ static int nvme_rdma_dev_get(struct nvme_rdma_device *dev)
 	return kref_get_unless_zero(&dev->ref);
 }
 
+static void nvme_rdma_ctrl_dev_put(struct nvme_rdma_ctrl *ctrl,
+				   struct nvme_rdma_device *dev)
+{
+	ctrl->device = 	NULL;
+	kref_put(&dev->ref, nvme_rdma_free_dev);
+}
+
+static void nvme_rdma_ctrl_dev_get(struct nvme_rdma_ctrl *ctrl,
+				   struct nvme_rdma_device *dev)
+{
+	kref_get(&dev->ref);
+	ctrl->device = dev;
+}
+
+
 static struct nvme_rdma_device *
 nvme_rdma_find_get_device(struct rdma_cm_id *cm_id)
 {
@@ -743,12 +758,16 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl,
 static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl,
 		bool remove)
 {
+	struct nvme_rdma_device *ndev = ctrl->device;
+
 	if (remove) {
 		blk_cleanup_queue(ctrl->ctrl.admin_q);
 		blk_mq_free_tag_set(ctrl->ctrl.admin_tagset);
+		/* ctrl releases refcount on device */
+		nvme_rdma_ctrl_dev_put(ctrl, ctrl->device);
 	}
 	if (ctrl->async_event_sqe.data) {
-		nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe,
+		nvme_rdma_free_qe(ndev->dev, &ctrl->async_event_sqe,
 				sizeof(struct nvme_command), DMA_TO_DEVICE);
 		ctrl->async_event_sqe.data = NULL;
 	}
@@ -758,23 +777,26 @@ static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl,
 static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
 		bool new)
 {
+	struct ib_device *ibdev;
 	int error;
 
 	error = nvme_rdma_alloc_queue(ctrl, 0, NVME_AQ_DEPTH);
 	if (error)
 		return error;
 
-	ctrl->device = ctrl->queues[0].device;
-	ctrl->ctrl.numa_node = dev_to_node(ctrl->device->dev->dma_device);
+	ibdev = ctrl->queues[0].device->dev;
+	ctrl->ctrl.numa_node = dev_to_node(ibdev->dma_device);
+	ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ibdev);
 
-	ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ctrl->device->dev);
-
-	error = nvme_rdma_alloc_qe(ctrl->device->dev, &ctrl->async_event_sqe,
+	error = nvme_rdma_alloc_qe(ibdev, &ctrl->async_event_sqe,
 			sizeof(struct nvme_command), DMA_TO_DEVICE);
 	if (error)
 		goto out_free_queue;
 
 	if (new) {
+		/* ctrl takes refcount on device */
+		nvme_rdma_ctrl_dev_get(ctrl, ctrl->queues[0].device);
+
 		ctrl->ctrl.admin_tagset = nvme_rdma_alloc_tagset(&ctrl->ctrl, true);
 		if (IS_ERR(ctrl->ctrl.admin_tagset)) {
 			error = PTR_ERR(ctrl->ctrl.admin_tagset);
@@ -786,6 +808,14 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
 			error = PTR_ERR(ctrl->ctrl.admin_q);
 			goto out_free_tagset;
 		}
+	} else if (ctrl->device != ctrl->queues[0].device) {
+		/* ctrl releases refcount on old device */
+		nvme_rdma_ctrl_dev_put(ctrl, ctrl->device);
+		/*
+	 * underlying device might change, ctrl takes refcount on
+		 * new device.
+		 */
+		nvme_rdma_ctrl_dev_get(ctrl, ctrl->queues[0].device);
 	}
 
 	error = nvme_rdma_start_queue(ctrl, 0);
@@ -825,7 +855,9 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
 	if (new)
 		blk_mq_free_tag_set(ctrl->ctrl.admin_tagset);
 out_free_async_qe:
-	nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe,
+	if (new)
+		nvme_rdma_ctrl_dev_put(ctrl, ctrl->device);
+	nvme_rdma_free_qe(ibdev, &ctrl->async_event_sqe,
 		sizeof(struct nvme_command), DMA_TO_DEVICE);
 	ctrl->async_event_sqe.data = NULL;
 out_free_queue:
@@ -2027,9 +2059,8 @@ static void nvme_rdma_remove_one(struct ib_device *ib_device, void *client_data)
 	/* Delete all controllers using this device */
 	mutex_lock(&nvme_rdma_ctrl_mutex);
 	list_for_each_entry(ctrl, &nvme_rdma_ctrl_list, list) {
-		if (ctrl->device->dev != ib_device)
-			continue;
-		nvme_delete_ctrl(&ctrl->ctrl);
+		if (ctrl->device && ctrl->device->dev == ib_device)
+			nvme_delete_ctrl(&ctrl->ctrl);
 	}
 	mutex_unlock(&nvme_rdma_ctrl_mutex);
 
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [Suspected-Phishing][PATCH 1/1] nvme-rdma: Add association between ctrl and transport dev
  2019-05-21 13:19 [PATCH 1/1] nvme-rdma: Add association between ctrl and transport dev Max Gurtovoy
@ 2019-05-23  8:13 ` Max Gurtovoy
  2019-05-23 10:22 ` [PATCH " Christoph Hellwig
  2019-05-24  7:05 ` Sagi Grimberg
  2 siblings, 0 replies; 11+ messages in thread
From: Max Gurtovoy @ 2019-05-23  8:13 UTC (permalink / raw)


Sagi/Christoph,

Can you review this please? We need to fix this scenario for 5.2.


On 5/21/2019 4:19 PM, Max Gurtovoy wrote:
> RDMA transport ctrl holds a reference to its underlying transport
> device, so we need to make sure that this reference is valid. Use a
> kref object to enforce that.
>
> This commit fixes a possible segmentation fault that may happen during
> the reconnection + device removal flow, which was caused by removing
> the refcount between the block layer tagsets and the transport device.
>
> Fixes: 87fd125344d6 ("nvme-rdma: remove redundant reference between ib_device and tagset")
>
> Signed-off-by: Max Gurtovoy <maxg@mellanox.com>
> ---
>   drivers/nvme/host/rdma.c | 51 ++++++++++++++++++++++++++++++++++++++----------
>   1 file changed, 41 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
> index f383146..07eddfb 100644
> --- a/drivers/nvme/host/rdma.c
> +++ b/drivers/nvme/host/rdma.c
> @@ -354,6 +354,21 @@ static int nvme_rdma_dev_get(struct nvme_rdma_device *dev)
>   	return kref_get_unless_zero(&dev->ref);
>   }
>   
> +static void nvme_rdma_ctrl_dev_put(struct nvme_rdma_ctrl *ctrl,
> +				   struct nvme_rdma_device *dev)
> +{
> +	ctrl->device = 	NULL;
> +	kref_put(&dev->ref, nvme_rdma_free_dev);
> +}
> +
> +static void nvme_rdma_ctrl_dev_get(struct nvme_rdma_ctrl *ctrl,
> +				   struct nvme_rdma_device *dev)
> +{
> +	kref_get(&dev->ref);
> +	ctrl->device = dev;
> +}
> +
> +
>   static struct nvme_rdma_device *
>   nvme_rdma_find_get_device(struct rdma_cm_id *cm_id)
>   {
> @@ -743,12 +758,16 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl,
>   static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl,
>   		bool remove)
>   {
> +	struct nvme_rdma_device *ndev = ctrl->device;
> +
>   	if (remove) {
>   		blk_cleanup_queue(ctrl->ctrl.admin_q);
>   		blk_mq_free_tag_set(ctrl->ctrl.admin_tagset);
> +		/* ctrl releases refcount on device */
> +		nvme_rdma_ctrl_dev_put(ctrl, ctrl->device);
>   	}
>   	if (ctrl->async_event_sqe.data) {
> -		nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe,
> +		nvme_rdma_free_qe(ndev->dev, &ctrl->async_event_sqe,
>   				sizeof(struct nvme_command), DMA_TO_DEVICE);
>   		ctrl->async_event_sqe.data = NULL;
>   	}
> @@ -758,23 +777,26 @@ static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl,
>   static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
>   		bool new)
>   {
> +	struct ib_device *ibdev;
>   	int error;
>   
>   	error = nvme_rdma_alloc_queue(ctrl, 0, NVME_AQ_DEPTH);
>   	if (error)
>   		return error;
>   
> -	ctrl->device = ctrl->queues[0].device;
> -	ctrl->ctrl.numa_node = dev_to_node(ctrl->device->dev->dma_device);
> +	ibdev = ctrl->queues[0].device->dev;
> +	ctrl->ctrl.numa_node = dev_to_node(ibdev->dma_device);
> +	ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ibdev);
>   
> -	ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ctrl->device->dev);
> -
> -	error = nvme_rdma_alloc_qe(ctrl->device->dev, &ctrl->async_event_sqe,
> +	error = nvme_rdma_alloc_qe(ibdev, &ctrl->async_event_sqe,
>   			sizeof(struct nvme_command), DMA_TO_DEVICE);
>   	if (error)
>   		goto out_free_queue;
>   
>   	if (new) {
> +		/* ctrl takes refcount on device */
> +		nvme_rdma_ctrl_dev_get(ctrl, ctrl->queues[0].device);
> +
>   		ctrl->ctrl.admin_tagset = nvme_rdma_alloc_tagset(&ctrl->ctrl, true);
>   		if (IS_ERR(ctrl->ctrl.admin_tagset)) {
>   			error = PTR_ERR(ctrl->ctrl.admin_tagset);
> @@ -786,6 +808,14 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
>   			error = PTR_ERR(ctrl->ctrl.admin_q);
>   			goto out_free_tagset;
>   		}
> +	} else if (ctrl->device != ctrl->queues[0].device) {
> +		/* ctrl releases refcount on old device */
> +		nvme_rdma_ctrl_dev_put(ctrl, ctrl->device);
> +		/*
> +	 * underlying device might change, ctrl takes refcount on
> +		 * new device.
> +		 */
> +		nvme_rdma_ctrl_dev_get(ctrl, ctrl->queues[0].device);
>   	}
>   
>   	error = nvme_rdma_start_queue(ctrl, 0);
> @@ -825,7 +855,9 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
>   	if (new)
>   		blk_mq_free_tag_set(ctrl->ctrl.admin_tagset);
>   out_free_async_qe:
> -	nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe,
> +	if (new)
> +		nvme_rdma_ctrl_dev_put(ctrl, ctrl->device);
> +	nvme_rdma_free_qe(ibdev, &ctrl->async_event_sqe,
>   		sizeof(struct nvme_command), DMA_TO_DEVICE);
>   	ctrl->async_event_sqe.data = NULL;
>   out_free_queue:
> @@ -2027,9 +2059,8 @@ static void nvme_rdma_remove_one(struct ib_device *ib_device, void *client_data)
>   	/* Delete all controllers using this device */
>   	mutex_lock(&nvme_rdma_ctrl_mutex);
>   	list_for_each_entry(ctrl, &nvme_rdma_ctrl_list, list) {
> -		if (ctrl->device->dev != ib_device)
> -			continue;
> -		nvme_delete_ctrl(&ctrl->ctrl);
> +		if (ctrl->device && ctrl->device->dev == ib_device)
> +			nvme_delete_ctrl(&ctrl->ctrl);
>   	}
>   	mutex_unlock(&nvme_rdma_ctrl_mutex);
>   

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH 1/1] nvme-rdma: Add association between ctrl and transport dev
  2019-05-21 13:19 [PATCH 1/1] nvme-rdma: Add association between ctrl and transport dev Max Gurtovoy
  2019-05-23  8:13 ` [Suspected-Phishing][PATCH " Max Gurtovoy
@ 2019-05-23 10:22 ` Christoph Hellwig
  2019-05-23 11:05   ` Max Gurtovoy
  2019-05-24  7:05 ` Sagi Grimberg
  2 siblings, 1 reply; 11+ messages in thread
From: Christoph Hellwig @ 2019-05-23 10:22 UTC (permalink / raw)


> +static void nvme_rdma_ctrl_dev_put(struct nvme_rdma_ctrl *ctrl,
> +				   struct nvme_rdma_device *dev)
> +{
> +	ctrl->device = 	NULL;

double whitespace here.

> +	kref_put(&dev->ref, nvme_rdma_free_dev);
> +}
> +
> +static void nvme_rdma_ctrl_dev_get(struct nvme_rdma_ctrl *ctrl,
> +				   struct nvme_rdma_device *dev)
> +{
> +	kref_get(&dev->ref);
> +	ctrl->device = dev;

Why aren't these using nvme_rdma_dev_put / nvme_rdma_dev_get?

>  static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl,
>  		bool remove)
>  {
> +	struct nvme_rdma_device *ndev = ctrl->device;
> +
>  	if (remove) {
>  		blk_cleanup_queue(ctrl->ctrl.admin_q);
>  		blk_mq_free_tag_set(ctrl->ctrl.admin_tagset);
> +		/* ctrl releases refcount on device */
> +		nvme_rdma_ctrl_dev_put(ctrl, ctrl->device);
>  	}
>  	if (ctrl->async_event_sqe.data) {
> -		nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe,
> +		nvme_rdma_free_qe(ndev->dev, &ctrl->async_event_sqe,

What guarantees ndev is not freed here?

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH 1/1] nvme-rdma: Add association between ctrl and transport dev
  2019-05-23 10:22 ` [PATCH " Christoph Hellwig
@ 2019-05-23 11:05   ` Max Gurtovoy
  2019-05-23 15:33     ` Christoph Hellwig
  0 siblings, 1 reply; 11+ messages in thread
From: Max Gurtovoy @ 2019-05-23 11:05 UTC (permalink / raw)



On 5/23/2019 1:22 PM, Christoph Hellwig wrote:
>> +static void nvme_rdma_ctrl_dev_put(struct nvme_rdma_ctrl *ctrl,
>> +				   struct nvme_rdma_device *dev)
>> +{
>> +	ctrl->device = 	NULL;
> double whitespace here.
thanks.
>
>> +	kref_put(&dev->ref, nvme_rdma_free_dev);
>> +}
>> +
>> +static void nvme_rdma_ctrl_dev_get(struct nvme_rdma_ctrl *ctrl,
>> +				   struct nvme_rdma_device *dev)
>> +{
>> +	kref_get(&dev->ref);
>> +	ctrl->device = dev;
> Why aren't these using nvme_rdma_dev_put / nvme_rdma_dev_get?

Since we change the ctrl->device pointer here.

Do you prefer doing it without a helper?

>
>>   static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl,
>>   		bool remove)
>>   {
>> +	struct nvme_rdma_device *ndev = ctrl->device;
>> +
>>   	if (remove) {
>>   		blk_cleanup_queue(ctrl->ctrl.admin_q);
>>   		blk_mq_free_tag_set(ctrl->ctrl.admin_tagset);
>> +		/* ctrl releases refcount on device */
>> +		nvme_rdma_ctrl_dev_put(ctrl, ctrl->device);
>>   	}
>>   	if (ctrl->async_event_sqe.data) {
>> -		nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe,
>> +		nvme_rdma_free_qe(ndev->dev, &ctrl->async_event_sqe,
> What guarantees ndev is not freed here?

The admin queue holds the last reference to the device (it will be freed
during nvme_rdma_free_queue(&ctrl->queues[0])).

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH 1/1] nvme-rdma: Add association between ctrl and transport dev
  2019-05-23 11:05   ` Max Gurtovoy
@ 2019-05-23 15:33     ` Christoph Hellwig
  2019-05-24 19:36       ` Max Gurtovoy
  0 siblings, 1 reply; 11+ messages in thread
From: Christoph Hellwig @ 2019-05-23 15:33 UTC (permalink / raw)


On Thu, May 23, 2019 at 02:05:23PM +0300, Max Gurtovoy wrote:
>
> On 5/23/2019 1:22 PM, Christoph Hellwig wrote:
>>> +static void nvme_rdma_ctrl_dev_put(struct nvme_rdma_ctrl *ctrl,
>>> +				   struct nvme_rdma_device *dev)
>>> +{
>>> +	ctrl->device = 	NULL;
>> double whitespace here.
> thanks.
>>
>>> +	kref_put(&dev->ref, nvme_rdma_free_dev);
>>> +}
>>> +
>>> +static void nvme_rdma_ctrl_dev_get(struct nvme_rdma_ctrl *ctrl,
>>> +				   struct nvme_rdma_device *dev)
>>> +{
>>> +	kref_get(&dev->ref);
>>> +	ctrl->device = dev;
>> Why aren't these using nvme_rdma_dev_put / nvme_rdma_dev_get?
>
> Since we change the ctrl->device pointer here.
>
> Do you prefer doing it without helper ?

We can still use the helper underneath instead of open coding the kref
calls, right?
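
A rough sketch of that suggestion (hypothetical, not posted code; it
assumes the driver's existing nvme_rdma_dev_get()/nvme_rdma_dev_put()
helpers and makes the get fallible, since nvme_rdma_dev_get() uses
kref_get_unless_zero()):

static void nvme_rdma_ctrl_dev_put(struct nvme_rdma_ctrl *ctrl,
				   struct nvme_rdma_device *dev)
{
	ctrl->device = NULL;
	nvme_rdma_dev_put(dev);
}

static int nvme_rdma_ctrl_dev_get(struct nvme_rdma_ctrl *ctrl,
				  struct nvme_rdma_device *dev)
{
	/* may fail: the device's refcount can already have hit zero */
	if (!nvme_rdma_dev_get(dev))
		return -ENODEV;
	ctrl->device = dev;
	return 0;
}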

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH 1/1] nvme-rdma: Add association between ctrl and transport dev
  2019-05-21 13:19 [PATCH 1/1] nvme-rdma: Add association between ctrl and transport dev Max Gurtovoy
  2019-05-23  8:13 ` [Suspected-Phishing][PATCH " Max Gurtovoy
  2019-05-23 10:22 ` [PATCH " Christoph Hellwig
@ 2019-05-24  7:05 ` Sagi Grimberg
  2019-05-24 19:30   ` Max Gurtovoy
  2 siblings, 1 reply; 11+ messages in thread
From: Sagi Grimberg @ 2019-05-24  7:05 UTC (permalink / raw)



> @@ -758,23 +777,26 @@ static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl,
>   static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
>   		bool new)
>   {
> +	struct ib_device *ibdev;
>   	int error;
>   
>   	error = nvme_rdma_alloc_queue(ctrl, 0, NVME_AQ_DEPTH);
>   	if (error)
>   		return error;
>   
> -	ctrl->device = ctrl->queues[0].device;
> -	ctrl->ctrl.numa_node = dev_to_node(ctrl->device->dev->dma_device);
> +	ibdev = ctrl->queues[0].device->dev;
> +	ctrl->ctrl.numa_node = dev_to_node(ibdev->dma_device);
> +	ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ibdev);
>   
> -	ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ctrl->device->dev);
> -
> -	error = nvme_rdma_alloc_qe(ctrl->device->dev, &ctrl->async_event_sqe,
> +	error = nvme_rdma_alloc_qe(ibdev, &ctrl->async_event_sqe,
>   			sizeof(struct nvme_command), DMA_TO_DEVICE);
>   	if (error)
>   		goto out_free_queue;
>   
>   	if (new) {
> +		/* ctrl takes refcount on device */
> +		nvme_rdma_ctrl_dev_get(ctrl, ctrl->queues[0].device);
> +

Do you actually need the extra reference on the device? Why isn't just
the set/clear of ctrl->device sufficient? These routines should be
serialized..

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH 1/1] nvme-rdma: Add association between ctrl and transport dev
  2019-05-24  7:05 ` Sagi Grimberg
@ 2019-05-24 19:30   ` Max Gurtovoy
  2019-05-24 23:05     ` Sagi Grimberg
  0 siblings, 1 reply; 11+ messages in thread
From: Max Gurtovoy @ 2019-05-24 19:30 UTC (permalink / raw)



On 5/24/2019 10:05 AM, Sagi Grimberg wrote:
>
>> @@ -758,23 +777,26 @@ static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl,
>>   static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
>>   		bool new)
>>   {
>> +	struct ib_device *ibdev;
>>   	int error;
>>   
>>   	error = nvme_rdma_alloc_queue(ctrl, 0, NVME_AQ_DEPTH);
>>   	if (error)
>>   		return error;
>>   
>> -	ctrl->device = ctrl->queues[0].device;
>> -	ctrl->ctrl.numa_node = dev_to_node(ctrl->device->dev->dma_device);
>> +	ibdev = ctrl->queues[0].device->dev;
>> +	ctrl->ctrl.numa_node = dev_to_node(ibdev->dma_device);
>> +	ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ibdev);
>>   
>> -	ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ctrl->device->dev);
>> -
>> -	error = nvme_rdma_alloc_qe(ctrl->device->dev, &ctrl->async_event_sqe,
>> +	error = nvme_rdma_alloc_qe(ibdev, &ctrl->async_event_sqe,
>>   			sizeof(struct nvme_command), DMA_TO_DEVICE);
>>   	if (error)
>>   		goto out_free_queue;
>>   
>>   	if (new) {
>> +		/* ctrl takes refcount on device */
>> +		nvme_rdma_ctrl_dev_get(ctrl, ctrl->queues[0].device);
>> +
>
> Do you actually need the extra reference on the device? Why isn't just
> the set/clear of ctrl->device sufficient? These routines should be
> serialized..

Yes, we need this reference since the queues are freed during
reconnection, and without the ctrl reference the device will be freed
as well.

And when you disconnect during reconnection, the block layer will call
exit_request for each request (but the device is already freed) and we
will do the needed unmapping from the device..

Not so pleasant..
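
For reference, the exit_request callback in question is approximately of
this shape (an approximation of the driver at the time, not code quoted
in this thread); the req->queue->device->dev dereference is the
use-after-free once the device has gone away:

static void nvme_rdma_exit_request(struct blk_mq_tag_set *set,
		struct request *rq, unsigned int hctx_idx)
{
	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);

	/* req->queue->device may point at an already-freed device here */
	nvme_rdma_free_qe(req->queue->device->dev, &req->sqe,
			sizeof(struct nvme_command), DMA_TO_DEVICE);
}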

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH 1/1] nvme-rdma: Add association between ctrl and transport dev
  2019-05-23 15:33     ` Christoph Hellwig
@ 2019-05-24 19:36       ` Max Gurtovoy
  0 siblings, 0 replies; 11+ messages in thread
From: Max Gurtovoy @ 2019-05-24 19:36 UTC (permalink / raw)



On 5/23/2019 6:33 PM, Christoph Hellwig wrote:
> On Thu, May 23, 2019 at 02:05:23PM +0300, Max Gurtovoy wrote:
>> On 5/23/2019 1:22 PM, Christoph Hellwig wrote:
>>>> +static void nvme_rdma_ctrl_dev_put(struct nvme_rdma_ctrl *ctrl,
>>>> +				   struct nvme_rdma_device *dev)
>>>> +{
>>>> +	ctrl->device = 	NULL;
>>> double whitespace here.
>> thanks.
>>>> +	kref_put(&dev->ref, nvme_rdma_free_dev);
>>>> +}
>>>> +
>>>> +static void nvme_rdma_ctrl_dev_get(struct nvme_rdma_ctrl *ctrl,
>>>> +				   struct nvme_rdma_device *dev)
>>>> +{
>>>> +	kref_get(&dev->ref);
>>>> +	ctrl->device = dev;
>>> Why aren't these using nvme_rdma_dev_put / nvme_rdma_dev_get?
>> Since we change the ctrl->device pointer here.
>>
>> Do you prefer doing it without helper ?
> We can still use the helper underneath instead of open coding the kref
> calls, right?

Since nvme_rdma_ctrl_dev_get is a void function, I preferred calling the
void kref_get instead of the non-void nvme_rdma_dev_get.
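
For context, the semantic difference between the two (a sketch of the
standard kref API, not code from this patch):

/* kref_get(): unconditional and void; only safe when the caller
 * already knows the object is alive (e.g. it holds another ref). */
kref_get(&dev->ref);

/* kref_get_unless_zero(): can fail if the refcount already dropped
 * to zero, so the caller has to check the return value. */
if (!kref_get_unless_zero(&dev->ref))
	return -ENODEV;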

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH 1/1] nvme-rdma: Add association between ctrl and transport dev
  2019-05-24 19:30   ` Max Gurtovoy
@ 2019-05-24 23:05     ` Sagi Grimberg
  2019-05-28 11:50       ` Max Gurtovoy
  0 siblings, 1 reply; 11+ messages in thread
From: Sagi Grimberg @ 2019-05-24 23:05 UTC (permalink / raw)



>> Do you actually need the extra reference on the device? Why isn't just
>> the set/clear of ctrl->device sufficient? These routines should be
>> serialized..
> 
> Yes, we need this reference since the queues are freed during
> reconnection, and without the ctrl reference the device will be freed
> as well.

The device should be freed, shouldn't it? I thought that this was the
purpose of the ref removal from the tagset (which is identical to what
you are trying to replace). I'm not clear on what this is solving that
the prior reference didn't.

> And when you disconnect during reconnection, the block layer will call
> exit_request for each request (but the device is already freed) and we
> will do the needed unmapping from the device..
>
> Not so pleasant..

Which again is why we had the tagset reference in the first place. I
thought you wanted to remove the device reference altogether because
of your bonding use-case..

I'm getting the feeling that we need to map the qe's in queue_rq
and unmap in complete_rq. I don't see any other way around it because
in the bonding use-case we need to wipe out all of the device-related
resources; we need to tear them down and be prepared to get
a different device on the next (re)connect.
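
A rough sketch of that scheme (an assumption about a possible follow-up,
not code posted in this thread): map the sqe on submission and unmap it
on completion, so no per-device DMA mapping outlives a controller
teardown:

static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
		const struct blk_mq_queue_data *bd)
{
	struct nvme_rdma_queue *queue = hctx->driver_data;
	struct request *rq = bd->rq;
	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
	struct ib_device *ibdev = queue->device->dev;

	/* map the command against whatever device is current right now */
	req->sqe.dma = ib_dma_map_single(ibdev, req->sqe.data,
			sizeof(struct nvme_command), DMA_TO_DEVICE);
	if (unlikely(ib_dma_mapping_error(ibdev, req->sqe.dma)))
		return BLK_STS_RESOURCE;

	/* ... rest of the submission path unchanged ... */
}

static void nvme_rdma_complete_rq(struct request *rq)
{
	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);

	/* unmap on completion, so exit_request has nothing left to undo */
	ib_dma_unmap_single(req->queue->device->dev, req->sqe.dma,
			sizeof(struct nvme_command), DMA_TO_DEVICE);
	nvme_complete_rq(rq);
}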

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH 1/1] nvme-rdma: Add association between ctrl and transport dev
  2019-05-24 23:05     ` Sagi Grimberg
@ 2019-05-28 11:50       ` Max Gurtovoy
  2019-05-28 19:36         ` Sagi Grimberg
  0 siblings, 1 reply; 11+ messages in thread
From: Max Gurtovoy @ 2019-05-28 11:50 UTC (permalink / raw)



On 5/25/2019 2:05 AM, Sagi Grimberg wrote:
>
>>> Do you actually need the extra reference on the device? Why isn't
>>> just the set/clear of ctrl->device sufficient? These routines should
>>> be serialized..
>>
>> Yes, we need this reference since the queues are freed during
>> reconnection, and without the ctrl reference the device will be freed
>> as well.
>
> The device should be freed, shouldn't it? I thought that this was the
> purpose of the ref removal from the tagset (which is identical to what
> you are trying to replace). I'm not clear on what this is solving that
> the prior reference didn't.

The prior reference was per tagset, not per controller.


>
>> And when you disconnect during reconnection, the block layer will
>> call exit_request for each request (but the device is already freed)
>> and we will do the needed unmapping from the device..
>>
>> Not so pleasant..
>
> Which again is why we had the tagset reference in the first place. I
> thought you wanted to remove the device reference altogether because
> of your bonding use-case..

We remove it in the de-configuration stage.


>
> I'm getting the feeling that we need to map the qe's in queue_rq
> and unmap in complete_rq. I don't see any other way around it because
> in the bonding use-case we need to wipe out all of the device-related
> resources; we need to tear them down and be prepared to get
> a different device on the next (re)connect.

Yes, I thought of using the block layer iterator, but as long as it
won't hurt performance I'll do it in queue_rq/complete_rq.

Let me check that...

-Max.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH 1/1] nvme-rdma: Add association between ctrl and transport dev
  2019-05-28 11:50       ` Max Gurtovoy
@ 2019-05-28 19:36         ` Sagi Grimberg
  0 siblings, 0 replies; 11+ messages in thread
From: Sagi Grimberg @ 2019-05-28 19:36 UTC (permalink / raw)



>> I'm getting the feeling that we need to map the qe's in queue_rq
>> and unmap in complete_rq. I don't see any other way around it because
>> in the bonding use-case we need to wipe out all of the device-related
>> resources; we need to tear them down and be prepared to get
>> a different device on the next (re)connect.
> 
> Yes, I thought of using the block layer iterator, but as long as it
> won't hurt performance I'll do it in queue_rq/complete_rq.
> 
> Let me check that...

OK,

So let's just do that instead, and not try to get too smart with device
referencing...

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2019-05-28 19:36 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-05-21 13:19 [PATCH 1/1] nvme-rdma: Add association between ctrl and transport dev Max Gurtovoy
2019-05-23  8:13 ` [Suspected-Phishing][PATCH " Max Gurtovoy
2019-05-23 10:22 ` [PATCH " Christoph Hellwig
2019-05-23 11:05   ` Max Gurtovoy
2019-05-23 15:33     ` Christoph Hellwig
2019-05-24 19:36       ` Max Gurtovoy
2019-05-24  7:05 ` Sagi Grimberg
2019-05-24 19:30   ` Max Gurtovoy
2019-05-24 23:05     ` Sagi Grimberg
2019-05-28 11:50       ` Max Gurtovoy
2019-05-28 19:36         ` Sagi Grimberg
