* [PATCH] Revert "nvme-rdma: remove redundant reference between ib_device and tagset"
@ 2019-06-03 23:11 Sagi Grimberg
  2019-06-03 23:14 ` Sagi Grimberg
  0 siblings, 1 reply; 14+ messages in thread
From: Sagi Grimberg @ 2019-06-03 23:11 UTC (permalink / raw)


This commit caused a kernel panic when disconnecting from an
inaccessible controller.

--
nvme nvme0: Removing ctrl: NQN "testnqn1"
nvme_rdma: nvme_rdma_exit_request: hctx 0 queue_idx 1
BUG: unable to handle kernel paging request at 0000000080000228
PGD 0 P4D 0
Oops: 0000 [#1] SMP PTI
...
Call Trace:
 blk_mq_exit_hctx+0x5c/0xf0
 blk_mq_exit_queue+0xd4/0x100
 blk_cleanup_queue+0x9a/0xc0
 nvme_rdma_destroy_io_queues+0x52/0x60 [nvme_rdma]
 nvme_rdma_shutdown_ctrl+0x3e/0x80 [nvme_rdma]
 nvme_do_delete_ctrl+0x53/0x80 [nvme_core]
 nvme_sysfs_delete+0x45/0x60 [nvme_core]
 kernfs_fop_write+0x105/0x180
 vfs_write+0xad/0x1a0
 ksys_write+0x5a/0xd0
 do_syscall_64+0x55/0x110
 entry_SYSCALL_64_after_hwframe+0x44/0xa9
RIP: 0033:0x7fa215417154
--

Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/host/rdma.c | 34 +++++++++++++++++++++++++++++-----
 1 file changed, 29 insertions(+), 5 deletions(-)

diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 26709a2ab593..8aff3a0ab609 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -707,6 +707,15 @@ static int nvme_rdma_alloc_io_queues(struct nvme_rdma_ctrl *ctrl)
 	return ret;
 }
 
+static void nvme_rdma_free_tagset(struct nvme_ctrl *nctrl,
+		struct blk_mq_tag_set *set)
+{
+	struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
+
+	blk_mq_free_tag_set(set);
+	nvme_rdma_dev_put(ctrl->device);
+}
+
 static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl,
 		bool admin)
 {
@@ -745,9 +754,24 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl,
 
 	ret = blk_mq_alloc_tag_set(set);
 	if (ret)
-		return ERR_PTR(ret);
+		goto out;
+
+	/*
+	 * We need a reference on the device as long as the tag_set is alive,
+	 * as the MRs in the request structures need a valid ib_device.
+	 */
+	ret = nvme_rdma_dev_get(ctrl->device);
+	if (!ret) {
+		ret = -EINVAL;
+		goto out_free_tagset;
+	}
 
 	return set;
+
+out_free_tagset:
+	blk_mq_free_tag_set(set);
+out:
+	return ERR_PTR(ret);
 }
 
 static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl,
@@ -755,7 +779,7 @@ static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl,
 {
 	if (remove) {
 		blk_cleanup_queue(ctrl->ctrl.admin_q);
-		blk_mq_free_tag_set(ctrl->ctrl.admin_tagset);
+		nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset);
 	}
 	if (ctrl->async_event_sqe.data) {
 		nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe,
@@ -833,7 +857,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
 		blk_cleanup_queue(ctrl->ctrl.admin_q);
 out_free_tagset:
 	if (new)
-		blk_mq_free_tag_set(ctrl->ctrl.admin_tagset);
+		nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset);
 out_free_async_qe:
 	nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe,
 		sizeof(struct nvme_command), DMA_TO_DEVICE);
@@ -848,7 +872,7 @@ static void nvme_rdma_destroy_io_queues(struct nvme_rdma_ctrl *ctrl,
 {
 	if (remove) {
 		blk_cleanup_queue(ctrl->ctrl.connect_q);
-		blk_mq_free_tag_set(ctrl->ctrl.tagset);
+		nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.tagset);
 	}
 	nvme_rdma_free_io_queues(ctrl);
 }
@@ -889,7 +913,7 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new)
 		blk_cleanup_queue(ctrl->ctrl.connect_q);
 out_free_tag_set:
 	if (new)
-		blk_mq_free_tag_set(ctrl->ctrl.tagset);
+		nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.tagset);
 out_free_io_queues:
 	nvme_rdma_free_io_queues(ctrl);
 	return ret;
-- 
2.17.1
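
For context, the oops above fires in tagset teardown: blk_cleanup_queue()
invokes the driver's exit_request callback for every allocated request, and
that callback dereferences the ib_device to unmap the request's SQE. A rough
sketch of that path (paraphrased from the driver of this era, not the exact
upstream code):

static void nvme_rdma_exit_request(struct blk_mq_tag_set *set,
		struct request *rq, unsigned int hctx_idx)
{
	struct nvme_rdma_ctrl *ctrl = set->driver_data;
	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);

	/*
	 * ctrl->device->dev is the struct ib_device. Without the tagset
	 * holding a device reference (which this revert restores), the
	 * device may already be freed by the time the tagset is torn
	 * down, so the unmap below is a use-after-free: hence the paging
	 * oops in the trace above.
	 */
	nvme_rdma_free_qe(ctrl->device->dev, &req->sqe,
			sizeof(struct nvme_command), DMA_TO_DEVICE);
}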


* [PATCH] Revert "nvme-rdma: remove redundant reference between ib_device and tagset"
  2019-06-03 23:11 [PATCH] Revert "nvme-rdma: remove redundant reference between ib_device and tagset" Sagi Grimberg
@ 2019-06-03 23:14 ` Sagi Grimberg
  2019-06-03 23:26   ` Harris, James R
  0 siblings, 1 reply; 14+ messages in thread
From: Sagi Grimberg @ 2019-06-03 23:14 UTC (permalink / raw)



> This commit caused a kernel panic when disconnecting from an
> inaccessible controller.
> 
> --
> nvme nvme0: Removing ctrl: NQN "testnqn1"
> nvme_rdma: nvme_rdma_exit_request: hctx 0 queue_idx 1
> BUG: unable to handle kernel paging request at 0000000080000228
> PGD 0 P4D 0
> Oops: 0000 [#1] SMP PTI
> ...
> Call Trace:
>   blk_mq_exit_hctx+0x5c/0xf0
>   blk_mq_exit_queue+0xd4/0x100
>   blk_cleanup_queue+0x9a/0xc0
>   nvme_rdma_destroy_io_queues+0x52/0x60 [nvme_rdma]
>   nvme_rdma_shutdown_ctrl+0x3e/0x80 [nvme_rdma]
>   nvme_do_delete_ctrl+0x53/0x80 [nvme_core]
>   nvme_sysfs_delete+0x45/0x60 [nvme_core]
>   kernfs_fop_write+0x105/0x180
>   vfs_write+0xad/0x1a0
>   ksys_write+0x5a/0xd0
>   do_syscall_64+0x55/0x110
>   entry_SYSCALL_64_after_hwframe+0x44/0xa9
> RIP: 0033:0x7fa215417154
> --
> 

Should add:
Reported-by: Harris, James R <james.r.harris@intel.com>

> Signed-off-by: Sagi Grimberg <sagi@grimberg.me>


* [PATCH] Revert "nvme-rdma: remove redundant reference between ib_device and tagset"
  2019-06-03 23:14 ` Sagi Grimberg
@ 2019-06-03 23:26   ` Harris, James R
  2019-06-04 11:11     ` Max Gurtovoy
  0 siblings, 1 reply; 14+ messages in thread
From: Harris, James R @ 2019-06-03 23:26 UTC (permalink / raw)




On 6/3/19, 4:14 PM, "Sagi Grimberg" <sagi@grimberg.me> wrote:

    
    > This commit caused a kernel panic when disconnecting from an
    > inaccessible controller.
    > 
    > --
    > nvme nvme0: Removing ctrl: NQN "testnqn1"
    > nvme_rdma: nvme_rdma_exit_request: hctx 0 queue_idx 1
    > BUG: unable to handle kernel paging request at 0000000080000228
    > PGD 0 P4D 0
    > Oops: 0000 [#1] SMP PTI
    > ...
    > Call Trace:
    >   blk_mq_exit_hctx+0x5c/0xf0
    >   blk_mq_exit_queue+0xd4/0x100
    >   blk_cleanup_queue+0x9a/0xc0
    >   nvme_rdma_destroy_io_queues+0x52/0x60 [nvme_rdma]
    >   nvme_rdma_shutdown_ctrl+0x3e/0x80 [nvme_rdma]
    >   nvme_do_delete_ctrl+0x53/0x80 [nvme_core]
    >   nvme_sysfs_delete+0x45/0x60 [nvme_core]
    >   kernfs_fop_write+0x105/0x180
    >   vfs_write+0xad/0x1a0
    >   ksys_write+0x5a/0xd0
    >   do_syscall_64+0x55/0x110
    >   entry_SYSCALL_64_after_hwframe+0x44/0xa9
    > RIP: 0033:0x7fa215417154
    > --
    > 
    
    Should add:
    Reported-by: Harris, James R <james.r.harris@intel.com>

Thanks Sagi.  I can confirm that reverting this patch eliminated the kernel panic on my system.
    
    > Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
    


* [PATCH] Revert "nvme-rdma: remove redundant reference between ib_device and tagset"
  2019-06-03 23:26   ` Harris, James R
@ 2019-06-04 11:11     ` Max Gurtovoy
  2019-06-04 13:33       ` [Suspected-Phishing]Re: " Max Gurtovoy
  0 siblings, 1 reply; 14+ messages in thread
From: Max Gurtovoy @ 2019-06-04 11:11 UTC (permalink / raw)


Hi Sagi/Christoph,

Please don't revert this commit on the branch yet. I'm sending a fix
for it in an hour for James' approval.

-Max.

On 6/4/2019 2:26 AM, Harris, James R wrote:
>
> On 6/3/19, 4:14 PM, "Sagi Grimberg" <sagi@grimberg.me> wrote:
>
>      
>      > This commit caused a kernel panic when disconnecting from an
>      > inaccessible controller.
>      >
>      > --
>      > nvme nvme0: Removing ctrl: NQN "testnqn1"
>      > nvme_rdma: nvme_rdma_exit_request: hctx 0 queue_idx 1
>      > BUG: unable to handle kernel paging request at 0000000080000228
>      > PGD 0 P4D 0
>      > Oops: 0000 [#1] SMP PTI
>      > ...
>      > Call Trace:
>      >   blk_mq_exit_hctx+0x5c/0xf0
>      >   blk_mq_exit_queue+0xd4/0x100
>      >   blk_cleanup_queue+0x9a/0xc0
>      >   nvme_rdma_destroy_io_queues+0x52/0x60 [nvme_rdma]
>      >   nvme_rdma_shutdown_ctrl+0x3e/0x80 [nvme_rdma]
>      >   nvme_do_delete_ctrl+0x53/0x80 [nvme_core]
>      >   nvme_sysfs_delete+0x45/0x60 [nvme_core]
>      >   kernfs_fop_write+0x105/0x180
>      >   vfs_write+0xad/0x1a0
>      >   ksys_write+0x5a/0xd0
>      >   do_syscall_64+0x55/0x110
>      >   entry_SYSCALL_64_after_hwframe+0x44/0xa9
>      > RIP: 0033:0x7fa215417154
>      > --
>      >
>      
>      Should add:
>      Reported-by: Harris, James R <james.r.harris@intel.com>
>
> Thanks Sagi.  I can confirm that reverting this patch eliminated the kernel panic on my system.
>      
>      > Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
>      
>


* [Suspected-Phishing]Re: [PATCH] Revert "nvme-rdma: remove redundant reference between ib_device and tagset"
  2019-06-04 11:11     ` Max Gurtovoy
@ 2019-06-04 13:33       ` Max Gurtovoy
  2019-06-04 15:47         ` Sagi Grimberg
                           ` (2 more replies)
  0 siblings, 3 replies; 14+ messages in thread
From: Max Gurtovoy @ 2019-06-04 13:33 UTC (permalink / raw)


James,

Can you test the attached patch (without the revert)?

-Max.

On 6/4/2019 2:11 PM, Max Gurtovoy wrote:
> Hi Sagi/Christoph,
>
> Please don't revert this commit on the branch yet. I'm sending a fix
> for it in an hour for James' approval.
>
> -Max.
>
> On 6/4/2019 2:26 AM, Harris, James R wrote:
>>
>> On 6/3/19, 4:14 PM, "Sagi Grimberg" <sagi@grimberg.me> wrote:
>>
>>      > This commit caused a kernel panic when disconnecting from an
>>      > inaccessible controller.
>>      >
>>      > --
>>      > nvme nvme0: Removing ctrl: NQN "testnqn1"
>>      > nvme_rdma: nvme_rdma_exit_request: hctx 0 queue_idx 1
>>      > BUG: unable to handle kernel paging request at 0000000080000228
>>      > PGD 0 P4D 0
>>      > Oops: 0000 [#1] SMP PTI
>>      > ...
>>      > Call Trace:
>>      >   blk_mq_exit_hctx+0x5c/0xf0
>>      >   blk_mq_exit_queue+0xd4/0x100
>>      >   blk_cleanup_queue+0x9a/0xc0
>>      >   nvme_rdma_destroy_io_queues+0x52/0x60 [nvme_rdma]
>>      >   nvme_rdma_shutdown_ctrl+0x3e/0x80 [nvme_rdma]
>>      >   nvme_do_delete_ctrl+0x53/0x80 [nvme_core]
>>      >   nvme_sysfs_delete+0x45/0x60 [nvme_core]
>>      >   kernfs_fop_write+0x105/0x180
>>      >   vfs_write+0xad/0x1a0
>>      >   ksys_write+0x5a/0xd0
>>      >   do_syscall_64+0x55/0x110
>>      >   entry_SYSCALL_64_after_hwframe+0x44/0xa9
>>      > RIP: 0033:0x7fa215417154
>>      > --
>>      >
>>      Should add:
>>      Reported-by: Harris, James R <james.r.harris@intel.com>
>>
>> Thanks Sagi.  I can confirm that reverting this patch eliminated the
>> kernel panic on my system.
>>      > Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
>>
>
> _______________________________________________
> Linux-nvme mailing list
> Linux-nvme@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-nvme
>

* [Suspected-Phishing]Re: [PATCH] Revert "nvme-rdma: remove redundant reference between ib_device and tagset"
  2019-06-04 13:33       ` [Suspected-Phishing]Re: " Max Gurtovoy
@ 2019-06-04 15:47         ` Sagi Grimberg
  2019-06-04 15:50           ` Max Gurtovoy
  2019-06-04 19:11         ` Christoph Hellwig
  2019-06-04 20:49         ` Harris, James R
  2 siblings, 1 reply; 14+ messages in thread
From: Sagi Grimberg @ 2019-06-04 15:47 UTC (permalink / raw)



> James,
> 
> Can you test the attached patch (without the revert)?

Max, I think that it's pretty late for this change for 5.2;
I'm leaning towards reverting the offending patch for 5.2-rc and
getting this into 5.3.


* [Suspected-Phishing]Re: [PATCH] Revert "nvme-rdma: remove redundant reference between ib_device and tagset"
  2019-06-04 15:47         ` Sagi Grimberg
@ 2019-06-04 15:50           ` Max Gurtovoy
  2019-06-04 15:55             ` Sagi Grimberg
  0 siblings, 1 reply; 14+ messages in thread
From: Max Gurtovoy @ 2019-06-04 15:50 UTC (permalink / raw)



On 6/4/2019 6:47 PM, Sagi Grimberg wrote:
>
>> James,
>>
>> Can you test the attached patch (without the revert)?
>
> Max, I think that it's pretty late for this change for 5.2;
> I'm leaning towards reverting the offending patch for 5.2-rc and
> getting this into 5.3.

Are you sure?

We're only at rc3...


* [Suspected-Phishing]Re: [PATCH] Revert "nvme-rdma: remove redundant reference between ib_device and tagset"
  2019-06-04 15:50           ` Max Gurtovoy
@ 2019-06-04 15:55             ` Sagi Grimberg
  2019-06-04 19:05               ` Christoph Hellwig
  0 siblings, 1 reply; 14+ messages in thread
From: Sagi Grimberg @ 2019-06-04 15:55 UTC (permalink / raw)



>>> James,
>>>
>>> Can you test the attached patch (without the revert)?
>>
>> Max, I think that it's pretty late for this change for 5.2;
>> I'm leaning towards reverting the offending patch for 5.2-rc and
>> getting this into 5.3.
> 
> Are you sure?
> 
> We're only at rc3...

This is beyond a bug fix; it's a behavior change, with possible other
implications. This is really merge window material, I think. I would
rather let it sit in nvme-5.3 for a while before we merge it.

Christoph, do you have a different preference?


* [Suspected-Phishing]Re: [PATCH] Revert "nvme-rdma: remove redundant reference between ib_device and tagset"
  2019-06-04 15:55             ` Sagi Grimberg
@ 2019-06-04 19:05               ` Christoph Hellwig
  2019-06-04 19:07                 ` Harris, James R
  2019-06-04 23:54                 ` Sagi Grimberg
  0 siblings, 2 replies; 14+ messages in thread
From: Christoph Hellwig @ 2019-06-04 19:05 UTC (permalink / raw)


On Tue, Jun 04, 2019 at 08:55:56AM -0700, Sagi Grimberg wrote:
>
>>>> James,
>>>>
>>>> Can you test the attached patch (without the revert)?
>>>
>>> Max, I think that it's pretty late for this change for 5.2;
>>> I'm leaning towards reverting the offending patch for 5.2-rc and
>>> getting this into 5.3.
>>
>> Are you sure?
>>
>> We're only at rc3...
>
> This is beyond a bug fix; it's a behavior change, with possible other
> implications. This is really merge window material, I think. I would
> rather let it sit in nvme-5.3 for a while before we merge it.
>
> Christoph, do you have a different preference?

The original patch from Max fixed a serious issue, so I'd prefer not
to revert it, especially as we are only at rc3.  That assumes we
can agree on a fix in the next couple of days.


* [Suspected-Phishing]Re: [PATCH] Revert "nvme-rdma: remove redundant reference between ib_device and tagset"
  2019-06-04 19:05               ` Christoph Hellwig
@ 2019-06-04 19:07                 ` Harris, James R
  2019-06-04 23:54                 ` Sagi Grimberg
  1 sibling, 0 replies; 14+ messages in thread
From: Harris, James R @ 2019-06-04 19:07 UTC (permalink / raw)




On 6/4/19, 12:05 PM, "Christoph Hellwig" <hch@lst.de> wrote:

    On Tue, Jun 04, 2019 at 08:55:56AM -0700, Sagi Grimberg wrote:
    >
    >>>> James,
    >>>>
    >>>> Can you test the attached patch (without the revert)?
    >>>
    >>> Max, I think that it's pretty late for this change for 5.2;
    >>> I'm leaning towards reverting the offending patch for 5.2-rc and
    >>> getting this into 5.3.
    >>
    >> Are you sure?
    >>
    >> We're only at rc3...
    >
    > This is beyond a bug fix; it's a behavior change, with possible other
    > implications. This is really merge window material, I think. I would
    > rather let it sit in nvme-5.3 for a while before we merge it.
    >
    > Christoph, do you have a different preference?
    
    The original patch from Max fixed a serious issue, so I'd prefer not
    to revert it, especially as we are only at rc3.  That assumes we
    can agree on a fix in the next couple of days.

I'll test Max's proposed patch this afternoon and report back.
    


* [Suspected-Phishing]Re: [PATCH] Revert "nvme-rdma: remove redundant reference between ib_device and tagset"
  2019-06-04 13:33       ` [Suspected-Phishing]Re: " Max Gurtovoy
  2019-06-04 15:47         ` Sagi Grimberg
@ 2019-06-04 19:11         ` Christoph Hellwig
  2019-06-04 22:02           ` Max Gurtovoy
  2019-06-04 20:49         ` Harris, James R
  2 siblings, 1 reply; 14+ messages in thread
From: Christoph Hellwig @ 2019-06-04 19:11 UTC (permalink / raw)


So this basically means we can't keep dma mappings around, which
is a bit unfortunate.  Can you also write a real changelog explaining
the reasoning behind it, which isn't quite obvious to me?

Nitpicks below:

> +static void nvme_rdma_unmap_qe(struct ib_device *ibdev, struct nvme_rdma_qe *qe,
> +			       size_t capsule_size, enum dma_data_direction dir)
>  {
>  	ib_dma_unmap_single(ibdev, qe->dma, capsule_size, dir);
> +}
> +
> +static int nvme_rdma_map_qe(struct ib_device *ibdev, struct nvme_rdma_qe *qe,
> +			    size_t capsule_size, enum dma_data_direction dir)
> +{
> +	qe->dma = ib_dma_map_single(ibdev, qe->data, capsule_size, dir);
> +	return ib_dma_mapping_error(ibdev, qe->dma);
> +}
> +
> +static void nvme_rdma_free_qe(struct nvme_rdma_qe *qe)
> +{
>  	kfree(qe->data);
>  }

All these helpers are pretty pointless now, and we might as well just
open code them.
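
For illustration (not code from either patch), open-coding these helpers
means calling the ib_dma_* primitives directly at each call site:

	/* instead of nvme_rdma_map_qe(ibdev, qe, capsule_size, dir): */
	qe->dma = ib_dma_map_single(ibdev, qe->data, capsule_size, dir);
	if (ib_dma_mapping_error(ibdev, qe->dma))
		return -ENOMEM;

	/* instead of nvme_rdma_unmap_qe(ibdev, qe, capsule_size, dir): */
	ib_dma_unmap_single(ibdev, qe->dma, capsule_size, dir);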

> +static void nvme_rdma_free_mapped_qe(struct ib_device *ibdev,
> +				     struct nvme_rdma_qe *qe,
> +				     size_t capsule_size,
> +				     enum dma_data_direction dir)

Can you use normal two-tab indents for your new functions?

>  
>  	ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ctrl->device->dev);
>  
> -	error = nvme_rdma_alloc_qe(ctrl->device->dev, &ctrl->async_event_sqe,
> -			sizeof(struct nvme_command), DMA_TO_DEVICE);
> +	error = nvme_rdma_alloc_mapped_qe(ctrl->device->dev,
> +					  &ctrl->async_event_sqe,
> +					  sizeof(struct nvme_command),
> +					  DMA_TO_DEVICE);

So.. If keeping SQEs mapped is not fine, why is keeping the AEN SQE
around fine?

> +		if (err == -ENOMEM || err == -EAGAIN)
> +			ret = BLK_STS_RESOURCE;
> +		else
> +			ret = BLK_STS_IOERR;
> +		goto unmap_qe;
>  	}
>  
>  	sqe->cqe.done = nvme_rdma_send_done;
> @@ -1735,14 +1781,20 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
>  			req->mr ? &req->reg_wr.wr : NULL);
>  	if (unlikely(err)) {
>  		nvme_rdma_unmap_data(queue, rq);
> -		goto err;
> +		if (err == -ENOMEM || err == -EAGAIN)
> +			ret = BLK_STS_RESOURCE;
> +		else
> +			ret = BLK_STS_IOERR;
> +		goto unmap_qe;

It would be nice to keep the status mapping in a single place behind
the goto label.
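
For illustration, one way to do that is a small helper, so both failure
sites share a single errno mapping (a hypothetical helper, not code from
the patch):

/*
 * Hypothetical helper: keep the errno -> blk_status_t mapping in one
 * place, so each failure site is just
 * "ret = nvme_rdma_map_status(err); goto unmap_qe;".
 */
static blk_status_t nvme_rdma_map_status(int err)
{
	if (err == -ENOMEM || err == -EAGAIN)
		return BLK_STS_RESOURCE;
	return BLK_STS_IOERR;
}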

> +out:
> +	return ret;

No real need for the label here, you could just return directly.


* [Suspected-Phishing]Re: [PATCH] Revert "nvme-rdma: remove redundant reference between ib_device and tagset"
  2019-06-04 13:33       ` [Suspected-Phishing]Re: " Max Gurtovoy
  2019-06-04 15:47         ` Sagi Grimberg
  2019-06-04 19:11         ` Christoph Hellwig
@ 2019-06-04 20:49         ` Harris, James R
  2 siblings, 0 replies; 14+ messages in thread
From: Harris, James R @ 2019-06-04 20:49 UTC (permalink / raw)




On 6/4/19, 6:34 AM, "Max Gurtovoy" <maxg@mellanox.com> wrote:

    James,
    
    Can you test the attached patch (without the revert)?

Hi Max,

This patch works on my system.

Tested-by: Jim Harris <james.r.harris@intel.com>

Thanks,

-Jim

    
    -Max.
    
    On 6/4/2019 2:11 PM, Max Gurtovoy wrote:
    > Hi Sagi/Christoph,
    >
    > Please don't revert this commit on the branch yet. I'm sending a fix
    > for it in an hour for James' approval.
    >
    > -Max.
    >
    > On 6/4/2019 2:26 AM, Harris, James R wrote:
    >>
    >> On 6/3/19, 4:14 PM, "Sagi Grimberg" <sagi@grimberg.me> wrote:
    >>
    >>      > This commit caused a kernel panic when disconnecting from an
    >>      > inaccessible controller.
    >>      >
    >>      > --
    >>      > nvme nvme0: Removing ctrl: NQN "testnqn1"
    >>      > nvme_rdma: nvme_rdma_exit_request: hctx 0 queue_idx 1
    >>      > BUG: unable to handle kernel paging request at 0000000080000228
    >>      > PGD 0 P4D 0
    >>      > Oops: 0000 [#1] SMP PTI
    >>      > ...
    >>      > Call Trace:
    >>      >   blk_mq_exit_hctx+0x5c/0xf0
    >>      >   blk_mq_exit_queue+0xd4/0x100
    >>      >   blk_cleanup_queue+0x9a/0xc0
    >>      >   nvme_rdma_destroy_io_queues+0x52/0x60 [nvme_rdma]
    >>      >   nvme_rdma_shutdown_ctrl+0x3e/0x80 [nvme_rdma]
    >>      >   nvme_do_delete_ctrl+0x53/0x80 [nvme_core]
    >>      >   nvme_sysfs_delete+0x45/0x60 [nvme_core]
    >>      >   kernfs_fop_write+0x105/0x180
    >>      >   vfs_write+0xad/0x1a0
    >>      >   ksys_write+0x5a/0xd0
    >>      >   do_syscall_64+0x55/0x110
    >>      >   entry_SYSCALL_64_after_hwframe+0x44/0xa9
    >>      > RIP: 0033:0x7fa215417154
    >>      > --
    >>      >
    >>      Should add:
    >>      Reported-by: Harris, James R <james.r.harris@intel.com>
    >>
    >> Thanks Sagi.  I can confirm that reverting this patch eliminated the 
    >> kernel panic on my system.
    >>      > Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
    >>
    >
    > _______________________________________________
    > Linux-nvme mailing list
    > Linux-nvme@lists.infradead.org
    > http://lists.infradead.org/mailman/listinfo/linux-nvme
    >
    


* [Suspected-Phishing]Re: [PATCH] Revert "nvme-rdma: remove redundant reference between ib_device and tagset"
  2019-06-04 19:11         ` Christoph Hellwig
@ 2019-06-04 22:02           ` Max Gurtovoy
  0 siblings, 0 replies; 14+ messages in thread
From: Max Gurtovoy @ 2019-06-04 22:02 UTC (permalink / raw)



On 6/4/2019 10:11 PM, Christoph Hellwig wrote:
> So this basically means we can't keep dma mappings around, which
> is a bit unfortunate.  Can you also write a real changelog explaining
> the reasoning behind it, which isn't quite obvious to me?

Sure, I'll explain it when I send the final patch. This one was a quick
and dirty version. I've put it into our nightly regression and I'll run
more manual tests tomorrow as well.

This was actually Sagi's idea: dma map and unmap during the actual
queue_rq and complete_rq instead of having the rdma_ctrl take a
refcount on the rdma_device.

We need this reference since we currently do the dma_unmap in the
exit_request callback, by which point the ibdev might already be freed
(disconnect during a reconnect scenario, for example), and we get the
trace and crash.
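
In outline, the map-per-request approach looks something like this (a
sketch built on the helpers from the quoted diff; field names here are
assumptions, not the final patch):

static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
		const struct blk_mq_queue_data *bd)
{
	struct nvme_rdma_queue *queue = hctx->driver_data;
	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(bd->rq);
	int err;

	/* map the SQE here, while the ib_device is known to be alive */
	err = nvme_rdma_map_qe(queue->device->dev, &req->sqe,
			sizeof(struct nvme_command), DMA_TO_DEVICE);
	if (unlikely(err))
		return BLK_STS_RESOURCE;

	/* ... build the command and post the send work request ... */
	return BLK_STS_OK;
}

static void nvme_rdma_complete_rq(struct request *rq)
{
	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);

	/* unmap here, so exit_request no longer touches the ib_device */
	nvme_rdma_unmap_qe(req->queue->device->dev, &req->sqe,
			sizeof(struct nvme_command), DMA_TO_DEVICE);
	nvme_complete_rq(rq);
}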

My idea for an alternative fix was taking a ctrl refcount on the device
and adding a blk_iter for remapping the requests. But I guess Sagi's
idea is simpler.

Another thing that came to mind while writing the fix is how we deal
with the scenario where the target ctrl's queue depth changes during
re-connection. As we don't destroy the tagset on re-connection, we
might need to add a blk layer helper for that.


>
> Nitpicks below:
>
>> +static void nvme_rdma_unmap_qe(struct ib_device *ibdev, struct nvme_rdma_qe *qe,
>> +			       size_t capsule_size, enum dma_data_direction dir)
>>   {
>>   	ib_dma_unmap_single(ibdev, qe->dma, capsule_size, dir);
>> +}
>> +
>> +static int nvme_rdma_map_qe(struct ib_device *ibdev, struct nvme_rdma_qe *qe,
>> +			    size_t capsule_size, enum dma_data_direction dir)
>> +{
>> +	qe->dma = ib_dma_map_single(ibdev, qe->data, capsule_size, dir);
>> +	return ib_dma_mapping_error(ibdev, qe->dma);
>> +}
>> +
>> +static void nvme_rdma_free_qe(struct nvme_rdma_qe *qe)
>> +{
>>   	kfree(qe->data);
>>   }
> All these helpers are pretty pointless now, and we might as well just
> open code them.
>
>> +static void nvme_rdma_free_mapped_qe(struct ib_device *ibdev,
>> +				     struct nvme_rdma_qe *qe,
>> +				     size_t capsule_size,
>> +				     enum dma_data_direction dir)
> Can you use normal two-tab indents for your new functions?
>
>>   
>>   	ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ctrl->device->dev);
>>   
>> -	error = nvme_rdma_alloc_qe(ctrl->device->dev, &ctrl->async_event_sqe,
>> -			sizeof(struct nvme_command), DMA_TO_DEVICE);
>> +	error = nvme_rdma_alloc_mapped_qe(ctrl->device->dev,
>> +					  &ctrl->async_event_sqe,
>> +					  sizeof(struct nvme_command),
>> +					  DMA_TO_DEVICE);
> So.. If keeping SQEs mapped is not fine, why is keeping the AEN SQE
> around fine?

We alloc/map and free/unmap it on each re-connection/error_recovery, so
we're good here. The same goes for the post_recv ring.

For the SQEs we can't keep the mapping, since we don't destroy the
tagset during error recovery.


>
>> +		if (err == -ENOMEM || err == -EAGAIN)
>> +			ret = BLK_STS_RESOURCE;
>> +		else
>> +			ret = BLK_STS_IOERR;
>> +		goto unmap_qe;
>>   	}
>>   
>>   	sqe->cqe.done = nvme_rdma_send_done;
>> @@ -1735,14 +1781,20 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
>>   			req->mr ? &req->reg_wr.wr : NULL);
>>   	if (unlikely(err)) {
>>   		nvme_rdma_unmap_data(queue, rq);
>> -		goto err;
>> +		if (err == -ENOMEM || err == -EAGAIN)
>> +			ret = BLK_STS_RESOURCE;
>> +		else
>> +			ret = BLK_STS_IOERR;
>> +		goto unmap_qe;
> It would be nice to keep the status mapping in a single place behind
> the goto label.

Yup, I'll do it as it was before.

(Too quick an implementation...)

>> +out:
>> +	return ret;
> No real need for the label here, you could just return directly.
Yup.


* [Suspected-Phishing]Re: [PATCH] Revert "nvme-rdma: remove redundant reference between ib_device and tagset"
  2019-06-04 19:05               ` Christoph Hellwig
  2019-06-04 19:07                 ` Harris, James R
@ 2019-06-04 23:54                 ` Sagi Grimberg
  1 sibling, 0 replies; 14+ messages in thread
From: Sagi Grimberg @ 2019-06-04 23:54 UTC (permalink / raw)



>>>>> Can you test the attached patch (without the revert)?
>>>>
>>>> Max, I think that it's pretty late for this change for 5.2;
>>>> I'm leaning towards reverting the offending patch for 5.2-rc and
>>>> getting this into 5.3.
>>>
>>> Are you sure?
>>>
>>> We're only at rc3...
>>
>> This is beyond a bug fix; it's a behavior change, with possible other
>> implications. This is really merge window material, I think. I would
>> rather let it sit in nvme-5.3 for a while before we merge it.
>>
>> Christoph, do you have a different preference?
> 
> The original patch from Max fixed a serious issue, so I'd prefer not
> to revert it, especially as we are only at rc3.  That assumes we
> can agree on a fix in the next couple of days.

It's not really a serious issue given that it was broken from day 0, and
it requires unloading/resetting a device that is under a bond to get it
to trigger (and that issue is not a crash). Not exactly mainstream, and
the fix broke something as basic as disconnecting while a reconnect is
ongoing.

And this changes the behavior in terms of dma mappings. I'd expect this
to go into the next merge window (which we can push asap) instead of
rushing it.

But if you think that rc3 is early enough that we can rush it, I won't
argue with you; let's hope we won't miss anything else and have to fix
it again...


Thread overview: 14+ messages
2019-06-03 23:11 [PATCH] Revert "nvme-rdma: remove redundant reference between ib_device and tagset" Sagi Grimberg
2019-06-03 23:14 ` Sagi Grimberg
2019-06-03 23:26   ` Harris, James R
2019-06-04 11:11     ` Max Gurtovoy
2019-06-04 13:33       ` [Suspected-Phishing]Re: " Max Gurtovoy
2019-06-04 15:47         ` Sagi Grimberg
2019-06-04 15:50           ` Max Gurtovoy
2019-06-04 15:55             ` Sagi Grimberg
2019-06-04 19:05               ` Christoph Hellwig
2019-06-04 19:07                 ` Harris, James R
2019-06-04 23:54                 ` Sagi Grimberg
2019-06-04 19:11         ` Christoph Hellwig
2019-06-04 22:02           ` Max Gurtovoy
2019-06-04 20:49         ` Harris, James R
