All of lore.kernel.org
 help / color / mirror / Atom feed
From: Sagi Grimberg <sagi@grimberg.me>
To: Ming Lei <ming.lei@redhat.com>, Christoph Hellwig <hch@lst.de>,
	Keith Busch <kbusch@kernel.org>,
	linux-nvme@lists.infradead.org
Cc: Yi Zhang <yi.zhang@redhat.com>, linux-block@vger.kernel.org
Subject: Re: [PATCH 1/2] nvme: add API of nvme_delete_dead_ctrl
Date: Tue, 6 Jun 2023 00:48:32 +0300	[thread overview]
Message-ID: <5ac19a92-bfc8-1e1e-a37d-983f19217df7@grimberg.me> (raw)
In-Reply-To: <20230530094322.258090-2-ming.lei@redhat.com>


> When driver confirms that the controller is dead, this controller should
> be deleted with marking as DEAD. Otherwise, upper layer may wait forever
> in __bio_queue_enter() since the disk won't be marked as DEAD.
> Especially, in del_gendisk(), disk won't be marked as DEAD unless bdev
> sync & invalidate returns. If any writeback IO waits in
> __bio_queue_enter(), IO deadlock is caused.
> 
> Add nvme_delete_dead_ctrl() for avoiding such kind of io deadlock.
> 
> Signed-off-by: Ming Lei <ming.lei@redhat.com>
> ---
>   drivers/nvme/host/core.c | 24 +++++++++++++++++++++++-
>   drivers/nvme/host/nvme.h |  1 +
>   2 files changed, 24 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index ccb6eb1282f8..413213cfa417 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -227,16 +227,38 @@ static void nvme_delete_ctrl_work(struct work_struct *work)
>   	nvme_do_delete_ctrl(ctrl);
>   }
>   
> -int nvme_delete_ctrl(struct nvme_ctrl *ctrl)
> +static int __nvme_delete_ctrl(struct nvme_ctrl *ctrl,
> +			      enum nvme_ctrl_state state)
>   {
> +	if (state != NVME_CTRL_DELETING && state != NVME_CTRL_DEAD)
> +		return -EINVAL;
> +
>   	if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_DELETING))
>   		return -EBUSY;
> +	if (state == NVME_CTRL_DEAD) {
> +		if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_DEAD))
> +			return -EBUSY;
> +	}
>   	if (!queue_work(nvme_delete_wq, &ctrl->delete_work))
>   		return -EBUSY;
>   	return 0;
>   }

the user can trigger a delete in exactly the same condition as
the transport (say, a nanosecond before the transport exhausts
max_reconnects).

I think that we'd want something like
--
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 841f155fe0b3..6c718ad46e06 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -231,6 +231,11 @@ int nvme_delete_ctrl(struct nvme_ctrl *ctrl)
  {
         if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_DELETING))
                 return -EBUSY;
+
+       if (ctrl->ops->transport_disconnected &&
+           ctrl->ops->transport_disconnected(ctrl))
+               nvme_change_ctrl_state(ctrl, NVME_CTRL_DEAD);
+
         if (!queue_work(nvme_delete_wq, &ctrl->delete_work))
                 return -EBUSY;
         return 0;
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 054bf2f8b1a1..657d3f79953d 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -2828,6 +2828,13 @@ static bool nvme_pci_supports_pci_p2pdma(struct 
nvme_ctrl *ctrl)
         return dma_pci_p2pdma_supported(dev->dev);
  }

+static bool nvme_pci_disconnected(struct nvme_ctrl *nctrl)
+{
+       struct nvme_dev *dev = to_nvme_dev(ctrl);
+
+       return !pci_device_is_present(to_pci_dev(dev->dev));
+}
+
  static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
         .name                   = "pcie",
         .module                 = THIS_MODULE,
@@ -2841,6 +2848,7 @@ static const struct nvme_ctrl_ops 
nvme_pci_ctrl_ops = {
         .get_address            = nvme_pci_get_address,
         .print_device_info      = nvme_pci_print_device_info,
         .supports_pci_p2pdma    = nvme_pci_supports_pci_p2pdma,
+       .transport_disconnected = nvme_pci_disconnected,
  };

  static int nvme_dev_map(struct nvme_dev *dev)
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 0eb79696fb73..2a03df693b0e 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -2235,6 +2235,18 @@ static void nvme_rdma_reset_ctrl_work(struct 
work_struct *work)
         nvme_rdma_reconnect_or_remove(ctrl);
  }

+static bool nvme_rdma_disconnected(struct nvme_ctrl *nctrl)
+{
+       struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
+       int i;
+
+       for (i = 0; i < ctrl->queue_count; i++) {
+               if (test_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[i].flags))
+                       return false;
+       }
+       return true;
+}
+
  static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
         .name                   = "rdma",
         .module                 = THIS_MODULE,
@@ -2247,6 +2259,7 @@ static const struct nvme_ctrl_ops 
nvme_rdma_ctrl_ops = {
         .delete_ctrl            = nvme_rdma_delete_ctrl,
         .get_address            = nvmf_get_address,
         .stop_ctrl              = nvme_rdma_stop_ctrl,
+       .transport_disconnected = nvme_rdma_disconnected,
  };

  /*
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index fe01ba75570d..043ce9878560 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -2536,6 +2536,18 @@ static int nvme_tcp_get_address(struct nvme_ctrl 
*ctrl, char *buf, int size)
         return len;
  }

+static bool nvme_tcp_disconnected(struct nvme_ctrl *nctrl)
+{
+       struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
+       int i;
+
+       for (i = 0; i < ctrl->queue_count; i++) {
+               if (test_bit(NVME_TCP_Q_LIVE, &ctrl->queues[i].flags))
+                       return false;
+       }
+       return true;
+}
+
  static const struct blk_mq_ops nvme_tcp_mq_ops = {
         .queue_rq       = nvme_tcp_queue_rq,
         .commit_rqs     = nvme_tcp_commit_rqs,
@@ -2569,6 +2581,7 @@ static const struct nvme_ctrl_ops 
nvme_tcp_ctrl_ops = {
         .delete_ctrl            = nvme_tcp_delete_ctrl,
         .get_address            = nvme_tcp_get_address,
         .stop_ctrl              = nvme_tcp_stop_ctrl,
+       .transport_disconnected = nvme_tcp_disconnected,
  };

  static bool
--

  reply	other threads:[~2023-06-05 21:48 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-05-30  9:43 [PATCH 0/2] nvme: add nvme_delete_dead_ctrl for avoiding io deadlock Ming Lei
2023-05-30  9:43 ` [PATCH 1/2] nvme: add API of nvme_delete_dead_ctrl Ming Lei
2023-06-05 21:48   ` Sagi Grimberg [this message]
2023-06-06  0:51     ` Ming Lei
2023-06-06  6:28       ` Sagi Grimberg
2023-05-30  9:43 ` [PATCH 2/2] nvme: rdma/tcp: call nvme_delete_dead_ctrl for handling reconnect failure Ming Lei
2023-06-05 14:31   ` Yi Zhang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=5ac19a92-bfc8-1e1e-a37d-983f19217df7@grimberg.me \
    --to=sagi@grimberg.me \
    --cc=hch@lst.de \
    --cc=kbusch@kernel.org \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-nvme@lists.infradead.org \
    --cc=ming.lei@redhat.com \
    --cc=yi.zhang@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.