* [PATCH rdma-next] RDMA/mlx5: Add a umr recovery flow
@ 2022-05-15 4:19 Leon Romanovsky
2022-05-26 14:32 ` Jason Gunthorpe
2022-06-07 9:58 ` Leon Romanovsky
0 siblings, 2 replies; 6+ messages in thread
From: Leon Romanovsky @ 2022-05-15 4:19 UTC (permalink / raw)
To: Jason Gunthorpe; +Cc: Aharon Landau, linux-rdma, Michael Guralnik
From: Aharon Landau <aharonl@nvidia.com>
When a UMR fails, the UMR QP state changes to an error state. Therefore,
all the further UMR operations will fail too.
Add a recovery flow to the UMR QP, and repost the flushed WQEs.
Signed-off-by: Aharon Landau <aharonl@nvidia.com>
Reviewed-by: Michael Guralnik <michaelgur@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
drivers/infiniband/hw/mlx5/cq.c | 4 ++
drivers/infiniband/hw/mlx5/mlx5_ib.h | 12 ++++-
drivers/infiniband/hw/mlx5/umr.c | 78 ++++++++++++++++++++++++----
3 files changed, 83 insertions(+), 11 deletions(-)
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 716ab467ac9f..457f57b088c6 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -523,6 +523,10 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq,
"Requestor" : "Responder", cq->mcq.cqn);
mlx5_ib_dbg(dev, "syndrome 0x%x, vendor syndrome 0x%x\n",
err_cqe->syndrome, err_cqe->vendor_err_synd);
+ if (wc->status != IB_WC_WR_FLUSH_ERR &&
+ (*cur_qp)->type == MLX5_IB_QPT_REG_UMR)
+ dev->umrc.state = MLX5_UMR_STATE_RECOVER;
+
if (opcode == MLX5_CQE_REQ_ERR) {
wq = &(*cur_qp)->sq;
wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index df2b566ad73d..80a1c12ca1c2 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -717,13 +717,23 @@ struct mlx5_ib_umr_context {
struct completion done;
};
+enum {
+ MLX5_UMR_STATE_ACTIVE,
+ MLX5_UMR_STATE_RECOVER,
+ MLX5_UMR_STATE_ERR,
+};
+
struct umr_common {
struct ib_pd *pd;
struct ib_cq *cq;
struct ib_qp *qp;
- /* control access to UMR QP
+ /* Protects from UMR QP overflow
*/
struct semaphore sem;
+ /* Protects from using UMR while the UMR is not active
+ */
+ struct mutex lock;
+ unsigned int state;
};
struct mlx5_cache_ent {
diff --git a/drivers/infiniband/hw/mlx5/umr.c b/drivers/infiniband/hw/mlx5/umr.c
index 3a48364c0918..e00b94d1b1ea 100644
--- a/drivers/infiniband/hw/mlx5/umr.c
+++ b/drivers/infiniband/hw/mlx5/umr.c
@@ -176,6 +176,7 @@ int mlx5r_umr_resource_init(struct mlx5_ib_dev *dev)
dev->umrc.pd = pd;
sema_init(&dev->umrc.sem, MAX_UMR_WR);
+ mutex_init(&dev->umrc.lock);
return 0;
@@ -195,6 +196,31 @@ void mlx5r_umr_resource_cleanup(struct mlx5_ib_dev *dev)
ib_dealloc_pd(dev->umrc.pd);
}
+static int mlx5r_umr_recover(struct mlx5_ib_dev *dev)
+{
+ struct umr_common *umrc = &dev->umrc;
+ struct ib_qp_attr attr;
+ int err;
+
+ attr.qp_state = IB_QPS_RESET;
+ err = ib_modify_qp(umrc->qp, &attr, IB_QP_STATE);
+ if (err) {
+ mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n");
+ goto err;
+ }
+
+ err = mlx5r_umr_qp_rst2rts(dev, umrc->qp);
+ if (err)
+ goto err;
+
+ umrc->state = MLX5_UMR_STATE_ACTIVE;
+ return 0;
+
+err:
+ umrc->state = MLX5_UMR_STATE_ERR;
+ return err;
+}
+
static int mlx5r_umr_post_send(struct ib_qp *ibqp, u32 mkey, struct ib_cqe *cqe,
struct mlx5r_umr_wqe *wqe, bool with_data)
{
@@ -231,7 +257,7 @@ static int mlx5r_umr_post_send(struct ib_qp *ibqp, u32 mkey, struct ib_cqe *cqe,
id.ib_cqe = cqe;
mlx5r_finish_wqe(qp, ctrl, seg, size, cur_edge, idx, id.wr_id, 0,
- MLX5_FENCE_MODE_NONE, MLX5_OPCODE_UMR);
+ MLX5_FENCE_MODE_INITIATOR_SMALL, MLX5_OPCODE_UMR);
mlx5r_ring_db(qp, 1, ctrl);
@@ -270,17 +296,49 @@ static int mlx5r_umr_post_send_wait(struct mlx5_ib_dev *dev, u32 mkey,
mlx5r_umr_init_context(&umr_context);
down(&umrc->sem);
- err = mlx5r_umr_post_send(umrc->qp, mkey, &umr_context.cqe, wqe,
- with_data);
- if (err)
- mlx5_ib_warn(dev, "UMR post send failed, err %d\n", err);
- else {
- wait_for_completion(&umr_context.done);
- if (umr_context.status != IB_WC_SUCCESS) {
- mlx5_ib_warn(dev, "reg umr failed (%u)\n",
- umr_context.status);
+ while (true) {
+ mutex_lock(&umrc->lock);
+ if (umrc->state == MLX5_UMR_STATE_ERR) {
+ mutex_unlock(&umrc->lock);
err = -EFAULT;
+ break;
+ }
+
+ if (umrc->state == MLX5_UMR_STATE_RECOVER) {
+ mutex_unlock(&umrc->lock);
+ usleep_range(3000, 5000);
+ continue;
+ }
+
+ err = mlx5r_umr_post_send(umrc->qp, mkey, &umr_context.cqe, wqe,
+ with_data);
+ mutex_unlock(&umrc->lock);
+ if (err) {
+ mlx5_ib_warn(dev, "UMR post send failed, err %d\n",
+ err);
+ break;
}
+
+ wait_for_completion(&umr_context.done);
+
+ if (umr_context.status == IB_WC_SUCCESS)
+ break;
+
+ if (umr_context.status == IB_WC_WR_FLUSH_ERR)
+ continue;
+
+ WARN_ON_ONCE(1);
+ mlx5_ib_warn(dev,
+ "reg umr failed (%u). Trying to recover and resubmit the flushed WQEs\n",
+ umr_context.status);
+ mutex_lock(&umrc->lock);
+ err = mlx5r_umr_recover(dev);
+ mutex_unlock(&umrc->lock);
+ if (err)
+ mlx5_ib_warn(dev, "couldn't recover UMR, err %d\n",
+ err);
+ err = -EFAULT;
+ break;
}
up(&umrc->sem);
return err;
--
2.36.1
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [PATCH rdma-next] RDMA/mlx5: Add a umr recovery flow
2022-05-15 4:19 [PATCH rdma-next] RDMA/mlx5: Add a umr recovery flow Leon Romanovsky
@ 2022-05-26 14:32 ` Jason Gunthorpe
2022-05-26 16:29 ` Leon Romanovsky
2022-06-07 9:58 ` Leon Romanovsky
1 sibling, 1 reply; 6+ messages in thread
From: Jason Gunthorpe @ 2022-05-26 14:32 UTC (permalink / raw)
To: Leon Romanovsky; +Cc: Aharon Landau, linux-rdma, Michael Guralnik
On Sun, May 15, 2022 at 07:19:53AM +0300, Leon Romanovsky wrote:
> @@ -270,17 +296,49 @@ static int mlx5r_umr_post_send_wait(struct mlx5_ib_dev *dev, u32 mkey,
> mlx5r_umr_init_context(&umr_context);
>
> down(&umrc->sem);
> + while (true) {
> + mutex_lock(&umrc->lock);
You need to test this with lockdep, nesting a mutex under a semaphore is
not allowed, AFAIK.
> + err = mlx5r_umr_post_send(umrc->qp, mkey, &umr_context.cqe, wqe,
> + with_data);
> + mutex_unlock(&umrc->lock);
> + if (err) {
> + mlx5_ib_warn(dev, "UMR post send failed, err %d\n",
> + err);
> + break;
> }
> +
> + wait_for_completion(&umr_context.done);
Nor is sleeping under a semaphore.
And, I'm pretty sure, this entire function is called under a spinlock
in some cases.
Jason
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH rdma-next] RDMA/mlx5: Add a umr recovery flow
2022-05-26 14:32 ` Jason Gunthorpe
@ 2022-05-26 16:29 ` Leon Romanovsky
2022-05-26 17:21 ` Jason Gunthorpe
0 siblings, 1 reply; 6+ messages in thread
From: Leon Romanovsky @ 2022-05-26 16:29 UTC (permalink / raw)
To: Jason Gunthorpe; +Cc: Aharon Landau, linux-rdma, Michael Guralnik
On Thu, May 26, 2022 at 11:32:12AM -0300, Jason Gunthorpe wrote:
> On Sun, May 15, 2022 at 07:19:53AM +0300, Leon Romanovsky wrote:
> > @@ -270,17 +296,49 @@ static int mlx5r_umr_post_send_wait(struct mlx5_ib_dev *dev, u32 mkey,
> > mlx5r_umr_init_context(&umr_context);
> >
> > down(&umrc->sem);
> > + while (true) {
> > + mutex_lock(&umrc->lock);
>
> You need to test this with lockdep, nesting a mutex under a semaphore is
> not allowed, AFAIK.
We are running with lockdep all our tests.
>
> > + err = mlx5r_umr_post_send(umrc->qp, mkey, &umr_context.cqe, wqe,
> > + with_data);
> > + mutex_unlock(&umrc->lock);
> > + if (err) {
> > + mlx5_ib_warn(dev, "UMR post send failed, err %d\n",
> > + err);
> > + break;
> > }
> > +
> > + wait_for_completion(&umr_context.done);
>
> Nor is sleeping under a semaphore.
Not according to the kernel/locking/semaphore.c. Semaphores can sleep
and the code protected by semaphores can sleep too.
53 void down(struct semaphore *sem)
54 {
55 unsigned long flags;
56
57 might_sleep();
....
64 }
65 EXPORT_SYMBOL(down);
>
> And, I'm pretty sure, this entire function is called under a spinlock
> in some cases.
Can you point to such flow?
Thanks
>
> Jason
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH rdma-next] RDMA/mlx5: Add a umr recovery flow
2022-05-26 16:29 ` Leon Romanovsky
@ 2022-05-26 17:21 ` Jason Gunthorpe
2022-05-26 17:33 ` Leon Romanovsky
0 siblings, 1 reply; 6+ messages in thread
From: Jason Gunthorpe @ 2022-05-26 17:21 UTC (permalink / raw)
To: Leon Romanovsky; +Cc: Aharon Landau, linux-rdma, Michael Guralnik
On Thu, May 26, 2022 at 07:29:20PM +0300, Leon Romanovsky wrote:
> > > + err = mlx5r_umr_post_send(umrc->qp, mkey, &umr_context.cqe, wqe,
> > > + with_data);
> > > + mutex_unlock(&umrc->lock);
> > > + if (err) {
> > > + mlx5_ib_warn(dev, "UMR post send failed, err %d\n",
> > > + err);
> > > + break;
> > > }
> > > +
> > > + wait_for_completion(&umr_context.done);
> >
> > Nor is sleeping under a semaphore.
>
> Not according to the kernel/locking/semaphore.c. Semaphores can sleep
> and the code protected by semaphores can sleep too.
>
> 53 void down(struct semaphore *sem)
> 54 {
> 55 unsigned long flags;
> 56
> 57 might_sleep();
> ....
> 64 }
> 65 EXPORT_SYMBOL(down);
Hum, OK, I am confused
> > And, I'm pretty sure, this entire function is called under a spinlock
> > in some cases.
>
> Can you point to such flow?
It seems like not anymore, or at least I couldn't find a case.
Jason
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH rdma-next] RDMA/mlx5: Add a umr recovery flow
2022-05-26 17:21 ` Jason Gunthorpe
@ 2022-05-26 17:33 ` Leon Romanovsky
0 siblings, 0 replies; 6+ messages in thread
From: Leon Romanovsky @ 2022-05-26 17:33 UTC (permalink / raw)
To: Jason Gunthorpe; +Cc: Aharon Landau, linux-rdma, Michael Guralnik
On Thu, May 26, 2022 at 02:21:32PM -0300, Jason Gunthorpe wrote:
> On Thu, May 26, 2022 at 07:29:20PM +0300, Leon Romanovsky wrote:
>
> > > > + err = mlx5r_umr_post_send(umrc->qp, mkey, &umr_context.cqe, wqe,
> > > > + with_data);
> > > > + mutex_unlock(&umrc->lock);
> > > > + if (err) {
> > > > + mlx5_ib_warn(dev, "UMR post send failed, err %d\n",
> > > > + err);
> > > > + break;
> > > > }
> > > > +
> > > > + wait_for_completion(&umr_context.done);
> > >
> > > Nor is sleeping under a semaphore.
> >
> > Not according to the kernel/locking/semaphore.c. Semaphores can sleep
> > and the code protected by semaphores can sleep too.
> >
> > 53 void down(struct semaphore *sem)
> > 54 {
> > 55 unsigned long flags;
> > 56
> > 57 might_sleep();
> > ....
> > 64 }
> > 65 EXPORT_SYMBOL(down);
>
> Hum, OK, I am confused
>
> > > And, I'm pretty sure, this entire function is called under a spinlock
> > > in some cases.
> >
> > Can you point to such flow?
>
> It seems like not anymore, or at least I couldn't find a case.
So are we fine with this patch and it can go as is after merge window?
Thanks
>
> Jason
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH rdma-next] RDMA/mlx5: Add a umr recovery flow
2022-05-15 4:19 [PATCH rdma-next] RDMA/mlx5: Add a umr recovery flow Leon Romanovsky
2022-05-26 14:32 ` Jason Gunthorpe
@ 2022-06-07 9:58 ` Leon Romanovsky
1 sibling, 0 replies; 6+ messages in thread
From: Leon Romanovsky @ 2022-06-07 9:58 UTC (permalink / raw)
To: Jason Gunthorpe; +Cc: Aharon Landau, linux-rdma, Michael Guralnik
On Sun, May 15, 2022 at 07:19:53AM +0300, Leon Romanovsky wrote:
> From: Aharon Landau <aharonl@nvidia.com>
>
> When a UMR fails, the UMR QP state changes to an error state. Therefore,
> all the further UMR operations will fail too.
>
> Add a recovery flow to the UMR QP, and repost the flushed WQEs.
>
> Signed-off-by: Aharon Landau <aharonl@nvidia.com>
> Reviewed-by: Michael Guralnik <michaelgur@nvidia.com>
> Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
> ---
> drivers/infiniband/hw/mlx5/cq.c | 4 ++
> drivers/infiniband/hw/mlx5/mlx5_ib.h | 12 ++++-
> drivers/infiniband/hw/mlx5/umr.c | 78 ++++++++++++++++++++++++----
> 3 files changed, 83 insertions(+), 11 deletions(-)
>
Thanks, applied to rdma-next.
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2022-06-07 9:59 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-05-15 4:19 [PATCH rdma-next] RDMA/mlx5: Add a umr recovery flow Leon Romanovsky
2022-05-26 14:32 ` Jason Gunthorpe
2022-05-26 16:29 ` Leon Romanovsky
2022-05-26 17:21 ` Jason Gunthorpe
2022-05-26 17:33 ` Leon Romanovsky
2022-06-07 9:58 ` Leon Romanovsky
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.