All of lore.kernel.org
 help / color / mirror / Atom feed
From: Leon Romanovsky <leon@kernel.org>
To: Jason Gunthorpe <jgg@nvidia.com>
Cc: Doug Ledford <dledford@redhat.com>, Shay Drory <shayd@nvidia.com>,
	linux-rdma@vger.kernel.org
Subject: Re: [PATCH rdma-next] RDMA/restrack: Delay QP deletion till all users are gone
Date: Sun, 25 Apr 2021 16:03:47 +0300	[thread overview]
Message-ID: <YIVosxurbZGlmCOw@unreal> (raw)
In-Reply-To: <20210422142939.GA2407382@nvidia.com>

On Thu, Apr 22, 2021 at 11:29:39AM -0300, Jason Gunthorpe wrote:
> On Wed, Apr 21, 2021 at 08:03:22AM +0300, Leon Romanovsky wrote:
> 
> > I didn't understand when reviewed either, but decided to post it anyway
> > to get possible explanation for this RDMA_RESTRACK_MR || RDMA_RESTRACK_QP
> > check.
> 
> I think the whole thing should look more like this and we delete the
> if entirely.

I have mixed feelings about this approach. Before "destroy can fail disaster",
the restrack goal was to provide the following flow:
1. create new memory object - rdma_restrack_new()
2. create new HW object - .create_XXX() callback in the driver
3. add HW object to the DB - rdma_restrack_del()
....
4. wait for any work on this HW object to complete - rdma_restrack_del()
5. safely destroy HW object - .destroy_XXX()

I really would like to stay with this flow and block any access to the
object that failed to destruct - maybe add to some zombie list.

The proposed prepare/abort/finish flow is much harder to implement correctly.
Let's take as an example ib_destroy_qp_user(), we called to rdma_rw_cleanup_mrs(),
but didn't restore them after .destroy_qp() failure.

Thanks

> 
> diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
> index 2b9ffc21cbc4ad..479b16b8f6723a 100644
> --- a/drivers/infiniband/core/cma.c
> +++ b/drivers/infiniband/core/cma.c
> @@ -1860,6 +1860,7 @@ static void _destroy_id(struct rdma_id_private *id_priv,
>  				iw_destroy_cm_id(id_priv->cm_id.iw);
>  		}
>  		cma_leave_mc_groups(id_priv);
> +		rdma_restrack_del(&id_priv->res);
>  		cma_release_dev(id_priv);
>  	}
>  
> @@ -1873,7 +1874,6 @@ static void _destroy_id(struct rdma_id_private *id_priv,
>  	kfree(id_priv->id.route.path_rec);
>  
>  	put_net(id_priv->id.route.addr.dev_addr.net);
> -	rdma_restrack_del(&id_priv->res);
>  	kfree(id_priv);
>  }
>  
> diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c
> index 15493357cfef09..1808bc2533f155 100644
> --- a/drivers/infiniband/core/counters.c
> +++ b/drivers/infiniband/core/counters.c
> @@ -176,6 +176,8 @@ static void rdma_counter_free(struct rdma_counter *counter)
>  {
>  	struct rdma_port_counter *port_counter;
>  
> +	rdma_restrack_del(&counter->res);
> +
>  	port_counter = &counter->device->port_data[counter->port].port_counter;
>  	mutex_lock(&port_counter->lock);
>  	port_counter->num_counters--;
> @@ -185,7 +187,6 @@ static void rdma_counter_free(struct rdma_counter *counter)
>  
>  	mutex_unlock(&port_counter->lock);
>  
> -	rdma_restrack_del(&counter->res);
>  	kfree(counter->stats);
>  	kfree(counter);
>  }
> diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c
> index 433b426729d4ce..3884db637d33ab 100644
> --- a/drivers/infiniband/core/cq.c
> +++ b/drivers/infiniband/core/cq.c
> @@ -339,11 +339,15 @@ void ib_free_cq(struct ib_cq *cq)
>  		WARN_ON_ONCE(1);
>  	}
>  
> +	rdma_restrack_prepare_del(&cq->res);
>  	rdma_dim_destroy(cq);
>  	trace_cq_free(cq);
>  	ret = cq->device->ops.destroy_cq(cq, NULL);
> -	WARN_ONCE(ret, "Destroy of kernel CQ shouldn't fail");
> -	rdma_restrack_del(&cq->res);
> +	if (WARN_ON(ret)) {
> +		rdma_restrack_abort_del(&cq->res);
> +		return;
> +	}
> +	rdma_restrack_finish_del(&cq->res);
>  	kfree(cq->wc);
>  	kfree(cq);
>  }
> diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
> index 94d83b665a2fe0..f57234ced93ca1 100644
> --- a/drivers/infiniband/core/rdma_core.c
> +++ b/drivers/infiniband/core/rdma_core.c
> @@ -854,6 +854,8 @@ static void ufile_destroy_ucontext(struct ib_uverbs_file *ufile,
>  	struct ib_ucontext *ucontext = ufile->ucontext;
>  	struct ib_device *ib_dev = ucontext->device;
>  
> +	rdma_restrack_prepare_del(&ucontext->res);
> +
>  	/*
>  	 * If we are closing the FD then the user mmap VMAs must have
>  	 * already been destroyed as they hold on to the filep, otherwise
> @@ -868,9 +870,8 @@ static void ufile_destroy_ucontext(struct ib_uverbs_file *ufile,
>  	ib_rdmacg_uncharge(&ucontext->cg_obj, ib_dev,
>  			   RDMACG_RESOURCE_HCA_HANDLE);
>  
> -	rdma_restrack_del(&ucontext->res);
> -
>  	ib_dev->ops.dealloc_ucontext(ucontext);
> +	rdma_restrack_finish_del(&ucontext->res);
>  	WARN_ON(!xa_empty(&ucontext->mmap_xa));
>  	kfree(ucontext);
>  
> diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c
> index 033207882c82ce..8aa1dae40f38a7 100644
> --- a/drivers/infiniband/core/restrack.c
> +++ b/drivers/infiniband/core/restrack.c
> @@ -315,11 +315,49 @@ int rdma_restrack_put(struct rdma_restrack_entry *res)
>  }
>  EXPORT_SYMBOL(rdma_restrack_put);
>  
> -/**
> - * rdma_restrack_del() - delete object from the reource tracking database
> - * @res:  resource entry
> - */
> -void rdma_restrack_del(struct rdma_restrack_entry *res)
> +void rdma_restrack_prepare_del(struct rdma_restrack_entry *res)
> +{
> +	struct rdma_restrack_entry *old;
> +	struct rdma_restrack_root *rt;
> +	struct ib_device *dev;
> +
> +	if (!res->valid)
> +		return;
> +
> +	dev = res_to_dev(res);
> +	rt = &dev->res[res->type];
> +
> +	if (!res->no_track) {
> +		old = xa_cmpxchg(&rt->xa, res->id, res, XA_ZERO_ENTRY,
> +				 GFP_KERNEL);
> +		WARN_ON(old != res);
> +	}
> +
> +	/* Fence access from all concurrent threads, like netlink */
> +	rdma_restrack_put(res);
> +	wait_for_completion(&res->comp);
> +}
> +EXPORT_SYMBOL(rdma_restrack_prepare_del);
> +
> +void rdma_restrack_abort_del(struct rdma_restrack_entry *res)
> +{
> +	struct rdma_restrack_entry *old;
> +	struct rdma_restrack_root *rt;
> +	struct ib_device *dev;
> +
> +	if (!res->valid || res->no_track)
> +		return;
> +
> +	dev = res_to_dev(res);
> +	rt = &dev->res[res->type];
> +
> +	kref_init(&res->kref);
> +	old = xa_cmpxchg(&rt->xa, res->id, XA_ZERO_ENTRY, res, GFP_KERNEL);
> +	WARN_ON(old != res);
> +}
> +EXPORT_SYMBOL(rdma_restrack_abort_del);
> +
> +void rdma_restrack_finish_del(struct rdma_restrack_entry *res)
>  {
>  	struct rdma_restrack_entry *old;
>  	struct rdma_restrack_root *rt;
> @@ -332,24 +370,22 @@ void rdma_restrack_del(struct rdma_restrack_entry *res)
>  		}
>  		return;
>  	}
> -
> -	if (res->no_track)
> -		goto out;
> +	res->valid = false;
>  
>  	dev = res_to_dev(res);
> -	if (WARN_ON(!dev))
> -		return;
> -
>  	rt = &dev->res[res->type];
> -
>  	old = xa_erase(&rt->xa, res->id);
> -	if (res->type == RDMA_RESTRACK_MR || res->type == RDMA_RESTRACK_QP)
> -		return;
> -	WARN_ON(old != res);
> +	WARN_ON(old != NULL);
> +}
> +EXPORT_SYMBOL(rdma_restrack_finish_del);
>  
> -out:
> -	res->valid = false;
> -	rdma_restrack_put(res);
> -	wait_for_completion(&res->comp);
> +/**
> + * rdma_restrack_del() - delete object from the reource tracking database
> + * @res:  resource entry
> + */
> +void rdma_restrack_del(struct rdma_restrack_entry *res)
> +{
> +	rdma_restrack_prepare_del(res);
> +	rdma_restrack_finish_del(res);
>  }
>  EXPORT_SYMBOL(rdma_restrack_del);
> diff --git a/drivers/infiniband/core/restrack.h b/drivers/infiniband/core/restrack.h
> index 6a04fc41f73801..87a670334acabe 100644
> --- a/drivers/infiniband/core/restrack.h
> +++ b/drivers/infiniband/core/restrack.h
> @@ -27,6 +27,9 @@ int rdma_restrack_init(struct ib_device *dev);
>  void rdma_restrack_clean(struct ib_device *dev);
>  void rdma_restrack_add(struct rdma_restrack_entry *res);
>  void rdma_restrack_del(struct rdma_restrack_entry *res);
> +void rdma_restrack_prepare_del(struct rdma_restrack_entry *res);
> +void rdma_restrack_finish_del(struct rdma_restrack_entry *res);
> +void rdma_restrack_abort_del(struct rdma_restrack_entry *res);
>  void rdma_restrack_new(struct rdma_restrack_entry *res,
>  		       enum rdma_restrack_type type);
>  void rdma_restrack_set_name(struct rdma_restrack_entry *res,
> diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
> index 2b0798151fb753..2e761a7eb92847 100644
> --- a/drivers/infiniband/core/verbs.c
> +++ b/drivers/infiniband/core/verbs.c
> @@ -346,13 +346,16 @@ int ib_dealloc_pd_user(struct ib_pd *pd, struct ib_udata *udata)
>  	 */
>  	WARN_ON(atomic_read(&pd->usecnt));
>  
> +	rdma_restrack_prepare_del(&pd->res);
>  	ret = pd->device->ops.dealloc_pd(pd, udata);
> -	if (ret)
> +	if (ret) {
> +		rdma_restrack_abort_del(&pd->res);
>  		return ret;
> +	}
>  
> -	rdma_restrack_del(&pd->res);
> +	rdma_restrack_finish_del(&pd->res);
>  	kfree(pd);
> -	return ret;
> +	return 0;
>  }
>  EXPORT_SYMBOL(ib_dealloc_pd_user);
>  
> @@ -1085,19 +1088,22 @@ int ib_destroy_srq_user(struct ib_srq *srq, struct ib_udata *udata)
>  	if (atomic_read(&srq->usecnt))
>  		return -EBUSY;
>  
> +	rdma_restrack_prepare_del(&srq->res);
>  	ret = srq->device->ops.destroy_srq(srq, udata);
> -	if (ret)
> +	if (ret) {
> +		rdma_restrack_abort_del(&srq->res);
>  		return ret;
> +	}
> +	rdma_restrack_finish_del(&srq->res);
>  
>  	atomic_dec(&srq->pd->usecnt);
>  	if (srq->srq_type == IB_SRQT_XRC)
>  		atomic_dec(&srq->ext.xrc.xrcd->usecnt);
>  	if (ib_srq_has_cq(srq->srq_type))
>  		atomic_dec(&srq->ext.cq->usecnt);
> -	rdma_restrack_del(&srq->res);
>  	kfree(srq);
>  
> -	return ret;
> +	return 0;
>  }
>  EXPORT_SYMBOL(ib_destroy_srq_user);
>  
> @@ -1963,31 +1969,33 @@ int ib_destroy_qp_user(struct ib_qp *qp, struct ib_udata *udata)
>  		rdma_rw_cleanup_mrs(qp);
>  
>  	rdma_counter_unbind_qp(qp, true);
> -	rdma_restrack_del(&qp->res);
> +	rdma_restrack_prepare_del(&qp->res);
>  	ret = qp->device->ops.destroy_qp(qp, udata);
> -	if (!ret) {
> -		if (alt_path_sgid_attr)
> -			rdma_put_gid_attr(alt_path_sgid_attr);
> -		if (av_sgid_attr)
> -			rdma_put_gid_attr(av_sgid_attr);
> -		if (pd)
> -			atomic_dec(&pd->usecnt);
> -		if (scq)
> -			atomic_dec(&scq->usecnt);
> -		if (rcq)
> -			atomic_dec(&rcq->usecnt);
> -		if (srq)
> -			atomic_dec(&srq->usecnt);
> -		if (ind_tbl)
> -			atomic_dec(&ind_tbl->usecnt);
> -		if (sec)
> -			ib_destroy_qp_security_end(sec);
> -	} else {
> +	if (ret) {
> +		rdma_restrack_abort_del(&qp->res);
>  		if (sec)
>  			ib_destroy_qp_security_abort(sec);
> +		return ret;
>  	}
>  
> -	return ret;
> +	rdma_restrack_finish_del(&qp->res);
> +	if (alt_path_sgid_attr)
> +		rdma_put_gid_attr(alt_path_sgid_attr);
> +	if (av_sgid_attr)
> +		rdma_put_gid_attr(av_sgid_attr);
> +	if (pd)
> +		atomic_dec(&pd->usecnt);
> +	if (scq)
> +		atomic_dec(&scq->usecnt);
> +	if (rcq)
> +		atomic_dec(&rcq->usecnt);
> +	if (srq)
> +		atomic_dec(&srq->usecnt);
> +	if (ind_tbl)
> +		atomic_dec(&ind_tbl->usecnt);
> +	if (sec)
> +		ib_destroy_qp_security_end(sec);
> +	return 0;
>  }
>  EXPORT_SYMBOL(ib_destroy_qp_user);
>  
> @@ -2050,13 +2058,16 @@ int ib_destroy_cq_user(struct ib_cq *cq, struct ib_udata *udata)
>  	if (atomic_read(&cq->usecnt))
>  		return -EBUSY;
>  
> +	rdma_restrack_prepare_del(&cq->res);
>  	ret = cq->device->ops.destroy_cq(cq, udata);
> -	if (ret)
> +	if (ret) {
> +		rdma_restrack_abort_del(&cq->res);
>  		return ret;
> +	}
>  
> -	rdma_restrack_del(&cq->res);
> +	rdma_restrack_finish_del(&cq->res);
>  	kfree(cq);
> -	return ret;
> +	return 0;
>  }
>  EXPORT_SYMBOL(ib_destroy_cq_user);
>  
> @@ -2126,16 +2137,19 @@ int ib_dereg_mr_user(struct ib_mr *mr, struct ib_udata *udata)
>  	int ret;
>  
>  	trace_mr_dereg(mr);
> -	rdma_restrack_del(&mr->res);
> +	rdma_restrack_prepare_del(&mr->res);
>  	ret = mr->device->ops.dereg_mr(mr, udata);
> -	if (!ret) {
> -		atomic_dec(&pd->usecnt);
> -		if (dm)
> -			atomic_dec(&dm->usecnt);
> -		kfree(sig_attrs);
> +	if (ret) {
> +		rdma_restrack_abort_del(&mr->res);
> +		return ret;
>  	}
>  
> -	return ret;
> +	rdma_restrack_finish_del(&mr->res);
> +	atomic_dec(&pd->usecnt);
> +	if (dm)
> +		atomic_dec(&dm->usecnt);
> +	kfree(sig_attrs);
> +	return 0;
>  }
>  EXPORT_SYMBOL(ib_dereg_mr_user);
>  

  reply	other threads:[~2021-04-25 13:03 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-04-18 13:37 [PATCH rdma-next] RDMA/restrack: Delay QP deletion till all users are gone Leon Romanovsky
2021-04-20 12:39 ` Jason Gunthorpe
2021-04-20 13:06   ` Leon Romanovsky
2021-04-20 15:25     ` Jason Gunthorpe
2021-04-21  5:03       ` Leon Romanovsky
2021-04-22 14:29         ` Jason Gunthorpe
2021-04-25 13:03           ` Leon Romanovsky [this message]
2021-04-25 13:08             ` Jason Gunthorpe
2021-04-25 13:44               ` Leon Romanovsky
2021-04-25 17:22                 ` Jason Gunthorpe
2021-04-25 17:38                   ` Leon Romanovsky
2021-04-26 12:03                     ` Jason Gunthorpe
2021-04-26 13:08                       ` Leon Romanovsky
2021-04-26 13:11                         ` Jason Gunthorpe
2021-04-27  4:45                           ` Leon Romanovsky
2021-05-02 11:28           ` Leon Romanovsky

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=YIVosxurbZGlmCOw@unreal \
    --to=leon@kernel.org \
    --cc=dledford@redhat.com \
    --cc=jgg@nvidia.com \
    --cc=linux-rdma@vger.kernel.org \
    --cc=shayd@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.