linux-rdma.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Wenpeng Liang <liangwenpeng@huawei.com>
To: Leon Romanovsky <leon@kernel.org>
Cc: <jgg@nvidia.com>, <linux-rdma@vger.kernel.org>, <linuxarm@huawei.com>
Subject: Re: [PATCH for-next] RDMA/hns: Use the reserved loopback QPs to free MR before destroying MPT
Date: Wed, 2 Mar 2022 20:44:48 +0800	[thread overview]
Message-ID: <0c14fac1-9448-7920-52fd-f353a8e7590f@huawei.com> (raw)
In-Reply-To: <Yhy5fZrsp79HZKR+@unreal>

On 2022/2/28 20:01, Leon Romanovsky wrote:
> On Fri, Feb 25, 2022 at 05:56:54PM +0800, Wenpeng Liang wrote:
>> From: Yixing Liu <liuyixing1@huawei.com>
>>
>> Before destroying MPT, the reserved loopback QPs send loopback IOs (one
>> write operation per SL). Completing these loopback IOs represents that
>> there isn't any outstanding request in MPT, then it's safe to destroy MPT.
>>
>> Signed-off-by: Yixing Liu <liuyixing1@huawei.com>
>> Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
>> ---
>>  drivers/infiniband/hw/hns/hns_roce_device.h |   2 +
>>  drivers/infiniband/hw/hns/hns_roce_hw_v2.c  | 334 +++++++++++++++++++-
>>  drivers/infiniband/hw/hns/hns_roce_hw_v2.h  |  20 ++
>>  drivers/infiniband/hw/hns/hns_roce_mr.c     |   6 +-
>>  4 files changed, 358 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
>> index 1e0bae136997..da0b4b310aab 100644
>> --- a/drivers/infiniband/hw/hns/hns_roce_device.h
>> +++ b/drivers/infiniband/hw/hns/hns_roce_device.h
>> @@ -624,6 +624,7 @@ struct hns_roce_qp {
>>  	u32			next_sge;
>>  	enum ib_mtu		path_mtu;
>>  	u32			max_inline_data;
>> +	u8			free_mr_en;
>>  
>>  	/* 0: flush needed, 1: unneeded */
>>  	unsigned long		flush_flag;
>> @@ -882,6 +883,7 @@ struct hns_roce_hw {
>>  			 enum ib_qp_state new_state);
>>  	int (*qp_flow_control_init)(struct hns_roce_dev *hr_dev,
>>  			 struct hns_roce_qp *hr_qp);
>> +	void (*dereg_mr)(struct hns_roce_dev *hr_dev);
>>  	int (*init_eq)(struct hns_roce_dev *hr_dev);
>>  	void (*cleanup_eq)(struct hns_roce_dev *hr_dev);
>>  	int (*write_srqc)(struct hns_roce_srq *srq, void *mb_buf);
>> diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
>> index b33e948fd060..62ee9c0bba74 100644
>> --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
>> +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
>> @@ -2664,6 +2664,217 @@ static void free_dip_list(struct hns_roce_dev *hr_dev)
>>  	spin_unlock_irqrestore(&hr_dev->dip_list_lock, flags);
>>  }
>>  
>> +static int free_mr_alloc_pd(struct hns_roce_dev *hr_dev,
>> +			    struct hns_roce_v2_free_mr *free_mr)
>> +{
> 
> You chose very non-intuitive name "free_mr...", but I don't have anything
> concrete to suggest.
> 

Thank you for your advice. There are two alternative names for this event,
which are DRAIN_RESIDUAL_WR or DRAIN_WR. It is hard to decide which one is
better. Could you give me some suggestions for the naming?

Thanks,
Wenpeng

>> +	struct ib_device *ibdev = &hr_dev->ib_dev;
>> +	struct ib_pd *pd;
>> +
>> +	pd = ib_alloc_pd(ibdev, 0);
>> +	if (IS_ERR(pd)) {
>> +		ibdev_err(ibdev, "failed to create pd for free mr.\n");
>> +		return PTR_ERR(pd);
>> +	}
>> +
>> +	free_mr->rsv_pd = pd;
>> +	return 0;
>> +}
>> +
>> +static int free_mr_create_cq(struct hns_roce_dev *hr_dev,
>> +			     struct hns_roce_v2_free_mr *free_mr)
>> +{
>> +	struct ib_device *ibdev = &hr_dev->ib_dev;
>> +	struct ib_cq_init_attr cq_init_attr = {};
>> +	struct ib_cq *cq;
>> +
>> +	cq_init_attr.cqe = HNS_ROCE_FREE_MR_USED_CQE_NUM;
>> +
>> +	cq = ib_create_cq(ibdev, NULL, NULL, NULL, &cq_init_attr);
>> +	if (IS_ERR(cq)) {
>> +		ibdev_err(ibdev, "failed to create cq for free mr.\n");
>> +		return PTR_ERR(cq);
>> +	}
>> +
>> +	free_mr->rsv_cq = cq;
>> +	return 0;
>> +}
>> +
>> +static int free_mr_create_loopback_qp(struct hns_roce_dev *hr_dev,
>> +				      struct hns_roce_v2_free_mr *free_mr,
>> +				      int sl_num)
>> +{
>> +	struct ib_device *ibdev = &hr_dev->ib_dev;
>> +	struct ib_qp_init_attr init_attr = {};
>> +	struct ib_pd *pd = free_mr->rsv_pd;
>> +	struct ib_cq *cq = free_mr->rsv_cq;
>> +	struct ib_qp *qp;
>> +
>> +	init_attr.qp_type		= IB_QPT_RC;
>> +	init_attr.sq_sig_type		= IB_SIGNAL_ALL_WR;
>> +	init_attr.send_cq		= cq;
>> +	init_attr.recv_cq		= cq;
>> +	init_attr.cap.max_send_wr	= HNS_ROCE_FREE_MR_USED_SQWQE_NUM;
>> +	init_attr.cap.max_send_sge	= HNS_ROCE_FREE_MR_USED_SQSGE_NUM;
>> +	init_attr.cap.max_recv_wr	= HNS_ROCE_FREE_MR_USED_RQWQE_NUM;
>> +	init_attr.cap.max_recv_sge	= HNS_ROCE_FREE_MR_USED_RQSGE_NUM;
> 
> No vertical alignment in new code, please.
> 

Fix it in v2.

>> +
>> +	qp = ib_create_qp(pd, &init_attr);
>> +	if (IS_ERR(qp)) {
>> +		ibdev_err(ibdev, "failed to create qp for free mr.\n");
>> +		return PTR_ERR(qp);
>> +	}
>> +
>> +	free_mr->rsv_qp[sl_num] = qp;
>> +	return 0;
>> +}
>> +
>> +static int free_mr_create_qp(struct hns_roce_dev *hr_dev,
>> +			     struct hns_roce_v2_free_mr *free_mr)
>> +{
>> +	int i;
>> +
>> +	for (i = 0; i < ARRAY_SIZE(free_mr->rsv_qp); i++)
>> +		if (free_mr_create_loopback_qp(hr_dev, free_mr, i))
>> +			return -ENOMEM;
> 
> Please don't overwrite returned error code - in all places.
> 

The next version will be revised to the following form:

	for (i = 0; i < ARRAY_SIZE(free_mr->rsv_qp); i++) {
		ret = free_mr_create_loopback_qp(hr_dev, free_mr, i);
		if (ret)
			return ret;
	}

>> +
>> +	return 0;
>> +}
>> +
>> +static void free_mr_init_qp_attr(struct ib_qp_attr *attr)
>> +{
>> +	rdma_ah_set_grh(&attr->ah_attr, NULL, 0, 0, 1, 0);
>> +	rdma_ah_set_static_rate(&attr->ah_attr, 3);
>> +	rdma_ah_set_port_num(&attr->ah_attr, 1);
>> +}
>> +
>> +static int free_mr_modify_loopback_qp(struct hns_roce_dev *hr_dev,
>> +				      struct ib_qp_attr *attr, int sl_num)
>> +{
>> +	struct hns_roce_v2_priv *priv = hr_dev->priv;
>> +	struct hns_roce_v2_free_mr *free_mr = &priv->free_mr;
>> +	struct ib_device *ibdev = &hr_dev->ib_dev;
>> +	struct hns_roce_qp *hr_qp;
>> +	int loopback;
>> +	int mask;
>> +	int ret;
>> +
>> +	hr_qp = to_hr_qp(free_mr->rsv_qp[sl_num]);
>> +	hr_qp->free_mr_en = 1;
>> +
>> +	mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS;
>> +	attr->qp_state		= IB_QPS_INIT;
>> +	attr->port_num		= 1;
>> +	attr->qp_access_flags	= IB_ACCESS_REMOTE_WRITE;
>> +	ret = ib_modify_qp(&hr_qp->ibqp, attr, mask);
>> +	if (ret) {
>> +		ibdev_err(ibdev, "failed to modify qp to init, ret = %d.\n",
>> +			  ret);
>> +		return ret;
>> +	}
>> +
>> +	loopback = hr_dev->loop_idc;
>> +	/* Set qpc lbi = 1 incidate loopback IO */
>> +	hr_dev->loop_idc = 1;
>> +
>> +	mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU | IB_QP_DEST_QPN |
>> +	       IB_QP_RQ_PSN | IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER;
>> +	attr->qp_state		= IB_QPS_RTR;
>> +	attr->ah_attr.type	= RDMA_AH_ATTR_TYPE_ROCE;
>> +	attr->path_mtu		= IB_MTU_256;
>> +	attr->dest_qp_num	= hr_qp->qpn;
>> +	attr->rq_psn		= HNS_ROCE_FREE_MR_USED_PSN;
>> +
>> +	rdma_ah_set_sl(&attr->ah_attr, (u8)sl_num);
>> +
>> +	ret = ib_modify_qp(&hr_qp->ibqp, attr, mask);
>> +	hr_dev->loop_idc = loopback;
>> +	if (ret) {
>> +		ibdev_err(ibdev, "failed to modify qp to rtr, ret = %d.\n",
>> +			  ret);
>> +		return ret;
>> +	}
>> +
>> +	mask = IB_QP_STATE | IB_QP_SQ_PSN | IB_QP_RETRY_CNT | IB_QP_TIMEOUT |
>> +	       IB_QP_RNR_RETRY | IB_QP_MAX_QP_RD_ATOMIC;
>> +	attr->qp_state		= IB_QPS_RTS;
>> +	attr->sq_psn		= HNS_ROCE_FREE_MR_USED_PSN;
>> +	attr->retry_cnt		= HNS_ROCE_FREE_MR_USED_QP_RETRY_CNT;
>> +	attr->timeout		= HNS_ROCE_FREE_MR_USED_QP_TIMEOUT;
>> +	ret = ib_modify_qp(&hr_qp->ibqp, attr, mask);
>> +	if (ret)
>> +		ibdev_err(ibdev, "failed to modify qp to rts, ret = %d.\n",
>> +			  ret);
>> +
>> +	return ret;
>> +}
>> +
>> +static int free_mr_modify_qp(struct hns_roce_dev *hr_dev,
>> +			     struct hns_roce_v2_free_mr *free_mr)
>> +{
>> +	struct ib_qp_attr attr = {};
>> +	int ret;
>> +	int i;
>> +
>> +	free_mr_init_qp_attr(&attr);
>> +
>> +	for (i = 0; i < ARRAY_SIZE(free_mr->rsv_qp); i++) {
>> +		ret = free_mr_modify_loopback_qp(hr_dev, &attr, i);
>> +		if (ret)
>> +			return ret;
>> +	}
>> +
>> +	return 0;
>> +}
>> +
>> +static void free_mr_exit(struct hns_roce_dev *hr_dev)
>> +{
>> +	struct hns_roce_v2_priv *priv = hr_dev->priv;
>> +	struct hns_roce_v2_free_mr *free_mr = &priv->free_mr;
>> +	int i;
>> +
>> +	for (i = 0; i < ARRAY_SIZE(free_mr->rsv_qp); i++) {
>> +		if (free_mr->rsv_qp[i]) {
>> +			(void)ib_destroy_qp(free_mr->rsv_qp[i]);
> 
> Please don't cast. It is not kernel coding style.
> 

The next version will be revised to the following form

	for (i = 0; i < ARRAY_SIZE(free_mr->rsv_qp); i++) {
		if (free_mr->rsv_qp[i]) {
			ret = ib_destroy_qp(free_mr->rsv_qp[i]);
			if (ret)
				ibdev_err(&hr_dev->ib_dev,
					  "failed to destroy qp in free mr.\n");

			free_mr->rsv_qp[i] = NULL;
		}
	}

Thanks,
Wenpeng

> Thanks
> .
> 

  reply	other threads:[~2022-03-02 12:44 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-02-25  9:56 [PATCH for-next] RDMA/hns: Use the reserved loopback QPs to free MR before destroying MPT Wenpeng Liang
2022-02-28 12:01 ` Leon Romanovsky
2022-03-02 12:44   ` Wenpeng Liang [this message]
2022-03-03 17:57     ` Leon Romanovsky
2022-03-08  8:13       ` Wenpeng Liang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=0c14fac1-9448-7920-52fd-f353a8e7590f@huawei.com \
    --to=liangwenpeng@huawei.com \
    --cc=jgg@nvidia.com \
    --cc=leon@kernel.org \
    --cc=linux-rdma@vger.kernel.org \
    --cc=linuxarm@huawei.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).