Linux-RDMA Archive on lore.kernel.org
 help / color / Atom feed
From: Max Gurtovoy <maxg@mellanox.com>
To: sagi@grimberg.me, yaminf@mellanox.com, dledford@redhat.com,
	linux-rdma@vger.kernel.org, leon@kernel.org, bvanassche@acm.org
Cc: israelr@mellanox.com, oren@mellanox.com, jgg@mellanox.com,
	idanb@mellanox.com
Subject: Re: [PATCH 1/3] IB/iser: use new shared CQ mechanism
Date: Wed, 29 Jul 2020 11:34:11 +0300
Message-ID: <b922a13b-592b-0c03-bd0e-c7b6c7d4a54e@mellanox.com> (raw)
In-Reply-To: <20200722135629.49467-1-maxg@mellanox.com>

Jason/Leon,

Can you please pull this into the "for-kernel-5.9" branch?

Or do we need more reviews on this series?

On 7/22/2020 4:56 PM, Max Gurtovoy wrote:
> From: Yamin Friedman <yaminf@mellanox.com>
>
> Have the driver use shared CQs provided by the rdma core driver.
> Because this provides similar functionality to iser_comp it has been
> removed. Now there is no reason to allocate very large CQs when the driver
> is loaded while gaining the advantage of shared CQs.
>
> Signed-off-by: Yamin Friedman <yaminf@mellanox.com>
> Acked-by: Max Gurtovoy <maxg@mellanox.com>
> ---
>   drivers/infiniband/ulp/iser/iscsi_iser.h |  23 ++-----
>   drivers/infiniband/ulp/iser/iser_verbs.c | 112 +++++++------------------------
>   2 files changed, 29 insertions(+), 106 deletions(-)
>
> diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
> index 1d77c7f..fddcb88 100644
> --- a/drivers/infiniband/ulp/iser/iscsi_iser.h
> +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
> @@ -300,18 +300,6 @@ struct iser_login_desc {
>   struct iscsi_iser_task;
>   
>   /**
> - * struct iser_comp - iSER completion context
> - *
> - * @cq:         completion queue
> - * @active_qps: Number of active QPs attached
> - *              to completion context
> - */
> -struct iser_comp {
> -	struct ib_cq		*cq;
> -	int                      active_qps;
> -};
> -
> -/**
>    * struct iser_device - iSER device handle
>    *
>    * @ib_device:     RDMA device
> @@ -320,9 +308,6 @@ struct iser_comp {
>    * @event_handler: IB events handle routine
>    * @ig_list:	   entry in devices list
>    * @refcount:      Reference counter, dominated by open iser connections
> - * @comps_used:    Number of completion contexts used, Min between online
> - *                 cpus and device max completion vectors
> - * @comps:         Dinamically allocated array of completion handlers
>    */
>   struct iser_device {
>   	struct ib_device             *ib_device;
> @@ -330,8 +315,6 @@ struct iser_device {
>   	struct ib_event_handler      event_handler;
>   	struct list_head             ig_list;
>   	int                          refcount;
> -	int			     comps_used;
> -	struct iser_comp	     *comps;
>   };
>   
>   /**
> @@ -380,11 +363,12 @@ struct iser_fr_pool {
>    *
>    * @cma_id:              rdma_cm connection maneger handle
>    * @qp:                  Connection Queue-pair
> + * @cq:                  Connection completion queue
> + * @cq_size:             The number of max outstanding completions
>    * @post_recv_buf_count: post receive counter
>    * @sig_count:           send work request signal count
>    * @rx_wr:               receive work request for batch posts
>    * @device:              reference to iser device
> - * @comp:                iser completion context
>    * @fr_pool:             connection fast registration poool
>    * @pi_support:          Indicate device T10-PI support
>    * @reg_cqe:             completion handler
> @@ -392,11 +376,12 @@ struct iser_fr_pool {
>   struct ib_conn {
>   	struct rdma_cm_id           *cma_id;
>   	struct ib_qp	            *qp;
> +	struct ib_cq		    *cq;
> +	u32			    cq_size;
>   	int                          post_recv_buf_count;
>   	u8                           sig_count;
>   	struct ib_recv_wr	     rx_wr[ISER_MIN_POSTED_RX];
>   	struct iser_device          *device;
> -	struct iser_comp	    *comp;
>   	struct iser_fr_pool          fr_pool;
>   	bool			     pi_support;
>   	struct ib_cqe		     reg_cqe;
> diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
> index c1f44c4..699e075 100644
> --- a/drivers/infiniband/ulp/iser/iser_verbs.c
> +++ b/drivers/infiniband/ulp/iser/iser_verbs.c
> @@ -68,59 +68,23 @@ static void iser_event_handler(struct ib_event_handler *handler,
>   static int iser_create_device_ib_res(struct iser_device *device)
>   {
>   	struct ib_device *ib_dev = device->ib_device;
> -	int i, max_cqe;
>   
>   	if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS)) {
>   		iser_err("IB device does not support memory registrations\n");
>   		return -1;
>   	}
>   
> -	device->comps_used = min_t(int, num_online_cpus(),
> -				 ib_dev->num_comp_vectors);
> -
> -	device->comps = kcalloc(device->comps_used, sizeof(*device->comps),
> -				GFP_KERNEL);
> -	if (!device->comps)
> -		goto comps_err;
> -
> -	max_cqe = min(ISER_MAX_CQ_LEN, ib_dev->attrs.max_cqe);
> -
> -	iser_info("using %d CQs, device %s supports %d vectors max_cqe %d\n",
> -		  device->comps_used, dev_name(&ib_dev->dev),
> -		  ib_dev->num_comp_vectors, max_cqe);
> -
>   	device->pd = ib_alloc_pd(ib_dev,
>   		iser_always_reg ? 0 : IB_PD_UNSAFE_GLOBAL_RKEY);
>   	if (IS_ERR(device->pd))
>   		goto pd_err;
>   
> -	for (i = 0; i < device->comps_used; i++) {
> -		struct iser_comp *comp = &device->comps[i];
> -
> -		comp->cq = ib_alloc_cq(ib_dev, comp, max_cqe, i,
> -				       IB_POLL_SOFTIRQ);
> -		if (IS_ERR(comp->cq)) {
> -			comp->cq = NULL;
> -			goto cq_err;
> -		}
> -	}
> -
>   	INIT_IB_EVENT_HANDLER(&device->event_handler, ib_dev,
>   			      iser_event_handler);
>   	ib_register_event_handler(&device->event_handler);
>   	return 0;
>   
> -cq_err:
> -	for (i = 0; i < device->comps_used; i++) {
> -		struct iser_comp *comp = &device->comps[i];
> -
> -		if (comp->cq)
> -			ib_free_cq(comp->cq);
> -	}
> -	ib_dealloc_pd(device->pd);
>   pd_err:
> -	kfree(device->comps);
> -comps_err:
>   	iser_err("failed to allocate an IB resource\n");
>   	return -1;
>   }
> @@ -131,20 +95,9 @@ static int iser_create_device_ib_res(struct iser_device *device)
>    */
>   static void iser_free_device_ib_res(struct iser_device *device)
>   {
> -	int i;
> -
> -	for (i = 0; i < device->comps_used; i++) {
> -		struct iser_comp *comp = &device->comps[i];
> -
> -		ib_free_cq(comp->cq);
> -		comp->cq = NULL;
> -	}
> -
>   	ib_unregister_event_handler(&device->event_handler);
>   	ib_dealloc_pd(device->pd);
>   
> -	kfree(device->comps);
> -	device->comps = NULL;
>   	device->pd = NULL;
>   }
>   
> @@ -287,70 +240,57 @@ static int iser_create_ib_conn_res(struct ib_conn *ib_conn)
>   	struct ib_device	*ib_dev;
>   	struct ib_qp_init_attr	init_attr;
>   	int			ret = -ENOMEM;
> -	int index, min_index = 0;
> +	unsigned int max_send_wr, cq_size;
>   
>   	BUG_ON(ib_conn->device == NULL);
>   
>   	device = ib_conn->device;
>   	ib_dev = device->ib_device;
>   
> -	memset(&init_attr, 0, sizeof init_attr);
> +	if (ib_conn->pi_support)
> +		max_send_wr = ISER_QP_SIG_MAX_REQ_DTOS + 1;
> +	else
> +		max_send_wr = ISER_QP_MAX_REQ_DTOS + 1;
> +	max_send_wr = min_t(unsigned int, max_send_wr,
> +			    (unsigned int)ib_dev->attrs.max_qp_wr);
>   
> -	mutex_lock(&ig.connlist_mutex);
> -	/* select the CQ with the minimal number of usages */
> -	for (index = 0; index < device->comps_used; index++) {
> -		if (device->comps[index].active_qps <
> -		    device->comps[min_index].active_qps)
> -			min_index = index;
> +	cq_size = max_send_wr + ISER_QP_MAX_RECV_DTOS;
> +	ib_conn->cq = ib_cq_pool_get(ib_dev, cq_size, -1, IB_POLL_SOFTIRQ);
> +	if (IS_ERR(ib_conn->cq)) {
> +		ret = PTR_ERR(ib_conn->cq);
> +		goto cq_err;
>   	}
> -	ib_conn->comp = &device->comps[min_index];
> -	ib_conn->comp->active_qps++;
> -	mutex_unlock(&ig.connlist_mutex);
> -	iser_info("cq index %d used for ib_conn %p\n", min_index, ib_conn);
> +	ib_conn->cq_size = cq_size;
> +
> +	memset(&init_attr, 0, sizeof(init_attr));
>   
>   	init_attr.event_handler = iser_qp_event_callback;
>   	init_attr.qp_context	= (void *)ib_conn;
> -	init_attr.send_cq	= ib_conn->comp->cq;
> -	init_attr.recv_cq	= ib_conn->comp->cq;
> +	init_attr.send_cq	= ib_conn->cq;
> +	init_attr.recv_cq	= ib_conn->cq;
>   	init_attr.cap.max_recv_wr  = ISER_QP_MAX_RECV_DTOS;
>   	init_attr.cap.max_send_sge = 2;
>   	init_attr.cap.max_recv_sge = 1;
>   	init_attr.sq_sig_type	= IB_SIGNAL_REQ_WR;
>   	init_attr.qp_type	= IB_QPT_RC;
> -	if (ib_conn->pi_support) {
> -		init_attr.cap.max_send_wr = ISER_QP_SIG_MAX_REQ_DTOS + 1;
> +	init_attr.cap.max_send_wr = max_send_wr;
> +	if (ib_conn->pi_support)
>   		init_attr.create_flags |= IB_QP_CREATE_INTEGRITY_EN;
> -		iser_conn->max_cmds =
> -			ISER_GET_MAX_XMIT_CMDS(ISER_QP_SIG_MAX_REQ_DTOS);
> -	} else {
> -		if (ib_dev->attrs.max_qp_wr > ISER_QP_MAX_REQ_DTOS) {
> -			init_attr.cap.max_send_wr  = ISER_QP_MAX_REQ_DTOS + 1;
> -			iser_conn->max_cmds =
> -				ISER_GET_MAX_XMIT_CMDS(ISER_QP_MAX_REQ_DTOS);
> -		} else {
> -			init_attr.cap.max_send_wr = ib_dev->attrs.max_qp_wr;
> -			iser_conn->max_cmds =
> -				ISER_GET_MAX_XMIT_CMDS(ib_dev->attrs.max_qp_wr);
> -			iser_dbg("device %s supports max_send_wr %d\n",
> -				 dev_name(&device->ib_device->dev),
> -				 ib_dev->attrs.max_qp_wr);
> -		}
> -	}
> +	iser_conn->max_cmds = ISER_GET_MAX_XMIT_CMDS(max_send_wr - 1);
>   
>   	ret = rdma_create_qp(ib_conn->cma_id, device->pd, &init_attr);
>   	if (ret)
>   		goto out_err;
>   
>   	ib_conn->qp = ib_conn->cma_id->qp;
> -	iser_info("setting conn %p cma_id %p qp %p\n",
> +	iser_info("setting conn %p cma_id %p qp %p max_send_wr %d\n",
>   		  ib_conn, ib_conn->cma_id,
> -		  ib_conn->cma_id->qp);
> +		  ib_conn->cma_id->qp, max_send_wr);
>   	return ret;
>   
>   out_err:
> -	mutex_lock(&ig.connlist_mutex);
> -	ib_conn->comp->active_qps--;
> -	mutex_unlock(&ig.connlist_mutex);
> +	ib_cq_pool_put(ib_conn->cq, ib_conn->cq_size);
> +cq_err:
>   	iser_err("unable to alloc mem or create resource, err %d\n", ret);
>   
>   	return ret;
> @@ -462,10 +402,8 @@ static void iser_free_ib_conn_res(struct iser_conn *iser_conn,
>   		  iser_conn, ib_conn->cma_id, ib_conn->qp);
>   
>   	if (ib_conn->qp != NULL) {
> -		mutex_lock(&ig.connlist_mutex);
> -		ib_conn->comp->active_qps--;
> -		mutex_unlock(&ig.connlist_mutex);
>   		rdma_destroy_qp(ib_conn->cma_id);
> +		ib_cq_pool_put(ib_conn->cq, ib_conn->cq_size);
>   		ib_conn->qp = NULL;
>   	}
>   

  parent reply index

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-07-22 13:56 Max Gurtovoy
2020-07-22 13:56 ` [PATCH 2/3] IB/isert: " Max Gurtovoy
2020-07-29  9:39   ` Sagi Grimberg
2020-07-22 13:56 ` [PATCH 3/3] IB/srpt: " Max Gurtovoy
2020-07-22 15:02   ` Bart Van Assche
2020-07-29  9:40   ` Sagi Grimberg
2020-07-29  8:34 ` Max Gurtovoy [this message]
2020-07-29 11:34   ` [PATCH 1/3] IB/iser: " Leon Romanovsky
2020-07-29  9:39 ` Sagi Grimberg
2020-07-29 12:23 ` Jason Gunthorpe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=b922a13b-592b-0c03-bd0e-c7b6c7d4a54e@mellanox.com \
    --to=maxg@mellanox.com \
    --cc=bvanassche@acm.org \
    --cc=dledford@redhat.com \
    --cc=idanb@mellanox.com \
    --cc=israelr@mellanox.com \
    --cc=jgg@mellanox.com \
    --cc=leon@kernel.org \
    --cc=linux-rdma@vger.kernel.org \
    --cc=oren@mellanox.com \
    --cc=sagi@grimberg.me \
    --cc=yaminf@mellanox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Linux-RDMA Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-rdma/0 linux-rdma/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-rdma linux-rdma/ https://lore.kernel.org/linux-rdma \
		linux-rdma@vger.kernel.org
	public-inbox-index linux-rdma

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-rdma


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git