Linux-RDMA Archive on lore.kernel.org
 help / color / Atom feed
From: Bart Van Assche <bvanassche@acm.org>
To: Jack Wang <jinpuwang@gmail.com>,
	linux-block@vger.kernel.org, linux-rdma@vger.kernel.org
Cc: axboe@kernel.dk, hch@infradead.org, sagi@grimberg.me,
	jgg@mellanox.com, dledford@redhat.com,
	danil.kipnis@cloud.ionos.com, rpenyaev@suse.de,
	Jack Wang <jinpu.wang@cloud.ionos.com>
Subject: Re: [PATCH v4 06/25] ibtrs: client: main functionality
Date: Mon, 23 Sep 2019 14:51:43 -0700
Message-ID: <d0bc1253-4f3d-981b-97f1-e44900fffb44@acm.org> (raw)
In-Reply-To: <20190620150337.7847-7-jinpuwang@gmail.com>

On 6/20/19 8:03 AM, Jack Wang wrote:
> +static const struct ibtrs_ib_dev_pool_ops dev_pool_ops;
> +static struct ibtrs_ib_dev_pool dev_pool = {
> +	.ops = &dev_pool_ops
> +};

Can the definitions in this file be reordered such that the forward 
declaration of dev_pool_ops can be removed?

> +static void ibtrs_rdma_error_recovery(struct ibtrs_clt_con *con);
> +static int ibtrs_clt_rdma_cm_handler(struct rdma_cm_id *cm_id,
> +				     struct rdma_cm_event *ev);
> +static void ibtrs_clt_rdma_done(struct ib_cq *cq, struct ib_wc *wc);
> +static void complete_rdma_req(struct ibtrs_clt_io_req *req, int errno,
> +			      bool notify, bool can_wait);
> +static int ibtrs_clt_write_req(struct ibtrs_clt_io_req *req);
> +static int ibtrs_clt_read_req(struct ibtrs_clt_io_req *req);

Please also remove these forward declarations.

> +bool ibtrs_clt_sess_is_connected(const struct ibtrs_clt_sess *sess)
> +{
> +	return sess->state == IBTRS_CLT_CONNECTED;
> +}

Is it really useful to introduce a one line function for testing the 
session state?

> +static inline struct ibtrs_tag *
> +__ibtrs_get_tag(struct ibtrs_clt *clt, enum ibtrs_clt_con_type con_type)
> +{
> +	size_t max_depth = clt->queue_depth;
> +	struct ibtrs_tag *tag;
> +	int cpu, bit;
> +
> +	cpu = get_cpu();
> +	do {
> +		bit = find_first_zero_bit(clt->tags_map, max_depth);
> +		if (unlikely(bit >= max_depth)) {
> +			put_cpu();
> +			return NULL;
> +		}
> +
> +	} while (unlikely(test_and_set_bit_lock(bit, clt->tags_map)));
> +	put_cpu();
> +
> +	tag = GET_TAG(clt, bit);
> +	WARN_ON(tag->mem_id != bit);
> +	tag->cpu_id = cpu;
> +	tag->con_type = con_type;
> +
> +	return tag;
> +}

What is the role of the get_cpu() and put_cpu() calls in this function? 
How can it make sense to assign the cpu number to tag->cpu_id after 
put_cpu() has been called?

> +static inline void ibtrs_clt_init_req(struct ibtrs_clt_io_req *req,
> +				      struct ibtrs_clt_sess *sess,
> +				      ibtrs_conf_fn *conf,
> +				      struct ibtrs_tag *tag, void *priv,
> +				      const struct kvec *vec, size_t usr_len,
> +				      struct scatterlist *sg, size_t sg_cnt,
> +				      size_t data_len, int dir)
> +{
> +	struct iov_iter iter;
> +	size_t len;
> +
> +	req->tag = tag;
> +	req->in_use = true;
> +	req->usr_len = usr_len;
> +	req->data_len = data_len;
> +	req->sglist = sg;
> +	req->sg_cnt = sg_cnt;
> +	req->priv = priv;
> +	req->dir = dir;
> +	req->con = ibtrs_tag_to_clt_con(sess, tag);
> +	req->conf = conf;
> +	req->need_inv = false;
> +	req->need_inv_comp = false;
> +	req->inv_errno = 0;
> +
> +	iov_iter_kvec(&iter, READ, vec, 1, usr_len);
> +	len = _copy_from_iter(req->iu->buf, usr_len, &iter);
> +	WARN_ON(len != usr_len);
> +
> +	reinit_completion(&req->inv_comp);
> +	if (sess->stats.enable_rdma_lat)
> +		req->start_jiffies = jiffies;
> +}

A comment that explains what "req" stands for would be welcome. Since 
this function copies the entire payload, I assume that it is only used 
for control messages and not for reading or writing data from a block 
device?

> +static int ibtrs_clt_failover_req(struct ibtrs_clt *clt,
> +				  struct ibtrs_clt_io_req *fail_req)
> +{
> +	struct ibtrs_clt_sess *alive_sess;
> +	struct ibtrs_clt_io_req *req;
> +	int err = -ECONNABORTED;
> +	struct path_it it;
> +
> +	do_each_path(alive_sess, clt, &it) {
> +		if (unlikely(alive_sess->state != IBTRS_CLT_CONNECTED))
> +			continue;
> +		req = ibtrs_clt_get_copy_req(alive_sess, fail_req);
> +		if (req->dir == DMA_TO_DEVICE)
> +			err = ibtrs_clt_write_req(req);
> +		else
> +			err = ibtrs_clt_read_req(req);
> +		if (unlikely(err)) {
> +			req->in_use = false;
> +			continue;
> +		}
> +		/* Success path */
> +		ibtrs_clt_inc_failover_cnt(&alive_sess->stats);
> +		break;
> +	} while_each_path(&it);
> +
> +	return err;
> +}

Also for this function, a comment that explains the purpose of this 
function would be welcome.

> +static void fail_all_outstanding_reqs(struct ibtrs_clt_sess *sess)
> +{
> +	struct ibtrs_clt *clt = sess->clt;
> +	struct ibtrs_clt_io_req *req;
> +	int i, err;
> +
> +	if (!sess->reqs)
> +		return;
> +	for (i = 0; i < sess->queue_depth; ++i) {
> +		req = &sess->reqs[i];
> +		if (!req->in_use)
> +			continue;
> +
> +		/*
> +		 * Safely (without notification) complete failed request.
> +		 * After completion this request is still usebale and can
> +		 * be failovered to another path.
> +		 */
> +		complete_rdma_req(req, -ECONNABORTED, false, true);
> +
> +		err = ibtrs_clt_failover_req(clt, req);
> +		if (unlikely(err))
> +			/* Failover failed, notify anyway */
> +			req->conf(req->priv, err);
> +	}
> +}

What guarantees that this function does not call complete_rdma_req() 
while complete_rdma_req() is called from the regular completion path?

> +static bool __ibtrs_clt_change_state(struct ibtrs_clt_sess *sess,
> +				     enum ibtrs_clt_state new_state)
> +{
> +	enum ibtrs_clt_state old_state;
> +	bool changed = false;
> +
> +	old_state = sess->state;
> +	switch (new_state) {

Please use lockdep_assert_held() inside this function to verify at 
runtime that session state changes are serialized properly.

> +static enum ibtrs_clt_state ibtrs_clt_state(struct ibtrs_clt_sess *sess)
> +{
> +	enum ibtrs_clt_state state;
> +
> +	spin_lock_irq(&sess->state_wq.lock);
> +	state = sess->state;
> +	spin_unlock_irq(&sess->state_wq.lock);
> +
> +	return state;
> +}

Please remove this function and read sess->state without holding 
state_wq.lock.

> +static void ibtrs_clt_hb_err_handler(struct ibtrs_con *c, int err)
> +{
> +	struct ibtrs_clt_con *con;
> +
> +	(void)err;
> +	con = container_of(c, typeof(*con), c);
> +	ibtrs_rdma_error_recovery(con);
> +}

Can "(void)err" be left out?

Can the declaration and assignment of 'con' be merged into a single line 
of code?

> +static int create_con(struct ibtrs_clt_sess *sess, unsigned int cid)
> +{
> +	struct ibtrs_clt_con *con;
> +
> +	con = kzalloc(sizeof(*con), GFP_KERNEL);
> +	if (unlikely(!con))
> +		return -ENOMEM;
> +
> +	/* Map first two connections to the first CPU */
> +	con->cpu  = (cid ? cid - 1 : 0) % nr_cpu_ids;
> +	con->c.cid = cid;
> +	con->c.sess = &sess->s;
> +	atomic_set(&con->io_cnt, 0);
> +
> +	sess->s.con[cid] = &con->c;
> +
> +	return 0;
> +}

The code to map a connection ID to onto a CPU occurs multiple times. Has 
it been considered to introduce a function for that mapping? Although 
one-line inline functions are not recommended in general, such a 
function will also make it easier to experiment with other mapping 
approaches, e.g. mapping hypertread siblings onto the same connection ID.

> +static inline bool xchg_sessions(struct ibtrs_clt_sess __rcu **rcu_ppcpu_path,
> +				 struct ibtrs_clt_sess *sess,
> +				 struct ibtrs_clt_sess *next)
> +{
> +	struct ibtrs_clt_sess **ppcpu_path;
> +
> +	/* Call cmpxchg() without sparse warnings */
> +	ppcpu_path = (typeof(ppcpu_path))rcu_ppcpu_path;
> +	return (sess == cmpxchg(ppcpu_path, sess, next));
> +}

This looks suspicious. Has it been considered to protect changes of 
rcu_ppcpu_path with a mutex and to protect reads with an RCU read lock?

> +static void ibtrs_clt_add_path_to_arr(struct ibtrs_clt_sess *sess,
> +				      struct ibtrs_addr *addr)
> +{
> +	struct ibtrs_clt *clt = sess->clt;
> +
> +	mutex_lock(&clt->paths_mutex);
> +	clt->paths_num++;
> +
> +	/*
> +	 * Firstly increase paths_num, wait for GP and then
> +	 * add path to the list.  Why?  Since we add path with
> +	 * !CONNECTED state explanation is similar to what has
> +	 * been written in ibtrs_clt_remove_path_from_arr().
> +	 */
> +	synchronize_rcu();
> +
> +	list_add_tail_rcu(&sess->s.entry, &clt->paths_list);
> +	mutex_unlock(&clt->paths_mutex);
> +}

synchronize_rcu() while a mutex is being held? Really?

> +static void ibtrs_clt_close_work(struct work_struct *work)
> +{
> +	struct ibtrs_clt_sess *sess;
> +
> +	sess = container_of(work, struct ibtrs_clt_sess, close_work);
> +
> +	cancel_delayed_work_sync(&sess->reconnect_dwork);
> +	ibtrs_clt_stop_and_destroy_conns(sess);
> +	/*
> +	 * Sounds stupid, huh?  No, it is not.  Consider this sequence:
> +	 *
> +	 *   #CPU0                              #CPU1
> +	 *   1.  CONNECTED->RECONNECTING
> +	 *   2.                                 RECONNECTING->CLOSING
> +	 *   3.  queue_work(&reconnect_dwork)
> +	 *   4.                                 queue_work(&close_work);
> +	 *   5.  reconnect_work();              close_work();
> +	 *
> +	 * To avoid that case do cancel twice: before and after.
> +	 */
> +	cancel_delayed_work_sync(&sess->reconnect_dwork);
> +	ibtrs_clt_change_state(sess, IBTRS_CLT_CLOSED);
> +}

The above code looks suspicious to me. I think there should be an 
additional state change at the start of this function to prevent that 
reconnect_dwork gets requeued after having been canceled.

> +static void ibtrs_clt_dev_release(struct device *dev)
> +{
> +	/* Nobody plays with device references, so nop */
> +}

That comment sounds wrong. Have you reviewed all of the device driver 
core code and checked that there is no code in there that manipulates 
struct device refcounts? I think the code that frees struct ibtrs_clt 
should be moved from free_clt() into the above function.

Bart.

  parent reply index

Thread overview: 123+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <20190620150337.7847-1-jinpuwang@gmail.com>
2019-06-20 15:03 ` [PATCH v4 01/25] sysfs: export sysfs_remove_file_self() Jack Wang
2019-09-23 17:21   ` Bart Van Assche
2019-09-25  9:30     ` Danil Kipnis
2019-07-09  9:55 ` [PATCH v4 00/25] InfiniBand Transport (IBTRS) and Network Block Device (IBNBD) Danil Kipnis
2019-07-09 11:00   ` Leon Romanovsky
2019-07-09 11:17     ` Greg KH
2019-07-09 11:57       ` Jinpu Wang
2019-07-09 13:32       ` Leon Romanovsky
2019-07-09 15:39       ` Bart Van Assche
2019-07-09 11:37     ` Jinpu Wang
2019-07-09 12:06       ` Jason Gunthorpe
2019-07-09 13:15         ` Jinpu Wang
2019-07-09 13:19           ` Jason Gunthorpe
2019-07-09 14:17             ` Jinpu Wang
2019-07-09 21:27             ` Sagi Grimberg
2019-07-19 13:12               ` Danil Kipnis
2019-07-10 14:55     ` Danil Kipnis
2019-07-09 12:04   ` Jason Gunthorpe
2019-07-09 19:45   ` Sagi Grimberg
2019-07-10 13:55     ` Jason Gunthorpe
2019-07-10 16:25       ` Sagi Grimberg
2019-07-10 17:25         ` Jason Gunthorpe
2019-07-10 19:11           ` Sagi Grimberg
2019-07-11  7:27             ` Danil Kipnis
2019-07-11  8:54     ` Danil Kipnis
2019-07-12  0:22       ` Sagi Grimberg
2019-07-12  7:57         ` Jinpu Wang
2019-07-12 19:40           ` Sagi Grimberg
2019-07-15 11:21             ` Jinpu Wang
2019-07-12 10:58         ` Danil Kipnis
     [not found] ` <20190620150337.7847-26-jinpuwang@gmail.com>
2019-07-09 15:10   ` [PATCH v4 25/25] MAINTAINERS: Add maintainer for IBNBD/IBTRS modules Leon Romanovsky
2019-07-09 15:18     ` Jinpu Wang
2019-07-09 15:51       ` Leon Romanovsky
2019-09-13 23:56   ` Bart Van Assche
2019-09-19 10:30     ` Jinpu Wang
     [not found] ` <20190620150337.7847-16-jinpuwang@gmail.com>
2019-09-13 22:10   ` [PATCH v4 15/25] ibnbd: private headers with IBNBD protocol structs and helpers Bart Van Assche
2019-09-15 14:30     ` Jinpu Wang
2019-09-16  5:27       ` Leon Romanovsky
2019-09-16 13:45         ` Bart Van Assche
2019-09-17 15:41           ` Leon Romanovsky
2019-09-17 15:52             ` Jinpu Wang
2019-09-16  7:08       ` Danil Kipnis
2019-09-16 14:57       ` Jinpu Wang
2019-09-16 17:25         ` Bart Van Assche
2019-09-17 12:27           ` Jinpu Wang
2019-09-16 15:39       ` Jinpu Wang
2019-09-18 15:26         ` Bart Van Assche
2019-09-18 16:11           ` Jinpu Wang
     [not found] ` <20190620150337.7847-17-jinpuwang@gmail.com>
2019-09-13 22:25   ` [PATCH v4 16/25] ibnbd: client: private header with client structs and functions Bart Van Assche
2019-09-17 16:36     ` Jinpu Wang
2019-09-25 23:43       ` Danil Kipnis
2019-09-26 10:00         ` Jinpu Wang
     [not found] ` <20190620150337.7847-18-jinpuwang@gmail.com>
2019-09-13 23:46   ` [PATCH v4 17/25] ibnbd: client: main functionality Bart Van Assche
2019-09-16 14:17     ` Danil Kipnis
2019-09-16 16:46       ` Bart Van Assche
2019-09-17 11:39         ` Danil Kipnis
2019-09-18  7:14           ` Danil Kipnis
2019-09-18 15:47             ` Bart Van Assche
2019-09-20  8:29               ` Danil Kipnis
2019-09-25 22:26               ` Danil Kipnis
2019-09-26  9:55                 ` Roman Penyaev
2019-09-26 15:01                   ` Bart Van Assche
2019-09-27  8:52                     ` Roman Penyaev
2019-09-27  9:32                       ` Danil Kipnis
2019-09-27 12:18                         ` Danil Kipnis
2019-09-27 16:37                       ` Bart Van Assche
2019-09-27 16:50                         ` Roman Penyaev
2019-09-27 17:16                           ` Bart Van Assche
2019-09-17 13:09     ` Jinpu Wang
2019-09-17 16:46       ` Bart Van Assche
2019-09-18 12:02         ` Jinpu Wang
2019-09-18 16:05     ` Jinpu Wang
2019-09-14  0:00   ` Bart Van Assche
     [not found] ` <20190620150337.7847-25-jinpuwang@gmail.com>
2019-09-13 23:58   ` [PATCH v4 24/25] ibnbd: a bit of documentation Bart Van Assche
2019-09-18 12:22     ` Jinpu Wang
     [not found] ` <20190620150337.7847-19-jinpuwang@gmail.com>
2019-09-18 16:28   ` [PATCH v4 18/25] ibnbd: client: sysfs interface functions Bart Van Assche
2019-09-19 15:55     ` Jinpu Wang
     [not found] ` <20190620150337.7847-21-jinpuwang@gmail.com>
2019-09-18 17:41   ` [PATCH v4 20/25] ibnbd: server: main functionality Bart Van Assche
2019-09-20  7:36     ` Danil Kipnis
2019-09-20 15:42       ` Bart Van Assche
2019-09-23 15:19         ` Danil Kipnis
     [not found] ` <20190620150337.7847-22-jinpuwang@gmail.com>
2019-09-18 21:46   ` [PATCH v4 21/25] ibnbd: server: functionality for IO submission to file or block dev Bart Van Assche
2019-09-26 14:04     ` Jinpu Wang
2019-09-26 15:11       ` Bart Van Assche
2019-09-26 15:25         ` Danil Kipnis
2019-09-26 15:29           ` Bart Van Assche
2019-09-26 15:38             ` Danil Kipnis
2019-09-26 15:42               ` Jinpu Wang
     [not found] ` <20190620150337.7847-3-jinpuwang@gmail.com>
2019-09-23 17:44   ` [PATCH v4 02/25] ibtrs: public interface header to establish RDMA connections Bart Van Assche
2019-09-25 10:20     ` Danil Kipnis
2019-09-25 15:38       ` Bart Van Assche
     [not found] ` <20190620150337.7847-7-jinpuwang@gmail.com>
2019-09-23 21:51   ` Bart Van Assche [this message]
2019-09-25 17:36     ` [PATCH v4 06/25] ibtrs: client: main functionality Danil Kipnis
2019-09-25 18:55       ` Bart Van Assche
2019-09-25 20:50         ` Danil Kipnis
2019-09-25 21:08           ` Bart Van Assche
2019-09-25 21:16             ` Bart Van Assche
2019-09-25 22:53             ` Danil Kipnis
2019-09-25 23:21               ` Bart Van Assche
2019-09-26  9:16                 ` Danil Kipnis
     [not found] ` <20190620150337.7847-4-jinpuwang@gmail.com>
2019-09-23 22:50   ` [PATCH v4 03/25] ibtrs: private headers with IBTRS protocol structs and helpers Bart Van Assche
2019-09-25 21:45     ` Danil Kipnis
2019-09-25 21:57       ` Bart Van Assche
2019-09-27  8:56     ` Jinpu Wang
     [not found] ` <20190620150337.7847-5-jinpuwang@gmail.com>
2019-09-23 23:03   ` [PATCH v4 04/25] ibtrs: core: lib functions shared between client and server modules Bart Van Assche
2019-09-27 10:13     ` Jinpu Wang
     [not found] ` <20190620150337.7847-6-jinpuwang@gmail.com>
2019-09-23 23:05   ` [PATCH v4 05/25] ibtrs: client: private header with client structs and functions Bart Van Assche
2019-09-27 10:18     ` Jinpu Wang
     [not found] ` <20190620150337.7847-8-jinpuwang@gmail.com>
2019-09-23 23:15   ` [PATCH v4 07/25] ibtrs: client: statistics functions Bart Van Assche
2019-09-27 12:00     ` Jinpu Wang
     [not found] ` <20190620150337.7847-10-jinpuwang@gmail.com>
2019-09-23 23:21   ` [PATCH v4 09/25] ibtrs: server: private header with server structs and functions Bart Van Assche
2019-09-27 12:04     ` Jinpu Wang
     [not found] ` <20190620150337.7847-11-jinpuwang@gmail.com>
2019-09-23 23:49   ` [PATCH v4 10/25] ibtrs: server: main functionality Bart Van Assche
2019-09-27 15:03     ` Jinpu Wang
2019-09-27 15:11       ` Bart Van Assche
2019-09-27 15:19         ` Jinpu Wang
     [not found] ` <20190620150337.7847-12-jinpuwang@gmail.com>
2019-09-23 23:56   ` [PATCH v4 11/25] ibtrs: server: statistics functions Bart Van Assche
2019-10-02 15:15     ` Jinpu Wang
2019-10-02 15:42       ` Leon Romanovsky
2019-10-02 15:45         ` Jinpu Wang
2019-10-02 16:00           ` Leon Romanovsky
     [not found] ` <20190620150337.7847-13-jinpuwang@gmail.com>
2019-09-24  0:00   ` [PATCH v4 12/25] ibtrs: server: sysfs interface functions Bart Van Assche
2019-10-02 15:11     ` Jinpu Wang

Reply instructions:

You may reply publically to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=d0bc1253-4f3d-981b-97f1-e44900fffb44@acm.org \
    --to=bvanassche@acm.org \
    --cc=axboe@kernel.dk \
    --cc=danil.kipnis@cloud.ionos.com \
    --cc=dledford@redhat.com \
    --cc=hch@infradead.org \
    --cc=jgg@mellanox.com \
    --cc=jinpu.wang@cloud.ionos.com \
    --cc=jinpuwang@gmail.com \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-rdma@vger.kernel.org \
    --cc=rpenyaev@suse.de \
    --cc=sagi@grimberg.me \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Linux-RDMA Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-rdma/0 linux-rdma/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-rdma linux-rdma/ https://lore.kernel.org/linux-rdma \
		linux-rdma@vger.kernel.org
	public-inbox-index linux-rdma

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-rdma


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git