From: Bart Van Assche <bvanassche@acm.org>
To: Jack Wang <jinpuwang@gmail.com>,
linux-block@vger.kernel.org, linux-rdma@vger.kernel.org
Cc: axboe@kernel.dk, hch@infradead.org, sagi@grimberg.me,
jgg@mellanox.com, dledford@redhat.com,
danil.kipnis@cloud.ionos.com, rpenyaev@suse.de,
Jack Wang <jinpu.wang@cloud.ionos.com>
Subject: Re: [PATCH v4 06/25] ibtrs: client: main functionality
Date: Mon, 23 Sep 2019 14:51:43 -0700 [thread overview]
Message-ID: <d0bc1253-4f3d-981b-97f1-e44900fffb44@acm.org> (raw)
In-Reply-To: <20190620150337.7847-7-jinpuwang@gmail.com>
On 6/20/19 8:03 AM, Jack Wang wrote:
> +static const struct ibtrs_ib_dev_pool_ops dev_pool_ops;
> +static struct ibtrs_ib_dev_pool dev_pool = {
> + .ops = &dev_pool_ops
> +};
Can the definitions in this file be reordered such that the forward
declaration of dev_pool_ops can be removed?
> +static void ibtrs_rdma_error_recovery(struct ibtrs_clt_con *con);
> +static int ibtrs_clt_rdma_cm_handler(struct rdma_cm_id *cm_id,
> + struct rdma_cm_event *ev);
> +static void ibtrs_clt_rdma_done(struct ib_cq *cq, struct ib_wc *wc);
> +static void complete_rdma_req(struct ibtrs_clt_io_req *req, int errno,
> + bool notify, bool can_wait);
> +static int ibtrs_clt_write_req(struct ibtrs_clt_io_req *req);
> +static int ibtrs_clt_read_req(struct ibtrs_clt_io_req *req);
Please also remove these forward declarations.
> +bool ibtrs_clt_sess_is_connected(const struct ibtrs_clt_sess *sess)
> +{
> + return sess->state == IBTRS_CLT_CONNECTED;
> +}
Is it really useful to introduce a one line function for testing the
session state?
> +static inline struct ibtrs_tag *
> +__ibtrs_get_tag(struct ibtrs_clt *clt, enum ibtrs_clt_con_type con_type)
> +{
> + size_t max_depth = clt->queue_depth;
> + struct ibtrs_tag *tag;
> + int cpu, bit;
> +
> + cpu = get_cpu();
> + do {
> + bit = find_first_zero_bit(clt->tags_map, max_depth);
> + if (unlikely(bit >= max_depth)) {
> + put_cpu();
> + return NULL;
> + }
> +
> + } while (unlikely(test_and_set_bit_lock(bit, clt->tags_map)));
> + put_cpu();
> +
> + tag = GET_TAG(clt, bit);
> + WARN_ON(tag->mem_id != bit);
> + tag->cpu_id = cpu;
> + tag->con_type = con_type;
> +
> + return tag;
> +}
What is the role of the get_cpu() and put_cpu() calls in this function?
How can it make sense to assign the cpu number to tag->cpu_id after
put_cpu() has been called?
> +static inline void ibtrs_clt_init_req(struct ibtrs_clt_io_req *req,
> + struct ibtrs_clt_sess *sess,
> + ibtrs_conf_fn *conf,
> + struct ibtrs_tag *tag, void *priv,
> + const struct kvec *vec, size_t usr_len,
> + struct scatterlist *sg, size_t sg_cnt,
> + size_t data_len, int dir)
> +{
> + struct iov_iter iter;
> + size_t len;
> +
> + req->tag = tag;
> + req->in_use = true;
> + req->usr_len = usr_len;
> + req->data_len = data_len;
> + req->sglist = sg;
> + req->sg_cnt = sg_cnt;
> + req->priv = priv;
> + req->dir = dir;
> + req->con = ibtrs_tag_to_clt_con(sess, tag);
> + req->conf = conf;
> + req->need_inv = false;
> + req->need_inv_comp = false;
> + req->inv_errno = 0;
> +
> + iov_iter_kvec(&iter, READ, vec, 1, usr_len);
> + len = _copy_from_iter(req->iu->buf, usr_len, &iter);
> + WARN_ON(len != usr_len);
> +
> + reinit_completion(&req->inv_comp);
> + if (sess->stats.enable_rdma_lat)
> + req->start_jiffies = jiffies;
> +}
A comment that explains what "req" stands for would be welcome. Since
this function copies the entire payload, I assume that it is only used
for control messages and not for reading or writing data from a block
device?
> +static int ibtrs_clt_failover_req(struct ibtrs_clt *clt,
> + struct ibtrs_clt_io_req *fail_req)
> +{
> + struct ibtrs_clt_sess *alive_sess;
> + struct ibtrs_clt_io_req *req;
> + int err = -ECONNABORTED;
> + struct path_it it;
> +
> + do_each_path(alive_sess, clt, &it) {
> + if (unlikely(alive_sess->state != IBTRS_CLT_CONNECTED))
> + continue;
> + req = ibtrs_clt_get_copy_req(alive_sess, fail_req);
> + if (req->dir == DMA_TO_DEVICE)
> + err = ibtrs_clt_write_req(req);
> + else
> + err = ibtrs_clt_read_req(req);
> + if (unlikely(err)) {
> + req->in_use = false;
> + continue;
> + }
> + /* Success path */
> + ibtrs_clt_inc_failover_cnt(&alive_sess->stats);
> + break;
> + } while_each_path(&it);
> +
> + return err;
> +}
Also for this function, a comment that explains the purpose of this
function would be welcome.
> +static void fail_all_outstanding_reqs(struct ibtrs_clt_sess *sess)
> +{
> + struct ibtrs_clt *clt = sess->clt;
> + struct ibtrs_clt_io_req *req;
> + int i, err;
> +
> + if (!sess->reqs)
> + return;
> + for (i = 0; i < sess->queue_depth; ++i) {
> + req = &sess->reqs[i];
> + if (!req->in_use)
> + continue;
> +
> + /*
> + * Safely (without notification) complete failed request.
> + * After completion this request is still usebale and can
> + * be failovered to another path.
> + */
> + complete_rdma_req(req, -ECONNABORTED, false, true);
> +
> + err = ibtrs_clt_failover_req(clt, req);
> + if (unlikely(err))
> + /* Failover failed, notify anyway */
> + req->conf(req->priv, err);
> + }
> +}
What guarantees that this function does not call complete_rdma_req()
while complete_rdma_req() is called from the regular completion path?
> +static bool __ibtrs_clt_change_state(struct ibtrs_clt_sess *sess,
> + enum ibtrs_clt_state new_state)
> +{
> + enum ibtrs_clt_state old_state;
> + bool changed = false;
> +
> + old_state = sess->state;
> + switch (new_state) {
Please use lockdep_assert_held() inside this function to verify at
runtime that session state changes are serialized properly.
> +static enum ibtrs_clt_state ibtrs_clt_state(struct ibtrs_clt_sess *sess)
> +{
> + enum ibtrs_clt_state state;
> +
> + spin_lock_irq(&sess->state_wq.lock);
> + state = sess->state;
> + spin_unlock_irq(&sess->state_wq.lock);
> +
> + return state;
> +}
Please remove this function and read sess->state without holding
state_wq.lock.
> +static void ibtrs_clt_hb_err_handler(struct ibtrs_con *c, int err)
> +{
> + struct ibtrs_clt_con *con;
> +
> + (void)err;
> + con = container_of(c, typeof(*con), c);
> + ibtrs_rdma_error_recovery(con);
> +}
Can "(void)err" be left out?
Can the declaration and assignment of 'con' be merged into a single line
of code?
> +static int create_con(struct ibtrs_clt_sess *sess, unsigned int cid)
> +{
> + struct ibtrs_clt_con *con;
> +
> + con = kzalloc(sizeof(*con), GFP_KERNEL);
> + if (unlikely(!con))
> + return -ENOMEM;
> +
> + /* Map first two connections to the first CPU */
> + con->cpu = (cid ? cid - 1 : 0) % nr_cpu_ids;
> + con->c.cid = cid;
> + con->c.sess = &sess->s;
> + atomic_set(&con->io_cnt, 0);
> +
> + sess->s.con[cid] = &con->c;
> +
> + return 0;
> +}
The code to map a connection ID to onto a CPU occurs multiple times. Has
it been considered to introduce a function for that mapping? Although
one-line inline functions are not recommended in general, such a
function will also make it easier to experiment with other mapping
approaches, e.g. mapping hypertread siblings onto the same connection ID.
> +static inline bool xchg_sessions(struct ibtrs_clt_sess __rcu **rcu_ppcpu_path,
> + struct ibtrs_clt_sess *sess,
> + struct ibtrs_clt_sess *next)
> +{
> + struct ibtrs_clt_sess **ppcpu_path;
> +
> + /* Call cmpxchg() without sparse warnings */
> + ppcpu_path = (typeof(ppcpu_path))rcu_ppcpu_path;
> + return (sess == cmpxchg(ppcpu_path, sess, next));
> +}
This looks suspicious. Has it been considered to protect changes of
rcu_ppcpu_path with a mutex and to protect reads with an RCU read lock?
> +static void ibtrs_clt_add_path_to_arr(struct ibtrs_clt_sess *sess,
> + struct ibtrs_addr *addr)
> +{
> + struct ibtrs_clt *clt = sess->clt;
> +
> + mutex_lock(&clt->paths_mutex);
> + clt->paths_num++;
> +
> + /*
> + * Firstly increase paths_num, wait for GP and then
> + * add path to the list. Why? Since we add path with
> + * !CONNECTED state explanation is similar to what has
> + * been written in ibtrs_clt_remove_path_from_arr().
> + */
> + synchronize_rcu();
> +
> + list_add_tail_rcu(&sess->s.entry, &clt->paths_list);
> + mutex_unlock(&clt->paths_mutex);
> +}
synchronize_rcu() while a mutex is being held? Really?
> +static void ibtrs_clt_close_work(struct work_struct *work)
> +{
> + struct ibtrs_clt_sess *sess;
> +
> + sess = container_of(work, struct ibtrs_clt_sess, close_work);
> +
> + cancel_delayed_work_sync(&sess->reconnect_dwork);
> + ibtrs_clt_stop_and_destroy_conns(sess);
> + /*
> + * Sounds stupid, huh? No, it is not. Consider this sequence:
> + *
> + * #CPU0 #CPU1
> + * 1. CONNECTED->RECONNECTING
> + * 2. RECONNECTING->CLOSING
> + * 3. queue_work(&reconnect_dwork)
> + * 4. queue_work(&close_work);
> + * 5. reconnect_work(); close_work();
> + *
> + * To avoid that case do cancel twice: before and after.
> + */
> + cancel_delayed_work_sync(&sess->reconnect_dwork);
> + ibtrs_clt_change_state(sess, IBTRS_CLT_CLOSED);
> +}
The above code looks suspicious to me. I think there should be an
additional state change at the start of this function to prevent that
reconnect_dwork gets requeued after having been canceled.
> +static void ibtrs_clt_dev_release(struct device *dev)
> +{
> + /* Nobody plays with device references, so nop */
> +}
That comment sounds wrong. Have you reviewed all of the device driver
core code and checked that there is no code in there that manipulates
struct device refcounts? I think the code that frees struct ibtrs_clt
should be moved from free_clt() into the above function.
Bart.
next prev parent reply other threads:[~2019-09-23 21:51 UTC|newest]
Thread overview: 148+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-06-20 15:03 [PATCH v4 00/25] InfiniBand Transport (IBTRS) and Network Block Device (IBNBD) Jack Wang
2019-06-20 15:03 ` [PATCH v4 01/25] sysfs: export sysfs_remove_file_self() Jack Wang
2019-09-23 17:21 ` Bart Van Assche
2019-09-25 9:30 ` Danil Kipnis
2019-06-20 15:03 ` [PATCH v4 02/25] ibtrs: public interface header to establish RDMA connections Jack Wang
2019-09-23 17:44 ` Bart Van Assche
2019-09-25 10:20 ` Danil Kipnis
2019-09-25 15:38 ` Bart Van Assche
2019-06-20 15:03 ` [PATCH v4 03/25] ibtrs: private headers with IBTRS protocol structs and helpers Jack Wang
2019-09-23 22:50 ` Bart Van Assche
2019-09-25 21:45 ` Danil Kipnis
2019-09-25 21:57 ` Bart Van Assche
2019-09-27 8:56 ` Jinpu Wang
2019-06-20 15:03 ` [PATCH v4 04/25] ibtrs: core: lib functions shared between client and server modules Jack Wang
2019-09-23 23:03 ` Bart Van Assche
2019-09-27 10:13 ` Jinpu Wang
2019-06-20 15:03 ` [PATCH v4 05/25] ibtrs: client: private header with client structs and functions Jack Wang
2019-09-23 23:05 ` Bart Van Assche
2019-09-27 10:18 ` Jinpu Wang
2019-06-20 15:03 ` [PATCH v4 06/25] ibtrs: client: main functionality Jack Wang
2019-09-23 21:51 ` Bart Van Assche [this message]
2019-09-25 17:36 ` Danil Kipnis
2019-09-25 18:55 ` Bart Van Assche
2019-09-25 20:50 ` Danil Kipnis
2019-09-25 21:08 ` Bart Van Assche
2019-09-25 21:16 ` Bart Van Assche
2019-09-25 22:53 ` Danil Kipnis
2019-09-25 23:21 ` Bart Van Assche
2019-09-26 9:16 ` Danil Kipnis
2019-06-20 15:03 ` [PATCH v4 07/25] ibtrs: client: statistics functions Jack Wang
2019-09-23 23:15 ` Bart Van Assche
2019-09-27 12:00 ` Jinpu Wang
2019-06-20 15:03 ` [PATCH v4 08/25] ibtrs: client: sysfs interface functions Jack Wang
2019-06-20 15:03 ` [PATCH v4 09/25] ibtrs: server: private header with server structs and functions Jack Wang
2019-09-23 23:21 ` Bart Van Assche
2019-09-27 12:04 ` Jinpu Wang
2019-06-20 15:03 ` [PATCH v4 10/25] ibtrs: server: main functionality Jack Wang
2019-09-23 23:49 ` Bart Van Assche
2019-09-27 15:03 ` Jinpu Wang
2019-09-27 15:11 ` Bart Van Assche
2019-09-27 15:19 ` Jinpu Wang
2019-06-20 15:03 ` [PATCH v4 11/25] ibtrs: server: statistics functions Jack Wang
2019-09-23 23:56 ` Bart Van Assche
2019-10-02 15:15 ` Jinpu Wang
2019-10-02 15:42 ` Leon Romanovsky
2019-10-02 15:45 ` Jinpu Wang
2019-10-02 16:00 ` Leon Romanovsky
2019-06-20 15:03 ` [PATCH v4 12/25] ibtrs: server: sysfs interface functions Jack Wang
2019-09-24 0:00 ` Bart Van Assche
2019-10-02 15:11 ` Jinpu Wang
2019-06-20 15:03 ` [PATCH v4 13/25] ibtrs: include client and server modules into kernel compilation Jack Wang
2019-06-20 15:03 ` [PATCH v4 14/25] ibtrs: a bit of documentation Jack Wang
2019-06-20 15:03 ` [PATCH v4 15/25] ibnbd: private headers with IBNBD protocol structs and helpers Jack Wang
2019-09-13 22:10 ` Bart Van Assche
2019-09-15 14:30 ` Jinpu Wang
2019-09-16 5:27 ` Leon Romanovsky
2019-09-16 13:45 ` Bart Van Assche
2019-09-17 15:41 ` Leon Romanovsky
2019-09-17 15:52 ` Jinpu Wang
2019-09-16 7:08 ` Danil Kipnis
2019-09-16 14:57 ` Jinpu Wang
2019-09-16 17:25 ` Bart Van Assche
2019-09-17 12:27 ` Jinpu Wang
2019-09-16 15:39 ` Jinpu Wang
2019-09-18 15:26 ` Bart Van Assche
2019-09-18 16:11 ` Jinpu Wang
2019-06-20 15:03 ` [PATCH v4 16/25] ibnbd: client: private header with client structs and functions Jack Wang
2019-09-13 22:25 ` Bart Van Assche
2019-09-17 16:36 ` Jinpu Wang
2019-09-25 23:43 ` Danil Kipnis
2019-09-26 10:00 ` Jinpu Wang
2019-06-20 15:03 ` [PATCH v4 17/25] ibnbd: client: main functionality Jack Wang
2019-09-13 23:46 ` Bart Van Assche
2019-09-16 14:17 ` Danil Kipnis
2019-09-16 16:46 ` Bart Van Assche
2019-09-17 11:39 ` Danil Kipnis
2019-09-18 7:14 ` Danil Kipnis
2019-09-18 15:47 ` Bart Van Assche
2019-09-20 8:29 ` Danil Kipnis
2019-09-25 22:26 ` Danil Kipnis
2019-09-26 9:55 ` Roman Penyaev
2019-09-26 15:01 ` Bart Van Assche
2019-09-27 8:52 ` Roman Penyaev
2019-09-27 9:32 ` Danil Kipnis
2019-09-27 12:18 ` Danil Kipnis
2019-09-27 16:37 ` Bart Van Assche
2019-09-27 16:50 ` Roman Penyaev
2019-09-27 17:16 ` Bart Van Assche
2019-09-17 13:09 ` Jinpu Wang
2019-09-17 16:46 ` Bart Van Assche
2019-09-18 12:02 ` Jinpu Wang
2019-09-18 16:05 ` Jinpu Wang
2019-09-14 0:00 ` Bart Van Assche
2019-06-20 15:03 ` [PATCH v4 18/25] ibnbd: client: sysfs interface functions Jack Wang
2019-09-18 16:28 ` Bart Van Assche
2019-09-19 15:55 ` Jinpu Wang
2019-06-20 15:03 ` [PATCH v4 19/25] ibnbd: server: private header with server structs and functions Jack Wang
2019-06-20 15:03 ` [PATCH v4 20/25] ibnbd: server: main functionality Jack Wang
2019-09-18 17:41 ` Bart Van Assche
2019-09-20 7:36 ` Danil Kipnis
2019-09-20 15:42 ` Bart Van Assche
2019-09-23 15:19 ` Danil Kipnis
2019-06-20 15:03 ` [PATCH v4 21/25] ibnbd: server: functionality for IO submission to file or block dev Jack Wang
2019-09-18 21:46 ` Bart Van Assche
2019-09-26 14:04 ` Jinpu Wang
2019-09-26 15:11 ` Bart Van Assche
2019-09-26 15:25 ` Danil Kipnis
2019-09-26 15:29 ` Bart Van Assche
2019-09-26 15:38 ` Danil Kipnis
2019-09-26 15:42 ` Jinpu Wang
2019-06-20 15:03 ` [PATCH v4 22/25] ibnbd: server: sysfs interface functions Jack Wang
2019-06-20 15:03 ` [PATCH v4 23/25] ibnbd: include client and server modules into kernel compilation Jack Wang
2019-06-20 15:03 ` [PATCH v4 24/25] ibnbd: a bit of documentation Jack Wang
2019-09-13 23:58 ` Bart Van Assche
2019-09-18 12:22 ` Jinpu Wang
2019-06-20 15:03 ` [PATCH v4 25/25] MAINTAINERS: Add maintainer for IBNBD/IBTRS modules Jack Wang
2019-07-09 15:10 ` Leon Romanovsky
2019-07-09 15:18 ` Jinpu Wang
2019-07-09 15:51 ` Leon Romanovsky
2019-09-13 23:56 ` Bart Van Assche
2019-09-19 10:30 ` Jinpu Wang
2019-07-09 9:55 ` [PATCH v4 00/25] InfiniBand Transport (IBTRS) and Network Block Device (IBNBD) Danil Kipnis
2019-07-09 11:00 ` Leon Romanovsky
2019-07-09 11:17 ` Greg KH
2019-07-09 11:57 ` Jinpu Wang
2019-07-09 13:32 ` Leon Romanovsky
2019-07-09 15:39 ` Bart Van Assche
2019-07-09 11:37 ` Jinpu Wang
2019-07-09 12:06 ` Jason Gunthorpe
2019-07-09 13:15 ` Jinpu Wang
2019-07-09 13:19 ` Jason Gunthorpe
2019-07-09 14:17 ` Jinpu Wang
2019-07-09 21:27 ` Sagi Grimberg
2019-07-19 13:12 ` Danil Kipnis
2019-07-10 14:55 ` Danil Kipnis
2019-07-09 12:04 ` Jason Gunthorpe
2019-07-09 19:45 ` Sagi Grimberg
2019-07-10 13:55 ` Jason Gunthorpe
2019-07-10 16:25 ` Sagi Grimberg
2019-07-10 17:25 ` Jason Gunthorpe
2019-07-10 19:11 ` Sagi Grimberg
2019-07-11 7:27 ` Danil Kipnis
2019-07-11 8:54 ` Danil Kipnis
2019-07-12 0:22 ` Sagi Grimberg
2019-07-12 7:57 ` Jinpu Wang
2019-07-12 19:40 ` Sagi Grimberg
2019-07-15 11:21 ` Jinpu Wang
2019-07-12 10:58 ` Danil Kipnis
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=d0bc1253-4f3d-981b-97f1-e44900fffb44@acm.org \
--to=bvanassche@acm.org \
--cc=axboe@kernel.dk \
--cc=danil.kipnis@cloud.ionos.com \
--cc=dledford@redhat.com \
--cc=hch@infradead.org \
--cc=jgg@mellanox.com \
--cc=jinpu.wang@cloud.ionos.com \
--cc=jinpuwang@gmail.com \
--cc=linux-block@vger.kernel.org \
--cc=linux-rdma@vger.kernel.org \
--cc=rpenyaev@suse.de \
--cc=sagi@grimberg.me \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).