Linux-RDMA Archive on lore.kernel.org
 help / color / Atom feed
From: Bart Van Assche <bvanassche@acm.org>
To: Jack Wang <jinpuwang@gmail.com>,
	linux-block@vger.kernel.org, linux-rdma@vger.kernel.org
Cc: axboe@kernel.dk, hch@infradead.org, sagi@grimberg.me,
	leon@kernel.org, dledford@redhat.com,
	danil.kipnis@cloud.ionos.com, jinpu.wang@cloud.ionos.com,
	rpenyaev@suse.de
Subject: Re: [PATCH v6 17/25] rnbd: client: main functionality
Date: Thu, 2 Jan 2020 15:55:09 -0800
Message-ID: <aa7eeeda-b3d7-4a26-9043-53ce8c80eef1@acm.org> (raw)
In-Reply-To: <20191230102942.18395-18-jinpuwang@gmail.com>

On 12/30/19 2:29 AM, Jack Wang wrote:
> +MODULE_DESCRIPTION("InfiniBand Network Block Device Client");

InfiniBand or RDMA?

> +static int rnbd_clt_set_dev_attr(struct rnbd_clt_dev *dev,
> +				  const struct rnbd_msg_open_rsp *rsp)
> +{
> +	struct rnbd_clt_session *sess = dev->sess;
> +
> +	if (unlikely(!rsp->logical_block_size))
> +		return -EINVAL;
> +
> +	dev->device_id		    = le32_to_cpu(rsp->device_id);
> +	dev->nsectors		    = le64_to_cpu(rsp->nsectors);
> +	dev->logical_block_size	    = le16_to_cpu(rsp->logical_block_size);
> +	dev->physical_block_size    = le16_to_cpu(rsp->physical_block_size);
> +	dev->max_write_same_sectors = le32_to_cpu(rsp->max_write_same_sectors);
> +	dev->max_discard_sectors    = le32_to_cpu(rsp->max_discard_sectors);
> +	dev->discard_granularity    = le32_to_cpu(rsp->discard_granularity);
> +	dev->discard_alignment	    = le32_to_cpu(rsp->discard_alignment);
> +	dev->secure_discard	    = le16_to_cpu(rsp->secure_discard);
> +	dev->rotational		    = rsp->rotational;
> +
> +	dev->max_hw_sectors = sess->max_io_size / dev->logical_block_size;

The above statement looks suspicious to me. The unit of the second 
argument of blk_queue_max_hw_sectors() is 512 bytes. Since 
dev->max_hw_sectors is passed as the second argument to 
blk_queue_max_hw_sectors(), I think it should also be expressed in units 
of 512 bytes instead of the logical block size.

> +static int rnbd_clt_change_capacity(struct rnbd_clt_dev *dev,
> +				     size_t new_nsectors)
> +{
> +	int err = 0;
> +
> +	rnbd_clt_info(dev, "Device size changed from %zu to %zu sectors\n",
> +		       dev->nsectors, new_nsectors);
> +	dev->nsectors = new_nsectors;
> +	set_capacity(dev->gd,
> +		     dev->nsectors * (dev->logical_block_size /
> +				      SECTOR_SIZE));
> +	err = revalidate_disk(dev->gd);
> +	if (err)
> +		rnbd_clt_err(dev,
> +			      "Failed to change device size from %zu to %zu, err: %d\n",
> +			      dev->nsectors, new_nsectors, err);
> +	return err;
> +}

Please document the unit of nsectors in struct rnbd_clt_dev. Please also 
document the unit of the 'new_nsectors' argument.

The set_capacity() call can only be correct if the unit of dev->nsectors 
is one logical block. Is that really the case?

> +static void msg_io_conf(void *priv, int errno)
> +{
> +	struct rnbd_iu *iu = priv;
> +	struct rnbd_clt_dev *dev = iu->dev;
> +	struct request *rq = iu->rq;
> +
> +	iu->status = errno ? BLK_STS_IOERR : BLK_STS_OK;
> +
> +	blk_mq_complete_request(rq);
> +
> +	if (errno)
> +		rnbd_clt_info_rl(dev, "%s I/O failed with err: %d\n",
> +				  rq_data_dir(rq) == READ ? "read" : "write",
> +				  errno);
> +}

Accessing 'rq' after having called blk_mq_complete_request() may trigger 
a use-after-free. Please don't do that.

> +static void wait_for_rtrs_disconnection(struct rnbd_clt_session *sess)
> +__releases(&sess_lock)
> +__acquires(&sess_lock)

Please indent __releases() and __acquires() annotations.

> +{
> +	DEFINE_WAIT_FUNC(wait, autoremove_wake_function);
> +
> +	prepare_to_wait(&sess->rtrs_waitq, &wait, TASK_UNINTERRUPTIBLE);
> +	if (IS_ERR_OR_NULL(sess->rtrs)) {
> +		finish_wait(&sess->rtrs_waitq, &wait);
> +		return;
> +	}
> +	mutex_unlock(&sess_lock);
> +	/* After unlock session can be freed, so careful */
> +	schedule();
> +	mutex_lock(&sess_lock);
> +}

How can a function that calls schedule() and that is not surrounded by a 
loop be correct? What if e.g. schedule() finishes due to a spurious wakeup?

> +static struct rnbd_clt_session *__find_and_get_sess(const char *sessname)
> +__releases(&sess_lock)
> +__acquires(&sess_lock)
> +{
> +	struct rnbd_clt_session *sess;
> +	int err;
> +
> +again:
> +	list_for_each_entry(sess, &sess_list, list) {
> +		if (strcmp(sessname, sess->sessname))
> +			continue;
> +
> +		if (unlikely(sess->rtrs_ready && IS_ERR_OR_NULL(sess->rtrs)))
> +			/*
> +			 * No RTRS connection, session is dying.
> +			 */
> +			continue;
> +
> +		if (likely(rnbd_clt_get_sess(sess))) {
> +			/*
> +			 * Alive session is found, wait for RTRS connection.
> +			 */
> +			mutex_unlock(&sess_lock);
> +			err = wait_for_rtrs_connection(sess);
> +			if (unlikely(err))
> +				rnbd_clt_put_sess(sess);
> +			mutex_lock(&sess_lock);
> +
> +			if (unlikely(err))
> +				/* Session is dying, repeat the loop */
> +				goto again;
> +
> +			return sess;
> +		}
> +		/*
> +		 * Ref is 0, session is dying, wait for RTRS disconnect
> +		 * in order to avoid session names clashes.
> +		 */
> +		wait_for_rtrs_disconnection(sess);
> +		/*
> +		 * RTRS is disconnected and soon session will be freed,
> +		 * so repeat a loop.
> +		 */
> +		goto again;
> +	}
> +
> +	return NULL;
> +}

Since wait_for_rtrs_disconnection() unlocks sess_lock, can the 
list_for_each_entry() above trigger a use-after-free of sess->list.next?

> +static size_t rnbd_clt_get_sg_size(struct scatterlist *sglist, u32 len)
> +{
> +	struct scatterlist *sg;
> +	size_t tsize = 0;
> +	int i;
> +
> +	for_each_sg(sglist, sg, len, i)
> +		tsize += sg->length;
> +	return tsize;
> +}

Please follow the example of other block drivers and use blk_rq_bytes() 
instead of iterating over the sg-list.

> +static int setup_mq_tags(struct rnbd_clt_session *sess)
> +{
> +	struct blk_mq_tag_set *tags = &sess->tag_set;
> +
> +	memset(tags, 0, sizeof(*tags));
> +	tags->ops		= &rnbd_mq_ops;
> +	tags->queue_depth	= sess->queue_depth;
> +	tags->numa_node		= NUMA_NO_NODE;
> +	tags->flags		= BLK_MQ_F_SHOULD_MERGE |
> +				  BLK_MQ_F_TAG_SHARED;
> +	tags->cmd_size		= sizeof(struct rnbd_iu);
> +	tags->nr_hw_queues	= num_online_cpus();
> +
> +	return blk_mq_alloc_tag_set(tags);
> +}

Please rename the "tags" pointer to "tag_set".

> +static int index_to_minor(int index)
> +{
> +	return index << RNBD_PART_BITS;
> +}
> +
> +static int minor_to_index(int minor)
> +{
> +	return minor >> RNBD_PART_BITS;
> +}

Is it useful to introduce functions that encapsulate a single shift 
operation?

> +	blk_queue_virt_boundary(dev->queue, 4095);

The virt_boundary parameter must match the RDMA memory registration page 
size. Please introduce a symbolic constant for the RDMA memory 
registration page size such that these two parameters stay in sync in 
case anyone would want to change the memory registration page size.

> +static void rnbd_clt_setup_gen_disk(struct rnbd_clt_dev *dev, int idx)
> +{
> +	dev->gd->major		= rnbd_client_major;
> +	dev->gd->first_minor	= index_to_minor(idx);
> +	dev->gd->fops		= &rnbd_client_ops;
> +	dev->gd->queue		= dev->queue;
> +	dev->gd->private_data	= dev;
> +	snprintf(dev->gd->disk_name, sizeof(dev->gd->disk_name), "rnbd%d",
> +		 idx);
> +	pr_debug("disk_name=%s, capacity=%zu\n",
> +		 dev->gd->disk_name,
> +		 dev->nsectors * (dev->logical_block_size / SECTOR_SIZE)
> +		 );
> +
> +	set_capacity(dev->gd, dev->nsectors * (dev->logical_block_size /
> +					       SECTOR_SIZE));

Again, what is the unit of dev->nsectors?

> +static void rnbd_clt_add_gen_disk(struct rnbd_clt_dev *dev)
> +{
> +	add_disk(dev->gd);
> +}

Is it useful to introduce this wrapper around add_disk()?

Thanks,

Bart.

  reply index

Thread overview: 89+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-12-30 10:29 [PATCH v6 00/25] RTRS (former IBTRS) rdma transport library and RNBD (former IBNBD) rdma network block device Jack Wang
2019-12-30 10:29 ` [PATCH v6 01/25] sysfs: export sysfs_remove_file_self() Jack Wang
2019-12-30 10:29 ` [PATCH v6 02/25] rtrs: public interface header to establish RDMA connections Jack Wang
2019-12-30 19:25   ` Bart Van Assche
2020-01-02 13:35     ` Jinpu Wang
2020-01-02 16:36       ` Bart Van Assche
2020-01-02 16:47         ` Jinpu Wang
2019-12-30 10:29 ` [PATCH v6 03/25] rtrs: private headers with rtrs protocol structs and helpers Jack Wang
2019-12-30 19:48   ` Bart Van Assche
2020-01-02 15:27     ` Jinpu Wang
2020-01-02 17:00       ` Bart Van Assche
2020-01-02 18:26         ` Jason Gunthorpe
2020-01-03 12:31           ` Jinpu Wang
2020-01-03 12:27         ` Jinpu Wang
2019-12-31  0:07   ` Bart Van Assche
2020-01-03 13:48     ` Jinpu Wang
2019-12-30 10:29 ` [PATCH v6 04/25] rtrs: core: lib functions shared between client and server modules Jack Wang
2019-12-30 22:25   ` Bart Van Assche
2020-01-07 12:22     ` Jinpu Wang
2019-12-30 10:29 ` [PATCH v6 05/25] rtrs: client: private header with client structs and functions Jack Wang
2019-12-30 22:51   ` Bart Van Assche
2020-01-07 12:39     ` Jinpu Wang
2019-12-30 23:03   ` Bart Van Assche
2020-01-07 12:39     ` Jinpu Wang
2019-12-30 10:29 ` [PATCH v6 06/25] rtrs: client: main functionality Jack Wang
2019-12-30 23:53   ` Bart Van Assche
2020-01-02 18:23     ` Jason Gunthorpe
2020-01-03 14:30     ` Jinpu Wang
2020-01-03 16:12       ` Bart Van Assche
2019-12-30 10:29 ` [PATCH v6 07/25] rtrs: client: statistics functions Jack Wang
2020-01-02 21:07   ` Bart Van Assche
2020-01-03 14:39     ` Jinpu Wang
2019-12-30 10:29 ` [PATCH v6 08/25] rtrs: client: sysfs interface functions Jack Wang
2020-01-02 21:14   ` Bart Van Assche
2020-01-03 14:59     ` Jinpu Wang
2019-12-30 10:29 ` [PATCH v6 09/25] rtrs: server: private header with server structs and functions Jack Wang
2020-01-02 21:24   ` Bart Van Assche
2020-01-08 16:33     ` Jinpu Wang
2019-12-30 10:29 ` [PATCH v6 10/25] rtrs: server: main functionality Jack Wang
2020-01-02 22:03   ` Bart Van Assche
2020-01-07 13:19     ` Jinpu Wang
2020-01-07 18:25       ` Jason Gunthorpe
2020-01-10 17:38         ` Jinpu Wang
2019-12-30 10:29 ` [PATCH v6 11/25] rtrs: server: statistics functions Jack Wang
2020-01-02 22:02   ` Bart Van Assche
2020-01-08 12:55     ` Jinpu Wang
2019-12-30 10:29 ` [PATCH v6 12/25] rtrs: server: sysfs interface functions Jack Wang
2020-01-02 22:06   ` Bart Van Assche
2020-01-07 14:40     ` Jinpu Wang
2019-12-30 10:29 ` [PATCH v6 13/25] rtrs: include client and server modules into kernel compilation Jack Wang
2020-01-02 22:11   ` Bart Van Assche
2020-01-03 16:19     ` Jinpu Wang
2019-12-30 10:29 ` [PATCH v6 14/25] rtrs: a bit of documentation Jack Wang
2019-12-30 23:19   ` Bart Van Assche
2020-01-07 14:48     ` Jinpu Wang
2020-01-02 22:21   ` Bart Van Assche
2020-01-07 15:49     ` Jinpu Wang
2019-12-30 10:29 ` [PATCH v6 15/25] rnbd: private headers with rnbd protocol structs and helpers Jack Wang
2020-01-02 22:34   ` Bart Van Assche
2020-01-07 16:53     ` Jinpu Wang
2019-12-30 10:29 ` [PATCH v6 16/25] rnbd: client: private header with client structs and functions Jack Wang
2020-01-02 22:37   ` Bart Van Assche
2020-01-07 17:09     ` Jinpu Wang
2019-12-30 10:29 ` [PATCH v6 17/25] rnbd: client: main functionality Jack Wang
2020-01-02 23:55   ` Bart Van Assche [this message]
2020-01-08 14:22     ` Jinpu Wang
2020-01-10 14:45     ` Jinpu Wang
2020-01-10 15:09       ` Roman Penyaev
2020-01-10 15:29         ` Jinpu Wang
2019-12-30 10:29 ` [PATCH v6 18/25] rnbd: client: sysfs interface functions Jack Wang
2020-01-03  0:03   ` Bart Van Assche
2020-01-08 13:06     ` Jinpu Wang
2020-01-08 16:39       ` Bart Van Assche
2020-01-08 16:51         ` Jinpu Wang
2019-12-30 10:29 ` [PATCH v6 19/25] rnbd: server: private header with server structs and functions Jack Wang
2019-12-30 10:29 ` [PATCH v6 20/25] rnbd: server: main functionality Jack Wang
2019-12-30 10:29 ` [PATCH v6 21/25] rnbd: server: functionality for IO submission to file or block dev Jack Wang
2019-12-30 10:29 ` [PATCH v6 22/25] rnbd: server: sysfs interface functions Jack Wang
2019-12-30 10:29 ` [PATCH v6 23/25] rnbd: include client and server modules into kernel compilation Jack Wang
2019-12-30 10:29 ` [PATCH v6 24/25] rnbd: a bit of documentation Jack Wang
2019-12-30 10:29 ` [PATCH v6 25/25] MAINTAINERS: Add maintainers for RNBD/RTRS modules Jack Wang
2019-12-30 12:22   ` Gal Pressman
2020-01-02  8:41     ` Jinpu Wang
2019-12-31  0:11 ` [PATCH v6 00/25] RTRS (former IBTRS) rdma transport library and RNBD (former IBNBD) rdma network block device Bart Van Assche
2020-01-02  8:48   ` Jinpu Wang
2019-12-31  2:39 ` Bart Van Assche
2020-01-02  9:20   ` Jinpu Wang
2020-01-02 18:28   ` Jason Gunthorpe
2020-01-03 12:34     ` Jinpu Wang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=aa7eeeda-b3d7-4a26-9043-53ce8c80eef1@acm.org \
    --to=bvanassche@acm.org \
    --cc=axboe@kernel.dk \
    --cc=danil.kipnis@cloud.ionos.com \
    --cc=dledford@redhat.com \
    --cc=hch@infradead.org \
    --cc=jinpu.wang@cloud.ionos.com \
    --cc=jinpuwang@gmail.com \
    --cc=leon@kernel.org \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-rdma@vger.kernel.org \
    --cc=rpenyaev@suse.de \
    --cc=sagi@grimberg.me \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Linux-RDMA Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-rdma/0 linux-rdma/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-rdma linux-rdma/ https://lore.kernel.org/linux-rdma \
		linux-rdma@vger.kernel.org
	public-inbox-index linux-rdma

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-rdma


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git