Linux-SCSI Archive on lore.kernel.org
 help / color / Atom feed
From: "Michael S. Tsirkin" <mst@redhat.com>
To: Mike Christie <michael.christie@oracle.com>
Cc: martin.petersen@oracle.com, linux-scsi@vger.kernel.org,
	target-devel@vger.kernel.org, jasowang@redhat.com,
	pbonzini@redhat.com, stefanha@redhat.com,
	virtualization@lists.linux-foundation.org
Subject: Re: [PATCH 3/8] vhost scsi: alloc cmds per vq instead of session
Date: Thu, 24 Sep 2020 02:22:32 -0400
Message-ID: <20200924022107-mutt-send-email-mst@kernel.org> (raw)
In-Reply-To: <1600712588-9514-4-git-send-email-michael.christie@oracle.com>

On Mon, Sep 21, 2020 at 01:23:03PM -0500, Mike Christie wrote:
> We currently are limited to 256 cmds per session. This leads to problems
> where if the user has increased virtqueue_size to more than 2 or
> cmd_per_lun to more than 256 vhost_scsi_get_tag can fail and the guest
> will get IO errors.
> 
> This patch moves the cmd allocation to per vq so we can easily match
> whatever the user has specified for num_queues and
> virtqueue_size/cmd_per_lun. It also makes it easier to control how much
> memory we preallocate. For cases, where perf is not as important and
> we can use the current defaults (1 vq and 128 cmds per vq) memory use
> from preallocate cmds is cut in half. For cases, where we are willing
> to use more memory for higher perf, cmd mem use will now increase as
> the num queues and queue depth increases.
> 
> Signed-off-by: Mike Christie <michael.christie@oracle.com>
> ---
>  drivers/vhost/scsi.c | 204 ++++++++++++++++++++++++++++++++-------------------
>  1 file changed, 127 insertions(+), 77 deletions(-)
> 
> diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
> index b22adf0..13311b8 100644
> --- a/drivers/vhost/scsi.c
> +++ b/drivers/vhost/scsi.c
> @@ -52,7 +52,6 @@
>  #define VHOST_SCSI_VERSION  "v0.1"
>  #define VHOST_SCSI_NAMELEN 256
>  #define VHOST_SCSI_MAX_CDB_SIZE 32
> -#define VHOST_SCSI_DEFAULT_TAGS 256
>  #define VHOST_SCSI_PREALLOC_SGLS 2048
>  #define VHOST_SCSI_PREALLOC_UPAGES 2048
>  #define VHOST_SCSI_PREALLOC_PROT_SGLS 2048
> @@ -189,6 +188,9 @@ struct vhost_scsi_virtqueue {
>  	 * Writers must also take dev mutex and flush under it.
>  	 */
>  	int inflight_idx;
> +	struct vhost_scsi_cmd *scsi_cmds;
> +	struct sbitmap scsi_tags;
> +	int max_cmds;
>  };
>  
>  struct vhost_scsi {
> @@ -324,7 +326,9 @@ static void vhost_scsi_release_cmd(struct se_cmd *se_cmd)
>  {
>  	struct vhost_scsi_cmd *tv_cmd = container_of(se_cmd,
>  				struct vhost_scsi_cmd, tvc_se_cmd);
> -	struct se_session *se_sess = tv_cmd->tvc_nexus->tvn_se_sess;
> +	struct vhost_scsi_virtqueue *svq = container_of(tv_cmd->tvc_vq,
> +				struct vhost_scsi_virtqueue, vq);
> +	struct vhost_scsi_inflight *inflight = tv_cmd->inflight;
>  	int i;
>  
>  	if (tv_cmd->tvc_sgl_count) {
> @@ -336,8 +340,8 @@ static void vhost_scsi_release_cmd(struct se_cmd *se_cmd)
>  			put_page(sg_page(&tv_cmd->tvc_prot_sgl[i]));
>  	}
>  
> -	vhost_scsi_put_inflight(tv_cmd->inflight);
> -	target_free_tag(se_sess, se_cmd);
> +	sbitmap_clear_bit(&svq->scsi_tags, se_cmd->map_tag);
> +	vhost_scsi_put_inflight(inflight);
>  }
>  
>  static u32 vhost_scsi_sess_get_index(struct se_session *se_sess)
> @@ -566,13 +570,14 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work)
>  }
>  
>  static struct vhost_scsi_cmd *
> -vhost_scsi_get_tag(struct vhost_virtqueue *vq, struct vhost_scsi_tpg *tpg,
> +vhost_scsi_get_cmd(struct vhost_virtqueue *vq, struct vhost_scsi_tpg *tpg,
>  		   unsigned char *cdb, u64 scsi_tag, u16 lun, u8 task_attr,
>  		   u32 exp_data_len, int data_direction)
>  {
> +	struct vhost_scsi_virtqueue *svq = container_of(vq,
> +					struct vhost_scsi_virtqueue, vq);
>  	struct vhost_scsi_cmd *cmd;
>  	struct vhost_scsi_nexus *tv_nexus;
> -	struct se_session *se_sess;
>  	struct scatterlist *sg, *prot_sg;
>  	struct page **pages;
>  	int tag, cpu;
> @@ -582,15 +587,14 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work)
>  		pr_err("Unable to locate active struct vhost_scsi_nexus\n");
>  		return ERR_PTR(-EIO);
>  	}
> -	se_sess = tv_nexus->tvn_se_sess;
>  
> -	tag = sbitmap_queue_get(&se_sess->sess_tag_pool, &cpu);
> +	tag = sbitmap_get(&svq->scsi_tags, 0, false);
>  	if (tag < 0) {
>  		pr_err("Unable to obtain tag for vhost_scsi_cmd\n");
>  		return ERR_PTR(-ENOMEM);
>  	}


After this change, cpu is uninitialized.


>  
> -	cmd = &((struct vhost_scsi_cmd *)se_sess->sess_cmd_map)[tag];
> +	cmd = &svq->scsi_cmds[tag];
>  	sg = cmd->tvc_sgl;
>  	prot_sg = cmd->tvc_prot_sgl;
>  	pages = cmd->tvc_upages;
> @@ -1065,11 +1069,11 @@ static void vhost_scsi_submission_work(struct work_struct *work)
>  				scsi_command_size(cdb), VHOST_SCSI_MAX_CDB_SIZE);
>  				goto err;
>  		}
> -		cmd = vhost_scsi_get_tag(vq, tpg, cdb, tag, lun, task_attr,
> +		cmd = vhost_scsi_get_cmd(vq, tpg, cdb, tag, lun, task_attr,
>  					 exp_data_len + prot_bytes,
>  					 data_direction);
>  		if (IS_ERR(cmd)) {
> -			vq_err(vq, "vhost_scsi_get_tag failed %ld\n",
> +			vq_err(vq, "vhost_scsi_get_cmd failed %ld\n",
>  			       PTR_ERR(cmd));
>  			goto err;
>  		}
> @@ -1373,6 +1377,83 @@ static void vhost_scsi_flush(struct vhost_scsi *vs)
>  		wait_for_completion(&old_inflight[i]->comp);
>  }
>  
> +static void vhost_scsi_destroy_vq_cmds(struct vhost_virtqueue *vq)
> +{
> +	struct vhost_scsi_virtqueue *svq = container_of(vq,
> +					struct vhost_scsi_virtqueue, vq);
> +	struct vhost_scsi_cmd *tv_cmd;
> +	unsigned int i;
> +
> +	if (!svq->scsi_cmds)
> +		return;
> +
> +	for (i = 0; i < svq->max_cmds; i++) {
> +		tv_cmd = &svq->scsi_cmds[i];
> +
> +		kfree(tv_cmd->tvc_sgl);
> +		kfree(tv_cmd->tvc_prot_sgl);
> +		kfree(tv_cmd->tvc_upages);
> +	}
> +
> +	sbitmap_free(&svq->scsi_tags);
> +	kfree(svq->scsi_cmds);
> +	svq->scsi_cmds = NULL;
> +}
> +
> +static int vhost_scsi_setup_vq_cmds(struct vhost_virtqueue *vq, int max_cmds)
> +{
> +	struct vhost_scsi_virtqueue *svq = container_of(vq,
> +					struct vhost_scsi_virtqueue, vq);
> +	struct vhost_scsi_cmd *tv_cmd;
> +	unsigned int i;
> +
> +	if (svq->scsi_cmds)
> +		return 0;
> +
> +	if (sbitmap_init_node(&svq->scsi_tags, max_cmds, -1, GFP_KERNEL,
> +			      NUMA_NO_NODE))
> +		return -ENOMEM;
> +	svq->max_cmds = max_cmds;
> +
> +	svq->scsi_cmds = kcalloc(max_cmds, sizeof(*tv_cmd), GFP_KERNEL);
> +	if (!svq->scsi_cmds) {
> +		sbitmap_free(&svq->scsi_tags);
> +		return -ENOMEM;
> +	}
> +
> +	for (i = 0; i < max_cmds; i++) {
> +		tv_cmd = &svq->scsi_cmds[i];
> +
> +		tv_cmd->tvc_sgl = kcalloc(VHOST_SCSI_PREALLOC_SGLS,
> +					  sizeof(struct scatterlist),
> +					  GFP_KERNEL);
> +		if (!tv_cmd->tvc_sgl) {
> +			pr_err("Unable to allocate tv_cmd->tvc_sgl\n");
> +			goto out;
> +		}
> +
> +		tv_cmd->tvc_upages = kcalloc(VHOST_SCSI_PREALLOC_UPAGES,
> +					     sizeof(struct page *),
> +					     GFP_KERNEL);
> +		if (!tv_cmd->tvc_upages) {
> +			pr_err("Unable to allocate tv_cmd->tvc_upages\n");
> +			goto out;
> +		}
> +
> +		tv_cmd->tvc_prot_sgl = kcalloc(VHOST_SCSI_PREALLOC_PROT_SGLS,
> +					       sizeof(struct scatterlist),
> +					       GFP_KERNEL);
> +		if (!tv_cmd->tvc_prot_sgl) {
> +			pr_err("Unable to allocate tv_cmd->tvc_prot_sgl\n");
> +			goto out;
> +		}
> +	}
> +	return 0;
> +out:
> +	vhost_scsi_destroy_vq_cmds(vq);
> +	return -ENOMEM;
> +}
> +
>  /*
>   * Called from vhost_scsi_ioctl() context to walk the list of available
>   * vhost_scsi_tpg with an active struct vhost_scsi_nexus
> @@ -1427,10 +1508,9 @@ static void vhost_scsi_flush(struct vhost_scsi *vs)
>  
>  		if (!strcmp(tv_tport->tport_name, t->vhost_wwpn)) {
>  			if (vs->vs_tpg && vs->vs_tpg[tpg->tport_tpgt]) {
> -				kfree(vs_tpg);
>  				mutex_unlock(&tpg->tv_tpg_mutex);
>  				ret = -EEXIST;
> -				goto out;
> +				goto undepend;
>  			}
>  			/*
>  			 * In order to ensure individual vhost-scsi configfs
> @@ -1442,9 +1522,8 @@ static void vhost_scsi_flush(struct vhost_scsi *vs)
>  			ret = target_depend_item(&se_tpg->tpg_group.cg_item);
>  			if (ret) {
>  				pr_warn("target_depend_item() failed: %d\n", ret);
> -				kfree(vs_tpg);
>  				mutex_unlock(&tpg->tv_tpg_mutex);
> -				goto out;
> +				goto undepend;
>  			}
>  			tpg->tv_tpg_vhost_count++;
>  			tpg->vhost_scsi = vs;
> @@ -1457,6 +1536,16 @@ static void vhost_scsi_flush(struct vhost_scsi *vs)
>  	if (match) {
>  		memcpy(vs->vs_vhost_wwpn, t->vhost_wwpn,
>  		       sizeof(vs->vs_vhost_wwpn));
> +
> +		for (i = VHOST_SCSI_VQ_IO; i < VHOST_SCSI_MAX_VQ; i++) {
> +			vq = &vs->vqs[i].vq;
> +			if (!vhost_vq_is_setup(vq))
> +				continue;
> +
> +			if (vhost_scsi_setup_vq_cmds(vq, vq->num))
> +				goto destroy_vq_cmds;
> +		}
> +
>  		for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) {
>  			vq = &vs->vqs[i].vq;
>  			mutex_lock(&vq->mutex);
> @@ -1476,7 +1565,22 @@ static void vhost_scsi_flush(struct vhost_scsi *vs)
>  	vhost_scsi_flush(vs);
>  	kfree(vs->vs_tpg);
>  	vs->vs_tpg = vs_tpg;
> +	goto out;
>  
> +destroy_vq_cmds:
> +	for (i--; i >= VHOST_SCSI_VQ_IO; i--) {
> +		if (!vhost_vq_get_backend(&vs->vqs[i].vq))
> +			vhost_scsi_destroy_vq_cmds(&vs->vqs[i].vq);
> +	}
> +undepend:
> +	for (i = 0; i < VHOST_SCSI_MAX_TARGET; i++) {
> +		tpg = vs_tpg[i];
> +		if (tpg) {
> +			tpg->tv_tpg_vhost_count--;
> +			target_undepend_item(&tpg->se_tpg.tpg_group.cg_item);
> +		}
> +	}
> +	kfree(vs_tpg);
>  out:
>  	mutex_unlock(&vs->dev.mutex);
>  	mutex_unlock(&vhost_scsi_mutex);
> @@ -1549,6 +1653,12 @@ static void vhost_scsi_flush(struct vhost_scsi *vs)
>  			mutex_lock(&vq->mutex);
>  			vhost_vq_set_backend(vq, NULL);
>  			mutex_unlock(&vq->mutex);
> +			/*
> +			 * Make sure cmds are not running before tearing them
> +			 * down.
> +			 */
> +			vhost_scsi_flush(vs);
> +			vhost_scsi_destroy_vq_cmds(vq);
>  		}
>  	}
>  	/*
> @@ -1842,23 +1952,6 @@ static void vhost_scsi_port_unlink(struct se_portal_group *se_tpg,
>  	mutex_unlock(&vhost_scsi_mutex);
>  }
>  
> -static void vhost_scsi_free_cmd_map_res(struct se_session *se_sess)
> -{
> -	struct vhost_scsi_cmd *tv_cmd;
> -	unsigned int i;
> -
> -	if (!se_sess->sess_cmd_map)
> -		return;
> -
> -	for (i = 0; i < VHOST_SCSI_DEFAULT_TAGS; i++) {
> -		tv_cmd = &((struct vhost_scsi_cmd *)se_sess->sess_cmd_map)[i];
> -
> -		kfree(tv_cmd->tvc_sgl);
> -		kfree(tv_cmd->tvc_prot_sgl);
> -		kfree(tv_cmd->tvc_upages);
> -	}
> -}
> -
>  static ssize_t vhost_scsi_tpg_attrib_fabric_prot_type_store(
>  		struct config_item *item, const char *page, size_t count)
>  {
> @@ -1898,45 +1991,6 @@ static ssize_t vhost_scsi_tpg_attrib_fabric_prot_type_show(
>  	NULL,
>  };
>  
> -static int vhost_scsi_nexus_cb(struct se_portal_group *se_tpg,
> -			       struct se_session *se_sess, void *p)
> -{
> -	struct vhost_scsi_cmd *tv_cmd;
> -	unsigned int i;
> -
> -	for (i = 0; i < VHOST_SCSI_DEFAULT_TAGS; i++) {
> -		tv_cmd = &((struct vhost_scsi_cmd *)se_sess->sess_cmd_map)[i];
> -
> -		tv_cmd->tvc_sgl = kcalloc(VHOST_SCSI_PREALLOC_SGLS,
> -					  sizeof(struct scatterlist),
> -					  GFP_KERNEL);
> -		if (!tv_cmd->tvc_sgl) {
> -			pr_err("Unable to allocate tv_cmd->tvc_sgl\n");
> -			goto out;
> -		}
> -
> -		tv_cmd->tvc_upages = kcalloc(VHOST_SCSI_PREALLOC_UPAGES,
> -					     sizeof(struct page *),
> -					     GFP_KERNEL);
> -		if (!tv_cmd->tvc_upages) {
> -			pr_err("Unable to allocate tv_cmd->tvc_upages\n");
> -			goto out;
> -		}
> -
> -		tv_cmd->tvc_prot_sgl = kcalloc(VHOST_SCSI_PREALLOC_PROT_SGLS,
> -					       sizeof(struct scatterlist),
> -					       GFP_KERNEL);
> -		if (!tv_cmd->tvc_prot_sgl) {
> -			pr_err("Unable to allocate tv_cmd->tvc_prot_sgl\n");
> -			goto out;
> -		}
> -	}
> -	return 0;
> -out:
> -	vhost_scsi_free_cmd_map_res(se_sess);
> -	return -ENOMEM;
> -}
> -
>  static int vhost_scsi_make_nexus(struct vhost_scsi_tpg *tpg,
>  				const char *name)
>  {
> @@ -1960,12 +2014,9 @@ static int vhost_scsi_make_nexus(struct vhost_scsi_tpg *tpg,
>  	 * struct se_node_acl for the vhost_scsi struct se_portal_group with
>  	 * the SCSI Initiator port name of the passed configfs group 'name'.
>  	 */
> -	tv_nexus->tvn_se_sess = target_setup_session(&tpg->se_tpg,
> -					VHOST_SCSI_DEFAULT_TAGS,
> -					sizeof(struct vhost_scsi_cmd),
> +	tv_nexus->tvn_se_sess = target_setup_session(&tpg->se_tpg, 0, 0,
>  					TARGET_PROT_DIN_PASS | TARGET_PROT_DOUT_PASS,
> -					(unsigned char *)name, tv_nexus,
> -					vhost_scsi_nexus_cb);
> +					(unsigned char *)name, tv_nexus, NULL);
>  	if (IS_ERR(tv_nexus->tvn_se_sess)) {
>  		mutex_unlock(&tpg->tv_tpg_mutex);
>  		kfree(tv_nexus);
> @@ -2015,7 +2066,6 @@ static int vhost_scsi_drop_nexus(struct vhost_scsi_tpg *tpg)
>  		" %s Initiator Port: %s\n", vhost_scsi_dump_proto_id(tpg->tport),
>  		tv_nexus->tvn_se_sess->se_node_acl->initiatorname);
>  
> -	vhost_scsi_free_cmd_map_res(se_sess);
>  	/*
>  	 * Release the SCSI I_T Nexus to the emulated vhost Target Port
>  	 */
> -- 
> 1.8.3.1


  parent reply index

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-09-21 18:23 [PATCH 0/8] vhost scsi: fixes and cleanups Mike Christie
2020-09-21 18:23 ` [PATCH 1/8] vhost vdpa: fix vhost_vdpa_open error handling Mike Christie
2020-09-22  1:58   ` Jason Wang
2020-09-21 18:23 ` [PATCH 2/8] vhost: add helper to check if a vq has been setup Mike Christie
2020-09-22  2:02   ` Jason Wang
2020-09-23 19:12     ` Mike Christie
2020-09-24  7:22       ` Jason Wang
2020-09-22  2:45   ` Bart Van Assche
2020-09-22 15:34     ` Michael Christie
2020-09-21 18:23 ` [PATCH 3/8] vhost scsi: alloc cmds per vq instead of session Mike Christie
2020-09-22  1:32   ` kernel test robot
2020-09-23  7:53   ` Dan Carpenter
2020-09-24  6:22   ` Michael S. Tsirkin [this message]
2020-09-24 15:31     ` Michael Christie
2020-09-21 18:23 ` [PATCH 4/8] vhost scsi: fix cmd completion race Mike Christie
2020-09-22  2:48   ` Bart Van Assche
2020-09-22  2:51     ` Bart Van Assche
2020-09-21 18:23 ` [PATCH 5/8] vhost scsi: add lun parser helper Mike Christie
2020-09-23 10:23   ` Paolo Bonzini
2020-09-21 18:23 ` [PATCH 6/8] vhost scsi: Add support for LUN resets Mike Christie
2020-09-21 18:23 ` [PATCH 7/8] vhost: remove work arg from vhost_work_flush Mike Christie
2020-09-22  2:01   ` Jason Wang
2020-09-21 18:23 ` [PATCH 8/8] vhost scsi: remove extra flushes Mike Christie

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200924022107-mutt-send-email-mst@kernel.org \
    --to=mst@redhat.com \
    --cc=jasowang@redhat.com \
    --cc=linux-scsi@vger.kernel.org \
    --cc=martin.petersen@oracle.com \
    --cc=michael.christie@oracle.com \
    --cc=pbonzini@redhat.com \
    --cc=stefanha@redhat.com \
    --cc=target-devel@vger.kernel.org \
    --cc=virtualization@lists.linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Linux-SCSI Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-scsi/0 linux-scsi/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-scsi linux-scsi/ https://lore.kernel.org/linux-scsi \
		linux-scsi@vger.kernel.org
	public-inbox-index linux-scsi

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-scsi


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git