From: Paolo Bonzini <pbonzini@redhat.com>
To: "Michael S. Tsirkin" <mst@redhat.com>
Cc: linux-kernel@vger.kernel.org, kvm@vger.kernel.org,
	gaowanlong@cn.fujitsu.com, hutao@cn.fujitsu.com,
	linux-scsi@vger.kernel.org,
	virtualization@lists.linux-foundation.org, rusty@rustcorp.com.au,
	asias@redhat.com, stefanha@redhat.com, nab@linux-iscsi.org
Subject: Re: [PATCH v2 5/5] virtio-scsi: introduce multiqueue support
Date: Tue, 18 Dec 2012 15:08:08 +0100
Message-ID: <50D078C8.208@redhat.com>
In-Reply-To: <20121218135736.GF26110@redhat.com>

On 18/12/2012 14:57, Michael S. Tsirkin wrote:
>> -static int virtscsi_queuecommand(struct Scsi_Host *sh, struct scsi_cmnd *sc)
>> +static int virtscsi_queuecommand(struct virtio_scsi *vscsi,
>> +				 struct virtio_scsi_target_state *tgt,
>> +				 struct scsi_cmnd *sc)
>>  {
>> -	struct virtio_scsi *vscsi = shost_priv(sh);
>> -	struct virtio_scsi_target_state *tgt = &vscsi->tgt[sc->device->id];
>>  	struct virtio_scsi_cmd *cmd;
>> +	struct virtio_scsi_vq *req_vq;
>>  	int ret;
>>  
>>  	struct Scsi_Host *shost = virtio_scsi_host(vscsi->vdev);
>> @@ -461,7 +533,8 @@ static int virtscsi_queuecommand(struct Scsi_Host *sh, struct scsi_cmnd *sc)
>>  	BUG_ON(sc->cmd_len > VIRTIO_SCSI_CDB_SIZE);
>>  	memcpy(cmd->req.cmd.cdb, sc->cmnd, sc->cmd_len);
>>  
>> -	if (virtscsi_kick_cmd(tgt, &vscsi->req_vq, cmd,
>> +	req_vq = ACCESS_ONCE(tgt->req_vq);
> 
> This ACCESS_ONCE without a barrier looks strange to me.
> Can req_vq change? Needs a comment.

Barriers are needed to order two things.  Here I don't have the second thing
to order against, hence no barrier.

Accessing req_vq locklessly is safe, and there's a comment about it, but you
still want ACCESS_ONCE to ensure the compiler doesn't play tricks.  It
shouldn't strictly be necessary, because the critical section of
virtscsi_queuecommand_multi already includes the appropriate compiler
barriers, but it is clearer this way to me. :)
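
Roughly, the pairing I have in mind is the following (a sketch of the
ordering only, reusing the names from the patch, not the exact code):

	/* writer: virtscsi_queuecommand_multi, inside the tgt_lock section */
	atomic_inc_return(&tgt->reqs);		/* becomes 1: first outstanding request */
	smp_wmb();				/* order reqs before req_vq; pairs with
						 * smp_read_barrier_depends() in
						 * virtscsi_req_done */
	tgt->req_vq = &vscsi->req_vqs[queue_num];

	/* reader: virtscsi_queuecommand */
	req_vq = ACCESS_ONCE(tgt->req_vq);	/* one stable load, no compiler reload/tearing */

ACCESS_ONCE only constrains the compiler; it does not order this load against
anything, which is exactly why there is no barrier next to it.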

>> +	if (virtscsi_kick_cmd(tgt, req_vq, cmd,
>>  			      sizeof cmd->req.cmd, sizeof cmd->resp.cmd,
>>  			      GFP_ATOMIC) == 0)
>>  		ret = 0;
>> @@ -472,6 +545,48 @@ out:
>>  	return ret;
>>  }
>>  
>> +static int virtscsi_queuecommand_single(struct Scsi_Host *sh,
>> +					struct scsi_cmnd *sc)
>> +{
>> +	struct virtio_scsi *vscsi = shost_priv(sh);
>> +	struct virtio_scsi_target_state *tgt = &vscsi->tgt[sc->device->id];
>> +
>> +	atomic_inc(&tgt->reqs);
> 
> And here we don't have barrier after atomic? Why? Needs a comment.

We don't write req_vq here, so there are no two writes to order.  A barrier
against what?

>> +	return virtscsi_queuecommand(vscsi, tgt, sc);
>> +}
>> +
>> +static int virtscsi_queuecommand_multi(struct Scsi_Host *sh,
>> +				       struct scsi_cmnd *sc)
>> +{
>> +	struct virtio_scsi *vscsi = shost_priv(sh);
>> +	struct virtio_scsi_target_state *tgt = &vscsi->tgt[sc->device->id];
>> +	unsigned long flags;
>> +	u32 queue_num;
>> +
>> +	/*
>> +	 * Using an atomic_t for tgt->reqs lets the virtqueue handler
>> +	 * decrement it without taking the spinlock.
>> +	 *
>> +	 * We still need a critical section to prevent concurrent submissions
>> +	 * from picking two different req_vqs.
>> +	 */
>> +	spin_lock_irqsave(&tgt->tgt_lock, flags);
>> +	if (atomic_inc_return(&tgt->reqs) == 1) {
>> +		queue_num = smp_processor_id();
>> +		while (unlikely(queue_num >= vscsi->num_queues))
>> +			queue_num -= vscsi->num_queues;
>> +
>> +		/*
>> +		 * Write reqs before writing req_vq, matching the
>> +		 * smp_read_barrier_depends() in virtscsi_req_done.
>> +		 */
>> +		smp_wmb();
>> +		tgt->req_vq = &vscsi->req_vqs[queue_num];
>> +	}
>> +	spin_unlock_irqrestore(&tgt->tgt_lock, flags);
>> +	return virtscsi_queuecommand(vscsi, tgt, sc);
>> +}
>> +
>>  static int virtscsi_tmf(struct virtio_scsi *vscsi, struct virtio_scsi_cmd *cmd)
>>  {
>>  	DECLARE_COMPLETION_ONSTACK(comp);
>> @@ -541,12 +656,26 @@ static int virtscsi_abort(struct scsi_cmnd *sc)
>>  	return virtscsi_tmf(vscsi, cmd);
>>  }
>>  
>> -static struct scsi_host_template virtscsi_host_template = {
>> +static struct scsi_host_template virtscsi_host_template_single = {
>>  	.module = THIS_MODULE,
>>  	.name = "Virtio SCSI HBA",
>>  	.proc_name = "virtio_scsi",
>> -	.queuecommand = virtscsi_queuecommand,
>>  	.this_id = -1,
>> +	.queuecommand = virtscsi_queuecommand_single,
>> +	.eh_abort_handler = virtscsi_abort,
>> +	.eh_device_reset_handler = virtscsi_device_reset,
>> +
>> +	.can_queue = 1024,
>> +	.dma_boundary = UINT_MAX,
>> +	.use_clustering = ENABLE_CLUSTERING,
>> +};
>> +
>> +static struct scsi_host_template virtscsi_host_template_multi = {
>> +	.module = THIS_MODULE,
>> +	.name = "Virtio SCSI HBA",
>> +	.proc_name = "virtio_scsi",
>> +	.this_id = -1,
>> +	.queuecommand = virtscsi_queuecommand_multi,
>>  	.eh_abort_handler = virtscsi_abort,
>>  	.eh_device_reset_handler = virtscsi_device_reset,
>>  
>> @@ -572,16 +701,27 @@ static struct scsi_host_template virtscsi_host_template = {
>>  				  &__val, sizeof(__val)); \
>>  	})
>>  
>> +
>>  static void virtscsi_init_vq(struct virtio_scsi_vq *virtscsi_vq,
>> -			     struct virtqueue *vq)
>> +			     struct virtqueue *vq, bool affinity)
>>  {
>>  	spin_lock_init(&virtscsi_vq->vq_lock);
>>  	virtscsi_vq->vq = vq;
>> +	if (affinity)
>> +		virtqueue_set_affinity(vq, vq->index - VIRTIO_SCSI_VQ_BASE);
> 
> I've been thinking about how set_affinity
> interacts with online/offline CPUs.
> Any idea?

No, I haven't tried.

>>  
>>  	/* Discover virtqueues and write information to configuration.  */
>> -	err = vdev->config->find_vqs(vdev, 3, vqs, callbacks, names);
>> +	err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names);
>>  	if (err)
>>  		return err;
>>  
>> -	virtscsi_init_vq(&vscsi->ctrl_vq, vqs[0]);
>> -	virtscsi_init_vq(&vscsi->event_vq, vqs[1]);
>> -	virtscsi_init_vq(&vscsi->req_vq, vqs[2]);
>> +	virtscsi_init_vq(&vscsi->ctrl_vq, vqs[0], false);
>> +	virtscsi_init_vq(&vscsi->event_vq, vqs[1], false);
>> +	for (i = VIRTIO_SCSI_VQ_BASE; i < num_vqs; i++)
>> +		virtscsi_init_vq(&vscsi->req_vqs[i - VIRTIO_SCSI_VQ_BASE],
>> +				 vqs[i], vscsi->num_queues > 1);
> 
> So affinity is true if >1 vq? I am guessing this is not
> going to do the right thing unless you have at least
> as many vqs as CPUs.

Yes, and in that case you're not setting up the virtqueues correctly in the
first place.
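
For reference, the per-target queue choice in virtscsi_queuecommand_multi
boils down to the sketch below (a restatement of the selection logic quoted
above, not new code):

	queue_num = smp_processor_id();
	while (queue_num >= vscsi->num_queues)
		queue_num -= vscsi->num_queues;	/* effectively queue_num %= num_queues */
	tgt->req_vq = &vscsi->req_vqs[queue_num];

With fewer virtqueues than CPUs this still picks a valid vq; it is only the
per-vq affinity (vq i pinned to CPU i) that stops matching the submitting CPU.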

Isn't the same thing true for virtio-net mq?

Paolo
