From: Maxime Coquelin <maxime.coquelin@redhat.com>
To: Jens Freimann <jfreimann@redhat.com>, dev@dpdk.org
Cc: tiwei.bie@intel.com, yliu@fridaylinux.org, mst@redhat.com
Subject: Re: [PATCH 12/14] vhost: dequeue for packed queues
Date: Thu, 1 Feb 2018 10:35:18 +0100
Message-ID: <76b94d32-9521-35b7-c7a2-9aba217b01ba@redhat.com>
In-Reply-To: <20180129141143.13437-13-jfreimann@redhat.com>



On 01/29/2018 03:11 PM, Jens Freimann wrote:
> Implement code to dequeue and process descriptors from
> the vring if VIRTIO_F_PACKED is enabled.
> 
> Check if descriptor was made available by driver by looking at
> VIRTIO_F_DESC_AVAIL flag in descriptor. If so dequeue and set
> the used flag VIRTIO_F_DESC_USED to the current value of the
> used wrap counter.
> 
> Used ring wrap counter needs to be toggled when last descriptor is
> written out. This allows the host/guest to detect new descriptors even
> after the ring has wrapped.
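
For readers new to the packed ring layout, this is roughly the idea behind
the two flags and the wrap counter. The helpers below are illustrative
pseudo-code based on the virtio 1.1 packed-ring draft spec, not necessarily
identical to the helpers introduced in patch 11/14:

	/* Bit positions per the packed-ring draft; illustrative only. */
	#define VIRTIO_F_DESC_AVAIL	(1 << 7)
	#define VIRTIO_F_DESC_USED	(1 << 15)

	/* A descriptor is available to the device when its AVAIL bit
	 * matches the device's wrap counter while its USED bit does not.
	 */
	static inline int
	desc_is_avail(struct vhost_virtqueue *vq, struct vring_desc_1_1 *desc)
	{
		return !!(desc->flags & VIRTIO_F_DESC_AVAIL) == vq->used_wrap_counter &&
		       !!(desc->flags & VIRTIO_F_DESC_USED) != vq->used_wrap_counter;
	}

	/* Marking a descriptor used sets both bits to the wrap counter
	 * value the head was read under, which is why the counter must be
	 * toggled every time the ring wraps.
	 */
	static inline void
	_set_desc_used(struct vring_desc_1_1 *desc, int wrap_counter)
	{
		uint16_t flags = desc->flags;

		if (wrap_counter)
			flags |= VIRTIO_F_DESC_AVAIL | VIRTIO_F_DESC_USED;
		else
			flags &= ~(VIRTIO_F_DESC_AVAIL | VIRTIO_F_DESC_USED);

		desc->flags = flags;
	}
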
> 
> Signed-off-by: Jens Freimann <jfreimann@redhat.com>
> ---
>   lib/librte_vhost/vhost.c      |   1 +
>   lib/librte_vhost/vhost.h      |   1 +
>   lib/librte_vhost/virtio_net.c | 194 ++++++++++++++++++++++++++++++++++++++++++
>   3 files changed, 196 insertions(+)
> 
> diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
> index 78913912c..e5f58d9c8 100644
> --- a/lib/librte_vhost/vhost.c
> +++ b/lib/librte_vhost/vhost.c
> @@ -191,6 +191,7 @@ init_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
>   
>   	vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD;
>   	vq->callfd = VIRTIO_UNINITIALIZED_EVENTFD;
> +	vq->used_wrap_counter = 1;
>   
>   	vhost_user_iotlb_init(dev, vring_idx);
>   	/* Backends are set to -1 indicating an inactive device. */
> diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
> index 8554d51d8..a3d4214b6 100644
> --- a/lib/librte_vhost/vhost.h
> +++ b/lib/librte_vhost/vhost.h
> @@ -106,6 +106,7 @@ struct vhost_virtqueue {
>   
>   	struct batch_copy_elem	*batch_copy_elems;
>   	uint16_t		batch_copy_nb_elems;
> +	uint32_t		used_wrap_counter;
>   
>   	rte_rwlock_t	iotlb_lock;
>   	rte_rwlock_t	iotlb_pending_lock;
> diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
> index edfab3ba6..5d4cfe8cc 100644
> --- a/lib/librte_vhost/virtio_net.c
> +++ b/lib/librte_vhost/virtio_net.c
> @@ -19,6 +19,7 @@
>   
>   #include "iotlb.h"
>   #include "vhost.h"
> +#include "virtio-1.1.h"
>   
>   #define MAX_PKT_BURST 32
>   
> @@ -1111,6 +1112,199 @@ restore_mbuf(struct rte_mbuf *m)
>   	}
>   }
>   
> +static inline uint16_t
> +dequeue_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
> +	     struct rte_mempool *mbuf_pool, struct rte_mbuf *m,
> +	     struct vring_desc_1_1 *descs)
> +{
> +	struct vring_desc_1_1 *desc;
> +	uint64_t desc_addr;
> +	uint32_t desc_avail, desc_offset;
> +	uint32_t mbuf_avail, mbuf_offset;
> +	uint32_t cpy_len;
> +	struct rte_mbuf *cur = m, *prev = m;
> +	struct virtio_net_hdr *hdr = NULL;
> +	uint16_t head_idx = vq->last_used_idx & (vq->size - 1);
> +	int wrap_counter = vq->used_wrap_counter;
> +
> +	desc = &descs[vq->last_used_idx & (vq->size - 1)];
> +	if (unlikely((desc->len < dev->vhost_hlen)) ||
> +			(desc->flags & VRING_DESC_F_INDIRECT))
> +		rte_panic("INDIRECT not supported yet");

Using rte_panic() may not be a good idea here, because a malicious guest
could easily make the vswitch crash.
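
Something along these lines would be safer, i.e. log and drop just this
request instead of taking the whole process down (only a sketch, the exact
log message is up to you):

	if (unlikely((desc->len < dev->vhost_hlen) ||
			(desc->flags & VRING_DESC_F_INDIRECT))) {
		/* Guest-controlled input: log and fail this packet only,
		 * don't bring down the vswitch.
		 */
		RTE_LOG(ERR, VHOST_DATA,
			"(%d) invalid desc: indirect descriptors not supported\n",
			dev->vid);
		return -1;
	}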

> +
> +	desc_addr = rte_vhost_gpa_to_vva(dev->mem, desc->addr);

You should use vhost_iova_to_vva() here and everywhere else; otherwise,
you break IOMMU support.
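
I.e. something like the below (writing the prototype from memory, please
double-check the exact arguments against vhost.h):

	/* Translate through the IOTLB so vIOMMU setups keep working;
	 * VHOST_ACCESS_RO because the device only reads this buffer.
	 */
	desc_addr = vhost_iova_to_vva(dev, vq, desc->addr,
				      desc->len, VHOST_ACCESS_RO);
	if (unlikely(!desc_addr))
		return -1;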

> +	if (unlikely(!desc_addr))
> +		return -1;
> +
> +	if (virtio_net_with_host_offload(dev)) {
> +		hdr = (struct virtio_net_hdr *)((uintptr_t)desc_addr);
> +		rte_prefetch0(hdr);
> +	}
> +
> +	/*
> +	 * A virtio driver normally uses at least 2 desc buffers
> +	 * for Tx: the first for storing the header, and others
> +	 * for storing the data.
> +	 */
> +	if (likely((desc->len == dev->vhost_hlen) &&
> +		   (desc->flags & VRING_DESC_F_NEXT) != 0)) {
> +		if ((++vq->last_used_idx & (vq->size - 1)) == 0)
> +			toggle_wrap_counter(vq);
> +
> +		desc = &descs[vq->last_used_idx & (vq->size - 1)];
> +
> +		if (unlikely(desc->flags & VRING_DESC_F_INDIRECT))
> +			rte_panic("INDIRECT not supported yet");
Ditto.

> +
> +		desc_addr = rte_vhost_gpa_to_vva(dev->mem, desc->addr);
> +		if (unlikely(!desc_addr))
> +			return -1;
> +
> +		desc_offset = 0;
> +		desc_avail  = desc->len;
> +	} else {
> +		desc_avail  = desc->len - dev->vhost_hlen;
> +		desc_offset = dev->vhost_hlen;
> +	}
> +
> +	rte_prefetch0((void *)(uintptr_t)(desc_addr + desc_offset));
> +
> +	PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset), desc_avail, 0);
> +
> +	mbuf_offset = 0;
> +	mbuf_avail  = m->buf_len - RTE_PKTMBUF_HEADROOM;
> +	while (1) {
> +		uint64_t hpa;
> +
> +		cpy_len = RTE_MIN(desc_avail, mbuf_avail);
> +
> +		/*
> +		 * A desc buf might across two host physical pages that are
> +		 * not continuous. In such case (gpa_to_hpa returns 0), data
> +		 * will be copied even though zero copy is enabled.
> +		 */
> +		if (unlikely(dev->dequeue_zero_copy && (hpa = gpa_to_hpa(dev,
> +					desc->addr + desc_offset, cpy_len)))) {
> +			cur->data_len = cpy_len;
> +			cur->data_off = 0;
> +			cur->buf_addr = (void *)(uintptr_t)desc_addr;
> +			cur->buf_physaddr = hpa;
> +
> +			/*
> +			 * In zero copy mode, one mbuf can only reference data
> +			 * for one or partial of one desc buff.
> +			 */
> +			mbuf_avail = cpy_len;
> +		} else {
> +			rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *,
> +							   mbuf_offset),
> +				(void *)((uintptr_t)(desc_addr + desc_offset)),
> +				cpy_len);
> +		}
> +
> +		mbuf_avail  -= cpy_len;
> +		mbuf_offset += cpy_len;
> +		desc_avail  -= cpy_len;
> +		desc_offset += cpy_len;
> +
> +		/* This desc reaches to its end, get the next one */
> +		if (desc_avail == 0) {
> +			if ((desc->flags & VRING_DESC_F_NEXT) == 0)
> +				break;
> +
> +			if ((++vq->last_used_idx & (vq->size - 1)) == 0)
> +				toggle_wrap_counter(vq);
> +
> +			desc = &descs[vq->last_used_idx & (vq->size - 1)];
> +			if (unlikely(desc->flags & VRING_DESC_F_INDIRECT))
> +				rte_panic("INDIRECT not supported yet");
> +
> +			desc_addr = rte_vhost_gpa_to_vva(dev->mem, desc->addr);
> +			if (unlikely(!desc_addr))
> +				return -1;
> +
> +			rte_prefetch0((void *)(uintptr_t)desc_addr);
> +
> +			desc_offset = 0;
> +			desc_avail  = desc->len;
> +
> +			PRINT_PACKET(dev, (uintptr_t)desc_addr, desc->len, 0);
> +		}
> +
> +		/*
> +		 * This mbuf reaches to its end, get a new one
> +		 * to hold more data.
> +		 */
> +		if (mbuf_avail == 0) {
> +			cur = rte_pktmbuf_alloc(mbuf_pool);
> +			if (unlikely(cur == NULL)) {
> +				RTE_LOG(ERR, VHOST_DATA, "Failed to "
> +					"allocate memory for mbuf.\n");
> +				return -1;
> +			}
> +
> +			prev->next = cur;
> +			prev->data_len = mbuf_offset;
> +			m->nb_segs += 1;
> +			m->pkt_len += mbuf_offset;
> +			prev = cur;
> +
> +			mbuf_offset = 0;
> +			mbuf_avail  = cur->buf_len - RTE_PKTMBUF_HEADROOM;
> +		}
> +	}
> +
> +	if (hdr)
> +		vhost_dequeue_offload(hdr, m);
> +
> +	if ((++vq->last_used_idx & (vq->size - 1)) == 0)
> +		toggle_wrap_counter(vq);
> +
> +	rte_smp_wmb();
> +	_set_desc_used(&descs[head_idx], wrap_counter);
> +
> +	prev->data_len = mbuf_offset;
> +	m->pkt_len    += mbuf_offset;
> +
> +	return 0;
> +}
> +
> +static inline uint16_t
> +vhost_dequeue_burst_1_1(struct virtio_net *dev, struct vhost_virtqueue *vq,
> +			struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts,
> +			uint16_t count)
> +{
> +	uint16_t i;
> +	uint16_t idx;
> +	struct vring_desc_1_1 *desc = vq->desc_1_1;
> +	int err;
> +
> +	count = RTE_MIN(MAX_PKT_BURST, count);
> +	for (i = 0; i < count; i++) {
> +		idx = vq->last_used_idx & (vq->size - 1);
> +		if (!desc_is_avail(vq, &desc[idx]))
> +			break;
> +		rte_smp_rmb();
> +
> +		pkts[i] = rte_pktmbuf_alloc(mbuf_pool);
> +		if (unlikely(pkts[i] == NULL)) {
> +			RTE_LOG(ERR, VHOST_DATA,
> +				"Failed to allocate memory for mbuf.\n");
> +			break;
> +		}
> +
> +		err = dequeue_desc(dev, vq, mbuf_pool, pkts[i], desc);
> +		if (unlikely(err)) {
> +			rte_pktmbuf_free(pkts[i]);
> +			break;
> +		}
> +	}
> +
> +	rte_spinlock_unlock(&vq->access_lock);

Where is it locked? It looks unbalanced.
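
If the lock is supposed to be taken in this function, something like the
below at the top would balance it (just a sketch); otherwise the unlock
should be dropped and locking left to the caller, as rte_vhost_dequeue_burst
already does for the split ring:

	/* Protect the ring against asynchronous changes (e.g. vring
	 * reconfiguration) while descriptors are being processed.
	 */
	if (unlikely(rte_spinlock_trylock(&vq->access_lock) == 0))
		return 0;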

> +
> +	return i;
> +}
> +
>   uint16_t
>   rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
>   	struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
> 

Thread overview: 27+ messages
2018-01-29 14:11 [PATCH 00/14] implement packed virtqueues Jens Freimann
2018-01-29 14:11 ` [PATCH 01/14] net/virtio: vring init for packed queues Jens Freimann
2018-01-29 14:11 ` [PATCH 02/14] net/virtio: don't call virtio_disable_intr() " Jens Freimann
2018-01-29 14:11 ` [PATCH 03/14] net/virtio: add virtio 1.1 defines Jens Freimann
2018-01-29 14:11 ` [PATCH 04/14] net/virtio: add packed virtqueue helpers Jens Freimann
2018-01-29 14:11 ` [PATCH 05/14] net/virtio: don't dump split virtqueue data Jens Freimann
2018-01-29 14:11 ` [PATCH 06/14] net/virtio-user: add option to use packed queues Jens Freimann
2018-01-29 14:11 ` [PATCH 07/14] net/virtio: implement transmit path for " Jens Freimann
2018-02-12 13:16   ` Jason Wang
2018-02-13  9:10     ` Jens Freimann
2018-01-29 14:11 ` [PATCH 08/14] net/virtio: implement receive " Jens Freimann
2018-01-29 14:11 ` [PATCH 09/14] vhost: add virtio 1.1 defines Jens Freimann
2018-01-29 14:24   ` Yuanhan Liu
2018-01-29 14:28     ` Jens Freimann
2018-01-29 14:11 ` [PATCH 10/14] vhost: vring address setup for packed queues Jens Freimann
2018-02-01  9:16   ` Maxime Coquelin
2018-01-29 14:11 ` [PATCH 11/14] vhost: add helpers for packed virtqueues Jens Freimann
2018-02-01  9:20   ` Maxime Coquelin
2018-01-29 14:11 ` [PATCH 12/14] vhost: dequeue for packed queues Jens Freimann
2018-02-01  9:35   ` Maxime Coquelin [this message]
2018-02-01 10:23     ` Jens Freimann
2018-01-29 14:11 ` [PATCH 13/14] vhost: packed queue enqueue path Jens Freimann
2018-01-29 14:11 ` [PATCH 14/14] vhost: enable packed virtqueues Jens Freimann
2018-01-30 10:45   ` Jens Freimann
2018-02-06  6:20 ` [PATCH 00/14] implement " Jason Wang
2018-02-06 13:58   ` Jens Freimann
2018-02-07  8:37     ` Jason Wang
