All of lore.kernel.org
 help / color / mirror / Atom feed
From: Zhihong Wang <zhihong.wang@intel.com>
To: dev@dpdk.org
Cc: Zhihong Wang <zhihong.wang@intel.com>
Subject: [PATCH] optimize vhost enqueue
Date: Mon, 15 Aug 2016 23:50:02 -0400	[thread overview]
Message-ID: <1471319402-112998-1-git-send-email-zhihong.wang@intel.com> (raw)

This patch optimizes the vhost enqueue function: rte_vhost_enqueue_burst.

Currently there're 2 callbacks for vhost enqueue:
 *  virtio_dev_merge_rx for mrg_rxbuf turned on cases.
 *  virtio_dev_rx for mrg_rxbuf turned off cases.

The virtio_dev_merge_rx doesn't provide optimal performance, also it is
reported having compatibility issue working with Windows VMs.

Besides, having 2 separated functions increases maintenance efforts.

This patch uses a single function logic to replace the current 2 for
better maintainability, and provides better performance by optimizing
caching behavior especially for mrg_rxbuf turned on cases.

It also fixes the issue working with Windows VMs.

Signed-off-by: Zhihong Wang <zhihong.wang@intel.com>
---
 lib/librte_vhost/vhost-net.h  |   6 +-
 lib/librte_vhost/vhost_rxtx.c | 582 ++++++++++++++----------------------------
 lib/librte_vhost/virtio-net.c |  15 +-
 3 files changed, 208 insertions(+), 395 deletions(-)

diff --git a/lib/librte_vhost/vhost-net.h b/lib/librte_vhost/vhost-net.h
index 38593a2..a15182c 100644
--- a/lib/librte_vhost/vhost-net.h
+++ b/lib/librte_vhost/vhost-net.h
@@ -71,7 +71,7 @@ struct vhost_virtqueue {
 	uint32_t		size;
 
 	/* Last index used on the available ring */
-	volatile uint16_t	last_used_idx;
+	uint16_t		last_used_idx;
 #define VIRTIO_INVALID_EVENTFD		(-1)
 #define VIRTIO_UNINITIALIZED_EVENTFD	(-2)
 
@@ -85,6 +85,10 @@ struct vhost_virtqueue {
 
 	/* Physical address of used ring, for logging */
 	uint64_t		log_guest_addr;
+
+	/* Shadow used ring for performance */
+	struct vring_used_elem	*shadow_used_ring;
+	uint32_t		shadow_used_idx;
 } __rte_cache_aligned;
 
 /* Old kernels have no such macro defined */
diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index 08a73fd..1263168 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -91,7 +91,7 @@ is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t qp_nb)
 	return (is_tx ^ (idx & 1)) == 0 && idx < qp_nb * VIRTIO_QNUM;
 }
 
-static void
+static inline void __attribute__((always_inline))
 virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr)
 {
 	if (m_buf->ol_flags & PKT_TX_L4_MASK) {
@@ -125,427 +125,227 @@ virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr)
 	}
 }
 
-static inline void
-copy_virtio_net_hdr(struct virtio_net *dev, uint64_t desc_addr,
-		    struct virtio_net_hdr_mrg_rxbuf hdr)
-{
-	if (dev->vhost_hlen == sizeof(struct virtio_net_hdr_mrg_rxbuf))
-		*(struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)desc_addr = hdr;
-	else
-		*(struct virtio_net_hdr *)(uintptr_t)desc_addr = hdr.hdr;
-}
-
-static inline int __attribute__((always_inline))
-copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
-		  struct rte_mbuf *m, uint16_t desc_idx)
+uint16_t
+rte_vhost_enqueue_burst(int vid, uint16_t queue_id,
+	struct rte_mbuf **pkts, uint16_t count)
 {
-	uint32_t desc_avail, desc_offset;
-	uint32_t mbuf_avail, mbuf_offset;
-	uint32_t cpy_len;
+	struct virtio_net_hdr_mrg_rxbuf *virtio_hdr;
+	struct vhost_virtqueue *vq;
 	struct vring_desc *desc;
-	uint64_t desc_addr;
-	struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
-
-	desc = &vq->desc[desc_idx];
-	desc_addr = gpa_to_vva(dev, desc->addr);
-	/*
-	 * Checking of 'desc_addr' placed outside of 'unlikely' macro to avoid
-	 * performance issue with some versions of gcc (4.8.4 and 5.3.0) which
-	 * otherwise stores offset on the stack instead of in a register.
-	 */
-	if (unlikely(desc->len < dev->vhost_hlen) || !desc_addr)
-		return -1;
-
-	rte_prefetch0((void *)(uintptr_t)desc_addr);
-
-	virtio_enqueue_offload(m, &virtio_hdr.hdr);
-	copy_virtio_net_hdr(dev, desc_addr, virtio_hdr);
-	vhost_log_write(dev, desc->addr, dev->vhost_hlen);
-	PRINT_PACKET(dev, (uintptr_t)desc_addr, dev->vhost_hlen, 0);
-
-	desc_offset = dev->vhost_hlen;
-	desc_avail  = desc->len - dev->vhost_hlen;
-
-	mbuf_avail  = rte_pktmbuf_data_len(m);
-	mbuf_offset = 0;
-	while (mbuf_avail != 0 || m->next != NULL) {
-		/* done with current mbuf, fetch next */
-		if (mbuf_avail == 0) {
-			m = m->next;
-
-			mbuf_offset = 0;
-			mbuf_avail  = rte_pktmbuf_data_len(m);
-		}
-
-		/* done with current desc buf, fetch next */
-		if (desc_avail == 0) {
-			if ((desc->flags & VRING_DESC_F_NEXT) == 0) {
-				/* Room in vring buffer is not enough */
-				return -1;
-			}
-			if (unlikely(desc->next >= vq->size))
-				return -1;
-
-			desc = &vq->desc[desc->next];
-			desc_addr = gpa_to_vva(dev, desc->addr);
-			if (unlikely(!desc_addr))
-				return -1;
-
-			desc_offset = 0;
-			desc_avail  = desc->len;
-		}
-
-		cpy_len = RTE_MIN(desc_avail, mbuf_avail);
-		rte_memcpy((void *)((uintptr_t)(desc_addr + desc_offset)),
-			rte_pktmbuf_mtod_offset(m, void *, mbuf_offset),
-			cpy_len);
-		vhost_log_write(dev, desc->addr + desc_offset, cpy_len);
-		PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset),
-			     cpy_len, 0);
-
-		mbuf_avail  -= cpy_len;
-		mbuf_offset += cpy_len;
-		desc_avail  -= cpy_len;
-		desc_offset += cpy_len;
-	}
-
-	return 0;
-}
+	struct virtio_net *dev;
+	struct rte_mbuf *mbuf;
+	uint64_t desc_host_write_addr = 0;
+	uint32_t desc_chain_head = 0;
+	uint32_t desc_chain_len = 0;
+	uint32_t desc_current = 0;
+	uint32_t desc_write_offset = 0;
+	uint32_t used_idx_static = 0;
+	uint32_t pkt_idx = 0;
+	uint32_t pkt_left = 0;
+	uint32_t pkt_sent = 0;
+	uint32_t mbuf_len = 0;
+	uint32_t mbuf_len_left = 0;
+	uint32_t copy_len = 0;
+	uint32_t copy_virtio_hdr = 0;
+	uint32_t is_mrg_rxbuf = 0;
+	uint32_t is_virtio_1 = 0;
+
+	if (unlikely(count == 0))
+		return 0;
 
-/**
- * This function adds buffers to the virtio devices RX virtqueue. Buffers can
- * be received from the physical port or from another virtio device. A packet
- * count is returned to indicate the number of packets that are succesfully
- * added to the RX queue. This function works when the mbuf is scattered, but
- * it doesn't support the mergeable feature.
- */
-static inline uint32_t __attribute__((always_inline))
-virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
-	      struct rte_mbuf **pkts, uint32_t count)
-{
-	struct vhost_virtqueue *vq;
-	uint16_t avail_idx, free_entries, start_idx;
-	uint16_t desc_indexes[MAX_PKT_BURST];
-	uint16_t used_idx;
-	uint32_t i;
+	count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);
 
-	LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__);
-	if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->virt_qp_nb))) {
-		RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n",
-			dev->vid, __func__, queue_id);
+	dev = get_device(vid);
+	if (unlikely(!dev))
 		return 0;
-	}
 
-	vq = dev->virtqueue[queue_id];
-	if (unlikely(vq->enabled == 0))
+	if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->virt_qp_nb)))
 		return 0;
 
-	avail_idx = *((volatile uint16_t *)&vq->avail->idx);
-	start_idx = vq->last_used_idx;
-	free_entries = avail_idx - start_idx;
-	count = RTE_MIN(count, free_entries);
-	count = RTE_MIN(count, (uint32_t)MAX_PKT_BURST);
-	if (count == 0)
+	vq = dev->virtqueue[queue_id];
+	if (unlikely(!vq->enabled))
 		return 0;
 
-	LOG_DEBUG(VHOST_DATA, "(%d) start_idx %d | end_idx %d\n",
-		dev->vid, start_idx, start_idx + count);
+	if (dev->features & (1ULL << VIRTIO_NET_F_MRG_RXBUF))
+		is_mrg_rxbuf = 1;
+
+	if (dev->features & (1ULL << VIRTIO_F_VERSION_1))
+		is_virtio_1 = 1;
+
+	pkt_idx = 0;
+	pkt_left = count;
+	used_idx_static = vq->last_used_idx & (vq->size - 1);
+	vq->shadow_used_idx = 0;
+
+	while (pkt_left > 0) {
+		if (unlikely(vq->avail->idx == vq->last_used_idx))
+			goto done;
+
+		if (pkt_left > 1 && vq->avail->idx != vq->last_used_idx + 1)
+			rte_prefetch0(&vq->desc[
+					vq->avail->ring[
+					(vq->last_used_idx + 1) &
+					(vq->size - 1)]]);
+
+		mbuf = pkts[pkt_idx];
+		mbuf_len = rte_pktmbuf_data_len(mbuf);
+		mbuf_len_left = mbuf_len;
+		pkt_idx++;
+		pkt_left--;
+
+		desc_chain_head = vq->avail->ring[(vq->last_used_idx) &
+			(vq->size - 1)];
+		desc_current = desc_chain_head;
+		desc = &vq->desc[desc_current];
+		desc_host_write_addr = gpa_to_vva(dev, desc->addr);
+		if (unlikely(!desc_host_write_addr))
+			goto done;
+
+		virtio_hdr = (struct virtio_net_hdr_mrg_rxbuf *)
+			(uintptr_t)desc_host_write_addr;
+		copy_virtio_hdr = 1;
+
+		vhost_log_write(dev, desc->addr, dev->vhost_hlen);
+		desc_write_offset = dev->vhost_hlen;
+		desc_chain_len = desc_write_offset;
+		desc_host_write_addr += desc_write_offset;
+
+		while (1) {
+			if (!mbuf_len_left) {
+				if (mbuf->next) {
+					mbuf = mbuf->next;
+					mbuf_len = rte_pktmbuf_data_len(mbuf);
+					mbuf_len_left = mbuf_len;
+				} else
+					break;
+			}
 
-	/* Retrieve all of the desc indexes first to avoid caching issues. */
-	rte_prefetch0(&vq->avail->ring[start_idx & (vq->size - 1)]);
-	for (i = 0; i < count; i++) {
-		used_idx = (start_idx + i) & (vq->size - 1);
-		desc_indexes[i] = vq->avail->ring[used_idx];
-		vq->used->ring[used_idx].id = desc_indexes[i];
-		vq->used->ring[used_idx].len = pkts[i]->pkt_len +
-					       dev->vhost_hlen;
-		vhost_log_used_vring(dev, vq,
-			offsetof(struct vring_used, ring[used_idx]),
-			sizeof(vq->used->ring[used_idx]));
-	}
+			if (desc->len <= desc_write_offset) {
+				if (desc->flags & VRING_DESC_F_NEXT) {
+					desc_write_offset = 0;
+					desc_current = desc->next;
+					desc = &vq->desc[desc_current];
+					desc_host_write_addr =
+						gpa_to_vva(dev, desc->addr);
+					if (unlikely(!desc_host_write_addr))
+						goto rollback;
+				} else if (is_mrg_rxbuf) {
+					vq->shadow_used_ring[
+						vq->shadow_used_idx].id =
+						desc_chain_head;
+					vq->shadow_used_ring[
+						vq->shadow_used_idx].len =
+						desc_chain_len;
+					vq->shadow_used_idx++;
+					vq->last_used_idx++;
+					virtio_hdr->num_buffers++;
+					if (unlikely(vq->avail->idx ==
+							vq->last_used_idx))
+						goto rollback;
+
+					desc_chain_head = vq->avail->ring[
+						(vq->last_used_idx) &
+						(vq->size - 1)];
+					desc_current = desc_chain_head;
+					desc = &vq->desc[desc_current];
+					desc_host_write_addr =
+						gpa_to_vva(dev, desc->addr);
+					if (unlikely(!desc_host_write_addr))
+						goto rollback;
+
+					desc_chain_len = 0;
+					desc_write_offset = 0;
+				} else
+					goto rollback;
+			}
 
-	rte_prefetch0(&vq->desc[desc_indexes[0]]);
-	for (i = 0; i < count; i++) {
-		uint16_t desc_idx = desc_indexes[i];
-		int err;
+			copy_len = RTE_MIN(desc->len - desc_write_offset,
+					mbuf_len_left);
+			if (copy_virtio_hdr) {
+				copy_virtio_hdr = 0;
+				memset((void *)(uintptr_t)&(virtio_hdr->hdr),
+						0, dev->vhost_hlen);
+				virtio_enqueue_offload(mbuf,
+						&(virtio_hdr->hdr));
+				if (is_mrg_rxbuf || is_virtio_1)
+					virtio_hdr->num_buffers = 1;
+			}
 
-		err = copy_mbuf_to_desc(dev, vq, pkts[i], desc_idx);
-		if (unlikely(err)) {
-			used_idx = (start_idx + i) & (vq->size - 1);
-			vq->used->ring[used_idx].len = dev->vhost_hlen;
-			vhost_log_used_vring(dev, vq,
-				offsetof(struct vring_used, ring[used_idx]),
-				sizeof(vq->used->ring[used_idx]));
+			rte_memcpy((void *)(uintptr_t)desc_host_write_addr,
+					rte_pktmbuf_mtod_offset(mbuf, void *,
+						mbuf_len - mbuf_len_left),
+					copy_len);
+			vhost_log_write(dev, desc->addr + desc_write_offset,
+					copy_len);
+			mbuf_len_left -= copy_len;
+			desc_write_offset += copy_len;
+			desc_host_write_addr += copy_len;
+			desc_chain_len += copy_len;
 		}
 
-		if (i + 1 < count)
-			rte_prefetch0(&vq->desc[desc_indexes[i+1]]);
+		vq->shadow_used_ring[vq->shadow_used_idx].id = desc_chain_head;
+		vq->shadow_used_ring[vq->shadow_used_idx].len = desc_chain_len;
+		vq->shadow_used_idx++;
+		vq->last_used_idx++;
+		pkt_sent++;
 	}
 
-	rte_smp_wmb();
-
-	*(volatile uint16_t *)&vq->used->idx += count;
-	vq->last_used_idx += count;
-	vhost_log_used_vring(dev, vq,
-		offsetof(struct vring_used, idx),
-		sizeof(vq->used->idx));
-
-	/* flush used->idx update before we read avail->flags. */
-	rte_mb();
-
-	/* Kick the guest if necessary. */
-	if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)
-			&& (vq->callfd >= 0))
-		eventfd_write(vq->callfd, (eventfd_t)1);
-	return count;
-}
-
-static inline int
-fill_vec_buf(struct vhost_virtqueue *vq, uint32_t avail_idx,
-	     uint32_t *allocated, uint32_t *vec_idx,
-	     struct buf_vector *buf_vec)
-{
-	uint16_t idx = vq->avail->ring[avail_idx & (vq->size - 1)];
-	uint32_t vec_id = *vec_idx;
-	uint32_t len    = *allocated;
-
-	while (1) {
-		if (unlikely(vec_id >= BUF_VECTOR_MAX || idx >= vq->size))
-			return -1;
-
-		len += vq->desc[idx].len;
-		buf_vec[vec_id].buf_addr = vq->desc[idx].addr;
-		buf_vec[vec_id].buf_len  = vq->desc[idx].len;
-		buf_vec[vec_id].desc_idx = idx;
-		vec_id++;
-
-		if ((vq->desc[idx].flags & VRING_DESC_F_NEXT) == 0)
-			break;
-
-		idx = vq->desc[idx].next;
-	}
-
-	*allocated = len;
-	*vec_idx   = vec_id;
-
-	return 0;
-}
-
-/*
- * Returns -1 on fail, 0 on success
- */
-static inline int
-reserve_avail_buf_mergeable(struct vhost_virtqueue *vq, uint32_t size,
-			    uint16_t *end, struct buf_vector *buf_vec)
-{
-	uint16_t cur_idx;
-	uint16_t avail_idx;
-	uint32_t allocated = 0;
-	uint32_t vec_idx = 0;
-	uint16_t tries = 0;
-
-	cur_idx  = vq->last_used_idx;
-
-	while (1) {
-		avail_idx = *((volatile uint16_t *)&vq->avail->idx);
-		if (unlikely(cur_idx == avail_idx))
-			return -1;
-
-		if (unlikely(fill_vec_buf(vq, cur_idx, &allocated,
-					  &vec_idx, buf_vec) < 0))
-			return -1;
-
-		cur_idx++;
-		tries++;
-
-		if (allocated >= size)
-			break;
-
-		/*
-		 * if we tried all available ring items, and still
-		 * can't get enough buf, it means something abnormal
-		 * happened.
-		 */
-		if (unlikely(tries >= vq->size))
-			return -1;
-	}
-
-	*end = cur_idx;
-	return 0;
-}
-
-static inline uint32_t __attribute__((always_inline))
-copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
-			    uint16_t end_idx, struct rte_mbuf *m,
-			    struct buf_vector *buf_vec)
-{
-	struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
-	uint32_t vec_idx = 0;
-	uint16_t start_idx = vq->last_used_idx;
-	uint16_t cur_idx = start_idx;
-	uint64_t desc_addr;
-	uint32_t mbuf_offset, mbuf_avail;
-	uint32_t desc_offset, desc_avail;
-	uint32_t cpy_len;
-	uint16_t desc_idx, used_idx;
-
-	if (unlikely(m == NULL))
-		return 0;
-
-	LOG_DEBUG(VHOST_DATA, "(%d) current index %d | end index %d\n",
-		dev->vid, cur_idx, end_idx);
-
-	desc_addr = gpa_to_vva(dev, buf_vec[vec_idx].buf_addr);
-	if (buf_vec[vec_idx].buf_len < dev->vhost_hlen || !desc_addr)
-		return 0;
-
-	rte_prefetch0((void *)(uintptr_t)desc_addr);
-
-	virtio_hdr.num_buffers = end_idx - start_idx;
-	LOG_DEBUG(VHOST_DATA, "(%d) RX: num merge buffers %d\n",
-		dev->vid, virtio_hdr.num_buffers);
-
-	virtio_enqueue_offload(m, &virtio_hdr.hdr);
-	copy_virtio_net_hdr(dev, desc_addr, virtio_hdr);
-	vhost_log_write(dev, buf_vec[vec_idx].buf_addr, dev->vhost_hlen);
-	PRINT_PACKET(dev, (uintptr_t)desc_addr, dev->vhost_hlen, 0);
-
-	desc_avail  = buf_vec[vec_idx].buf_len - dev->vhost_hlen;
-	desc_offset = dev->vhost_hlen;
-
-	mbuf_avail  = rte_pktmbuf_data_len(m);
-	mbuf_offset = 0;
-	while (mbuf_avail != 0 || m->next != NULL) {
-		/* done with current desc buf, get the next one */
-		if (desc_avail == 0) {
-			desc_idx = buf_vec[vec_idx].desc_idx;
-
-			if (!(vq->desc[desc_idx].flags & VRING_DESC_F_NEXT)) {
-				/* Update used ring with desc information */
-				used_idx = cur_idx++ & (vq->size - 1);
-				vq->used->ring[used_idx].id  = desc_idx;
-				vq->used->ring[used_idx].len = desc_offset;
-				vhost_log_used_vring(dev, vq,
+done:
+	if (likely(vq->shadow_used_idx > 0)) {
+		if (used_idx_static + vq->shadow_used_idx < vq->size) {
+			rte_memcpy(&vq->used->ring[used_idx_static],
+					&vq->shadow_used_ring[0],
+					vq->shadow_used_idx *
+					sizeof(struct vring_used_elem));
+			vhost_log_used_vring(dev, vq,
 					offsetof(struct vring_used,
-						 ring[used_idx]),
-					sizeof(vq->used->ring[used_idx]));
-			}
-
-			vec_idx++;
-			desc_addr = gpa_to_vva(dev, buf_vec[vec_idx].buf_addr);
-			if (unlikely(!desc_addr))
-				return 0;
-
-			/* Prefetch buffer address. */
-			rte_prefetch0((void *)(uintptr_t)desc_addr);
-			desc_offset = 0;
-			desc_avail  = buf_vec[vec_idx].buf_len;
-		}
-
-		/* done with current mbuf, get the next one */
-		if (mbuf_avail == 0) {
-			m = m->next;
+						ring[used_idx_static]),
+					vq->shadow_used_idx *
+					sizeof(struct vring_used_elem));
+		} else {
+			uint32_t part_1 = vq->size - used_idx_static;
+			uint32_t part_2 = vq->shadow_used_idx - part_1;
 
-			mbuf_offset = 0;
-			mbuf_avail  = rte_pktmbuf_data_len(m);
+			rte_memcpy(&vq->used->ring[used_idx_static],
+					&vq->shadow_used_ring[0],
+					part_1 *
+					sizeof(struct vring_used_elem));
+			vhost_log_used_vring(dev, vq,
+					offsetof(struct vring_used,
+						ring[used_idx_static]),
+					part_1 *
+					sizeof(struct vring_used_elem));
+			rte_memcpy(&vq->used->ring[0],
+					&vq->shadow_used_ring[part_1],
+					part_2 *
+					sizeof(struct vring_used_elem));
+			vhost_log_used_vring(dev, vq,
+					offsetof(struct vring_used,
+						ring[0]),
+					part_2 *
+					sizeof(struct vring_used_elem));
 		}
-
-		cpy_len = RTE_MIN(desc_avail, mbuf_avail);
-		rte_memcpy((void *)((uintptr_t)(desc_addr + desc_offset)),
-			rte_pktmbuf_mtod_offset(m, void *, mbuf_offset),
-			cpy_len);
-		vhost_log_write(dev, buf_vec[vec_idx].buf_addr + desc_offset,
-			cpy_len);
-		PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset),
-			cpy_len, 0);
-
-		mbuf_avail  -= cpy_len;
-		mbuf_offset += cpy_len;
-		desc_avail  -= cpy_len;
-		desc_offset += cpy_len;
 	}
 
-	used_idx = cur_idx & (vq->size - 1);
-	vq->used->ring[used_idx].id = buf_vec[vec_idx].desc_idx;
-	vq->used->ring[used_idx].len = desc_offset;
+	rte_smp_wmb();
+	vq->used->idx = vq->last_used_idx;
 	vhost_log_used_vring(dev, vq,
-		offsetof(struct vring_used, ring[used_idx]),
-		sizeof(vq->used->ring[used_idx]));
-
-	return end_idx - start_idx;
-}
-
-static inline uint32_t __attribute__((always_inline))
-virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
-	struct rte_mbuf **pkts, uint32_t count)
-{
-	struct vhost_virtqueue *vq;
-	uint32_t pkt_idx = 0, nr_used = 0;
-	uint16_t end;
-	struct buf_vector buf_vec[BUF_VECTOR_MAX];
-
-	LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__);
-	if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->virt_qp_nb))) {
-		RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n",
-			dev->vid, __func__, queue_id);
-		return 0;
-	}
-
-	vq = dev->virtqueue[queue_id];
-	if (unlikely(vq->enabled == 0))
-		return 0;
-
-	count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);
-	if (count == 0)
-		return 0;
-
-	for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
-		uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen;
-
-		if (unlikely(reserve_avail_buf_mergeable(vq, pkt_len,
-							 &end, buf_vec) < 0)) {
-			LOG_DEBUG(VHOST_DATA,
-				"(%d) failed to get enough desc from vring\n",
-				dev->vid);
-			break;
-		}
-
-		nr_used = copy_mbuf_to_desc_mergeable(dev, vq, end,
-						      pkts[pkt_idx], buf_vec);
-		rte_smp_wmb();
-
-		*(volatile uint16_t *)&vq->used->idx += nr_used;
-		vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx),
+			offsetof(struct vring_used, idx),
 			sizeof(vq->used->idx));
-		vq->last_used_idx += nr_used;
-	}
-
-	if (likely(pkt_idx)) {
-		/* flush used->idx update before we read avail->flags. */
-		rte_mb();
-
-		/* Kick the guest if necessary. */
+	rte_mb();
+	if (likely(pkt_sent)) {
 		if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)
 				&& (vq->callfd >= 0))
 			eventfd_write(vq->callfd, (eventfd_t)1);
 	}
 
-	return pkt_idx;
-}
-
-uint16_t
-rte_vhost_enqueue_burst(int vid, uint16_t queue_id,
-	struct rte_mbuf **pkts, uint16_t count)
-{
-	struct virtio_net *dev = get_device(vid);
+	return pkt_sent;
 
-	if (!dev)
-		return 0;
+rollback:
+	if (is_mrg_rxbuf || is_virtio_1)
+		vq->last_used_idx -= virtio_hdr->num_buffers - 1;
 
-	if (dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF))
-		return virtio_dev_merge_rx(dev, queue_id, pkts, count);
-	else
-		return virtio_dev_rx(dev, queue_id, pkts, count);
+	goto done;
 }
 
 static void
diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
index 1785695..87d09fa 100644
--- a/lib/librte_vhost/virtio-net.c
+++ b/lib/librte_vhost/virtio-net.c
@@ -152,10 +152,14 @@ cleanup_device(struct virtio_net *dev, int destroy)
 static void
 free_device(struct virtio_net *dev)
 {
+	struct vhost_virtqueue *vq;
 	uint32_t i;
 
-	for (i = 0; i < dev->virt_qp_nb; i++)
-		rte_free(dev->virtqueue[i * VIRTIO_QNUM]);
+	for (i = 0; i < dev->virt_qp_nb; i++) {
+		vq = dev->virtqueue[i * VIRTIO_QNUM];
+		rte_free(vq->shadow_used_ring);
+		rte_free(vq);
+	}
 
 	rte_free(dev);
 }
@@ -418,13 +422,18 @@ int
 vhost_set_vring_num(int vid, struct vhost_vring_state *state)
 {
 	struct virtio_net *dev;
+	struct vhost_virtqueue *vq;
 
 	dev = get_device(vid);
 	if (dev == NULL)
 		return -1;
 
 	/* State->index refers to the queue index. The txq is 1, rxq is 0. */
-	dev->virtqueue[state->index]->size = state->num;
+	vq = dev->virtqueue[state->index];
+	vq->size = state->num;
+	vq->shadow_used_ring = rte_malloc("",
+			vq->size * sizeof(struct vring_used_elem),
+			RTE_CACHE_LINE_SIZE);
 
 	return 0;
 }
-- 
2.7.4

             reply	other threads:[~2016-08-16 10:58 UTC|newest]

Thread overview: 141+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-08-16  3:50 Zhihong Wang [this message]
2016-08-16 13:59 ` [PATCH] optimize vhost enqueue Maxime Coquelin
2016-08-17  1:45   ` Wang, Zhihong
2016-08-17  2:38     ` Yuanhan Liu
2016-08-17  6:41       ` Wang, Zhihong
2016-08-17  9:17         ` Maxime Coquelin
2016-08-17  9:51           ` Yuanhan Liu
2016-08-18 13:44             ` Wang, Zhihong
2016-08-17 10:07           ` Wang, Zhihong
2016-08-18  6:33 ` [PATCH v2 0/6] vhost: optimize enqueue Zhihong Wang
2016-08-18  6:33   ` [PATCH v2 1/6] vhost: rewrite enqueue Zhihong Wang
2016-08-19  2:39     ` Yuanhan Liu
2016-08-19  7:07       ` Wang, Zhihong
2016-08-18  6:33   ` [PATCH v2 2/6] vhost: remove obsolete Zhihong Wang
2016-08-19  2:32     ` Yuanhan Liu
2016-08-19  7:08       ` Wang, Zhihong
2016-08-18  6:33   ` [PATCH v2 3/6] vhost: remove useless volatile Zhihong Wang
2016-08-18  6:33   ` [PATCH v2 4/6] vhost: add desc prefetch Zhihong Wang
2016-08-18  6:33   ` [PATCH v2 5/6] vhost: batch update used ring Zhihong Wang
2016-08-18  6:33   ` [PATCH v2 6/6] vhost: optimize cache access Zhihong Wang
2016-08-19  5:43 ` [PATCH v3 0/5] vhost: optimize enqueue Zhihong Wang
2016-08-19  5:43   ` [PATCH v3 1/5] vhost: rewrite enqueue Zhihong Wang
2016-08-22  9:35     ` Maxime Coquelin
2016-08-23  2:27       ` Wang, Zhihong
2016-08-25  4:00       ` Yuanhan Liu
2016-08-19  5:43   ` [PATCH v3 2/5] vhost: remove useless volatile Zhihong Wang
2016-08-19  5:43   ` [PATCH v3 3/5] vhost: add desc prefetch Zhihong Wang
2016-08-19  5:43   ` [PATCH v3 4/5] vhost: batch update used ring Zhihong Wang
2016-08-25  3:48     ` Yuanhan Liu
2016-08-25  5:19       ` Wang, Zhihong
2016-08-19  5:43   ` [PATCH v3 5/5] vhost: optimize cache access Zhihong Wang
2016-08-22  8:11   ` [PATCH v3 0/5] vhost: optimize enqueue Maxime Coquelin
2016-08-22 10:01     ` Maxime Coquelin
2016-08-22 10:35       ` Thomas Monjalon
2016-08-24  3:37         ` Wang, Zhihong
2016-08-23  2:31       ` Wang, Zhihong
2016-08-23 10:43         ` Wang, Zhihong
2016-08-23 12:16           ` Maxime Coquelin
2016-08-23 12:22           ` Yuanhan Liu
2016-08-23  2:15     ` Wang, Zhihong
2016-09-21  8:50     ` Jianbo Liu
2016-09-21  9:27       ` Wang, Zhihong
2016-09-21 12:54         ` Jianbo Liu
2016-09-22  2:11           ` Wang, Zhihong
2016-09-22  2:29           ` Yuanhan Liu
2016-09-22  5:47             ` Jianbo Liu
2016-09-22  6:58               ` Wang, Zhihong
2016-09-22  9:01                 ` Jianbo Liu
2016-09-22 10:04                   ` Wang, Zhihong
2016-09-22 14:41                     ` Jianbo Liu
2016-09-23  2:56                       ` Wang, Zhihong
2016-09-23 10:41                         ` Jianbo Liu
2016-09-23 13:41                           ` Thomas Monjalon
2016-09-25  5:41                             ` Wang, Zhihong
2016-09-26  5:12                               ` Jianbo Liu
2016-09-26  5:25                                 ` Wang, Zhihong
2016-09-26  5:38                                   ` Jianbo Liu
2016-09-26  6:00                                     ` Wang, Zhihong
2016-09-26  4:24                             ` Jianbo Liu
2016-09-26  5:37                   ` Luke Gorrie
2016-09-26  5:40                     ` Jianbo Liu
2016-09-27 10:21                   ` Yuanhan Liu
2016-09-27 16:45                     ` Wang, Zhihong
2016-10-09 12:09                       ` Wang, Zhihong
2016-10-10  2:44                         ` Yuanhan Liu
2016-10-10  5:31                           ` Jianbo Liu
2016-10-10  6:22                             ` Wang, Zhihong
2016-10-10  6:57                               ` Jianbo Liu
2016-10-10  7:25                                 ` Wang, Zhihong
2016-10-12  2:53               ` Yuanhan Liu
2016-10-12 12:22                 ` Wang, Zhihong
2016-10-12 15:31                   ` Thomas Monjalon
2016-10-13  1:21                     ` Wang, Zhihong
2016-10-13  3:51                     ` Jianbo Liu
2016-10-13  5:33                   ` Yuanhan Liu
2016-10-13  5:35                     ` Yuanhan Liu
2016-10-13  6:02                     ` Wang, Zhihong
2016-10-13  7:54                       ` Maxime Coquelin
2016-10-13  9:23                         ` Maxime Coquelin
2016-10-14 10:11                           ` Yuanhan Liu
2016-08-30  3:35 ` [PATCH v4 0/6] " Zhihong Wang
2016-08-30  3:35   ` [PATCH v4 1/6] vhost: fix windows vm hang Zhihong Wang
2016-09-05  5:24     ` [dpdk-stable] " Yuanhan Liu
2016-09-05  5:25       ` Wang, Zhihong
2016-09-05  5:40         ` Yuanhan Liu
2016-08-30  3:36   ` [PATCH v4 2/6] vhost: rewrite enqueue Zhihong Wang
2016-09-05  6:39     ` Yuanhan Liu
2016-09-07  5:33       ` Yuanhan Liu
2016-09-07  5:39         ` Wang, Zhihong
2016-08-30  3:36   ` [PATCH v4 3/6] vhost: remove useless volatile Zhihong Wang
2016-08-30  3:36   ` [PATCH v4 4/6] vhost: add desc prefetch Zhihong Wang
2016-08-30  3:36   ` [PATCH v4 5/6] vhost: batch update used ring Zhihong Wang
2016-08-30  3:36   ` [PATCH v4 6/6] vhost: optimize cache access Zhihong Wang
2016-09-09  3:39 ` [PATCH v5 0/6] vhost: optimize enqueue Zhihong Wang
2016-09-09  3:39   ` [PATCH v5 1/6] vhost: fix windows vm hang Zhihong Wang
2016-09-09  3:39   ` [PATCH v5 2/6] vhost: rewrite enqueue Zhihong Wang
2016-09-12 15:42     ` Maxime Coquelin
2016-09-14  8:20       ` Wang, Zhihong
2016-09-15 16:35         ` Maxime Coquelin
2016-09-12 16:26     ` Maxime Coquelin
2016-09-14  8:22       ` Wang, Zhihong
2016-09-18 14:19     ` Yuanhan Liu
2016-09-19  3:29       ` Wang, Zhihong
2016-09-09  3:39   ` [PATCH v5 3/6] vhost: remove useless volatile Zhihong Wang
2016-09-09  3:39   ` [PATCH v5 4/6] vhost: add desc prefetch Zhihong Wang
2016-09-09  3:39   ` [PATCH v5 5/6] vhost: batch update used ring Zhihong Wang
2016-09-12 15:45     ` Maxime Coquelin
2016-09-14  8:43       ` Wang, Zhihong
2016-09-15 16:38         ` Maxime Coquelin
2016-09-18  2:55           ` Yuanhan Liu
2016-09-18  2:57             ` Wang, Zhihong
2016-09-09  3:39   ` [PATCH v5 6/6] vhost: optimize cache access Zhihong Wang
2016-09-12 13:52   ` [PATCH v5 0/6] vhost: optimize enqueue Maxime Coquelin
2016-09-12 13:56     ` Maxime Coquelin
2016-09-12 14:01     ` Yuanhan Liu
2016-09-20  2:00 ` [PATCH v6 " Zhihong Wang
2016-09-20  2:00   ` [PATCH v6 1/6] vhost: fix windows vm hang Zhihong Wang
2016-10-13  6:18     ` [dpdk-stable] " Yuanhan Liu
2016-09-20  2:00   ` [PATCH v6 2/6] vhost: rewrite enqueue Zhihong Wang
2016-09-22  9:58     ` Jianbo Liu
2016-09-22 10:13       ` Wang, Zhihong
2016-09-20  2:00   ` [PATCH v6 3/6] vhost: remove useless volatile Zhihong Wang
2016-09-20  2:00   ` [PATCH v6 4/6] vhost: add desc prefetch Zhihong Wang
2016-09-20  2:00   ` [PATCH v6 5/6] vhost: batch update used ring Zhihong Wang
2016-09-20  2:00   ` [PATCH v6 6/6] vhost: optimize cache access Zhihong Wang
2016-09-21  4:32     ` Maxime Coquelin
2016-09-21  2:26   ` [PATCH v6 0/6] vhost: optimize enqueue Yuanhan Liu
2016-09-21  4:39     ` Maxime Coquelin
2016-10-14  9:34   ` [PATCH v7 0/7] vhost: optimize mergeable Rx path Yuanhan Liu
2016-10-14  9:34     ` [PATCH v7 1/7] vhost: remove useless volatile Yuanhan Liu
2016-10-14  9:34     ` [PATCH v7 2/7] vhost: optimize cache access Yuanhan Liu
2016-10-14  9:34     ` [PATCH v7 3/7] vhost: simplify mergeable Rx vring reservation Yuanhan Liu
2016-10-25 22:08       ` Thomas Monjalon
2016-10-26  2:56         ` Yuanhan Liu
2016-10-14  9:34     ` [PATCH v7 4/7] vhost: use last avail idx for avail ring reservation Yuanhan Liu
2016-10-14  9:34     ` [PATCH v7 5/7] vhost: shadow used ring update Yuanhan Liu
2016-10-14  9:34     ` [PATCH v7 6/7] vhost: prefetch avail ring Yuanhan Liu
2016-10-14  9:34     ` [PATCH v7 7/7] vhost: retrieve avail head once Yuanhan Liu
2016-10-18  2:25     ` [PATCH v7 0/7] vhost: optimize mergeable Rx path Jianbo Liu
2016-10-18 14:53     ` Maxime Coquelin
2016-10-21  7:51     ` Yuanhan Liu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1471319402-112998-1-git-send-email-zhihong.wang@intel.com \
    --to=zhihong.wang@intel.com \
    --cc=dev@dpdk.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.