All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] vhost: remove internal lockless enqueue
@ 2016-06-13 11:52 Huawei Xie
  2016-06-14 14:07 ` Yuanhan Liu
  0 siblings, 1 reply; 2+ messages in thread
From: Huawei Xie @ 2016-06-13 11:52 UTC (permalink / raw)
  To: dev
  Cc: yuanhan.liu, jianfeng.tan, mukawa, kevin.traynor, haifeng.lin,
	Huawei Xie

All other DPDK PMDs don't support concurrent receiving or sending
packets to the same queue. The upper application should deal with
this, normally through queue and core bindings.

Due to historical reasons, vhost internally supports concurrent lockless
enqueuing of packets to the same virtio queue through a costly cmpset operation.
This patch removes this internal lockless implementation and should improve
performance a bit.

Luckily DPDK OVS doesn't rely on this behavior.

Signed-off-by: Huawei Xie <huawei.xie@intel.com>
---
 doc/guides/rel_notes/release_16_07.rst |   3 +
 lib/librte_vhost/rte_virtio_net.h      |   3 +-
 lib/librte_vhost/vhost_rxtx.c          | 106 +++++++--------------------------
 lib/librte_vhost/virtio-net.c          |   1 -
 4 files changed, 24 insertions(+), 89 deletions(-)

diff --git a/doc/guides/rel_notes/release_16_07.rst b/doc/guides/rel_notes/release_16_07.rst
index 30e78d4..e96250f 100644
--- a/doc/guides/rel_notes/release_16_07.rst
+++ b/doc/guides/rel_notes/release_16_07.rst
@@ -112,6 +112,9 @@ API Changes
    * Add a short 1-2 sentence description of the API change. Use fixed width
      quotes for ``rte_function_names`` or ``rte_struct_names``. Use the past tense.
 
+* The function ``rte_vhost_enqueue_burst`` no longer supports concurrent enqueuing
+  packets to the same queue.
+
 * The following counters are removed from ``rte_eth_stats`` structure:
   ibadcrc, ibadlen, imcasts, fdirmatch, fdirmiss,
   tx_pause_xon, rx_pause_xon, tx_pause_xoff, rx_pause_xoff.
diff --git a/lib/librte_vhost/rte_virtio_net.h b/lib/librte_vhost/rte_virtio_net.h
index 600b20b..dbba24e 100644
--- a/lib/librte_vhost/rte_virtio_net.h
+++ b/lib/librte_vhost/rte_virtio_net.h
@@ -88,7 +88,6 @@ struct vhost_virtqueue {
 	uint32_t		backend;		/**< Backend value to determine if device should started/stopped. */
 	uint16_t		vhost_hlen;		/**< Vhost header length (varies depending on RX merge buffers. */
 	volatile uint16_t	last_used_idx;		/**< Last index used on the available ring */
-	volatile uint16_t	last_used_idx_res;	/**< Used for multiple devices reserving buffers. */
 #define VIRTIO_INVALID_EVENTFD		(-1)
 #define VIRTIO_UNINITIALIZED_EVENTFD	(-2)
 	int			callfd;			/**< Used to notify the guest (trigger interrupt). */
@@ -192,7 +191,7 @@ rte_vring_available_entries(struct virtio_net *dev, uint16_t queue_id)
 	if (!vq->enabled)
 		return 0;
 
-	return *(volatile uint16_t *)&vq->avail->idx - vq->last_used_idx_res;
+	return *(volatile uint16_t *)&vq->avail->idx - vq->last_used_idx;
 }
 
 /**
diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index 750821a..3fa75cb 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -205,49 +205,6 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
 	return 0;
 }
 
-/*
- * As many data cores may want to access available buffers
- * they need to be reserved.
- */
-static inline uint32_t
-reserve_avail_buf(struct vhost_virtqueue *vq, uint32_t count,
-		  uint16_t *start, uint16_t *end)
-{
-	uint16_t res_start_idx;
-	uint16_t res_end_idx;
-	uint16_t avail_idx;
-	uint16_t free_entries;
-	int success;
-
-	count = RTE_MIN(count, (uint32_t)MAX_PKT_BURST);
-
-again:
-	res_start_idx = vq->last_used_idx_res;
-	avail_idx = *((volatile uint16_t *)&vq->avail->idx);
-
-	free_entries = avail_idx - res_start_idx;
-	count = RTE_MIN(count, free_entries);
-	if (count == 0)
-		return 0;
-
-	res_end_idx = res_start_idx + count;
-
-	/*
-	 * update vq->last_used_idx_res atomically; try again if failed.
-	 *
-	 * TODO: Allow to disable cmpset if no concurrency in application.
-	 */
-	success = rte_atomic16_cmpset(&vq->last_used_idx_res,
-				      res_start_idx, res_end_idx);
-	if (unlikely(!success))
-		goto again;
-
-	*start = res_start_idx;
-	*end   = res_end_idx;
-
-	return count;
-}
-
 /**
  * This function adds buffers to the virtio devices RX virtqueue. Buffers can
  * be received from the physical port or from another virtio device. A packet
@@ -260,7 +217,7 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 	      struct rte_mbuf **pkts, uint32_t count)
 {
 	struct vhost_virtqueue *vq;
-	uint16_t res_start_idx, res_end_idx;
+	uint16_t avail_idx, free_entries, res_start_idx;
 	uint16_t desc_indexes[MAX_PKT_BURST];
 	uint32_t i;
 
@@ -276,13 +233,19 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 	if (unlikely(vq->enabled == 0))
 		return 0;
 
-	count = reserve_avail_buf(vq, count, &res_start_idx, &res_end_idx);
+
+	count = RTE_MIN(count, (uint32_t)MAX_PKT_BURST);
+	avail_idx = *((volatile uint16_t *)&vq->avail->idx);
+	res_start_idx = vq->last_used_idx;
+	free_entries = avail_idx - res_start_idx;
+	count = RTE_MIN(count, free_entries);
+
 	if (count == 0)
 		return 0;
 
 	LOG_DEBUG(VHOST_DATA,
 		"(%"PRIu64") res_start_idx %d| res_end_idx Index %d\n",
-		dev->device_fh, res_start_idx, res_end_idx);
+		dev->device_fh, res_start_idx, res_start_idx + count);
 
 	/* Retrieve all of the desc indexes first to avoid caching issues. */
 	rte_prefetch0(&vq->avail->ring[res_start_idx & (vq->size - 1)]);
@@ -315,12 +278,8 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 
 	rte_smp_wmb();
 
-	/* Wait until it's our turn to add our buffer to the used ring. */
-	while (unlikely(vq->last_used_idx != res_start_idx))
-		rte_pause();
-
 	*(volatile uint16_t *)&vq->used->idx += count;
-	vq->last_used_idx = res_end_idx;
+	vq->last_used_idx += count;
 	vhost_log_used_vring(dev, vq,
 		offsetof(struct vring_used, idx),
 		sizeof(vq->used->idx));
@@ -366,29 +325,20 @@ fill_vec_buf(struct vhost_virtqueue *vq, uint32_t avail_idx,
 }
 
 /*
- * As many data cores may want to access available buffers concurrently,
- * they need to be reserved.
- *
  * Returns -1 on fail, 0 on success
  */
 static inline int
 reserve_avail_buf_mergeable(struct vhost_virtqueue *vq, uint32_t size,
-			    uint16_t *start, uint16_t *end)
+			    uint16_t *end)
 {
-	uint16_t res_start_idx;
 	uint16_t res_cur_idx;
 	uint16_t avail_idx;
-	uint32_t allocated;
-	uint32_t vec_idx;
-	uint16_t tries;
+	uint32_t allocated = 0;
+	uint32_t vec_idx = 0;
+	uint16_t tries = 0;
 
-again:
-	res_start_idx = vq->last_used_idx_res;
-	res_cur_idx  = res_start_idx;
+	res_cur_idx  = vq->last_used_idx;
 
-	allocated = 0;
-	vec_idx   = 0;
-	tries     = 0;
 	while (1) {
 		avail_idx = *((volatile uint16_t *)&vq->avail->idx);
 		if (unlikely(res_cur_idx == avail_idx))
@@ -413,26 +363,17 @@ again:
 			return -1;
 	}
 
-	/*
-	 * update vq->last_used_idx_res atomically.
-	 * retry again if failed.
-	 */
-	if (rte_atomic16_cmpset(&vq->last_used_idx_res,
-				res_start_idx, res_cur_idx) == 0)
-		goto again;
-
-	*start = res_start_idx;
 	*end   = res_cur_idx;
 	return 0;
 }
 
 static inline uint32_t __attribute__((always_inline))
 copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
-			    uint16_t res_start_idx, uint16_t res_end_idx,
-			    struct rte_mbuf *m)
+			    uint16_t res_end_idx, struct rte_mbuf *m)
 {
 	struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
 	uint32_t vec_idx = 0;
+	uint16_t res_start_idx = vq->last_used_idx;
 	uint16_t cur_idx = res_start_idx;
 	uint64_t desc_addr;
 	uint32_t mbuf_offset, mbuf_avail;
@@ -531,7 +472,7 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
 {
 	struct vhost_virtqueue *vq;
 	uint32_t pkt_idx = 0, nr_used = 0;
-	uint16_t start, end;
+	uint16_t end;
 
 	LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_merge_rx()\n",
 		dev->device_fh);
@@ -554,24 +495,17 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
 		uint32_t pkt_len = pkts[pkt_idx]->pkt_len + vq->vhost_hlen;
 
 		if (unlikely(reserve_avail_buf_mergeable(vq, pkt_len,
-							 &start, &end) < 0)) {
+							 &end) < 0)) {
 			LOG_DEBUG(VHOST_DATA,
 				"(%" PRIu64 ") Failed to get enough desc from vring\n",
 				dev->device_fh);
 			break;
 		}
 
-		nr_used = copy_mbuf_to_desc_mergeable(dev, vq, start, end,
+		nr_used = copy_mbuf_to_desc_mergeable(dev, vq, end,
 						      pkts[pkt_idx]);
 		rte_smp_wmb();
 
-		/*
-		 * Wait until it's our turn to add our buffer
-		 * to the used ring.
-		 */
-		while (unlikely(vq->last_used_idx != start))
-			rte_pause();
-
 		*(volatile uint16_t *)&vq->used->idx += nr_used;
 		vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx),
 			sizeof(vq->used->idx));
diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
index f4695af..8912c89 100644
--- a/lib/librte_vhost/virtio-net.c
+++ b/lib/librte_vhost/virtio-net.c
@@ -601,7 +601,6 @@ vhost_set_vring_base(struct vhost_device_ctx ctx,
 
 	/* State->index refers to the queue index. The txq is 1, rxq is 0. */
 	dev->virtqueue[state->index]->last_used_idx = state->num;
-	dev->virtqueue[state->index]->last_used_idx_res = state->num;
 
 	return 0;
 }
-- 
1.8.1.4

^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [PATCH] vhost: remove internal lockless enqueue
  2016-06-13 11:52 [PATCH] vhost: remove internal lockless enqueue Huawei Xie
@ 2016-06-14 14:07 ` Yuanhan Liu
  0 siblings, 0 replies; 2+ messages in thread
From: Yuanhan Liu @ 2016-06-14 14:07 UTC (permalink / raw)
  To: Huawei Xie
  Cc: dev, yuanhan.liu, jianfeng.tan, mukawa, kevin.traynor, haifeng.lin

Firstly, it's V2. So, don't forget to add a version number and a link to 
the previous version next time.

On Mon, Jun 13, 2016 at 07:52:12PM +0800, Huawei Xie wrote:
> All other DPDK PMDs don't support concurrent receiving or sending
> packets to the same queue. The upper application should deal with
> this, normally through queue and core bindings.
> 
> Due to historical reasons, vhost internally supports concurrent lockless
> enqueuing of packets to the same virtio queue through a costly cmpset operation.
> This patch removes this internal lockless implementation and should improve
> performance a bit.
> 
> Luckily DPDK OVS doesn't rely on this behavior.
> 
> Signed-off-by: Huawei Xie <huawei.xie@intel.com>

Applied to dpdk-next-virtio, with the rebase on top of my vhost ABI/API
changes.

FYI, I also renamed the title a bit to "remove concurrent enqueue" as
Thomas mentioned in the last version, that it's confusing to say "remove
lockless" here, since we are actually removing the lock.

Thanks.

	--yliu

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2016-06-14 14:06 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-06-13 11:52 [PATCH] vhost: remove internal lockless enqueue Huawei Xie
2016-06-14 14:07 ` Yuanhan Liu

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.