All of lore.kernel.org
 help / color / mirror / Atom feed
From: Maxime Coquelin <maxime.coquelin@redhat.com>
To: xuan.ding@intel.com, chenbo.xia@intel.com
Cc: dev@dpdk.org, jiayu.hu@intel.com, cheng1.jiang@intel.com,
	sunil.pai.g@intel.com, liangma@liangbit.com,
	Yuan Wang <yuanx.wang@intel.com>
Subject: Re: [RFC,v3 1/2] vhost: support async dequeue for split ring
Date: Thu, 31 Mar 2022 11:15:12 +0200	[thread overview]
Message-ID: <38827506-7d29-9081-bb48-dbe01bd69b72@redhat.com> (raw)
In-Reply-To: <20220310065407.17145-2-xuan.ding@intel.com>



On 3/10/22 07:54, xuan.ding@intel.com wrote:
> From: Xuan Ding <xuan.ding@intel.com>
> 
> This patch implements asynchronous dequeue data path for vhost split
> ring, with dmadev library integrated.
> 
> Signed-off-by: Xuan Ding <xuan.ding@intel.com>
> Signed-off-by: Yuan Wang <yuanx.wang@intel.com>
> ---
>   lib/vhost/rte_vhost_async.h |  37 ++-
>   lib/vhost/version.map       |   1 +
>   lib/vhost/vhost.h           |   1 +
>   lib/vhost/virtio_net.c      | 504 ++++++++++++++++++++++++++++++++++++
>   4 files changed, 541 insertions(+), 2 deletions(-)
> 
> diff --git a/lib/vhost/rte_vhost_async.h b/lib/vhost/rte_vhost_async.h
> index f1293c6a9d..b6ab0b06a2 100644
> --- a/lib/vhost/rte_vhost_async.h
> +++ b/lib/vhost/rte_vhost_async.h
> @@ -155,9 +155,9 @@ int rte_vhost_async_get_inflight(int vid, uint16_t queue_id);
>    * @param count
>    *  Size of the packet array
>    * @param dma_id
> - *  the identifier of DMA device
> + *  The identifier of DMA device
>    * @param vchan_id
> - *  the identifier of virtual DMA channel
> + *  The identifier of virtual DMA channel

This is unrelated to the purpose of this patch, it can be moved in a
dedicated trivial patch.

>    * @return
>    *  Number of packets returned
>    */
> @@ -187,6 +187,39 @@ uint16_t rte_vhost_clear_queue_thread_unsafe(int vid, uint16_t queue_id,
>   __rte_experimental
>   int rte_vhost_async_dma_configure(int16_t dma_id, uint16_t vchan_id);
>   
> +/**
> + * This function tries to receive packets from the guest with offloading
> + * copies to the async channel. The packets that are transfer completed
> + * are returned in "pkts". The other packets that their copies are submitted to
> + * the async channel but not completed are called "in-flight packets".
> + * This function will not return in-flight packets until their copies are
> + * completed by the async channel.
> + *
> + * @param vid
> + *  ID of vhost device to dequeue data
> + * @param queue_id
> + *  ID of virtqueue to dequeue data
> + * @param mbuf_pool
> + *  Mbuf_pool where host mbuf is allocated
> + * @param pkts
> + *  Blank array to keep successfully dequeued packets
> + * @param count
> + *  Size of the packet array
> + * @param nr_inflight
> + *  The amount of in-flight packets. If error occurred, its value is set to -1.
> + * @param dma_id
> + *  The identifier of DMA device
> + * @param vchan_id
> + *  The identifier of virtual DMA channel
> + * @return
> + *  Number of successfully dequeued packets
> + */
> +__rte_experimental
> +uint16_t
> +rte_vhost_async_try_dequeue_burst(int vid, uint16_t queue_id,
> +	struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count,
> +	int *nr_inflight, uint16_t dma_id, uint16_t vchan_id);
> +
>   #ifdef __cplusplus
>   }
>   #endif
> diff --git a/lib/vhost/version.map b/lib/vhost/version.map
> index 0a66c5840c..968d6d4290 100644
> --- a/lib/vhost/version.map
> +++ b/lib/vhost/version.map
> @@ -87,6 +87,7 @@ EXPERIMENTAL {
>   
>   	# added in 22.03
>   	rte_vhost_async_dma_configure;

# added in 22.07

> +	rte_vhost_async_try_dequeue_burst;
>   };
>   
>   INTERNAL {
> diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h
> index a9edc271aa..3799d41089 100644
> --- a/lib/vhost/vhost.h
> +++ b/lib/vhost/vhost.h
> @@ -178,6 +178,7 @@ extern struct async_dma_info dma_copy_track[RTE_DMADEV_DEFAULT_MAX];
>    */
>   struct async_inflight_info {
>   	struct rte_mbuf *mbuf;
> +	struct virtio_net_hdr nethdr;
>   	uint16_t descs; /* num of descs inflight */
>   	uint16_t nr_buffers; /* num of buffers inflight for packed ring */
>   };
> diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
> index 5f432b0d77..3816caca79 100644
> --- a/lib/vhost/virtio_net.c
> +++ b/lib/vhost/virtio_net.c
> @@ -3141,3 +3141,507 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
>   
>   	return count;
>   }
> +
> +static __rte_always_inline int
> +async_desc_to_mbuf_seg(struct virtio_net *dev, struct vhost_virtqueue *vq,
> +			struct rte_mbuf *m, uint32_t mbuf_offset,
> +			uint64_t buf_iova, uint32_t cpy_len)
> +{
> +	struct vhost_async *async = vq->async;
> +	uint64_t mapped_len;
> +	uint32_t buf_offset = 0;
> +	void *host_iova;
> +
> +	while (cpy_len) {
> +		host_iova = (void *)(uintptr_t)gpa_to_first_hpa(dev,
> +					buf_iova + buf_offset, cpy_len,
> +					&mapped_len);
> +		if (unlikely(!host_iova)) {
> +			VHOST_LOG_DATA(ERR, "(%s) %s: failed to get host_iova.\n",
> +				dev->ifname, __func__);
> +			return -1;
> +		}
> +
> +		if (unlikely(async_iter_add_iovec(dev, async, host_iova,
> +				(void *)(uintptr_t)rte_pktmbuf_iova_offset(m, mbuf_offset),
> +				(size_t)mapped_len)))
> +			return -1;
> +
> +		cpy_len -= (uint32_t)mapped_len;
> +		mbuf_offset += (uint32_t)mapped_len;
> +		buf_offset += (uint32_t)mapped_len;
> +	}
> +
> +	return 0;
> +}

It looks really similar to async_mbuf_to_desc_seg(), just the direction
of the DMA copoy is changed.

Maybe we could refactor the two functions in a single one with adding a 
parameter for the direction. Something like this:

static __rte_always_inline int
async_fill_seg(struct virtio_net *dev, struct vhost_virtqueue *vq,
		struct rte_mbuf *m, uint32_t mbuf_offset,
		uint64_t buf_iova, uint32_t cpy_len, bool to_desc)
{
	struct vhost_async *async = vq->async;
	uint64_t mapped_len;
	uint32_t buf_offset = 0;
	void *src, *dst
	void *host_iova;

	while (cpy_len) {
		host_iova = (void *)(uintptr_t)gpa_to_first_hpa(dev,
				buf_iova + buf_offset, cpy_len, &mapped_len);
		if (unlikely(!host_iova)) {
			VHOST_LOG_DATA(ERR, "(%s) %s: failed to get host iova.\n",
				       dev->ifname, __func__);
			return -1;
		}

		if (to_desc) {
			src = (void *)(uintptr_t)rte_pktmbuf_iova_offset(m, mbuff_offset);
			dst = host_iova
		} else {
			dst = host_iova
			src = (void *)(uintptr_t)rte_pktmbuf_iova_offset(m, mbuff_offset);
		}

		if (unlikely(async_iter_add_iovec(dev, async, src, dst, 
(size_t)mapped_len)))
			return -1;

		cpy_len -= (uint32_t)mapped_len;
		mbuf_offset += (uint32_t)mapped_len;
		buf_offset += (uint32_t)mapped_len;
	}

	return 0;
}


Then if you don't pass a variable but a static true/false value for
to_desc, the compiler should generate the same code, so no performance
degradation.

> +
> +static __rte_always_inline int
> +async_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
> +		  struct buf_vector *buf_vec, uint16_t nr_vec,
> +		  struct rte_mbuf *m, struct rte_mempool *mbuf_pool,
> +		  struct virtio_net_hdr *nethdr)
> +{
> +	uint64_t buf_addr, buf_iova;
> +	uint32_t buf_avail, buf_offset, buf_len;
> +	uint32_t mbuf_avail, mbuf_offset;
> +	uint32_t cpy_len;
> +	/* A counter to avoid desc dead loop chain */
> +	uint16_t vec_idx = 0;
> +	struct rte_mbuf *cur = m, *prev = m;
> +	struct virtio_net_hdr tmp_hdr;
> +	struct virtio_net_hdr *hdr = NULL;
> +	struct vhost_async *async = vq->async;
> +
> +	buf_addr = buf_vec[vec_idx].buf_addr;
> +	buf_len = buf_vec[vec_idx].buf_len;
> +	buf_iova = buf_vec[vec_idx].buf_iova;
> +
> +	if (unlikely(buf_len < dev->vhost_hlen && nr_vec <= 1))
> +		return -1;
> +
> +	if (virtio_net_with_host_offload(dev)) {
> +		if (unlikely(buf_len < sizeof(struct virtio_net_hdr))) {
> +			/*
> +			 * No luck, the virtio-net header doesn't fit
> +			 * in a contiguous virtual area.
> +			 */
> +			copy_vnet_hdr_from_desc(&tmp_hdr, buf_vec);
> +			hdr = &tmp_hdr;
> +		} else {
> +			hdr = (struct virtio_net_hdr *)((uintptr_t)buf_addr);
> +		}
> +	}
> +
> +	/*
> +	 * A virtio driver normally uses at least 2 desc buffers
> +	 * for Tx: the first for storing the header, and others
> +	 * for storing the data.
> +	 */
> +	if (unlikely(buf_len < dev->vhost_hlen)) {
> +		buf_offset = dev->vhost_hlen - buf_len;
> +		vec_idx++;
> +		buf_addr = buf_vec[vec_idx].buf_addr;
> +		buf_iova = buf_vec[vec_idx].buf_iova;
> +		buf_len = buf_vec[vec_idx].buf_len;
> +		buf_avail  = buf_len - buf_offset;
> +	} else if (buf_len == dev->vhost_hlen) {
> +		if (unlikely(++vec_idx >= nr_vec))
> +			return -1;
> +		buf_addr = buf_vec[vec_idx].buf_addr;
> +		buf_iova = buf_vec[vec_idx].buf_iova;
> +		buf_len = buf_vec[vec_idx].buf_len;
> +
> +		buf_offset = 0;
> +		buf_avail = buf_len;
> +	} else {
> +		buf_offset = dev->vhost_hlen;
> +		buf_avail = buf_vec[vec_idx].buf_len - dev->vhost_hlen;
> +	}
> +
> +	PRINT_PACKET(dev, (uintptr_t)(buf_addr + buf_offset), (uint32_t)buf_avail, 0);
> +
> +	mbuf_offset = 0;
> +	mbuf_avail  = m->buf_len - RTE_PKTMBUF_HEADROOM;
> +
> +	if (async_iter_initialize(dev, async))
> +		return -1;
> +
> +	while (1) {
> +		cpy_len = RTE_MIN(buf_avail, mbuf_avail);
> +
> +		if (async_desc_to_mbuf_seg(dev, vq, cur, mbuf_offset, buf_iova + buf_offset,
> +					   cpy_len) < 0)
> +			goto error;
> +
> +		mbuf_avail -= cpy_len;
> +		buf_avail -= cpy_len;
> +		mbuf_offset += cpy_len;
> +		buf_offset += cpy_len;
> +
> +		/* This buf reaches to its end, get the next one */
> +		if (buf_avail == 0) {
> +			if (++vec_idx >= nr_vec)
> +				break;
> +
> +			buf_addr = buf_vec[vec_idx].buf_addr;
> +			buf_iova = buf_vec[vec_idx].buf_iova;
> +			buf_len = buf_vec[vec_idx].buf_len;
> +
> +			buf_offset = 0;
> +			buf_avail = buf_len;
> +
> +			PRINT_PACKET(dev, (uintptr_t)buf_addr, (uint32_t)buf_avail, 0);
> +		}
> +
> +		/*
> +		 * This mbuf reaches to its end, get a new one
> +		 * to hold more data.
> +		 */
> +		if (mbuf_avail == 0) {
> +			cur = rte_pktmbuf_alloc(mbuf_pool);
> +			if (unlikely(cur == NULL)) {
> +				VHOST_LOG_DATA(ERR,
> +					"(%s) %s: failed to allocate memory for mbuf.\n",
> +					dev->ifname, __func__);
> +				goto error;
> +			}
> +
> +			prev->next = cur;
> +			prev->data_len = mbuf_offset;
> +			m->nb_segs += 1;
> +			m->pkt_len += mbuf_offset;
> +			prev = cur;
> +
> +			mbuf_offset = 0;
> +			mbuf_avail = cur->buf_len - RTE_PKTMBUF_HEADROOM;
> +		}
> +	}
> +
> +	prev->data_len = mbuf_offset;
> +	m->pkt_len += mbuf_offset;
> +
> +	async_iter_finalize(async);
> +	if (hdr)
> +		*nethdr = *hdr;
> +
> +	return 0;
> +
> +error:
> +	async_iter_cancel(async);
> +	return -1;
> +}


You can do here the same refactoring I did for the enqueue path, i.e.
merging copy_desc_to_mbuf and async_desc_to_mbuf in a single function.

> +static __rte_always_inline uint16_t
> +async_poll_dequeue_completed_split(struct virtio_net *dev, uint16_t queue_id,
> +		struct rte_mbuf **pkts, uint16_t count, uint16_t dma_id,
> +		uint16_t vchan_id, bool legacy_ol_flags)
> +{
> +	uint16_t start_idx, from, i;
> +	uint16_t nr_cpl_pkts = 0;
> +	struct async_inflight_info *pkts_info;
> +	struct vhost_virtqueue *vq = dev->virtqueue[queue_id];
> +
> +	pkts_info = vq->async->pkts_info;
> +
> +	vhost_async_dma_check_completed(dev, dma_id, vchan_id, VHOST_DMA_MAX_COPY_COMPLETE);
> +
> +	start_idx = async_get_first_inflight_pkt_idx(vq);
> +
> +	from = start_idx;
> +	while (vq->async->pkts_cmpl_flag[from] && count--) {
> +		vq->async->pkts_cmpl_flag[from] = false;
> +		from = (from + 1) & (vq->size - 1);
> +		nr_cpl_pkts++;
> +	}
> +
> +	if (nr_cpl_pkts == 0)
> +		return 0;
> +
> +	for (i = 0; i < nr_cpl_pkts; i++) {
> +		from = (start_idx + i) & (vq->size - 1);
> +		pkts[i] = pkts_info[from].mbuf;
> +
> +		if (virtio_net_with_host_offload(dev))
> +			vhost_dequeue_offload(dev, &pkts_info[from].nethdr, pkts[i],
> +					      legacy_ol_flags);
> +	}
> +
> +	/* write back completed descs to used ring and update used idx */
> +	write_back_completed_descs_split(vq, nr_cpl_pkts);
> +	__atomic_add_fetch(&vq->used->idx, nr_cpl_pkts, __ATOMIC_RELEASE);
> +	vhost_vring_call_split(dev, vq);
> +
> +	vq->async->pkts_inflight_n -= nr_cpl_pkts;
> +
> +	return nr_cpl_pkts;
> +}
> +
> +static __rte_always_inline uint16_t
> +virtio_dev_tx_async_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
> +		uint16_t queue_id, struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts,
> +		uint16_t count, uint16_t dma_id, uint16_t vchan_id, bool legacy_ol_flags)
> +{
> +	static bool allocerr_warned;
> +	bool dropped = false;
> +	uint16_t free_entries;
> +	uint16_t pkt_idx, slot_idx = 0;
> +	uint16_t nr_done_pkts = 0;
> +	uint16_t pkt_err = 0;
> +	uint16_t n_xfer;
> +	struct vhost_async *async = vq->async;
> +	struct async_inflight_info *pkts_info = async->pkts_info;
> +	struct rte_mbuf *pkts_prealloc[MAX_PKT_BURST];
> +	uint16_t pkts_size = count;
> +
> +	/**
> +	 * The ordering between avail index and
> +	 * desc reads needs to be enforced.
> +	 */
> +	free_entries = __atomic_load_n(&vq->avail->idx, __ATOMIC_ACQUIRE) - vq->last_avail_idx;
> +	if (free_entries == 0)
> +		goto out;
> +
> +	rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]);
> +
> +	async_iter_reset(async);
> +
> +	count = RTE_MIN(count, MAX_PKT_BURST);
> +	count = RTE_MIN(count, free_entries);
> +	VHOST_LOG_DATA(DEBUG, "(%s) about to dequeue %u buffers\n", dev->ifname, count);
> +
> +	if (rte_pktmbuf_alloc_bulk(mbuf_pool, pkts_prealloc, count))
> +		goto out;
> +
> +	for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
> +		uint16_t head_idx = 0;
> +		uint16_t nr_vec = 0;
> +		uint16_t to;
> +		uint32_t buf_len;
> +		int err;
> +		struct buf_vector buf_vec[BUF_VECTOR_MAX];
> +		struct rte_mbuf *pkt = pkts_prealloc[pkt_idx];
> +
> +		if (unlikely(fill_vec_buf_split(dev, vq, vq->last_avail_idx,
> +						&nr_vec, buf_vec,
> +						&head_idx, &buf_len,
> +						VHOST_ACCESS_RO) < 0)) {
> +			dropped = true;
> +			break;
> +		}
> +
> +		err = virtio_dev_pktmbuf_prep(dev, pkt, buf_len);
> +		if (unlikely(err)) {
> +			/**
> +			 * mbuf allocation fails for jumbo packets when external
> +			 * buffer allocation is not allowed and linear buffer
> +			 * is required. Drop this packet.
> +			 */
> +			if (!allocerr_warned) {
> +				VHOST_LOG_DATA(ERR,
> +					"(%s) %s: Failed mbuf alloc of size %d from %s\n",
> +					dev->ifname, __func__, buf_len, mbuf_pool->name);
> +				allocerr_warned = true;
> +			}
> +			dropped = true;
> +			break;
> +		}
> +
> +		slot_idx = (async->pkts_idx + pkt_idx) & (vq->size - 1);
> +		err = async_desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkt, mbuf_pool,
> +					&pkts_info[slot_idx].nethdr);
> +		if (unlikely(err)) {
> +			if (!allocerr_warned) {
> +				VHOST_LOG_DATA(ERR,
> +					"(%s) %s: Failed to offload copies to async channel.\n",
> +					dev->ifname, __func__);
> +				allocerr_warned = true;
> +			}
> +			dropped = true;
> +			break;
> +		}
> +
> +		pkts_info[slot_idx].mbuf = pkt;
> +
> +		/* store used descs */
> +		to = async->desc_idx_split & (vq->size - 1);
> +		async->descs_split[to].id = head_idx;
> +		async->descs_split[to].len = 0;
> +		async->desc_idx_split++;
> +
> +		vq->last_avail_idx++;
> +	}
> +
> +	if (unlikely(dropped))
> +		rte_pktmbuf_free_bulk(&pkts_prealloc[pkt_idx], count - pkt_idx);
> +
> +	n_xfer = vhost_async_dma_transfer(dev, vq, dma_id, vchan_id, async->pkts_idx,
> +					  async->iov_iter, pkt_idx);
> +
> +	async->pkts_inflight_n += n_xfer;
> +
> +	pkt_err = pkt_idx - n_xfer;
> +	if (unlikely(pkt_err)) {
> +		VHOST_LOG_DATA(DEBUG,
> +			"(%s) %s: failed to transfer data for queue id %d.\n",
> +			dev->ifname, __func__, queue_id);
> +
> +		pkt_idx = n_xfer;
> +		/* recover available ring */
> +		vq->last_avail_idx -= pkt_err;
> +
> +		/**
> +		 * recover async channel copy related structures and free pktmbufs
> +		 * for error pkts.
> +		 */
> +		async->desc_idx_split -= pkt_err;
> +		while (pkt_err-- > 0) {
> +			rte_pktmbuf_free(pkts_info[slot_idx & (vq->size - 1)].mbuf);
> +			slot_idx--;
> +		}
> +	}
> +
> +	async->pkts_idx += pkt_idx;
> +	if (async->pkts_idx >= vq->size)
> +		async->pkts_idx -= vq->size;
> +
> +out:
> +	/* DMA device may serve other queues, unconditionally check completed. */
> +	nr_done_pkts = async_poll_dequeue_completed_split(dev, queue_id, pkts, pkts_size,
> +							  dma_id, vchan_id, legacy_ol_flags);
> +
> +	return nr_done_pkts;
> +}
> +
> +__rte_noinline
> +static uint16_t
> +virtio_dev_tx_async_split_legacy(struct virtio_net *dev,
> +		struct vhost_virtqueue *vq, uint16_t queue_id,
> +		struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts,
> +		uint16_t count, uint16_t dma_id, uint16_t vchan_id)
> +{
> +	return virtio_dev_tx_async_split(dev, vq, queue_id, mbuf_pool,
> +				pkts, count, dma_id, vchan_id, true);
> +}
> +
> +__rte_noinline
> +static uint16_t
> +virtio_dev_tx_async_split_compliant(struct virtio_net *dev,
> +		struct vhost_virtqueue *vq, uint16_t queue_id,
> +		struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts,
> +		uint16_t count, uint16_t dma_id, uint16_t vchan_id)
> +{
> +	return virtio_dev_tx_async_split(dev, vq, queue_id, mbuf_pool,
> +				pkts, count, dma_id, vchan_id, false);
> +}
> +
> +uint16_t
> +rte_vhost_async_try_dequeue_burst(int vid, uint16_t queue_id,
> +	struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count,
> +	int *nr_inflight, uint16_t dma_id, uint16_t vchan_id)
> +{
> +	struct virtio_net *dev;
> +	struct rte_mbuf *rarp_mbuf = NULL;
> +	struct vhost_virtqueue *vq;
> +	int16_t success = 1;
> +
> +	*nr_inflight = -1;
> +
> +	dev = get_device(vid);
> +	if (!dev)
> +		return 0;
> +
> +	if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) {
> +		VHOST_LOG_DATA(ERR,
> +			"(%s) %s: built-in vhost net backend is disabled.\n",
> +			dev->ifname, __func__);
> +		return 0;
> +	}
> +
> +	if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->nr_vring))) {
> +		VHOST_LOG_DATA(ERR,
> +			"(%s) %s: invalid virtqueue idx %d.\n",
> +			dev->ifname, __func__, queue_id);
> +		return 0;
> +	}
> +
> +	if (unlikely(!dma_copy_track[dma_id].vchans ||
> +				!dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr)) {
> +		VHOST_LOG_DATA(ERR, "(%s) %s: invalid channel %d:%u.\n", dev->ifname, __func__,
> +			       dma_id, vchan_id);
> +		return 0;
> +	}
> +
> +	vq = dev->virtqueue[queue_id];
> +
> +	if (unlikely(rte_spinlock_trylock(&vq->access_lock) == 0))
> +		return 0;
> +
> +	if (unlikely(vq->enabled == 0)) {
> +		count = 0;
> +		goto out_access_unlock;
> +	}
> +
> +	if (unlikely(!vq->async)) {
> +		VHOST_LOG_DATA(ERR, "(%s) %s: async not registered for queue id %d.\n",
> +			dev->ifname, __func__, queue_id);
> +		count = 0;
> +		goto out_access_unlock;
> +	}
> +
> +	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
> +		vhost_user_iotlb_rd_lock(vq);
> +
> +	if (unlikely(vq->access_ok == 0))
> +		if (unlikely(vring_translate(dev, vq) < 0)) {
> +			count = 0;
> +			goto out;
> +		}
> +
> +	/*
> +	 * Construct a RARP broadcast packet, and inject it to the "pkts"
> +	 * array, to looks like that guest actually send such packet.
> +	 *
> +	 * Check user_send_rarp() for more information.
> +	 *
> +	 * broadcast_rarp shares a cacheline in the virtio_net structure
> +	 * with some fields that are accessed during enqueue and
> +	 * __atomic_compare_exchange_n causes a write if performed compare
> +	 * and exchange. This could result in false sharing between enqueue
> +	 * and dequeue.
> +	 *
> +	 * Prevent unnecessary false sharing by reading broadcast_rarp first
> +	 * and only performing compare and exchange if the read indicates it
> +	 * is likely to be set.
> +	 */
> +	if (unlikely(__atomic_load_n(&dev->broadcast_rarp, __ATOMIC_ACQUIRE) &&
> +			__atomic_compare_exchange_n(&dev->broadcast_rarp,
> +			&success, 0, 0, __ATOMIC_RELEASE, __ATOMIC_RELAXED))) {
> +
> +		rarp_mbuf = rte_net_make_rarp_packet(mbuf_pool, &dev->mac);
> +		if (rarp_mbuf == NULL) {
> +			VHOST_LOG_DATA(ERR, "Failed to make RARP packet.\n");
> +			count = 0;
> +			goto out;
> +		}
> +		/*
> +		 * Inject it to the head of "pkts" array, so that switch's mac
> +		 * learning table will get updated first.
> +		 */
> +		pkts[0] = rarp_mbuf;
> +		pkts++;
> +		count -= 1;
> +	}
> +
> +	if (unlikely(vq_is_packed(dev))) {
> +		static bool not_support_pack_log;
> +		if (!not_support_pack_log) {
> +			VHOST_LOG_DATA(ERR,
> +				"(%s) %s: async dequeue does not support packed ring.\n",
> +				dev->ifname, __func__);
> +			not_support_pack_log = true;
> +		}
> +		count = 0;
> +		goto out;
> +	}
> +
> +	if (dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS)
> +		count = virtio_dev_tx_async_split_legacy(dev, vq, queue_id,
> +				mbuf_pool, pkts, count, dma_id, vchan_id);
> +	else
> +		count = virtio_dev_tx_async_split_compliant(dev, vq, queue_id,
> +				mbuf_pool, pkts, count, dma_id, vchan_id);
> +
> +	*nr_inflight = vq->async->pkts_inflight_n;
> +
> +out:
> +	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
> +		vhost_user_iotlb_rd_unlock(vq);
> +
> +out_access_unlock:
> +	rte_spinlock_unlock(&vq->access_lock);
> +
> +	if (unlikely(rarp_mbuf != NULL))
> +		count += 1;
> +
> +	return count;
> +}


  reply	other threads:[~2022-03-31  9:15 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-01-01  0:12 [RFC 0/2] vhost: support async dequeue data path xuan.ding
2022-01-01  0:12 ` [RFC 1/2] vhost: support async dequeue for split ring xuan.ding
2022-01-01  0:12 ` [RFC 2/2] examples/vhost: support async dequeue data path xuan.ding
2022-02-24 11:03 ` [RFC,v2 0/2] vhost: " xuan.ding
2022-02-24 11:03   ` [RFC,v2 1/2] vhost: support async dequeue for split ring xuan.ding
2022-02-24 11:04   ` [RFC,v2 2/2] examples/vhost: support async dequeue data path xuan.ding
2022-03-10  6:54 ` [RFC,v3 0/2] vhost: " xuan.ding
2022-03-10  6:54   ` [RFC,v3 1/2] vhost: support async dequeue for split ring xuan.ding
2022-03-31  9:15     ` Maxime Coquelin [this message]
2022-03-31 11:20       ` Ding, Xuan
2022-03-10  6:54   ` [RFC,v3 2/2] examples/vhost: support async dequeue data path xuan.ding

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=38827506-7d29-9081-bb48-dbe01bd69b72@redhat.com \
    --to=maxime.coquelin@redhat.com \
    --cc=chenbo.xia@intel.com \
    --cc=cheng1.jiang@intel.com \
    --cc=dev@dpdk.org \
    --cc=jiayu.hu@intel.com \
    --cc=liangma@liangbit.com \
    --cc=sunil.pai.g@intel.com \
    --cc=xuan.ding@intel.com \
    --cc=yuanx.wang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.