All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Hu, Jiayu" <jiayu.hu@intel.com>
To: "Ding, Xuan" <xuan.ding@intel.com>, "dev@dpdk.org" <dev@dpdk.org>,
	"Burakov, Anatoly" <anatoly.burakov@intel.com>,
	"maxime.coquelin@redhat.com" <maxime.coquelin@redhat.com>,
	"Xia, Chenbo" <chenbo.xia@intel.com>
Cc: "Jiang, Cheng1" <cheng1.jiang@intel.com>,
	"Richardson, Bruce" <bruce.richardson@intel.com>,
	"Pai G, Sunil" <sunil.pai.g@intel.com>,
	"Wang, Yinan" <yinan.wang@intel.com>,
	"Yang, YvonneX" <yvonnex.yang@intel.com>
Subject: Re: [dpdk-dev] [PATCH v3 2/2] vhost: enable IOMMU for async vhost
Date: Mon, 27 Sep 2021 04:17:39 +0000	[thread overview]
Message-ID: <144cf26ebf434ff4b6f3b0b22ebc41a6@intel.com> (raw)
In-Reply-To: <20210925100358.61995-3-xuan.ding@intel.com>

Hi Xuan,

> -----Original Message-----
> From: Ding, Xuan <xuan.ding@intel.com>
> Sent: Saturday, September 25, 2021 6:04 PM
> To: dev@dpdk.org; Burakov, Anatoly <anatoly.burakov@intel.com>;
> maxime.coquelin@redhat.com; Xia, Chenbo <chenbo.xia@intel.com>
> Cc: Hu, Jiayu <jiayu.hu@intel.com>; Jiang, Cheng1 <cheng1.jiang@intel.com>;
> Richardson, Bruce <bruce.richardson@intel.com>; Pai G, Sunil
> <sunil.pai.g@intel.com>; Wang, Yinan <yinan.wang@intel.com>; Yang,
> YvonneX <yvonnex.yang@intel.com>; Ding, Xuan <xuan.ding@intel.com>
> Subject: [PATCH v3 2/2] vhost: enable IOMMU for async vhost
> 
> The use of IOMMU has many advantages, such as isolation and address
> translation. This patch extends the capability of DMA engine to use IOMMU if
> the DMA engine is bound to vfio.
> 
> When the memory table is set, the guest memory will be mapped into the
> default container of DPDK.
> 
> Signed-off-by: Xuan Ding <xuan.ding@intel.com>
> ---
>  lib/vhost/vhost.h      |   4 ++
>  lib/vhost/vhost_user.c | 112
> ++++++++++++++++++++++++++++++++++++++++-
>  2 files changed, 114 insertions(+), 2 deletions(-)
> 
> diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h index
> 89a31e4ca8..bc5695e899 100644
> --- a/lib/vhost/vhost.h
> +++ b/lib/vhost/vhost.h
> @@ -370,6 +370,10 @@ struct virtio_net {
>  	int16_t			broadcast_rarp;
>  	uint32_t		nr_vring;
>  	int			async_copy;
> +
> +	/* Record the dma map status for each region. */
> +	bool			*async_map_status;
> +
>  	int			extbuf;
>  	int			linearbuf;
>  	struct vhost_virtqueue	*virtqueue[VHOST_MAX_QUEUE_PAIRS * 2];
> diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c index
> 29a4c9af60..3990e9b057 100644
> --- a/lib/vhost/vhost_user.c
> +++ b/lib/vhost/vhost_user.c
> @@ -45,6 +45,8 @@
>  #include <rte_common.h>
>  #include <rte_malloc.h>
>  #include <rte_log.h>
> +#include <rte_vfio.h>
> +#include <rte_errno.h>
> 
>  #include "iotlb.h"
>  #include "vhost.h"
> @@ -141,6 +143,63 @@ get_blk_size(int fd)
>  	return ret == -1 ? (uint64_t)-1 : (uint64_t)stat.st_blksize;  }
> 
> +static int
> +async_dma_map(struct rte_vhost_mem_region *region, bool
> +*dma_map_success, bool do_map) {
> +	uint64_t host_iova;
> +	int ret = 0;
> +
> +	host_iova = rte_mem_virt2iova((void *)(uintptr_t)region-
> >host_user_addr);
> +	if (do_map) {
> +		/* Add mapped region into the default container of DPDK. */
> +		ret =
> rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD,
> +						 region->host_user_addr,
> +						 host_iova,
> +						 region->size);
> +		*dma_map_success = ret == 0;
> +
> +		if (ret) {
> +			/*
> +			 * DMA device may bind with kernel driver, in this
> case,
> +			 * we don't need to program IOMMU manually.
> However, if no
> +			 * device is bound with vfio/uio in DPDK, and vfio
> kernel
> +			 * module is loaded, the API will still be called and
> return
> +			 * with ENODEV/ENOSUP.
> +			 *
> +			 * DPDK VFIO only returns ENODEV/ENOSUP in very
> similar
> +			 * situations(VFIO either unsupported, or supported
> +			 * but no devices found). Either way, no mappings
> could be
> +			 * performed. We treat it as normal case in async
> path.
> +			 */
> +			if (rte_errno == ENODEV && rte_errno == ENOTSUP) {
> +				return 0;
> +			} else {
> +				VHOST_LOG_CONFIG(ERR, "DMA engine map
> failed\n");
> +				return ret;
> +			}
> +		}
> +
> +	} else {
> +		/* No need to do vfio unmap if the map failed. */
> +		if (!*dma_map_success)
> +			return 0;
> +
> +		/* Remove mapped region from the default container of
> DPDK. */
> +		ret =
> rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD,
> +						   region->host_user_addr,
> +						   host_iova,
> +						   region->size);
> +		if (ret) {
> +			VHOST_LOG_CONFIG(ERR, "DMA engine unmap
> failed\n");
> +			return ret;
> +		}
> +		/* Clear the flag once the unmap succeeds. */
> +		*dma_map_success = 0;
> +	}
> +
> +	return ret;
> +}
> +
>  static void
>  free_mem_region(struct virtio_net *dev)  { @@ -153,6 +212,9 @@
> free_mem_region(struct virtio_net *dev)
>  	for (i = 0; i < dev->mem->nregions; i++) {
>  		reg = &dev->mem->regions[i];
>  		if (reg->host_user_addr) {
> +			if (dev->async_copy && rte_vfio_is_enabled("vfio"))
> +				async_dma_map(reg, &dev-
> >async_map_status[i], false);
> +
>  			munmap(reg->mmap_addr, reg->mmap_size);
>  			close(reg->fd);
>  		}
> @@ -203,6 +265,11 @@ vhost_backend_cleanup(struct virtio_net *dev)
>  	}
> 
>  	dev->postcopy_listening = 0;
> +
> +	if (dev->async_map_status) {
> +		rte_free(dev->async_map_status);
> +		dev->async_map_status = NULL;
> +	}
>  }
> 
>  static void
> @@ -621,6 +688,17 @@ numa_realloc(struct virtio_net *dev, int index)
>  	}
>  	dev->mem = mem;
> 
> +	if (dev->async_copy && rte_vfio_is_enabled("vfio")) {
> +		dev->async_map_status = rte_zmalloc_socket("async-dma-
> map-status",
> +					sizeof(bool) * dev->mem->nregions,
> 0, node);
> +		if (!dev->async_map_status) {
> +			VHOST_LOG_CONFIG(ERR,
> +				"(%d) failed to realloc dma mapping status on
> node\n",
> +				dev->vid);
> +			return dev;
> +		}
> +	}
> +
>  	gp = rte_realloc_socket(dev->guest_pages, dev->max_guest_pages *
> sizeof(*gp),
>  			RTE_CACHE_LINE_SIZE, node);
>  	if (!gp) {
> @@ -1151,12 +1229,14 @@ vhost_user_postcopy_register(struct virtio_net
> *dev, int main_fd,  static int  vhost_user_mmap_region(struct virtio_net *dev,
>  		struct rte_vhost_mem_region *region,
> +		uint32_t region_index,
>  		uint64_t mmap_offset)
>  {
>  	void *mmap_addr;
>  	uint64_t mmap_size;
>  	uint64_t alignment;
>  	int populate;
> +	int ret;
> 
>  	/* Check for memory_size + mmap_offset overflow */
>  	if (mmap_offset >= -region->size) {
> @@ -1210,13 +1290,25 @@ vhost_user_mmap_region(struct virtio_net *dev,
>  	region->mmap_size = mmap_size;
>  	region->host_user_addr = (uint64_t)(uintptr_t)mmap_addr +
> mmap_offset;
> 
> -	if (dev->async_copy)
> +	if (dev->async_copy) {
>  		if (add_guest_pages(dev, region, alignment) < 0) {
>  			VHOST_LOG_CONFIG(ERR,
>  					"adding guest pages to region
> failed.\n");
>  			return -1;
>  		}
> 
> +		if (rte_vfio_is_enabled("vfio")) {
> +			ret = async_dma_map(region, &dev-
> >async_map_status[region_index], true);
> +			if (ret) {
> +				VHOST_LOG_CONFIG(ERR, "Configure
> IOMMU for DMA "
> +							"engine failed\n");
> +				rte_free(dev->async_map_status);
> +				dev->async_map_status = NULL;

The freed dev->async_map_status is accessed in free_mem_region() later.
You need to free it after calling free_mem_region().

> +				return -1;
> +			}
> +		}
> +	}
> +
>  	VHOST_LOG_CONFIG(INFO,
>  			"guest memory region size: 0x%" PRIx64 "\n"
>  			"\t guest physical addr: 0x%" PRIx64 "\n"
> @@ -1291,6 +1383,11 @@ vhost_user_set_mem_table(struct virtio_net
> **pdev, struct VhostUserMsg *msg,
>  		dev->mem = NULL;
>  	}
> 
> +	if (dev->async_map_status) {
> +		rte_free(dev->async_map_status);
> +		dev->async_map_status = NULL;
> +	}

To handle the guest memory hot-plug case, you need to unmap the
IOMMU tables before programming the IOMMU for the new memory. But
you seem to only free the old dev->async_map_status.

Thanks,
Jiayu

> +
>  	/* Flush IOTLB cache as previous HVAs are now invalid */
>  	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
>  		for (i = 0; i < dev->nr_vring; i++)
> @@ -1329,6 +1426,17 @@ vhost_user_set_mem_table(struct virtio_net
> **pdev, struct VhostUserMsg *msg,
>  		goto free_guest_pages;
>  	}
> 
> +	if (dev->async_copy) {
> +		dev->async_map_status = rte_zmalloc_socket("async-dma-
> map-status",
> +					sizeof(bool) * memory->nregions, 0,
> numa_node);
> +		if (!dev->async_map_status) {
> +			VHOST_LOG_CONFIG(ERR,
> +				"(%d) failed to allocate memory for dma
> mapping status\n",
> +				dev->vid);
> +			goto free_guest_pages;
> +		}
> +	}
> +
>  	for (i = 0; i < memory->nregions; i++) {
>  		reg = &dev->mem->regions[i];
> 
> @@ -1345,7 +1453,7 @@ vhost_user_set_mem_table(struct virtio_net
> **pdev, struct VhostUserMsg *msg,
> 
>  		mmap_offset = memory->regions[i].mmap_offset;
> 
> -		if (vhost_user_mmap_region(dev, reg, mmap_offset) < 0) {
> +		if (vhost_user_mmap_region(dev, reg, i, mmap_offset) < 0) {
>  			VHOST_LOG_CONFIG(ERR, "Failed to mmap
> region %u\n", i);
>  			goto free_mem_table;
>  		}
> --
> 2.17.1


  reply	other threads:[~2021-09-27  4:17 UTC|newest]

Thread overview: 40+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-09-01  5:30 [dpdk-dev] [PATCH 0/2] *** support IOMMU for DMA device *** Xuan Ding
2021-09-01  5:30 ` [dpdk-dev] [PATCH 1/2] vfio: allow partially unmapping adjacent memory Xuan Ding
2021-09-01  5:30 ` [dpdk-dev] [PATCH 2/2] vhost: enable IOMMU for async vhost Xuan Ding
2021-09-17  5:25 ` [dpdk-dev] [PATCH v2 0/2] support IOMMU for DMA device Xuan Ding
2021-09-17  5:25   ` [dpdk-dev] [PATCH v2 1/2] vfio: allow partially unmapping adjacent memory Xuan Ding
2021-09-17  5:25   ` [dpdk-dev] [PATCH v2 2/2] vhost: enable IOMMU for async vhost Xuan Ding
2021-09-23 14:39     ` Hu, Jiayu
2021-09-23 14:56       ` Maxime Coquelin
2021-09-24  1:53         ` Xia, Chenbo
2021-09-24  7:13           ` Maxime Coquelin
2021-09-24  7:35             ` Xia, Chenbo
2021-09-24  8:18               ` Ding, Xuan
2021-09-25 10:03 ` [dpdk-dev] [PATCH v3 0/2] support IOMMU for DMA device Xuan Ding
2021-09-25 10:03   ` [dpdk-dev] [PATCH v3 1/2] vfio: allow partially unmapping adjacent memory Xuan Ding
2021-09-25 10:03   ` [dpdk-dev] [PATCH v3 2/2] vhost: enable IOMMU for async vhost Xuan Ding
2021-09-27  4:17     ` Hu, Jiayu [this message]
2021-09-27  4:55       ` Ding, Xuan
2021-09-25 10:33 ` [dpdk-dev] [PATCH v4 0/2] support IOMMU for DMA device Xuan Ding
2021-09-25 10:33   ` [dpdk-dev] [PATCH v4 1/2] vfio: allow partially unmapping adjacent memory Xuan Ding
2021-09-25 10:33   ` [dpdk-dev] [PATCH v4 2/2] vhost: enable IOMMU for async vhost Xuan Ding
2021-09-27  7:48 ` [dpdk-dev] [PATCH v5 0/2] support IOMMU for DMA device Xuan Ding
2021-09-27  7:48   ` [dpdk-dev] [PATCH v5 1/2] vfio: allow partially unmapping adjacent memory Xuan Ding
2021-09-27  7:48   ` [dpdk-dev] [PATCH v5 2/2] vhost: enable IOMMU for async vhost Xuan Ding
2021-09-27 12:13     ` Burakov, Anatoly
2021-09-28  9:03       ` Ding, Xuan
2021-09-29  2:41 ` [dpdk-dev] [PATCH v6 0/2] support IOMMU for DMA device Xuan Ding
2021-09-29  2:41   ` [dpdk-dev] [PATCH v6 1/2] vfio: allow partially unmapping adjacent memory Xuan Ding
2021-09-29  2:41   ` [dpdk-dev] [PATCH v6 2/2] vhost: enable IOMMU for async vhost Xuan Ding
2021-09-29  6:12     ` Hu, Jiayu
2021-09-29  9:39       ` Burakov, Anatoly
2021-09-30  5:17         ` Hu, Jiayu
2021-09-30  5:19     ` Hu, Jiayu
2021-10-11  7:59 ` [dpdk-dev] [PATCH v7 0/2] Support IOMMU for DMA device Xuan Ding
2021-10-11  7:59   ` [dpdk-dev] [PATCH v7 1/2] vfio: allow partially unmapping adjacent memory Xuan Ding
2021-10-13  6:57     ` Yang, YvonneX
2021-10-21  9:50     ` Maxime Coquelin
2021-10-11  7:59   ` [dpdk-dev] [PATCH v7 2/2] vhost: enable IOMMU for async vhost Xuan Ding
2021-10-13  6:57     ` Yang, YvonneX
2021-10-21 10:00     ` Maxime Coquelin
2021-10-21 12:33   ` [dpdk-dev] [PATCH v7 0/2] Support IOMMU for DMA device Maxime Coquelin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=144cf26ebf434ff4b6f3b0b22ebc41a6@intel.com \
    --to=jiayu.hu@intel.com \
    --cc=anatoly.burakov@intel.com \
    --cc=bruce.richardson@intel.com \
    --cc=chenbo.xia@intel.com \
    --cc=cheng1.jiang@intel.com \
    --cc=dev@dpdk.org \
    --cc=maxime.coquelin@redhat.com \
    --cc=sunil.pai.g@intel.com \
    --cc=xuan.ding@intel.com \
    --cc=yinan.wang@intel.com \
    --cc=yvonnex.yang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.