From: Xiao Wang
Subject: [PATCH v8 2/5] vfio: add multi container support
Date: Mon, 16 Apr 2018 23:34:35 +0800
Message-ID: <20180416153438.79355-3-xiao.w.wang@intel.com>
In-Reply-To: <20180416153438.79355-1-xiao.w.wang@intel.com>
References: <20180415153349.62105-2-xiao.w.wang@intel.com>
 <20180416153438.79355-1-xiao.w.wang@intel.com>
To: ferruh.yigit@intel.com, anatoly.burakov@intel.com
Cc: dev@dpdk.org, maxime.coquelin@redhat.com, zhihong.wang@intel.com,
 tiwei.bie@intel.com, jianfeng.tan@intel.com, cunming.liang@intel.com,
 dan.daly@intel.com, thomas@monjalon.net, Xiao Wang, Junjie Chen

This patch adds APIs to support container create/destroy and device
bind/unbind with a container. It also provides an API for IOMMU
programming on a specified container.

A driver can use the rte_vfio_container_create helper to create a new
container from EAL, and use rte_vfio_container_group_bind to bind a
device to the newly created container. During rte_vfio_setup_device,
the container bound with the device will be used for IOMMU setup.

Signed-off-by: Junjie Chen
Signed-off-by: Xiao Wang
Reviewed-by: Maxime Coquelin
Reviewed-by: Ferruh Yigit
---
 lib/librte_eal/bsdapp/eal/eal.c          |  52 ++++++
 lib/librte_eal/common/include/rte_vfio.h | 128 ++++++++++++++-
 lib/librte_eal/linuxapp/eal/eal_vfio.c   | 269 ++++++++++++++++++++++++++++---
 lib/librte_eal/rte_eal_version.map       |   6 +
 4 files changed, 436 insertions(+), 19 deletions(-)

diff --git a/lib/librte_eal/bsdapp/eal/eal.c b/lib/librte_eal/bsdapp/eal/eal.c
index bfbec0d7f..b5c0386e4 100644
--- a/lib/librte_eal/bsdapp/eal/eal.c
+++ b/lib/librte_eal/bsdapp/eal/eal.c
@@ -769,6 +769,14 @@ int rte_vfio_noiommu_is_enabled(void);
 int rte_vfio_clear_group(int vfio_group_fd);
 int rte_vfio_dma_map(uint64_t vaddr, uint64_t iova, uint64_t len);
 int rte_vfio_dma_unmap(uint64_t vaddr, uint64_t iova, uint64_t len);
+int rte_vfio_container_create(void);
+int rte_vfio_container_destroy(int container_fd);
+int rte_vfio_container_group_bind(int container_fd, int iommu_group_num);
+int rte_vfio_container_group_unbind(int container_fd, int iommu_group_num);
+int rte_vfio_container_dma_map(int container_fd, uint64_t vaddr,
+		uint64_t iova, uint64_t len);
+int rte_vfio_container_dma_unmap(int container_fd, uint64_t vaddr,
+		uint64_t iova, uint64_t len);
 
 int rte_vfio_setup_device(__rte_unused const char *sysfs_base,
 		      __rte_unused const char *dev_addr,
@@ -838,3 +846,47 @@ rte_vfio_get_group_fd(__rte_unused int iommu_group_num)
 {
 	return -1;
 }
+
+int __rte_experimental
+rte_vfio_container_create(void)
+{
+	return -1;
+}
+
+int __rte_experimental
+rte_vfio_container_destroy(__rte_unused int container_fd)
+{
+	return -1;
+}
+
+int __rte_experimental
+rte_vfio_container_group_bind(__rte_unused int container_fd,
+		__rte_unused int iommu_group_num)
+{
+	return -1;
+}
+
+int __rte_experimental
+rte_vfio_container_group_unbind(__rte_unused int container_fd,
+		__rte_unused int iommu_group_num)
+{
+	return -1;
+}
+
+int __rte_experimental
+rte_vfio_container_dma_map(__rte_unused int container_fd,
+		__rte_unused uint64_t vaddr,
+		__rte_unused uint64_t iova,
+		__rte_unused uint64_t len)
+{
+	return -1;
+}
+
+int __rte_experimental
+rte_vfio_container_dma_unmap(__rte_unused int container_fd,
+		__rte_unused uint64_t vaddr,
+		__rte_unused uint64_t iova,
+		__rte_unused uint64_t len)
+{
+	return -1;
+}
diff --git a/lib/librte_eal/common/include/rte_vfio.h b/lib/librte_eal/common/include/rte_vfio.h
index c4a2e606f..c10c206a3 100644
--- a/lib/librte_eal/common/include/rte_vfio.h
+++ b/lib/librte_eal/common/include/rte_vfio.h
@@ -154,7 +154,10 @@ rte_vfio_clear_group(int vfio_group_fd);
 /**
  * Map memory region for use with VFIO.
  *
- * @note requires at least one device to be attached at the time of mapping.
+ * @note Requires at least one device to be attached at the time of
+ *   mapping. DMA maps done via this API will only apply to the default
+ *   container and will not apply to any of the containers created
+ *   via rte_vfio_container_create().
  *
  * @param vaddr
  *   Starting virtual address of memory to be mapped.
@@ -245,6 +248,129 @@ rte_vfio_get_container_fd(void);
 int __rte_experimental
 rte_vfio_get_group_fd(int iommu_group_num);
 
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Create a new container for device binding.
+ *
+ * @note Any newly allocated DPDK memory will not be mapped into these
+ *   containers by default; the user needs to manage DMA mappings for
+ *   any container created by this API.
+ *
+ * @return
+ *   the container fd if successful
+ *   <0 if failed
+ */
+int __rte_experimental
+rte_vfio_container_create(void);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Destroy the container and unbind all vfio groups within it.
+ *
+ * @param container_fd
+ *   the container fd to destroy
+ *
+ * @return
+ *   0 if successful
+ *   <0 if failed
+ */
+int __rte_experimental
+rte_vfio_container_destroy(int container_fd);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Bind an IOMMU group to a container.
+ *
+ * @param container_fd
+ *   the container's fd
+ *
+ * @param iommu_group_num
+ *   the IOMMU group number to bind to the container
+ *
+ * @return
+ *   group fd if successful
+ *   <0 if failed
+ */
+int __rte_experimental
+rte_vfio_container_group_bind(int container_fd, int iommu_group_num);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Unbind an IOMMU group from a container.
+ *
+ * @param container_fd
+ *   the container fd
+ *
+ * @param iommu_group_num
+ *   the IOMMU group number to remove from the container
+ *
+ * @return
+ *   0 if successful
+ *   <0 if failed
+ */
+int __rte_experimental
+rte_vfio_container_group_unbind(int container_fd, int iommu_group_num);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Perform DMA mapping for devices in a container.
+ *
+ * @param container_fd
+ *   the specified container fd
+ *
+ * @param vaddr
+ *   Starting virtual address of memory to be mapped.
+ *
+ * @param iova
+ *   Starting IOVA address of memory to be mapped.
+ *
+ * @param len
+ *   Length of memory segment being mapped.
+ *
+ * @return
+ *   0 if successful
+ *   <0 if failed
+ */
+int __rte_experimental
+rte_vfio_container_dma_map(int container_fd, uint64_t vaddr,
+		uint64_t iova, uint64_t len);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Perform DMA unmapping for devices in a container.
+ *
+ * @param container_fd
+ *   the specified container fd
+ *
+ * @param vaddr
+ *   Starting virtual address of memory to be unmapped.
+ *
+ * @param iova
+ *   Starting IOVA address of memory to be unmapped.
+ *
+ * @param len
+ *   Length of memory segment being unmapped.
+ *
+ * @return
+ *   0 if successful
+ *   <0 if failed
+ */
+int __rte_experimental
+rte_vfio_container_dma_unmap(int container_fd, uint64_t vaddr,
+		uint64_t iova, uint64_t len);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c b/lib/librte_eal/linuxapp/eal/eal_vfio.c
index 6289f6316..64ea194f0 100644
--- a/lib/librte_eal/linuxapp/eal/eal_vfio.c
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c
@@ -1532,19 +1532,15 @@ vfio_dma_mem_map(struct vfio_config *vfio_cfg, uint64_t vaddr, uint64_t iova,
 			len, do_map);
 }
 
-int __rte_experimental
-rte_vfio_dma_map(uint64_t vaddr, uint64_t iova, uint64_t len)
+static int
+container_dma_map(struct vfio_config *vfio_cfg, uint64_t vaddr, uint64_t iova,
+		uint64_t len)
 {
 	struct user_mem_map *new_map;
 	struct user_mem_maps *user_mem_maps;
 	int ret = 0;
 
-	if (len == 0) {
-		rte_errno = EINVAL;
-		return -1;
-	}
-
-	user_mem_maps = &default_vfio_cfg->mem_maps;
+	user_mem_maps = &vfio_cfg->mem_maps;
 	rte_spinlock_recursive_lock(&user_mem_maps->lock);
 	if (user_mem_maps->n_maps == VFIO_MAX_USER_MEM_MAPS) {
 		RTE_LOG(ERR, EAL, "No more space for user mem maps\n");
@@ -1553,7 +1549,7 @@ rte_vfio_dma_map(uint64_t vaddr, uint64_t iova, uint64_t len)
 		goto out;
 	}
 	/* map the entry */
-	if (vfio_dma_mem_map(default_vfio_cfg, vaddr, iova, len, 1)) {
+	if (vfio_dma_mem_map(vfio_cfg, vaddr, iova, len, 1)) {
 		/* technically, this will fail if there are currently no devices
 		 * plugged in, even if a device were added later, this mapping
 		 * might have succeeded. however, since we cannot verify if this
@@ -1577,19 +1573,15 @@ rte_vfio_dma_map(uint64_t vaddr, uint64_t iova, uint64_t len)
 	return ret;
 }
 
-int __rte_experimental
-rte_vfio_dma_unmap(uint64_t vaddr, uint64_t iova, uint64_t len)
+static int
+container_dma_unmap(struct vfio_config *vfio_cfg, uint64_t vaddr, uint64_t iova,
+		uint64_t len)
 {
 	struct user_mem_map *map, *new_map = NULL;
 	struct user_mem_maps *user_mem_maps;
 	int ret = 0;
 
-	if (len == 0) {
-		rte_errno = EINVAL;
-		return -1;
-	}
-
-	user_mem_maps = &default_vfio_cfg->mem_maps;
+	user_mem_maps = &vfio_cfg->mem_maps;
 	rte_spinlock_recursive_lock(&user_mem_maps->lock);
 
 	/* find our mapping */
@@ -1614,7 +1606,7 @@ rte_vfio_dma_unmap(uint64_t vaddr, uint64_t iova, uint64_t len)
 	}
 
 	/* unmap the entry */
-	if (vfio_dma_mem_map(default_vfio_cfg, vaddr, iova, len, 0)) {
+	if (vfio_dma_mem_map(vfio_cfg, vaddr, iova, len, 0)) {
 		/* there may not be any devices plugged in, so unmapping will
 		 * fail with ENODEV/ENOTSUP rte_errno values, but that doesn't
 		 * stop us from removing the mapping, as the assumption is we
@@ -1653,6 +1645,28 @@ rte_vfio_dma_unmap(uint64_t vaddr, uint64_t iova, uint64_t len)
 	return ret;
 }
 
+int __rte_experimental
+rte_vfio_dma_map(uint64_t vaddr, uint64_t iova, uint64_t len)
+{
+	if (len == 0) {
+		rte_errno = EINVAL;
+		return -1;
+	}
+
+	return container_dma_map(default_vfio_cfg, vaddr, iova, len);
+}
+
+int __rte_experimental
+rte_vfio_dma_unmap(uint64_t vaddr, uint64_t iova, uint64_t len)
+{
+	if (len == 0) {
+		rte_errno = EINVAL;
+		return -1;
+	}
+
+	return container_dma_unmap(default_vfio_cfg, vaddr, iova, len);
+}
+
 int
 rte_vfio_noiommu_is_enabled(void)
 {
@@ -1685,6 +1699,181 @@ rte_vfio_noiommu_is_enabled(void)
 	return c == 'Y';
 }
 
+int __rte_experimental
+rte_vfio_container_create(void)
+{
+	int i;
+
+	/* Find an empty slot to store new vfio config */
+	for (i = 1; i < VFIO_MAX_CONTAINERS; i++) {
+		if (vfio_cfgs[i].vfio_container_fd == -1)
+			break;
+	}
+
+	if (i == VFIO_MAX_CONTAINERS) {
+		RTE_LOG(ERR, EAL, "exceed max vfio container limit\n");
+		return -1;
+	}
+
+	vfio_cfgs[i].vfio_container_fd = rte_vfio_get_container_fd();
+	if (vfio_cfgs[i].vfio_container_fd < 0) {
+		RTE_LOG(NOTICE, EAL, "fail to create a new container\n");
+		return -1;
+	}
+
+	return vfio_cfgs[i].vfio_container_fd;
+}
+
+int __rte_experimental
+rte_vfio_container_destroy(int container_fd)
+{
+	struct vfio_config *vfio_cfg;
+	int i;
+
+	vfio_cfg = get_vfio_cfg_by_container_fd(container_fd);
+	if (vfio_cfg == NULL) {
+		RTE_LOG(ERR, EAL, "Invalid container fd\n");
+		return -1;
+	}
+
+	for (i = 0; i < VFIO_MAX_GROUPS; i++)
+		if (vfio_cfg->vfio_groups[i].group_num != -1)
+			rte_vfio_container_group_unbind(container_fd,
+				vfio_cfg->vfio_groups[i].group_num);
+
+	close(container_fd);
+	vfio_cfg->vfio_container_fd = -1;
+	vfio_cfg->vfio_active_groups = 0;
+	vfio_cfg->vfio_iommu_type = NULL;
+
+	return 0;
+}
+
+int __rte_experimental
+rte_vfio_container_group_bind(int container_fd, int iommu_group_num)
+{
+	struct vfio_config *vfio_cfg;
+	struct vfio_group *cur_grp;
+	int vfio_group_fd;
+	int i;
+
+	vfio_cfg = get_vfio_cfg_by_container_fd(container_fd);
+	if (vfio_cfg == NULL) {
+		RTE_LOG(ERR, EAL, "Invalid container fd\n");
+		return -1;
+	}
+
+	/* Check room for new group */
+	if (vfio_cfg->vfio_active_groups == VFIO_MAX_GROUPS) {
+		RTE_LOG(ERR, EAL, "Maximum number of VFIO groups reached!\n");
+		return -1;
+	}
+
+	/* Get an index for the new group */
+	for (i = 0; i < VFIO_MAX_GROUPS; i++)
+		if (vfio_cfg->vfio_groups[i].group_num == -1) {
+			cur_grp = &vfio_cfg->vfio_groups[i];
+			break;
+		}
+
+	/* This should not happen */
+	if (i == VFIO_MAX_GROUPS) {
+		RTE_LOG(ERR, EAL, "No VFIO group free slot found\n");
+		return -1;
+	}
+
+	vfio_group_fd = vfio_open_group_fd(iommu_group_num);
+	if (vfio_group_fd < 0) {
+		RTE_LOG(ERR, EAL, "Failed to open group %d\n", iommu_group_num);
+		return -1;
+	}
+	cur_grp->group_num = iommu_group_num;
+	cur_grp->fd = vfio_group_fd;
+	cur_grp->devices = 0;
+	vfio_cfg->vfio_active_groups++;
+
+	return vfio_group_fd;
+}
+
+int __rte_experimental
+rte_vfio_container_group_unbind(int container_fd, int iommu_group_num)
+{
+	struct vfio_config *vfio_cfg;
+	struct vfio_group *cur_grp;
+	int i;
+
+	vfio_cfg = get_vfio_cfg_by_container_fd(container_fd);
+	if (vfio_cfg == NULL) {
+		RTE_LOG(ERR, EAL, "Invalid container fd\n");
+		return -1;
+	}
+
+	for (i = 0; i < VFIO_MAX_GROUPS; i++) {
+		if (vfio_cfg->vfio_groups[i].group_num == iommu_group_num) {
+			cur_grp = &vfio_cfg->vfio_groups[i];
+			break;
+		}
+	}
+
+	/* This should not happen */
+	if (i == VFIO_MAX_GROUPS) {
+		RTE_LOG(ERR, EAL, "Specified group number not found\n");
+		return -1;
+	}
+
+	if (cur_grp->fd >= 0 && close(cur_grp->fd) < 0) {
+		RTE_LOG(ERR, EAL, "Error when closing vfio_group_fd for"
+			" iommu_group_num %d\n", iommu_group_num);
+		return -1;
+	}
+	cur_grp->group_num = -1;
+	cur_grp->fd = -1;
+	cur_grp->devices = 0;
+	vfio_cfg->vfio_active_groups--;
+
+	return 0;
+}
+
+int __rte_experimental
+rte_vfio_container_dma_map(int container_fd, uint64_t vaddr, uint64_t iova,
+		uint64_t len)
+{
+	struct vfio_config *vfio_cfg;
+
+	if (len == 0) {
+		rte_errno = EINVAL;
+		return -1;
+	}
+
+	vfio_cfg = get_vfio_cfg_by_container_fd(container_fd);
+	if (vfio_cfg == NULL) {
+		RTE_LOG(ERR, EAL, "Invalid container fd\n");
+		return -1;
+	}
+
+	return container_dma_map(vfio_cfg, vaddr, iova, len);
+}
+
+int __rte_experimental
+rte_vfio_container_dma_unmap(int container_fd, uint64_t vaddr, uint64_t iova,
+		uint64_t len)
+{
+	struct vfio_config *vfio_cfg;
+
+	if (len == 0) {
+		rte_errno = EINVAL;
+		return -1;
+	}
+
+	vfio_cfg = get_vfio_cfg_by_container_fd(container_fd);
+	if (vfio_cfg == NULL) {
+		RTE_LOG(ERR, EAL, "Invalid container fd\n");
+		return -1;
+	}
+
+	return container_dma_unmap(vfio_cfg, vaddr, iova, len);
+}
+
 #else
 
 int __rte_experimental
@@ -1701,4 +1890,48 @@ rte_vfio_dma_unmap(uint64_t __rte_unused vaddr, uint64_t __rte_unused iova,
 	return -1;
 }
 
+int __rte_experimental
+rte_vfio_container_create(void)
+{
+	return -1;
+}
+
+int __rte_experimental
+rte_vfio_container_destroy(__rte_unused int container_fd)
+{
+	return -1;
+}
+
+int __rte_experimental
+rte_vfio_container_group_bind(__rte_unused int container_fd,
+		__rte_unused int iommu_group_num)
+{
+	return -1;
+}
+
+int __rte_experimental
+rte_vfio_container_group_unbind(__rte_unused int container_fd,
+		__rte_unused int iommu_group_num)
+{
+	return -1;
+}
+
+int __rte_experimental
+rte_vfio_container_dma_map(__rte_unused int container_fd,
+		__rte_unused uint64_t vaddr,
+		__rte_unused uint64_t iova,
+		__rte_unused uint64_t len)
+{
+	return -1;
+}
+
+int __rte_experimental
+rte_vfio_container_dma_unmap(__rte_unused int container_fd,
+		__rte_unused uint64_t vaddr,
+		__rte_unused uint64_t iova,
+		__rte_unused uint64_t len)
+{
+	return -1;
+}
+
 #endif
diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
index d02d80b8a..28f51f8d2 100644
--- a/lib/librte_eal/rte_eal_version.map
+++ b/lib/librte_eal/rte_eal_version.map
@@ -293,5 +293,11 @@ EXPERIMENTAL {
 	rte_vfio_get_container_fd;
 	rte_vfio_get_group_fd;
 	rte_vfio_get_group_num;
+	rte_vfio_container_create;
+	rte_vfio_container_destroy;
+	rte_vfio_container_dma_map;
+	rte_vfio_container_dma_unmap;
+	rte_vfio_container_group_bind;
+	rte_vfio_container_group_unbind;
 
 } DPDK_18.02;
-- 
2.15.1
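
[Editor's note] For context, the sketch below shows how a driver might
consume the new API, following the flow described in the commit message:
create a dedicated container, bind the device's IOMMU group to it, and
program a DMA mapping before calling rte_vfio_setup_device(). This is a
minimal illustration, not part of the patch; the SYSFS_PCI_DEVICES and
DEV_ADDR macros and the setup_isolated_container() helper are hypothetical
placeholders, and error handling is abbreviated.

#include <rte_vfio.h>

/* Hypothetical device location; replace with the real PCI address in use. */
#define SYSFS_PCI_DEVICES "/sys/bus/pci/devices"
#define DEV_ADDR "0000:06:00.2"

static int
setup_isolated_container(uint64_t vaddr, uint64_t iova, uint64_t len)
{
	int container_fd, iommu_group_num, group_fd;

	/* Create a container separate from the default one held by EAL. */
	container_fd = rte_vfio_container_create();
	if (container_fd < 0)
		return -1;

	/* Look up the device's IOMMU group and bind it to the container;
	 * rte_vfio_get_group_num() returns 1 on success.
	 */
	if (rte_vfio_get_group_num(SYSFS_PCI_DEVICES, DEV_ADDR,
			&iommu_group_num) <= 0)
		goto err;

	group_fd = rte_vfio_container_group_bind(container_fd,
			iommu_group_num);
	if (group_fd < 0)
		goto err;

	/* DPDK memory is not mapped into this container automatically, so
	 * program the IOMMU explicitly for the memory the device will use.
	 */
	if (rte_vfio_container_dma_map(container_fd, vaddr, iova, len) < 0) {
		rte_vfio_container_group_unbind(container_fd, iommu_group_num);
		goto err;
	}

	return container_fd;

err:
	rte_vfio_container_destroy(container_fd);
	return -1;
}

A subsequent rte_vfio_setup_device() call for this device will then use the
container bound to the device for IOMMU setup, and on teardown
rte_vfio_container_destroy() unbinds any groups still attached before
closing the container fd.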