From: Xiao Wang <xiao.w.wang@intel.com>
To: ferruh.yigit@intel.com, anatoly.burakov@intel.com
Cc: dev@dpdk.org, maxime.coquelin@redhat.com, zhihong.wang@intel.com,
	tiwei.bie@intel.com, jianfeng.tan@intel.com,
	cunming.liang@intel.com, dan.daly@intel.com, thomas@monjalon.net,
	Xiao Wang <xiao.w.wang@intel.com>,
	Junjie Chen <junjie.j.chen@intel.com>
Subject: [PATCH v7 1/5] vfio: extend data structure for multi container
Date: Sun, 15 Apr 2018 23:33:45 +0800
Message-ID: <20180415153349.62105-2-xiao.w.wang@intel.com>
In-Reply-To: <20180415153349.62105-1-xiao.w.wang@intel.com>

Currently, the EAL VFIO framework binds a VFIO group fd to the default
container fd during rte_vfio_setup_device(). In some cases, e.g. vDPA
(vhost data path acceleration), we want to put a VFIO group into a
separate container and program the IOMMU via that container instead.

This patch extends the vfio_config structure to hold per-container
user_mem_maps and defines an array of vfio_config, one entry per
container. The next patch builds on this to add the container API.
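
As a rough illustration (a self-contained sketch with simplified
stand-in names, not code from the diff below), the resulting layout
gives each container slot its own group table and user DMA maps, with
slot 0 acting as the default container:

#include <stdint.h>

#define MAX_CONTAINERS 64               /* cf. RTE_MAX_VFIO_CONTAINERS */
#define MAX_GROUPS     64               /* cf. RTE_MAX_VFIO_GROUPS */
#define MAX_USER_MAPS  256              /* cf. VFIO_MAX_USER_MEM_MAPS */

struct grp  { int group_no; int fd; int devices; };
struct umap { uint64_t addr, iova, len; };

struct container_cfg {
	int container_fd;                /* fd obtained from /dev/vfio/vfio */
	int active_groups;
	struct grp  groups[MAX_GROUPS];  /* groups bound to this container */
	struct umap maps[MAX_USER_MAPS]; /* per-container user DMA maps */
	int n_maps;
};

static struct container_cfg cfgs[MAX_CONTAINERS];
static struct container_cfg *default_cfg = &cfgs[0];  /* slot 0 = default */

/* Resolve a group number to its owning container; a group that is not
 * bound anywhere yet falls back to the default container. The real
 * helper in the diff, get_vfio_cfg_by_group_no(), follows this pattern.
 */
static struct container_cfg *
cfg_for_group(int group_no)
{
	int i, j;

	for (i = 0; i < MAX_CONTAINERS; i++)
		for (j = 0; j < MAX_GROUPS; j++)
			if (cfgs[i].groups[j].group_no == group_no)
				return &cfgs[i];
	return default_cfg;
}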

Signed-off-by: Junjie Chen <junjie.j.chen@intel.com>
Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Reviewed-by: Ferruh Yigit <ferruh.yigit@intel.com>
---
 config/common_base                     |   1 +
 lib/librte_eal/linuxapp/eal/eal_vfio.c | 407 ++++++++++++++++++++++-----------
 lib/librte_eal/linuxapp/eal/eal_vfio.h |  19 +-
 3 files changed, 275 insertions(+), 152 deletions(-)

diff --git a/config/common_base b/config/common_base
index c4236fd1f..4a76d2f14 100644
--- a/config/common_base
+++ b/config/common_base
@@ -87,6 +87,7 @@ CONFIG_RTE_EAL_ALWAYS_PANIC_ON_ERROR=n
 CONFIG_RTE_EAL_IGB_UIO=n
 CONFIG_RTE_EAL_VFIO=n
 CONFIG_RTE_MAX_VFIO_GROUPS=64
+CONFIG_RTE_MAX_VFIO_CONTAINERS=64
 CONFIG_RTE_MALLOC_DEBUG=n
 CONFIG_RTE_EAL_NUMA_AWARE_HUGEPAGES=n
 CONFIG_RTE_USE_LIBBSD=n
diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c b/lib/librte_eal/linuxapp/eal/eal_vfio.c
index 589d7d478..46fba2d8d 100644
--- a/lib/librte_eal/linuxapp/eal/eal_vfio.c
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c
@@ -22,8 +22,46 @@
 
 #define VFIO_MEM_EVENT_CLB_NAME "vfio_mem_event_clb"
 
+/*
+ * we don't need to store device fd's anywhere since they can be obtained from
+ * the group fd via an ioctl() call.
+ */
+struct vfio_group {
+	int group_no;
+	int fd;
+	int devices;
+};
+
+/* hot plug/unplug of VFIO groups may cause all DMA maps to be dropped. we can
+ * recreate the mappings for DPDK segments, but we cannot do so for memory that
+ * was registered by the user themselves, so we need to store the user mappings
+ * somewhere, to recreate them later.
+ */
+#define VFIO_MAX_USER_MEM_MAPS 256
+struct user_mem_map {
+	uint64_t addr;
+	uint64_t iova;
+	uint64_t len;
+};
+
+struct user_mem_maps {
+	rte_spinlock_recursive_t lock;
+	int n_maps;
+	struct user_mem_map maps[VFIO_MAX_USER_MEM_MAPS];
+};
+
+struct vfio_config {
+	int vfio_enabled;
+	int vfio_container_fd;
+	int vfio_active_groups;
+	const struct vfio_iommu_type *vfio_iommu_type;
+	struct vfio_group vfio_groups[VFIO_MAX_GROUPS];
+	struct user_mem_maps mem_maps;
+};
+
 /* per-process VFIO config */
-static struct vfio_config vfio_cfg;
+static struct vfio_config vfio_cfgs[VFIO_MAX_CONTAINERS];
+static struct vfio_config *default_vfio_cfg = &vfio_cfgs[0];
 
 static int vfio_type1_dma_map(int);
 static int vfio_type1_dma_mem_map(int, uint64_t, uint64_t, uint64_t, int);
@@ -31,8 +69,8 @@ static int vfio_spapr_dma_map(int);
 static int vfio_spapr_dma_mem_map(int, uint64_t, uint64_t, uint64_t, int);
 static int vfio_noiommu_dma_map(int);
 static int vfio_noiommu_dma_mem_map(int, uint64_t, uint64_t, uint64_t, int);
-static int vfio_dma_mem_map(uint64_t vaddr, uint64_t iova, uint64_t len,
-		int do_map);
+static int vfio_dma_mem_map(struct vfio_config *vfio_cfg, uint64_t vaddr,
+		uint64_t iova, uint64_t len, int do_map);
 
 /* IOMMU types we support */
 static const struct vfio_iommu_type iommu_types[] = {
@@ -59,25 +97,6 @@ static const struct vfio_iommu_type iommu_types[] = {
 	},
 };
 
-/* hot plug/unplug of VFIO groups may cause all DMA maps to be dropped. we can
- * recreate the mappings for DPDK segments, but we cannot do so for memory that
- * was registered by the user themselves, so we need to store the user mappings
- * somewhere, to recreate them later.
- */
-#define VFIO_MAX_USER_MEM_MAPS 256
-struct user_mem_map {
-	uint64_t addr;
-	uint64_t iova;
-	uint64_t len;
-};
-static struct {
-	rte_spinlock_recursive_t lock;
-	int n_maps;
-	struct user_mem_map maps[VFIO_MAX_USER_MEM_MAPS];
-} user_mem_maps = {
-	.lock = RTE_SPINLOCK_RECURSIVE_INITIALIZER
-};
-
 /* for sPAPR IOMMU, we will need to walk memseg list, but we cannot use
  * rte_memseg_walk() because by the time we enter callback we will be holding a
  * write lock, so regular rte-memseg_walk will deadlock. copying the same
@@ -206,14 +225,15 @@ merge_map(struct user_mem_map *left, struct user_mem_map *right)
 }
 
 static struct user_mem_map *
-find_user_mem_map(uint64_t addr, uint64_t iova, uint64_t len)
+find_user_mem_map(struct user_mem_maps *user_mem_maps, uint64_t addr,
+		uint64_t iova, uint64_t len)
 {
 	uint64_t va_end = addr + len;
 	uint64_t iova_end = iova + len;
 	int i;
 
-	for (i = 0; i < user_mem_maps.n_maps; i++) {
-		struct user_mem_map *map = &user_mem_maps.maps[i];
+	for (i = 0; i < user_mem_maps->n_maps; i++) {
+		struct user_mem_map *map = &user_mem_maps->maps[i];
 		uint64_t map_va_end = map->addr + map->len;
 		uint64_t map_iova_end = map->iova + map->len;
 
@@ -239,20 +259,20 @@ find_user_mem_map(uint64_t addr, uint64_t iova, uint64_t len)
 
 /* this will sort all user maps, and merge/compact any adjacent maps */
 static void
-compact_user_maps(void)
+compact_user_maps(struct user_mem_maps *user_mem_maps)
 {
 	int i, n_merged, cur_idx;
 
-	qsort(user_mem_maps.maps, user_mem_maps.n_maps,
-			sizeof(user_mem_maps.maps[0]), user_mem_map_cmp);
+	qsort(user_mem_maps->maps, user_mem_maps->n_maps,
+			sizeof(user_mem_maps->maps[0]), user_mem_map_cmp);
 
 	/* we'll go over the list backwards when merging */
 	n_merged = 0;
-	for (i = user_mem_maps.n_maps - 2; i >= 0; i--) {
+	for (i = user_mem_maps->n_maps - 2; i >= 0; i--) {
 		struct user_mem_map *l, *r;
 
-		l = &user_mem_maps.maps[i];
-		r = &user_mem_maps.maps[i + 1];
+		l = &user_mem_maps->maps[i];
+		r = &user_mem_maps->maps[i + 1];
 
 		if (is_null_map(l) || is_null_map(r))
 			continue;
@@ -266,12 +286,12 @@ compact_user_maps(void)
 	 */
 	if (n_merged > 0) {
 		cur_idx = 0;
-		for (i = 0; i < user_mem_maps.n_maps; i++) {
-			if (!is_null_map(&user_mem_maps.maps[i])) {
+		for (i = 0; i < user_mem_maps->n_maps; i++) {
+			if (!is_null_map(&user_mem_maps->maps[i])) {
 				struct user_mem_map *src, *dst;
 
-				src = &user_mem_maps.maps[i];
-				dst = &user_mem_maps.maps[cur_idx++];
+				src = &user_mem_maps->maps[i];
+				dst = &user_mem_maps->maps[cur_idx++];
 
 				if (src != dst) {
 					memcpy(dst, src, sizeof(*src));
@@ -279,41 +299,16 @@ compact_user_maps(void)
 				}
 			}
 		}
-		user_mem_maps.n_maps = cur_idx;
+		user_mem_maps->n_maps = cur_idx;
 	}
 }
 
-int
-vfio_get_group_fd(int iommu_group_no)
+static int
+vfio_open_group_fd(int iommu_group_no)
 {
-	int i;
 	int vfio_group_fd;
 	char filename[PATH_MAX];
-	struct vfio_group *cur_grp;
-
-	/* check if we already have the group descriptor open */
-	for (i = 0; i < VFIO_MAX_GROUPS; i++)
-		if (vfio_cfg.vfio_groups[i].group_no == iommu_group_no)
-			return vfio_cfg.vfio_groups[i].fd;
-
-	/* Lets see first if there is room for a new group */
-	if (vfio_cfg.vfio_active_groups == VFIO_MAX_GROUPS) {
-		RTE_LOG(ERR, EAL, "Maximum number of VFIO groups reached!\n");
-		return -1;
-	}
-
-	/* Now lets get an index for the new group */
-	for (i = 0; i < VFIO_MAX_GROUPS; i++)
-		if (vfio_cfg.vfio_groups[i].group_no == -1) {
-			cur_grp = &vfio_cfg.vfio_groups[i];
-			break;
-		}
 
-	/* This should not happen */
-	if (i == VFIO_MAX_GROUPS) {
-		RTE_LOG(ERR, EAL, "No VFIO group free slot found\n");
-		return -1;
-	}
 	/* if primary, try to open the group */
 	if (internal_config.process_type == RTE_PROC_PRIMARY) {
 		/* try regular group format */
@@ -343,9 +338,6 @@ vfio_get_group_fd(int iommu_group_no)
 			/* noiommu group found */
 		}
 
-		cur_grp->group_no = iommu_group_no;
-		cur_grp->fd = vfio_group_fd;
-		vfio_cfg.vfio_active_groups++;
 		return vfio_group_fd;
 	}
 	/* if we're in a secondary process, request group fd from the primary
@@ -380,9 +372,6 @@ vfio_get_group_fd(int iommu_group_no)
 			/* if we got the fd, store it and return it */
 			if (vfio_group_fd > 0) {
 				close(socket_fd);
-				cur_grp->group_no = iommu_group_no;
-				cur_grp->fd = vfio_group_fd;
-				vfio_cfg.vfio_active_groups++;
 				return vfio_group_fd;
 			}
 			/* fall-through on error */
@@ -392,56 +381,164 @@ vfio_get_group_fd(int iommu_group_no)
 			return -1;
 		}
 	}
-	return -1;
 }
 
+static struct vfio_config *
+get_vfio_cfg_by_group_no(int iommu_group_no)
+{
+	struct vfio_config *vfio_cfg;
+	int i, j;
+
+	for (i = 0; i < VFIO_MAX_CONTAINERS; i++) {
+		vfio_cfg = &vfio_cfgs[i];
+		for (j = 0; j < VFIO_MAX_GROUPS; j++) {
+			if (vfio_cfg->vfio_groups[j].group_no ==
+					iommu_group_no)
+				return vfio_cfg;
+		}
+	}
+
+	return default_vfio_cfg;
+}
+
+static struct vfio_config *
+get_vfio_cfg_by_group_fd(int vfio_group_fd)
+{
+	struct vfio_config *vfio_cfg;
+	int i, j;
+
+	for (i = 0; i < VFIO_MAX_CONTAINERS; i++) {
+		vfio_cfg = &vfio_cfgs[i];
+		for (j = 0; j < VFIO_MAX_GROUPS; j++)
+			if (vfio_cfg->vfio_groups[j].fd == vfio_group_fd)
+				return vfio_cfg;
+	}
 
-static int
-get_vfio_group_idx(int vfio_group_fd)
+	return default_vfio_cfg;
+}
+
+static struct vfio_config *
+get_vfio_cfg_by_container_fd(int container_fd)
 {
 	int i;
+
+	for (i = 0; i < VFIO_MAX_CONTAINERS; i++) {
+		if (vfio_cfgs[i].vfio_container_fd == container_fd)
+			return &vfio_cfgs[i];
+	}
+
+	return NULL;
+}
+
+int
+vfio_get_group_fd(int iommu_group_no)
+{
+	int i;
+	int vfio_group_fd;
+	struct vfio_group *cur_grp;
+	struct vfio_config *vfio_cfg;
+
+	/* get the vfio_config it belongs to */
+	vfio_cfg = get_vfio_cfg_by_group_no(iommu_group_no);
+
+	/* check if we already have the group descriptor open */
+	for (i = 0; i < VFIO_MAX_GROUPS; i++)
+		if (vfio_cfg->vfio_groups[i].group_no == iommu_group_no)
+			return vfio_cfg->vfio_groups[i].fd;
+
+	/* Lets see first if there is room for a new group */
+	if (vfio_cfg->vfio_active_groups == VFIO_MAX_GROUPS) {
+		RTE_LOG(ERR, EAL, "Maximum number of VFIO groups reached!\n");
+		return -1;
+	}
+
+	/* Now lets get an index for the new group */
 	for (i = 0; i < VFIO_MAX_GROUPS; i++)
-		if (vfio_cfg.vfio_groups[i].fd == vfio_group_fd)
-			return i;
+		if (vfio_cfg->vfio_groups[i].group_no == -1) {
+			cur_grp = &vfio_cfg->vfio_groups[i];
+			break;
+		}
+
+	/* This should not happen */
+	if (i == VFIO_MAX_GROUPS) {
+		RTE_LOG(ERR, EAL, "No VFIO group free slot found\n");
+		return -1;
+	}
+
+	vfio_group_fd = vfio_open_group_fd(iommu_group_no);
+	if (vfio_group_fd < 0) {
+		RTE_LOG(ERR, EAL, "Failed to open group %d\n", iommu_group_no);
+		return -1;
+	}
+
+	cur_grp->group_no = iommu_group_no;
+	cur_grp->fd = vfio_group_fd;
+	vfio_cfg->vfio_active_groups++;
+
+	return vfio_group_fd;
+}
+
+static int
+get_vfio_group_idx(int vfio_group_fd)
+{
+	struct vfio_config *vfio_cfg;
+	int i, j;
+
+	for (i = 0; i < VFIO_MAX_CONTAINERS; i++) {
+		vfio_cfg = &vfio_cfgs[i];
+		for (j = 0; j < VFIO_MAX_GROUPS; j++)
+			if (vfio_cfg->vfio_groups[j].fd == vfio_group_fd)
+				return j;
+	}
+
 	return -1;
 }
 
 static void
 vfio_group_device_get(int vfio_group_fd)
 {
+	struct vfio_config *vfio_cfg;
 	int i;
 
+	vfio_cfg = get_vfio_cfg_by_group_fd(vfio_group_fd);
+
 	i = get_vfio_group_idx(vfio_group_fd);
 	if (i < 0 || i > (VFIO_MAX_GROUPS - 1))
 		RTE_LOG(ERR, EAL, "  wrong vfio_group index (%d)\n", i);
 	else
-		vfio_cfg.vfio_groups[i].devices++;
+		vfio_cfg->vfio_groups[i].devices++;
 }
 
 static void
 vfio_group_device_put(int vfio_group_fd)
 {
+	struct vfio_config *vfio_cfg;
 	int i;
 
+	vfio_cfg = get_vfio_cfg_by_group_fd(vfio_group_fd);
+
 	i = get_vfio_group_idx(vfio_group_fd);
 	if (i < 0 || i > (VFIO_MAX_GROUPS - 1))
 		RTE_LOG(ERR, EAL, "  wrong vfio_group index (%d)\n", i);
 	else
-		vfio_cfg.vfio_groups[i].devices--;
+		vfio_cfg->vfio_groups[i].devices--;
 }
 
 static int
 vfio_group_device_count(int vfio_group_fd)
 {
+	struct vfio_config *vfio_cfg;
 	int i;
 
+	vfio_cfg = get_vfio_cfg_by_group_fd(vfio_group_fd);
+
 	i = get_vfio_group_idx(vfio_group_fd);
 	if (i < 0 || i > (VFIO_MAX_GROUPS - 1)) {
 		RTE_LOG(ERR, EAL, "  wrong vfio_group index (%d)\n", i);
 		return -1;
 	}
 
-	return vfio_cfg.vfio_groups[i].devices;
+	return vfio_cfg->vfio_groups[i].devices;
 }
 
 static void
@@ -457,9 +554,11 @@ vfio_mem_event_callback(enum rte_mem_event type, const void *addr, size_t len)
 	if (rte_eal_iova_mode() == RTE_IOVA_VA) {
 		uint64_t vfio_va = (uint64_t)(uintptr_t)addr;
 		if (type == RTE_MEM_EVENT_ALLOC)
-			vfio_dma_mem_map(vfio_va, vfio_va, len, 1);
+			vfio_dma_mem_map(default_vfio_cfg, vfio_va, vfio_va,
+					len, 1);
 		else
-			vfio_dma_mem_map(vfio_va, vfio_va, len, 0);
+			vfio_dma_mem_map(default_vfio_cfg, vfio_va, vfio_va,
+					len, 0);
 		return;
 	}
 
@@ -467,9 +566,11 @@ vfio_mem_event_callback(enum rte_mem_event type, const void *addr, size_t len)
 	ms = rte_mem_virt2memseg(addr, msl);
 	while (cur_len < len) {
 		if (type == RTE_MEM_EVENT_ALLOC)
-			vfio_dma_mem_map(ms->addr_64, ms->iova, ms->len, 1);
+			vfio_dma_mem_map(default_vfio_cfg, ms->addr_64,
+					ms->iova, ms->len, 1);
 		else
-			vfio_dma_mem_map(ms->addr_64, ms->iova, ms->len, 0);
+			vfio_dma_mem_map(default_vfio_cfg, ms->addr_64,
+					ms->iova, ms->len, 0);
 
 		cur_len += ms->len;
 		++ms;
@@ -481,16 +582,19 @@ rte_vfio_clear_group(int vfio_group_fd)
 {
 	int i;
 	int socket_fd, ret;
+	struct vfio_config *vfio_cfg;
+
+	vfio_cfg = get_vfio_cfg_by_group_fd(vfio_group_fd);
 
 	if (internal_config.process_type == RTE_PROC_PRIMARY) {
 
 		i = get_vfio_group_idx(vfio_group_fd);
 		if (i < 0)
 			return -1;
-		vfio_cfg.vfio_groups[i].group_no = -1;
-		vfio_cfg.vfio_groups[i].fd = -1;
-		vfio_cfg.vfio_groups[i].devices = 0;
-		vfio_cfg.vfio_active_groups--;
+		vfio_cfg->vfio_groups[i].group_no = -1;
+		vfio_cfg->vfio_groups[i].fd = -1;
+		vfio_cfg->vfio_groups[i].devices = 0;
+		vfio_cfg->vfio_active_groups--;
 		return 0;
 	}
 
@@ -543,6 +647,9 @@ rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr,
 	struct vfio_group_status group_status = {
 			.argsz = sizeof(group_status)
 	};
+	struct vfio_config *vfio_cfg;
+	struct user_mem_maps *user_mem_maps;
+	int vfio_container_fd;
 	int vfio_group_fd;
 	int iommu_group_no;
 	int i, ret;
@@ -591,12 +698,17 @@ rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr,
 		return -1;
 	}
 
+	/* get the vfio_config it belongs to */
+	vfio_cfg = get_vfio_cfg_by_group_no(iommu_group_no);
+	vfio_container_fd = vfio_cfg->vfio_container_fd;
+	user_mem_maps = &vfio_cfg->mem_maps;
+
 	/* check if group does not have a container yet */
 	if (!(group_status.flags & VFIO_GROUP_FLAGS_CONTAINER_SET)) {
 
 		/* add group to a container */
 		ret = ioctl(vfio_group_fd, VFIO_GROUP_SET_CONTAINER,
-				&vfio_cfg.vfio_container_fd);
+				&vfio_container_fd);
 		if (ret) {
 			RTE_LOG(ERR, EAL, "  %s cannot add VFIO group to container, "
 					"error %i (%s)\n", dev_addr, errno, strerror(errno));
@@ -614,11 +726,11 @@ rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr,
 		 * functionality.
 		 */
 		if (internal_config.process_type == RTE_PROC_PRIMARY &&
-				vfio_cfg.vfio_active_groups == 1) {
+				vfio_cfg->vfio_active_groups == 1) {
 			const struct vfio_iommu_type *t;
 
 			/* select an IOMMU type which we will be using */
-			t = vfio_set_iommu_type(vfio_cfg.vfio_container_fd);
+			t = vfio_set_iommu_type(vfio_container_fd);
 			if (!t) {
 				RTE_LOG(ERR, EAL,
 					"  %s failed to select IOMMU type\n",
@@ -631,7 +743,10 @@ rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr,
 			 * after registering callback, to prevent races
 			 */
 			rte_rwlock_read_lock(mem_lock);
-			ret = t->dma_map_func(vfio_cfg.vfio_container_fd);
+			if (vfio_cfg == default_vfio_cfg)
+				ret = t->dma_map_func(vfio_container_fd);
+			else
+				ret = 0;
 			if (ret) {
 				RTE_LOG(ERR, EAL,
 					"  %s DMA remapping failed, error %i (%s)\n",
@@ -642,22 +757,22 @@ rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr,
 				return -1;
 			}
 
-			vfio_cfg.vfio_iommu_type = t;
+			vfio_cfg->vfio_iommu_type = t;
 
 			/* re-map all user-mapped segments */
-			rte_spinlock_recursive_lock(&user_mem_maps.lock);
+			rte_spinlock_recursive_lock(&user_mem_maps->lock);
 
 			/* this IOMMU type may not support DMA mapping, but
 			 * if we have mappings in the list - that means we have
 			 * previously mapped something successfully, so we can
 			 * be sure that DMA mapping is supported.
 			 */
-			for (i = 0; i < user_mem_maps.n_maps; i++) {
+			for (i = 0; i < user_mem_maps->n_maps; i++) {
 				struct user_mem_map *map;
-				map = &user_mem_maps.maps[i];
+				map = &user_mem_maps->maps[i];
 
 				ret = t->dma_user_map_func(
-						vfio_cfg.vfio_container_fd,
+						vfio_container_fd,
 						map->addr, map->iova, map->len,
 						1);
 				if (ret) {
@@ -668,17 +783,20 @@ rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr,
 							map->addr, map->iova,
 							map->len);
 					rte_spinlock_recursive_unlock(
-							&user_mem_maps.lock);
+							&user_mem_maps->lock);
 					rte_rwlock_read_unlock(mem_lock);
 					return -1;
 				}
 			}
-			rte_spinlock_recursive_unlock(&user_mem_maps.lock);
+			rte_spinlock_recursive_unlock(&user_mem_maps->lock);
 
 			/* register callback for mem events */
-			ret = rte_mem_event_callback_register(
+			if (vfio_cfg == default_vfio_cfg)
+				ret = rte_mem_event_callback_register(
 					VFIO_MEM_EVENT_CLB_NAME,
 					vfio_mem_event_callback);
+			else
+				ret = 0;
 			/* unlock memory hotplug */
 			rte_rwlock_read_unlock(mem_lock);
 
@@ -732,6 +850,7 @@ rte_vfio_release_device(const char *sysfs_base, const char *dev_addr,
 	struct vfio_group_status group_status = {
 			.argsz = sizeof(group_status)
 	};
+	struct vfio_config *vfio_cfg;
 	int vfio_group_fd;
 	int iommu_group_no;
 	int ret;
@@ -761,6 +880,9 @@ rte_vfio_release_device(const char *sysfs_base, const char *dev_addr,
 		goto out;
 	}
 
+	/* get the vfio_config it belongs to */
+	vfio_cfg = get_vfio_cfg_by_group_no(iommu_group_no);
+
 	/* At this point we got an active group. Closing it will make the
 	 * container detachment. If this is the last active group, VFIO kernel
 	 * code will unset the container and the IOMMU mappings.
@@ -798,7 +920,7 @@ rte_vfio_release_device(const char *sysfs_base, const char *dev_addr,
 	/* if there are no active device groups, unregister the callback to
 	 * avoid spurious attempts to map/unmap memory from VFIO.
 	 */
-	if (vfio_cfg.vfio_active_groups == 0)
+	if (vfio_cfg == default_vfio_cfg && vfio_cfg->vfio_active_groups == 0)
 		rte_mem_event_callback_unregister(VFIO_MEM_EVENT_CLB_NAME);
 
 	/* success */
@@ -813,13 +935,21 @@ int
 rte_vfio_enable(const char *modname)
 {
 	/* initialize group list */
-	int i;
+	int i, j;
 	int vfio_available;
-
-	for (i = 0; i < VFIO_MAX_GROUPS; i++) {
-		vfio_cfg.vfio_groups[i].fd = -1;
-		vfio_cfg.vfio_groups[i].group_no = -1;
-		vfio_cfg.vfio_groups[i].devices = 0;
+	rte_spinlock_recursive_t lock = RTE_SPINLOCK_RECURSIVE_INITIALIZER;
+
+	for (i = 0; i < VFIO_MAX_CONTAINERS; i++) {
+		vfio_cfgs[i].vfio_container_fd = -1;
+		vfio_cfgs[i].vfio_active_groups = 0;
+		vfio_cfgs[i].vfio_iommu_type = NULL;
+		vfio_cfgs[i].mem_maps.lock = lock;
+
+		for (j = 0; j < VFIO_MAX_GROUPS; j++) {
+			vfio_cfgs[i].vfio_groups[j].fd = -1;
+			vfio_cfgs[i].vfio_groups[j].group_no = -1;
+			vfio_cfgs[i].vfio_groups[j].devices = 0;
+		}
 	}
 
 	/* inform the user that we are probing for VFIO */
@@ -841,12 +971,12 @@ rte_vfio_enable(const char *modname)
 		return 0;
 	}
 
-	vfio_cfg.vfio_container_fd = vfio_get_container_fd();
+	default_vfio_cfg->vfio_container_fd = vfio_get_container_fd();
 
 	/* check if we have VFIO driver enabled */
-	if (vfio_cfg.vfio_container_fd != -1) {
+	if (default_vfio_cfg->vfio_container_fd != -1) {
 		RTE_LOG(NOTICE, EAL, "VFIO support initialized\n");
-		vfio_cfg.vfio_enabled = 1;
+		default_vfio_cfg->vfio_enabled = 1;
 	} else {
 		RTE_LOG(NOTICE, EAL, "VFIO support could not be initialized\n");
 	}
@@ -858,7 +988,7 @@ int
 rte_vfio_is_enabled(const char *modname)
 {
 	const int mod_available = rte_eal_check_module(modname) > 0;
-	return vfio_cfg.vfio_enabled && mod_available;
+	return default_vfio_cfg->vfio_enabled && mod_available;
 }
 
 const struct vfio_iommu_type *
@@ -1220,9 +1350,13 @@ vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
 	struct vfio_iommu_spapr_tce_create create = {
 		.argsz = sizeof(create),
 	};
+	struct vfio_config *vfio_cfg;
+	struct user_mem_maps *user_mem_maps;
 	int i, ret = 0;
 
-	rte_spinlock_recursive_lock(&user_mem_maps.lock);
+	vfio_cfg = get_vfio_cfg_by_container_fd(vfio_container_fd);
+	user_mem_maps = &vfio_cfg->mem_maps;
+	rte_spinlock_recursive_lock(&user_mem_maps->lock);
 
 	/* check if window size needs to be adjusted */
 	memset(&param, 0, sizeof(param));
@@ -1235,9 +1369,9 @@ vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
 	}
 
 	/* also check user maps */
-	for (i = 0; i < user_mem_maps.n_maps; i++) {
-		uint64_t max = user_mem_maps.maps[i].iova +
-				user_mem_maps.maps[i].len;
+	for (i = 0; i < user_mem_maps->n_maps; i++) {
+		uint64_t max = user_mem_maps->maps[i].iova +
+				user_mem_maps->maps[i].len;
 		create.window_size = RTE_MAX(create.window_size, max);
 	}
 
@@ -1263,9 +1397,9 @@ vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
 				goto out;
 			}
 			/* remap all user maps */
-			for (i = 0; i < user_mem_maps.n_maps; i++) {
+			for (i = 0; i < user_mem_maps->n_maps; i++) {
 				struct user_mem_map *map =
-						&user_mem_maps.maps[i];
+						&user_mem_maps->maps[i];
 				if (vfio_spapr_dma_do_map(vfio_container_fd,
 						map->addr, map->iova, map->len,
 						1)) {
@@ -1306,7 +1440,7 @@ vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
 		vfio_spapr_dma_do_map(vfio_container_fd, vaddr, iova, len, 0);
 	}
 out:
-	rte_spinlock_recursive_unlock(&user_mem_maps.lock);
+	rte_spinlock_recursive_unlock(&user_mem_maps->lock);
 	return ret;
 }
 
@@ -1358,9 +1492,10 @@ vfio_noiommu_dma_mem_map(int __rte_unused vfio_container_fd,
 }
 
 static int
-vfio_dma_mem_map(uint64_t vaddr, uint64_t iova, uint64_t len, int do_map)
+vfio_dma_mem_map(struct vfio_config *vfio_cfg, uint64_t vaddr, uint64_t iova,
+		uint64_t len, int do_map)
 {
-	const struct vfio_iommu_type *t = vfio_cfg.vfio_iommu_type;
+	const struct vfio_iommu_type *t = vfio_cfg->vfio_iommu_type;
 
 	if (!t) {
 		RTE_LOG(ERR, EAL, "  VFIO support not initialized\n");
@@ -1376,7 +1511,7 @@ vfio_dma_mem_map(uint64_t vaddr, uint64_t iova, uint64_t len, int do_map)
 		return -1;
 	}
 
-	return t->dma_user_map_func(vfio_cfg.vfio_container_fd, vaddr, iova,
+	return t->dma_user_map_func(vfio_cfg->vfio_container_fd, vaddr, iova,
 			len, do_map);
 }
 
@@ -1384,6 +1519,7 @@ int __rte_experimental
 rte_vfio_dma_map(uint64_t vaddr, uint64_t iova, uint64_t len)
 {
 	struct user_mem_map *new_map;
+	struct user_mem_maps *user_mem_maps;
 	int ret = 0;
 
 	if (len == 0) {
@@ -1391,15 +1527,16 @@ rte_vfio_dma_map(uint64_t vaddr, uint64_t iova, uint64_t len)
 		return -1;
 	}
 
-	rte_spinlock_recursive_lock(&user_mem_maps.lock);
-	if (user_mem_maps.n_maps == VFIO_MAX_USER_MEM_MAPS) {
+	user_mem_maps = &default_vfio_cfg->mem_maps;
+	rte_spinlock_recursive_lock(&user_mem_maps->lock);
+	if (user_mem_maps->n_maps == VFIO_MAX_USER_MEM_MAPS) {
 		RTE_LOG(ERR, EAL, "No more space for user mem maps\n");
 		rte_errno = ENOMEM;
 		ret = -1;
 		goto out;
 	}
 	/* map the entry */
-	if (vfio_dma_mem_map(vaddr, iova, len, 1)) {
+	if (vfio_dma_mem_map(default_vfio_cfg, vaddr, iova, len, 1)) {
 		/* technically, this will fail if there are currently no devices
 		 * plugged in, even if a device were added later, this mapping
 		 * might have succeeded. however, since we cannot verify if this
@@ -1412,14 +1549,14 @@ rte_vfio_dma_map(uint64_t vaddr, uint64_t iova, uint64_t len)
 		goto out;
 	}
 	/* create new user mem map entry */
-	new_map = &user_mem_maps.maps[user_mem_maps.n_maps++];
+	new_map = &user_mem_maps->maps[user_mem_maps->n_maps++];
 	new_map->addr = vaddr;
 	new_map->iova = iova;
 	new_map->len = len;
 
-	compact_user_maps();
+	compact_user_maps(user_mem_maps);
 out:
-	rte_spinlock_recursive_unlock(&user_mem_maps.lock);
+	rte_spinlock_recursive_unlock(&user_mem_maps->lock);
 	return ret;
 }
 
@@ -1427,6 +1564,7 @@ int __rte_experimental
 rte_vfio_dma_unmap(uint64_t vaddr, uint64_t iova, uint64_t len)
 {
 	struct user_mem_map *map, *new_map = NULL;
+	struct user_mem_maps *user_mem_maps;
 	int ret = 0;
 
 	if (len == 0) {
@@ -1434,10 +1572,11 @@ rte_vfio_dma_unmap(uint64_t vaddr, uint64_t iova, uint64_t len)
 		return -1;
 	}
 
-	rte_spinlock_recursive_lock(&user_mem_maps.lock);
+	user_mem_maps = &default_vfio_cfg->mem_maps;
+	rte_spinlock_recursive_lock(&user_mem_maps->lock);
 
 	/* find our mapping */
-	map = find_user_mem_map(vaddr, iova, len);
+	map = find_user_mem_map(user_mem_maps, vaddr, iova, len);
 	if (!map) {
 		RTE_LOG(ERR, EAL, "Couldn't find previously mapped region\n");
 		rte_errno = EINVAL;
@@ -1448,17 +1587,17 @@ rte_vfio_dma_unmap(uint64_t vaddr, uint64_t iova, uint64_t len)
 		/* we're partially unmapping a previously mapped region, so we
 		 * need to split entry into two.
 		 */
-		if (user_mem_maps.n_maps == VFIO_MAX_USER_MEM_MAPS) {
+		if (user_mem_maps->n_maps == VFIO_MAX_USER_MEM_MAPS) {
 			RTE_LOG(ERR, EAL, "Not enough space to store partial mapping\n");
 			rte_errno = ENOMEM;
 			ret = -1;
 			goto out;
 		}
-		new_map = &user_mem_maps.maps[user_mem_maps.n_maps++];
+		new_map = &user_mem_maps->maps[user_mem_maps->n_maps++];
 	}
 
 	/* unmap the entry */
-	if (vfio_dma_mem_map(vaddr, iova, len, 0)) {
+	if (vfio_dma_mem_map(default_vfio_cfg, vaddr, iova, len, 0)) {
 		/* there may not be any devices plugged in, so unmapping will
 		 * fail with ENODEV/ENOTSUP rte_errno values, but that doesn't
 		 * stop us from removing the mapping, as the assumption is we
@@ -1481,19 +1620,19 @@ rte_vfio_dma_unmap(uint64_t vaddr, uint64_t iova, uint64_t len)
 
 		/* if we've created a new map by splitting, sort everything */
 		if (!is_null_map(new_map)) {
-			compact_user_maps();
+			compact_user_maps(user_mem_maps);
 		} else {
 			/* we've created a new mapping, but it was unused */
-			user_mem_maps.n_maps--;
+			user_mem_maps->n_maps--;
 		}
 	} else {
 		memset(map, 0, sizeof(*map));
-		compact_user_maps();
-		user_mem_maps.n_maps--;
+		compact_user_maps(user_mem_maps);
+		user_mem_maps->n_maps--;
 	}
 
 out:
-	rte_spinlock_recursive_unlock(&user_mem_maps.lock);
+	rte_spinlock_recursive_unlock(&user_mem_maps->lock);
 	return ret;
 }
 
diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.h b/lib/librte_eal/linuxapp/eal/eal_vfio.h
index 549f4427e..e14d5be99 100644
--- a/lib/librte_eal/linuxapp/eal/eal_vfio.h
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio.h
@@ -88,6 +88,7 @@ struct vfio_iommu_spapr_tce_info {
 #endif
 
 #define VFIO_MAX_GROUPS RTE_MAX_VFIO_GROUPS
+#define VFIO_MAX_CONTAINERS RTE_MAX_VFIO_CONTAINERS
 
 /*
  * Function prototypes for VFIO multiprocess sync functions
@@ -98,24 +99,6 @@ int vfio_mp_sync_send_fd(int socket, int fd);
 int vfio_mp_sync_receive_fd(int socket);
 int vfio_mp_sync_connect_to_primary(void);
 
-/*
- * we don't need to store device fd's anywhere since they can be obtained from
- * the group fd via an ioctl() call.
- */
-struct vfio_group {
-	int group_no;
-	int fd;
-	int devices;
-};
-
-struct vfio_config {
-	int vfio_enabled;
-	int vfio_container_fd;
-	int vfio_active_groups;
-	const struct vfio_iommu_type *vfio_iommu_type;
-	struct vfio_group vfio_groups[VFIO_MAX_GROUPS];
-};
-
 /* DMA mapping function prototype.
  * Takes VFIO container fd as a parameter.
  * Returns 0 on success, -1 on error.
-- 
2.15.1
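
For completeness, a rough sketch of the per-container DMA dispatch the
diff introduces (simplified stand-in types, not the EAL definitions;
the real callback table is struct vfio_iommu_type in eal_vfio.h):

#include <stdint.h>
#include <stddef.h>

struct iommu_ops {
	int (*dma_map_func)(int container_fd);
	int (*dma_user_map_func)(int container_fd, uint64_t vaddr,
			uint64_t iova, uint64_t len, int do_map);
};

struct cfg {
	int container_fd;             /* this container's VFIO fd */
	const struct iommu_ops *ops;  /* IOMMU type selected per container */
};

/* Mirrors the reworked vfio_dma_mem_map(): the caller passes the
 * config of the container to map into, instead of relying on a single
 * process-wide one.
 */
static int
dma_mem_map(struct cfg *c, uint64_t vaddr, uint64_t iova, uint64_t len,
		int do_map)
{
	if (c->ops == NULL || c->ops->dma_user_map_func == NULL)
		return -1;            /* container not initialized yet */
	return c->ops->dma_user_map_func(c->container_fd, vaddr, iova,
			len, do_map);
}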
