DPDK-dev Archive on lore.kernel.org
 help / color / Atom feed
* [dpdk-dev] [PATCH] vfio: fix expanding DMA area in ppc64le
@ 2019-06-12  6:33 Takeshi Yoshimura
  2019-06-12 14:06 ` Aaron Conole
                   ` (2 more replies)
  0 siblings, 3 replies; 16+ messages in thread
From: Takeshi Yoshimura @ 2019-06-12  6:33 UTC (permalink / raw)
  To: dev; +Cc: Takeshi Yoshimura

In ppc64le, expanding DMA areas always fail because we cannot remove
a DMA window. As a result, we cannot allocate more than one memseg in
ppc64le. This is because vfio_spapr_dma_mem_map() doesn't unmap all
the mapped DMA before removing the window. This patch fixes this
incorrect behavior.

I added a global variable to track current window size since we do
not have better ways to get exact size of it than doing so. sPAPR
IOMMU seems not to provide any ways to get window size with ioctl
interfaces. rte_memseg_walk*() is currently used to calculate window
size, but it walks memsegs that are marked as used, not mapped. So,
we need to determine if a given memseg is mapped or not, otherwise
the ioctl reports errors due to attempting to unregister memory
addresses that are not registered. The global variable is excluded
in non-ppc64le binaries.

Similar problems happen in user maps. We need to avoid attempting to
unmap the address that is given as the function's parameter. The
compaction of user maps prevents us from passing correct length for
unmapping DMA at the window recreation. So, I removed it in ppc64le.

I also fixed the order of ioctl for unregister and unmap. The ioctl
for unregister sometimes reports device busy errors due to the
existence of a mapped area.

Signed-off-by: Takeshi Yoshimura <tyos@jp.ibm.com>
---
 lib/librte_eal/linux/eal/eal_vfio.c | 154 +++++++++++++++++++---------
 1 file changed, 103 insertions(+), 51 deletions(-)

diff --git a/lib/librte_eal/linux/eal/eal_vfio.c b/lib/librte_eal/linux/eal/eal_vfio.c
index f16c5c3c0..6edbaaff5 100644
--- a/lib/librte_eal/linux/eal/eal_vfio.c
+++ b/lib/librte_eal/linux/eal/eal_vfio.c
@@ -93,6 +93,7 @@ is_null_map(const struct user_mem_map *map)
 	return map->addr == 0 && map->iova == 0 && map->len == 0;
 }
 
+#ifndef RTE_ARCH_PPC_64
 /* we may need to merge user mem maps together in case of user mapping/unmapping
  * chunks of memory, so we'll need a comparator function to sort segments.
  */
@@ -126,6 +127,7 @@ user_mem_map_cmp(const void *a, const void *b)
 
 	return 0;
 }
+#endif
 
 /* adjust user map entry. this may result in shortening of existing map, or in
  * splitting existing map in two pieces.
@@ -162,6 +164,7 @@ adjust_map(struct user_mem_map *src, struct user_mem_map *end,
 	}
 }
 
+#ifndef RTE_ARCH_PPC_64
 /* try merging two maps into one, return 1 if succeeded */
 static int
 merge_map(struct user_mem_map *left, struct user_mem_map *right)
@@ -177,6 +180,7 @@ merge_map(struct user_mem_map *left, struct user_mem_map *right)
 
 	return 1;
 }
+#endif
 
 static struct user_mem_map *
 find_user_mem_map(struct user_mem_maps *user_mem_maps, uint64_t addr,
@@ -211,6 +215,16 @@ find_user_mem_map(struct user_mem_maps *user_mem_maps, uint64_t addr,
 	return NULL;
 }
 
+#ifdef RTE_ARCH_PPC_64
+/* Recreation of DMA window requires unregistering DMA memory.
+ * Compaction confuses the logic and causes false reports in the recreation.
+ * For now, we do not compact user maps in ppc64le.
+ */
+static void
+compact_user_maps(__rte_unused struct user_mem_maps *user_mem_maps)
+{
+}
+#else
 /* this will sort all user maps, and merge/compact any adjacent maps */
 static void
 compact_user_maps(struct user_mem_maps *user_mem_maps)
@@ -256,6 +270,7 @@ compact_user_maps(struct user_mem_maps *user_mem_maps)
 		user_mem_maps->n_maps = cur_idx;
 	}
 }
+#endif
 
 static int
 vfio_open_group_fd(int iommu_group_num)
@@ -1357,14 +1372,6 @@ vfio_spapr_dma_do_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
 		}
 
 	} else {
-		ret = ioctl(vfio_container_fd,
-				VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY, &reg);
-		if (ret) {
-			RTE_LOG(ERR, EAL, "  cannot unregister vaddr for IOMMU, error %i (%s)\n",
-					errno, strerror(errno));
-			return -1;
-		}
-
 		memset(&dma_unmap, 0, sizeof(dma_unmap));
 		dma_unmap.argsz = sizeof(struct vfio_iommu_type1_dma_unmap);
 		dma_unmap.size = len;
@@ -1377,24 +1384,50 @@ vfio_spapr_dma_do_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
 					errno, strerror(errno));
 			return -1;
 		}
+
+		ret = ioctl(vfio_container_fd,
+				VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY, &reg);
+		if (ret) {
+			RTE_LOG(ERR, EAL, "  cannot unregister vaddr for IOMMU, error %i (%s)\n",
+					errno, strerror(errno));
+			return -1;
+		}
 	}
 
 	return 0;
 }
 
+struct spapr_remap_walk_param {
+	int vfio_container_fd;
+	uint64_t window_size;
+};
+
 static int
 vfio_spapr_map_walk(const struct rte_memseg_list *msl,
 		const struct rte_memseg *ms, void *arg)
 {
-	int *vfio_container_fd = arg;
+	struct spapr_remap_walk_param *p = arg;
 
-	if (msl->external)
+	if (msl->external || ms->iova >= p->window_size)
 		return 0;
 
-	return vfio_spapr_dma_do_map(*vfio_container_fd, ms->addr_64, ms->iova,
+	return vfio_spapr_dma_do_map(p->vfio_container_fd, ms->addr_64, ms->iova,
 			ms->len, 1);
 }
 
+static int
+vfio_spapr_unmap_walk(const struct rte_memseg_list *msl,
+		const struct rte_memseg *ms, void *arg)
+{
+	struct spapr_remap_walk_param *p = arg;
+
+	if (msl->external || ms->iova >= p->window_size)
+		return 0;
+
+	return vfio_spapr_dma_do_map(p->vfio_container_fd, ms->addr_64, ms->iova,
+			ms->len, 0);
+}
+
 struct spapr_walk_param {
 	uint64_t window_size;
 	uint64_t hugepage_sz;
@@ -1481,14 +1514,14 @@ vfio_spapr_create_new_dma_window(int vfio_container_fd,
 	return 0;
 }
 
+#ifdef RTE_ARCH_PPC_64
+static struct vfio_iommu_spapr_tce_create prev_create;
+
 static int
 vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
 		uint64_t len, int do_map)
 {
-	struct spapr_walk_param param;
-	struct vfio_iommu_spapr_tce_create create = {
-		.argsz = sizeof(create),
-	};
+	struct vfio_iommu_spapr_tce_create create;
 	struct vfio_config *vfio_cfg;
 	struct user_mem_maps *user_mem_maps;
 	int i, ret = 0;
@@ -1502,43 +1535,59 @@ vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
 	user_mem_maps = &vfio_cfg->mem_maps;
 	rte_spinlock_recursive_lock(&user_mem_maps->lock);
 
-	/* check if window size needs to be adjusted */
-	memset(&param, 0, sizeof(param));
-
-	/* we're inside a callback so use thread-unsafe version */
-	if (rte_memseg_walk_thread_unsafe(vfio_spapr_window_size_walk,
-				&param) < 0) {
-		RTE_LOG(ERR, EAL, "Could not get window size\n");
-		ret = -1;
-		goto out;
-	}
+	memcpy(&create, &prev_create, sizeof(create));
 
 	/* also check user maps */
 	for (i = 0; i < user_mem_maps->n_maps; i++) {
-		uint64_t max = user_mem_maps->maps[i].iova +
-				user_mem_maps->maps[i].len;
-		create.window_size = RTE_MAX(create.window_size, max);
+		struct user_mem_map *map = &user_mem_maps->maps[i];
+
+		if (vaddr == map->addr && len == map->len)
+			continue;
+		create.window_size = RTE_MAX(create.window_size, map->iova + map->len);
 	}
 
 	/* sPAPR requires window size to be a power of 2 */
-	create.window_size = rte_align64pow2(param.window_size);
-	create.page_shift = __builtin_ctzll(param.hugepage_sz);
-	create.levels = 1;
+	create.window_size = rte_align64pow2(create.window_size);
 
 	if (do_map) {
-		void *addr;
 		/* re-create window and remap the entire memory */
-		if (iova > create.window_size) {
+		if (iova + len > create.window_size) {
+			struct spapr_remap_walk_param param = {
+				.vfio_container_fd = vfio_container_fd,
+			    .window_size = create.window_size,
+			};
+
+			/* we're inside a callback, so use thread-unsafe version
+			 */
+			rte_memseg_walk_thread_unsafe(vfio_spapr_unmap_walk,
+					&param);
+			/* destruct all user maps */
+			for (i = 0; i < user_mem_maps->n_maps; i++) {
+				struct user_mem_map *map =
+						&user_mem_maps->maps[i];
+				if (vaddr == map->addr && len == map->len)
+					continue;
+				if (vfio_spapr_dma_do_map(vfio_container_fd,
+						map->addr, map->iova, map->len,
+						0)) {
+					RTE_LOG(ERR, EAL, "Could not destruct user DMA maps\n");
+					ret = -1;
+					goto out;
+				}
+			}
+
+			create.window_size = rte_align64pow2(iova + len);
 			if (vfio_spapr_create_new_dma_window(vfio_container_fd,
 					&create) < 0) {
 				RTE_LOG(ERR, EAL, "Could not create new DMA window\n");
 				ret = -1;
 				goto out;
 			}
+			memcpy(&prev_create, &create, sizeof(create));
 			/* we're inside a callback, so use thread-unsafe version
 			 */
 			if (rte_memseg_walk_thread_unsafe(vfio_spapr_map_walk,
-					&vfio_container_fd) < 0) {
+					&param) < 0) {
 				RTE_LOG(ERR, EAL, "Could not recreate DMA maps\n");
 				ret = -1;
 				goto out;
@@ -1547,6 +1596,8 @@ vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
 			for (i = 0; i < user_mem_maps->n_maps; i++) {
 				struct user_mem_map *map =
 						&user_mem_maps->maps[i];
+				if (vaddr == map->addr && len == map->len)
+					continue;
 				if (vfio_spapr_dma_do_map(vfio_container_fd,
 						map->addr, map->iova, map->len,
 						1)) {
@@ -1556,23 +1607,8 @@ vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
 				}
 			}
 		}
-
-		/* now that we've remapped all of the memory that was present
-		 * before, map the segment that we were requested to map.
-		 *
-		 * however, if we were called by the callback, the memory we
-		 * were called with was already in the memseg list, so previous
-		 * mapping should've mapped that segment already.
-		 *
-		 * virt2memseg_list is a relatively cheap check, so use that. if
-		 * memory is within any memseg list, it's a memseg, so it's
-		 * already mapped.
-		 */
-		addr = (void *)(uintptr_t)vaddr;
-		if (rte_mem_virt2memseg_list(addr) == NULL &&
-				vfio_spapr_dma_do_map(vfio_container_fd,
-					vaddr, iova, len, 1) < 0) {
-			RTE_LOG(ERR, EAL, "Could not map segment\n");
+		if (vfio_spapr_dma_do_map(vfio_container_fd, vaddr, iova, len, 1)) {
+			RTE_LOG(ERR, EAL, "Failed to map DMA\n");
 			ret = -1;
 			goto out;
 		}
@@ -1613,6 +1649,7 @@ vfio_spapr_dma_map(int vfio_container_fd)
 		RTE_LOG(ERR, EAL, "Could not create new DMA window\n");
 		return -1;
 	}
+	memcpy(&prev_create, &create, sizeof(create));
 
 	/* map all DPDK segments for DMA. use 1:1 PA to IOVA mapping */
 	if (rte_memseg_walk(vfio_spapr_map_walk, &vfio_container_fd) < 0)
@@ -1620,6 +1657,21 @@ vfio_spapr_dma_map(int vfio_container_fd)
 
 	return 0;
 }
+#else
+static int
+vfio_spapr_dma_mem_map(int __rte_unused vfio_container_fd,
+			uint64_t __rte_unused vaddr,
+			uint64_t __rte_unused iova, uint64_t __rte_unused len,
+			int __rte_unused do_map)
+{
+	return 0;
+}
+static int
+vfio_spapr_dma_map(int __rte_unused vfio_container_fd)
+{
+	return 0;
+}
+#endif
 
 static int
 vfio_noiommu_dma_map(int __rte_unused vfio_container_fd)
-- 
2.17.1


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [dpdk-dev] [PATCH] vfio: fix expanding DMA area in ppc64le
  2019-06-12  6:33 [dpdk-dev] [PATCH] vfio: fix expanding DMA area in ppc64le Takeshi Yoshimura
@ 2019-06-12 14:06 ` Aaron Conole
  2019-06-13  2:22 ` Takeshi Yoshimura
  2019-06-13  2:30 ` Takeshi T Yoshimura
  2 siblings, 0 replies; 16+ messages in thread
From: Aaron Conole @ 2019-06-12 14:06 UTC (permalink / raw)
  To: Takeshi Yoshimura; +Cc: dev

Takeshi Yoshimura <tyos@jp.ibm.com> writes:

> In ppc64le, expanding DMA areas always fail because we cannot remove
> a DMA window. As a result, we cannot allocate more than one memseg in
> ppc64le. This is because vfio_spapr_dma_mem_map() doesn't unmap all
> the mapped DMA before removing the window. This patch fixes this
> incorrect behavior.
>
> I added a global variable to track current window size since we do
> not have better ways to get exact size of it than doing so. sPAPR
> IOMMU seems not to provide any ways to get window size with ioctl
> interfaces. rte_memseg_walk*() is currently used to calculate window
> size, but it walks memsegs that are marked as used, not mapped. So,
> we need to determine if a given memseg is mapped or not, otherwise
> the ioctl reports errors due to attempting to unregister memory
> addresses that are not registered. The global variable is excluded
> in non-ppc64le binaries.
>
> Similar problems happen in user maps. We need to avoid attempting to
> unmap the address that is given as the function's parameter. The
> compaction of user maps prevents us from passing correct length for
> unmapping DMA at the window recreation. So, I removed it in ppc64le.
>
> I also fixed the order of ioctl for unregister and unmap. The ioctl
> for unregister sometimes report device busy errors due to the
> existence of mapped area.
>
> Signed-off-by: Takeshi Yoshimura <tyos@jp.ibm.com>
> ---

Hi Takeshi,

I see the compilation with this patch has failed.

See https://travis-ci.com/ovsrobot/dpdk/builds/115206950 for the jobs
run.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [dpdk-dev] [PATCH] vfio: fix expanding DMA area in ppc64le
  2019-06-12  6:33 [dpdk-dev] [PATCH] vfio: fix expanding DMA area in ppc64le Takeshi Yoshimura
  2019-06-12 14:06 ` Aaron Conole
@ 2019-06-13  2:22 ` Takeshi Yoshimura
  2019-06-13 17:37   ` David Christensen
                     ` (6 more replies)
  2019-06-13  2:30 ` Takeshi T Yoshimura
  2 siblings, 7 replies; 16+ messages in thread
From: Takeshi Yoshimura @ 2019-06-13  2:22 UTC (permalink / raw)
  To: dev; +Cc: drc, pradeep, Takeshi Yoshimura

In ppc64le, expanding DMA areas always fail because we cannot remove
a DMA window. As a result, we cannot allocate more than one memseg in
ppc64le. This is because vfio_spapr_dma_mem_map() doesn't unmap all
the mapped DMA before removing the window. This patch fixes this
incorrect behavior.

I added a global variable to track current window size since we do
not have better ways to get exact size of it than doing so. sPAPR
IOMMU seems not to provide any ways to get window size with ioctl
interfaces. rte_memseg_walk*() is currently used to calculate window
size, but it walks memsegs that are marked as used, not mapped. So,
we need to determine if a given memseg is mapped or not, otherwise
the ioctl reports errors due to attempting to unregister memory
addresses that are not registered. The global variable is excluded
in non-ppc64le binaries.

Similar problems happen in user maps. We need to avoid attempting to
unmap the address that is given as the function's parameter. The
compaction of user maps prevents us from passing correct length for
unmapping DMA at the window recreation. So, I removed it in ppc64le.

I also fixed the order of ioctl for unregister and unmap. The ioctl
for unregister sometimes reports device busy errors due to the
existence of a mapped area.

Signed-off-by: Takeshi Yoshimura <tyos@jp.ibm.com>
---
 lib/librte_eal/linux/eal/eal_vfio.c | 154 +++++++++++++++++++---------
 1 file changed, 103 insertions(+), 51 deletions(-)

diff --git a/lib/librte_eal/linux/eal/eal_vfio.c b/lib/librte_eal/linux/eal/eal_vfio.c
index f16c5c3c0..c1b275b56 100644
--- a/lib/librte_eal/linux/eal/eal_vfio.c
+++ b/lib/librte_eal/linux/eal/eal_vfio.c
@@ -93,6 +93,7 @@ is_null_map(const struct user_mem_map *map)
 	return map->addr == 0 && map->iova == 0 && map->len == 0;
 }
 
+#ifndef RTE_ARCH_PPC_64
 /* we may need to merge user mem maps together in case of user mapping/unmapping
  * chunks of memory, so we'll need a comparator function to sort segments.
  */
@@ -126,6 +127,7 @@ user_mem_map_cmp(const void *a, const void *b)
 
 	return 0;
 }
+#endif
 
 /* adjust user map entry. this may result in shortening of existing map, or in
  * splitting existing map in two pieces.
@@ -162,6 +164,7 @@ adjust_map(struct user_mem_map *src, struct user_mem_map *end,
 	}
 }
 
+#ifndef RTE_ARCH_PPC_64
 /* try merging two maps into one, return 1 if succeeded */
 static int
 merge_map(struct user_mem_map *left, struct user_mem_map *right)
@@ -177,6 +180,7 @@ merge_map(struct user_mem_map *left, struct user_mem_map *right)
 
 	return 1;
 }
+#endif
 
 static struct user_mem_map *
 find_user_mem_map(struct user_mem_maps *user_mem_maps, uint64_t addr,
@@ -211,6 +215,16 @@ find_user_mem_map(struct user_mem_maps *user_mem_maps, uint64_t addr,
 	return NULL;
 }
 
+#ifdef RTE_ARCH_PPC_64
+/* Recreation of DMA window requires unregistering DMA memory.
+ * Compaction confuses the logic and causes false reports in the recreation.
+ * For now, we do not compact user maps in ppc64le.
+ */
+static void
+compact_user_maps(__rte_unused struct user_mem_maps *user_mem_maps)
+{
+}
+#else
 /* this will sort all user maps, and merge/compact any adjacent maps */
 static void
 compact_user_maps(struct user_mem_maps *user_mem_maps)
@@ -256,6 +270,7 @@ compact_user_maps(struct user_mem_maps *user_mem_maps)
 		user_mem_maps->n_maps = cur_idx;
 	}
 }
+#endif
 
 static int
 vfio_open_group_fd(int iommu_group_num)
@@ -1306,6 +1321,7 @@ vfio_type1_dma_map(int vfio_container_fd)
 	return rte_memseg_walk(type1_map, &vfio_container_fd);
 }
 
+#ifdef RTE_ARCH_PPC_64
 static int
 vfio_spapr_dma_do_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
 		uint64_t len, int do_map)
@@ -1357,14 +1373,6 @@ vfio_spapr_dma_do_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
 		}
 
 	} else {
-		ret = ioctl(vfio_container_fd,
-				VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY, &reg);
-		if (ret) {
-			RTE_LOG(ERR, EAL, "  cannot unregister vaddr for IOMMU, error %i (%s)\n",
-					errno, strerror(errno));
-			return -1;
-		}
-
 		memset(&dma_unmap, 0, sizeof(dma_unmap));
 		dma_unmap.argsz = sizeof(struct vfio_iommu_type1_dma_unmap);
 		dma_unmap.size = len;
@@ -1377,24 +1385,50 @@ vfio_spapr_dma_do_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
 					errno, strerror(errno));
 			return -1;
 		}
+
+		ret = ioctl(vfio_container_fd,
+				VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY, &reg);
+		if (ret) {
+			RTE_LOG(ERR, EAL, "  cannot unregister vaddr for IOMMU, error %i (%s)\n",
+					errno, strerror(errno));
+			return -1;
+		}
 	}
 
 	return 0;
 }
 
+struct spapr_remap_walk_param {
+	int vfio_container_fd;
+	uint64_t window_size;
+};
+
 static int
 vfio_spapr_map_walk(const struct rte_memseg_list *msl,
 		const struct rte_memseg *ms, void *arg)
 {
-	int *vfio_container_fd = arg;
+	struct spapr_remap_walk_param *p = arg;
 
-	if (msl->external)
+	if (msl->external || ms->iova >= p->window_size)
 		return 0;
 
-	return vfio_spapr_dma_do_map(*vfio_container_fd, ms->addr_64, ms->iova,
+	return vfio_spapr_dma_do_map(p->vfio_container_fd, ms->addr_64, ms->iova,
 			ms->len, 1);
 }
 
+static int
+vfio_spapr_unmap_walk(const struct rte_memseg_list *msl,
+		const struct rte_memseg *ms, void *arg)
+{
+	struct spapr_remap_walk_param *p = arg;
+
+	if (msl->external || ms->iova >= p->window_size)
+		return 0;
+
+	return vfio_spapr_dma_do_map(p->vfio_container_fd, ms->addr_64, ms->iova,
+			ms->len, 0);
+}
+
 struct spapr_walk_param {
 	uint64_t window_size;
 	uint64_t hugepage_sz;
@@ -1481,14 +1515,13 @@ vfio_spapr_create_new_dma_window(int vfio_container_fd,
 	return 0;
 }
 
+static struct vfio_iommu_spapr_tce_create prev_create;
+
 static int
 vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
 		uint64_t len, int do_map)
 {
-	struct spapr_walk_param param;
-	struct vfio_iommu_spapr_tce_create create = {
-		.argsz = sizeof(create),
-	};
+	struct vfio_iommu_spapr_tce_create create;
 	struct vfio_config *vfio_cfg;
 	struct user_mem_maps *user_mem_maps;
 	int i, ret = 0;
@@ -1502,43 +1535,59 @@ vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
 	user_mem_maps = &vfio_cfg->mem_maps;
 	rte_spinlock_recursive_lock(&user_mem_maps->lock);
 
-	/* check if window size needs to be adjusted */
-	memset(&param, 0, sizeof(param));
-
-	/* we're inside a callback so use thread-unsafe version */
-	if (rte_memseg_walk_thread_unsafe(vfio_spapr_window_size_walk,
-				&param) < 0) {
-		RTE_LOG(ERR, EAL, "Could not get window size\n");
-		ret = -1;
-		goto out;
-	}
+	memcpy(&create, &prev_create, sizeof(create));
 
 	/* also check user maps */
 	for (i = 0; i < user_mem_maps->n_maps; i++) {
-		uint64_t max = user_mem_maps->maps[i].iova +
-				user_mem_maps->maps[i].len;
-		create.window_size = RTE_MAX(create.window_size, max);
+		struct user_mem_map *map = &user_mem_maps->maps[i];
+
+		if (vaddr == map->addr && len == map->len)
+			continue;
+		create.window_size = RTE_MAX(create.window_size, map->iova + map->len);
 	}
 
 	/* sPAPR requires window size to be a power of 2 */
-	create.window_size = rte_align64pow2(param.window_size);
-	create.page_shift = __builtin_ctzll(param.hugepage_sz);
-	create.levels = 1;
+	create.window_size = rte_align64pow2(create.window_size);
 
 	if (do_map) {
-		void *addr;
 		/* re-create window and remap the entire memory */
-		if (iova > create.window_size) {
+		if (iova + len > create.window_size) {
+			struct spapr_remap_walk_param param = {
+				.vfio_container_fd = vfio_container_fd,
+			    .window_size = create.window_size,
+			};
+
+			/* we're inside a callback, so use thread-unsafe version
+			 */
+			rte_memseg_walk_thread_unsafe(vfio_spapr_unmap_walk,
+					&param);
+			/* destruct all user maps */
+			for (i = 0; i < user_mem_maps->n_maps; i++) {
+				struct user_mem_map *map =
+						&user_mem_maps->maps[i];
+				if (vaddr == map->addr && len == map->len)
+					continue;
+				if (vfio_spapr_dma_do_map(vfio_container_fd,
+						map->addr, map->iova, map->len,
+						0)) {
+					RTE_LOG(ERR, EAL, "Could not destruct user DMA maps\n");
+					ret = -1;
+					goto out;
+				}
+			}
+
+			create.window_size = rte_align64pow2(iova + len);
 			if (vfio_spapr_create_new_dma_window(vfio_container_fd,
 					&create) < 0) {
 				RTE_LOG(ERR, EAL, "Could not create new DMA window\n");
 				ret = -1;
 				goto out;
 			}
+			memcpy(&prev_create, &create, sizeof(create));
 			/* we're inside a callback, so use thread-unsafe version
 			 */
 			if (rte_memseg_walk_thread_unsafe(vfio_spapr_map_walk,
-					&vfio_container_fd) < 0) {
+					&param) < 0) {
 				RTE_LOG(ERR, EAL, "Could not recreate DMA maps\n");
 				ret = -1;
 				goto out;
@@ -1547,6 +1596,8 @@ vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
 			for (i = 0; i < user_mem_maps->n_maps; i++) {
 				struct user_mem_map *map =
 						&user_mem_maps->maps[i];
+				if (vaddr == map->addr && len == map->len)
+					continue;
 				if (vfio_spapr_dma_do_map(vfio_container_fd,
 						map->addr, map->iova, map->len,
 						1)) {
@@ -1556,23 +1607,8 @@ vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
 				}
 			}
 		}
-
-		/* now that we've remapped all of the memory that was present
-		 * before, map the segment that we were requested to map.
-		 *
-		 * however, if we were called by the callback, the memory we
-		 * were called with was already in the memseg list, so previous
-		 * mapping should've mapped that segment already.
-		 *
-		 * virt2memseg_list is a relatively cheap check, so use that. if
-		 * memory is within any memseg list, it's a memseg, so it's
-		 * already mapped.
-		 */
-		addr = (void *)(uintptr_t)vaddr;
-		if (rte_mem_virt2memseg_list(addr) == NULL &&
-				vfio_spapr_dma_do_map(vfio_container_fd,
-					vaddr, iova, len, 1) < 0) {
-			RTE_LOG(ERR, EAL, "Could not map segment\n");
+		if (vfio_spapr_dma_do_map(vfio_container_fd, vaddr, iova, len, 1)) {
+			RTE_LOG(ERR, EAL, "Failed to map DMA\n");
 			ret = -1;
 			goto out;
 		}
@@ -1613,6 +1649,7 @@ vfio_spapr_dma_map(int vfio_container_fd)
 		RTE_LOG(ERR, EAL, "Could not create new DMA window\n");
 		return -1;
 	}
+	memcpy(&prev_create, &create, sizeof(create));
 
 	/* map all DPDK segments for DMA. use 1:1 PA to IOVA mapping */
 	if (rte_memseg_walk(vfio_spapr_map_walk, &vfio_container_fd) < 0)
@@ -1620,6 +1657,21 @@ vfio_spapr_dma_map(int vfio_container_fd)
 
 	return 0;
 }
+#else
+static int
+vfio_spapr_dma_mem_map(int __rte_unused vfio_container_fd,
+			uint64_t __rte_unused vaddr,
+			uint64_t __rte_unused iova, uint64_t __rte_unused len,
+			int __rte_unused do_map)
+{
+	return 0;
+}
+static int
+vfio_spapr_dma_map(int __rte_unused vfio_container_fd)
+{
+	return 0;
+}
+#endif
 
 static int
 vfio_noiommu_dma_map(int __rte_unused vfio_container_fd)
-- 
2.17.1


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [dpdk-dev] [PATCH] vfio: fix expanding DMA area in ppc64le
  2019-06-12  6:33 [dpdk-dev] [PATCH] vfio: fix expanding DMA area in ppc64le Takeshi Yoshimura
  2019-06-12 14:06 ` Aaron Conole
  2019-06-13  2:22 ` Takeshi Yoshimura
@ 2019-06-13  2:30 ` Takeshi T Yoshimura
  2 siblings, 0 replies; 16+ messages in thread
From: Takeshi T Yoshimura @ 2019-06-13  2:30 UTC (permalink / raw)
  To: dev; +Cc: David Christensen, Pradeep Satyanarayana, aconole

>宛先: dev@dpdk.org
>送信元: Takeshi Yoshimura <tyos@jp.ibm.com>
>日付: 2019/06/13 11:22AM
>Cc: drc@ibm.com, pradeep@us.ibm.com, Takeshi Yoshimura
><tyos@jp.ibm.com>
>件名: [EXTERNAL] [PATCH] vfio: fix expanding DMA area in ppc64le
>
>In ppc64le, expanding DMA areas always fail because we cannot remove
>a DMA window. As a result, we cannot allocate more than one memseg in
>ppc64le. This is because vfio_spapr_dma_mem_map() doesn't unmap all
>the mapped DMA before removing the window. This patch fixes this
>incorrect behavior.
>
>I added a global variable to track current window size since we do
>not have better ways to get exact size of it than doing so. sPAPR
>IOMMU seems not to provide any ways to get window size with ioctl
>interfaces. rte_memseg_walk*() is currently used to calculate window
>size, but it walks memsegs that are marked as used, not mapped. So,
>we need to determine if a given memseg is mapped or not, otherwise
>the ioctl reports errors due to attempting to unregister memory
>addresses that are not registered. The global variable is excluded
>in non-ppc64le binaries.
>
>Similar problems happen in user maps. We need to avoid attempting to
>unmap the address that is given as the function's parameter. The
>compaction of user maps prevents us from passing correct length for
>unmapping DMA at the window recreation. So, I removed it in ppc64le.
>
>I also fixed the order of ioctl for unregister and unmap. The ioctl
>for unregister sometimes report device busy errors due to the
>existence of mapped area.
>
>Signed-off-by: Takeshi Yoshimura <tyos@jp.ibm.com>
>---
> lib/librte_eal/linux/eal/eal_vfio.c | 154
>+++++++++++++++++++---------
> 1 file changed, 103 insertions(+), 51 deletions(-)
>
>diff --git a/lib/librte_eal/linux/eal/eal_vfio.c
>b/lib/librte_eal/linux/eal/eal_vfio.c
>index f16c5c3c0..c1b275b56 100644
>--- a/lib/librte_eal/linux/eal/eal_vfio.c
>+++ b/lib/librte_eal/linux/eal/eal_vfio.c
>@@ -93,6 +93,7 @@ is_null_map(const struct user_mem_map *map)
> 	return map->addr == 0 && map->iova == 0 && map->len == 0;
> }
> 
>+#ifndef RTE_ARCH_PPC_64
> /* we may need to merge user mem maps together in case of user
>mapping/unmapping
>  * chunks of memory, so we'll need a comparator function to sort
>segments.
>  */
>@@ -126,6 +127,7 @@ user_mem_map_cmp(const void *a, const void *b)
> 
> 	return 0;
> }
>+#endif
> 
> /* adjust user map entry. this may result in shortening of existing
>map, or in
>  * splitting existing map in two pieces.
>@@ -162,6 +164,7 @@ adjust_map(struct user_mem_map *src, struct
>user_mem_map *end,
> 	}
> }
> 
>+#ifndef RTE_ARCH_PPC_64
> /* try merging two maps into one, return 1 if succeeded */
> static int
> merge_map(struct user_mem_map *left, struct user_mem_map *right)
>@@ -177,6 +180,7 @@ merge_map(struct user_mem_map *left, struct
>user_mem_map *right)
> 
> 	return 1;
> }
>+#endif
> 
> static struct user_mem_map *
> find_user_mem_map(struct user_mem_maps *user_mem_maps, uint64_t
>addr,
>@@ -211,6 +215,16 @@ find_user_mem_map(struct user_mem_maps
>*user_mem_maps, uint64_t addr,
> 	return NULL;
> }
> 
>+#ifdef RTE_ARCH_PPC_64
>+/* Recreation of DMA window requires unregistering DMA memory.
>+ * Compaction confuses the logic and causes false reports in the
>recreation.
>+ * For now, we do not compact user maps in ppc64le.
>+ */
>+static void
>+compact_user_maps(__rte_unused struct user_mem_maps *user_mem_maps)
>+{
>+}
>+#else
> /* this will sort all user maps, and merge/compact any adjacent maps
>*/
> static void
> compact_user_maps(struct user_mem_maps *user_mem_maps)
>@@ -256,6 +270,7 @@ compact_user_maps(struct user_mem_maps
>*user_mem_maps)
> 		user_mem_maps->n_maps = cur_idx;
> 	}
> }
>+#endif
> 
> static int
> vfio_open_group_fd(int iommu_group_num)
>@@ -1306,6 +1321,7 @@ vfio_type1_dma_map(int vfio_container_fd)
> 	return rte_memseg_walk(type1_map, &vfio_container_fd);
> }
> 
>+#ifdef RTE_ARCH_PPC_64
> static int
> vfio_spapr_dma_do_map(int vfio_container_fd, uint64_t vaddr,
>uint64_t iova,
> 		uint64_t len, int do_map)
>@@ -1357,14 +1373,6 @@ vfio_spapr_dma_do_map(int vfio_container_fd,
>uint64_t vaddr, uint64_t iova,
> 		}
> 
> 	} else {
>-		ret = ioctl(vfio_container_fd,
>-				VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY, &reg);
>-		if (ret) {
>-			RTE_LOG(ERR, EAL, "  cannot unregister vaddr for IOMMU, error %i
>(%s)\n",
>-					errno, strerror(errno));
>-			return -1;
>-		}
>-
> 		memset(&dma_unmap, 0, sizeof(dma_unmap));
> 		dma_unmap.argsz = sizeof(struct vfio_iommu_type1_dma_unmap);
> 		dma_unmap.size = len;
>@@ -1377,24 +1385,50 @@ vfio_spapr_dma_do_map(int vfio_container_fd,
>uint64_t vaddr, uint64_t iova,
> 					errno, strerror(errno));
> 			return -1;
> 		}
>+
>+		ret = ioctl(vfio_container_fd,
>+				VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY, &reg);
>+		if (ret) {
>+			RTE_LOG(ERR, EAL, "  cannot unregister vaddr for IOMMU, error %i
>(%s)\n",
>+					errno, strerror(errno));
>+			return -1;
>+		}
> 	}
> 
> 	return 0;
> }
> 
>+struct spapr_remap_walk_param {
>+	int vfio_container_fd;
>+	uint64_t window_size;
>+};
>+
> static int
> vfio_spapr_map_walk(const struct rte_memseg_list *msl,
> 		const struct rte_memseg *ms, void *arg)
> {
>-	int *vfio_container_fd = arg;
>+	struct spapr_remap_walk_param *p = arg;
> 
>-	if (msl->external)
>+	if (msl->external || ms->iova >= p->window_size)
> 		return 0;
> 
>-	return vfio_spapr_dma_do_map(*vfio_container_fd, ms->addr_64,
>ms->iova,
>+	return vfio_spapr_dma_do_map(p->vfio_container_fd, ms->addr_64,
>ms->iova,
> 			ms->len, 1);
> }
> 
>+static int
>+vfio_spapr_unmap_walk(const struct rte_memseg_list *msl,
>+		const struct rte_memseg *ms, void *arg)
>+{
>+	struct spapr_remap_walk_param *p = arg;
>+
>+	if (msl->external || ms->iova >= p->window_size)
>+		return 0;
>+
>+	return vfio_spapr_dma_do_map(p->vfio_container_fd, ms->addr_64,
>ms->iova,
>+			ms->len, 0);
>+}
>+
> struct spapr_walk_param {
> 	uint64_t window_size;
> 	uint64_t hugepage_sz;
>@@ -1481,14 +1515,13 @@ vfio_spapr_create_new_dma_window(int
>vfio_container_fd,
> 	return 0;
> }
> 
>+static struct vfio_iommu_spapr_tce_create prev_create;
>+
> static int
> vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr,
>uint64_t iova,
> 		uint64_t len, int do_map)
> {
>-	struct spapr_walk_param param;
>-	struct vfio_iommu_spapr_tce_create create = {
>-		.argsz = sizeof(create),
>-	};
>+	struct vfio_iommu_spapr_tce_create create;
> 	struct vfio_config *vfio_cfg;
> 	struct user_mem_maps *user_mem_maps;
> 	int i, ret = 0;
>@@ -1502,43 +1535,59 @@ vfio_spapr_dma_mem_map(int vfio_container_fd,
>uint64_t vaddr, uint64_t iova,
> 	user_mem_maps = &vfio_cfg->mem_maps;
> 	rte_spinlock_recursive_lock(&user_mem_maps->lock);
> 
>-	/* check if window size needs to be adjusted */
>-	memset(&param, 0, sizeof(param));
>-
>-	/* we're inside a callback so use thread-unsafe version */
>-	if (rte_memseg_walk_thread_unsafe(vfio_spapr_window_size_walk,
>-				&param) < 0) {
>-		RTE_LOG(ERR, EAL, "Could not get window size\n");
>-		ret = -1;
>-		goto out;
>-	}
>+	memcpy(&create, &prev_create, sizeof(create));
> 
> 	/* also check user maps */
> 	for (i = 0; i < user_mem_maps->n_maps; i++) {
>-		uint64_t max = user_mem_maps->maps[i].iova +
>-				user_mem_maps->maps[i].len;
>-		create.window_size = RTE_MAX(create.window_size, max);
>+		struct user_mem_map *map = &user_mem_maps->maps[i];
>+
>+		if (vaddr == map->addr && len == map->len)
>+			continue;
>+		create.window_size = RTE_MAX(create.window_size, map->iova +
>map->len);
> 	}
> 
> 	/* sPAPR requires window size to be a power of 2 */
>-	create.window_size = rte_align64pow2(param.window_size);
>-	create.page_shift = __builtin_ctzll(param.hugepage_sz);
>-	create.levels = 1;
>+	create.window_size = rte_align64pow2(create.window_size);
> 
> 	if (do_map) {
>-		void *addr;
> 		/* re-create window and remap the entire memory */
>-		if (iova > create.window_size) {
>+		if (iova + len > create.window_size) {
>+			struct spapr_remap_walk_param param = {
>+				.vfio_container_fd = vfio_container_fd,
>+			    .window_size = create.window_size,
>+			};
>+
>+			/* we're inside a callback, so use thread-unsafe version
>+			 */
>+			rte_memseg_walk_thread_unsafe(vfio_spapr_unmap_walk,
>+					&param);
>+			/* destruct all user maps */
>+			for (i = 0; i < user_mem_maps->n_maps; i++) {
>+				struct user_mem_map *map =
>+						&user_mem_maps->maps[i];
>+				if (vaddr == map->addr && len == map->len)
>+					continue;
>+				if (vfio_spapr_dma_do_map(vfio_container_fd,
>+						map->addr, map->iova, map->len,
>+						0)) {
>+					RTE_LOG(ERR, EAL, "Could not destruct user DMA maps\n");
>+					ret = -1;
>+					goto out;
>+				}
>+			}
>+
>+			create.window_size = rte_align64pow2(iova + len);
> 			if (vfio_spapr_create_new_dma_window(vfio_container_fd,
> 					&create) < 0) {
> 				RTE_LOG(ERR, EAL, "Could not create new DMA window\n");
> 				ret = -1;
> 				goto out;
> 			}
>+			memcpy(&prev_create, &create, sizeof(create));
> 			/* we're inside a callback, so use thread-unsafe version
> 			 */
> 			if (rte_memseg_walk_thread_unsafe(vfio_spapr_map_walk,
>-					&vfio_container_fd) < 0) {
>+					&param) < 0) {
> 				RTE_LOG(ERR, EAL, "Could not recreate DMA maps\n");
> 				ret = -1;
> 				goto out;
>@@ -1547,6 +1596,8 @@ vfio_spapr_dma_mem_map(int vfio_container_fd,
>uint64_t vaddr, uint64_t iova,
> 			for (i = 0; i < user_mem_maps->n_maps; i++) {
> 				struct user_mem_map *map =
> 						&user_mem_maps->maps[i];
>+				if (vaddr == map->addr && len == map->len)
>+					continue;
> 				if (vfio_spapr_dma_do_map(vfio_container_fd,
> 						map->addr, map->iova, map->len,
> 						1)) {
>@@ -1556,23 +1607,8 @@ vfio_spapr_dma_mem_map(int vfio_container_fd,
>uint64_t vaddr, uint64_t iova,
> 				}
> 			}
> 		}
>-
>-		/* now that we've remapped all of the memory that was present
>-		 * before, map the segment that we were requested to map.
>-		 *
>-		 * however, if we were called by the callback, the memory we
>-		 * were called with was already in the memseg list, so previous
>-		 * mapping should've mapped that segment already.
>-		 *
>-		 * virt2memseg_list is a relatively cheap check, so use that. if
>-		 * memory is within any memseg list, it's a memseg, so it's
>-		 * already mapped.
>-		 */
>-		addr = (void *)(uintptr_t)vaddr;
>-		if (rte_mem_virt2memseg_list(addr) == NULL &&
>-				vfio_spapr_dma_do_map(vfio_container_fd,
>-					vaddr, iova, len, 1) < 0) {
>-			RTE_LOG(ERR, EAL, "Could not map segment\n");
>+		if (vfio_spapr_dma_do_map(vfio_container_fd, vaddr, iova, len, 1))
>{
>+			RTE_LOG(ERR, EAL, "Failed to map DMA\n");
> 			ret = -1;
> 			goto out;
> 		}
>@@ -1613,6 +1649,7 @@ vfio_spapr_dma_map(int vfio_container_fd)
> 		RTE_LOG(ERR, EAL, "Could not create new DMA window\n");
> 		return -1;
> 	}
>+	memcpy(&prev_create, &create, sizeof(create));
> 
> 	/* map all DPDK segments for DMA. use 1:1 PA to IOVA mapping */
> 	if (rte_memseg_walk(vfio_spapr_map_walk, &vfio_container_fd) < 0)
>@@ -1620,6 +1657,21 @@ vfio_spapr_dma_map(int vfio_container_fd)
> 
> 	return 0;
> }
>+#else
>+static int
>+vfio_spapr_dma_mem_map(int __rte_unused vfio_container_fd,
>+			uint64_t __rte_unused vaddr,
>+			uint64_t __rte_unused iova, uint64_t __rte_unused len,
>+			int __rte_unused do_map)
>+{
>+	return 0;
>+}
>+static int
>+vfio_spapr_dma_map(int __rte_unused vfio_container_fd)
>+{
>+	return 0;
>+}
>+#endif
> 
> static int
> vfio_noiommu_dma_map(int __rte_unused vfio_container_fd)
>-- 
>2.17.1
>
>

Added CC: aconole@redhat.com
I updated the patch so that it does not break builds on x86.


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [dpdk-dev] [PATCH] vfio: fix expanding DMA area in ppc64le
  2019-06-13  2:22 ` Takeshi Yoshimura
@ 2019-06-13 17:37   ` David Christensen
  2019-06-14  7:34   ` David Marchand
                     ` (5 subsequent siblings)
  6 siblings, 0 replies; 16+ messages in thread
From: David Christensen @ 2019-06-13 17:37 UTC (permalink / raw)
  To: Takeshi Yoshimura <tyos@jp.ibm.com>; Anatoly Burakov, dev
  Cc: drc, pradeep


Adding the vfio maintainer on the To: line.

On 6/12/19 7:22 PM, Takeshi Yoshimura wrote:
> In ppc64le, expanding DMA areas always fail because we cannot remove
> a DMA window. As a result, we cannot allocate more than one memseg in
> ppc64le. This is because vfio_spapr_dma_mem_map() doesn't unmap all
> the mapped DMA before removing the window. This patch fixes this
> incorrect behavior.
> 
> I added a global variable to track current window size since we do
> not have better ways to get exact size of it than doing so. sPAPR
> IOMMU seems not to provide any ways to get window size with ioctl
> interfaces. rte_memseg_walk*() is currently used to calculate window
> size, but it walks memsegs that are marked as used, not mapped. So,
> we need to determine if a given memseg is mapped or not, otherwise
> the ioctl reports errors due to attempting to unregister memory
> addresses that are not registered. The global variable is excluded
> in non-ppc64le binaries.
> 
> Similar problems happen in user maps. We need to avoid attempting to
> unmap the address that is given as the function's parameter. The
> compaction of user maps prevents us from passing correct length for
> unmapping DMA at the window recreation. So, I removed it in ppc64le.
> 
> I also fixed the order of ioctl for unregister and unmap. The ioctl
> for unregister sometimes report device busy errors due to the
> existence of mapped area.

I count at least three different changes happening in this commit.  Can 
you break it up into a multi-part patchset that targets each change 
individually?  It would be best if you break out the PPC64 changes 
separately from the changes that affect all architectures.

> 
> Signed-off-by: Takeshi Yoshimura <tyos@jp.ibm.com>
> ---
>   lib/librte_eal/linux/eal/eal_vfio.c | 154 +++++++++++++++++++---------
>   1 file changed, 103 insertions(+), 51 deletions(-)
> 
> diff --git a/lib/librte_eal/linux/eal/eal_vfio.c b/lib/librte_eal/linux/eal/eal_vfio.c
> index f16c5c3c0..c1b275b56 100644
> --- a/lib/librte_eal/linux/eal/eal_vfio.c
> +++ b/lib/librte_eal/linux/eal/eal_vfio.c
> @@ -93,6 +93,7 @@ is_null_map(const struct user_mem_map *map)
>   	return map->addr == 0 && map->iova == 0 && map->len == 0;
>   }
> 
> +#ifndef RTE_ARCH_PPC_64
>   /* we may need to merge user mem maps together in case of user mapping/unmapping
>    * chunks of memory, so we'll need a comparator function to sort segments.
>    */
> @@ -126,6 +127,7 @@ user_mem_map_cmp(const void *a, const void *b)
> 
>   	return 0;
>   }
> +#endif
> 
>   /* adjust user map entry. this may result in shortening of existing map, or in
>    * splitting existing map in two pieces.
> @@ -162,6 +164,7 @@ adjust_map(struct user_mem_map *src, struct user_mem_map *end,
>   	}
>   }
> 
> +#ifndef RTE_ARCH_PPC_64
>   /* try merging two maps into one, return 1 if succeeded */
>   static int
>   merge_map(struct user_mem_map *left, struct user_mem_map *right)
> @@ -177,6 +180,7 @@ merge_map(struct user_mem_map *left, struct user_mem_map *right)
> 
>   	return 1;
>   }
> +#endif
> 
>   static struct user_mem_map *
>   find_user_mem_map(struct user_mem_maps *user_mem_maps, uint64_t addr,
> @@ -211,6 +215,16 @@ find_user_mem_map(struct user_mem_maps *user_mem_maps, uint64_t addr,
>   	return NULL;
>   }
> 
> +#ifdef RTE_ARCH_PPC_64
> +/* Recreation of DMA window requires unregistering DMA memory.
> + * Compaction confuses the logic and causes false reports in the recreation.
> + * For now, we do not compact user maps in ppc64le.
> + */
> +static void
> +compact_user_maps(__rte_unused struct user_mem_maps *user_mem_maps)
> +{
> +}
> +#else
>   /* this will sort all user maps, and merge/compact any adjacent maps */
>   static void
>   compact_user_maps(struct user_mem_maps *user_mem_maps)
> @@ -256,6 +270,7 @@ compact_user_maps(struct user_mem_maps *user_mem_maps)
>   		user_mem_maps->n_maps = cur_idx;
>   	}
>   }
> +#endif
> 
>   static int
>   vfio_open_group_fd(int iommu_group_num)
> @@ -1306,6 +1321,7 @@ vfio_type1_dma_map(int vfio_container_fd)
>   	return rte_memseg_walk(type1_map, &vfio_container_fd);
>   }

The changes below starting with this #ifdef hide a lot of code on 
x86/ARM.  Was that your intent?  Does x86/ARM still work without it?

> +#ifdef RTE_ARCH_PPC_64
>   static int
>   vfio_spapr_dma_do_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
>   		uint64_t len, int do_map)
> @@ -1357,14 +1373,6 @@ vfio_spapr_dma_do_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
>   		}
> 
>   	} else {
> -		ret = ioctl(vfio_container_fd,
> -				VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY, &reg);
> -		if (ret) {
> -			RTE_LOG(ERR, EAL, "  cannot unregister vaddr for IOMMU, error %i (%s)\n",
> -					errno, strerror(errno));
> -			return -1;
> -		}
> -
>   		memset(&dma_unmap, 0, sizeof(dma_unmap));
>   		dma_unmap.argsz = sizeof(struct vfio_iommu_type1_dma_unmap);
>   		dma_unmap.size = len;
> @@ -1377,24 +1385,50 @@ vfio_spapr_dma_do_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
>   					errno, strerror(errno));
>   			return -1;
>   		}
> +
> +		ret = ioctl(vfio_container_fd,
> +				VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY, &reg);
> +		if (ret) {
> +			RTE_LOG(ERR, EAL, "  cannot unregister vaddr for IOMMU, error %i (%s)\n",
> +					errno, strerror(errno));
> +			return -1;
> +		}
>   	}
> 
>   	return 0;
>   }
> 
> +struct spapr_remap_walk_param {
> +	int vfio_container_fd;
> +	uint64_t window_size;
> +};
> +
>   static int
>   vfio_spapr_map_walk(const struct rte_memseg_list *msl,
>   		const struct rte_memseg *ms, void *arg)
>   {
> -	int *vfio_container_fd = arg;
> +	struct spapr_remap_walk_param *p = arg;
> 
> -	if (msl->external)
> +	if (msl->external || ms->iova >= p->window_size)
>   		return 0;
> 
> -	return vfio_spapr_dma_do_map(*vfio_container_fd, ms->addr_64, ms->iova,
> +	return vfio_spapr_dma_do_map(p->vfio_container_fd, ms->addr_64, ms->iova,
>   			ms->len, 1);
>   }
> 
> +static int
> +vfio_spapr_unmap_walk(const struct rte_memseg_list *msl,
> +		const struct rte_memseg *ms, void *arg)
> +{
> +	struct spapr_remap_walk_param *p = arg;
> +
> +	if (msl->external || ms->iova >= p->window_size)
> +		return 0;
> +
> +	return vfio_spapr_dma_do_map(p->vfio_container_fd, ms->addr_64, ms->iova,
> +			ms->len, 0);
> +}
> +
>   struct spapr_walk_param {
>   	uint64_t window_size;
>   	uint64_t hugepage_sz;
> @@ -1481,14 +1515,13 @@ vfio_spapr_create_new_dma_window(int vfio_container_fd,
>   	return 0;
>   }
> 
> +static struct vfio_iommu_spapr_tce_create prev_create;

Not a fan of global variables.  Also, you're using the value of this 
uninitialized variable in the first invocation of vfio_spapr_dma_mem_map().

> +
>   static int
>   vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
>   		uint64_t len, int do_map)
>   {
> -	struct spapr_walk_param param;
> -	struct vfio_iommu_spapr_tce_create create = {
> -		.argsz = sizeof(create),
> -	};
> +	struct vfio_iommu_spapr_tce_create create;
>   	struct vfio_config *vfio_cfg;
>   	struct user_mem_maps *user_mem_maps;
>   	int i, ret = 0;
> @@ -1502,43 +1535,59 @@ vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
>   	user_mem_maps = &vfio_cfg->mem_maps;
>   	rte_spinlock_recursive_lock(&user_mem_maps->lock);
> 
> -	/* check if window size needs to be adjusted */
> -	memset(&param, 0, sizeof(param));
> -
> -	/* we're inside a callback so use thread-unsafe version */
> -	if (rte_memseg_walk_thread_unsafe(vfio_spapr_window_size_walk,
> -				&param) < 0) {
> -		RTE_LOG(ERR, EAL, "Could not get window size\n");
> -		ret = -1;
> -		goto out;
> -	}
> +	memcpy(&create, &prev_create, sizeof(create));
> 
>   	/* also check user maps */
>   	for (i = 0; i < user_mem_maps->n_maps; i++) {
> -		uint64_t max = user_mem_maps->maps[i].iova +
> -				user_mem_maps->maps[i].len;
> -		create.window_size = RTE_MAX(create.window_size, max);
> +		struct user_mem_map *map = &user_mem_maps->maps[i];
> +
> +		if (vaddr == map->addr && len == map->len)
> +			continue;
> +		create.window_size = RTE_MAX(create.window_size, map->iova + map->len);
>   	}
> 
>   	/* sPAPR requires window size to be a power of 2 */
> -	create.window_size = rte_align64pow2(param.window_size);
> -	create.page_shift = __builtin_ctzll(param.hugepage_sz);
> -	create.levels = 1;
> +	create.window_size = rte_align64pow2(create.window_size);
> 
>   	if (do_map) {
> -		void *addr;
>   		/* re-create window and remap the entire memory */
> -		if (iova > create.window_size) {
> +		if (iova + len > create.window_size) {
> +			struct spapr_remap_walk_param param = {
> +				.vfio_container_fd = vfio_container_fd,
> +			    .window_size = create.window_size,
> +			};
> +
> +			/* we're inside a callback, so use thread-unsafe version
> +			 */
> +			rte_memseg_walk_thread_unsafe(vfio_spapr_unmap_walk,
> +					&param);
> +			/* destruct all user maps */
> +			for (i = 0; i < user_mem_maps->n_maps; i++) {
> +				struct user_mem_map *map =
> +						&user_mem_maps->maps[i];
> +				if (vaddr == map->addr && len == map->len)
> +					continue;
> +				if (vfio_spapr_dma_do_map(vfio_container_fd,
> +						map->addr, map->iova, map->len,
> +						0)) {
> +					RTE_LOG(ERR, EAL, "Could not destruct user DMA maps\n");
> +					ret = -1;
> +					goto out;
> +				}
> +			}
> +
> +			create.window_size = rte_align64pow2(iova + len);
>   			if (vfio_spapr_create_new_dma_window(vfio_container_fd,
>   					&create) < 0) {
>   				RTE_LOG(ERR, EAL, "Could not create new DMA window\n");
>   				ret = -1;
>   				goto out;
>   			}
> +			memcpy(&prev_create, &create, sizeof(create));
>   			/* we're inside a callback, so use thread-unsafe version
>   			 */
>   			if (rte_memseg_walk_thread_unsafe(vfio_spapr_map_walk,
> -					&vfio_container_fd) < 0) {
> +					&param) < 0) {
>   				RTE_LOG(ERR, EAL, "Could not recreate DMA maps\n");
>   				ret = -1;
>   				goto out;
> @@ -1547,6 +1596,8 @@ vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
>   			for (i = 0; i < user_mem_maps->n_maps; i++) {
>   				struct user_mem_map *map =
>   						&user_mem_maps->maps[i];
> +				if (vaddr == map->addr && len == map->len)
> +					continue;
>   				if (vfio_spapr_dma_do_map(vfio_container_fd,
>   						map->addr, map->iova, map->len,
>   						1)) {
> @@ -1556,23 +1607,8 @@ vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
>   				}
>   			}
>   		}
> -
> -		/* now that we've remapped all of the memory that was present
> -		 * before, map the segment that we were requested to map.
> -		 *
> -		 * however, if we were called by the callback, the memory we
> -		 * were called with was already in the memseg list, so previous
> -		 * mapping should've mapped that segment already.
> -		 *
> -		 * virt2memseg_list is a relatively cheap check, so use that. if
> -		 * memory is within any memseg list, it's a memseg, so it's
> -		 * already mapped.
> -		 */
> -		addr = (void *)(uintptr_t)vaddr;
> -		if (rte_mem_virt2memseg_list(addr) == NULL &&
> -				vfio_spapr_dma_do_map(vfio_container_fd,
> -					vaddr, iova, len, 1) < 0) {
> -			RTE_LOG(ERR, EAL, "Could not map segment\n");
> +		if (vfio_spapr_dma_do_map(vfio_container_fd, vaddr, iova, len, 1)) {
> +			RTE_LOG(ERR, EAL, "Failed to map DMA\n");
>   			ret = -1;
>   			goto out;
>   		}
> @@ -1613,6 +1649,7 @@ vfio_spapr_dma_map(int vfio_container_fd)
>   		RTE_LOG(ERR, EAL, "Could not create new DMA window\n");
>   		return -1;
>   	}
> +	memcpy(&prev_create, &create, sizeof(create));
> 
>   	/* map all DPDK segments for DMA. use 1:1 PA to IOVA mapping */
>   	if (rte_memseg_walk(vfio_spapr_map_walk, &vfio_container_fd) < 0)
> @@ -1620,6 +1657,21 @@ vfio_spapr_dma_map(int vfio_container_fd)
> 
>   	return 0;
>   }
> +#else
> +static int
> +vfio_spapr_dma_mem_map(int __rte_unused vfio_container_fd,
> +			uint64_t __rte_unused vaddr,
> +			uint64_t __rte_unused iova, uint64_t __rte_unused len,
> +			int __rte_unused do_map)
> +{
> +	return 0;
> +}
> +static int
> +vfio_spapr_dma_map(int __rte_unused vfio_container_fd)
> +{
> +	return 0;
> +}
> +#endif
> 
>   static int
>   vfio_noiommu_dma_map(int __rte_unused vfio_container_fd)
> 


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [dpdk-dev] [PATCH] vfio: fix expanding DMA area in ppc64le
  2019-06-13  2:22 ` Takeshi Yoshimura
  2019-06-13 17:37   ` David Christensen
@ 2019-06-14  7:34   ` David Marchand
  2019-06-14  7:49   ` [dpdk-dev] [PATCH v2] " Takeshi Yoshimura
                     ` (4 subsequent siblings)
  6 siblings, 0 replies; 16+ messages in thread
From: David Marchand @ 2019-06-14  7:34 UTC (permalink / raw)
  To: Takeshi Yoshimura; +Cc: dev, drc, pradeep

Before submitting further revisions, please check the documentation at
http://doc.dpdk.org/guides/contributing/patches.html

You are supposed to version your patches and prune old superseded patches
in patchwork.

Thanks.

-- 
David Marchand

On Thu, Jun 13, 2019 at 4:23 AM Takeshi Yoshimura <tyos@jp.ibm.com> wrote:

> In ppc64le, expanding DMA areas always fail because we cannot remove
> a DMA window. As a result, we cannot allocate more than one memseg in
> ppc64le. This is because vfio_spapr_dma_mem_map() doesn't unmap all
> the mapped DMA before removing the window. This patch fixes this
> incorrect behavior.
>
> I added a global variable to track current window size since we do
> not have better ways to get exact size of it than doing so. sPAPR
> IOMMU seems not to provide any ways to get window size with ioctl
> interfaces. rte_memseg_walk*() is currently used to calculate window
> size, but it walks memsegs that are marked as used, not mapped. So,
> we need to determine if a given memseg is mapped or not, otherwise
> the ioctl reports errors due to attempting to unregister memory
> addresses that are not registered. The global variable is excluded
> in non-ppc64le binaries.
>
> Similar problems happen in user maps. We need to avoid attempting to
> unmap the address that is given as the function's parameter. The
> compaction of user maps prevents us from passing correct length for
> unmapping DMA at the window recreation. So, I removed it in ppc64le.
>
> I also fixed the order of ioctl for unregister and unmap. The ioctl
> for unregister sometimes report device busy errors due to the
> existence of mapped area.
>
> Signed-off-by: Takeshi Yoshimura <tyos@jp.ibm.com>
> ---
>  lib/librte_eal/linux/eal/eal_vfio.c | 154 +++++++++++++++++++---------
>  1 file changed, 103 insertions(+), 51 deletions(-)
>
> diff --git a/lib/librte_eal/linux/eal/eal_vfio.c
> b/lib/librte_eal/linux/eal/eal_vfio.c
> index f16c5c3c0..c1b275b56 100644
> --- a/lib/librte_eal/linux/eal/eal_vfio.c
> +++ b/lib/librte_eal/linux/eal/eal_vfio.c
> @@ -93,6 +93,7 @@ is_null_map(const struct user_mem_map *map)
>         return map->addr == 0 && map->iova == 0 && map->len == 0;
>  }
>
> +#ifndef RTE_ARCH_PPC_64
>  /* we may need to merge user mem maps together in case of user
> mapping/unmapping
>   * chunks of memory, so we'll need a comparator function to sort segments.
>   */
> @@ -126,6 +127,7 @@ user_mem_map_cmp(const void *a, const void *b)
>
>         return 0;
>  }
> +#endif
>
>  /* adjust user map entry. this may result in shortening of existing map,
> or in
>   * splitting existing map in two pieces.
> @@ -162,6 +164,7 @@ adjust_map(struct user_mem_map *src, struct
> user_mem_map *end,
>         }
>  }
>
> +#ifndef RTE_ARCH_PPC_64
>  /* try merging two maps into one, return 1 if succeeded */
>  static int
>  merge_map(struct user_mem_map *left, struct user_mem_map *right)
> @@ -177,6 +180,7 @@ merge_map(struct user_mem_map *left, struct
> user_mem_map *right)
>
>         return 1;
>  }
> +#endif
>
>  static struct user_mem_map *
>  find_user_mem_map(struct user_mem_maps *user_mem_maps, uint64_t addr,
> @@ -211,6 +215,16 @@ find_user_mem_map(struct user_mem_maps
> *user_mem_maps, uint64_t addr,
>         return NULL;
>  }
>
> +#ifdef RTE_ARCH_PPC_64
> +/* Recreation of DMA window requires unregistering DMA memory.
> + * Compaction confuses the logic and causes false reports in the
> recreation.
> + * For now, we do not compact user maps in ppc64le.
> + */
> +static void
> +compact_user_maps(__rte_unused struct user_mem_maps *user_mem_maps)
> +{
> +}
> +#else
>  /* this will sort all user maps, and merge/compact any adjacent maps */
>  static void
>  compact_user_maps(struct user_mem_maps *user_mem_maps)
> @@ -256,6 +270,7 @@ compact_user_maps(struct user_mem_maps *user_mem_maps)
>                 user_mem_maps->n_maps = cur_idx;
>         }
>  }
> +#endif
>
>  static int
>  vfio_open_group_fd(int iommu_group_num)
> @@ -1306,6 +1321,7 @@ vfio_type1_dma_map(int vfio_container_fd)
>         return rte_memseg_walk(type1_map, &vfio_container_fd);
>  }
>
> +#ifdef RTE_ARCH_PPC_64
>  static int
>  vfio_spapr_dma_do_map(int vfio_container_fd, uint64_t vaddr, uint64_t
> iova,
>                 uint64_t len, int do_map)
> @@ -1357,14 +1373,6 @@ vfio_spapr_dma_do_map(int vfio_container_fd,
> uint64_t vaddr, uint64_t iova,
>                 }
>
>         } else {
> -               ret = ioctl(vfio_container_fd,
> -                               VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY, &reg);
> -               if (ret) {
> -                       RTE_LOG(ERR, EAL, "  cannot unregister vaddr for
> IOMMU, error %i (%s)\n",
> -                                       errno, strerror(errno));
> -                       return -1;
> -               }
> -
>                 memset(&dma_unmap, 0, sizeof(dma_unmap));
>                 dma_unmap.argsz = sizeof(struct
> vfio_iommu_type1_dma_unmap);
>                 dma_unmap.size = len;
> @@ -1377,24 +1385,50 @@ vfio_spapr_dma_do_map(int vfio_container_fd,
> uint64_t vaddr, uint64_t iova,
>                                         errno, strerror(errno));
>                         return -1;
>                 }
> +
> +               ret = ioctl(vfio_container_fd,
> +                               VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY, &reg);
> +               if (ret) {
> +                       RTE_LOG(ERR, EAL, "  cannot unregister vaddr for
> IOMMU, error %i (%s)\n",
> +                                       errno, strerror(errno));
> +                       return -1;
> +               }
>         }
>
>         return 0;
>  }
>
> +struct spapr_remap_walk_param {
> +       int vfio_container_fd;
> +       uint64_t window_size;
> +};
> +
>  static int
>  vfio_spapr_map_walk(const struct rte_memseg_list *msl,
>                 const struct rte_memseg *ms, void *arg)
>  {
> -       int *vfio_container_fd = arg;
> +       struct spapr_remap_walk_param *p = arg;
>
> -       if (msl->external)
> +       if (msl->external || ms->iova >= p->window_size)
>                 return 0;
>
> -       return vfio_spapr_dma_do_map(*vfio_container_fd, ms->addr_64,
> ms->iova,
> +       return vfio_spapr_dma_do_map(p->vfio_container_fd, ms->addr_64,
> ms->iova,
>                         ms->len, 1);
>  }
>
> +static int
> +vfio_spapr_unmap_walk(const struct rte_memseg_list *msl,
> +               const struct rte_memseg *ms, void *arg)
> +{
> +       struct spapr_remap_walk_param *p = arg;
> +
> +       if (msl->external || ms->iova >= p->window_size)
> +               return 0;
> +
> +       return vfio_spapr_dma_do_map(p->vfio_container_fd, ms->addr_64,
> ms->iova,
> +                       ms->len, 0);
> +}
> +
>  struct spapr_walk_param {
>         uint64_t window_size;
>         uint64_t hugepage_sz;
> @@ -1481,14 +1515,13 @@ vfio_spapr_create_new_dma_window(int
> vfio_container_fd,
>         return 0;
>  }
>
> +static struct vfio_iommu_spapr_tce_create prev_create;
> +
>  static int
>  vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t
> iova,
>                 uint64_t len, int do_map)
>  {
> -       struct spapr_walk_param param;
> -       struct vfio_iommu_spapr_tce_create create = {
> -               .argsz = sizeof(create),
> -       };
> +       struct vfio_iommu_spapr_tce_create create;
>         struct vfio_config *vfio_cfg;
>         struct user_mem_maps *user_mem_maps;
>         int i, ret = 0;
> @@ -1502,43 +1535,59 @@ vfio_spapr_dma_mem_map(int vfio_container_fd,
> uint64_t vaddr, uint64_t iova,
>         user_mem_maps = &vfio_cfg->mem_maps;
>         rte_spinlock_recursive_lock(&user_mem_maps->lock);
>
> -       /* check if window size needs to be adjusted */
> -       memset(&param, 0, sizeof(param));
> -
> -       /* we're inside a callback so use thread-unsafe version */
> -       if (rte_memseg_walk_thread_unsafe(vfio_spapr_window_size_walk,
> -                               &param) < 0) {
> -               RTE_LOG(ERR, EAL, "Could not get window size\n");
> -               ret = -1;
> -               goto out;
> -       }
> +       memcpy(&create, &prev_create, sizeof(create));
>
>         /* also check user maps */
>         for (i = 0; i < user_mem_maps->n_maps; i++) {
> -               uint64_t max = user_mem_maps->maps[i].iova +
> -                               user_mem_maps->maps[i].len;
> -               create.window_size = RTE_MAX(create.window_size, max);
> +               struct user_mem_map *map = &user_mem_maps->maps[i];
> +
> +               if (vaddr == map->addr && len == map->len)
> +                       continue;
> +               create.window_size = RTE_MAX(create.window_size, map->iova
> + map->len);
>         }
>
>         /* sPAPR requires window size to be a power of 2 */
> -       create.window_size = rte_align64pow2(param.window_size);
> -       create.page_shift = __builtin_ctzll(param.hugepage_sz);
> -       create.levels = 1;
> +       create.window_size = rte_align64pow2(create.window_size);
>
>         if (do_map) {
> -               void *addr;
>                 /* re-create window and remap the entire memory */
> -               if (iova > create.window_size) {
> +               if (iova + len > create.window_size) {
> +                       struct spapr_remap_walk_param param = {
> +                               .vfio_container_fd = vfio_container_fd,
> +                           .window_size = create.window_size,
> +                       };
> +
> +                       /* we're inside a callback, so use thread-unsafe
> version
> +                        */
> +
>  rte_memseg_walk_thread_unsafe(vfio_spapr_unmap_walk,
> +                                       &param);
> +                       /* destruct all user maps */
> +                       for (i = 0; i < user_mem_maps->n_maps; i++) {
> +                               struct user_mem_map *map =
> +                                               &user_mem_maps->maps[i];
> +                               if (vaddr == map->addr && len == map->len)
> +                                       continue;
> +                               if
> (vfio_spapr_dma_do_map(vfio_container_fd,
> +                                               map->addr, map->iova,
> map->len,
> +                                               0)) {
> +                                       RTE_LOG(ERR, EAL, "Could not
> destruct user DMA maps\n");
> +                                       ret = -1;
> +                                       goto out;
> +                               }
> +                       }
> +
> +                       create.window_size = rte_align64pow2(iova + len);
>                         if
> (vfio_spapr_create_new_dma_window(vfio_container_fd,
>                                         &create) < 0) {
>                                 RTE_LOG(ERR, EAL, "Could not create new
> DMA window\n");
>                                 ret = -1;
>                                 goto out;
>                         }
> +                       memcpy(&prev_create, &create, sizeof(create));
>                         /* we're inside a callback, so use thread-unsafe
> version
>                          */
>                         if
> (rte_memseg_walk_thread_unsafe(vfio_spapr_map_walk,
> -                                       &vfio_container_fd) < 0) {
> +                                       &param) < 0) {
>                                 RTE_LOG(ERR, EAL, "Could not recreate DMA
> maps\n");
>                                 ret = -1;
>                                 goto out;
> @@ -1547,6 +1596,8 @@ vfio_spapr_dma_mem_map(int vfio_container_fd,
> uint64_t vaddr, uint64_t iova,
>                         for (i = 0; i < user_mem_maps->n_maps; i++) {
>                                 struct user_mem_map *map =
>                                                 &user_mem_maps->maps[i];
> +                               if (vaddr == map->addr && len == map->len)
> +                                       continue;
>                                 if
> (vfio_spapr_dma_do_map(vfio_container_fd,
>                                                 map->addr, map->iova,
> map->len,
>                                                 1)) {
> @@ -1556,23 +1607,8 @@ vfio_spapr_dma_mem_map(int vfio_container_fd,
> uint64_t vaddr, uint64_t iova,
>                                 }
>                         }
>                 }
> -
> -               /* now that we've remapped all of the memory that was
> present
> -                * before, map the segment that we were requested to map.
> -                *
> -                * however, if we were called by the callback, the memory
> we
> -                * were called with was already in the memseg list, so
> previous
> -                * mapping should've mapped that segment already.
> -                *
> -                * virt2memseg_list is a relatively cheap check, so use
> that. if
> -                * memory is within any memseg list, it's a memseg, so it's
> -                * already mapped.
> -                */
> -               addr = (void *)(uintptr_t)vaddr;
> -               if (rte_mem_virt2memseg_list(addr) == NULL &&
> -                               vfio_spapr_dma_do_map(vfio_container_fd,
> -                                       vaddr, iova, len, 1) < 0) {
> -                       RTE_LOG(ERR, EAL, "Could not map segment\n");
> +               if (vfio_spapr_dma_do_map(vfio_container_fd, vaddr, iova,
> len, 1)) {
> +                       RTE_LOG(ERR, EAL, "Failed to map DMA\n");
>                         ret = -1;
>                         goto out;
>                 }
> @@ -1613,6 +1649,7 @@ vfio_spapr_dma_map(int vfio_container_fd)
>                 RTE_LOG(ERR, EAL, "Could not create new DMA window\n");
>                 return -1;
>         }
> +       memcpy(&prev_create, &create, sizeof(create));
>
>         /* map all DPDK segments for DMA. use 1:1 PA to IOVA mapping */
>         if (rte_memseg_walk(vfio_spapr_map_walk, &vfio_container_fd) < 0)
> @@ -1620,6 +1657,21 @@ vfio_spapr_dma_map(int vfio_container_fd)
>
>         return 0;
>  }
> +#else
> +static int
> +vfio_spapr_dma_mem_map(int __rte_unused vfio_container_fd,
> +                       uint64_t __rte_unused vaddr,
> +                       uint64_t __rte_unused iova, uint64_t __rte_unused
> len,
> +                       int __rte_unused do_map)
> +{
> +       return 0;
> +}
> +static int
> +vfio_spapr_dma_map(int __rte_unused vfio_container_fd)
> +{
> +       return 0;
> +}
> +#endif
>
>  static int
>  vfio_noiommu_dma_map(int __rte_unused vfio_container_fd)
> --
> 2.17.1
>
>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [dpdk-dev] [PATCH v2] vfio: fix expanding DMA area in ppc64le
  2019-06-13  2:22 ` Takeshi Yoshimura
  2019-06-13 17:37   ` David Christensen
  2019-06-14  7:34   ` David Marchand
@ 2019-06-14  7:49   ` " Takeshi Yoshimura
  2019-07-13  1:15     ` [dpdk-dev] [PATCH v3] " Takeshi Yoshimura
  2019-06-18  2:37   ` [dpdk-dev] [PATCH] " Mo, YufengX
                     ` (3 subsequent siblings)
  6 siblings, 1 reply; 16+ messages in thread
From: Takeshi Yoshimura @ 2019-06-14  7:49 UTC (permalink / raw)
  To: dev; +Cc: drc, pradeep, david.marchand, Takeshi Yoshimura

In ppc64le, expanding DMA areas always fails because we cannot remove
a DMA window. As a result, we cannot allocate more than one memseg in
ppc64le. This is because vfio_spapr_dma_mem_map() doesn't unmap all
the mapped DMA before removing the window. This patch fixes this
incorrect behavior.

I added a global variable to track current window size since we do
not have better ways to get exact size of it than doing so. sPAPR
IOMMU seems not to provide any ways to get window size with ioctl
interfaces. rte_memseg_walk*() is currently used to calculate window
size, but it walks memsegs that are marked as used, not mapped. So,
we need to determine if a given memseg is mapped or not, otherwise
the ioctl reports errors due to attempting to unregister memory
addresses that are not registered. The global variable is excluded
in non-ppc64le binaries.

Similar problems happen in user maps. We need to avoid attempting to
unmap the address that is given as the function's parameter. The
compaction of user maps prevents us from passing correct length for
unmapping DMA at the window recreation. So, I removed it in ppc64le.

I also fixed the order of ioctl for unregister and unmap. The ioctl
for unregister sometimes reports device busy errors due to the
existence of mapped area.

Signed-off-by: Takeshi Yoshimura <tyos@jp.ibm.com>
---
 lib/librte_eal/linux/eal/eal_vfio.c | 154 +++++++++++++++++++---------
 1 file changed, 103 insertions(+), 51 deletions(-)

diff --git a/lib/librte_eal/linux/eal/eal_vfio.c b/lib/librte_eal/linux/eal/eal_vfio.c
index 6892a2c14..5587854b8 100644
--- a/lib/librte_eal/linux/eal/eal_vfio.c
+++ b/lib/librte_eal/linux/eal/eal_vfio.c
@@ -93,6 +93,7 @@ is_null_map(const struct user_mem_map *map)
 	return map->addr == 0 && map->iova == 0 && map->len == 0;
 }
 
+#ifndef RTE_ARCH_PPC_64
 /* we may need to merge user mem maps together in case of user mapping/unmapping
  * chunks of memory, so we'll need a comparator function to sort segments.
  */
@@ -126,6 +127,7 @@ user_mem_map_cmp(const void *a, const void *b)
 
 	return 0;
 }
+#endif
 
 /* adjust user map entry. this may result in shortening of existing map, or in
  * splitting existing map in two pieces.
@@ -162,6 +164,7 @@ adjust_map(struct user_mem_map *src, struct user_mem_map *end,
 	}
 }
 
+#ifndef RTE_ARCH_PPC_64
 /* try merging two maps into one, return 1 if succeeded */
 static int
 merge_map(struct user_mem_map *left, struct user_mem_map *right)
@@ -177,6 +180,7 @@ merge_map(struct user_mem_map *left, struct user_mem_map *right)
 
 	return 1;
 }
+#endif
 
 static struct user_mem_map *
 find_user_mem_map(struct user_mem_maps *user_mem_maps, uint64_t addr,
@@ -211,6 +215,16 @@ find_user_mem_map(struct user_mem_maps *user_mem_maps, uint64_t addr,
 	return NULL;
 }
 
+#ifdef RTE_ARCH_PPC_64
+/* Recreation of DMA window requires unregistering DMA memory.
+ * Compaction confuses the logic and causes false reports in the recreation.
+ * For now, we do not compact user maps in ppc64le.
+ */
+static void
+compact_user_maps(__rte_unused struct user_mem_maps *user_mem_maps)
+{
+}
+#else
 /* this will sort all user maps, and merge/compact any adjacent maps */
 static void
 compact_user_maps(struct user_mem_maps *user_mem_maps)
@@ -256,6 +270,7 @@ compact_user_maps(struct user_mem_maps *user_mem_maps)
 		user_mem_maps->n_maps = cur_idx;
 	}
 }
+#endif
 
 static int
 vfio_open_group_fd(int iommu_group_num)
@@ -1306,6 +1321,7 @@ vfio_type1_dma_map(int vfio_container_fd)
 	return rte_memseg_walk(type1_map, &vfio_container_fd);
 }
 
+#ifdef RTE_ARCH_PPC_64
 static int
 vfio_spapr_dma_do_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
 		uint64_t len, int do_map)
@@ -1357,14 +1373,6 @@ vfio_spapr_dma_do_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
 		}
 
 	} else {
-		ret = ioctl(vfio_container_fd,
-				VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY, &reg);
-		if (ret) {
-			RTE_LOG(ERR, EAL, "  cannot unregister vaddr for IOMMU, error %i (%s)\n",
-					errno, strerror(errno));
-			return -1;
-		}
-
 		memset(&dma_unmap, 0, sizeof(dma_unmap));
 		dma_unmap.argsz = sizeof(struct vfio_iommu_type1_dma_unmap);
 		dma_unmap.size = len;
@@ -1377,24 +1385,50 @@ vfio_spapr_dma_do_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
 					errno, strerror(errno));
 			return -1;
 		}
+
+		ret = ioctl(vfio_container_fd,
+				VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY, &reg);
+		if (ret) {
+			RTE_LOG(ERR, EAL, "  cannot unregister vaddr for IOMMU, error %i (%s)\n",
+					errno, strerror(errno));
+			return -1;
+		}
 	}
 
 	return 0;
 }
 
+struct spapr_remap_walk_param {
+	int vfio_container_fd;
+	uint64_t window_size;
+};
+
 static int
 vfio_spapr_map_walk(const struct rte_memseg_list *msl,
 		const struct rte_memseg *ms, void *arg)
 {
-	int *vfio_container_fd = arg;
+	struct spapr_remap_walk_param *p = arg;
 
-	if (msl->external)
+	if (msl->external || ms->iova >= p->window_size)
 		return 0;
 
-	return vfio_spapr_dma_do_map(*vfio_container_fd, ms->addr_64, ms->iova,
+	return vfio_spapr_dma_do_map(p->vfio_container_fd, ms->addr_64, ms->iova,
 			ms->len, 1);
 }
 
+static int
+vfio_spapr_unmap_walk(const struct rte_memseg_list *msl,
+		const struct rte_memseg *ms, void *arg)
+{
+	struct spapr_remap_walk_param *p = arg;
+
+	if (msl->external || ms->iova >= p->window_size)
+		return 0;
+
+	return vfio_spapr_dma_do_map(p->vfio_container_fd, ms->addr_64, ms->iova,
+			ms->len, 0);
+}
+
 struct spapr_walk_param {
 	uint64_t window_size;
 	uint64_t hugepage_sz;
@@ -1461,14 +1495,13 @@ vfio_spapr_create_new_dma_window(int vfio_container_fd,
 	return 0;
 }
 
+static struct vfio_iommu_spapr_tce_create prev_create;
+
 static int
 vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
 		uint64_t len, int do_map)
 {
-	struct spapr_walk_param param;
-	struct vfio_iommu_spapr_tce_create create = {
-		.argsz = sizeof(create),
-	};
+	struct vfio_iommu_spapr_tce_create create;
 	struct vfio_config *vfio_cfg;
 	struct user_mem_maps *user_mem_maps;
 	int i, ret = 0;
@@ -1482,43 +1515,59 @@ vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
 	user_mem_maps = &vfio_cfg->mem_maps;
 	rte_spinlock_recursive_lock(&user_mem_maps->lock);
 
-	/* check if window size needs to be adjusted */
-	memset(&param, 0, sizeof(param));
-
-	/* we're inside a callback so use thread-unsafe version */
-	if (rte_memseg_walk_thread_unsafe(vfio_spapr_window_size_walk,
-				&param) < 0) {
-		RTE_LOG(ERR, EAL, "Could not get window size\n");
-		ret = -1;
-		goto out;
-	}
+	memcpy(&create, &prev_create, sizeof(create));
 
 	/* also check user maps */
 	for (i = 0; i < user_mem_maps->n_maps; i++) {
-		uint64_t max = user_mem_maps->maps[i].iova +
-				user_mem_maps->maps[i].len;
-		create.window_size = RTE_MAX(create.window_size, max);
+		struct user_mem_map *map = &user_mem_maps->maps[i];
+
+		if (vaddr == map->addr && len == map->len)
+			continue;
+		create.window_size = RTE_MAX(create.window_size, map->iova + map->len);
 	}
 
 	/* sPAPR requires window size to be a power of 2 */
-	create.window_size = rte_align64pow2(param.window_size);
-	create.page_shift = __builtin_ctzll(param.hugepage_sz);
-	create.levels = 1;
+	create.window_size = rte_align64pow2(create.window_size);
 
 	if (do_map) {
-		void *addr;
 		/* re-create window and remap the entire memory */
-		if (iova > create.window_size) {
+		if (iova + len > create.window_size) {
+			struct spapr_remap_walk_param param = {
+				.vfio_container_fd = vfio_container_fd,
+			    .window_size = create.window_size,
+			};
+
+			/* we're inside a callback, so use thread-unsafe version
+			 */
+			rte_memseg_walk_thread_unsafe(vfio_spapr_unmap_walk,
+					&param);
+			/* destruct all user maps */
+			for (i = 0; i < user_mem_maps->n_maps; i++) {
+				struct user_mem_map *map =
+						&user_mem_maps->maps[i];
+				if (vaddr == map->addr && len == map->len)
+					continue;
+				if (vfio_spapr_dma_do_map(vfio_container_fd,
+						map->addr, map->iova, map->len,
+						0)) {
+					RTE_LOG(ERR, EAL, "Could not destruct user DMA maps\n");
+					ret = -1;
+					goto out;
+				}
+			}
+
+			create.window_size = rte_align64pow2(iova + len);
 			if (vfio_spapr_create_new_dma_window(vfio_container_fd,
 					&create) < 0) {
 				RTE_LOG(ERR, EAL, "Could not create new DMA window\n");
 				ret = -1;
 				goto out;
 			}
+			memcpy(&prev_create, &create, sizeof(create));
 			/* we're inside a callback, so use thread-unsafe version
 			 */
 			if (rte_memseg_walk_thread_unsafe(vfio_spapr_map_walk,
-					&vfio_container_fd) < 0) {
+					&param) < 0) {
 				RTE_LOG(ERR, EAL, "Could not recreate DMA maps\n");
 				ret = -1;
 				goto out;
@@ -1527,6 +1576,8 @@ vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
 			for (i = 0; i < user_mem_maps->n_maps; i++) {
 				struct user_mem_map *map =
 						&user_mem_maps->maps[i];
+				if (vaddr == map->addr && len == map->len)
+					continue;
 				if (vfio_spapr_dma_do_map(vfio_container_fd,
 						map->addr, map->iova, map->len,
 						1)) {
@@ -1536,23 +1587,8 @@ vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
 				}
 			}
 		}
-
-		/* now that we've remapped all of the memory that was present
-		 * before, map the segment that we were requested to map.
-		 *
-		 * however, if we were called by the callback, the memory we
-		 * were called with was already in the memseg list, so previous
-		 * mapping should've mapped that segment already.
-		 *
-		 * virt2memseg_list is a relatively cheap check, so use that. if
-		 * memory is within any memseg list, it's a memseg, so it's
-		 * already mapped.
-		 */
-		addr = (void *)(uintptr_t)vaddr;
-		if (rte_mem_virt2memseg_list(addr) == NULL &&
-				vfio_spapr_dma_do_map(vfio_container_fd,
-					vaddr, iova, len, 1) < 0) {
-			RTE_LOG(ERR, EAL, "Could not map segment\n");
+		if (vfio_spapr_dma_do_map(vfio_container_fd, vaddr, iova, len, 1)) {
+			RTE_LOG(ERR, EAL, "Failed to map DMA\n");
 			ret = -1;
 			goto out;
 		}
@@ -1593,6 +1629,7 @@ vfio_spapr_dma_map(int vfio_container_fd)
 		RTE_LOG(ERR, EAL, "Could not create new DMA window\n");
 		return -1;
 	}
+	memcpy(&prev_create, &create, sizeof(create));
 
 	/* map all DPDK segments for DMA. use 1:1 PA to IOVA mapping */
 	if (rte_memseg_walk(vfio_spapr_map_walk, &vfio_container_fd) < 0)
@@ -1600,6 +1637,21 @@ vfio_spapr_dma_map(int vfio_container_fd)
 
 	return 0;
 }
+#else
+static int
+vfio_spapr_dma_mem_map(int __rte_unused vfio_container_fd,
+			uint64_t __rte_unused vaddr,
+			uint64_t __rte_unused iova, uint64_t __rte_unused len,
+			int __rte_unused do_map)
+{
+	return 0;
+}
+static int
+vfio_spapr_dma_map(int __rte_unused vfio_container_fd)
+{
+	return 0;
+}
+#endif
 
 static int
 vfio_noiommu_dma_map(int __rte_unused vfio_container_fd)
-- 
2.17.2 (Apple Git-113)


^ permalink raw reply	[flat|nested] 16+ messages in thread

* [dpdk-dev]  [PATCH] vfio: fix expanding DMA area in ppc64le
  2019-06-13  2:22 ` Takeshi Yoshimura
                     ` (2 preceding siblings ...)
  2019-06-14  7:49   ` [dpdk-dev] [PATCH v2] " Takeshi Yoshimura
@ 2019-06-18  2:37   ` " Mo, YufengX
  2019-06-18  2:39   ` Mo, YufengX
                     ` (2 subsequent siblings)
  6 siblings, 0 replies; 16+ messages in thread
From: Mo, YufengX @ 2019-06-18  2:37 UTC (permalink / raw)
  To: yufengx.mo, dev; +Cc: drc, pradeep, Takeshi Yoshimura

From: Takeshi Yoshimura <tyos@jp.ibm.com>

In ppc64le, expanding DMA areas always fails because we cannot remove
a DMA window. As a result, we cannot allocate more than one memseg in
ppc64le. This is because vfio_spapr_dma_mem_map() doesn't unmap all
the mapped DMA before removing the window. This patch fixes this
incorrect behavior.

I added a global variable to track current window size since we do
not have better ways to get exact size of it than doing so. sPAPR
IOMMU seems not to provide any ways to get window size with ioctl
interfaces. rte_memseg_walk*() is currently used to calculate window
size, but it walks memsegs that are marked as used, not mapped. So,
we need to determine if a given memseg is mapped or not, otherwise
the ioctl reports errors due to attempting to unregister memory
addresses that are not registered. The global variable is excluded
in non-ppc64le binaries.

Similar problems happen in user maps. We need to avoid attempting to
unmap the address that is given as the function's parameter. The
compaction of user maps prevents us from passing correct length for
unmapping DMA at the window recreation. So, I removed it in ppc64le.

I also fixed the order of ioctl for unregister and unmap. The ioctl
for unregister sometimes reports device busy errors due to the
existence of mapped area.

Signed-off-by: Takeshi Yoshimura <tyos@jp.ibm.com>
---
 lib/librte_eal/linux/eal/eal_vfio.c | 154 +++++++++++++++++++---------
 1 file changed, 103 insertions(+), 51 deletions(-)

diff --git a/lib/librte_eal/linux/eal/eal_vfio.c b/lib/librte_eal/linux/eal/eal_vfio.c
index f16c5c3c0..c1b275b56 100644
--- a/lib/librte_eal/linux/eal/eal_vfio.c
+++ b/lib/librte_eal/linux/eal/eal_vfio.c
@@ -93,6 +93,7 @@ is_null_map(const struct user_mem_map *map)
 	return map->addr == 0 && map->iova == 0 && map->len == 0;
 }
 
+#ifndef RTE_ARCH_PPC_64
 /* we may need to merge user mem maps together in case of user mapping/unmapping
  * chunks of memory, so we'll need a comparator function to sort segments.
  */
@@ -126,6 +127,7 @@ user_mem_map_cmp(const void *a, const void *b)
 
 	return 0;
 }
+#endif
 
 /* adjust user map entry. this may result in shortening of existing map, or in
  * splitting existing map in two pieces.
@@ -162,6 +164,7 @@ adjust_map(struct user_mem_map *src, struct user_mem_map *end,
 	}
 }
 
+#ifndef RTE_ARCH_PPC_64
 /* try merging two maps into one, return 1 if succeeded */
 static int
 merge_map(struct user_mem_map *left, struct user_mem_map *right)
@@ -177,6 +180,7 @@ merge_map(struct user_mem_map *left, struct user_mem_map *right)
 
 	return 1;
 }
+#endif
 
 static struct user_mem_map *
 find_user_mem_map(struct user_mem_maps *user_mem_maps, uint64_t addr,
@@ -211,6 +215,16 @@ find_user_mem_map(struct user_mem_maps *user_mem_maps, uint64_t addr,
 	return NULL;
 }
 
+#ifdef RTE_ARCH_PPC_64
+/* Recreation of DMA window requires unregistering DMA memory.
+ * Compaction confuses the logic and causes false reports in the recreation.
+ * For now, we do not compact user maps in ppc64le.
+ */
+static void
+compact_user_maps(__rte_unused struct user_mem_maps *user_mem_maps)
+{
+}
+#else
 /* this will sort all user maps, and merge/compact any adjacent maps */
 static void
 compact_user_maps(struct user_mem_maps *user_mem_maps)
@@ -256,6 +270,7 @@ compact_user_maps(struct user_mem_maps *user_mem_maps)
 		user_mem_maps->n_maps = cur_idx;
 	}
 }
+#endif
 
 static int
 vfio_open_group_fd(int iommu_group_num)
@@ -1306,6 +1321,7 @@ vfio_type1_dma_map(int vfio_container_fd)
 	return rte_memseg_walk(type1_map, &vfio_container_fd);
 }
 
+#ifdef RTE_ARCH_PPC_64
 static int
 vfio_spapr_dma_do_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
 		uint64_t len, int do_map)
@@ -1357,14 +1373,6 @@ vfio_spapr_dma_do_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
 		}
 
 	} else {
-		ret = ioctl(vfio_container_fd,
-				VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY, &reg);
-		if (ret) {
-			RTE_LOG(ERR, EAL, "  cannot unregister vaddr for IOMMU, error %i (%s)\n",
-					errno, strerror(errno));
-			return -1;
-		}
-
 		memset(&dma_unmap, 0, sizeof(dma_unmap));
 		dma_unmap.argsz = sizeof(struct vfio_iommu_type1_dma_unmap);
 		dma_unmap.size = len;
@@ -1377,24 +1385,50 @@ vfio_spapr_dma_do_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
 					errno, strerror(errno));
 			return -1;
 		}
+
+		ret = ioctl(vfio_container_fd,
+				VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY, &reg);
+		if (ret) {
+			RTE_LOG(ERR, EAL, "  cannot unregister vaddr for IOMMU, error %i (%s)\n",
+					errno, strerror(errno));
+			return -1;
+		}
 	}
 
 	return 0;
 }
 
+struct spapr_remap_walk_param {
+	int vfio_container_fd;
+	uint64_t window_size;
+};
+
 static int
 vfio_spapr_map_walk(const struct rte_memseg_list *msl,
 		const struct rte_memseg *ms, void *arg)
 {
-	int *vfio_container_fd = arg;
+	struct spapr_remap_walk_param *p = arg;
 
-	if (msl->external)
+	if (msl->external || ms->iova >= p->window_size)
 		return 0;
 
-	return vfio_spapr_dma_do_map(*vfio_container_fd, ms->addr_64, ms->iova,
+	return vfio_spapr_dma_do_map(p->vfio_container_fd, ms->addr_64, ms->iova,
 			ms->len, 1);
 }
 
+static int
+vfio_spapr_unmap_walk(const struct rte_memseg_list *msl,
+		const struct rte_memseg *ms, void *arg)
+{
+	struct spapr_remap_walk_param *p = arg;
+
+	if (msl->external || ms->iova >= p->window_size)
+		return 0;
+
+	return vfio_spapr_dma_do_map(p->vfio_container_fd, ms->addr_64, ms->iova,
+			ms->len, 0);
+}
+
 struct spapr_walk_param {
 	uint64_t window_size;
 	uint64_t hugepage_sz;
@@ -1481,14 +1515,13 @@ vfio_spapr_create_new_dma_window(int vfio_container_fd,
 	return 0;
 }
 
+static struct vfio_iommu_spapr_tce_create prev_create;
+
 static int
 vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
 		uint64_t len, int do_map)
 {
-	struct spapr_walk_param param;
-	struct vfio_iommu_spapr_tce_create create = {
-		.argsz = sizeof(create),
-	};
+	struct vfio_iommu_spapr_tce_create create;
 	struct vfio_config *vfio_cfg;
 	struct user_mem_maps *user_mem_maps;
 	int i, ret = 0;
@@ -1502,43 +1535,59 @@ vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
 	user_mem_maps = &vfio_cfg->mem_maps;
 	rte_spinlock_recursive_lock(&user_mem_maps->lock);
 
-	/* check if window size needs to be adjusted */
-	memset(&param, 0, sizeof(param));
-
-	/* we're inside a callback so use thread-unsafe version */
-	if (rte_memseg_walk_thread_unsafe(vfio_spapr_window_size_walk,
-				&param) < 0) {
-		RTE_LOG(ERR, EAL, "Could not get window size\n");
-		ret = -1;
-		goto out;
-	}
+	memcpy(&create, &prev_create, sizeof(create));
 
 	/* also check user maps */
 	for (i = 0; i < user_mem_maps->n_maps; i++) {
-		uint64_t max = user_mem_maps->maps[i].iova +
-				user_mem_maps->maps[i].len;
-		create.window_size = RTE_MAX(create.window_size, max);
+		struct user_mem_map *map = &user_mem_maps->maps[i];
+
+		if (vaddr == map->addr && len == map->len)
+			continue;
+		create.window_size = RTE_MAX(create.window_size, map->iova + map->len);
 	}
 
 	/* sPAPR requires window size to be a power of 2 */
-	create.window_size = rte_align64pow2(param.window_size);
-	create.page_shift = __builtin_ctzll(param.hugepage_sz);
-	create.levels = 1;
+	create.window_size = rte_align64pow2(create.window_size);
 
 	if (do_map) {
-		void *addr;
 		/* re-create window and remap the entire memory */
-		if (iova > create.window_size) {
+		if (iova + len > create.window_size) {
+			struct spapr_remap_walk_param param = {
+				.vfio_container_fd = vfio_container_fd,
+			    .window_size = create.window_size,
+			};
+
+			/* we're inside a callback, so use thread-unsafe version
+			 */
+			rte_memseg_walk_thread_unsafe(vfio_spapr_unmap_walk,
+					&param);
+			/* destruct all user maps */
+			for (i = 0; i < user_mem_maps->n_maps; i++) {
+				struct user_mem_map *map =
+						&user_mem_maps->maps[i];
+				if (vaddr == map->addr && len == map->len)
+					continue;
+				if (vfio_spapr_dma_do_map(vfio_container_fd,
+						map->addr, map->iova, map->len,
+						0)) {
+					RTE_LOG(ERR, EAL, "Could not destruct user DMA maps\n");
+					ret = -1;
+					goto out;
+				}
+			}
+
+			create.window_size = rte_align64pow2(iova + len);
 			if (vfio_spapr_create_new_dma_window(vfio_container_fd,
 					&create) < 0) {
 				RTE_LOG(ERR, EAL, "Could not create new DMA window\n");
 				ret = -1;
 				goto out;
 			}
+			memcpy(&prev_create, &create, sizeof(create));
 			/* we're inside a callback, so use thread-unsafe version
 			 */
 			if (rte_memseg_walk_thread_unsafe(vfio_spapr_map_walk,
-					&vfio_container_fd) < 0) {
+					&param) < 0) {
 				RTE_LOG(ERR, EAL, "Could not recreate DMA maps\n");
 				ret = -1;
 				goto out;
@@ -1547,6 +1596,8 @@ vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
 			for (i = 0; i < user_mem_maps->n_maps; i++) {
 				struct user_mem_map *map =
 						&user_mem_maps->maps[i];
+				if (vaddr == map->addr && len == map->len)
+					continue;
 				if (vfio_spapr_dma_do_map(vfio_container_fd,
 						map->addr, map->iova, map->len,
 						1)) {
@@ -1556,23 +1607,8 @@ vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
 				}
 			}
 		}
-
-		/* now that we've remapped all of the memory that was present
-		 * before, map the segment that we were requested to map.
-		 *
-		 * however, if we were called by the callback, the memory we
-		 * were called with was already in the memseg list, so previous
-		 * mapping should've mapped that segment already.
-		 *
-		 * virt2memseg_list is a relatively cheap check, so use that. if
-		 * memory is within any memseg list, it's a memseg, so it's
-		 * already mapped.
-		 */
-		addr = (void *)(uintptr_t)vaddr;
-		if (rte_mem_virt2memseg_list(addr) == NULL &&
-				vfio_spapr_dma_do_map(vfio_container_fd,
-					vaddr, iova, len, 1) < 0) {
-			RTE_LOG(ERR, EAL, "Could not map segment\n");
+		if (vfio_spapr_dma_do_map(vfio_container_fd, vaddr, iova, len, 1)) {
+			RTE_LOG(ERR, EAL, "Failed to map DMA\n");
 			ret = -1;
 			goto out;
 		}
@@ -1613,6 +1649,7 @@ vfio_spapr_dma_map(int vfio_container_fd)
 		RTE_LOG(ERR, EAL, "Could not create new DMA window\n");
 		return -1;
 	}
+	memcpy(&prev_create, &create, sizeof(create));
 
 	/* map all DPDK segments for DMA. use 1:1 PA to IOVA mapping */
 	if (rte_memseg_walk(vfio_spapr_map_walk, &vfio_container_fd) < 0)
@@ -1620,6 +1657,21 @@ vfio_spapr_dma_map(int vfio_container_fd)
 
 	return 0;
 }
+#else
+static int
+vfio_spapr_dma_mem_map(int __rte_unused vfio_container_fd,
+			uint64_t __rte_unused vaddr,
+			uint64_t __rte_unused iova, uint64_t __rte_unused len,
+			int __rte_unused do_map)
+{
+	return 0;
+}
+static int
+vfio_spapr_dma_map(int __rte_unused vfio_container_fd)
+{
+	return 0;
+}
+#endif
 
 static int
 vfio_noiommu_dma_map(int __rte_unused vfio_container_fd)

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [dpdk-dev] [PATCH] vfio: fix expanding DMA area in ppc64le
  2019-06-13  2:22 ` Takeshi Yoshimura
                     ` (3 preceding siblings ...)
  2019-06-18  2:37   ` [dpdk-dev] [PATCH] " Mo, YufengX
@ 2019-06-18  2:39   ` Mo, YufengX
  2019-06-26  9:43   ` Burakov, Anatoly
  2019-06-28 11:38   ` Takeshi T Yoshimura
  6 siblings, 0 replies; 16+ messages in thread
From: Mo, YufengX @ 2019-06-18  2:39 UTC (permalink / raw)
  To: dev; +Cc: drc, pradeep, Takeshi Yoshimura

Sorry, I sent this mail by mistake. Please ignore it.


> -----Original Message-----
> From: Mo, YufengX
> Sent: Tuesday, June 18, 2019 10:38 AM
> To: Mo, YufengX <yufengx.mo@intel.com>; dev@dpdk.org
> Cc: drc@ibm.com; pradeep@us.ibm.com; Takeshi Yoshimura <tyos@jp.ibm.com>
> Subject: [dpdk-dev] [PATCH] vfio: fix expanding DMA area in ppc64le
> 
> From: Takeshi Yoshimura <tyos@jp.ibm.com>
> 
> In ppc64le, expanding DMA areas always fails because we cannot remove
> a DMA window. As a result, we cannot allocate more than one memseg in
> ppc64le. This is because vfio_spapr_dma_mem_map() doesn't unmap all
> the mapped DMA before removing the window. This patch fixes this
> incorrect behavior.
> 
> I added a global variable to track current window size since we do
> not have better ways to get exact size of it than doing so. sPAPR
> IOMMU seems not to provide any ways to get window size with ioctl
> interfaces. rte_memseg_walk*() is currently used to calculate window
> size, but it walks memsegs that are marked as used, not mapped. So,
> we need to determine if a given memseg is mapped or not, otherwise
> the ioctl reports errors due to attempting to unregister memory
> addresses that are not registered. The global variable is excluded
> in non-ppc64le binaries.
> 
> Similar problems happen in user maps. We need to avoid attempting to
> unmap the address that is given as the function's parameter. The
> compaction of user maps prevents us from passing correct length for
> unmapping DMA at the window recreation. So, I removed it in ppc64le.
> 
> I also fixed the order of ioctl for unregister and unmap. The ioctl
> for unregister sometimes reports device busy errors due to the
> existence of mapped area.
> 
> Signed-off-by: Takeshi Yoshimura <tyos@jp.ibm.com>
> ---
>  lib/librte_eal/linux/eal/eal_vfio.c | 154 +++++++++++++++++++---------
>  1 file changed, 103 insertions(+), 51 deletions(-)
> 
> diff --git a/lib/librte_eal/linux/eal/eal_vfio.c b/lib/librte_eal/linux/eal/eal_vfio.c
> index f16c5c3c0..c1b275b56 100644
> --- a/lib/librte_eal/linux/eal/eal_vfio.c
> +++ b/lib/librte_eal/linux/eal/eal_vfio.c
> @@ -93,6 +93,7 @@ is_null_map(const struct user_mem_map *map)
>  	return map->addr == 0 && map->iova == 0 && map->len == 0;
>  }
> 
> +#ifndef RTE_ARCH_PPC_64
>  /* we may need to merge user mem maps together in case of user mapping/unmapping
>   * chunks of memory, so we'll need a comparator function to sort segments.
>   */
> @@ -126,6 +127,7 @@ user_mem_map_cmp(const void *a, const void *b)
> 
>  	return 0;
>  }
> +#endif
> 
>  /* adjust user map entry. this may result in shortening of existing map, or in
>   * splitting existing map in two pieces.
> @@ -162,6 +164,7 @@ adjust_map(struct user_mem_map *src, struct user_mem_map *end,
>  	}
>  }
> 
> +#ifndef RTE_ARCH_PPC_64
>  /* try merging two maps into one, return 1 if succeeded */
>  static int
>  merge_map(struct user_mem_map *left, struct user_mem_map *right)
> @@ -177,6 +180,7 @@ merge_map(struct user_mem_map *left, struct user_mem_map *right)
> 
>  	return 1;
>  }
> +#endif
> 
>  static struct user_mem_map *
>  find_user_mem_map(struct user_mem_maps *user_mem_maps, uint64_t addr,
> @@ -211,6 +215,16 @@ find_user_mem_map(struct user_mem_maps *user_mem_maps, uint64_t addr,
>  	return NULL;
>  }
> 
> +#ifdef RTE_ARCH_PPC_64
> +/* Recreation of DMA window requires unregistering DMA memory.
> + * Compaction confuses the logic and causes false reports in the recreation.
> + * For now, we do not compact user maps in ppc64le.
> + */
> +static void
> +compact_user_maps(__rte_unused struct user_mem_maps *user_mem_maps)
> +{
> +}
> +#else
>  /* this will sort all user maps, and merge/compact any adjacent maps */
>  static void
>  compact_user_maps(struct user_mem_maps *user_mem_maps)
> @@ -256,6 +270,7 @@ compact_user_maps(struct user_mem_maps *user_mem_maps)
>  		user_mem_maps->n_maps = cur_idx;
>  	}
>  }
> +#endif
> 
>  static int
>  vfio_open_group_fd(int iommu_group_num)
> @@ -1306,6 +1321,7 @@ vfio_type1_dma_map(int vfio_container_fd)
>  	return rte_memseg_walk(type1_map, &vfio_container_fd);
>  }
> 
> +#ifdef RTE_ARCH_PPC_64
>  static int
>  vfio_spapr_dma_do_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
>  		uint64_t len, int do_map)
> @@ -1357,14 +1373,6 @@ vfio_spapr_dma_do_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
>  		}
> 
>  	} else {
> -		ret = ioctl(vfio_container_fd,
> -				VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY, &reg);
> -		if (ret) {
> -			RTE_LOG(ERR, EAL, "  cannot unregister vaddr for IOMMU, error %i (%s)\n",
> -					errno, strerror(errno));
> -			return -1;
> -		}
> -
>  		memset(&dma_unmap, 0, sizeof(dma_unmap));
>  		dma_unmap.argsz = sizeof(struct vfio_iommu_type1_dma_unmap);
>  		dma_unmap.size = len;
> @@ -1377,24 +1385,50 @@ vfio_spapr_dma_do_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
>  					errno, strerror(errno));
>  			return -1;
>  		}
> +
> +		ret = ioctl(vfio_container_fd,
> +				VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY, &reg);
> +		if (ret) {
> +			RTE_LOG(ERR, EAL, "  cannot unregister vaddr for IOMMU, error %i (%s)\n",
> +					errno, strerror(errno));
> +			return -1;
> +		}
>  	}
> 
>  	return 0;
>  }
> 
> +struct spapr_remap_walk_param {
> +	int vfio_container_fd;
> +	uint64_t window_size;
> +};
> +
>  static int
>  vfio_spapr_map_walk(const struct rte_memseg_list *msl,
>  		const struct rte_memseg *ms, void *arg)
>  {
> -	int *vfio_container_fd = arg;
> +	struct spapr_remap_walk_param *p = arg;
> 
> -	if (msl->external)
> +	if (msl->external || ms->iova >= p->window_size)
>  		return 0;
> 
> -	return vfio_spapr_dma_do_map(*vfio_container_fd, ms->addr_64, ms->iova,
> +	return vfio_spapr_dma_do_map(p->vfio_container_fd, ms->addr_64, ms->iova,
>  			ms->len, 1);
>  }
> 
> +static int
> +vfio_spapr_unmap_walk(const struct rte_memseg_list *msl,
> +		const struct rte_memseg *ms, void *arg)
> +{
> +	struct spapr_remap_walk_param *p = arg;
> +
> +	if (msl->external || ms->iova >= p->window_size)
> +		return 0;
> +
> +	return vfio_spapr_dma_do_map(p->vfio_container_fd, ms->addr_64, ms->iova,
> +			ms->len, 0);
> +}
> +
>  struct spapr_walk_param {
>  	uint64_t window_size;
>  	uint64_t hugepage_sz;
> @@ -1481,14 +1515,13 @@ vfio_spapr_create_new_dma_window(int vfio_container_fd,
>  	return 0;
>  }
> 
> +static struct vfio_iommu_spapr_tce_create prev_create;
> +
>  static int
>  vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
>  		uint64_t len, int do_map)
>  {
> -	struct spapr_walk_param param;
> -	struct vfio_iommu_spapr_tce_create create = {
> -		.argsz = sizeof(create),
> -	};
> +	struct vfio_iommu_spapr_tce_create create;
>  	struct vfio_config *vfio_cfg;
>  	struct user_mem_maps *user_mem_maps;
>  	int i, ret = 0;
> @@ -1502,43 +1535,59 @@ vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
>  	user_mem_maps = &vfio_cfg->mem_maps;
>  	rte_spinlock_recursive_lock(&user_mem_maps->lock);
> 
> -	/* check if window size needs to be adjusted */
> -	memset(&param, 0, sizeof(param));
> -
> -	/* we're inside a callback so use thread-unsafe version */
> -	if (rte_memseg_walk_thread_unsafe(vfio_spapr_window_size_walk,
> -				&param) < 0) {
> -		RTE_LOG(ERR, EAL, "Could not get window size\n");
> -		ret = -1;
> -		goto out;
> -	}
> +	memcpy(&create, &prev_create, sizeof(create));
> 
>  	/* also check user maps */
>  	for (i = 0; i < user_mem_maps->n_maps; i++) {
> -		uint64_t max = user_mem_maps->maps[i].iova +
> -				user_mem_maps->maps[i].len;
> -		create.window_size = RTE_MAX(create.window_size, max);
> +		struct user_mem_map *map = &user_mem_maps->maps[i];
> +
> +		if (vaddr == map->addr && len == map->len)
> +			continue;
> +		create.window_size = RTE_MAX(create.window_size, map->iova + map->len);
>  	}
> 
>  	/* sPAPR requires window size to be a power of 2 */
> -	create.window_size = rte_align64pow2(param.window_size);
> -	create.page_shift = __builtin_ctzll(param.hugepage_sz);
> -	create.levels = 1;
> +	create.window_size = rte_align64pow2(create.window_size);
> 
>  	if (do_map) {
> -		void *addr;
>  		/* re-create window and remap the entire memory */
> -		if (iova > create.window_size) {
> +		if (iova + len > create.window_size) {
> +			struct spapr_remap_walk_param param = {
> +				.vfio_container_fd = vfio_container_fd,
> +			    .window_size = create.window_size,
> +			};
> +
> +			/* we're inside a callback, so use thread-unsafe version
> +			 */
> +			rte_memseg_walk_thread_unsafe(vfio_spapr_unmap_walk,
> +					&param);
> +			/* destruct all user maps */
> +			for (i = 0; i < user_mem_maps->n_maps; i++) {
> +				struct user_mem_map *map =
> +						&user_mem_maps->maps[i];
> +				if (vaddr == map->addr && len == map->len)
> +					continue;
> +				if (vfio_spapr_dma_do_map(vfio_container_fd,
> +						map->addr, map->iova, map->len,
> +						0)) {
> +					RTE_LOG(ERR, EAL, "Could not destruct user DMA maps\n");
> +					ret = -1;
> +					goto out;
> +				}
> +			}
> +
> +			create.window_size = rte_align64pow2(iova + len);
>  			if (vfio_spapr_create_new_dma_window(vfio_container_fd,
>  					&create) < 0) {
>  				RTE_LOG(ERR, EAL, "Could not create new DMA window\n");
>  				ret = -1;
>  				goto out;
>  			}
> +			memcpy(&prev_create, &create, sizeof(create));
>  			/* we're inside a callback, so use thread-unsafe version
>  			 */
>  			if (rte_memseg_walk_thread_unsafe(vfio_spapr_map_walk,
> -					&vfio_container_fd) < 0) {
> +					&param) < 0) {
>  				RTE_LOG(ERR, EAL, "Could not recreate DMA maps\n");
>  				ret = -1;
>  				goto out;
> @@ -1547,6 +1596,8 @@ vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
>  			for (i = 0; i < user_mem_maps->n_maps; i++) {
>  				struct user_mem_map *map =
>  						&user_mem_maps->maps[i];
> +				if (vaddr == map->addr && len == map->len)
> +					continue;
>  				if (vfio_spapr_dma_do_map(vfio_container_fd,
>  						map->addr, map->iova, map->len,
>  						1)) {
> @@ -1556,23 +1607,8 @@ vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
>  				}
>  			}
>  		}
> -
> -		/* now that we've remapped all of the memory that was present
> -		 * before, map the segment that we were requested to map.
> -		 *
> -		 * however, if we were called by the callback, the memory we
> -		 * were called with was already in the memseg list, so previous
> -		 * mapping should've mapped that segment already.
> -		 *
> -		 * virt2memseg_list is a relatively cheap check, so use that. if
> -		 * memory is within any memseg list, it's a memseg, so it's
> -		 * already mapped.
> -		 */
> -		addr = (void *)(uintptr_t)vaddr;
> -		if (rte_mem_virt2memseg_list(addr) == NULL &&
> -				vfio_spapr_dma_do_map(vfio_container_fd,
> -					vaddr, iova, len, 1) < 0) {
> -			RTE_LOG(ERR, EAL, "Could not map segment\n");
> +		if (vfio_spapr_dma_do_map(vfio_container_fd, vaddr, iova, len, 1)) {
> +			RTE_LOG(ERR, EAL, "Failed to map DMA\n");
>  			ret = -1;
>  			goto out;
>  		}
> @@ -1613,6 +1649,7 @@ vfio_spapr_dma_map(int vfio_container_fd)
>  		RTE_LOG(ERR, EAL, "Could not create new DMA window\n");
>  		return -1;
>  	}
> +	memcpy(&prev_create, &create, sizeof(create));
> 
>  	/* map all DPDK segments for DMA. use 1:1 PA to IOVA mapping */
>  	if (rte_memseg_walk(vfio_spapr_map_walk, &vfio_container_fd) < 0)
> @@ -1620,6 +1657,21 @@ vfio_spapr_dma_map(int vfio_container_fd)
> 
>  	return 0;
>  }
> +#else
> +static int
> +vfio_spapr_dma_mem_map(int __rte_unused vfio_container_fd,
> +			uint64_t __rte_unused vaddr,
> +			uint64_t __rte_unused iova, uint64_t __rte_unused len,
> +			int __rte_unused do_map)
> +{
> +	return 0;
> +}
> +static int
> +vfio_spapr_dma_map(int __rte_unused vfio_container_fd)
> +{
> +	return 0;
> +}
> +#endif
> 
>  static int
>  vfio_noiommu_dma_map(int __rte_unused vfio_container_fd)

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [dpdk-dev] [PATCH] vfio: fix expanding DMA area in ppc64le
  2019-06-13  2:22 ` Takeshi Yoshimura
                     ` (4 preceding siblings ...)
  2019-06-18  2:39   ` Mo, YufengX
@ 2019-06-26  9:43   ` Burakov, Anatoly
  2019-06-28 11:38   ` Takeshi T Yoshimura
  6 siblings, 0 replies; 16+ messages in thread
From: Burakov, Anatoly @ 2019-06-26  9:43 UTC (permalink / raw)
  To: Mo, YufengX, dev; +Cc: drc, pradeep, Takeshi Yoshimura

On 18-Jun-19 3:37 AM, Mo, YufengX wrote:
> From: Takeshi Yoshimura <tyos@jp.ibm.com>
> 
> In ppc64le, expanding DMA areas always fail because we cannot remove
> a DMA window. As a result, we cannot allocate more than one memseg in
> ppc64le. This is because vfio_spapr_dma_mem_map() doesn't unmap all
> the mapped DMA before removing the window. This patch fixes this
> incorrect behavior.
> 
> I added a global variable to track current window size since we do
> not have better ways to get exact size of it than doing so. sPAPR
> IOMMU seems not to provide any ways to get window size with ioctl
> interfaces. rte_memseg_walk*() is currently used to calculate window
> size, but it walks memsegs that are marked as used, not mapped. So,
> we need to determine if a given memseg is mapped or not, otherwise
> the ioctl reports errors due to attempting to unregister memory
> addresses that are not registered. The global variable is excluded
> in non-ppc64le binaries.
> 
> Similar problems happen in user maps. We need to avoid attempting to
> unmap the address that is given as the function's parameter. The
> compaction of user maps prevents us from passing correct length for
> unmapping DMA at the window recreation. So, I removed it in ppc64le.
> 
> I also fixed the order of ioctl for unregister and unmap. The ioctl
> for unregister sometimes report device busy errors due to the
> existence of mapped area.
> 
> Signed-off-by: Takeshi Yoshimura <tyos@jp.ibm.com>
> ---

OK there are three patches, and two v1's with two different authors in 
reply to the same original patch. There's too much going on here, i 
can't review this. Needs splitting.

Also, #ifdef-ing out the map merging seems highly suspect.

With regards to "walking used memsegs, not mapped", unless i'm 
misunderstanding something, these are the same - whenever a segment is 
mapped, it is marked as used, and whenever it is unmapped, it is marked 
as free. Could you please explain what is the difference and why is this 
needed?

Is the point of contention here being the fact that whenever the unmap 
callback arrives, the segments still appear used when iterating over the 
map? If that's the case, then i think it would be OK to mark them as 
unused *before* triggering callbacks, and chances are some of this code 
wouldn't be needed. That would require a deprecation notice though, 
because the API behavior will change (even if this fact is not 
documented properly).

-- 
Thanks,
Anatoly

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [dpdk-dev] [PATCH] vfio: fix expanding DMA area in ppc64le
  2019-06-13  2:22 ` Takeshi Yoshimura
                     ` (5 preceding siblings ...)
  2019-06-26  9:43   ` Burakov, Anatoly
@ 2019-06-28 11:38   ` Takeshi T Yoshimura
  2019-06-28 13:47     ` Burakov, Anatoly
  6 siblings, 1 reply; 16+ messages in thread
From: Takeshi T Yoshimura @ 2019-06-28 11:38 UTC (permalink / raw)
  To: Burakov, Anatoly
  Cc: Mo, YufengX, dev, David Christensen, Pradeep Satyanarayana

>To: "Mo, YufengX" <yufengx.mo@intel.com>, dev@dpdk.org
>From: "Burakov, Anatoly" <anatoly.burakov@intel.com>
>Date: 06/26/2019 06:43PM
>Cc: drc@ibm.com, pradeep@us.ibm.com, Takeshi Yoshimura
><tyos@jp.ibm.com>
>Subject: [EXTERNAL] Re: [dpdk-dev] [PATCH] vfio: fix expanding DMA
>area in ppc64le
>
>On 18-Jun-19 3:37 AM, Mo, YufengX wrote:
>> From: Takeshi Yoshimura <tyos@jp.ibm.com>
>> 
>> In ppc64le, expanding DMA areas always fail because we cannot
>remove
>> a DMA window. As a result, we cannot allocate more than one memseg
>in
>> ppc64le. This is because vfio_spapr_dma_mem_map() doesn't unmap all
>> the mapped DMA before removing the window. This patch fixes this
>> incorrect behavior.
>> 
>> I added a global variable to track current window size since we do
>> not have better ways to get exact size of it than doing so. sPAPR
>> IOMMU seems not to provide any ways to get window size with ioctl
>> interfaces. rte_memseg_walk*() is currently used to calculate
>window
>> size, but it walks memsegs that are marked as used, not mapped. So,
>> we need to determine if a given memseg is mapped or not, otherwise
>> the ioctl reports errors due to attempting to unregister memory
>> addresses that are not registered. The global variable is excluded
>> in non-ppc64le binaries.
>> 
>> Similar problems happen in user maps. We need to avoid attempting
>to
>> unmap the address that is given as the function's parameter. The
>> compaction of user maps prevents us from passing correct length for
>> unmapping DMA at the window recreation. So, I removed it in
>ppc64le.
>> 
>> I also fixed the order of ioctl for unregister and unmap. The ioctl
>> for unregister sometimes report device busy errors due to the
>> existence of mapped area.
>> 
>> Signed-off-by: Takeshi Yoshimura <tyos@jp.ibm.com>
>> ---
>
>OK there are three patches, and two v1's with two different authors
>in 
>reply to the same original patch. There's too much going on here, i 
>can't review this. Needs splitting.
>
>Also, #ifdef-ing out the map merging seems highly suspect.
>
>With regards to "walking used memsegs, not mapped", unless i'm 
>misunderstanding something, these are the same - whenever a segment
>is 
>mapped, it is marked as used, and whenever it is unmapped, it is
>marked 
>as free. Could you please explain what is the difference and why is
>this 
>needed?
>
>Is the point of contention here being the fact that whenever the
>unmap 
>callback arrives, the segments still appear used when iterating over
>the 
>map? If that's the case, then i think it would be OK to mark them as 
>unused *before* triggering callbacks, and chances are some of this
>code 
>wouldn't be needed. That would require a deprecation notice though, 
>because the API behavior will change (even if this fact is not 
>documented properly).
>
>-- 
>Thanks,
>Anatoly
>
>

I am the author of this patch. We should ignore a patch from YufengX Mo.

From my code reading, a memseg is at first marked as used when it is allocated. Then, the memseg is passed to vfio_spapr_dma_mem_map(). The callback iterates all the used (i.e., allocated) memsegs and calls ioctl for mapping VA to IOVA. So, when vfio_spapr_dma_mem_map() is called, passed memsegs can be non-mapped but marked as used. As a result, an attempt to unmap a non-mapped area happens during DMA window expansion. This is the difference and why this fix was needed.

> i think it would be OK to mark them as unused *before* triggering callbacks

Yes, my first idea was the same as yours, but I was also worried that it might cause inconsistent API behavior as you also pointed out. If you think so, I think I can rewrite the patch without ugly #ifdef. 

Unfortunately, I don't have enough time for writing code next week and next next week. So, I will resubmit the revised patch weeks later.

Regards,
Takeshi


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [dpdk-dev] [PATCH] vfio: fix expanding DMA area in ppc64le
  2019-06-28 11:38   ` Takeshi T Yoshimura
@ 2019-06-28 13:47     ` Burakov, Anatoly
  2019-06-28 14:04       ` Burakov, Anatoly
  0 siblings, 1 reply; 16+ messages in thread
From: Burakov, Anatoly @ 2019-06-28 13:47 UTC (permalink / raw)
  To: Takeshi T Yoshimura
  Cc: Mo, YufengX, dev, David Christensen, Pradeep Satyanarayana

On 28-Jun-19 12:38 PM, Takeshi T Yoshimura wrote:
>> To: "Mo, YufengX" <yufengx.mo@intel.com>, dev@dpdk.org
>> From: "Burakov, Anatoly" <anatoly.burakov@intel.com>
>> Date: 06/26/2019 06:43PM
>> Cc: drc@ibm.com, pradeep@us.ibm.com, Takeshi Yoshimura
>> <tyos@jp.ibm.com>
>> Subject: [EXTERNAL] Re: [dpdk-dev] [PATCH] vfio: fix expanding DMA
>> area in ppc64le
>>
>> On 18-Jun-19 3:37 AM, Mo, YufengX wrote:
>>> From: Takeshi Yoshimura <tyos@jp.ibm.com>
>>>
>>> In ppc64le, expanding DMA areas always fail because we cannot
>> remove
>>> a DMA window. As a result, we cannot allocate more than one memseg
>> in
>>> ppc64le. This is because vfio_spapr_dma_mem_map() doesn't unmap all
>>> the mapped DMA before removing the window. This patch fixes this
>>> incorrect behavior.
>>>
>>> I added a global variable to track current window size since we do
>>> not have better ways to get exact size of it than doing so. sPAPR
>>> IOMMU seems not to provide any ways to get window size with ioctl
>>> interfaces. rte_memseg_walk*() is currently used to calculate
>> window
>>> size, but it walks memsegs that are marked as used, not mapped. So,
>>> we need to determine if a given memseg is mapped or not, otherwise
>>> the ioctl reports errors due to attempting to unregister memory
>>> addresses that are not registered. The global variable is excluded
>>> in non-ppc64le binaries.
>>>
>>> Similar problems happen in user maps. We need to avoid attempting
>> to
>>> unmap the address that is given as the function's parameter. The
>>> compaction of user maps prevents us from passing correct length for
>>> unmapping DMA at the window recreation. So, I removed it in
>> ppc64le.
>>>
>>> I also fixed the order of ioctl for unregister and unmap. The ioctl
>>> for unregister sometimes report device busy errors due to the
>>> existence of mapped area.
>>>
>>> Signed-off-by: Takeshi Yoshimura <tyos@jp.ibm.com>
>>> ---
>>
>> OK there are three patches, and two v1's with two different authors
>> in
>> reply to the same original patch. There's too much going on here, i
>> can't review this. Needs splitting.
>>
>> Also, #ifdef-ing out the map merging seems highly suspect.
>>
>> With regards to "walking used memsegs, not mapped", unless i'm
>> misunderstanding something, these are the same - whenever a segment
>> is
>> mapped, it is marked as used, and whenever it is unmapped, it is
>> marked
>> as free. Could you please explain what is the difference and why is
>> this
>> needed?
>>
>> Is the point of contention here being the fact that whenever the
>> unmap
>> callback arrives, the segments still appear used when iterating over
>> the
>> map? If that's the case, then i think it would be OK to mark them as
>> unused *before* triggering callbacks, and chances are some of this
>> code
>> wouldn't be needed. That would require a deprecation notice though,
>> because the API behavior will change (even if this fact is not
>> documented properly).
>>
>> -- 
>> Thanks,
>> Anatoly
>>
>>
> 
> I am the author of this patch. We should ignore a patch from YufengX Mo.
> 
>>From my code reading, a memsg is at first marked as used when it is allocated. Then, the memseg is passed to vfio_spapr_dma_mem_map(). The callback iterates all the used (i.e., allocated) memsegs and call ioctl for mapping VA to IOVA. So, when vfio_spapr_dma_mem_map() is called, passed memsegs can be non-mapped but marked as used. As a result, an attempt to unmap non-mapped area happens during DMA window expansion. This is the difference and why this fix was needed.
> 
>> i think it would be OK to mark them as unused *before* triggering callbacks
> 
> Yes, my first idea was the same as yours, but I was also worried that it might cause inconsistent API behavior as you also pointed out. If you think so, I think I can rewrite the patch without ugly #ifdef.
> 
> Unfortunately, I don't have enough time for writing code next week and next next week. So, I will resubmit the revised patch weeks later.

I think the approach with fixing the mem callbacks to report the 
unmapped segments as no longer used would be better.

As far as i can remember at the point where callbacks are triggered, the 
memory is already removed from malloc heap and from all processes. Each 
secondary also stores their own shadow copy of the memory map, so 
removing the "used" flags from the main map will not have any 
consequences as far as correctness is concerned. Each callback is also 
getting the memory area being removed as parameters, so if there is code 
that needs to be run taking into account that memory area, it can be done.

Existing code may rely on this behavior (even though it doesn't make 
much sense now that i think of it), so going with this approach *will* 
require a deprecation notice and can only be done in the next release.

> 
> Regards,
> Takeshi
> 
> 


-- 
Thanks,
Anatoly

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [dpdk-dev] [PATCH] vfio: fix expanding DMA area in ppc64le
  2019-06-28 13:47     ` Burakov, Anatoly
@ 2019-06-28 14:04       ` Burakov, Anatoly
  0 siblings, 0 replies; 16+ messages in thread
From: Burakov, Anatoly @ 2019-06-28 14:04 UTC (permalink / raw)
  To: Takeshi T Yoshimura
  Cc: Mo, YufengX, dev, David Christensen, Pradeep Satyanarayana

On 28-Jun-19 2:47 PM, Burakov, Anatoly wrote:
> On 28-Jun-19 12:38 PM, Takeshi T Yoshimura wrote:
>>> To: "Mo, YufengX" <yufengx.mo@intel.com>, dev@dpdk.org
>>> From: "Burakov, Anatoly" <anatoly.burakov@intel.com>
>>> Date: 06/26/2019 06:43PM
>>> Cc: drc@ibm.com, pradeep@us.ibm.com, Takeshi Yoshimura
>>> <tyos@jp.ibm.com>
>>> Subject: [EXTERNAL] Re: [dpdk-dev] [PATCH] vfio: fix expanding DMA
>>> area in ppc64le
>>>
>>> On 18-Jun-19 3:37 AM, Mo, YufengX wrote:
>>>> From: Takeshi Yoshimura <tyos@jp.ibm.com>
>>>>
>>>> In ppc64le, expanding DMA areas always fail because we cannot
>>> remove
>>>> a DMA window. As a result, we cannot allocate more than one memseg
>>> in
>>>> ppc64le. This is because vfio_spapr_dma_mem_map() doesn't unmap all
>>>> the mapped DMA before removing the window. This patch fixes this
>>>> incorrect behavior.
>>>>
>>>> I added a global variable to track current window size since we do
>>>> not have better ways to get exact size of it than doing so. sPAPR
>>>> IOMMU seems not to provide any ways to get window size with ioctl
>>>> interfaces. rte_memseg_walk*() is currently used to calculate
>>> window
>>>> size, but it walks memsegs that are marked as used, not mapped. So,
>>>> we need to determine if a given memseg is mapped or not, otherwise
>>>> the ioctl reports errors due to attempting to unregister memory
>>>> addresses that are not registered. The global variable is excluded
>>>> in non-ppc64le binaries.
>>>>
>>>> Similar problems happen in user maps. We need to avoid attempting
>>> to
>>>> unmap the address that is given as the function's parameter. The
>>>> compaction of user maps prevents us from passing correct length for
>>>> unmapping DMA at the window recreation. So, I removed it in
>>> ppc64le.
>>>>
>>>> I also fixed the order of ioctl for unregister and unmap. The ioctl
>>>> for unregister sometimes report device busy errors due to the
>>>> existence of mapped area.
>>>>
>>>> Signed-off-by: Takeshi Yoshimura <tyos@jp.ibm.com>
>>>> ---
>>>
>>> OK there are three patches, and two v1's with two different authors
>>> in
>>> reply to the same original patch. There's too much going on here, i
>>> can't review this. Needs splitting.
>>>
>>> Also, #ifdef-ing out the map merging seems highly suspect.
>>>
>>> With regards to "walking used memsegs, not mapped", unless i'm
>>> misunderstanding something, these are the same - whenever a segment
>>> is
>>> mapped, it is marked as used, and whenever it is unmapped, it is
>>> marked
>>> as free. Could you please explain what is the difference and why is
>>> this
>>> needed?
>>>
>>> Is the point of contention here being the fact that whenever the
>>> unmap
>>> callback arrives, the segments still appear used when iterating over
>>> the
>>> map? If that's the case, then i think it would be OK to mark them as
>>> unused *before* triggering callbacks, and chances are some of this
>>> code
>>> wouldn't be needed. That would require a deprecation notice though,
>>> because the API behavior will change (even if this fact is not
>>> documented properly).
>>>
>>> -- 
>>> Thanks,
>>> Anatoly
>>>
>>>
>>
>> I am the author of this patch. We should ignore a patch from YufengX Mo.
>>
>>> From my code reading, a memsg is at first marked as used when it is 
>>> allocated. Then, the memseg is passed to vfio_spapr_dma_mem_map(). 
>>> The callback iterates all the used (i.e., allocated) memsegs and call 
>>> ioctl for mapping VA to IOVA. So, when vfio_spapr_dma_mem_map() is 
>>> called, passed memsegs can be non-mapped but marked as used. As a 
>>> result, an attempt to unmap non-mapped area happens during DMA window 
>>> expansion. This is the difference and why this fix was needed.
>>
>>> i think it would be OK to mark them as unused *before* triggering 
>>> callbacks
>>
>> Yes, my first idea was the same as yours, but I was also worried that 
>> it might cause inconsistent API behavior as you also pointed out. If 
>> you think so, I think I can rewrite the patch without ugly #ifdef.
>>
>> Unfortunately, I don't have enough time for writing code next week and 
>> next next week. So, I will resubmit the revised patch weeks later.
> 
> I think the approach with fixing the mem callbacks to report the 
> unmapped segments as no longer used would be better.
> 
> As far as i can remember at the point where callbacks are triggered, the 
> memory is already removed from malloc heap and from all processes. Each 
> secondary also stores their own shadow copy of the memory map, so 
> removing the "used" flags from the main map will not have any 
> consequences as far as correctness is concerned. Each callback is also 
> getting the memory area being removed as parameters, so if there is code 
> that needs to be run taking into account that memory area, it can be done.
> 
> Existing code may rely on this behavior (even though it doesn't make 
> much sense now that i think of it), so going with this approach *will* 
> require a deprecation notice and can only be done in the next release.


One *other* way to fix this would be to store the pages being removed 
in a struct, and pass it as a parameter to the _walk() window size 
calculation function. This would avoid needless API changes and handle 
this case correctly.


> 
>>
>> Regards,
>> Takeshi
>>
>>
> 
> 


-- 
Thanks,
Anatoly

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [dpdk-dev] [PATCH v3] vfio: fix expanding DMA area in ppc64le
  2019-06-14  7:49   ` [dpdk-dev] [PATCH v2] " Takeshi Yoshimura
@ 2019-07-13  1:15     ` " Takeshi Yoshimura
  2019-07-16  0:20       ` David Christensen
  0 siblings, 1 reply; 16+ messages in thread
From: Takeshi Yoshimura @ 2019-07-13  1:15 UTC (permalink / raw)
  To: dev
  Cc: Burakov, Anatoly, David Christensen, Pradeep Satyanarayana,
	Takeshi Yoshimura

In ppc64le, expanding DMA areas always fail because we cannot remove
a DMA window. As a result, we cannot allocate more than one memseg in
ppc64le. This is because vfio_spapr_dma_mem_map() doesn't unmap all
the mapped DMA before removing the window. This patch fixes this
incorrect behavior.

I also fixed the order of ioctl for unregister and unmap. The ioctl
for unregister sometimes report device busy errors due to the
existence of mapped area.

Signed-off-by: Takeshi Yoshimura <tyos@jp.ibm.com>
---
 lib/librte_eal/linux/eal/eal_vfio.c | 99 +++++++++++++++++++----------
 1 file changed, 67 insertions(+), 32 deletions(-)

diff --git a/lib/librte_eal/linux/eal/eal_vfio.c b/lib/librte_eal/linux/eal/eal_vfio.c
index fadef427f..ed04231b1 100644
--- a/lib/librte_eal/linux/eal/eal_vfio.c
+++ b/lib/librte_eal/linux/eal/eal_vfio.c
@@ -1354,14 +1354,6 @@ vfio_spapr_dma_do_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
 		}
 
 	} else {
-		ret = ioctl(vfio_container_fd,
-				VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY, &reg);
-		if (ret) {
-			RTE_LOG(ERR, EAL, "  cannot unregister vaddr for IOMMU, error %i (%s)\n",
-					errno, strerror(errno));
-			return -1;
-		}
-
 		memset(&dma_unmap, 0, sizeof(dma_unmap));
 		dma_unmap.argsz = sizeof(struct vfio_iommu_type1_dma_unmap);
 		dma_unmap.size = len;
@@ -1374,28 +1366,56 @@ vfio_spapr_dma_do_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
 					errno, strerror(errno));
 			return -1;
 		}
+
+		ret = ioctl(vfio_container_fd,
+				VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY, &reg);
+		if (ret) {
+			RTE_LOG(ERR, EAL, "  cannot unregister vaddr for IOMMU, error %i (%s)\n",
+					errno, strerror(errno));
+			return -1;
+		}
 	}
 
 	return 0;
 }
 
+struct spapr_remap_walk_param {
+	int vfio_container_fd;
+	uint64_t addr_64;
+};
+
 static int
 vfio_spapr_map_walk(const struct rte_memseg_list *msl,
 		const struct rte_memseg *ms, void *arg)
 {
-	int *vfio_container_fd = arg;
+	struct spapr_remap_walk_param *param = arg;
 
-	if (msl->external)
+	if (msl->external || ms->addr_64 == param->addr_64)
 		return 0;
 
-	return vfio_spapr_dma_do_map(*vfio_container_fd, ms->addr_64, ms->iova,
+	return vfio_spapr_dma_do_map(param->vfio_container_fd, ms->addr_64, ms->iova,
 			ms->len, 1);
 }
 
+static int
+vfio_spapr_unmap_walk(const struct rte_memseg_list *msl,
+		const struct rte_memseg *ms, void *arg)
+{
+	struct spapr_remap_walk_param *param = arg;
+
+	if (msl->external || ms->addr_64 == param->addr_64)
+		return 0;
+
+	return vfio_spapr_dma_do_map(param->vfio_container_fd, ms->addr_64, ms->iova,
+			ms->len, 0);
+}
+
 struct spapr_walk_param {
 	uint64_t window_size;
 	uint64_t hugepage_sz;
+	uint64_t addr_64;
 };
+
 static int
 vfio_spapr_window_size_walk(const struct rte_memseg_list *msl,
 		const struct rte_memseg *ms, void *arg)
@@ -1406,6 +1426,10 @@ vfio_spapr_window_size_walk(const struct rte_memseg_list *msl,
 	if (msl->external)
 		return 0;
 
+	/* do not iterate ms we haven't mapped yet  */
+	if (param->addr_64 && ms->addr_64 == param->addr_64)
+		return 0;
+
 	if (max > param->window_size) {
 		param->hugepage_sz = ms->hugepage_sz;
 		param->window_size = max;
@@ -1503,6 +1527,7 @@ vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
 
 	/* check if window size needs to be adjusted */
 	memset(&param, 0, sizeof(param));
+	param.addr_64 = vaddr;
 
 	/* we're inside a callback so use thread-unsafe version */
 	if (rte_memseg_walk_thread_unsafe(vfio_spapr_window_size_walk,
@@ -1516,7 +1541,7 @@ vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
 	for (i = 0; i < user_mem_maps->n_maps; i++) {
 		uint64_t max = user_mem_maps->maps[i].iova +
 				user_mem_maps->maps[i].len;
-		create.window_size = RTE_MAX(create.window_size, max);
+		param.window_size = RTE_MAX(param.window_size, max);
 	}
 
 	/* sPAPR requires window size to be a power of 2 */
@@ -1525,9 +1550,33 @@ vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
 	create.levels = 1;
 
 	if (do_map) {
-		void *addr;
 		/* re-create window and remap the entire memory */
-		if (iova > create.window_size) {
+		if (iova + len > create.window_size) {
+			struct spapr_remap_walk_param remap_param = {
+				.vfio_container_fd = vfio_container_fd,
+				.addr_64 = vaddr,
+			};
+
+			/* release all maps before recreating the window */
+			if (rte_memseg_walk_thread_unsafe(vfio_spapr_unmap_walk,
+					&remap_param) < 0) {
+				RTE_LOG(ERR, EAL, "Could not release DMA maps\n");
+				ret = -1;
+				goto out;
+			}
+			/* release all user maps */
+			for (i = 0; i < user_mem_maps->n_maps; i++) {
+				struct user_mem_map *map =
+						&user_mem_maps->maps[i];
+				if (vfio_spapr_dma_do_map(vfio_container_fd,
+						map->addr, map->iova, map->len,
+						0)) {
+					RTE_LOG(ERR, EAL, "Could not release user DMA maps\n");
+					ret = -1;
+					goto out;
+				}
+			}
+			create.window_size = rte_align64pow2(iova + len);
 			if (vfio_spapr_create_new_dma_window(vfio_container_fd,
 					&create) < 0) {
 				RTE_LOG(ERR, EAL, "Could not create new DMA window\n");
@@ -1537,7 +1586,7 @@ vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
 			/* we're inside a callback, so use thread-unsafe version
 			 */
 			if (rte_memseg_walk_thread_unsafe(vfio_spapr_map_walk,
-					&vfio_container_fd) < 0) {
+					&remap_param) < 0) {
 				RTE_LOG(ERR, EAL, "Could not recreate DMA maps\n");
 				ret = -1;
 				goto out;
@@ -1555,23 +1604,8 @@ vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
 				}
 			}
 		}
-
-		/* now that we've remapped all of the memory that was present
-		 * before, map the segment that we were requested to map.
-		 *
-		 * however, if we were called by the callback, the memory we
-		 * were called with was already in the memseg list, so previous
-		 * mapping should've mapped that segment already.
-		 *
-		 * virt2memseg_list is a relatively cheap check, so use that. if
-		 * memory is within any memseg list, it's a memseg, so it's
-		 * already mapped.
-		 */
-		addr = (void *)(uintptr_t)vaddr;
-		if (rte_mem_virt2memseg_list(addr) == NULL &&
-				vfio_spapr_dma_do_map(vfio_container_fd,
-					vaddr, iova, len, 1) < 0) {
-			RTE_LOG(ERR, EAL, "Could not map segment\n");
+		if (vfio_spapr_dma_do_map(vfio_container_fd, vaddr, iova, len, 1)) {
+			RTE_LOG(ERR, EAL, "Failed to map DMA\n");
 			ret = -1;
 			goto out;
 		}
@@ -1599,6 +1633,7 @@ vfio_spapr_dma_map(int vfio_container_fd)
 	struct spapr_walk_param param;
 
 	memset(&param, 0, sizeof(param));
+	param.addr_64 = 0UL;
 
 	/* create DMA window from 0 to max(phys_addr + len) */
 	rte_memseg_walk(vfio_spapr_window_size_walk, &param);
-- 
2.17.1


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [dpdk-dev] [PATCH v3] vfio: fix expanding DMA area in ppc64le
  2019-07-13  1:15     ` [dpdk-dev] [PATCH v3] " Takeshi Yoshimura
@ 2019-07-16  0:20       ` David Christensen
  2019-07-16 10:56         ` Thomas Monjalon
  0 siblings, 1 reply; 16+ messages in thread
From: David Christensen @ 2019-07-16  0:20 UTC (permalink / raw)
  To: Takeshi Yoshimura, dev
  Cc: Burakov, Anatoly, David Christensen, Pradeep Satyanarayana

> In ppc64le, expanding DMA areas always fail because we cannot remove
> a DMA window. As a result, we cannot allocate more than one memseg in
> ppc64le. This is because vfio_spapr_dma_mem_map() doesn't unmap all
> the mapped DMA before removing the window. This patch fixes this
> incorrect behavior.
> 
> I also fixed the order of ioctl for unregister and unmap. The ioctl
> for unregister sometimes report device busy errors due to the
> existence of mapped area.
> 
> Signed-off-by: Takeshi Yoshimura <tyos@jp.ibm.com>
> ---
Acked-by: David Christensen <drc@linux.vnet.ibm.com>


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [dpdk-dev] [PATCH v3] vfio: fix expanding DMA area in ppc64le
  2019-07-16  0:20       ` David Christensen
@ 2019-07-16 10:56         ` Thomas Monjalon
  0 siblings, 0 replies; 16+ messages in thread
From: Thomas Monjalon @ 2019-07-16 10:56 UTC (permalink / raw)
  To: Takeshi Yoshimura
  Cc: dev, David Christensen, Burakov, Anatoly, David Christensen,
	Pradeep Satyanarayana

16/07/2019 02:20, David Christensen:
> > In ppc64le, expanding DMA areas always fail because we cannot remove
> > a DMA window. As a result, we cannot allocate more than one memseg in
> > ppc64le. This is because vfio_spapr_dma_mem_map() doesn't unmap all
> > the mapped DMA before removing the window. This patch fixes this
> > incorrect behavior.
> > 
> > I also fixed the order of ioctl for unregister and unmap. The ioctl
> > for unregister sometimes report device busy errors due to the
> > existence of mapped area.
> > 
> > Signed-off-by: Takeshi Yoshimura <tyos@jp.ibm.com>
> > ---
> Acked-by: David Christensen <drc@linux.vnet.ibm.com>

Applied, thanks




^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, back to index

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-06-12  6:33 [dpdk-dev] [PATCH] vfio: fix expanding DMA area in ppc64le Takeshi Yoshimura
2019-06-12 14:06 ` Aaron Conole
2019-06-13  2:22 ` Takeshi Yoshimura
2019-06-13 17:37   ` David Christensen
2019-06-14  7:34   ` David Marchand
2019-06-14  7:49   ` [dpdk-dev] [PATCH v2] " Takeshi Yoshimura
2019-07-13  1:15     ` [dpdk-dev] [PATCH v3] " Takeshi Yoshimura
2019-07-16  0:20       ` David Christensen
2019-07-16 10:56         ` Thomas Monjalon
2019-06-18  2:37   ` [dpdk-dev] [PATCH] " Mo, YufengX
2019-06-18  2:39   ` Mo, YufengX
2019-06-26  9:43   ` Burakov, Anatoly
2019-06-28 11:38   ` Takeshi T Yoshimura
2019-06-28 13:47     ` Burakov, Anatoly
2019-06-28 14:04       ` Burakov, Anatoly
2019-06-13  2:30 ` Takeshi T Yoshimura

DPDK-dev Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/dpdk-dev/0 dpdk-dev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 dpdk-dev dpdk-dev/ https://lore.kernel.org/dpdk-dev \
		dev@dpdk.org dpdk-dev@archiver.kernel.org
	public-inbox-index dpdk-dev


Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.dpdk.dev


AGPL code for this site: git clone https://public-inbox.org/ public-inbox