All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2] vfio: refactor PCI BAR mapping
@ 2017-09-25 15:04 Jonas Pfefferle
  2017-10-04 13:13 ` Burakov, Anatoly
  0 siblings, 1 reply; 3+ messages in thread
From: Jonas Pfefferle @ 2017-09-25 15:04 UTC (permalink / raw)
  To: dev; +Cc: anatoly.burakov, Jonas Pfefferle

Split pci_vfio_map_resource for primary and secondary processes.
Save all relevant mapping data in primary process to allow
the secondary process to perform mappings.

Signed-off-by: Jonas Pfefferle <jpf@zurich.ibm.com>
---
v2:
* fix zero size and offset when trying to mmap non msix bar

 lib/librte_eal/common/include/rte_pci.h    |   7 +
 lib/librte_eal/linuxapp/eal/eal_pci_vfio.c | 446 +++++++++++++++++------------
 2 files changed, 271 insertions(+), 182 deletions(-)

diff --git a/lib/librte_eal/common/include/rte_pci.h b/lib/librte_eal/common/include/rte_pci.h
index 8b12339..0821af9 100644
--- a/lib/librte_eal/common/include/rte_pci.h
+++ b/lib/librte_eal/common/include/rte_pci.h
@@ -214,6 +214,12 @@ struct pci_map {
 	uint64_t phaddr;
 };
 
+struct pci_msix_table {
+	int bar_index;
+	uint32_t offset;
+	uint32_t size;
+};
+
 /**
  * A structure describing a mapped PCI resource.
  * For multi-process we need to reproduce all PCI mappings in secondary
@@ -226,6 +232,7 @@ struct mapped_pci_resource {
 	char path[PATH_MAX];
 	int nb_maps;
 	struct pci_map maps[PCI_MAX_RESOURCE];
+	struct pci_msix_table msix_table;
 };
 
 /** mapped pci device list */
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
index aa9d96e..6443bd5 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
@@ -88,8 +88,7 @@ pci_vfio_write_config(const struct rte_intr_handle *intr_handle,
 
 /* get PCI BAR number where MSI-X interrupts are */
 static int
-pci_vfio_get_msix_bar(int fd, int *msix_bar, uint32_t *msix_table_offset,
-		      uint32_t *msix_table_size)
+pci_vfio_get_msix_bar(int fd, struct pci_msix_table *msix_table)
 {
 	int ret;
 	uint32_t reg;
@@ -161,9 +160,10 @@ pci_vfio_get_msix_bar(int fd, int *msix_bar, uint32_t *msix_table_offset,
 				return -1;
 			}
 
-			*msix_bar = reg & RTE_PCI_MSIX_TABLE_BIR;
-			*msix_table_offset = reg & RTE_PCI_MSIX_TABLE_OFFSET;
-			*msix_table_size = 16 * (1 + (flags & RTE_PCI_MSIX_FLAGS_QSIZE));
+			msix_table->bar_index = reg & RTE_PCI_MSIX_TABLE_BIR;
+			msix_table->offset = reg & RTE_PCI_MSIX_TABLE_OFFSET;
+			msix_table->size =
+				16 * (1 + (flags & RTE_PCI_MSIX_FLAGS_QSIZE));
 
 			return 0;
 		}
@@ -300,25 +300,150 @@ pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd)
 	return -1;
 }
 
-/*
- * map the PCI resources of a PCI device in virtual memory (VFIO version).
- * primary and secondary processes follow almost exactly the same path
- */
-int
-pci_vfio_map_resource(struct rte_pci_device *dev)
+static int
+pci_vfio_is_ioport_bar(int vfio_dev_fd, int bar_index)
+{
+	uint32_t ioport_bar;
+	int ret;
+
+	ret = pread64(vfio_dev_fd, &ioport_bar, sizeof(ioport_bar),
+			  VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX)
+			  + PCI_BASE_ADDRESS_0 + bar_index*4);
+	if (ret != sizeof(ioport_bar)) {
+		RTE_LOG(ERR, EAL, "Cannot read command (%x) from config space!\n",
+			PCI_BASE_ADDRESS_0 + bar_index*4);
+		return -1;
+	}
+
+	return ioport_bar & PCI_BASE_ADDRESS_SPACE_IO;
+}
+
+static int
+pci_vfio_setup_device(struct rte_pci_device *dev, int vfio_dev_fd)
+{
+	if (pci_vfio_setup_interrupts(dev, vfio_dev_fd) != 0) {
+		RTE_LOG(ERR, EAL, "Error setting up interrupts!\n");
+		return -1;
+	}
+
+	/* set bus mastering for the device */
+	if (pci_vfio_set_bus_master(vfio_dev_fd, true)) {
+		RTE_LOG(ERR, EAL, "Cannot set up bus mastering!\n");
+		return -1;
+	}
+
+	/* Reset the device */
+	ioctl(vfio_dev_fd, VFIO_DEVICE_RESET);
+
+	return 0;
+}
+
+static int
+pci_vfio_mmap_bar(int vfio_dev_fd, struct mapped_pci_resource *vfio_res,
+		int bar_index, int additional_flags)
+{
+	struct memreg {
+		unsigned long offset, size;
+	} memreg[2] = {};
+	void *bar_addr;
+	struct pci_msix_table *msix_table = &vfio_res->msix_table;
+	struct pci_map *bar = &vfio_res->maps[bar_index];
+
+	if (bar->size == 0)
+		/* Skip this BAR */
+		return 0;
+
+	if (msix_table->bar_index == bar_index) {
+		/*
+		 * VFIO will not let us map the MSI-X table,
+		 * but we can map around it.
+		 */
+		uint32_t table_start = msix_table->offset;
+		uint32_t table_end = table_start + msix_table->size;
+		table_end = (table_end + ~PAGE_MASK) & PAGE_MASK;
+		table_start &= PAGE_MASK;
+
+		if (table_start == 0 && table_end >= bar->size) {
+			/* Cannot map this BAR */
+			RTE_LOG(DEBUG, EAL, "Skipping BAR%d\n", bar_index);
+			bar->size = 0;
+			bar->addr = 0;
+			return 0;
+		}
+
+		memreg[0].offset = bar->offset;
+		memreg[0].size = table_start;
+		memreg[1].offset = bar->offset + table_end;
+		memreg[1].size = bar->size - table_end;
+
+		RTE_LOG(DEBUG, EAL,
+			"Trying to map BAR%d that contains the MSI-X "
+			"table. Trying offsets: "
+			"0x%04lx:0x%04lx, 0x%04lx:0x%04lx\n", bar_index,
+			memreg[0].offset, memreg[0].size,
+			memreg[1].offset, memreg[1].size);
+	} else {
+		memreg[0].offset = bar->offset;
+		memreg[0].size = bar->size;
+	}
+
+	/* reserve the address using an inaccessible mapping */
+	bar_addr = mmap(bar->addr, bar->size, 0, MAP_PRIVATE |
+			MAP_ANONYMOUS | additional_flags, -1, 0);
+	if (bar_addr != MAP_FAILED) {
+		void *map_addr = NULL;
+		if (memreg[0].size) {
+			/* actual map of first part */
+			map_addr = pci_map_resource(bar_addr, vfio_dev_fd,
+							memreg[0].offset,
+							memreg[0].size,
+							MAP_FIXED);
+		}
+
+		/* if there's a second part, try to map it */
+		if (map_addr != MAP_FAILED
+			&& memreg[1].offset && memreg[1].size) {
+			void *second_addr = RTE_PTR_ADD(bar_addr,
+							memreg[1].offset -
+							(uintptr_t)bar->offset);
+			map_addr = pci_map_resource(second_addr,
+							vfio_dev_fd,
+							memreg[1].offset,
+							memreg[1].size,
+							MAP_FIXED);
+		}
+
+		if (map_addr == MAP_FAILED || !map_addr) {
+			munmap(bar_addr, bar->size);
+			bar_addr = MAP_FAILED;
+			RTE_LOG(ERR, EAL, "Failed to map pci BAR%d\n",
+					bar_index);
+			return -1;
+		}
+	} else {
+		RTE_LOG(ERR, EAL,
+				"Failed to create inaccessible mapping for BAR%d\n",
+				bar_index);
+		return -1;
+	}
+
+	bar->addr = bar_addr;
+	return 0;
+}
+
+static int
+pci_vfio_map_resource_primary(struct rte_pci_device *dev)
 {
 	struct vfio_device_info device_info = { .argsz = sizeof(device_info) };
 	char pci_addr[PATH_MAX] = {0};
 	int vfio_dev_fd;
 	struct rte_pci_addr *loc = &dev->addr;
-	int i, ret, msix_bar;
+	int i, ret;
 	struct mapped_pci_resource *vfio_res = NULL;
-	struct mapped_pci_res_list *vfio_res_list = RTE_TAILQ_CAST(rte_vfio_tailq.head, mapped_pci_res_list);
+	struct mapped_pci_res_list *vfio_res_list =
+		RTE_TAILQ_CAST(rte_vfio_tailq.head, mapped_pci_res_list);
 
 	struct pci_map *maps;
-	uint32_t msix_table_offset = 0;
-	uint32_t msix_table_size = 0;
-	uint32_t ioport_bar;
 
 	dev->intr_handle.fd = -1;
 	dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
@@ -327,91 +452,58 @@ pci_vfio_map_resource(struct rte_pci_device *dev)
 	snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT,
 			loc->domain, loc->bus, loc->devid, loc->function);
 
-	if ((ret = vfio_setup_device(pci_get_sysfs_path(), pci_addr,
-					&vfio_dev_fd, &device_info)))
+	ret = vfio_setup_device(pci_get_sysfs_path(), pci_addr,
+					&vfio_dev_fd, &device_info);
+	if (ret)
 		return ret;
 
-	/* get MSI-X BAR, if any (we have to know where it is because we can't
-	 * easily mmap it when using VFIO) */
-	msix_bar = -1;
-	ret = pci_vfio_get_msix_bar(vfio_dev_fd, &msix_bar,
-				    &msix_table_offset, &msix_table_size);
-	if (ret < 0) {
-		RTE_LOG(ERR, EAL, "  %s cannot get MSI-X BAR number!\n", pci_addr);
-		close(vfio_dev_fd);
-		return -1;
+	/* allocate vfio_res and get region info */
+	vfio_res = rte_zmalloc("VFIO_RES", sizeof(*vfio_res), 0);
+	if (vfio_res == NULL) {
+		RTE_LOG(ERR, EAL,
+			"%s(): cannot store uio mmap details\n", __func__);
+		goto err_vfio_dev_fd;
 	}
+	memcpy(&vfio_res->pci_addr, &dev->addr, sizeof(vfio_res->pci_addr));
 
-	/* if we're in a primary process, allocate vfio_res and get region info */
-	if (internal_config.process_type == RTE_PROC_PRIMARY) {
-		vfio_res = rte_zmalloc("VFIO_RES", sizeof(*vfio_res), 0);
-		if (vfio_res == NULL) {
-			RTE_LOG(ERR, EAL,
-				"%s(): cannot store uio mmap details\n", __func__);
-			close(vfio_dev_fd);
-			return -1;
-		}
-		memcpy(&vfio_res->pci_addr, &dev->addr, sizeof(vfio_res->pci_addr));
-
-		/* get number of registers (up to BAR5) */
-		vfio_res->nb_maps = RTE_MIN((int) device_info.num_regions,
-				VFIO_PCI_BAR5_REGION_INDEX + 1);
-	} else {
-		/* if we're in a secondary process, just find our tailq entry */
-		TAILQ_FOREACH(vfio_res, vfio_res_list, next) {
-			if (rte_eal_compare_pci_addr(&vfio_res->pci_addr,
-						     &dev->addr))
-				continue;
-			break;
-		}
-		/* if we haven't found our tailq entry, something's wrong */
-		if (vfio_res == NULL) {
-			RTE_LOG(ERR, EAL, "  %s cannot find TAILQ entry for PCI device!\n",
-					pci_addr);
-			close(vfio_dev_fd);
-			return -1;
-		}
-	}
+	/* get number of registers (up to BAR5) */
+	vfio_res->nb_maps = RTE_MIN((int) device_info.num_regions,
+			VFIO_PCI_BAR5_REGION_INDEX + 1);
 
 	/* map BARs */
 	maps = vfio_res->maps;
 
+	vfio_res->msix_table.bar_index = -1;
+	/* get MSI-X BAR, if any (we have to know where it is because we can't
+	 * easily mmap it when using VFIO)
+	 */
+	ret = pci_vfio_get_msix_bar(vfio_dev_fd, &vfio_res->msix_table);
+	if (ret < 0) {
+		RTE_LOG(ERR, EAL, "  %s cannot get MSI-X BAR number!\n",
+				pci_addr);
+		goto err_vfio_dev_fd;
+	}
+
 	for (i = 0; i < (int) vfio_res->nb_maps; i++) {
 		struct vfio_region_info reg = { .argsz = sizeof(reg) };
 		void *bar_addr;
-		struct memreg {
-			unsigned long offset, size;
-		} memreg[2] = {};
 
 		reg.index = i;
 
 		ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_REGION_INFO, &reg);
-
 		if (ret) {
 			RTE_LOG(ERR, EAL, "  %s cannot get device region info "
 					"error %i (%s)\n", pci_addr, errno, strerror(errno));
-			close(vfio_dev_fd);
-			if (internal_config.process_type == RTE_PROC_PRIMARY)
-				rte_free(vfio_res);
-			return -1;
+			goto err_vfio_res;
 		}
 
 		/* chk for io port region */
-		ret = pread64(vfio_dev_fd, &ioport_bar, sizeof(ioport_bar),
-			      VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX)
-			      + PCI_BASE_ADDRESS_0 + i*4);
-
-		if (ret != sizeof(ioport_bar)) {
-			RTE_LOG(ERR, EAL,
-				"Cannot read command (%x) from config space!\n",
-				PCI_BASE_ADDRESS_0 + i*4);
-			return -1;
-		}
-
-		if (ioport_bar & PCI_BASE_ADDRESS_SPACE_IO) {
-			RTE_LOG(INFO, EAL,
-				"Ignore mapping IO port bar(%d) addr: %x\n",
-				 i, ioport_bar);
+		ret = pci_vfio_is_ioport_bar(vfio_dev_fd, i);
+		if (ret < 0)
+			goto err_vfio_res;
+		else if (ret) {
+			RTE_LOG(INFO, EAL, "Ignore mapping IO port bar(%d)\n",
+					i);
 			continue;
 		}
 
@@ -419,124 +511,114 @@ pci_vfio_map_resource(struct rte_pci_device *dev)
 		if ((reg.flags & VFIO_REGION_INFO_FLAG_MMAP) == 0)
 			continue;
 
-		if (i == msix_bar) {
-			/*
-			 * VFIO will not let us map the MSI-X table,
-			 * but we can map around it.
-			 */
-			uint32_t table_start = msix_table_offset;
-			uint32_t table_end = table_start + msix_table_size;
-			table_end = (table_end + ~PAGE_MASK) & PAGE_MASK;
-			table_start &= PAGE_MASK;
-
-			if (table_start == 0 && table_end >= reg.size) {
-				/* Cannot map this BAR */
-				RTE_LOG(DEBUG, EAL, "Skipping BAR %d\n", i);
-				continue;
-			} else {
-				memreg[0].offset = reg.offset;
-				memreg[0].size = table_start;
-				memreg[1].offset = reg.offset + table_end;
-				memreg[1].size = reg.size - table_end;
-
-				RTE_LOG(DEBUG, EAL,
-					"Trying to map BAR %d that contains the MSI-X "
-					"table. Trying offsets: "
-					"0x%04lx:0x%04lx, 0x%04lx:0x%04lx\n", i,
-					memreg[0].offset, memreg[0].size,
-					memreg[1].offset, memreg[1].size);
-			}
-		} else {
-			memreg[0].offset = reg.offset;
-			memreg[0].size = reg.size;
-		}
+		/* try mapping somewhere close to the end of hugepages */
+		if (pci_map_addr == NULL)
+			pci_map_addr = pci_find_max_end_va();
 
-		/* try to figure out an address */
-		if (internal_config.process_type == RTE_PROC_PRIMARY) {
-			/* try mapping somewhere close to the end of hugepages */
-			if (pci_map_addr == NULL)
-				pci_map_addr = pci_find_max_end_va();
+		bar_addr = pci_map_addr;
+		pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg.size);
+
+		maps[i].addr = bar_addr;
+		maps[i].offset = reg.offset;
+		maps[i].size = reg.size;
+		maps[i].path = NULL; /* vfio doesn't have per-resource paths */
 
-			bar_addr = pci_map_addr;
-			pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg.size);
-		} else {
-			bar_addr = maps[i].addr;
+		ret = pci_vfio_mmap_bar(vfio_dev_fd, vfio_res, i, 0);
+		if (ret < 0) {
+			RTE_LOG(ERR, EAL, "  %s mapping BAR%i failed: %s\n",
+					pci_addr, i, strerror(errno));
+			goto err_vfio_res;
 		}
 
-		/* reserve the address using an inaccessible mapping */
-		bar_addr = mmap(bar_addr, reg.size, 0, MAP_PRIVATE |
-				MAP_ANONYMOUS, -1, 0);
-		if (bar_addr != MAP_FAILED) {
-			void *map_addr = NULL;
-			if (memreg[0].size) {
-				/* actual map of first part */
-				map_addr = pci_map_resource(bar_addr, vfio_dev_fd,
-							    memreg[0].offset,
-							    memreg[0].size,
-							    MAP_FIXED);
-			}
+		dev->mem_resource[i].addr = maps[i].addr;
+	}
 
-			/* if there's a second part, try to map it */
-			if (map_addr != MAP_FAILED
-			    && memreg[1].offset && memreg[1].size) {
-				void *second_addr = RTE_PTR_ADD(bar_addr,
-								memreg[1].offset -
-								(uintptr_t)reg.offset);
-				map_addr = pci_map_resource(second_addr,
-							    vfio_dev_fd, memreg[1].offset,
-							    memreg[1].size,
-							    MAP_FIXED);
-			}
+	if (pci_vfio_setup_device(dev, vfio_dev_fd) < 0) {
+		RTE_LOG(ERR, EAL, "  %s setup device failed\n", pci_addr);
+		goto err_vfio_res;
+	}
 
-			if (map_addr == MAP_FAILED || !map_addr) {
-				munmap(bar_addr, reg.size);
-				bar_addr = MAP_FAILED;
-			}
-		}
+	TAILQ_INSERT_TAIL(vfio_res_list, vfio_res, next);
 
-		if (bar_addr == MAP_FAILED ||
-				(internal_config.process_type == RTE_PROC_SECONDARY &&
-						bar_addr != maps[i].addr)) {
-			RTE_LOG(ERR, EAL, "  %s mapping BAR%i failed: %s\n", pci_addr, i,
-					strerror(errno));
-			close(vfio_dev_fd);
-			if (internal_config.process_type == RTE_PROC_PRIMARY)
-				rte_free(vfio_res);
-			return -1;
-		}
+	return 0;
+err_vfio_res:
+	rte_free(vfio_res);
+err_vfio_dev_fd:
+	close(vfio_dev_fd);
+	return -1;
+}
 
-		maps[i].addr = bar_addr;
-		maps[i].offset = reg.offset;
-		maps[i].size = reg.size;
-		maps[i].path = NULL; /* vfio doesn't have per-resource paths */
-		dev->mem_resource[i].addr = bar_addr;
+static int
+pci_vfio_map_resource_secondary(struct rte_pci_device *dev)
+{
+	struct vfio_device_info device_info = { .argsz = sizeof(device_info) };
+	char pci_addr[PATH_MAX] = {0};
+	int vfio_dev_fd;
+	struct rte_pci_addr *loc = &dev->addr;
+	int i, ret;
+	struct mapped_pci_resource *vfio_res = NULL;
+	struct mapped_pci_res_list *vfio_res_list =
+		RTE_TAILQ_CAST(rte_vfio_tailq.head, mapped_pci_res_list);
+
+	struct pci_map *maps;
+
+	dev->intr_handle.fd = -1;
+	dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
+
+	/* store PCI address string */
+	snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT,
+			loc->domain, loc->bus, loc->devid, loc->function);
+
+	ret = vfio_setup_device(pci_get_sysfs_path(), pci_addr,
+					&vfio_dev_fd, &device_info);
+	if (ret)
+		return ret;
+
+	/* if we're in a secondary process, just find our tailq entry */
+	TAILQ_FOREACH(vfio_res, vfio_res_list, next) {
+		if (rte_eal_compare_pci_addr(&vfio_res->pci_addr,
+						 &dev->addr))
+			continue;
+		break;
+	}
+	/* if we haven't found our tailq entry, something's wrong */
+	if (vfio_res == NULL) {
+		RTE_LOG(ERR, EAL, "  %s cannot find TAILQ entry for PCI device!\n",
+				pci_addr);
+		goto err_vfio_dev_fd;
 	}
 
-	/* if secondary process, do not set up interrupts */
-	if (internal_config.process_type == RTE_PROC_PRIMARY) {
-		if (pci_vfio_setup_interrupts(dev, vfio_dev_fd) != 0) {
-			RTE_LOG(ERR, EAL, "  %s error setting up interrupts!\n", pci_addr);
-			close(vfio_dev_fd);
-			rte_free(vfio_res);
-			return -1;
-		}
+	/* map BARs */
+	maps = vfio_res->maps;
 
-		/* set bus mastering for the device */
-		if (pci_vfio_set_bus_master(vfio_dev_fd, true)) {
-			RTE_LOG(ERR, EAL, "  %s cannot set up bus mastering!\n", pci_addr);
-			close(vfio_dev_fd);
-			rte_free(vfio_res);
-			return -1;
+	for (i = 0; i < (int) vfio_res->nb_maps; i++) {
+		ret = pci_vfio_mmap_bar(vfio_dev_fd, vfio_res, i, MAP_FIXED);
+		if (ret < 0) {
+			RTE_LOG(ERR, EAL, "  %s mapping BAR%i failed: %s\n",
+					pci_addr, i, strerror(errno));
+			goto err_vfio_dev_fd;
 		}
 
-		/* Reset the device */
-		ioctl(vfio_dev_fd, VFIO_DEVICE_RESET);
+		dev->mem_resource[i].addr = maps[i].addr;
 	}
 
-	if (internal_config.process_type == RTE_PROC_PRIMARY)
-		TAILQ_INSERT_TAIL(vfio_res_list, vfio_res, next);
-
 	return 0;
+err_vfio_dev_fd:
+	close(vfio_dev_fd);
+	return -1;
+}
+
+/*
+ * map the PCI resources of a PCI device in virtual memory (VFIO version).
+ * primary and secondary processes follow almost exactly the same path
+ */
+int
+pci_vfio_map_resource(struct rte_pci_device *dev)
+{
+	if (internal_config.process_type == RTE_PROC_PRIMARY)
+		return pci_vfio_map_resource_primary(dev);
+	else
+		return pci_vfio_map_resource_secondary(dev);
 }
 
 int
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH v2] vfio: refactor PCI BAR mapping
  2017-09-25 15:04 [PATCH v2] vfio: refactor PCI BAR mapping Jonas Pfefferle
@ 2017-10-04 13:13 ` Burakov, Anatoly
  2017-10-06 14:06   ` Jonas Pfefferle1
  0 siblings, 1 reply; 3+ messages in thread
From: Burakov, Anatoly @ 2017-10-04 13:13 UTC (permalink / raw)
  To: Jonas Pfefferle, dev

On 25-Sep-17 4:04 PM, Jonas Pfefferle wrote:
> Split pci_vfio_map_resource for primary and secondary processes.
> Save all relevant mapping data in primary process to allow
> the secondary process to perform mappings.
> 
> Signed-off-by: Jonas Pfefferle <jpf@zurich.ibm.com>
> ---
> v2:
> * fix zero size and offset when trying to mmap non msix bar
> 
>   lib/librte_eal/common/include/rte_pci.h    |   7 +
>   lib/librte_eal/linuxapp/eal/eal_pci_vfio.c | 446 +++++++++++++++++------------
>   2 files changed, 271 insertions(+), 182 deletions(-)
> 
> diff --git a/lib/librte_eal/common/include/rte_pci.h b/lib/librte_eal/common/include/rte_pci.h
> index 8b12339..0821af9 100644
> --- a/lib/librte_eal/common/include/rte_pci.h
> +++ b/lib/librte_eal/common/include/rte_pci.h
> @@ -214,6 +214,12 @@ struct pci_map {
>   	uint64_t phaddr;
>   };
>   
> +struct pci_msix_table {
> +	int bar_index;
> +	uint32_t offset;
> +	uint32_t size;
> +};
> +
>   /**
>    * A structure describing a mapped PCI resource.
>    * For multi-process we need to reproduce all PCI mappings in secondary
> @@ -226,6 +232,7 @@ struct mapped_pci_resource {
>   	char path[PATH_MAX];
>   	int nb_maps;
>   	struct pci_map maps[PCI_MAX_RESOURCE];
> +	struct pci_msix_table msix_table;
>   };
>   
>   /** mapped pci device list */
> diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> index aa9d96e..6443bd5 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> +++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> @@ -88,8 +88,7 @@ pci_vfio_write_config(const struct rte_intr_handle *intr_handle,
>   
>   /* get PCI BAR number where MSI-X interrupts are */
>   static int
> -pci_vfio_get_msix_bar(int fd, int *msix_bar, uint32_t *msix_table_offset,
> -		      uint32_t *msix_table_size)
> +pci_vfio_get_msix_bar(int fd, struct pci_msix_table *msix_table)
>   {
>   	int ret;
>   	uint32_t reg;
> @@ -161,9 +160,10 @@ pci_vfio_get_msix_bar(int fd, int *msix_bar, uint32_t *msix_table_offset,
>   				return -1;
>   			}
>   
> -			*msix_bar = reg & RTE_PCI_MSIX_TABLE_BIR;
> -			*msix_table_offset = reg & RTE_PCI_MSIX_TABLE_OFFSET;
> -			*msix_table_size = 16 * (1 + (flags & RTE_PCI_MSIX_FLAGS_QSIZE));
> +			msix_table->bar_index = reg & RTE_PCI_MSIX_TABLE_BIR;
> +			msix_table->offset = reg & RTE_PCI_MSIX_TABLE_OFFSET;
> +			msix_table->size =
> +				16 * (1 + (flags & RTE_PCI_MSIX_FLAGS_QSIZE));
>   
>   			return 0;
>   		}
> @@ -300,25 +300,150 @@ pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd)
>   	return -1;
>   }
>   
> -/*
> - * map the PCI resources of a PCI device in virtual memory (VFIO version).
> - * primary and secondary processes follow almost exactly the same path
> - */
> -int
> -pci_vfio_map_resource(struct rte_pci_device *dev)
> +static int
> +pci_vfio_is_ioport_bar(int vfio_dev_fd, int bar_index)
> +{
> +	uint32_t ioport_bar;
> +	int ret;
> +
> +	ret = pread64(vfio_dev_fd, &ioport_bar, sizeof(ioport_bar),
> +			  VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX)
> +			  + PCI_BASE_ADDRESS_0 + bar_index*4);
> +	if (ret != sizeof(ioport_bar)) {
> +		RTE_LOG(ERR, EAL, "Cannot read command (%x) from config space!\n",
> +			PCI_BASE_ADDRESS_0 + bar_index*4);
> +		return -1;
> +	}
> +
> +	return ioport_bar & PCI_BASE_ADDRESS_SPACE_IO;

Not sure i like this. I think it's better to be explicit, e.g.
return ioport_bar & PCI_BASE_ADDRESS_SPACE_IO != 0;

Makes no difference (both because return value is non-zero and because 
PCI_BASE_ADDRESS_SPACE_IO is 0x01), but still, better to make intentions 
clear i think.

Otherwise, did a quick smoke-test and it works, so

Acked-by: Anatoly Burakov <anatoly.burakov@intel.com>

Keep the ack if you decide to submit a v3 :)

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH v2] vfio: refactor PCI BAR mapping
  2017-10-04 13:13 ` Burakov, Anatoly
@ 2017-10-06 14:06   ` Jonas Pfefferle1
  0 siblings, 0 replies; 3+ messages in thread
From: Jonas Pfefferle1 @ 2017-10-06 14:06 UTC (permalink / raw)
  To: Burakov, Anatoly; +Cc: dev

Hi Anatoly,

"Burakov, Anatoly" <anatoly.burakov@intel.com> wrote on 10/04/2017 03:13:22
PM:

> From: "Burakov, Anatoly" <anatoly.burakov@intel.com>
> To: Jonas Pfefferle <jpf@zurich.ibm.com>, dev@dpdk.org
> Date: 10/04/2017 03:14 PM
> Subject: Re: [PATCH v2] vfio: refactor PCI BAR mapping
>
> On 25-Sep-17 4:04 PM, Jonas Pfefferle wrote:
> > Split pci_vfio_map_resource for primary and secondary processes.
> > Save all relevant mapping data in primary process to allow
> > the secondary process to perform mappings.
> >
> > Signed-off-by: Jonas Pfefferle <jpf@zurich.ibm.com>
> > ---
> > v2:
> > * fix zero size and offset when trying to mmap non msix bar
> >
> >   lib/librte_eal/common/include/rte_pci.h    |   7 +
> >   lib/librte_eal/linuxapp/eal/eal_pci_vfio.c | 446 +++++++++++++++
> ++------------
> >   2 files changed, 271 insertions(+), 182 deletions(-)
> >
> > diff --git a/lib/librte_eal/common/include/rte_pci.h b/lib/
> librte_eal/common/include/rte_pci.h
> > index 8b12339..0821af9 100644
> > --- a/lib/librte_eal/common/include/rte_pci.h
> > +++ b/lib/librte_eal/common/include/rte_pci.h
> > @@ -214,6 +214,12 @@ struct pci_map {
> >      uint64_t phaddr;
> >   };
> >
> > +struct pci_msix_table {
> > +   int bar_index;
> > +   uint32_t offset;
> > +   uint32_t size;
> > +};
> > +
> >   /**
> >    * A structure describing a mapped PCI resource.
> >    * For multi-process we need to reproduce all PCI mappings in
secondary
> > @@ -226,6 +232,7 @@ struct mapped_pci_resource {
> >      char path[PATH_MAX];
> >      int nb_maps;
> >      struct pci_map maps[PCI_MAX_RESOURCE];
> > +   struct pci_msix_table msix_table;
> >   };
> >
> >   /** mapped pci device list */
> > diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c b/lib/
> librte_eal/linuxapp/eal/eal_pci_vfio.c
> > index aa9d96e..6443bd5 100644
> > --- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> > +++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> > @@ -88,8 +88,7 @@ pci_vfio_write_config(const struct
> rte_intr_handle *intr_handle,
> >
> >   /* get PCI BAR number where MSI-X interrupts are */
> >   static int
> > -pci_vfio_get_msix_bar(int fd, int *msix_bar, uint32_t
*msix_table_offset,
> > -            uint32_t *msix_table_size)
> > +pci_vfio_get_msix_bar(int fd, struct pci_msix_table *msix_table)
> >   {
> >      int ret;
> >      uint32_t reg;
> > @@ -161,9 +160,10 @@ pci_vfio_get_msix_bar(int fd, int *msix_bar,
> uint32_t *msix_table_offset,
> >               return -1;
> >            }
> >
> > -         *msix_bar = reg & RTE_PCI_MSIX_TABLE_BIR;
> > -         *msix_table_offset = reg & RTE_PCI_MSIX_TABLE_OFFSET;
> > -         *msix_table_size = 16 * (1 + (flags &
RTE_PCI_MSIX_FLAGS_QSIZE));
> > +         msix_table->bar_index = reg & RTE_PCI_MSIX_TABLE_BIR;
> > +         msix_table->offset = reg & RTE_PCI_MSIX_TABLE_OFFSET;
> > +         msix_table->size =
> > +            16 * (1 + (flags & RTE_PCI_MSIX_FLAGS_QSIZE));
> >
> >            return 0;
> >         }
> > @@ -300,25 +300,150 @@ pci_vfio_setup_interrupts(struct
> rte_pci_device *dev, int vfio_dev_fd)
> >      return -1;
> >   }
> >
> > -/*
> > - * map the PCI resources of a PCI device in virtual memory (VFIO
version).
> > - * primary and secondary processes follow almost exactly the same path
> > - */
> > -int
> > -pci_vfio_map_resource(struct rte_pci_device *dev)
> > +static int
> > +pci_vfio_is_ioport_bar(int vfio_dev_fd, int bar_index)
> > +{
> > +   uint32_t ioport_bar;
> > +   int ret;
> > +
> > +   ret = pread64(vfio_dev_fd, &ioport_bar, sizeof(ioport_bar),
> > +           VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX)
> > +           + PCI_BASE_ADDRESS_0 + bar_index*4);
> > +   if (ret != sizeof(ioport_bar)) {
> > +      RTE_LOG(ERR, EAL, "Cannot read command (%x) from config
space!\n",
> > +         PCI_BASE_ADDRESS_0 + bar_index*4);
> > +      return -1;
> > +   }
> > +
> > +   return ioport_bar & PCI_BASE_ADDRESS_SPACE_IO;
>
> Not sure i like this. I think it's better to be explicit, e.g.
> return ioport_bar & PCI_BASE_ADDRESS_SPACE_IO != 0;
>
> Makes no difference (both because return value is non-zero and because
> PCI_BASE_ADDRESS_SPACE_IO is 0x01), but still, better to make intentions
> clear i think.

I agree. I wanted to change this anyway but forgot. V3 is going out in a
sec. Thanks for the ack ;)

>
> Otherwise, did a quick smoke-test and it works, so
>
> Acked-by: Anatoly Burakov <anatoly.burakov@intel.com>
>
> Keep the ack if you decide to submit a v3 :)
>

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2017-10-06 14:07 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-09-25 15:04 [PATCH v2] vfio: refactor PCI BAR mapping Jonas Pfefferle
2017-10-04 13:13 ` Burakov, Anatoly
2017-10-06 14:06   ` Jonas Pfefferle1

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.