* [PATCH v2] vfio: refactor PCI BAR mapping
@ 2017-09-25 15:04 Jonas Pfefferle
2017-10-04 13:13 ` Burakov, Anatoly
0 siblings, 1 reply; 3+ messages in thread
From: Jonas Pfefferle @ 2017-09-25 15:04 UTC (permalink / raw)
To: dev; +Cc: anatoly.burakov, Jonas Pfefferle
Split pci_vfio_map_resource for primary and secondary processes.
Save all relevant mapping data in primary process to allow
the secondary process to perform mappings.
Signed-off-by: Jonas Pfefferle <jpf@zurich.ibm.com>
---
v2:
* fix zero size and offset when trying to mmap non msix bar
lib/librte_eal/common/include/rte_pci.h | 7 +
lib/librte_eal/linuxapp/eal/eal_pci_vfio.c | 446 +++++++++++++++++------------
2 files changed, 271 insertions(+), 182 deletions(-)
diff --git a/lib/librte_eal/common/include/rte_pci.h b/lib/librte_eal/common/include/rte_pci.h
index 8b12339..0821af9 100644
--- a/lib/librte_eal/common/include/rte_pci.h
+++ b/lib/librte_eal/common/include/rte_pci.h
@@ -214,6 +214,12 @@ struct pci_map {
uint64_t phaddr;
};
+struct pci_msix_table {
+ int bar_index;
+ uint32_t offset;
+ uint32_t size;
+};
+
/**
* A structure describing a mapped PCI resource.
* For multi-process we need to reproduce all PCI mappings in secondary
@@ -226,6 +232,7 @@ struct mapped_pci_resource {
char path[PATH_MAX];
int nb_maps;
struct pci_map maps[PCI_MAX_RESOURCE];
+ struct pci_msix_table msix_table;
};
/** mapped pci device list */
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
index aa9d96e..6443bd5 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
@@ -88,8 +88,7 @@ pci_vfio_write_config(const struct rte_intr_handle *intr_handle,
/* get PCI BAR number where MSI-X interrupts are */
static int
-pci_vfio_get_msix_bar(int fd, int *msix_bar, uint32_t *msix_table_offset,
- uint32_t *msix_table_size)
+pci_vfio_get_msix_bar(int fd, struct pci_msix_table *msix_table)
{
int ret;
uint32_t reg;
@@ -161,9 +160,10 @@ pci_vfio_get_msix_bar(int fd, int *msix_bar, uint32_t *msix_table_offset,
return -1;
}
- *msix_bar = reg & RTE_PCI_MSIX_TABLE_BIR;
- *msix_table_offset = reg & RTE_PCI_MSIX_TABLE_OFFSET;
- *msix_table_size = 16 * (1 + (flags & RTE_PCI_MSIX_FLAGS_QSIZE));
+ msix_table->bar_index = reg & RTE_PCI_MSIX_TABLE_BIR;
+ msix_table->offset = reg & RTE_PCI_MSIX_TABLE_OFFSET;
+ msix_table->size =
+ 16 * (1 + (flags & RTE_PCI_MSIX_FLAGS_QSIZE));
return 0;
}
@@ -300,25 +300,150 @@ pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd)
return -1;
}
-/*
- * map the PCI resources of a PCI device in virtual memory (VFIO version).
- * primary and secondary processes follow almost exactly the same path
- */
-int
-pci_vfio_map_resource(struct rte_pci_device *dev)
+static int
+pci_vfio_is_ioport_bar(int vfio_dev_fd, int bar_index)
+{
+ uint32_t ioport_bar;
+ int ret;
+
+ ret = pread64(vfio_dev_fd, &ioport_bar, sizeof(ioport_bar),
+ VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX)
+ + PCI_BASE_ADDRESS_0 + bar_index*4);
+ if (ret != sizeof(ioport_bar)) {
+ RTE_LOG(ERR, EAL, "Cannot read command (%x) from config space!\n",
+ PCI_BASE_ADDRESS_0 + bar_index*4);
+ return -1;
+ }
+
+ return ioport_bar & PCI_BASE_ADDRESS_SPACE_IO;
+}
+
+static int
+pci_vfio_setup_device(struct rte_pci_device *dev, int vfio_dev_fd)
+{
+ if (pci_vfio_setup_interrupts(dev, vfio_dev_fd) != 0) {
+ RTE_LOG(ERR, EAL, "Error setting up interrupts!\n");
+ return -1;
+ }
+
+ /* set bus mastering for the device */
+ if (pci_vfio_set_bus_master(vfio_dev_fd, true)) {
+ RTE_LOG(ERR, EAL, "Cannot set up bus mastering!\n");
+ return -1;
+ }
+
+ /* Reset the device */
+ ioctl(vfio_dev_fd, VFIO_DEVICE_RESET);
+
+ return 0;
+}
+
+static int
+pci_vfio_mmap_bar(int vfio_dev_fd, struct mapped_pci_resource *vfio_res,
+ int bar_index, int additional_flags)
+{
+ struct memreg {
+ unsigned long offset, size;
+ } memreg[2] = {};
+ void *bar_addr;
+ struct pci_msix_table *msix_table = &vfio_res->msix_table;
+ struct pci_map *bar = &vfio_res->maps[bar_index];
+
+ if (bar->size == 0)
+ /* Skip this BAR */
+ return 0;
+
+ if (msix_table->bar_index == bar_index) {
+ /*
+ * VFIO will not let us map the MSI-X table,
+ * but we can map around it.
+ */
+ uint32_t table_start = msix_table->offset;
+ uint32_t table_end = table_start + msix_table->size;
+ table_end = (table_end + ~PAGE_MASK) & PAGE_MASK;
+ table_start &= PAGE_MASK;
+
+ if (table_start == 0 && table_end >= bar->size) {
+ /* Cannot map this BAR */
+ RTE_LOG(DEBUG, EAL, "Skipping BAR%d\n", bar_index);
+ bar->size = 0;
+ bar->addr = 0;
+ return 0;
+ }
+
+ memreg[0].offset = bar->offset;
+ memreg[0].size = table_start;
+ memreg[1].offset = bar->offset + table_end;
+ memreg[1].size = bar->size - table_end;
+
+ RTE_LOG(DEBUG, EAL,
+ "Trying to map BAR%d that contains the MSI-X "
+ "table. Trying offsets: "
+ "0x%04lx:0x%04lx, 0x%04lx:0x%04lx\n", bar_index,
+ memreg[0].offset, memreg[0].size,
+ memreg[1].offset, memreg[1].size);
+ } else {
+ memreg[0].offset = bar->offset;
+ memreg[0].size = bar->size;
+ }
+
+ /* reserve the address using an inaccessible mapping */
+ bar_addr = mmap(bar->addr, bar->size, 0, MAP_PRIVATE |
+ MAP_ANONYMOUS | additional_flags, -1, 0);
+ if (bar_addr != MAP_FAILED) {
+ void *map_addr = NULL;
+ if (memreg[0].size) {
+ /* actual map of first part */
+ map_addr = pci_map_resource(bar_addr, vfio_dev_fd,
+ memreg[0].offset,
+ memreg[0].size,
+ MAP_FIXED);
+ }
+
+ /* if there's a second part, try to map it */
+ if (map_addr != MAP_FAILED
+ && memreg[1].offset && memreg[1].size) {
+ void *second_addr = RTE_PTR_ADD(bar_addr,
+ memreg[1].offset -
+ (uintptr_t)bar->offset);
+ map_addr = pci_map_resource(second_addr,
+ vfio_dev_fd,
+ memreg[1].offset,
+ memreg[1].size,
+ MAP_FIXED);
+ }
+
+ if (map_addr == MAP_FAILED || !map_addr) {
+ munmap(bar_addr, bar->size);
+ bar_addr = MAP_FAILED;
+ RTE_LOG(ERR, EAL, "Failed to map pci BAR%d\n",
+ bar_index);
+ return -1;
+ }
+ } else {
+ RTE_LOG(ERR, EAL,
+ "Failed to create inaccessible mapping for BAR%d\n",
+ bar_index);
+ return -1;
+ }
+
+ bar->addr = bar_addr;
+ return 0;
+}
+
+static int
+pci_vfio_map_resource_primary(struct rte_pci_device *dev)
{
struct vfio_device_info device_info = { .argsz = sizeof(device_info) };
char pci_addr[PATH_MAX] = {0};
int vfio_dev_fd;
struct rte_pci_addr *loc = &dev->addr;
- int i, ret, msix_bar;
+ int i, ret;
struct mapped_pci_resource *vfio_res = NULL;
- struct mapped_pci_res_list *vfio_res_list = RTE_TAILQ_CAST(rte_vfio_tailq.head, mapped_pci_res_list);
+ struct mapped_pci_res_list *vfio_res_list =
+ RTE_TAILQ_CAST(rte_vfio_tailq.head, mapped_pci_res_list);
struct pci_map *maps;
- uint32_t msix_table_offset = 0;
- uint32_t msix_table_size = 0;
- uint32_t ioport_bar;
dev->intr_handle.fd = -1;
dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
@@ -327,91 +452,58 @@ pci_vfio_map_resource(struct rte_pci_device *dev)
snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT,
loc->domain, loc->bus, loc->devid, loc->function);
- if ((ret = vfio_setup_device(pci_get_sysfs_path(), pci_addr,
- &vfio_dev_fd, &device_info)))
+ ret = vfio_setup_device(pci_get_sysfs_path(), pci_addr,
+ &vfio_dev_fd, &device_info);
+ if (ret)
return ret;
- /* get MSI-X BAR, if any (we have to know where it is because we can't
- * easily mmap it when using VFIO) */
- msix_bar = -1;
- ret = pci_vfio_get_msix_bar(vfio_dev_fd, &msix_bar,
- &msix_table_offset, &msix_table_size);
- if (ret < 0) {
- RTE_LOG(ERR, EAL, " %s cannot get MSI-X BAR number!\n", pci_addr);
- close(vfio_dev_fd);
- return -1;
+ /* allocate vfio_res and get region info */
+ vfio_res = rte_zmalloc("VFIO_RES", sizeof(*vfio_res), 0);
+ if (vfio_res == NULL) {
+ RTE_LOG(ERR, EAL,
+ "%s(): cannot store uio mmap details\n", __func__);
+ goto err_vfio_dev_fd;
}
+ memcpy(&vfio_res->pci_addr, &dev->addr, sizeof(vfio_res->pci_addr));
- /* if we're in a primary process, allocate vfio_res and get region info */
- if (internal_config.process_type == RTE_PROC_PRIMARY) {
- vfio_res = rte_zmalloc("VFIO_RES", sizeof(*vfio_res), 0);
- if (vfio_res == NULL) {
- RTE_LOG(ERR, EAL,
- "%s(): cannot store uio mmap details\n", __func__);
- close(vfio_dev_fd);
- return -1;
- }
- memcpy(&vfio_res->pci_addr, &dev->addr, sizeof(vfio_res->pci_addr));
-
- /* get number of registers (up to BAR5) */
- vfio_res->nb_maps = RTE_MIN((int) device_info.num_regions,
- VFIO_PCI_BAR5_REGION_INDEX + 1);
- } else {
- /* if we're in a secondary process, just find our tailq entry */
- TAILQ_FOREACH(vfio_res, vfio_res_list, next) {
- if (rte_eal_compare_pci_addr(&vfio_res->pci_addr,
- &dev->addr))
- continue;
- break;
- }
- /* if we haven't found our tailq entry, something's wrong */
- if (vfio_res == NULL) {
- RTE_LOG(ERR, EAL, " %s cannot find TAILQ entry for PCI device!\n",
- pci_addr);
- close(vfio_dev_fd);
- return -1;
- }
- }
+ /* get number of registers (up to BAR5) */
+ vfio_res->nb_maps = RTE_MIN((int) device_info.num_regions,
+ VFIO_PCI_BAR5_REGION_INDEX + 1);
/* map BARs */
maps = vfio_res->maps;
+ vfio_res->msix_table.bar_index = -1;
+ /* get MSI-X BAR, if any (we have to know where it is because we can't
+ * easily mmap it when using VFIO)
+ */
+ ret = pci_vfio_get_msix_bar(vfio_dev_fd, &vfio_res->msix_table);
+ if (ret < 0) {
+ RTE_LOG(ERR, EAL, " %s cannot get MSI-X BAR number!\n",
+ pci_addr);
+ goto err_vfio_dev_fd;
+ }
+
for (i = 0; i < (int) vfio_res->nb_maps; i++) {
struct vfio_region_info reg = { .argsz = sizeof(reg) };
void *bar_addr;
- struct memreg {
- unsigned long offset, size;
- } memreg[2] = {};
reg.index = i;
ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_REGION_INFO, ®);
-
if (ret) {
RTE_LOG(ERR, EAL, " %s cannot get device region info "
"error %i (%s)\n", pci_addr, errno, strerror(errno));
- close(vfio_dev_fd);
- if (internal_config.process_type == RTE_PROC_PRIMARY)
- rte_free(vfio_res);
- return -1;
+ goto err_vfio_res;
}
/* chk for io port region */
- ret = pread64(vfio_dev_fd, &ioport_bar, sizeof(ioport_bar),
- VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX)
- + PCI_BASE_ADDRESS_0 + i*4);
-
- if (ret != sizeof(ioport_bar)) {
- RTE_LOG(ERR, EAL,
- "Cannot read command (%x) from config space!\n",
- PCI_BASE_ADDRESS_0 + i*4);
- return -1;
- }
-
- if (ioport_bar & PCI_BASE_ADDRESS_SPACE_IO) {
- RTE_LOG(INFO, EAL,
- "Ignore mapping IO port bar(%d) addr: %x\n",
- i, ioport_bar);
+ ret = pci_vfio_is_ioport_bar(vfio_dev_fd, i);
+ if (ret < 0)
+ goto err_vfio_res;
+ else if (ret) {
+ RTE_LOG(INFO, EAL, "Ignore mapping IO port bar(%d)\n",
+ i);
continue;
}
@@ -419,124 +511,114 @@ pci_vfio_map_resource(struct rte_pci_device *dev)
if ((reg.flags & VFIO_REGION_INFO_FLAG_MMAP) == 0)
continue;
- if (i == msix_bar) {
- /*
- * VFIO will not let us map the MSI-X table,
- * but we can map around it.
- */
- uint32_t table_start = msix_table_offset;
- uint32_t table_end = table_start + msix_table_size;
- table_end = (table_end + ~PAGE_MASK) & PAGE_MASK;
- table_start &= PAGE_MASK;
-
- if (table_start == 0 && table_end >= reg.size) {
- /* Cannot map this BAR */
- RTE_LOG(DEBUG, EAL, "Skipping BAR %d\n", i);
- continue;
- } else {
- memreg[0].offset = reg.offset;
- memreg[0].size = table_start;
- memreg[1].offset = reg.offset + table_end;
- memreg[1].size = reg.size - table_end;
-
- RTE_LOG(DEBUG, EAL,
- "Trying to map BAR %d that contains the MSI-X "
- "table. Trying offsets: "
- "0x%04lx:0x%04lx, 0x%04lx:0x%04lx\n", i,
- memreg[0].offset, memreg[0].size,
- memreg[1].offset, memreg[1].size);
- }
- } else {
- memreg[0].offset = reg.offset;
- memreg[0].size = reg.size;
- }
+ /* try mapping somewhere close to the end of hugepages */
+ if (pci_map_addr == NULL)
+ pci_map_addr = pci_find_max_end_va();
- /* try to figure out an address */
- if (internal_config.process_type == RTE_PROC_PRIMARY) {
- /* try mapping somewhere close to the end of hugepages */
- if (pci_map_addr == NULL)
- pci_map_addr = pci_find_max_end_va();
+ bar_addr = pci_map_addr;
+ pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg.size);
+
+ maps[i].addr = bar_addr;
+ maps[i].offset = reg.offset;
+ maps[i].size = reg.size;
+ maps[i].path = NULL; /* vfio doesn't have per-resource paths */
- bar_addr = pci_map_addr;
- pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg.size);
- } else {
- bar_addr = maps[i].addr;
+ ret = pci_vfio_mmap_bar(vfio_dev_fd, vfio_res, i, 0);
+ if (ret < 0) {
+ RTE_LOG(ERR, EAL, " %s mapping BAR%i failed: %s\n",
+ pci_addr, i, strerror(errno));
+ goto err_vfio_res;
}
- /* reserve the address using an inaccessible mapping */
- bar_addr = mmap(bar_addr, reg.size, 0, MAP_PRIVATE |
- MAP_ANONYMOUS, -1, 0);
- if (bar_addr != MAP_FAILED) {
- void *map_addr = NULL;
- if (memreg[0].size) {
- /* actual map of first part */
- map_addr = pci_map_resource(bar_addr, vfio_dev_fd,
- memreg[0].offset,
- memreg[0].size,
- MAP_FIXED);
- }
+ dev->mem_resource[i].addr = maps[i].addr;
+ }
- /* if there's a second part, try to map it */
- if (map_addr != MAP_FAILED
- && memreg[1].offset && memreg[1].size) {
- void *second_addr = RTE_PTR_ADD(bar_addr,
- memreg[1].offset -
- (uintptr_t)reg.offset);
- map_addr = pci_map_resource(second_addr,
- vfio_dev_fd, memreg[1].offset,
- memreg[1].size,
- MAP_FIXED);
- }
+ if (pci_vfio_setup_device(dev, vfio_dev_fd) < 0) {
+ RTE_LOG(ERR, EAL, " %s setup device failed\n", pci_addr);
+ goto err_vfio_res;
+ }
- if (map_addr == MAP_FAILED || !map_addr) {
- munmap(bar_addr, reg.size);
- bar_addr = MAP_FAILED;
- }
- }
+ TAILQ_INSERT_TAIL(vfio_res_list, vfio_res, next);
- if (bar_addr == MAP_FAILED ||
- (internal_config.process_type == RTE_PROC_SECONDARY &&
- bar_addr != maps[i].addr)) {
- RTE_LOG(ERR, EAL, " %s mapping BAR%i failed: %s\n", pci_addr, i,
- strerror(errno));
- close(vfio_dev_fd);
- if (internal_config.process_type == RTE_PROC_PRIMARY)
- rte_free(vfio_res);
- return -1;
- }
+ return 0;
+err_vfio_res:
+ rte_free(vfio_res);
+err_vfio_dev_fd:
+ close(vfio_dev_fd);
+ return -1;
+}
- maps[i].addr = bar_addr;
- maps[i].offset = reg.offset;
- maps[i].size = reg.size;
- maps[i].path = NULL; /* vfio doesn't have per-resource paths */
- dev->mem_resource[i].addr = bar_addr;
+static int
+pci_vfio_map_resource_secondary(struct rte_pci_device *dev)
+{
+ struct vfio_device_info device_info = { .argsz = sizeof(device_info) };
+ char pci_addr[PATH_MAX] = {0};
+ int vfio_dev_fd;
+ struct rte_pci_addr *loc = &dev->addr;
+ int i, ret;
+ struct mapped_pci_resource *vfio_res = NULL;
+ struct mapped_pci_res_list *vfio_res_list =
+ RTE_TAILQ_CAST(rte_vfio_tailq.head, mapped_pci_res_list);
+
+ struct pci_map *maps;
+
+ dev->intr_handle.fd = -1;
+ dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
+
+ /* store PCI address string */
+ snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT,
+ loc->domain, loc->bus, loc->devid, loc->function);
+
+ ret = vfio_setup_device(pci_get_sysfs_path(), pci_addr,
+ &vfio_dev_fd, &device_info);
+ if (ret)
+ return ret;
+
+ /* if we're in a secondary process, just find our tailq entry */
+ TAILQ_FOREACH(vfio_res, vfio_res_list, next) {
+ if (rte_eal_compare_pci_addr(&vfio_res->pci_addr,
+ &dev->addr))
+ continue;
+ break;
+ }
+ /* if we haven't found our tailq entry, something's wrong */
+ if (vfio_res == NULL) {
+ RTE_LOG(ERR, EAL, " %s cannot find TAILQ entry for PCI device!\n",
+ pci_addr);
+ goto err_vfio_dev_fd;
}
- /* if secondary process, do not set up interrupts */
- if (internal_config.process_type == RTE_PROC_PRIMARY) {
- if (pci_vfio_setup_interrupts(dev, vfio_dev_fd) != 0) {
- RTE_LOG(ERR, EAL, " %s error setting up interrupts!\n", pci_addr);
- close(vfio_dev_fd);
- rte_free(vfio_res);
- return -1;
- }
+ /* map BARs */
+ maps = vfio_res->maps;
- /* set bus mastering for the device */
- if (pci_vfio_set_bus_master(vfio_dev_fd, true)) {
- RTE_LOG(ERR, EAL, " %s cannot set up bus mastering!\n", pci_addr);
- close(vfio_dev_fd);
- rte_free(vfio_res);
- return -1;
+ for (i = 0; i < (int) vfio_res->nb_maps; i++) {
+ ret = pci_vfio_mmap_bar(vfio_dev_fd, vfio_res, i, MAP_FIXED);
+ if (ret < 0) {
+ RTE_LOG(ERR, EAL, " %s mapping BAR%i failed: %s\n",
+ pci_addr, i, strerror(errno));
+ goto err_vfio_dev_fd;
}
- /* Reset the device */
- ioctl(vfio_dev_fd, VFIO_DEVICE_RESET);
+ dev->mem_resource[i].addr = maps[i].addr;
}
- if (internal_config.process_type == RTE_PROC_PRIMARY)
- TAILQ_INSERT_TAIL(vfio_res_list, vfio_res, next);
-
return 0;
+err_vfio_dev_fd:
+ close(vfio_dev_fd);
+ return -1;
+}
+
+/*
+ * map the PCI resources of a PCI device in virtual memory (VFIO version).
+ * primary and secondary processes follow almost exactly the same path
+ */
+int
+pci_vfio_map_resource(struct rte_pci_device *dev)
+{
+ if (internal_config.process_type == RTE_PROC_PRIMARY)
+ return pci_vfio_map_resource_primary(dev);
+ else
+ return pci_vfio_map_resource_secondary(dev);
}
int
--
2.7.4
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [PATCH v2] vfio: refactor PCI BAR mapping
2017-09-25 15:04 [PATCH v2] vfio: refactor PCI BAR mapping Jonas Pfefferle
@ 2017-10-04 13:13 ` Burakov, Anatoly
2017-10-06 14:06 ` Jonas Pfefferle1
0 siblings, 1 reply; 3+ messages in thread
From: Burakov, Anatoly @ 2017-10-04 13:13 UTC (permalink / raw)
To: Jonas Pfefferle, dev
On 25-Sep-17 4:04 PM, Jonas Pfefferle wrote:
> Split pci_vfio_map_resource for primary and secondary processes.
> Save all relevant mapping data in primary process to allow
> the secondary process to perform mappings.
>
> Signed-off-by: Jonas Pfefferle <jpf@zurich.ibm.com>
> ---
> v2:
> * fix zero size and offset when trying to mmap non msix bar
>
> lib/librte_eal/common/include/rte_pci.h | 7 +
> lib/librte_eal/linuxapp/eal/eal_pci_vfio.c | 446 +++++++++++++++++------------
> 2 files changed, 271 insertions(+), 182 deletions(-)
>
> diff --git a/lib/librte_eal/common/include/rte_pci.h b/lib/librte_eal/common/include/rte_pci.h
> index 8b12339..0821af9 100644
> --- a/lib/librte_eal/common/include/rte_pci.h
> +++ b/lib/librte_eal/common/include/rte_pci.h
> @@ -214,6 +214,12 @@ struct pci_map {
> uint64_t phaddr;
> };
>
> +struct pci_msix_table {
> + int bar_index;
> + uint32_t offset;
> + uint32_t size;
> +};
> +
> /**
> * A structure describing a mapped PCI resource.
> * For multi-process we need to reproduce all PCI mappings in secondary
> @@ -226,6 +232,7 @@ struct mapped_pci_resource {
> char path[PATH_MAX];
> int nb_maps;
> struct pci_map maps[PCI_MAX_RESOURCE];
> + struct pci_msix_table msix_table;
> };
>
> /** mapped pci device list */
> diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> index aa9d96e..6443bd5 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> +++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> @@ -88,8 +88,7 @@ pci_vfio_write_config(const struct rte_intr_handle *intr_handle,
>
> /* get PCI BAR number where MSI-X interrupts are */
> static int
> -pci_vfio_get_msix_bar(int fd, int *msix_bar, uint32_t *msix_table_offset,
> - uint32_t *msix_table_size)
> +pci_vfio_get_msix_bar(int fd, struct pci_msix_table *msix_table)
> {
> int ret;
> uint32_t reg;
> @@ -161,9 +160,10 @@ pci_vfio_get_msix_bar(int fd, int *msix_bar, uint32_t *msix_table_offset,
> return -1;
> }
>
> - *msix_bar = reg & RTE_PCI_MSIX_TABLE_BIR;
> - *msix_table_offset = reg & RTE_PCI_MSIX_TABLE_OFFSET;
> - *msix_table_size = 16 * (1 + (flags & RTE_PCI_MSIX_FLAGS_QSIZE));
> + msix_table->bar_index = reg & RTE_PCI_MSIX_TABLE_BIR;
> + msix_table->offset = reg & RTE_PCI_MSIX_TABLE_OFFSET;
> + msix_table->size =
> + 16 * (1 + (flags & RTE_PCI_MSIX_FLAGS_QSIZE));
>
> return 0;
> }
> @@ -300,25 +300,150 @@ pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd)
> return -1;
> }
>
> -/*
> - * map the PCI resources of a PCI device in virtual memory (VFIO version).
> - * primary and secondary processes follow almost exactly the same path
> - */
> -int
> -pci_vfio_map_resource(struct rte_pci_device *dev)
> +static int
> +pci_vfio_is_ioport_bar(int vfio_dev_fd, int bar_index)
> +{
> + uint32_t ioport_bar;
> + int ret;
> +
> + ret = pread64(vfio_dev_fd, &ioport_bar, sizeof(ioport_bar),
> + VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX)
> + + PCI_BASE_ADDRESS_0 + bar_index*4);
> + if (ret != sizeof(ioport_bar)) {
> + RTE_LOG(ERR, EAL, "Cannot read command (%x) from config space!\n",
> + PCI_BASE_ADDRESS_0 + bar_index*4);
> + return -1;
> + }
> +
> + return ioport_bar & PCI_BASE_ADDRESS_SPACE_IO;
Not sure i like this. I think it's better to be explicit, e.g.
return ioport_bar & PCI_BASE_ADDRESS_SPACE_IO != 0;
Makes no difference (both because return value is non-zero and because
PCI_BASE_ADDRESS_SPACE_IO is 0x01), but still, better to make intentions
clear i think.
Otherwise, did a quick smoke-test and it works, so
Acked-by: Anatoly Burakov <anatoly.burakov@intel.com>
Keep the ack if you decide to submit a v3 :)
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH v2] vfio: refactor PCI BAR mapping
2017-10-04 13:13 ` Burakov, Anatoly
@ 2017-10-06 14:06 ` Jonas Pfefferle1
0 siblings, 0 replies; 3+ messages in thread
From: Jonas Pfefferle1 @ 2017-10-06 14:06 UTC (permalink / raw)
To: Burakov, Anatoly; +Cc: dev
Hi Anatoly,
"Burakov, Anatoly" <anatoly.burakov@intel.com> wrote on 10/04/2017 03:13:22
PM:
> From: "Burakov, Anatoly" <anatoly.burakov@intel.com>
> To: Jonas Pfefferle <jpf@zurich.ibm.com>, dev@dpdk.org
> Date: 10/04/2017 03:14 PM
> Subject: Re: [PATCH v2] vfio: refactor PCI BAR mapping
>
> On 25-Sep-17 4:04 PM, Jonas Pfefferle wrote:
> > Split pci_vfio_map_resource for primary and secondary processes.
> > Save all relevant mapping data in primary process to allow
> > the secondary process to perform mappings.
> >
> > Signed-off-by: Jonas Pfefferle <jpf@zurich.ibm.com>
> > ---
> > v2:
> > * fix zero size and offset when trying to mmap non msix bar
> >
> > lib/librte_eal/common/include/rte_pci.h | 7 +
> > lib/librte_eal/linuxapp/eal/eal_pci_vfio.c | 446 +++++++++++++++
> ++------------
> > 2 files changed, 271 insertions(+), 182 deletions(-)
> >
> > diff --git a/lib/librte_eal/common/include/rte_pci.h b/lib/
> librte_eal/common/include/rte_pci.h
> > index 8b12339..0821af9 100644
> > --- a/lib/librte_eal/common/include/rte_pci.h
> > +++ b/lib/librte_eal/common/include/rte_pci.h
> > @@ -214,6 +214,12 @@ struct pci_map {
> > uint64_t phaddr;
> > };
> >
> > +struct pci_msix_table {
> > + int bar_index;
> > + uint32_t offset;
> > + uint32_t size;
> > +};
> > +
> > /**
> > * A structure describing a mapped PCI resource.
> > * For multi-process we need to reproduce all PCI mappings in
secondary
> > @@ -226,6 +232,7 @@ struct mapped_pci_resource {
> > char path[PATH_MAX];
> > int nb_maps;
> > struct pci_map maps[PCI_MAX_RESOURCE];
> > + struct pci_msix_table msix_table;
> > };
> >
> > /** mapped pci device list */
> > diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c b/lib/
> librte_eal/linuxapp/eal/eal_pci_vfio.c
> > index aa9d96e..6443bd5 100644
> > --- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> > +++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> > @@ -88,8 +88,7 @@ pci_vfio_write_config(const struct
> rte_intr_handle *intr_handle,
> >
> > /* get PCI BAR number where MSI-X interrupts are */
> > static int
> > -pci_vfio_get_msix_bar(int fd, int *msix_bar, uint32_t
*msix_table_offset,
> > - uint32_t *msix_table_size)
> > +pci_vfio_get_msix_bar(int fd, struct pci_msix_table *msix_table)
> > {
> > int ret;
> > uint32_t reg;
> > @@ -161,9 +160,10 @@ pci_vfio_get_msix_bar(int fd, int *msix_bar,
> uint32_t *msix_table_offset,
> > return -1;
> > }
> >
> > - *msix_bar = reg & RTE_PCI_MSIX_TABLE_BIR;
> > - *msix_table_offset = reg & RTE_PCI_MSIX_TABLE_OFFSET;
> > - *msix_table_size = 16 * (1 + (flags &
RTE_PCI_MSIX_FLAGS_QSIZE));
> > + msix_table->bar_index = reg & RTE_PCI_MSIX_TABLE_BIR;
> > + msix_table->offset = reg & RTE_PCI_MSIX_TABLE_OFFSET;
> > + msix_table->size =
> > + 16 * (1 + (flags & RTE_PCI_MSIX_FLAGS_QSIZE));
> >
> > return 0;
> > }
> > @@ -300,25 +300,150 @@ pci_vfio_setup_interrupts(struct
> rte_pci_device *dev, int vfio_dev_fd)
> > return -1;
> > }
> >
> > -/*
> > - * map the PCI resources of a PCI device in virtual memory (VFIO
version).
> > - * primary and secondary processes follow almost exactly the same path
> > - */
> > -int
> > -pci_vfio_map_resource(struct rte_pci_device *dev)
> > +static int
> > +pci_vfio_is_ioport_bar(int vfio_dev_fd, int bar_index)
> > +{
> > + uint32_t ioport_bar;
> > + int ret;
> > +
> > + ret = pread64(vfio_dev_fd, &ioport_bar, sizeof(ioport_bar),
> > + VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX)
> > + + PCI_BASE_ADDRESS_0 + bar_index*4);
> > + if (ret != sizeof(ioport_bar)) {
> > + RTE_LOG(ERR, EAL, "Cannot read command (%x) from config
space!\n",
> > + PCI_BASE_ADDRESS_0 + bar_index*4);
> > + return -1;
> > + }
> > +
> > + return ioport_bar & PCI_BASE_ADDRESS_SPACE_IO;
>
> Not sure i like this. I think it's better to be explicit, e.g.
> return ioport_bar & PCI_BASE_ADDRESS_SPACE_IO != 0;
>
> Makes no difference (both because return value is non-zero and because
> PCI_BASE_ADDRESS_SPACE_IO is 0x01), but still, better to make intentions
> clear i think.
I agree. I wanted to change this anyway but forgot. V3 is going out in a
sec. Thanks for the ack ;)
>
> Otherwise, did a quick smoke-test and it works, so
>
> Acked-by: Anatoly Burakov <anatoly.burakov@intel.com>
>
> Keep the ack if you decide to submit a v3 :)
>
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2017-10-06 14:07 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-09-25 15:04 [PATCH v2] vfio: refactor PCI BAR mapping Jonas Pfefferle
2017-10-04 13:13 ` Burakov, Anatoly
2017-10-06 14:06 ` Jonas Pfefferle1
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.