From: Elena Ufimtseva <elena.ufimtseva@oracle.com>
To: qemu-devel@nongnu.org
Cc: elena.ufimtseva@oracle.com, john.g.johnson@oracle.com,
jag.raman@oracle.com, swapnil.ingle@nutanix.com,
john.levon@nutanix.com, alex.williamson@redhat.com,
stefanha@redhat.com, thanos.makatos@nutanix.com
Subject: [PATCH RFC v2 10/16] vfio-user: pci_user_realize PCI setup
Date: Mon, 16 Aug 2021 09:42:43 -0700 [thread overview]
Message-ID: <b6ccfc654915781b1bd16aec4e3f98600f2577ef.1629131628.git.elena.ufimtseva@oracle.com> (raw)
In-Reply-To: <cover.1629131628.git.elena.ufimtseva@oracle.com>
From: John Johnson <john.g.johnson@oracle.com>
PCI BAR registers are read from the remote device's config space.
PCI config space reads and writes are forwarded to the remote server.
Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
---
hw/vfio/pci.c | 210 +++++++++++++++++++++++++++++++++++++++++---------
1 file changed, 175 insertions(+), 35 deletions(-)
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 63aa2441f0..ea0df8be65 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -807,8 +807,14 @@ static void vfio_pci_load_rom(VFIOPCIDevice *vdev)
memset(vdev->rom, 0xff, size);
while (size) {
- bytes = pread(vdev->vbasedev.fd, vdev->rom + off,
- size, vdev->rom_offset + off);
+ if (vdev->vbasedev.proxy != NULL) {
+ bytes = vfio_user_region_read(&vdev->vbasedev,
+ VFIO_PCI_ROM_REGION_INDEX,
+ off, size, vdev->rom + off);
+ } else {
+ bytes = pread(vdev->vbasedev.fd, vdev->rom + off,
+ size, vdev->rom_offset + off);
+ }
if (bytes == 0) {
break;
} else if (bytes > 0) {
@@ -927,12 +933,28 @@ static void vfio_pci_size_rom(VFIOPCIDevice *vdev)
* Use the same size ROM BAR as the physical device. The contents
* will get filled in later when the guest tries to read it.
*/
- if (pread(fd, &orig, 4, offset) != 4 ||
- pwrite(fd, &size, 4, offset) != 4 ||
- pread(fd, &size, 4, offset) != 4 ||
- pwrite(fd, &orig, 4, offset) != 4) {
- error_report("%s(%s) failed: %m", __func__, vdev->vbasedev.name);
- return;
+ if (vdev->vbasedev.proxy != NULL) {
+ if (vfio_user_region_read(&vdev->vbasedev, VFIO_PCI_CONFIG_REGION_INDEX,
+ PCI_ROM_ADDRESS, 4, &orig) != 4 ||
+ vfio_user_region_write(&vdev->vbasedev,
+ VFIO_PCI_CONFIG_REGION_INDEX,
+ PCI_ROM_ADDRESS, 4, &size) != 4 ||
+ vfio_user_region_read(&vdev->vbasedev, VFIO_PCI_CONFIG_REGION_INDEX,
+ PCI_ROM_ADDRESS, 4, &size) != 4 ||
+ vfio_user_region_write(&vdev->vbasedev,
+ VFIO_PCI_CONFIG_REGION_INDEX,
+ PCI_ROM_ADDRESS, 4, &orig) != 4) {
+ error_report("%s(%s) failed: %m", __func__, vdev->vbasedev.name);
+ return;
+ }
+ } else {
+ if (pread(fd, &orig, 4, offset) != 4 ||
+ pwrite(fd, &size, 4, offset) != 4 ||
+ pread(fd, &size, 4, offset) != 4 ||
+ pwrite(fd, &orig, 4, offset) != 4) {
+ error_report("%s(%s) failed: %m", __func__, vdev->vbasedev.name);
+ return;
+ }
}
size = ~(le32_to_cpu(size) & PCI_ROM_ADDRESS_MASK) + 1;
@@ -1123,8 +1145,14 @@ uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len)
if (~emu_bits & (0xffffffffU >> (32 - len * 8))) {
ssize_t ret;
- ret = pread(vdev->vbasedev.fd, &phys_val, len,
- vdev->config_offset + addr);
+ if (vdev->vbasedev.proxy != NULL) {
+ ret = vfio_user_region_read(&vdev->vbasedev,
+ VFIO_PCI_CONFIG_REGION_INDEX,
+ addr, len, &phys_val);
+ } else {
+ ret = pread(vdev->vbasedev.fd, &phys_val, len,
+ vdev->config_offset + addr);
+ }
if (ret != len) {
error_report("%s(%s, 0x%x, 0x%x) failed: %m",
__func__, vdev->vbasedev.name, addr, len);
@@ -1145,12 +1173,20 @@ void vfio_pci_write_config(PCIDevice *pdev,
{
VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev);
uint32_t val_le = cpu_to_le32(val);
+ int ret;
trace_vfio_pci_write_config(vdev->vbasedev.name, addr, val, len);
/* Write everything to VFIO, let it filter out what we can't write */
- if (pwrite(vdev->vbasedev.fd, &val_le, len, vdev->config_offset + addr)
- != len) {
+ if (vdev->vbasedev.proxy != NULL) {
+ ret = vfio_user_region_write(&vdev->vbasedev,
+ VFIO_PCI_CONFIG_REGION_INDEX,
+ addr, len, &val_le);
+ } else {
+ ret = pwrite(vdev->vbasedev.fd, &val_le, len,
+ vdev->config_offset + addr);
+ }
+ if (ret != len) {
error_report("%s(%s, 0x%x, 0x%x, 0x%x) failed: %m",
__func__, vdev->vbasedev.name, addr, val, len);
}
@@ -1240,10 +1276,15 @@ static int vfio_msi_setup(VFIOPCIDevice *vdev, int pos, Error **errp)
int ret, entries;
Error *err = NULL;
- if (pread(vdev->vbasedev.fd, &ctrl, sizeof(ctrl),
- vdev->config_offset + pos + PCI_CAP_FLAGS) != sizeof(ctrl)) {
- error_setg_errno(errp, errno, "failed reading MSI PCI_CAP_FLAGS");
- return -errno;
+ if (vdev->vbasedev.proxy != NULL) {
+ /* during setup, config space was initialized from remote */
+ memcpy(&ctrl, vdev->pdev.config + pos + PCI_CAP_FLAGS, sizeof(ctrl));
+ } else {
+ if (pread(vdev->vbasedev.fd, &ctrl, sizeof(ctrl),
+ vdev->config_offset + pos + PCI_CAP_FLAGS) != sizeof(ctrl)) {
+ error_setg_errno(errp, errno, "failed reading MSI PCI_CAP_FLAGS");
+ return -errno;
+ }
}
ctrl = le16_to_cpu(ctrl);
@@ -1456,22 +1497,30 @@ static void vfio_msix_early_setup(VFIOPCIDevice *vdev, Error **errp)
return;
}
- if (pread(fd, &ctrl, sizeof(ctrl),
- vdev->config_offset + pos + PCI_MSIX_FLAGS) != sizeof(ctrl)) {
- error_setg_errno(errp, errno, "failed to read PCI MSIX FLAGS");
- return;
- }
+ if (vdev->vbasedev.proxy != NULL) {
+ /* during setup, config space was initialized from remote */
+ memcpy(&ctrl, vdev->pdev.config + pos + PCI_MSIX_FLAGS, sizeof(ctrl));
+ memcpy(&table, vdev->pdev.config + pos + PCI_MSIX_TABLE, sizeof(table));
+ memcpy(&pba, vdev->pdev.config + pos + PCI_MSIX_PBA, sizeof(pba));
+ } else {
+ if (pread(fd, &ctrl, sizeof(ctrl),
+ vdev->config_offset + pos + PCI_MSIX_FLAGS) != sizeof(ctrl)) {
+ error_setg_errno(errp, errno, "failed to read PCI MSIX FLAGS");
+ return;
+ }
- if (pread(fd, &table, sizeof(table),
- vdev->config_offset + pos + PCI_MSIX_TABLE) != sizeof(table)) {
- error_setg_errno(errp, errno, "failed to read PCI MSIX TABLE");
- return;
- }
+ if (pread(fd, &table, sizeof(table),
+ vdev->config_offset + pos +
+ PCI_MSIX_TABLE) != sizeof(table)) {
+ error_setg_errno(errp, errno, "failed to read PCI MSIX TABLE");
+ return;
+ }
- if (pread(fd, &pba, sizeof(pba),
- vdev->config_offset + pos + PCI_MSIX_PBA) != sizeof(pba)) {
- error_setg_errno(errp, errno, "failed to read PCI MSIX PBA");
- return;
+ if (pread(fd, &pba, sizeof(pba),
+ vdev->config_offset + pos + PCI_MSIX_PBA) != sizeof(pba)) {
+ error_setg_errno(errp, errno, "failed to read PCI MSIX PBA");
+ return;
+ }
}
ctrl = le16_to_cpu(ctrl);
@@ -1619,11 +1668,17 @@ static void vfio_bar_prepare(VFIOPCIDevice *vdev, int nr)
}
/* Determine what type of BAR this is for registration */
- ret = pread(vdev->vbasedev.fd, &pci_bar, sizeof(pci_bar),
- vdev->config_offset + PCI_BASE_ADDRESS_0 + (4 * nr));
- if (ret != sizeof(pci_bar)) {
- error_report("vfio: Failed to read BAR %d (%m)", nr);
- return;
+ if (vdev->vbasedev.proxy != NULL) {
+ /* during setup, config space was initialized from remote */
+ memcpy(&pci_bar, vdev->pdev.config + PCI_BASE_ADDRESS_0 + (4 * nr),
+ sizeof(pci_bar));
+ } else {
+ ret = pread(vdev->vbasedev.fd, &pci_bar, sizeof(pci_bar),
+ vdev->config_offset + PCI_BASE_ADDRESS_0 + (4 * nr));
+ if (ret != sizeof(pci_bar)) {
+ error_report("vfio: Failed to read BAR %d (%m)", nr);
+ return;
+ }
}
pci_bar = le32_to_cpu(pci_bar);
@@ -3423,6 +3478,91 @@ static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp)
goto error;
}
+ /* Get a copy of config space */
+ ret = vfio_user_region_read(vbasedev, VFIO_PCI_CONFIG_REGION_INDEX, 0,
+ MIN(pci_config_size(pdev), vdev->config_size),
+ pdev->config);
+ if (ret < (int)MIN(pci_config_size(&vdev->pdev), vdev->config_size)) {
+ error_setg_errno(errp, -ret, "failed to read device config space");
+ goto error;
+ }
+
+ /* vfio emulates a lot for us, but some bits need extra love */
+ vdev->emulated_config_bits = g_malloc0(vdev->config_size);
+
+ /* QEMU can choose to expose the ROM or not */
+ memset(vdev->emulated_config_bits + PCI_ROM_ADDRESS, 0xff, 4);
+ /* QEMU can also add or extend BARs */
+ memset(vdev->emulated_config_bits + PCI_BASE_ADDRESS_0, 0xff, 6 * 4);
+ vdev->vendor_id = pci_get_word(pdev->config + PCI_VENDOR_ID);
+ vdev->device_id = pci_get_word(pdev->config + PCI_DEVICE_ID);
+
+ /* QEMU can change multi-function devices to single function, or reverse */
+ vdev->emulated_config_bits[PCI_HEADER_TYPE] =
+ PCI_HEADER_TYPE_MULTI_FUNCTION;
+
+ /* Restore or clear multifunction, this is always controlled by QEMU */
+ if (vdev->pdev.cap_present & QEMU_PCI_CAP_MULTIFUNCTION) {
+ vdev->pdev.config[PCI_HEADER_TYPE] |= PCI_HEADER_TYPE_MULTI_FUNCTION;
+ } else {
+ vdev->pdev.config[PCI_HEADER_TYPE] &= ~PCI_HEADER_TYPE_MULTI_FUNCTION;
+ }
+
+ /*
+ * Clear host resource mapping info. If we choose not to register a
+ * BAR, such as might be the case with the option ROM, we can get
+ * confusing, unwritable, residual addresses from the host here.
+ */
+ memset(&vdev->pdev.config[PCI_BASE_ADDRESS_0], 0, 24);
+ memset(&vdev->pdev.config[PCI_ROM_ADDRESS], 0, 4);
+
+ vfio_pci_size_rom(vdev);
+
+ vfio_bars_prepare(vdev);
+
+ vfio_msix_early_setup(vdev, &err);
+ if (err) {
+ error_propagate(errp, err);
+ goto error;
+ }
+
+ vfio_bars_register(vdev);
+
+ ret = vfio_add_capabilities(vdev, errp);
+ if (ret) {
+ goto out_teardown;
+ }
+
+ /* QEMU emulates all of MSI & MSIX */
+ if (pdev->cap_present & QEMU_PCI_CAP_MSIX) {
+ memset(vdev->emulated_config_bits + pdev->msix_cap, 0xff,
+ MSIX_CAP_LENGTH);
+ }
+
+ if (pdev->cap_present & QEMU_PCI_CAP_MSI) {
+ memset(vdev->emulated_config_bits + pdev->msi_cap, 0xff,
+ vdev->msi_cap_size);
+ }
+
+ if (vdev->pdev.config[PCI_INTERRUPT_PIN] != 0) {
+ vdev->intx.mmap_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
+ vfio_intx_mmap_enable, vdev);
+ pci_device_set_intx_routing_notifier(&vdev->pdev,
+ vfio_intx_routing_notifier);
+ vdev->irqchip_change_notifier.notify = vfio_irqchip_change;
+ kvm_irqchip_add_change_notifier(&vdev->irqchip_change_notifier);
+ ret = vfio_intx_enable(vdev, errp);
+ if (ret) {
+ goto out_deregister;
+ }
+ }
+
+out_deregister:
+ pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
+ kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier);
+out_teardown:
+ vfio_teardown_msi(vdev);
+ vfio_bars_exit(vdev);
error:
vfio_user_disconnect(proxy);
error_prepend(errp, VFIO_MSG_PREFIX, vdev->vbasedev.name);
--
2.25.1
next prev parent reply other threads:[~2021-08-16 16:54 UTC|newest]
Thread overview: 108+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-08-16 16:42 [PATCH RFC v2 00/16] vfio-user implementation Elena Ufimtseva
2021-08-16 16:42 ` [PATCH RFC v2 01/16] vfio-user: introduce vfio-user protocol specification Elena Ufimtseva
2021-08-17 23:04 ` Alex Williamson
2021-08-19 9:28 ` Swapnil Ingle
2021-08-19 15:32 ` John Johnson
2021-08-19 16:26 ` Alex Williamson
2021-08-16 16:42 ` [PATCH RFC v2 02/16] vfio-user: add VFIO base abstract class Elena Ufimtseva
2021-08-16 16:42 ` [PATCH RFC v2 03/16] vfio-user: Define type vfio_user_pci_dev_info Elena Ufimtseva
2021-08-24 13:52 ` Stefan Hajnoczi
2021-08-16 16:42 ` [PATCH RFC v2 04/16] vfio-user: connect vfio proxy to remote server Elena Ufimtseva
2021-08-18 18:47 ` Alex Williamson
2021-08-19 14:10 ` John Johnson
2021-08-24 14:15 ` Stefan Hajnoczi
2021-08-30 3:00 ` John Johnson
2021-09-07 13:21 ` Stefan Hajnoczi
2021-09-09 5:11 ` John Johnson
2021-09-09 6:29 ` Stefan Hajnoczi
2021-09-10 5:25 ` John Johnson
2021-09-13 12:35 ` Stefan Hajnoczi
2021-09-13 17:23 ` John Johnson
2021-09-14 13:06 ` Stefan Hajnoczi
2021-09-15 0:21 ` John Johnson
2021-09-15 13:04 ` Stefan Hajnoczi
2021-09-15 19:14 ` John Johnson
2021-09-16 11:49 ` Stefan Hajnoczi
2021-08-16 16:42 ` [PATCH RFC v2 05/16] vfio-user: define VFIO Proxy and communication functions Elena Ufimtseva
2021-08-24 15:14 ` Stefan Hajnoczi
2021-08-30 3:04 ` John Johnson
2021-09-07 13:35 ` Stefan Hajnoczi
2021-08-16 16:42 ` [PATCH RFC v2 06/16] vfio-user: negotiate version with remote server Elena Ufimtseva
2021-08-24 15:59 ` Stefan Hajnoczi
2021-08-30 3:08 ` John Johnson
2021-09-07 13:52 ` Stefan Hajnoczi
2021-08-16 16:42 ` [PATCH RFC v2 07/16] vfio-user: get device info Elena Ufimtseva
2021-08-24 16:04 ` Stefan Hajnoczi
2021-08-30 3:11 ` John Johnson
2021-09-07 13:54 ` Stefan Hajnoczi
2021-08-16 16:42 ` [PATCH RFC v2 08/16] vfio-user: get region info Elena Ufimtseva
2021-09-07 14:31 ` Stefan Hajnoczi
2021-09-09 5:35 ` John Johnson
2021-09-09 5:59 ` Stefan Hajnoczi
2021-08-16 16:42 ` [PATCH RFC v2 09/16] vfio-user: region read/write Elena Ufimtseva
2021-09-07 14:41 ` Stefan Hajnoczi
2021-09-07 17:24 ` John Levon
2021-09-09 6:00 ` John Johnson
2021-09-09 12:05 ` John Levon
2021-09-10 6:07 ` John Johnson
2021-09-10 12:16 ` John Levon
2021-08-16 16:42 ` Elena Ufimtseva [this message]
2021-09-07 15:00 ` [PATCH RFC v2 10/16] vfio-user: pci_user_realize PCI setup Stefan Hajnoczi
2021-08-16 16:42 ` [PATCH RFC v2 11/16] vfio-user: get and set IRQs Elena Ufimtseva
2021-09-07 15:14 ` Stefan Hajnoczi
2021-09-09 5:50 ` John Johnson
2021-09-09 13:50 ` Stefan Hajnoczi
2021-08-16 16:42 ` [PATCH RFC v2 12/16] vfio-user: proxy container connect/disconnect Elena Ufimtseva
2021-09-08 8:30 ` Stefan Hajnoczi
2021-08-16 16:42 ` [PATCH RFC v2 13/16] vfio-user: dma map/unmap operations Elena Ufimtseva
2021-09-08 9:16 ` Stefan Hajnoczi
2021-08-16 16:42 ` [PATCH RFC v2 14/16] vfio-user: dma read/write operations Elena Ufimtseva
2021-09-08 9:51 ` Stefan Hajnoczi
2021-09-08 11:03 ` John Levon
2021-08-16 16:42 ` [PATCH RFC v2 15/16] vfio-user: pci reset Elena Ufimtseva
2021-09-08 9:56 ` Stefan Hajnoczi
2021-08-16 16:42 ` [PATCH RFC v2 16/16] vfio-user: migration support Elena Ufimtseva
2021-09-08 10:04 ` Stefan Hajnoczi
2021-08-27 17:53 ` [PATCH RFC server v2 00/11] vfio-user server in QEMU Jagannathan Raman
2021-08-27 17:53 ` [PATCH RFC server v2 01/11] vfio-user: build library Jagannathan Raman
2021-08-27 18:05 ` Jag Raman
2021-09-08 12:25 ` Stefan Hajnoczi
2021-09-10 15:21 ` Philippe Mathieu-Daudé
2021-09-13 12:15 ` Stefan Hajnoczi
2021-09-10 15:20 ` Philippe Mathieu-Daudé
2021-09-10 17:08 ` Jag Raman
2021-09-11 22:29 ` John Levon
2021-09-13 10:19 ` Philippe Mathieu-Daudé
2021-08-27 17:53 ` [PATCH RFC server v2 02/11] vfio-user: define vfio-user object Jagannathan Raman
2021-09-08 12:37 ` Stefan Hajnoczi
2021-09-10 14:04 ` Jag Raman
2021-08-27 17:53 ` [PATCH RFC server v2 03/11] vfio-user: instantiate vfio-user context Jagannathan Raman
2021-09-08 12:40 ` Stefan Hajnoczi
2021-09-10 14:58 ` Jag Raman
2021-08-27 17:53 ` [PATCH RFC server v2 04/11] vfio-user: find and init PCI device Jagannathan Raman
2021-09-08 12:43 ` Stefan Hajnoczi
2021-09-10 15:02 ` Jag Raman
2021-08-27 17:53 ` [PATCH RFC server v2 05/11] vfio-user: run vfio-user context Jagannathan Raman
2021-09-08 12:58 ` Stefan Hajnoczi
2021-09-08 13:37 ` John Levon
2021-09-08 15:02 ` Stefan Hajnoczi
2021-09-08 15:21 ` John Levon
2021-09-08 15:46 ` Stefan Hajnoczi
2021-08-27 17:53 ` [PATCH RFC server v2 06/11] vfio-user: handle PCI config space accesses Jagannathan Raman
2021-09-09 7:27 ` Stefan Hajnoczi
2021-09-10 16:22 ` Jag Raman
2021-09-13 12:13 ` Stefan Hajnoczi
2021-08-27 17:53 ` [PATCH RFC server v2 07/11] vfio-user: handle DMA mappings Jagannathan Raman
2021-09-09 7:29 ` Stefan Hajnoczi
2021-08-27 17:53 ` [PATCH RFC server v2 08/11] vfio-user: handle PCI BAR accesses Jagannathan Raman
2021-09-09 7:37 ` Stefan Hajnoczi
2021-09-10 16:36 ` Jag Raman
2021-08-27 17:53 ` [PATCH RFC server v2 09/11] vfio-user: handle device interrupts Jagannathan Raman
2021-09-09 7:40 ` Stefan Hajnoczi
2021-08-27 17:53 ` [PATCH RFC server v2 10/11] vfio-user: register handlers to facilitate migration Jagannathan Raman
2021-09-09 8:14 ` Stefan Hajnoczi
2021-08-27 17:53 ` [PATCH RFC server v2 11/11] vfio-user: acceptance test Jagannathan Raman
2021-09-08 10:08 ` [PATCH RFC server v2 00/11] vfio-user server in QEMU Stefan Hajnoczi
2021-09-08 12:06 ` Jag Raman
2021-09-09 8:17 ` Stefan Hajnoczi
2021-09-10 14:02 ` Jag Raman
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=b6ccfc654915781b1bd16aec4e3f98600f2577ef.1629131628.git.elena.ufimtseva@oracle.com \
--to=elena.ufimtseva@oracle.com \
--cc=alex.williamson@redhat.com \
--cc=jag.raman@oracle.com \
--cc=john.g.johnson@oracle.com \
--cc=john.levon@nutanix.com \
--cc=qemu-devel@nongnu.org \
--cc=stefanha@redhat.com \
--cc=swapnil.ingle@nutanix.com \
--cc=thanos.makatos@nutanix.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).