All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC PATCH 0/4] vhost: register and change IOMMU flag depending on ATS state
@ 2023-04-24 11:21 Viktor Prutyanov
  2023-04-24 11:21 ` [RFC PATCH 1/4] pci: add handling of Enable bit in ATS Control Register Viktor Prutyanov
                   ` (3 more replies)
  0 siblings, 4 replies; 11+ messages in thread
From: Viktor Prutyanov @ 2023-04-24 11:21 UTC (permalink / raw)
  To: mst, jasowang, marcel.apfelbaum, pbonzini, peterx, david
  Cc: philmd, qemu-devel, yan, yuri.benditovich, Viktor Prutyanov

When IOMMU and vhost are enabled together, QEMU tracks IOTLB or
Device-TLB unmap events depending on whether Device-TLB is enabled. But
even if Device-TLB and PCI ATS is enabled, the guest can reject to use
it. For example, this situation appears when Windows Server 2022 is
running with intel-iommu with device-iotlb=on and virtio-net-pci with
vhost=on. The guest implies that no address translation info cached in
device IOTLB and doesn't send device IOTLB invalidation commands. So,
it leads to irrelevant address translations in vhost-net in the host
kernel. Therefore network frames from the guest in host tap interface
contains wrong payload data.

This series adds checking of ATS state for proper unmap flag register
(IOMMU_NOTIFIER_UNMAP or IOMMU_NOTIFIER_DEVIOTLB_UNMAP).

Tested on Windows Server 2022, Windows 11 and Fedora guests with
 -device virtio-net-pci,bus=pci.3,netdev=nd0,iommu_platform=on,ats=on
 -netdev tap,id=nd0,ifname=tap1,script=no,downscript=no,vhost=on
 -device intel-iommu,intremap=on,eim=on,device-iotlb=on/off

Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2001312

Viktor Prutyanov (4):
  pci: add handling of Enable bit in ATS Control Register
  virtio-pci: add handling of ATS state change
  memory: add interface for triggering IOMMU notify_flag_changed handler
  vhost: register and change IOMMU flag depending on ATS state

 hw/pci/pci.c                |  1 +
 hw/pci/pcie.c               | 21 +++++++++++++++++++++
 hw/virtio/vhost.c           | 23 +++++++++++++++++++++--
 hw/virtio/virtio-pci.c      | 12 ++++++++++++
 include/exec/memory.h       |  2 ++
 include/hw/pci/pci_device.h |  3 +++
 include/hw/pci/pcie.h       |  4 ++++
 include/hw/virtio/virtio.h  |  2 ++
 softmmu/memory.c            | 12 ++++++++++++
 9 files changed, 78 insertions(+), 2 deletions(-)

-- 
2.21.0



^ permalink raw reply	[flat|nested] 11+ messages in thread

* [RFC PATCH 1/4] pci: add handling of Enable bit in ATS Control Register
  2023-04-24 11:21 [RFC PATCH 0/4] vhost: register and change IOMMU flag depending on ATS state Viktor Prutyanov
@ 2023-04-24 11:21 ` Viktor Prutyanov
  2023-04-26  5:31   ` Jason Wang
  2023-04-24 11:21 ` [RFC PATCH 2/4] virtio-pci: add handling of ATS state change Viktor Prutyanov
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 11+ messages in thread
From: Viktor Prutyanov @ 2023-04-24 11:21 UTC (permalink / raw)
  To: mst, jasowang, marcel.apfelbaum, pbonzini, peterx, david
  Cc: philmd, qemu-devel, yan, yuri.benditovich, Viktor Prutyanov

According to PCIe Address Translation Services specification 5.1.3.,
ATS Control Register has Enable bit to enable/disable ATS.
Add a new field for a trigger function which is called at the Enable
bit change, so that PCIe devices can handle ATS enable/disable.

Signed-off-by: Viktor Prutyanov <viktor@daynix.com>
---
 hw/pci/pci.c                |  1 +
 hw/pci/pcie.c               | 21 +++++++++++++++++++++
 include/hw/pci/pci_device.h |  3 +++
 include/hw/pci/pcie.h       |  4 ++++
 4 files changed, 29 insertions(+)

diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 208c16f450..79a47d2589 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -1550,6 +1550,7 @@ void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val_in, int
     msi_write_config(d, addr, val_in, l);
     msix_write_config(d, addr, val_in, l);
     pcie_sriov_config_write(d, addr, val_in, l);
+    pcie_ats_config_write(d, addr, val_in, l);
 }
 
 /***********************************************************/
diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index 924fdabd15..e0217161e5 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c
@@ -1057,6 +1057,27 @@ void pcie_ats_init(PCIDevice *dev, uint16_t offset, bool aligned)
     pci_set_word(dev->wmask + dev->exp.ats_cap + PCI_ATS_CTRL, 0x800f);
 }
 
+void pcie_ats_config_write(PCIDevice *dev, uint32_t address, uint32_t val,
+                           int len)
+{
+    uint32_t off;
+    uint16_t ats_cap = dev->exp.ats_cap;
+
+    if (!ats_cap || address < ats_cap) {
+        return;
+    }
+    off = address - ats_cap;
+    if (off >= PCI_EXT_CAP_ATS_SIZEOF) {
+        return;
+    }
+
+    if (range_covers_byte(off, len, PCI_ATS_CTRL + 1)) {
+        if (dev->ats_ctrl_trigger) {
+            dev->ats_ctrl_trigger(dev, !!(val & PCI_ATS_CTRL_ENABLE));
+        }
+    }
+}
+
 /* ACS (Access Control Services) */
 void pcie_acs_init(PCIDevice *dev, uint16_t offset)
 {
diff --git a/include/hw/pci/pci_device.h b/include/hw/pci/pci_device.h
index d3dd0f64b2..2bb1d68f3b 100644
--- a/include/hw/pci/pci_device.h
+++ b/include/hw/pci/pci_device.h
@@ -160,6 +160,9 @@ struct PCIDevice {
     /* ID of standby device in net_failover pair */
     char *failover_pair_id;
     uint32_t acpi_index;
+
+    /* PCI ATS enable/disable trigger */
+    void (*ats_ctrl_trigger)(PCIDevice *dev, bool enable);
 };
 
 static inline int pci_intx(PCIDevice *pci_dev)
diff --git a/include/hw/pci/pcie.h b/include/hw/pci/pcie.h
index 798a262a0a..5f2dbd87cf 100644
--- a/include/hw/pci/pcie.h
+++ b/include/hw/pci/pcie.h
@@ -154,4 +154,8 @@ void pcie_cap_slot_unplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev,
                              Error **errp);
 void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev,
                                      DeviceState *dev, Error **errp);
+
+void pcie_ats_config_write(PCIDevice *dev, uint32_t address, uint32_t val,
+                           int len);
+
 #endif /* QEMU_PCIE_H */
-- 
2.21.0



^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [RFC PATCH 2/4] virtio-pci: add handling of ATS state change
  2023-04-24 11:21 [RFC PATCH 0/4] vhost: register and change IOMMU flag depending on ATS state Viktor Prutyanov
  2023-04-24 11:21 ` [RFC PATCH 1/4] pci: add handling of Enable bit in ATS Control Register Viktor Prutyanov
@ 2023-04-24 11:21 ` Viktor Prutyanov
  2023-04-26  5:50   ` Jason Wang
  2023-04-24 11:21 ` [RFC PATCH 3/4] memory: add interface for triggering IOMMU notify_flag_changed handler Viktor Prutyanov
  2023-04-24 11:21 ` [RFC PATCH 4/4] vhost: register and change IOMMU flag depending on ATS state Viktor Prutyanov
  3 siblings, 1 reply; 11+ messages in thread
From: Viktor Prutyanov @ 2023-04-24 11:21 UTC (permalink / raw)
  To: mst, jasowang, marcel.apfelbaum, pbonzini, peterx, david
  Cc: philmd, qemu-devel, yan, yuri.benditovich, Viktor Prutyanov

Guest may enable or disable ATS for the device. Add logic for handling
these events.

Signed-off-by: Viktor Prutyanov <viktor@daynix.com>
---
 hw/virtio/virtio-pci.c     | 12 ++++++++++++
 include/hw/virtio/virtio.h |  2 ++
 2 files changed, 14 insertions(+)

diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index 247325c193..70f63a4986 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -1731,6 +1731,17 @@ static void virtio_pci_device_write(void *opaque, hwaddr addr,
     }
 }
 
+static void virtio_pci_ats_ctrl_trigger(PCIDevice *pci_dev, bool enable)
+{
+    VirtIOPCIProxy *proxy = VIRTIO_PCI(pci_dev);
+    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+
+    vdev->ats_enabled = enable;
+
+    if (vdev->ats_ctrl_trigger)
+        vdev->ats_ctrl_trigger(enable, vdev);
+}
+
 static void virtio_pci_modern_regions_init(VirtIOPCIProxy *proxy,
                                            const char *vdev_name)
 {
@@ -2166,6 +2177,7 @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp)
         if (proxy->flags & VIRTIO_PCI_FLAG_ATS) {
             pcie_ats_init(pci_dev, last_pcie_cap_offset,
                           proxy->flags & VIRTIO_PCI_FLAG_ATS_PAGE_ALIGNED);
+            pci_dev->ats_ctrl_trigger = virtio_pci_ats_ctrl_trigger;
             last_pcie_cap_offset += PCI_EXT_CAP_ATS_SIZEOF;
         }
 
diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index 77c6c55929..2812561aae 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -155,6 +155,8 @@ struct VirtIODevice
     QLIST_HEAD(, VirtQueue) *vector_queues;
     QTAILQ_ENTRY(VirtIODevice) next;
     EventNotifier config_notifier;
+    void (*ats_ctrl_trigger)(bool enable, VirtIODevice *vdev);
+    bool ats_enabled;
 };
 
 struct VirtioDeviceClass {
-- 
2.21.0



^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [RFC PATCH 3/4] memory: add interface for triggering IOMMU notify_flag_changed handler
  2023-04-24 11:21 [RFC PATCH 0/4] vhost: register and change IOMMU flag depending on ATS state Viktor Prutyanov
  2023-04-24 11:21 ` [RFC PATCH 1/4] pci: add handling of Enable bit in ATS Control Register Viktor Prutyanov
  2023-04-24 11:21 ` [RFC PATCH 2/4] virtio-pci: add handling of ATS state change Viktor Prutyanov
@ 2023-04-24 11:21 ` Viktor Prutyanov
  2023-04-26 14:20   ` Peter Xu
  2023-04-24 11:21 ` [RFC PATCH 4/4] vhost: register and change IOMMU flag depending on ATS state Viktor Prutyanov
  3 siblings, 1 reply; 11+ messages in thread
From: Viktor Prutyanov @ 2023-04-24 11:21 UTC (permalink / raw)
  To: mst, jasowang, marcel.apfelbaum, pbonzini, peterx, david
  Cc: philmd, qemu-devel, yan, yuri.benditovich, Viktor Prutyanov

This interface helps if IOMMU notify_flag_changed should be triggered
after changing IOMMU notifier flags in the runtime.

Signed-off-by: Viktor Prutyanov <viktor@daynix.com>
---
 include/exec/memory.h |  2 ++
 softmmu/memory.c      | 12 ++++++++++++
 2 files changed, 14 insertions(+)

diff --git a/include/exec/memory.h b/include/exec/memory.h
index 2e602a2fad..c8d5f69add 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -1772,6 +1772,8 @@ void memory_region_iommu_replay(IOMMUMemoryRegion *iommu_mr, IOMMUNotifier *n);
 void memory_region_unregister_iommu_notifier(MemoryRegion *mr,
                                              IOMMUNotifier *n);
 
+void memory_region_iommu_notify_flags_changed(MemoryRegion *mr);
+
 /**
  * memory_region_iommu_get_attr: return an IOMMU attr if get_attr() is
  * defined on the IOMMU.
diff --git a/softmmu/memory.c b/softmmu/memory.c
index 9d64efca26..e4a53cde6c 100644
--- a/softmmu/memory.c
+++ b/softmmu/memory.c
@@ -1950,6 +1950,18 @@ void memory_region_iommu_replay(IOMMUMemoryRegion *iommu_mr, IOMMUNotifier *n)
     }
 }
 
+void memory_region_iommu_notify_flags_changed(MemoryRegion *mr)
+{
+    IOMMUMemoryRegion *iommu_mr;
+
+    if (mr->alias) {
+        memory_region_iommu_notify_flags_changed(mr->alias);
+        return;
+    }
+    iommu_mr = IOMMU_MEMORY_REGION(mr);
+    memory_region_update_iommu_notify_flags(iommu_mr, NULL);
+}
+
 void memory_region_unregister_iommu_notifier(MemoryRegion *mr,
                                              IOMMUNotifier *n)
 {
-- 
2.21.0



^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [RFC PATCH 4/4] vhost: register and change IOMMU flag depending on ATS state
  2023-04-24 11:21 [RFC PATCH 0/4] vhost: register and change IOMMU flag depending on ATS state Viktor Prutyanov
                   ` (2 preceding siblings ...)
  2023-04-24 11:21 ` [RFC PATCH 3/4] memory: add interface for triggering IOMMU notify_flag_changed handler Viktor Prutyanov
@ 2023-04-24 11:21 ` Viktor Prutyanov
  2023-04-26  5:54   ` Jason Wang
  3 siblings, 1 reply; 11+ messages in thread
From: Viktor Prutyanov @ 2023-04-24 11:21 UTC (permalink / raw)
  To: mst, jasowang, marcel.apfelbaum, pbonzini, peterx, david
  Cc: philmd, qemu-devel, yan, yuri.benditovich, Viktor Prutyanov

The guest can disable or never enable ATS. In these cases, Device-TLB
can't be used even if enabled in QEMU. So, check ATS state before
registering IOMMU notifier and select flag depending on that. Also,
change IOMMU notifier flag if ATS state is changed.

Signed-off-by: Viktor Prutyanov <viktor@daynix.com>
---
 hw/virtio/vhost.c | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index eb8c4c378c..1b14937020 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -796,7 +796,9 @@ static void vhost_iommu_region_add(MemoryListener *listener,
     iommu_idx = memory_region_iommu_attrs_to_index(iommu_mr,
                                                    MEMTXATTRS_UNSPECIFIED);
     iommu_notifier_init(&iommu->n, vhost_iommu_unmap_notify,
-                        IOMMU_NOTIFIER_DEVIOTLB_UNMAP,
+                        dev->vdev->ats_enabled ?
+                            IOMMU_NOTIFIER_DEVIOTLB_UNMAP :
+                            IOMMU_NOTIFIER_UNMAP,
                         section->offset_within_region,
                         int128_get64(end),
                         iommu_idx);
@@ -804,7 +806,8 @@ static void vhost_iommu_region_add(MemoryListener *listener,
     iommu->iommu_offset = section->offset_within_address_space -
                           section->offset_within_region;
     iommu->hdev = dev;
-    ret = memory_region_register_iommu_notifier(section->mr, &iommu->n, NULL);
+    ret = memory_region_register_iommu_notifier(section->mr, &iommu->n,
+            dev->vdev->ats_enabled ? NULL : &error_fatal);
     if (ret) {
         /*
          * Some vIOMMUs do not support dev-iotlb yet.  If so, try to use the
@@ -818,6 +821,19 @@ static void vhost_iommu_region_add(MemoryListener *listener,
     /* TODO: can replay help performance here? */
 }
 
+static void vhost_deviotlb_ctrl_trigger(bool enable, struct VirtIODevice *vdev)
+{
+    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
+    struct vhost_dev *hdev = vdc->get_vhost(vdev);
+    struct vhost_iommu *iommu;
+
+    QLIST_FOREACH(iommu, &hdev->iommu_list, iommu_next) {
+        iommu->n.notifier_flags = enable ?
+            IOMMU_NOTIFIER_DEVIOTLB_UNMAP : IOMMU_NOTIFIER_UNMAP;
+        memory_region_iommu_notify_flags_changed(iommu->mr);
+    }
+}
+
 static void vhost_iommu_region_del(MemoryListener *listener,
                                    MemoryRegionSection *section)
 {
@@ -2000,6 +2016,8 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings)
             struct vhost_virtqueue *vq = hdev->vqs + i;
             vhost_device_iotlb_miss(hdev, vq->used_phys, true);
         }
+
+        vdev->ats_ctrl_trigger = vhost_deviotlb_ctrl_trigger;
     }
     vhost_start_config_intr(hdev);
     return 0;
@@ -2055,6 +2073,7 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings)
             hdev->vhost_ops->vhost_set_iotlb_callback(hdev, false);
         }
         memory_listener_unregister(&hdev->iommu_listener);
+        vdev->ats_ctrl_trigger = NULL;
     }
     vhost_stop_config_intr(hdev);
     vhost_log_put(hdev, true);
-- 
2.21.0



^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [RFC PATCH 1/4] pci: add handling of Enable bit in ATS Control Register
  2023-04-24 11:21 ` [RFC PATCH 1/4] pci: add handling of Enable bit in ATS Control Register Viktor Prutyanov
@ 2023-04-26  5:31   ` Jason Wang
  2023-04-26  5:48     ` Jason Wang
  2023-05-02 21:35     ` Viktor Prutyanov
  0 siblings, 2 replies; 11+ messages in thread
From: Jason Wang @ 2023-04-26  5:31 UTC (permalink / raw)
  To: Viktor Prutyanov, mst, marcel.apfelbaum, pbonzini, peterx, david
  Cc: philmd, qemu-devel, yan, yuri.benditovich


在 2023/4/24 19:21, Viktor Prutyanov 写道:
> According to PCIe Address Translation Services specification 5.1.3.,
> ATS Control Register has Enable bit to enable/disable ATS.
> Add a new field for a trigger function which is called at the Enable
> bit change, so that PCIe devices can handle ATS enable/disable.
>
> Signed-off-by: Viktor Prutyanov <viktor@daynix.com>
> ---
>   hw/pci/pci.c                |  1 +
>   hw/pci/pcie.c               | 21 +++++++++++++++++++++
>   include/hw/pci/pci_device.h |  3 +++
>   include/hw/pci/pcie.h       |  4 ++++
>   4 files changed, 29 insertions(+)
>
> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> index 208c16f450..79a47d2589 100644
> --- a/hw/pci/pci.c
> +++ b/hw/pci/pci.c
> @@ -1550,6 +1550,7 @@ void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val_in, int
>       msi_write_config(d, addr, val_in, l);
>       msix_write_config(d, addr, val_in, l);
>       pcie_sriov_config_write(d, addr, val_in, l);
> +    pcie_ats_config_write(d, addr, val_in, l);
>   }
>   
>   /***********************************************************/
> diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
> index 924fdabd15..e0217161e5 100644
> --- a/hw/pci/pcie.c
> +++ b/hw/pci/pcie.c
> @@ -1057,6 +1057,27 @@ void pcie_ats_init(PCIDevice *dev, uint16_t offset, bool aligned)
>       pci_set_word(dev->wmask + dev->exp.ats_cap + PCI_ATS_CTRL, 0x800f);
>   }
>   
> +void pcie_ats_config_write(PCIDevice *dev, uint32_t address, uint32_t val,
> +                           int len)
> +{
> +    uint32_t off;
> +    uint16_t ats_cap = dev->exp.ats_cap;
> +
> +    if (!ats_cap || address < ats_cap) {
> +        return;
> +    }
> +    off = address - ats_cap;
> +    if (off >= PCI_EXT_CAP_ATS_SIZEOF) {
> +        return;
> +    }
> +
> +    if (range_covers_byte(off, len, PCI_ATS_CTRL + 1)) {


Do we really need +1 here?

The rest looks good.

Thanks


> +        if (dev->ats_ctrl_trigger) {
> +            dev->ats_ctrl_trigger(dev, !!(val & PCI_ATS_CTRL_ENABLE));
> +        }
> +    }
> +}
> +
>   /* ACS (Access Control Services) */
>   void pcie_acs_init(PCIDevice *dev, uint16_t offset)
>   {
> diff --git a/include/hw/pci/pci_device.h b/include/hw/pci/pci_device.h
> index d3dd0f64b2..2bb1d68f3b 100644
> --- a/include/hw/pci/pci_device.h
> +++ b/include/hw/pci/pci_device.h
> @@ -160,6 +160,9 @@ struct PCIDevice {
>       /* ID of standby device in net_failover pair */
>       char *failover_pair_id;
>       uint32_t acpi_index;
> +
> +    /* PCI ATS enable/disable trigger */
> +    void (*ats_ctrl_trigger)(PCIDevice *dev, bool enable);
>   };
>   
>   static inline int pci_intx(PCIDevice *pci_dev)
> diff --git a/include/hw/pci/pcie.h b/include/hw/pci/pcie.h
> index 798a262a0a..5f2dbd87cf 100644
> --- a/include/hw/pci/pcie.h
> +++ b/include/hw/pci/pcie.h
> @@ -154,4 +154,8 @@ void pcie_cap_slot_unplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev,
>                                Error **errp);
>   void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev,
>                                        DeviceState *dev, Error **errp);
> +
> +void pcie_ats_config_write(PCIDevice *dev, uint32_t address, uint32_t val,
> +                           int len);
> +
>   #endif /* QEMU_PCIE_H */



^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFC PATCH 1/4] pci: add handling of Enable bit in ATS Control Register
  2023-04-26  5:31   ` Jason Wang
@ 2023-04-26  5:48     ` Jason Wang
  2023-05-02 21:35     ` Viktor Prutyanov
  1 sibling, 0 replies; 11+ messages in thread
From: Jason Wang @ 2023-04-26  5:48 UTC (permalink / raw)
  To: Viktor Prutyanov, mst, marcel.apfelbaum, pbonzini, peterx, david
  Cc: philmd, qemu-devel, yan, yuri.benditovich


在 2023/4/26 13:31, Jason Wang 写道:
>
> 在 2023/4/24 19:21, Viktor Prutyanov 写道:
>> According to PCIe Address Translation Services specification 5.1.3.,
>> ATS Control Register has Enable bit to enable/disable ATS.
>> Add a new field for a trigger function which is called at the Enable
>> bit change, so that PCIe devices can handle ATS enable/disable.
>>
>> Signed-off-by: Viktor Prutyanov <viktor@daynix.com>
>> ---
>>   hw/pci/pci.c                |  1 +
>>   hw/pci/pcie.c               | 21 +++++++++++++++++++++
>>   include/hw/pci/pci_device.h |  3 +++
>>   include/hw/pci/pcie.h       |  4 ++++
>>   4 files changed, 29 insertions(+)
>>
>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
>> index 208c16f450..79a47d2589 100644
>> --- a/hw/pci/pci.c
>> +++ b/hw/pci/pci.c
>> @@ -1550,6 +1550,7 @@ void pci_default_write_config(PCIDevice *d, 
>> uint32_t addr, uint32_t val_in, int
>>       msi_write_config(d, addr, val_in, l);
>>       msix_write_config(d, addr, val_in, l);
>>       pcie_sriov_config_write(d, addr, val_in, l);
>> +    pcie_ats_config_write(d, addr, val_in, l);
>>   }
>> /***********************************************************/
>> diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
>> index 924fdabd15..e0217161e5 100644
>> --- a/hw/pci/pcie.c
>> +++ b/hw/pci/pcie.c
>> @@ -1057,6 +1057,27 @@ void pcie_ats_init(PCIDevice *dev, uint16_t 
>> offset, bool aligned)
>>       pci_set_word(dev->wmask + dev->exp.ats_cap + PCI_ATS_CTRL, 
>> 0x800f);
>>   }
>>   +void pcie_ats_config_write(PCIDevice *dev, uint32_t address, 
>> uint32_t val,
>> +                           int len)
>> +{
>> +    uint32_t off;
>> +    uint16_t ats_cap = dev->exp.ats_cap;
>> +
>> +    if (!ats_cap || address < ats_cap) {
>> +        return;
>> +    }
>> +    off = address - ats_cap;
>> +    if (off >= PCI_EXT_CAP_ATS_SIZEOF) {
>> +        return;
>> +    }
>> +
>> +    if (range_covers_byte(off, len, PCI_ATS_CTRL + 1)) {
>
>
> Do we really need +1 here?
>
> The rest looks good.
>
> Thanks
>
>
>> +        if (dev->ats_ctrl_trigger) {
>> +            dev->ats_ctrl_trigger(dev, !!(val & PCI_ATS_CTRL_ENABLE));
>> +        }
>> +    }


Speak too fast.

We have virtio_write_config(), can we hook there? (We've already dealt 
with FLR and bus master there)

Thanks


>> +}
>> +
>>   /* ACS (Access Control Services) */
>>   void pcie_acs_init(PCIDevice *dev, uint16_t offset)
>>   {
>> diff --git a/include/hw/pci/pci_device.h b/include/hw/pci/pci_device.h
>> index d3dd0f64b2..2bb1d68f3b 100644
>> --- a/include/hw/pci/pci_device.h
>> +++ b/include/hw/pci/pci_device.h
>> @@ -160,6 +160,9 @@ struct PCIDevice {
>>       /* ID of standby device in net_failover pair */
>>       char *failover_pair_id;
>>       uint32_t acpi_index;
>> +
>> +    /* PCI ATS enable/disable trigger */
>> +    void (*ats_ctrl_trigger)(PCIDevice *dev, bool enable);
>>   };
>>     static inline int pci_intx(PCIDevice *pci_dev)
>> diff --git a/include/hw/pci/pcie.h b/include/hw/pci/pcie.h
>> index 798a262a0a..5f2dbd87cf 100644
>> --- a/include/hw/pci/pcie.h
>> +++ b/include/hw/pci/pcie.h
>> @@ -154,4 +154,8 @@ void pcie_cap_slot_unplug_cb(HotplugHandler 
>> *hotplug_dev, DeviceState *dev,
>>                                Error **errp);
>>   void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev,
>>                                        DeviceState *dev, Error **errp);
>> +
>> +void pcie_ats_config_write(PCIDevice *dev, uint32_t address, 
>> uint32_t val,
>> +                           int len);
>> +
>>   #endif /* QEMU_PCIE_H */



^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFC PATCH 2/4] virtio-pci: add handling of ATS state change
  2023-04-24 11:21 ` [RFC PATCH 2/4] virtio-pci: add handling of ATS state change Viktor Prutyanov
@ 2023-04-26  5:50   ` Jason Wang
  0 siblings, 0 replies; 11+ messages in thread
From: Jason Wang @ 2023-04-26  5:50 UTC (permalink / raw)
  To: Viktor Prutyanov, mst, marcel.apfelbaum, pbonzini, peterx, david
  Cc: philmd, qemu-devel, yan, yuri.benditovich


在 2023/4/24 19:21, Viktor Prutyanov 写道:
> Guest may enable or disable ATS for the device. Add logic for handling
> these events.
>
> Signed-off-by: Viktor Prutyanov <viktor@daynix.com>
> ---
>   hw/virtio/virtio-pci.c     | 12 ++++++++++++
>   include/hw/virtio/virtio.h |  2 ++
>   2 files changed, 14 insertions(+)
>
> diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
> index 247325c193..70f63a4986 100644
> --- a/hw/virtio/virtio-pci.c
> +++ b/hw/virtio/virtio-pci.c
> @@ -1731,6 +1731,17 @@ static void virtio_pci_device_write(void *opaque, hwaddr addr,
>       }
>   }
>   
> +static void virtio_pci_ats_ctrl_trigger(PCIDevice *pci_dev, bool enable)
> +{
> +    VirtIOPCIProxy *proxy = VIRTIO_PCI(pci_dev);
> +    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
> +
> +    vdev->ats_enabled = enable;
> +
> +    if (vdev->ats_ctrl_trigger)
> +        vdev->ats_ctrl_trigger(enable, vdev);
> +}
> +
>   static void virtio_pci_modern_regions_init(VirtIOPCIProxy *proxy,
>                                              const char *vdev_name)
>   {
> @@ -2166,6 +2177,7 @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp)
>           if (proxy->flags & VIRTIO_PCI_FLAG_ATS) {
>               pcie_ats_init(pci_dev, last_pcie_cap_offset,
>                             proxy->flags & VIRTIO_PCI_FLAG_ATS_PAGE_ALIGNED);
> +            pci_dev->ats_ctrl_trigger = virtio_pci_ats_ctrl_trigger;
>               last_pcie_cap_offset += PCI_EXT_CAP_ATS_SIZEOF;
>           }
>   
> diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
> index 77c6c55929..2812561aae 100644
> --- a/include/hw/virtio/virtio.h
> +++ b/include/hw/virtio/virtio.h
> @@ -155,6 +155,8 @@ struct VirtIODevice
>       QLIST_HEAD(, VirtQueue) *vector_queues;
>       QTAILQ_ENTRY(VirtIODevice) next;
>       EventNotifier config_notifier;
> +    void (*ats_ctrl_trigger)(bool enable, VirtIODevice *vdev);
> +    bool ats_enabled;


ATS is pci specific in the virtio layer we might use something more 
general like "device-iotlb"?

And this method needs to go in VirtioDeviceClass.

Thanks


>   };
>   
>   struct VirtioDeviceClass {



^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFC PATCH 4/4] vhost: register and change IOMMU flag depending on ATS state
  2023-04-24 11:21 ` [RFC PATCH 4/4] vhost: register and change IOMMU flag depending on ATS state Viktor Prutyanov
@ 2023-04-26  5:54   ` Jason Wang
  0 siblings, 0 replies; 11+ messages in thread
From: Jason Wang @ 2023-04-26  5:54 UTC (permalink / raw)
  To: Viktor Prutyanov, mst, marcel.apfelbaum, pbonzini, peterx, david
  Cc: philmd, qemu-devel, yan, yuri.benditovich


在 2023/4/24 19:21, Viktor Prutyanov 写道:
> The guest can disable or never enable ATS. In these cases, Device-TLB
> can't be used even if enabled in QEMU. So, check ATS state before
> registering IOMMU notifier and select flag depending on that. Also,
> change IOMMU notifier flag if ATS state is changed.
>
> Signed-off-by: Viktor Prutyanov <viktor@daynix.com>
> ---
>   hw/virtio/vhost.c | 23 +++++++++++++++++++++--
>   1 file changed, 21 insertions(+), 2 deletions(-)
>
> diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
> index eb8c4c378c..1b14937020 100644
> --- a/hw/virtio/vhost.c
> +++ b/hw/virtio/vhost.c
> @@ -796,7 +796,9 @@ static void vhost_iommu_region_add(MemoryListener *listener,
>       iommu_idx = memory_region_iommu_attrs_to_index(iommu_mr,
>                                                      MEMTXATTRS_UNSPECIFIED);
>       iommu_notifier_init(&iommu->n, vhost_iommu_unmap_notify,
> -                        IOMMU_NOTIFIER_DEVIOTLB_UNMAP,
> +                        dev->vdev->ats_enabled ?
> +                            IOMMU_NOTIFIER_DEVIOTLB_UNMAP :
> +                            IOMMU_NOTIFIER_UNMAP,
>                           section->offset_within_region,
>                           int128_get64(end),
>                           iommu_idx);
> @@ -804,7 +806,8 @@ static void vhost_iommu_region_add(MemoryListener *listener,
>       iommu->iommu_offset = section->offset_within_address_space -
>                             section->offset_within_region;
>       iommu->hdev = dev;
> -    ret = memory_region_register_iommu_notifier(section->mr, &iommu->n, NULL);
> +    ret = memory_region_register_iommu_notifier(section->mr, &iommu->n,
> +            dev->vdev->ats_enabled ? NULL : &error_fatal);
>       if (ret) {
>           /*
>            * Some vIOMMUs do not support dev-iotlb yet.  If so, try to use the
> @@ -818,6 +821,19 @@ static void vhost_iommu_region_add(MemoryListener *listener,
>       /* TODO: can replay help performance here? */
>   }
>   
> +static void vhost_deviotlb_ctrl_trigger(bool enable, struct VirtIODevice *vdev)
> +{
> +    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
> +    struct vhost_dev *hdev = vdc->get_vhost(vdev);
> +    struct vhost_iommu *iommu;
> +
> +    QLIST_FOREACH(iommu, &hdev->iommu_list, iommu_next) {
> +        iommu->n.notifier_flags = enable ?
> +            IOMMU_NOTIFIER_DEVIOTLB_UNMAP : IOMMU_NOTIFIER_UNMAP;
> +        memory_region_iommu_notify_flags_changed(iommu->mr);
> +    }
> +}
> +
>   static void vhost_iommu_region_del(MemoryListener *listener,
>                                      MemoryRegionSection *section)
>   {
> @@ -2000,6 +2016,8 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings)
>               struct vhost_virtqueue *vq = hdev->vqs + i;
>               vhost_device_iotlb_miss(hdev, vq->used_phys, true);
>           }
> +
> +        vdev->ats_ctrl_trigger = vhost_deviotlb_ctrl_trigger;


Changing virtio method in the vhost seems a layer violation.

I wonder if the following is better

1) have a virtio-net method in VirtioDeviceClass as ats trigger

2) call vhost_ops to enable/disable device IOTLB in this ats trigger 
(and do nothing if there's no vhost attached)

Thanks


>       }
>       vhost_start_config_intr(hdev);
>       return 0;
> @@ -2055,6 +2073,7 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings)
>               hdev->vhost_ops->vhost_set_iotlb_callback(hdev, false);
>           }
>           memory_listener_unregister(&hdev->iommu_listener);
> +        vdev->ats_ctrl_trigger = NULL;
>       }
>       vhost_stop_config_intr(hdev);
>       vhost_log_put(hdev, true);



^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFC PATCH 3/4] memory: add interface for triggering IOMMU notify_flag_changed handler
  2023-04-24 11:21 ` [RFC PATCH 3/4] memory: add interface for triggering IOMMU notify_flag_changed handler Viktor Prutyanov
@ 2023-04-26 14:20   ` Peter Xu
  0 siblings, 0 replies; 11+ messages in thread
From: Peter Xu @ 2023-04-26 14:20 UTC (permalink / raw)
  To: Viktor Prutyanov
  Cc: mst, jasowang, marcel.apfelbaum, pbonzini, david, philmd,
	qemu-devel, yan, yuri.benditovich

On Mon, Apr 24, 2023 at 02:21:46PM +0300, Viktor Prutyanov wrote:
> +void memory_region_iommu_notify_flags_changed(MemoryRegion *mr)
> +{
> +    IOMMUMemoryRegion *iommu_mr;
> +
> +    if (mr->alias) {
> +        memory_region_iommu_notify_flags_changed(mr->alias);
> +        return;
> +    }
> +    iommu_mr = IOMMU_MEMORY_REGION(mr);
> +    memory_region_update_iommu_notify_flags(iommu_mr, NULL);

Do we still want to trap the error if the update failed?

The other question: whether vhost can simply use the existing register /
unregister calls for iommu notifiers, rather than modifying the flags on
its own?  I'd assume this happens very rare anyway.  Or is there other
concerns?

-- 
Peter Xu



^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFC PATCH 1/4] pci: add handling of Enable bit in ATS Control Register
  2023-04-26  5:31   ` Jason Wang
  2023-04-26  5:48     ` Jason Wang
@ 2023-05-02 21:35     ` Viktor Prutyanov
  1 sibling, 0 replies; 11+ messages in thread
From: Viktor Prutyanov @ 2023-05-02 21:35 UTC (permalink / raw)
  To: Jason Wang
  Cc: mst, marcel.apfelbaum, pbonzini, peterx, david, philmd,
	qemu-devel, yan, yuri.benditovich

On Wed, Apr 26, 2023 at 8:32 AM Jason Wang <jasowang@redhat.com> wrote:
>
>
> 在 2023/4/24 19:21, Viktor Prutyanov 写道:
> > According to PCIe Address Translation Services specification 5.1.3.,
> > ATS Control Register has Enable bit to enable/disable ATS.
> > Add a new field for a trigger function which is called at the Enable
> > bit change, so that PCIe devices can handle ATS enable/disable.
> >
> > Signed-off-by: Viktor Prutyanov <viktor@daynix.com>
> > ---
> >   hw/pci/pci.c                |  1 +
> >   hw/pci/pcie.c               | 21 +++++++++++++++++++++
> >   include/hw/pci/pci_device.h |  3 +++
> >   include/hw/pci/pcie.h       |  4 ++++
> >   4 files changed, 29 insertions(+)
> >
> > diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> > index 208c16f450..79a47d2589 100644
> > --- a/hw/pci/pci.c
> > +++ b/hw/pci/pci.c
> > @@ -1550,6 +1550,7 @@ void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val_in, int
> >       msi_write_config(d, addr, val_in, l);
> >       msix_write_config(d, addr, val_in, l);
> >       pcie_sriov_config_write(d, addr, val_in, l);
> > +    pcie_ats_config_write(d, addr, val_in, l);
> >   }
> >
> >   /***********************************************************/
> > diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
> > index 924fdabd15..e0217161e5 100644
> > --- a/hw/pci/pcie.c
> > +++ b/hw/pci/pcie.c
> > @@ -1057,6 +1057,27 @@ void pcie_ats_init(PCIDevice *dev, uint16_t offset, bool aligned)
> >       pci_set_word(dev->wmask + dev->exp.ats_cap + PCI_ATS_CTRL, 0x800f);
> >   }
> >
> > +void pcie_ats_config_write(PCIDevice *dev, uint32_t address, uint32_t val,
> > +                           int len)
> > +{
> > +    uint32_t off;
> > +    uint16_t ats_cap = dev->exp.ats_cap;
> > +
> > +    if (!ats_cap || address < ats_cap) {
> > +        return;
> > +    }
> > +    off = address - ats_cap;
> > +    if (off >= PCI_EXT_CAP_ATS_SIZEOF) {
> > +        return;
> > +    }
> > +
> > +    if (range_covers_byte(off, len, PCI_ATS_CTRL + 1)) {
>
>
> Do we really need +1 here?

The Enable bit is the 15th in the ATS Control Register, so it is in a
byte next to PCI_ATS_CTRL. Although I'm not sure that this is the
best way to test this bit.

All other comments I tried to take into account in the v2.

Thanks,
Viktor Prutyanov

>
> The rest looks good.
>
> Thanks
>
>
> > +        if (dev->ats_ctrl_trigger) {
> > +            dev->ats_ctrl_trigger(dev, !!(val & PCI_ATS_CTRL_ENABLE));
> > +        }
> > +    }
> > +}
> > +
> >   /* ACS (Access Control Services) */
> >   void pcie_acs_init(PCIDevice *dev, uint16_t offset)
> >   {
> > diff --git a/include/hw/pci/pci_device.h b/include/hw/pci/pci_device.h
> > index d3dd0f64b2..2bb1d68f3b 100644
> > --- a/include/hw/pci/pci_device.h
> > +++ b/include/hw/pci/pci_device.h
> > @@ -160,6 +160,9 @@ struct PCIDevice {
> >       /* ID of standby device in net_failover pair */
> >       char *failover_pair_id;
> >       uint32_t acpi_index;
> > +
> > +    /* PCI ATS enable/disable trigger */
> > +    void (*ats_ctrl_trigger)(PCIDevice *dev, bool enable);
> >   };
> >
> >   static inline int pci_intx(PCIDevice *pci_dev)
> > diff --git a/include/hw/pci/pcie.h b/include/hw/pci/pcie.h
> > index 798a262a0a..5f2dbd87cf 100644
> > --- a/include/hw/pci/pcie.h
> > +++ b/include/hw/pci/pcie.h
> > @@ -154,4 +154,8 @@ void pcie_cap_slot_unplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev,
> >                                Error **errp);
> >   void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev,
> >                                        DeviceState *dev, Error **errp);
> > +
> > +void pcie_ats_config_write(PCIDevice *dev, uint32_t address, uint32_t val,
> > +                           int len);
> > +
> >   #endif /* QEMU_PCIE_H */
>


^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2023-05-02 21:36 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-04-24 11:21 [RFC PATCH 0/4] vhost: register and change IOMMU flag depending on ATS state Viktor Prutyanov
2023-04-24 11:21 ` [RFC PATCH 1/4] pci: add handling of Enable bit in ATS Control Register Viktor Prutyanov
2023-04-26  5:31   ` Jason Wang
2023-04-26  5:48     ` Jason Wang
2023-05-02 21:35     ` Viktor Prutyanov
2023-04-24 11:21 ` [RFC PATCH 2/4] virtio-pci: add handling of ATS state change Viktor Prutyanov
2023-04-26  5:50   ` Jason Wang
2023-04-24 11:21 ` [RFC PATCH 3/4] memory: add interface for triggering IOMMU notify_flag_changed handler Viktor Prutyanov
2023-04-26 14:20   ` Peter Xu
2023-04-24 11:21 ` [RFC PATCH 4/4] vhost: register and change IOMMU flag depending on ATS state Viktor Prutyanov
2023-04-26  5:54   ` Jason Wang

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.