From: Kunkun Jiang <jiangkunkun@huawei.com>
To: Eric Auger <eric.auger@redhat.com>, <eric.auger.pro@gmail.com>,
<qemu-devel@nongnu.org>, <qemu-arm@nongnu.org>,
<alex.williamson@redhat.com>
Cc: peter.maydell@linaro.org, jacob.jun.pan@linux.intel.com,
jean-philippe@linaro.org, tn@semihalf.com,
chenxiang66@hisilicon.com, shameerali.kolothum.thodi@huawei.com,
nicoleotsuka@gmail.com, vivek.gautam@arm.com, vdumpa@nvidia.com,
yi.l.liu@intel.com, peterx@redhat.com, zhangfei.gao@gmail.com,
yuzenghui@huawei.com, zhukeqian1@huawei.com
Subject: Re: [RFC v9 14/29] vfio: Introduce helpers to DMA map/unmap a RAM section
Date: Tue, 27 Apr 2021 22:05:11 +0800 [thread overview]
Message-ID: <7e3defd3-50b5-c2df-77a7-9fa85fc3a448@huawei.com> (raw)
In-Reply-To: <20210411120912.15770-15-eric.auger@redhat.com>
Hi Eric,
On 2021/4/11 20:08, Eric Auger wrote:
> Let's introduce two helpers that allow to DMA map/unmap a RAM
> section. Those helpers will be called for nested stage setup in
> another call site. Also the vfio_listener_region_add/del()
> structure may be clearer.
>
> Signed-off-by: Eric Auger <eric.auger@redhat.com>
>
> ---
>
> v8 -> v9
> - rebase on top of
> 1eb7f642750c ("vfio: Support host translation granule size")
>
> v5 -> v6:
> - add Error **
> ---
> hw/vfio/common.c | 199 +++++++++++++++++++++++++------------------
> hw/vfio/trace-events | 4 +-
> 2 files changed, 119 insertions(+), 84 deletions(-)
>
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index a8f835328e..0cd7ef2139 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -662,13 +662,126 @@ hostwin_from_range(VFIOContainer *container, hwaddr iova, hwaddr end)
> return NULL;
> }
>
> +static int vfio_dma_map_ram_section(VFIOContainer *container,
> + MemoryRegionSection *section, Error **err)
> +{
> + VFIOHostDMAWindow *hostwin;
> + Int128 llend, llsize;
> + hwaddr iova, end;
> + void *vaddr;
> + int ret;
> +
> + assert(memory_region_is_ram(section->mr));
> +
> + iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
> + llend = int128_make64(section->offset_within_address_space);
> + llend = int128_add(llend, section->size);
> + llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));
> + end = int128_get64(int128_sub(llend, int128_one()));
> +
> + vaddr = memory_region_get_ram_ptr(section->mr) +
> + section->offset_within_region +
> + (iova - section->offset_within_address_space);
> +
> + hostwin = hostwin_from_range(container, iova, end);
> + if (!hostwin) {
> + error_setg(err, "Container %p can't map guest IOVA region"
> + " 0x%"HWADDR_PRIx"..0x%"HWADDR_PRIx, container, iova, end);
> + return -EFAULT;
> + }
> +
> + trace_vfio_dma_map_ram(iova, end, vaddr);
> +
> + llsize = int128_sub(llend, int128_make64(iova));
> +
> + if (memory_region_is_ram_device(section->mr)) {
> + hwaddr pgmask = (1ULL << ctz64(hostwin->iova_pgsizes)) - 1;
> +
> + if ((iova & pgmask) || (int128_get64(llsize) & pgmask)) {
> + trace_vfio_listener_region_add_no_dma_map(
> + memory_region_name(section->mr),
> + section->offset_within_address_space,
> + int128_getlo(section->size),
> + pgmask + 1);
> + return 0;
> + }
> + }
> +
> + ret = vfio_dma_map(container, iova, int128_get64(llsize),
> + vaddr, section->readonly);
> + if (ret) {
> + error_setg(err, "vfio_dma_map(%p, 0x%"HWADDR_PRIx", "
> + "0x%"HWADDR_PRIx", %p) = %d (%m)",
> + container, iova, int128_get64(llsize), vaddr, ret);
> + if (memory_region_is_ram_device(section->mr)) {
> + /* Allow unexpected mappings not to be fatal for RAM devices */
> + error_report_err(*err);
> + return 0;
> + }
> + return ret;
> + }
> + return 0;
> +}
> +
> +static void vfio_dma_unmap_ram_section(VFIOContainer *container,
> + MemoryRegionSection *section)
> +{
> + Int128 llend, llsize;
> + hwaddr iova, end;
> + bool try_unmap = true;
> + int ret;
> +
> + iova = REAL_HOST_PAGE_ALIGN(section->offset_within_address_space);
> + llend = int128_make64(section->offset_within_address_space);
> + llend = int128_add(llend, section->size);
> + llend = int128_and(llend, int128_exts64(qemu_real_host_page_mask));
> +
> + if (int128_ge(int128_make64(iova), llend)) {
> + return;
> + }
> + end = int128_get64(int128_sub(llend, int128_one()));
> +
> + llsize = int128_sub(llend, int128_make64(iova));
> +
> + trace_vfio_dma_unmap_ram(iova, end);
> +
> + if (memory_region_is_ram_device(section->mr)) {
> + hwaddr pgmask;
> + VFIOHostDMAWindow *hostwin = hostwin_from_range(container, iova, end);
> +
> + assert(hostwin); /* or region_add() would have failed */
> +
> + pgmask = (1ULL << ctz64(hostwin->iova_pgsizes)) - 1;
> + try_unmap = !((iova & pgmask) || (int128_get64(llsize) & pgmask));
> + }
> +
> + if (try_unmap) {
> + if (int128_eq(llsize, int128_2_64())) {
> + /* The unmap ioctl doesn't accept a full 64-bit span. */
> + llsize = int128_rshift(llsize, 1);
> + ret = vfio_dma_unmap(container, iova, int128_get64(llsize), NULL);
> + if (ret) {
> + error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
> + "0x%"HWADDR_PRIx") = %d (%m)",
> + container, iova, int128_get64(llsize), ret);
> + }
> + iova += int128_get64(llsize);
> + }
> + ret = vfio_dma_unmap(container, iova, int128_get64(llsize), NULL);
> + if (ret) {
> + error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
> + "0x%"HWADDR_PRIx") = %d (%m)",
> + container, iova, int128_get64(llsize), ret);
> + }
> + }
> +}
> +
> static void vfio_listener_region_add(MemoryListener *listener,
> MemoryRegionSection *section)
> {
> VFIOContainer *container = container_of(listener, VFIOContainer, listener);
> hwaddr iova, end;
> - Int128 llend, llsize;
> - void *vaddr;
> + Int128 llend;
> int ret;
> VFIOHostDMAWindow *hostwin;
> Error *err = NULL;
> @@ -814,39 +927,7 @@ static void vfio_listener_region_add(MemoryListener *listener,
> }
>
> /* Here we assume that memory_region_is_ram(section->mr)==true */
> -
> - vaddr = memory_region_get_ram_ptr(section->mr) +
> - section->offset_within_region +
> - (iova - section->offset_within_address_space);
> -
> - trace_vfio_listener_region_add_ram(iova, end, vaddr);
> -
> - llsize = int128_sub(llend, int128_make64(iova));
> -
> - if (memory_region_is_ram_device(section->mr)) {
> - hwaddr pgmask = (1ULL << ctz64(hostwin->iova_pgsizes)) - 1;
> -
> - if ((iova & pgmask) || (int128_get64(llsize) & pgmask)) {
> - trace_vfio_listener_region_add_no_dma_map(
> - memory_region_name(section->mr),
> - section->offset_within_address_space,
> - int128_getlo(section->size),
> - pgmask + 1);
> - return;
> - }
> - }
> -
> - ret = vfio_dma_map(container, iova, int128_get64(llsize),
> - vaddr, section->readonly);
> - if (ret) {
> - error_setg(&err, "vfio_dma_map(%p, 0x%"HWADDR_PRIx", "
> - "0x%"HWADDR_PRIx", %p) = %d (%m)",
> - container, iova, int128_get64(llsize), vaddr, ret);
> - if (memory_region_is_ram_device(section->mr)) {
> - /* Allow unexpected mappings not to be fatal for RAM devices */
> - error_report_err(err);
> - return;
> - }
> + if (vfio_dma_map_ram_section(container, section, &err)) {
> goto fail;
> }
>
> @@ -880,10 +961,6 @@ static void vfio_listener_region_del(MemoryListener *listener,
> MemoryRegionSection *section)
> {
> VFIOContainer *container = container_of(listener, VFIOContainer, listener);
> - hwaddr iova, end;
> - Int128 llend, llsize;
> - int ret;
> - bool try_unmap = true;
>
> if (vfio_listener_skipped_section(section)) {
> trace_vfio_listener_region_del_skip(
> @@ -923,49 +1000,7 @@ static void vfio_listener_region_del(MemoryListener *listener,
> */
> }
There is some code in vfio_listener_region_del() that just doesn't
show up in this diff context.
I post it below.
> if (memory_region_is_iommu(section->mr)) {
> VFIOGuestIOMMU *giommu;
>
> QLIST_FOREACH(giommu, &container->giommu_list, giommu_next) {
> if (MEMORY_REGION(giommu->iommu) == section->mr &&
> giommu->n.start == section->offset_within_region) {
> memory_region_unregister_iommu_notifier(section->mr,
> &giommu->n);
> QLIST_REMOVE(giommu, giommu_next);
> g_free(giommu);
> break;
> }
> }
>
> /*
> * FIXME: We assume the one big unmap below is adequate to
> * remove any individual page mappings in the IOMMU which
> * might have been copied into VFIO. This works for a page table
> * based IOMMU where a big unmap flattens a large range of
> IO-PTEs.
> * That may not be true for all IOMMU types.
> */
> }
I think we need a check here. If it is nested mode, just return after
g_free(giommu), because in nested mode stage 2 (gpa->hpa) and
stage 1 (giova->gpa) are set up separately.
When hot-deleting a PCI device, we are going to call both
vfio_listener_region_del() and vfio_prereg_listener_region_del().
So it's not appropriate to unmap the RAM section in
vfio_listener_region_del(). The RAM section will be unmapped in
vfio_prereg_listener_region_del().
Thanks,
Kunkun Jiang
>
> - iova = REAL_HOST_PAGE_ALIGN(section->offset_within_address_space);
> - llend = int128_make64(section->offset_within_address_space);
> - llend = int128_add(llend, section->size);
> - llend = int128_and(llend, int128_exts64(qemu_real_host_page_mask));
> -
> - if (int128_ge(int128_make64(iova), llend)) {
> - return;
> - }
> - end = int128_get64(int128_sub(llend, int128_one()));
> -
> - llsize = int128_sub(llend, int128_make64(iova));
> -
> - trace_vfio_listener_region_del(iova, end);
> -
> - if (memory_region_is_ram_device(section->mr)) {
> - hwaddr pgmask;
> - VFIOHostDMAWindow *hostwin = hostwin_from_range(container, iova, end);
> -
> - assert(hostwin); /* or region_add() would have failed */
> -
> - pgmask = (1ULL << ctz64(hostwin->iova_pgsizes)) - 1;
> - try_unmap = !((iova & pgmask) || (int128_get64(llsize) & pgmask));
> - }
> -
> - if (try_unmap) {
> - if (int128_eq(llsize, int128_2_64())) {
> - /* The unmap ioctl doesn't accept a full 64-bit span. */
> - llsize = int128_rshift(llsize, 1);
> - ret = vfio_dma_unmap(container, iova, int128_get64(llsize), NULL);
> - if (ret) {
> - error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
> - "0x%"HWADDR_PRIx") = %d (%m)",
> - container, iova, int128_get64(llsize), ret);
> - }
> - iova += int128_get64(llsize);
> - }
> - ret = vfio_dma_unmap(container, iova, int128_get64(llsize), NULL);
> - if (ret) {
> - error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
> - "0x%"HWADDR_PRIx") = %d (%m)",
> - container, iova, int128_get64(llsize), ret);
> - }
> - }
> + vfio_dma_unmap_ram_section(container, section);
>
> memory_region_unref(section->mr);
>
> diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
> index 2a41326c0f..936d29d150 100644
> --- a/hw/vfio/trace-events
> +++ b/hw/vfio/trace-events
> @@ -99,10 +99,10 @@ vfio_iommu_map_notify(const char *op, uint64_t iova_start, uint64_t iova_end) "i
> vfio_listener_region_add_skip(uint64_t start, uint64_t end) "SKIPPING region_add 0x%"PRIx64" - 0x%"PRIx64
> vfio_spapr_group_attach(int groupfd, int tablefd) "Attached groupfd %d to liobn fd %d"
> vfio_listener_region_add_iommu(uint64_t start, uint64_t end) "region_add [iommu] 0x%"PRIx64" - 0x%"PRIx64
> -vfio_listener_region_add_ram(uint64_t iova_start, uint64_t iova_end, void *vaddr) "region_add [ram] 0x%"PRIx64" - 0x%"PRIx64" [%p]"
> +vfio_dma_map_ram(uint64_t iova_start, uint64_t iova_end, void *vaddr) "region_add [ram] 0x%"PRIx64" - 0x%"PRIx64" [%p]"
> vfio_listener_region_add_no_dma_map(const char *name, uint64_t iova, uint64_t size, uint64_t page_size) "Region \"%s\" 0x%"PRIx64" size=0x%"PRIx64" is not aligned to 0x%"PRIx64" and cannot be mapped for DMA"
> vfio_listener_region_del_skip(uint64_t start, uint64_t end) "SKIPPING region_del 0x%"PRIx64" - 0x%"PRIx64
> -vfio_listener_region_del(uint64_t start, uint64_t end) "region_del 0x%"PRIx64" - 0x%"PRIx64
> +vfio_dma_unmap_ram(uint64_t start, uint64_t end) "region_del 0x%"PRIx64" - 0x%"PRIx64
> vfio_disconnect_container(int fd) "close container->fd=%d"
> vfio_connect_existing_container(int groupid, int container_fd) "group=%d existing container fd=%d"
> vfio_connect_new_container(int groupid, int container_fd) "group=%d new container fd=%d"
next prev parent reply other threads:[~2021-04-27 14:07 UTC|newest]
Thread overview: 46+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-04-11 12:08 [RFC v9 00/29] vSMMUv3/pSMMUv3 2 stage VFIO integration Eric Auger
2021-04-11 12:08 ` [RFC v9 01/29] hw/vfio/common: trace vfio_connect_container operations Eric Auger
2021-04-11 12:08 ` [RFC v9 02/29] update-linux-headers: Import iommu.h Eric Auger
2021-04-11 12:08 ` [RFC v9 03/29] header update against 5.12-rc6 and IOMMU/VFIO nested stage APIs Eric Auger
2021-04-11 12:08 ` [RFC v9 04/29] memory: Add new fields in IOTLBEntry Eric Auger
2021-04-11 12:08 ` [RFC v9 05/29] hw/arm/smmuv3: Improve stage1 ASID invalidation Eric Auger
2021-04-11 12:08 ` [RFC v9 06/29] hw/arm/smmu-common: Allow domain invalidation for NH_ALL/NSNH_ALL Eric Auger
2021-04-11 12:08 ` [RFC v9 07/29] memory: Add IOMMU_ATTR_VFIO_NESTED IOMMU memory region attribute Eric Auger
2021-04-11 12:08 ` [RFC v9 08/29] memory: Add IOMMU_ATTR_MSI_TRANSLATE " Eric Auger
2021-04-11 12:08 ` [RFC v9 09/29] memory: Introduce IOMMU Memory Region inject_faults API Eric Auger
2021-04-11 12:08 ` [RFC v9 10/29] iommu: Introduce generic header Eric Auger
2021-04-11 12:08 ` [RFC v9 11/29] pci: introduce PCIPASIDOps to PCIDevice Eric Auger
2021-04-11 12:08 ` [RFC v9 12/29] vfio: Force nested if iommu requires it Eric Auger
2021-04-11 12:08 ` [RFC v9 13/29] vfio: Introduce hostwin_from_range helper Eric Auger
2021-04-11 12:08 ` [RFC v9 14/29] vfio: Introduce helpers to DMA map/unmap a RAM section Eric Auger
2021-04-27 14:05 ` Kunkun Jiang [this message]
2021-09-03 8:22 ` Kunkun Jiang
2021-04-11 12:08 ` [RFC v9 15/29] vfio: Set up nested stage mappings Eric Auger
2021-04-13 12:10 ` Kunkun Jiang
2021-04-13 12:57 ` Auger Eric
2021-04-14 1:45 ` Kunkun Jiang
2021-04-14 8:05 ` Auger Eric
2021-04-15 2:03 ` Kunkun Jiang
2021-04-26 19:16 ` Auger Eric
2021-04-28 9:51 ` Kunkun Jiang
2021-04-29 13:58 ` Auger Eric
2021-04-26 12:30 ` Auger Eric
2021-04-27 8:58 ` Kunkun Jiang
2021-10-07 16:58 ` Eric Auger
2021-10-08 2:13 ` Kunkun Jiang
2021-04-11 12:08 ` [RFC v9 16/29] vfio: Pass stage 1 MSI bindings to the host Eric Auger
2021-10-15 10:54 ` Shameerali Kolothum Thodi
2021-04-11 12:09 ` [RFC v9 17/29] vfio: Helper to get IRQ info including capabilities Eric Auger
2021-04-11 12:09 ` [RFC v9 18/29] vfio/pci: Register handler for iommu fault Eric Auger
2021-04-11 12:09 ` [RFC v9 19/29] vfio/pci: Set up the DMA FAULT region Eric Auger
2021-04-11 12:09 ` [RFC v9 20/29] vfio/pci: Implement the DMA fault handler Eric Auger
2021-04-11 12:09 ` [RFC v9 21/29] hw/arm/smmuv3: Advertise MSI_TRANSLATE attribute Eric Auger
2021-04-11 12:09 ` [RFC v9 22/29] hw/arm/smmuv3: Store the PASID table GPA in the translation config Eric Auger
2021-04-11 12:09 ` [RFC v9 23/29] hw/arm/smmuv3: Fill the IOTLBEntry arch_id on NH_VA invalidation Eric Auger
2021-04-11 12:09 ` [RFC v9 24/29] hw/arm/smmuv3: Fill the IOTLBEntry leaf field " Eric Auger
2021-05-13 7:09 ` Kunkun Jiang
2021-04-11 12:09 ` [RFC v9 25/29] hw/arm/smmuv3: Pass stage 1 configurations to the host Eric Auger
2021-04-11 12:09 ` [RFC v9 26/29] hw/arm/smmuv3: Implement fault injection Eric Auger
2021-04-11 12:09 ` [RFC v9 27/29] hw/arm/smmuv3: Allow MAP notifiers Eric Auger
2021-04-11 12:09 ` [RFC v9 28/29] pci: Add return_page_response pci ops Eric Auger
2021-04-11 12:09 ` [RFC v9 29/29] vfio/pci: Implement return_page_response page response callback Eric Auger
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=7e3defd3-50b5-c2df-77a7-9fa85fc3a448@huawei.com \
--to=jiangkunkun@huawei.com \
--cc=alex.williamson@redhat.com \
--cc=chenxiang66@hisilicon.com \
--cc=eric.auger.pro@gmail.com \
--cc=eric.auger@redhat.com \
--cc=jacob.jun.pan@linux.intel.com \
--cc=jean-philippe@linaro.org \
--cc=nicoleotsuka@gmail.com \
--cc=peter.maydell@linaro.org \
--cc=peterx@redhat.com \
--cc=qemu-arm@nongnu.org \
--cc=qemu-devel@nongnu.org \
--cc=shameerali.kolothum.thodi@huawei.com \
--cc=tn@semihalf.com \
--cc=vdumpa@nvidia.com \
--cc=vivek.gautam@arm.com \
--cc=yi.l.liu@intel.com \
--cc=yuzenghui@huawei.com \
--cc=zhangfei.gao@gmail.com \
--cc=zhukeqian1@huawei.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).