From: Alex Williamson <alex.williamson@redhat.com>
To: Lu Baolu <baolu.lu@linux.intel.com>
Cc: iommu@lists.linux-foundation.org, Tom Murphy <murphyt7@tcd.ie>
Subject: [Regression] Re: [PATCH 18/18] iommu/vt-d: Remove IOVA handling code from the non-dma_ops path
Date: Wed, 17 Jun 2020 14:06:39 -0600 [thread overview]
Message-ID: <20200617140639.7ed58926@x1.home> (raw)
In-Reply-To: <20200516062101.29541-19-baolu.lu@linux.intel.com>
On Sat, 16 May 2020 14:21:01 +0800
Lu Baolu <baolu.lu@linux.intel.com> wrote:
> From: Tom Murphy <murphyt7@tcd.ie>
>
> There's no need for the non-dma_ops path to keep track of IOVAs. The
> whole point of the non-dma_ops path is that it allows the IOVAs to be
> handled separately. The IOVA handling code removed in this patch is
> pointless.
>
> Signed-off-by: Tom Murphy <murphyt7@tcd.ie>
> Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
> ---
> drivers/iommu/intel-iommu.c | 95 +++++++++++++------------------------
> 1 file changed, 32 insertions(+), 63 deletions(-)
This commit results in a massive increase in memory use from the VT-d
code. I have a 16GB system where I reserve 7168 2MB hugepages for VM
usage (14GB), leaving the host with 2GB. I can no longer even boot the
host in this configuration. Bisecting to this commit, I find that
reverting this change shows the following memory usage difference
immediately after boot (no hugepages, nosmp, single user,
intel_iommu=on iommu=pt):
@e70b081c6f37:
              total        used        free      shared  buff/cache   available
Mem:       16090860     2219372    13673044        1040      198444    13602664
Swap:       2097148           0     2097148
reverting e70b081c6f37:
              total        used        free      shared  buff/cache   available
Mem:       16090852      101648    15789156        1040      200048    15719572
Swap:       2097148           0     2097148
More than 2GB of additional memory used! There's also a notable stall
during bootup for this allocation:
[ 9.703360] DMAR: No ATSR found
[ 9.709768] DMAR: dmar0: Using Queued invalidation
[ 9.719370] DMAR: dmar1: Using Queued invalidation
### 4+ seconds! ###
[ 14.076387] pci 0000:00:00.0: Adding to iommu group 0
[ 14.086515] pci 0000:00:01.0: Adding to iommu group 1
[ 14.096635] pci 0000:00:02.0: Adding to iommu group 2
Thanks,
Alex
> diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
> index 3c5cc3424e90..f75d7d9c231f 100644
> --- a/drivers/iommu/intel-iommu.c
> +++ b/drivers/iommu/intel-iommu.c
> @@ -1892,11 +1892,6 @@ static int dmar_init_reserved_ranges(void)
> return 0;
> }
>
> -static void domain_reserve_special_ranges(struct dmar_domain *domain)
> -{
> - copy_reserved_iova(&reserved_iova_list, &domain->iovad);
> -}
> -
> static inline int guestwidth_to_adjustwidth(int gaw)
> {
> int agaw;
> @@ -1918,7 +1913,8 @@ static void domain_exit(struct dmar_domain *domain)
> domain_remove_dev_info(domain);
>
> /* destroy iovas */
> - put_iova_domain(&domain->iovad);
> + if (domain->domain.type == IOMMU_DOMAIN_DMA)
> + put_iova_domain(&domain->iovad);
>
> if (domain->pgd) {
> struct page *freelist;
> @@ -2627,19 +2623,9 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
> }
>
> static int iommu_domain_identity_map(struct dmar_domain *domain,
> - unsigned long long start,
> - unsigned long long end)
> + unsigned long first_vpfn,
> + unsigned long last_vpfn)
> {
> - unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
> - unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
> -
> - if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
> - dma_to_mm_pfn(last_vpfn))) {
> - pr_err("Reserving iova failed\n");
> - return -ENOMEM;
> - }
> -
> - pr_debug("Mapping reserved region %llx-%llx\n", start, end);
> /*
> * RMRR range might have overlap with physical memory range,
> * clear it first
> @@ -2677,7 +2663,8 @@ static int __init si_domain_init(int hw)
>
> for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
> ret = iommu_domain_identity_map(si_domain,
> - PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
> + mm_to_dma_pfn(start_pfn),
> + mm_to_dma_pfn(end_pfn));
> if (ret)
> return ret;
> }
> @@ -4547,58 +4534,37 @@ static int intel_iommu_memory_notifier(struct notifier_block *nb,
> unsigned long val, void *v)
> {
> struct memory_notify *mhp = v;
> - unsigned long long start, end;
> - unsigned long start_vpfn, last_vpfn;
> + unsigned long start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
> + unsigned long last_vpfn = mm_to_dma_pfn(mhp->start_pfn +
> + mhp->nr_pages - 1);
>
> switch (val) {
> case MEM_GOING_ONLINE:
> - start = mhp->start_pfn << PAGE_SHIFT;
> - end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
> - if (iommu_domain_identity_map(si_domain, start, end)) {
> - pr_warn("Failed to build identity map for [%llx-%llx]\n",
> - start, end);
> + if (iommu_domain_identity_map(si_domain,
> + start_vpfn, last_vpfn)) {
> + pr_warn("Failed to build identity map for [%lx-%lx]\n",
> + start_vpfn, last_vpfn);
> return NOTIFY_BAD;
> }
> break;
>
> case MEM_OFFLINE:
> case MEM_CANCEL_ONLINE:
> - start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
> - last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
> - while (start_vpfn <= last_vpfn) {
> - struct iova *iova;
> + {
> struct dmar_drhd_unit *drhd;
> struct intel_iommu *iommu;
> struct page *freelist;
>
> - iova = find_iova(&si_domain->iovad, start_vpfn);
> - if (iova == NULL) {
> - pr_debug("Failed get IOVA for PFN %lx\n",
> - start_vpfn);
> - break;
> - }
> -
> - iova = split_and_remove_iova(&si_domain->iovad, iova,
> - start_vpfn, last_vpfn);
> - if (iova == NULL) {
> - pr_warn("Failed to split IOVA PFN [%lx-%lx]\n",
> - start_vpfn, last_vpfn);
> - return NOTIFY_BAD;
> - }
> -
> - freelist = domain_unmap(si_domain, iova->pfn_lo,
> - iova->pfn_hi);
> + freelist = domain_unmap(si_domain,
> + start_vpfn, last_vpfn);
>
> rcu_read_lock();
> for_each_active_iommu(iommu, drhd)
> iommu_flush_iotlb_psi(iommu, si_domain,
> - iova->pfn_lo, iova_size(iova),
> + start_vpfn, mhp->nr_pages,
> !freelist, 0);
> rcu_read_unlock();
> dma_free_pagelist(freelist);
> -
> - start_vpfn = iova->pfn_hi + 1;
> - free_iova_mem(iova);
> }
> break;
> }
> @@ -4626,8 +4592,9 @@ static void free_all_cpu_cached_iovas(unsigned int cpu)
> for (did = 0; did < cap_ndoms(iommu->cap); did++) {
> domain = get_iommu_domain(iommu, (u16)did);
>
> - if (!domain)
> + if (!domain || domain->domain.type != IOMMU_DOMAIN_DMA)
> continue;
> +
> free_cpu_cached_iovas(cpu, &domain->iovad);
> }
> }
> @@ -5037,9 +5004,6 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width)
> {
> int adjust_width;
>
> - init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
> - domain_reserve_special_ranges(domain);
> -
> /* calculate AGAW */
> domain->gaw = guest_width;
> adjust_width = guestwidth_to_adjustwidth(guest_width);
> @@ -5058,11 +5022,21 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width)
> return 0;
> }
>
> +static void intel_init_iova_domain(struct dmar_domain *dmar_domain)
> +{
> + init_iova_domain(&dmar_domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
> + copy_reserved_iova(&reserved_iova_list, &dmar_domain->iovad);
> +
> + if (!intel_iommu_strict &&
> + init_iova_flush_queue(&dmar_domain->iovad,
> + iommu_flush_iova, iova_entry_free))
> + pr_info("iova flush queue initialization failed\n");
> +}
> +
> static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
> {
> struct dmar_domain *dmar_domain;
> struct iommu_domain *domain;
> - int ret;
>
> switch (type) {
> case IOMMU_DOMAIN_DMA:
> @@ -5079,13 +5053,8 @@ static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
> return NULL;
> }
>
> - if (!intel_iommu_strict && type == IOMMU_DOMAIN_DMA) {
> - ret = init_iova_flush_queue(&dmar_domain->iovad,
> - iommu_flush_iova,
> - iova_entry_free);
> - if (ret)
> - pr_info("iova flush queue initialization failed\n");
> - }
> + if (type == IOMMU_DOMAIN_DMA)
> + intel_init_iova_domain(dmar_domain);
>
> domain_update_iommu_cap(dmar_domain);
>
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu
next prev parent reply other threads:[~2020-06-17 20:06 UTC|newest]
Thread overview: 26+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-05-16 6:20 [PATCH 00/18] [PULL REQUEST] iommu/vt-d: patches for v5.8 Lu Baolu
2020-05-16 6:20 ` [PATCH 01/18] iommu/vt-d: Move domain helper to header Lu Baolu
2020-05-16 6:20 ` [PATCH 02/18] iommu/vt-d: Use a helper function to skip agaw for SL Lu Baolu
2020-05-16 6:20 ` [PATCH 03/18] iommu/vt-d: Add nested translation helper function Lu Baolu
2020-05-16 6:20 ` [PATCH 04/18] iommu/vt-d: Add bind guest PASID support Lu Baolu
2020-05-16 6:20 ` [PATCH 05/18] iommu/vt-d: Support flushing more translation cache types Lu Baolu
2020-05-16 6:20 ` [PATCH 06/18] iommu/vt-d: Add svm/sva invalidate function Lu Baolu
2020-05-16 6:20 ` [PATCH 07/18] iommu/vt-d: Enlightened PASID allocation Lu Baolu
2020-05-16 6:20 ` [PATCH 08/18] iommu/vt-d: Add custom allocator for IOASID Lu Baolu
2020-05-16 6:20 ` [PATCH 09/18] iommu/vt-d: Add get_domain_info() helper Lu Baolu
2020-05-16 6:20 ` [PATCH 10/18] iommu/vt-d: Report SVA feature with generic flag Lu Baolu
2020-05-16 6:20 ` [PATCH 11/18] iommu/vt-d: Replace intel SVM APIs with generic SVA APIs Lu Baolu
2020-05-16 6:20 ` [PATCH 12/18] iommu/vt-d: Multiple descriptors per qi_submit_sync() Lu Baolu
2020-05-16 6:20 ` [PATCH 13/18] iommu/vt-d: debugfs: Add support to show inv queue internals Lu Baolu
2020-05-16 6:20 ` [PATCH 14/18] iommu/vt-d: Disable non-recoverable fault processing before unbind Lu Baolu
2020-05-16 6:20 ` [PATCH 15/18] iommu/vt-d: Add page request draining support Lu Baolu
2020-05-16 6:20 ` [PATCH 16/18] iommu/vt-d: Remove redundant IOTLB flush Lu Baolu
2020-05-16 6:21 ` [PATCH 17/18] iommu/vt-d: Remove duplicated check in intel_svm_bind_mm() Lu Baolu
2020-05-16 6:21 ` [PATCH 18/18] iommu/vt-d: Remove IOVA handling code from the non-dma_ops path Lu Baolu
2020-06-17 20:06 ` Alex Williamson [this message]
2020-06-18 1:52 ` [Regression] " Lu Baolu
2020-06-18 15:03 ` Alex Williamson
2020-06-18 23:38 ` Lu Baolu
2020-05-18 13:38 ` [PATCH 00/18] [PULL REQUEST] iommu/vt-d: patches for v5.8 Joerg Roedel
2020-05-18 18:50 ` Joerg Roedel
2020-05-19 1:46 ` Lu Baolu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200617140639.7ed58926@x1.home \
--to=alex.williamson@redhat.com \
--cc=baolu.lu@linux.intel.com \
--cc=iommu@lists.linux-foundation.org \
--cc=murphyt7@tcd.ie \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).