iommu.lists.linux-foundation.org archive mirror
 help / color / mirror / Atom feed
From: Alex Williamson <alex.williamson@redhat.com>
To: Lu Baolu <baolu.lu@linux.intel.com>
Cc: iommu@lists.linux-foundation.org, Tom Murphy <murphyt7@tcd.ie>
Subject: [Regression] Re: [PATCH 18/18] iommu/vt-d: Remove IOVA handling code from the non-dma_ops path
Date: Wed, 17 Jun 2020 14:06:39 -0600	[thread overview]
Message-ID: <20200617140639.7ed58926@x1.home> (raw)
In-Reply-To: <20200516062101.29541-19-baolu.lu@linux.intel.com>

On Sat, 16 May 2020 14:21:01 +0800
Lu Baolu <baolu.lu@linux.intel.com> wrote:

> From: Tom Murphy <murphyt7@tcd.ie>
> 
> There's no need for the non-dma_ops path to keep track of IOVAs. The
> whole point of the non-dma_ops path is that it allows the IOVAs to be
> handled separately. The IOVA handling code removed in this patch is
> pointless.
> 
> Signed-off-by: Tom Murphy <murphyt7@tcd.ie>
> Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
> ---
>  drivers/iommu/intel-iommu.c | 95 +++++++++++++------------------------
>  1 file changed, 32 insertions(+), 63 deletions(-)

This commit results in a massive increase in memory use from the VT-d
code.  I have a 16GB system where I reserve 7168 2MB hugepages for VM
usage (14GB), leaving the host with 2GB.  I can no longer even boot the
host in this configuration.  Bisecting to this commit, I find that
reverting this change shows the following memory usage difference
immediately after boot (no hugepages, nosmp, single user,
intel_iommu=on iommu=pt):

@e70b081c6f37:
             total        used        free      shared  buff/cache   available
Mem:       16090860     2219372    13673044        1040      198444    13602664
Swap:       2097148           0     2097148

reverting e70b081c6f37:
              total        used        free      shared  buff/cache   available
Mem:       16090852      101648    15789156        1040      200048    15719572
Swap:       2097148           0     2097148

More than 2GB of additional memory used!  There's also a notable stall
during bootup for this allocation:

[    9.703360] DMAR: No ATSR found
[    9.709768] DMAR: dmar0: Using Queued invalidation
[    9.719370] DMAR: dmar1: Using Queued invalidation

### 4+ seconds! ###

[   14.076387] pci 0000:00:00.0: Adding to iommu group 0
[   14.086515] pci 0000:00:01.0: Adding to iommu group 1
[   14.096635] pci 0000:00:02.0: Adding to iommu group 2

Thanks,

Alex

> diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
> index 3c5cc3424e90..f75d7d9c231f 100644
> --- a/drivers/iommu/intel-iommu.c
> +++ b/drivers/iommu/intel-iommu.c
> @@ -1892,11 +1892,6 @@ static int dmar_init_reserved_ranges(void)
>  	return 0;
>  }
>  
> -static void domain_reserve_special_ranges(struct dmar_domain *domain)
> -{
> -	copy_reserved_iova(&reserved_iova_list, &domain->iovad);
> -}
> -
>  static inline int guestwidth_to_adjustwidth(int gaw)
>  {
>  	int agaw;
> @@ -1918,7 +1913,8 @@ static void domain_exit(struct dmar_domain *domain)
>  	domain_remove_dev_info(domain);
>  
>  	/* destroy iovas */
> -	put_iova_domain(&domain->iovad);
> +	if (domain->domain.type == IOMMU_DOMAIN_DMA)
> +		put_iova_domain(&domain->iovad);
>  
>  	if (domain->pgd) {
>  		struct page *freelist;
> @@ -2627,19 +2623,9 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
>  }
>  
>  static int iommu_domain_identity_map(struct dmar_domain *domain,
> -				     unsigned long long start,
> -				     unsigned long long end)
> +				     unsigned long first_vpfn,
> +				     unsigned long last_vpfn)
>  {
> -	unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
> -	unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
> -
> -	if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
> -			  dma_to_mm_pfn(last_vpfn))) {
> -		pr_err("Reserving iova failed\n");
> -		return -ENOMEM;
> -	}
> -
> -	pr_debug("Mapping reserved region %llx-%llx\n", start, end);
>  	/*
>  	 * RMRR range might have overlap with physical memory range,
>  	 * clear it first
> @@ -2677,7 +2663,8 @@ static int __init si_domain_init(int hw)
>  
>  		for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
>  			ret = iommu_domain_identity_map(si_domain,
> -					PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
> +					mm_to_dma_pfn(start_pfn),
> +					mm_to_dma_pfn(end_pfn));
>  			if (ret)
>  				return ret;
>  		}
> @@ -4547,58 +4534,37 @@ static int intel_iommu_memory_notifier(struct notifier_block *nb,
>  				       unsigned long val, void *v)
>  {
>  	struct memory_notify *mhp = v;
> -	unsigned long long start, end;
> -	unsigned long start_vpfn, last_vpfn;
> +	unsigned long start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
> +	unsigned long last_vpfn = mm_to_dma_pfn(mhp->start_pfn +
> +			mhp->nr_pages - 1);
>  
>  	switch (val) {
>  	case MEM_GOING_ONLINE:
> -		start = mhp->start_pfn << PAGE_SHIFT;
> -		end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
> -		if (iommu_domain_identity_map(si_domain, start, end)) {
> -			pr_warn("Failed to build identity map for [%llx-%llx]\n",
> -				start, end);
> +		if (iommu_domain_identity_map(si_domain,
> +					      start_vpfn, last_vpfn)) {
> +			pr_warn("Failed to build identity map for [%lx-%lx]\n",
> +				start_vpfn, last_vpfn);
>  			return NOTIFY_BAD;
>  		}
>  		break;
>  
>  	case MEM_OFFLINE:
>  	case MEM_CANCEL_ONLINE:
> -		start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
> -		last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
> -		while (start_vpfn <= last_vpfn) {
> -			struct iova *iova;
> +		{
>  			struct dmar_drhd_unit *drhd;
>  			struct intel_iommu *iommu;
>  			struct page *freelist;
>  
> -			iova = find_iova(&si_domain->iovad, start_vpfn);
> -			if (iova == NULL) {
> -				pr_debug("Failed get IOVA for PFN %lx\n",
> -					 start_vpfn);
> -				break;
> -			}
> -
> -			iova = split_and_remove_iova(&si_domain->iovad, iova,
> -						     start_vpfn, last_vpfn);
> -			if (iova == NULL) {
> -				pr_warn("Failed to split IOVA PFN [%lx-%lx]\n",
> -					start_vpfn, last_vpfn);
> -				return NOTIFY_BAD;
> -			}
> -
> -			freelist = domain_unmap(si_domain, iova->pfn_lo,
> -					       iova->pfn_hi);
> +			freelist = domain_unmap(si_domain,
> +						start_vpfn, last_vpfn);
>  
>  			rcu_read_lock();
>  			for_each_active_iommu(iommu, drhd)
>  				iommu_flush_iotlb_psi(iommu, si_domain,
> -					iova->pfn_lo, iova_size(iova),
> +					start_vpfn, mhp->nr_pages,
>  					!freelist, 0);
>  			rcu_read_unlock();
>  			dma_free_pagelist(freelist);
> -
> -			start_vpfn = iova->pfn_hi + 1;
> -			free_iova_mem(iova);
>  		}
>  		break;
>  	}
> @@ -4626,8 +4592,9 @@ static void free_all_cpu_cached_iovas(unsigned int cpu)
>  		for (did = 0; did < cap_ndoms(iommu->cap); did++) {
>  			domain = get_iommu_domain(iommu, (u16)did);
>  
> -			if (!domain)
> +			if (!domain || domain->domain.type != IOMMU_DOMAIN_DMA)
>  				continue;
> +
>  			free_cpu_cached_iovas(cpu, &domain->iovad);
>  		}
>  	}
> @@ -5037,9 +5004,6 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width)
>  {
>  	int adjust_width;
>  
> -	init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
> -	domain_reserve_special_ranges(domain);
> -
>  	/* calculate AGAW */
>  	domain->gaw = guest_width;
>  	adjust_width = guestwidth_to_adjustwidth(guest_width);
> @@ -5058,11 +5022,21 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width)
>  	return 0;
>  }
>  
> +static void intel_init_iova_domain(struct dmar_domain *dmar_domain)
> +{
> +	init_iova_domain(&dmar_domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
> +	copy_reserved_iova(&reserved_iova_list, &dmar_domain->iovad);
> +
> +	if (!intel_iommu_strict &&
> +	    init_iova_flush_queue(&dmar_domain->iovad,
> +				  iommu_flush_iova, iova_entry_free))
> +		pr_info("iova flush queue initialization failed\n");
> +}
> +
>  static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
>  {
>  	struct dmar_domain *dmar_domain;
>  	struct iommu_domain *domain;
> -	int ret;
>  
>  	switch (type) {
>  	case IOMMU_DOMAIN_DMA:
> @@ -5079,13 +5053,8 @@ static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
>  			return NULL;
>  		}
>  
> -		if (!intel_iommu_strict && type == IOMMU_DOMAIN_DMA) {
> -			ret = init_iova_flush_queue(&dmar_domain->iovad,
> -						    iommu_flush_iova,
> -						    iova_entry_free);
> -			if (ret)
> -				pr_info("iova flush queue initialization failed\n");
> -		}
> +		if (type == IOMMU_DOMAIN_DMA)
> +			intel_init_iova_domain(dmar_domain);
>  
>  		domain_update_iommu_cap(dmar_domain);
>  

_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

  reply	other threads:[~2020-06-17 20:06 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-05-16  6:20 [PATCH 00/18] [PULL REQUEST] iommu/vt-d: patches for v5.8 Lu Baolu
2020-05-16  6:20 ` [PATCH 01/18] iommu/vt-d: Move domain helper to header Lu Baolu
2020-05-16  6:20 ` [PATCH 02/18] iommu/vt-d: Use a helper function to skip agaw for SL Lu Baolu
2020-05-16  6:20 ` [PATCH 03/18] iommu/vt-d: Add nested translation helper function Lu Baolu
2020-05-16  6:20 ` [PATCH 04/18] iommu/vt-d: Add bind guest PASID support Lu Baolu
2020-05-16  6:20 ` [PATCH 05/18] iommu/vt-d: Support flushing more translation cache types Lu Baolu
2020-05-16  6:20 ` [PATCH 06/18] iommu/vt-d: Add svm/sva invalidate function Lu Baolu
2020-05-16  6:20 ` [PATCH 07/18] iommu/vt-d: Enlightened PASID allocation Lu Baolu
2020-05-16  6:20 ` [PATCH 08/18] iommu/vt-d: Add custom allocator for IOASID Lu Baolu
2020-05-16  6:20 ` [PATCH 09/18] iommu/vt-d: Add get_domain_info() helper Lu Baolu
2020-05-16  6:20 ` [PATCH 10/18] iommu/vt-d: Report SVA feature with generic flag Lu Baolu
2020-05-16  6:20 ` [PATCH 11/18] iommu/vt-d: Replace intel SVM APIs with generic SVA APIs Lu Baolu
2020-05-16  6:20 ` [PATCH 12/18] iommu/vt-d: Multiple descriptors per qi_submit_sync() Lu Baolu
2020-05-16  6:20 ` [PATCH 13/18] iommu/vt-d: debugfs: Add support to show inv queue internals Lu Baolu
2020-05-16  6:20 ` [PATCH 14/18] iommu/vt-d: Disable non-recoverable fault processing before unbind Lu Baolu
2020-05-16  6:20 ` [PATCH 15/18] iommu/vt-d: Add page request draining support Lu Baolu
2020-05-16  6:20 ` [PATCH 16/18] iommu/vt-d: Remove redundant IOTLB flush Lu Baolu
2020-05-16  6:21 ` [PATCH 17/18] iommu/vt-d: Remove duplicated check in intel_svm_bind_mm() Lu Baolu
2020-05-16  6:21 ` [PATCH 18/18] iommu/vt-d: Remove IOVA handling code from the non-dma_ops path Lu Baolu
2020-06-17 20:06   ` Alex Williamson [this message]
2020-06-18  1:52     ` [Regression] " Lu Baolu
2020-06-18 15:03       ` Alex Williamson
2020-06-18 23:38         ` Lu Baolu
2020-05-18 13:38 ` [PATCH 00/18] [PULL REQUEST] iommu/vt-d: patches for v5.8 Joerg Roedel
2020-05-18 18:50 ` Joerg Roedel
2020-05-19  1:46   ` Lu Baolu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200617140639.7ed58926@x1.home \
    --to=alex.williamson@redhat.com \
    --cc=baolu.lu@linux.intel.com \
    --cc=iommu@lists.linux-foundation.org \
    --cc=murphyt7@tcd.ie \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).