From: Jan Beulich <jbeulich@suse.com>
To: "xen-devel@lists.xenproject.org" <xen-devel@lists.xenproject.org>
Cc: "Andrew Cooper" <andrew.cooper3@citrix.com>,
	"Paul Durrant" <paul@xen.org>,
	"Roger Pau Monné" <roger.pau@citrix.com>
Subject: [PATCH v7 06/14] VT-d: allow use of superpage mappings
Date: Tue, 5 Jul 2022 14:46:33 +0200
Message-ID: <629e7e8a-b846-14cd-08eb-75433c8c3369@suse.com>
In-Reply-To: <4c9d4b32-d4db-3a4f-fe92-6022e63c3f82@suse.com>

... depending on feature availability (and absence of quirks).

Also make the page table dumping function aware of superpages.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>
---
v6: Re-base over addition of "iommu=no-superpages" command line option.
v5: In intel_iommu_{,un}map_page() assert page order is supported.
v4: Change type of queue_free_pt()'s 1st parameter. Re-base.
v3: Rename queue_free_pt()'s last parameter. Replace "level > 1" checks
    where possible. Tighten assertion.
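
A note for readers (illustration only, not part of the patch): the
order-to-level arithmetic introduced below relies on the VT-d page
table geometry of LEVEL_STRIDE (9) address bits per level, so a
mapping order converts to a page-walk level as:

    /* Illustration only; LEVEL_STRIDE is 9 in the VT-d code. */
    unsigned int level = (order / LEVEL_STRIDE) + 1;
    /* order  0 (4k page) -> level 1 (leaf PTE)      */
    /* order  9 (2M page) -> level 2 (superpage PTE) */
    /* order 18 (1G page) -> level 3 (superpage PTE) */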

--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -779,18 +779,37 @@ static int __must_check cf_check iommu_f
     return ret;
 }
 
+static void queue_free_pt(struct domain_iommu *hd, mfn_t mfn, unsigned int level)
+{
+    if ( level > 1 )
+    {
+        struct dma_pte *pt = map_domain_page(mfn);
+        unsigned int i;
+
+        for ( i = 0; i < PTE_NUM; ++i )
+            if ( dma_pte_present(pt[i]) && !dma_pte_superpage(pt[i]) )
+                queue_free_pt(hd, maddr_to_mfn(dma_pte_addr(pt[i])),
+                              level - 1);
+
+        unmap_domain_page(pt);
+    }
+
+    iommu_queue_free_pgtable(hd, mfn_to_page(mfn));
+}
+
 /* clear one page's page table */
 static int dma_pte_clear_one(struct domain *domain, daddr_t addr,
                              unsigned int order,
                              unsigned int *flush_flags)
 {
     struct domain_iommu *hd = dom_iommu(domain);
-    struct dma_pte *page = NULL, *pte = NULL;
+    struct dma_pte *page = NULL, *pte = NULL, old;
     u64 pg_maddr;
+    unsigned int level = (order / LEVEL_STRIDE) + 1;
 
     spin_lock(&hd->arch.mapping_lock);
-    /* get last level pte */
-    pg_maddr = addr_to_dma_page_maddr(domain, addr, 1, flush_flags, false);
+    /* get target level pte */
+    pg_maddr = addr_to_dma_page_maddr(domain, addr, level, flush_flags, false);
     if ( pg_maddr < PAGE_SIZE )
     {
         spin_unlock(&hd->arch.mapping_lock);
@@ -798,7 +817,7 @@ static int dma_pte_clear_one(struct doma
     }
 
     page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
-    pte = page + address_level_offset(addr, 1);
+    pte = &page[address_level_offset(addr, level)];
 
     if ( !dma_pte_present(*pte) )
     {
@@ -807,14 +826,20 @@ static int dma_pte_clear_one(struct doma
         return 0;
     }
 
+    old = *pte;
     dma_clear_pte(*pte);
-    *flush_flags |= IOMMU_FLUSHF_modified;
 
     spin_unlock(&hd->arch.mapping_lock);
     iommu_sync_cache(pte, sizeof(struct dma_pte));
 
     unmap_vtd_domain_page(page);
 
+    *flush_flags |= IOMMU_FLUSHF_modified;
+
+    if ( order && !dma_pte_superpage(old) )
+        queue_free_pt(hd, maddr_to_mfn(dma_pte_addr(old)),
+                      order / LEVEL_STRIDE);
+
     return 0;
 }
 
@@ -2092,8 +2117,12 @@ static int __must_check cf_check intel_i
     struct domain_iommu *hd = dom_iommu(d);
     struct dma_pte *page, *pte, old, new = {};
     u64 pg_maddr;
+    unsigned int level = (IOMMUF_order(flags) / LEVEL_STRIDE) + 1;
     int rc = 0;
 
+    ASSERT((hd->platform_ops->page_sizes >> IOMMUF_order(flags)) &
+           PAGE_SIZE_4K);
+
     /* Do nothing if VT-d shares EPT page table */
     if ( iommu_use_hap_pt(d) )
         return 0;
@@ -2116,7 +2145,7 @@ static int __must_check cf_check intel_i
         return 0;
     }
 
-    pg_maddr = addr_to_dma_page_maddr(d, dfn_to_daddr(dfn), 1, flush_flags,
+    pg_maddr = addr_to_dma_page_maddr(d, dfn_to_daddr(dfn), level, flush_flags,
                                       true);
     if ( pg_maddr < PAGE_SIZE )
     {
@@ -2125,13 +2154,15 @@ static int __must_check cf_check intel_i
     }
 
     page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
-    pte = &page[dfn_x(dfn) & LEVEL_MASK];
+    pte = &page[address_level_offset(dfn_to_daddr(dfn), level)];
     old = *pte;
 
     dma_set_pte_addr(new, mfn_to_maddr(mfn));
     dma_set_pte_prot(new,
                      ((flags & IOMMUF_readable) ? DMA_PTE_READ  : 0) |
                      ((flags & IOMMUF_writable) ? DMA_PTE_WRITE : 0));
+    if ( IOMMUF_order(flags) )
+        dma_set_pte_superpage(new);
 
     /* Set the SNP on leaf page table if Snoop Control available */
     if ( iommu_snoop )
@@ -2152,14 +2183,26 @@ static int __must_check cf_check intel_i
 
     *flush_flags |= IOMMU_FLUSHF_added;
     if ( dma_pte_present(old) )
+    {
         *flush_flags |= IOMMU_FLUSHF_modified;
 
+        if ( IOMMUF_order(flags) && !dma_pte_superpage(old) )
+            queue_free_pt(hd, maddr_to_mfn(dma_pte_addr(old)),
+                          IOMMUF_order(flags) / LEVEL_STRIDE);
+    }
+
     return rc;
 }
 
 static int __must_check cf_check intel_iommu_unmap_page(
     struct domain *d, dfn_t dfn, unsigned int order, unsigned int *flush_flags)
 {
+    /*
+     * While really we could unmap at any granularity, for now we assume unmaps
+     * are issued by common code only at the same granularity as maps.
+     */
+    ASSERT((dom_iommu(d)->platform_ops->page_sizes >> order) & PAGE_SIZE_4K);
+
     /* Do nothing if VT-d shares EPT page table */
     if ( iommu_use_hap_pt(d) )
         return 0;
@@ -2516,6 +2559,7 @@ static int __init cf_check vtd_setup(voi
 {
     struct acpi_drhd_unit *drhd;
     struct vtd_iommu *iommu;
+    unsigned int large_sizes = iommu_superpages ? PAGE_SIZE_2M | PAGE_SIZE_1G : 0;
     int ret;
     bool reg_inval_supported = true;
 
@@ -2558,6 +2602,11 @@ static int __init cf_check vtd_setup(voi
                cap_sps_2mb(iommu->cap) ? ", 2MB" : "",
                cap_sps_1gb(iommu->cap) ? ", 1GB" : "");
 
+        if ( !cap_sps_2mb(iommu->cap) )
+            large_sizes &= ~PAGE_SIZE_2M;
+        if ( !cap_sps_1gb(iommu->cap) )
+            large_sizes &= ~PAGE_SIZE_1G;
+
 #ifndef iommu_snoop
         if ( iommu_snoop && !ecap_snp_ctl(iommu->ecap) )
             iommu_snoop = false;
@@ -2629,6 +2678,9 @@ static int __init cf_check vtd_setup(voi
     if ( ret )
         goto error;
 
+    ASSERT(iommu_ops.page_sizes == PAGE_SIZE_4K);
+    iommu_ops.page_sizes |= large_sizes;
+
     register_keyhandler('V', vtd_dump_iommu_info, "dump iommu info", 1);
 
     return 0;
@@ -2961,7 +3013,7 @@ static void vtd_dump_page_table_level(pa
             continue;
 
         address = gpa + offset_level_address(i, level);
-        if ( next_level >= 1 ) 
+        if ( next_level && !dma_pte_superpage(*pte) )
             vtd_dump_page_table_level(dma_pte_addr(*pte), next_level,
                                       address, indent + 1);
         else
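
Two implementation notes (illustration only, not part of the patch):

The new assertions check the requested order against the bitmap of
supported mapping sizes: ->page_sizes has bit (PAGE_SHIFT + order) set
for each supported size, so shifting it right by the requested order
and masking with PAGE_SIZE_4K (bit 12) is non-zero exactly when that
order is usable:

    /* 4k (order  0): bit 12 stays at bit 12        -> pass   */
    /* 2M (order  9): (1u << 21) >> 9  == 1u << 12  -> pass   */
    /* 1G (order 18): (1u << 30) >> 18 == 1u << 12  -> pass   */
    /* unsupported order: no bit lands on bit 12    -> ASSERT */

queue_free_pt() covers replacing a present non-superpage entry with a
superpage mapping (or clearing one via a large unmap): the
no-longer-referenced page table subtree is walked recursively and each
of its pages is handed to iommu_queue_free_pgtable() for deferred
freeing, with IOMMU_FLUSHF_modified recorded so the necessary IOTLB
flush gets arranged for.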




Thread overview: 32+ messages
2022-07-05 12:41 [PATCH v7 00/14] IOMMU: superpage support when not sharing pagetables Jan Beulich
2022-07-05 12:43 ` [PATCH v7 01/14] iommu: add preemption support to iommu_{un,}map() Jan Beulich
2022-07-22 16:31   ` Durrant, Paul
2022-07-05 12:44 ` [PATCH v7 02/14] IOMMU/x86: perform PV Dom0 mappings in batches Jan Beulich
2022-07-22 16:36   ` Durrant, Paul
2022-07-05 12:44 ` [PATCH v7 03/14] IOMMU/x86: support freeing of pagetables Jan Beulich
2022-07-22 16:44   ` Durrant, Paul
2022-07-05 12:45 ` [PATCH v7 04/14] IOMMU/x86: new command line option to suppress use of superpage mappings Jan Beulich
2022-07-07  2:26   ` Tian, Kevin
2022-07-22 16:46   ` Durrant, Paul
2022-07-05 12:45 ` [PATCH v7 05/14] AMD/IOMMU: allow use of superpage mappings Jan Beulich
2022-07-22 16:48   ` Durrant, Paul
2022-07-05 12:46 ` Jan Beulich [this message]
2022-07-22 16:51   ` [PATCH v7 06/14] VT-d: allow use of superpage mappings Durrant, Paul
2022-07-05 12:47 ` [PATCH v7 07/14] x86: introduce helper for recording degree of contiguity in page tables Jan Beulich
2022-07-22 16:54   ` Durrant, Paul
2022-07-05 12:47 ` [PATCH v7 08/14] IOMMU/x86: prefill newly allocate page tables Jan Beulich
2022-07-22 16:56   ` Durrant, Paul
2022-07-05 12:47 ` [PATCH v7 09/14] AMD/IOMMU: free all-empty page tables Jan Beulich
2022-07-22 16:58   ` Durrant, Paul
2022-07-05 12:48 ` [PATCH v7 10/14] VT-d: free all-empty page tables Jan Beulich
2022-07-22 17:00   ` Durrant, Paul
2022-07-05 12:48 ` [PATCH v7 11/14] AMD/IOMMU: replace all-contiguous page tables by superpage mappings Jan Beulich
2022-07-22 17:02   ` Durrant, Paul
2022-07-05 12:48 ` [PATCH v7 12/14] VT-d: replace all-contiguous page tables by superpage mappings Jan Beulich
2022-07-22 17:09   ` Durrant, Paul
2022-07-05 12:49 ` [PATCH v7 13/14] IOMMU/x86: add perf counters for page table splitting / coalescing Jan Beulich
2022-07-22 17:09   ` Durrant, Paul
2022-07-05 12:49 ` [PATCH v7 14/14] VT-d: fold dma_pte_clear_one() into its only caller Jan Beulich
2022-07-22 17:11   ` Durrant, Paul
2022-07-05 12:51 ` [PATCH v7 00/14] IOMMU: superpage support when not sharing pagetables Jan Beulich
2022-07-07  6:20 ` Jan Beulich
