From: Jan Beulich <jbeulich@suse.com>
To: "xen-devel@lists.xenproject.org" <xen-devel@lists.xenproject.org>
Cc: Paul Durrant <paul@xen.org>,
	Andrew Cooper <andrew.cooper3@citrix.com>,
	George Dunlap <george.dunlap@citrix.com>,
	Ian Jackson <iwj@xenproject.org>, Julien Grall <julien@xen.org>,
	Stefano Stabellini <sstabellini@kernel.org>, Wei Liu <wl@xen.org>
Subject: [PATCH 13/17] AMD/IOMMU: allow use of superpage mappings
Date: Tue, 24 Aug 2021 16:25:26 +0200	[thread overview]
Message-ID: <ecdf0636-0428-de38-8d6d-fad10c962d5a@suse.com> (raw)
In-Reply-To: <d955d9a3-33f6-f228-d007-a844ed918168@suse.com>

No separate feature flags exist to control the availability of these
mappings; the only restriction is HATS (establishing the maximum number
of page table levels in general), and even that has a lower bound of 4.
Thus we can unconditionally announce 2M, 1G, and 512G mappings. (Via
non-default page sizes the implementation in principle permits
arbitrarily sized mappings, but these require multiple identical leaf
PTEs to be written, which isn't all that different from having to write
multiple consecutive PTEs with increasing frame numbers. IMO that's
therefore beneficial only on hardware where suitable TLBs exist; I'm
unaware of such hardware.)

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
I'm not fully sure about allowing 512G mappings: The scheduling-for-
freeing of intermediate page tables can take quite a while when
replacing a tree of 4k mappings by a single 512G one (a fully populated
tree underneath a single 512G entry amounts to roughly 260k page tables,
i.e. about 1GiB of table memory). Plus (or, looked at the other way)
there's currently no code path via which 512G chunks of memory could be
allocated (and hence mapped) anyway.
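
Not part of the patch, purely for illustration: the order-to-level
relation the changes below rely on, assuming PTE_PER_TABLE_SHIFT is 9
(512 entries per table); the helper name here is made up.

    /* Map a mapping order (in 4k pages) to the page table level whose
     * entry becomes the leaf for that size:
     * order 0 (4k) -> level 1, order 9 (2M) -> level 2,
     * order 18 (1G) -> level 3, order 27 (512G) -> level 4. */
    static unsigned int order_to_level(unsigned int order)
    {
        return order / 9 /* PTE_PER_TABLE_SHIFT */ + 1;
    }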

--- a/xen/drivers/passthrough/amd/iommu_map.c
+++ b/xen/drivers/passthrough/amd/iommu_map.c
@@ -32,12 +32,13 @@ static unsigned int pfn_to_pde_idx(unsig
 }
 
 static union amd_iommu_pte clear_iommu_pte_present(unsigned long l1_mfn,
-                                                   unsigned long dfn)
+                                                   unsigned long dfn,
+                                                   unsigned int level)
 {
     union amd_iommu_pte *table, *pte, old;
 
     table = map_domain_page(_mfn(l1_mfn));
-    pte = &table[pfn_to_pde_idx(dfn, 1)];
+    pte = &table[pfn_to_pde_idx(dfn, level)];
     old = *pte;
 
     write_atomic(&pte->raw, 0);
@@ -288,10 +289,31 @@ static int iommu_pde_from_dfn(struct dom
     return 0;
 }
 
+static void queue_free_pt(struct domain *d, mfn_t mfn, unsigned int next_level)
+{
+    if ( next_level > 1 )
+    {
+        union amd_iommu_pte *pt = map_domain_page(mfn);
+        unsigned int i;
+
+        for ( i = 0; i < PTE_PER_TABLE_SIZE; ++i )
+            if ( pt[i].pr && pt[i].next_level )
+            {
+                ASSERT(pt[i].next_level < next_level);
+                queue_free_pt(d, _mfn(pt[i].mfn), pt[i].next_level);
+            }
+
+        unmap_domain_page(pt);
+    }
+
+    iommu_queue_free_pgtable(d, mfn_to_page(mfn));
+}
+
 int amd_iommu_map_page(struct domain *d, dfn_t dfn, mfn_t mfn,
                        unsigned int flags, unsigned int *flush_flags)
 {
     struct domain_iommu *hd = dom_iommu(d);
+    unsigned int level = (IOMMUF_order(flags) / PTE_PER_TABLE_SHIFT) + 1;
     int rc;
     unsigned long pt_mfn = 0;
     union amd_iommu_pte old;
@@ -320,7 +342,7 @@ int amd_iommu_map_page(struct domain *d,
         return rc;
     }
 
-    if ( iommu_pde_from_dfn(d, dfn_x(dfn), 1, &pt_mfn, flush_flags, true) ||
+    if ( iommu_pde_from_dfn(d, dfn_x(dfn), level, &pt_mfn, flush_flags, true) ||
          !pt_mfn )
     {
         spin_unlock(&hd->arch.mapping_lock);
@@ -330,8 +352,8 @@ int amd_iommu_map_page(struct domain *d,
         return -EFAULT;
     }
 
-    /* Install 4k mapping */
-    old = set_iommu_pte_present(pt_mfn, dfn_x(dfn), mfn_x(mfn), 1,
+    /* Install mapping */
+    old = set_iommu_pte_present(pt_mfn, dfn_x(dfn), mfn_x(mfn), level,
                                 (flags & IOMMUF_writable),
                                 (flags & IOMMUF_readable));
 
@@ -339,8 +361,13 @@ int amd_iommu_map_page(struct domain *d,
 
     *flush_flags |= IOMMU_FLUSHF_added;
     if ( old.pr )
+    {
         *flush_flags |= IOMMU_FLUSHF_modified;
 
+        if ( level > 1 && old.next_level )
+            queue_free_pt(d, _mfn(old.mfn), old.next_level);
+    }
+
     return 0;
 }
 
@@ -349,6 +376,7 @@ int amd_iommu_unmap_page(struct domain *
 {
     unsigned long pt_mfn = 0;
     struct domain_iommu *hd = dom_iommu(d);
+    unsigned int level = (order / PTE_PER_TABLE_SHIFT) + 1;
     union amd_iommu_pte old = {};
 
     spin_lock(&hd->arch.mapping_lock);
@@ -359,7 +387,7 @@ int amd_iommu_unmap_page(struct domain *
         return 0;
     }
 
-    if ( iommu_pde_from_dfn(d, dfn_x(dfn), 1, &pt_mfn, flush_flags, false) )
+    if ( iommu_pde_from_dfn(d, dfn_x(dfn), level, &pt_mfn, flush_flags, false) )
     {
         spin_unlock(&hd->arch.mapping_lock);
         AMD_IOMMU_DEBUG("Invalid IO pagetable entry dfn = %"PRI_dfn"\n",
@@ -371,14 +399,19 @@ int amd_iommu_unmap_page(struct domain *
     if ( pt_mfn )
     {
         /* Mark PTE as 'page not present'. */
-        old = clear_iommu_pte_present(pt_mfn, dfn_x(dfn));
+        old = clear_iommu_pte_present(pt_mfn, dfn_x(dfn), level);
     }
 
     spin_unlock(&hd->arch.mapping_lock);
 
     if ( old.pr )
+    {
         *flush_flags |= IOMMU_FLUSHF_modified;
 
+        if ( level > 1 && old.next_level )
+            queue_free_pt(d, _mfn(old.mfn), old.next_level);
+    }
+
     return 0;
 }
 
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
@@ -584,7 +584,7 @@ static void amd_dump_page_tables(struct
 }
 
 static const struct iommu_ops __initconstrel _iommu_ops = {
-    .page_sizes = PAGE_SIZE_4K,
+    .page_sizes = PAGE_SIZE_4K | PAGE_SIZE_2M | PAGE_SIZE_1G | PAGE_SIZE_512G,
     .init = amd_iommu_domain_init,
     .hwdom_init = amd_iommu_hwdom_init,
     .quarantine_init = amd_iommu_quarantine_init,
--- a/xen/include/xen/page-defs.h
+++ b/xen/include/xen/page-defs.h
@@ -21,4 +21,19 @@
 #define PAGE_MASK_64K               PAGE_MASK_GRAN(64K)
 #define PAGE_ALIGN_64K(addr)        PAGE_ALIGN_GRAN(64K, addr)
 
+#define PAGE_SHIFT_2M               21
+#define PAGE_SIZE_2M                PAGE_SIZE_GRAN(2M)
+#define PAGE_MASK_2M                PAGE_MASK_GRAN(2M)
+#define PAGE_ALIGN_2M(addr)         PAGE_ALIGN_GRAN(2M, addr)
+
+#define PAGE_SHIFT_1G               30
+#define PAGE_SIZE_1G                PAGE_SIZE_GRAN(1G)
+#define PAGE_MASK_1G                PAGE_MASK_GRAN(1G)
+#define PAGE_ALIGN_1G(addr)         PAGE_ALIGN_GRAN(1G, addr)
+
+#define PAGE_SHIFT_512G             39
+#define PAGE_SIZE_512G              PAGE_SIZE_GRAN(512G)
+#define PAGE_MASK_512G              PAGE_MASK_GRAN(512G)
+#define PAGE_ALIGN_512G(addr)       PAGE_ALIGN_GRAN(512G, addr)
+
 #endif /* __XEN_PAGE_DEFS_H__ */
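
Not part of the patch, just a self-contained sketch (illustrative names,
not Xen's) of how a caller might pick the largest mapping order that
such a page_sizes bitmap permits, with bit N set meaning 2^N-byte
mappings are supported:

    /* Return the largest mapping order, in units of 4k pages, that the
     * bitmap allows for a region of nr pages whose DFN and MFN have the
     * combined alignment given by "start" (e.g. dfn | mfn). */
    static unsigned int pick_order(unsigned long page_sizes,
                                   unsigned long start, unsigned long nr)
    {
        unsigned int order = 0;

        page_sizes >>= 12;   /* express sizes in 4k pages */
        page_sizes &= ~1UL;  /* drop the (mandatory) 4k bit */

        while ( page_sizes )
        {
            unsigned int shift = __builtin_ctzl(page_sizes);
            unsigned long mask = (1UL << shift) - 1;

            if ( (start & mask) || nr <= mask )
                break;

            order = shift;
            page_sizes &= ~(1UL << shift);
        }

        return order;
    }

With the AMD bitmap introduced above this yields 0, 9, 18, or 27,
matching the 4k, 2M, 1G, and 512G leaves installed by
set_iommu_pte_present().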




Thread overview: 35+ messages
2021-08-24 14:13 [PATCH 00/17] IOMMU: superpage support when not sharing pagetables Jan Beulich
2021-08-24 14:15 ` [PATCH 01/17] AMD/IOMMU: avoid recording each level's MFN when walking page table Jan Beulich
2021-08-25 13:29   ` Andrew Cooper
2021-08-24 14:15 ` [PATCH 02/17] AMD/IOMMU: have callers specify the target level for page table walks Jan Beulich
2021-08-24 14:16 ` [PATCH 03/17] VT-d: " Jan Beulich
2021-08-24 14:17 ` [PATCH 04/17] IOMMU: have vendor code announce supported page sizes Jan Beulich
2021-09-16  7:38   ` Tian, Kevin
2021-08-24 14:18 ` [PATCH 05/17] IOMMU: add order parameter to ->{,un}map_page() hooks Jan Beulich
2021-09-16  7:41   ` Tian, Kevin
2021-08-24 14:19 ` [PATCH 06/17] IOMMU: have iommu_{,un}map() split requests into largest possible chunks Jan Beulich
2021-08-24 14:21 ` [PATCH 07/17] IOMMU/x86: restrict IO-APIC mappings for PV Dom0 Jan Beulich
2021-08-26 11:57   ` Andrew Cooper
2021-08-26 12:55     ` Jan Beulich
2021-09-07 17:13       ` Andrew Cooper
2021-09-08  9:44         ` Jan Beulich
2021-08-24 14:21 ` [PATCH 08/17] IOMMU/x86: perform PV Dom0 mappings in batches Jan Beulich
2021-08-24 14:22 ` [PATCH 09/17] IOMMU/x86: support freeing of pagetables Jan Beulich
2021-08-24 14:22 ` [PATCH 10/17] AMD/IOMMU: drop stray TLB flush Jan Beulich
2021-08-24 14:23 ` [PATCH 11/17] AMD/IOMMU: walk trees upon page fault Jan Beulich
2021-08-24 14:24 ` [PATCH 12/17] AMD/IOMMU: return old PTE from {set,clear}_iommu_pte_present() Jan Beulich
2021-08-24 14:25 ` Jan Beulich [this message]
2021-08-24 14:25 ` [PATCH 14/17] VT-d: allow use of superpage mappings Jan Beulich
2021-08-24 14:26 ` [PATCH 15/17] IOMMU: page table dumping adjustments Jan Beulich
2021-08-24 14:28   ` Jan Beulich
2021-09-16  7:33     ` Tian, Kevin
2021-08-24 14:27 ` [PATCH 16/17] VT-d: show permissions during page table walks Jan Beulich
2021-09-16  7:36   ` Tian, Kevin
2021-08-24 14:27 ` [PATCH 17/17] IOMMU/x86: drop pointless NULL checks Jan Beulich
2021-08-26 12:05   ` Andrew Cooper
2021-08-26 12:36     ` Jan Beulich
2021-09-15 12:42   ` Ping: " Jan Beulich
2021-09-16  7:47     ` Tian, Kevin
2021-09-16  8:24       ` Jan Beulich
2021-09-16  7:29   ` Tian, Kevin
2021-08-25 12:06 ` [PATCH 00/17] IOMMU: superpage support when not sharing pagetables Jan Beulich
