* [PATCH v2] amd-iommu: remove page merging code
@ 2018-11-27 14:58 Paul Durrant
  2018-11-27 15:57 ` Jan Beulich
  0 siblings, 1 reply; 5+ messages in thread
From: Paul Durrant @ 2018-11-27 14:58 UTC (permalink / raw)
  To: xen-devel; +Cc: Paul Durrant, Brian Woods, Suravee Suthikulpanit

The page merging logic makes use of bits 1-8 and bit 63 of a PTE, which used
to be specified as ignored. However, bits 5 and 6 are now specified as
'accessed' and 'dirty' bits and their use only remains safe as long as
the DTE 'Host Access Dirty' bits remain unused by Xen.
The code was also the subject of XSA-275 and, since then, has been disabled
after domain creation.
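
For illustration only (this snippet is not part of the change), a minimal
sketch of how the count field used by the merging code overlaps the bits now
defined as 'accessed' and 'dirty'; the constant names are invented for the
example, the real logic is in get_pde_count()/set_pde_count() below:

  #include <stdint.h>

  #define PDE_COUNT_LOW_MASK  (0xffULL << 1)  /* bits 1-8: low 8 bits of count */
  #define PDE_COUNT_HIGH_BIT  (1ULL << 63)    /* bit 63: bit 8 of count */
  #define PTE_ACCESSED_BIT    (1ULL << 5)     /* 'A' bit once HAD is in use */
  #define PTE_DIRTY_BIT       (1ULL << 6)     /* 'D' bit once HAD is in use */

  /* The count storage clobbers the A/D bits, hence the caveat above. */
  _Static_assert((PDE_COUNT_LOW_MASK & (PTE_ACCESSED_BIT | PTE_DIRTY_BIT)) ==
                 (PTE_ACCESSED_BIT | PTE_DIRTY_BIT),
                 "PDE count field overlaps the Accessed/Dirty bits");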

This patch removes the code, freeing up the remaining PTE 'ignored' bits
for other potential use and shortening the source by 170 lines. There may
be some marginal performance cost since higher order mappings will now be
ruled out until a mapping order parameter is passed to iommu_ops. That
will be dealt with by a subsequent patch though.
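
Purely as a sketch of the kind of interface meant here (the name and
signature are illustrative only, not what the follow-up patch will add):

  /* Hypothetical prototype, illustrating a 'mapping order' parameter. */
  int amd_iommu_map_page_order(struct domain *d, dfn_t dfn, mfn_t mfn,
                               unsigned int order, unsigned int flags);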

Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
---
Cc: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
Cc: Brian Woods <brian.woods@amd.com>

v2:
 - Remove 'no_merge' boolean
 - Expand commit comment
---
 xen/drivers/passthrough/amd/iommu_map.c | 175 +-------------------------------
 xen/include/asm-x86/iommu.h             |   1 -
 2 files changed, 1 insertion(+), 175 deletions(-)

diff --git a/xen/drivers/passthrough/amd/iommu_map.c b/xen/drivers/passthrough/amd/iommu_map.c
index 0ac3f473b3..04cb7b3182 100644
--- a/xen/drivers/passthrough/amd/iommu_map.c
+++ b/xen/drivers/passthrough/amd/iommu_map.c
@@ -323,134 +323,6 @@ uint64_t amd_iommu_get_address_from_pte(void *pte)
     return ptr;
 }
 
-/* For each pde, We use ignored bits (bit 1 - bit 8 and bit 63)
- * to save pde count, pde count = 511 is a candidate of page coalescing.
- */
-static unsigned int get_pde_count(uint64_t pde)
-{
-    unsigned int count;
-    uint64_t upper_mask = 1ULL << 63 ;
-    uint64_t lower_mask = 0xFF << 1;
-
-    count = ((pde & upper_mask) >> 55) | ((pde & lower_mask) >> 1);
-    return count;
-}
-
-/* Convert pde count into iommu pte ignored bits */
-static void set_pde_count(uint64_t *pde, unsigned int count)
-{
-    uint64_t upper_mask = 1ULL << 8 ;
-    uint64_t lower_mask = 0xFF;
-    uint64_t pte_mask = (~(1ULL << 63)) & (~(0xFF << 1));
-
-    *pde &= pte_mask;
-    *pde |= ((count & upper_mask ) << 55) | ((count & lower_mask ) << 1);
-}
-
-/* Return 1, if pages are suitable for merging at merge_level.
- * otherwise increase pde count if mfn is contigous with mfn - 1
- */
-static bool iommu_update_pde_count(struct domain *d, unsigned long pt_mfn,
-                                   unsigned long dfn, unsigned long mfn,
-                                   unsigned int merge_level)
-{
-    unsigned int pde_count, next_level;
-    unsigned long first_mfn;
-    uint64_t *table, *pde, *ntable;
-    uint64_t ntable_maddr, mask;
-    struct domain_iommu *hd = dom_iommu(d);
-    bool ok = false;
-
-    ASSERT( spin_is_locked(&hd->arch.mapping_lock) && pt_mfn );
-
-    next_level = merge_level - 1;
-
-    /* get pde at merge level */
-    table = map_domain_page(_mfn(pt_mfn));
-    pde = table + pfn_to_pde_idx(dfn, merge_level);
-
-    /* get page table of next level */
-    ntable_maddr = amd_iommu_get_address_from_pte(pde);
-    ntable = map_domain_page(_mfn(paddr_to_pfn(ntable_maddr)));
-
-    /* get the first mfn of next level */
-    first_mfn = amd_iommu_get_address_from_pte(ntable) >> PAGE_SHIFT;
-
-    if ( first_mfn == 0 )
-        goto out;
-
-    mask = (1ULL<< (PTE_PER_TABLE_SHIFT * next_level)) - 1;
-
-    if ( ((first_mfn & mask) == 0) &&
-         (((dfn & mask) | first_mfn) == mfn) )
-    {
-        pde_count = get_pde_count(*pde);
-
-        if ( pde_count == (PTE_PER_TABLE_SIZE - 1) )
-            ok = true;
-        else if ( pde_count < (PTE_PER_TABLE_SIZE - 1))
-        {
-            pde_count++;
-            set_pde_count(pde, pde_count);
-        }
-    }
-
-    else
-        /* non-contiguous mapping */
-        set_pde_count(pde, 0);
-
-out:
-    unmap_domain_page(ntable);
-    unmap_domain_page(table);
-
-    return ok;
-}
-
-static int iommu_merge_pages(struct domain *d, unsigned long pt_mfn,
-                             unsigned long dfn, unsigned int flags,
-                             unsigned int merge_level)
-{
-    uint64_t *table, *pde, *ntable;
-    uint64_t ntable_mfn;
-    unsigned long first_mfn;
-    struct domain_iommu *hd = dom_iommu(d);
-
-    ASSERT( spin_is_locked(&hd->arch.mapping_lock) && pt_mfn );
-
-    table = map_domain_page(_mfn(pt_mfn));
-    pde = table + pfn_to_pde_idx(dfn, merge_level);
-
-    /* get first mfn */
-    ntable_mfn = amd_iommu_get_address_from_pte(pde) >> PAGE_SHIFT;
-
-    if ( ntable_mfn == 0 )
-    {
-        unmap_domain_page(table);
-        return 1;
-    }
-
-    ntable = map_domain_page(_mfn(ntable_mfn));
-    first_mfn = amd_iommu_get_address_from_pte(ntable) >> PAGE_SHIFT;
-
-    if ( first_mfn == 0 )
-    {
-        unmap_domain_page(ntable);
-        unmap_domain_page(table);
-        return 1;
-    }
-
-    /* setup super page mapping, next level = 0 */
-    set_iommu_pde_present((uint32_t *)pde, first_mfn, 0,
-                          !!(flags & IOMMUF_writable),
-                          !!(flags & IOMMUF_readable));
-
-    amd_iommu_flush_all_pages(d);
-
-    unmap_domain_page(ntable);
-    unmap_domain_page(table);
-    return 0;
-}
-
 /* Walk io page tables and build level page tables if necessary
  * {Re, un}mapping super page frames causes re-allocation of io
  * page tables.
@@ -656,7 +528,6 @@ int amd_iommu_map_page(struct domain *d, dfn_t dfn, mfn_t mfn,
     struct domain_iommu *hd = dom_iommu(d);
     int rc;
     unsigned long pt_mfn[7];
-    unsigned int merge_level;
 
     if ( iommu_use_hap_pt(d) )
         return 0;
@@ -698,55 +569,14 @@ int amd_iommu_map_page(struct domain *d, dfn_t dfn, mfn_t mfn,
         return -EFAULT;
     }
 
-    /* Install 4k mapping first */
+    /* Install 4k mapping */
     need_flush = set_iommu_pte_present(pt_mfn[1], dfn_x(dfn), mfn_x(mfn), 1,
                                        !!(flags & IOMMUF_writable),
                                        !!(flags & IOMMUF_readable));
 
     if ( need_flush )
-    {
         amd_iommu_flush_pages(d, dfn_x(dfn), 0);
-        /* No further merging, as the logic doesn't cope. */
-        hd->arch.no_merge = true;
-    }
 
-    /*
-     * Suppress merging of non-R/W mappings or after initial table creation,
-     * as the merge logic does not cope with this.
-     */
-    if ( hd->arch.no_merge || flags != (IOMMUF_writable | IOMMUF_readable) )
-        goto out;
-    if ( d->creation_finished )
-    {
-        hd->arch.no_merge = true;
-        goto out;
-    }
-
-    for ( merge_level = 2; merge_level <= hd->arch.paging_mode;
-          merge_level++ )
-    {
-        if ( pt_mfn[merge_level] == 0 )
-            break;
-        if ( !iommu_update_pde_count(d, pt_mfn[merge_level],
-                                     dfn_x(dfn), mfn_x(mfn), merge_level) )
-            break;
-
-        if ( iommu_merge_pages(d, pt_mfn[merge_level], dfn_x(dfn),
-                               flags, merge_level) )
-        {
-            spin_unlock(&hd->arch.mapping_lock);
-            AMD_IOMMU_DEBUG("Merge iommu page failed at level %d, "
-                            "dfn = %"PRI_dfn" mfn = %"PRI_mfn"\n",
-                            merge_level, dfn_x(dfn), mfn_x(mfn));
-            domain_crash(d);
-            return -EFAULT;
-        }
-
-        /* Deallocate lower level page table */
-        free_amd_iommu_pgtable(mfn_to_page(_mfn(pt_mfn[merge_level - 1])));
-    }
-
-out:
     spin_unlock(&hd->arch.mapping_lock);
     return 0;
 }
@@ -798,9 +628,6 @@ int amd_iommu_unmap_page(struct domain *d, dfn_t dfn)
     /* mark PTE as 'page not present' */
     clear_iommu_pte_present(pt_mfn[1], dfn_x(dfn));
 
-    /* No further merging in amd_iommu_map_page(), as the logic doesn't cope. */
-    hd->arch.no_merge = true;
-
     spin_unlock(&hd->arch.mapping_lock);
 
     amd_iommu_flush_pages(d, dfn_x(dfn), 0);
diff --git a/xen/include/asm-x86/iommu.h b/xen/include/asm-x86/iommu.h
index 055466b5bf..8dc392473d 100644
--- a/xen/include/asm-x86/iommu.h
+++ b/xen/include/asm-x86/iommu.h
@@ -52,7 +52,6 @@ struct arch_iommu
 
     /* amd iommu support */
     int paging_mode;
-    bool no_merge;
     struct page_info *root_table;
     struct guest_iommu *g_iommu;
 };
-- 
2.11.0


* Re: [PATCH v2] amd-iommu: remove page merging code
  2018-11-27 14:58 [PATCH v2] amd-iommu: remove page merging code Paul Durrant
@ 2018-11-27 15:57 ` Jan Beulich
  2018-11-27 16:05   ` Paul Durrant
  0 siblings, 1 reply; 5+ messages in thread
From: Jan Beulich @ 2018-11-27 15:57 UTC (permalink / raw)
  To: Paul Durrant; +Cc: xen-devel, Brian Woods, Suravee Suthikulpanit

>>> On 27.11.18 at 15:58, <paul.durrant@citrix.com> wrote:
> The page merging logic makes use of bits 1-8 and bit 63 of a PTE, which used
> to be specified as ignored. However, bits 5 and 6 are now specified as
> 'accessed' and 'dirty' bits and their use only remains safe as long as
> the DTE 'Host Access Dirty' bits remain unused by Xen.

... or as long as the two page table bits don't get made use of
(by Xen or hardware) before the domain starts running (i.e.
the "hardware" part is always true afaict).

> The code was also the subject of XSA-275 and, since then, has been disabled
> after domain creation.
> 
> This patch removes the code, freeing up the remaining PTE 'ignored' bits
> for other potential use and shortening the source by 170 lines. There may
> be some marginal performance cost since higher order mappings will now be
> ruled out until a mapping order parameter is passed to iommu_ops.

"Marginal" is a guess, or supported by actual measurements? With
heavy S/G of small blocks of data I could easily see this become
more than a marginal increase of overhead. How bad it is certainly
also depends on IOTLB capacity.

What I would find more convincing would be if there was a reason
why a fair part of the large page mappings get shattered anyway
today.

Jan



* Re: [PATCH v2] amd-iommu: remove page merging code
  2018-11-27 15:57 ` Jan Beulich
@ 2018-11-27 16:05   ` Paul Durrant
  2018-11-27 16:38     ` Jan Beulich
  0 siblings, 1 reply; 5+ messages in thread
From: Paul Durrant @ 2018-11-27 16:05 UTC (permalink / raw)
  To: 'Jan Beulich'; +Cc: xen-devel, Brian Woods, Suravee Suthikulpanit

> -----Original Message-----
> From: Jan Beulich [mailto:JBeulich@suse.com]
> Sent: 27 November 2018 15:58
> To: Paul Durrant <Paul.Durrant@citrix.com>
> Cc: Brian Woods <brian.woods@amd.com>; Suravee Suthikulpanit
> <suravee.suthikulpanit@amd.com>; xen-devel <xen-devel@lists.xenproject.org>
> Subject: Re: [Xen-devel] [PATCH v2] amd-iommu: remove page merging code
> 
> >>> On 27.11.18 at 15:58, <paul.durrant@citrix.com> wrote:
> > The page merging logic makes use of bits 1-8 and bit 63 of a PTE, which
> > used to be specified as ignored. However, bits 5 and 6 are now specified
> > as 'accessed' and 'dirty' bits and their use only remains safe as long as
> > the DTE 'Host Access Dirty' bits remain unused by Xen.
> 
> ... or as long as the two page table bits don't get made use of
> (by Xen or hardware) before the domain starts running (i.e.
> the "hardware" part is always true afaict).
> 

Yes, that would also be true. I still don't like the re-use of bits that are no longer marked explicitly as ignored though.

> > The code was also the subject of XSA-275 and, since then, has been
> > disabled after domain creation.
> >
> > This patch removes the code, freeing up the remaining PTE 'ignored' bits
> > for other potential use and shortening the source by 170 lines. There may
> > be some marginal performance cost since higher order mappings will now be
> > ruled out until a mapping order parameter is passed to iommu_ops.
> 
> "Marginal" is a guess, or supported by actual measurements? With
> heavy S/G of small blocks of data I could easily see this become
> more than a marginal increase of overhead. How bad it is certainly
> also depends on IOTLB capacity.

Yes, it comes down to IOTLB capacity, and therefore I don't know what the real performance cost is. I saw no problem in manual testing with a GPU, but I don't have comparative benchmark numbers.

> 
> What I would find more convincing would be if there was a reason
> why a fair part of the large page mappings get shattered anyway
> today.
> 

Well, I could just leave PV-IOMMU unimplemented for AMD h/w if you think the page merging code is more important since, without the spare ignored bits, I have no way to track pages added by the hypercall vs. those added at start of day. I personally think that the simpler code is worthwhile in itself but I guess you disagree.
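
For illustration, the kind of tracking I have in mind (the bit choice and
helper names here are purely hypothetical, not existing or proposed code):

  #include <stdbool.h>
  #include <stdint.h>

  /* Hypothetical: use one of the PTE bits freed up by this patch to mark
   * mappings established via a PV-IOMMU hypercall rather than at start of
   * day.  Bit 1 is just an example of a previously 'ignored' bit.
   */
  #define PTE_IGN_PV_IOMMU (1ULL << 1)

  static inline void pte_mark_pv_iommu(uint64_t *pte)
  {
      *pte |= PTE_IGN_PV_IOMMU;
  }

  static inline bool pte_is_pv_iommu(uint64_t pte)
  {
      return pte & PTE_IGN_PV_IOMMU;
  }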


  Paul


* Re: [PATCH v2] amd-iommu: remove page merging code
  2018-11-27 16:05   ` Paul Durrant
@ 2018-11-27 16:38     ` Jan Beulich
  2018-11-27 16:40       ` Paul Durrant
  0 siblings, 1 reply; 5+ messages in thread
From: Jan Beulich @ 2018-11-27 16:38 UTC (permalink / raw)
  To: Paul Durrant; +Cc: xen-devel, Brian Woods, Suravee Suthikulpanit

>>> On 27.11.18 at 17:05, <Paul.Durrant@citrix.com> wrote:
> Well, I could just leave PV-IOMMU unimplemented for AMD h/w if you think the 
> page merging code is more important since, without the spare ignored bits, I 
> have no way to track pages added by the hypercall vs. those added at start of 
> day. I personally think that the simpler code is worthwhile in itself but I 
> guess you disagree.

Not exactly: With a proper reason (here: you need to free up at least
one of the bits; "for other potential use" simply is too vague for my
taste, and to be honest I had already forgotten about this need of
yours), I can live with this. Plus I'm not the maintainer of this code
anyway.

Jan




* Re: [PATCH v2] amd-iommu: remove page merging code
  2018-11-27 16:38     ` Jan Beulich
@ 2018-11-27 16:40       ` Paul Durrant
  0 siblings, 0 replies; 5+ messages in thread
From: Paul Durrant @ 2018-11-27 16:40 UTC (permalink / raw)
  To: 'Jan Beulich'; +Cc: xen-devel, Brian Woods, Suravee Suthikulpanit

> -----Original Message-----
> From: Jan Beulich [mailto:JBeulich@suse.com]
> Sent: 27 November 2018 16:38
> To: Paul Durrant <Paul.Durrant@citrix.com>
> Cc: Brian Woods <brian.woods@amd.com>; Suravee Suthikulpanit
> <suravee.suthikulpanit@amd.com>; xen-devel <xen-devel@lists.xenproject.org>
> Subject: RE: [Xen-devel] [PATCH v2] amd-iommu: remove page merging code
> 
> >>> On 27.11.18 at 17:05, <Paul.Durrant@citrix.com> wrote:
> > Well, I could just leave PV-IOMMU unimplemented for AMD h/w if you think
> > the page merging code is more important since, without the spare ignored
> > bits, I have no way to track pages added by the hypercall vs. those added
> > at start of day. I personally think that the simpler code is worthwhile
> > in itself but I guess you disagree.
> 
> Not exactly: With a proper reason (here: you need to free up at least
> one of the bits; "for other potential use" simply is too vague for my
> taste, and to be honest I had already forgotten about this need of
> yours), I can live with this. Plus I'm not the maintainer of this code
> anyway.
> 

Ok, I'll send v3 adjusting the comment again.

  Paul

> Jan
> 


