All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] dump_p2m_table: For IOMMU
@ 2012-08-10  1:43 Santosh Jodh
  2012-08-10  7:49 ` Jan Beulich
  2012-08-10 12:31 ` Wei Wang
  0 siblings, 2 replies; 24+ messages in thread
From: Santosh Jodh @ 2012-08-10  1:43 UTC (permalink / raw)
  To: xen-devel; +Cc: wei.wang2, tim, xiantao.zhang

New key handler 'o' to dump the IOMMU p2m table for each domain.
Skips dumping table for domain0.
Intel and AMD specific iommu_ops handler for dumping p2m table.

Signed-off-by: Santosh Jodh <santosh.jodh@citrix.com>

diff -r 472fc515a463 -r f687c5580262 xen/drivers/passthrough/amd/pci_amd_iommu.c
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c	Tue Aug 07 18:37:31 2012 +0100
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c	Thu Aug 09 18:31:32 2012 -0700
@@ -22,6 +22,7 @@
 #include <xen/pci.h>
 #include <xen/pci_regs.h>
 #include <xen/paging.h>
+#include <xen/softirq.h>
 #include <asm/hvm/iommu.h>
 #include <asm/amd-iommu.h>
 #include <asm/hvm/svm/amd-iommu-proto.h>
@@ -512,6 +513,81 @@ static int amd_iommu_group_id(u16 seg, u
 
 #include <asm/io_apic.h>
 
+static void amd_dump_p2m_table_level(struct page_info* pg, int level, 
+                                     paddr_t gpa, int *indent)
+{
+    paddr_t address;
+    void *table_vaddr, *pde;
+    paddr_t next_table_maddr;
+    int index, next_level, present;
+    u32 *entry;
+
+    if ( level <= 1 )
+        return;
+
+    table_vaddr = __map_domain_page(pg);
+    if ( table_vaddr == NULL )
+    {
+        printk("Failed to map IOMMU domain page %"PRIpaddr"\n", 
+                page_to_maddr(pg));
+        return;
+    }
+
+    for ( index = 0; index < PTE_PER_TABLE_SIZE; index++ )
+    {
+        if ( !(index % 2) )
+            process_pending_softirqs();
+
+        pde = table_vaddr + (index * IOMMU_PAGE_TABLE_ENTRY_SIZE);
+        next_table_maddr = amd_iommu_get_next_table_from_pte(pde);
+        entry = (u32*)pde;
+
+        next_level = get_field_from_reg_u32(entry[0],
+                                            IOMMU_PDE_NEXT_LEVEL_MASK,
+                                            IOMMU_PDE_NEXT_LEVEL_SHIFT);
+
+        present = get_field_from_reg_u32(entry[0],
+                                         IOMMU_PDE_PRESENT_MASK,
+                                         IOMMU_PDE_PRESENT_SHIFT);
+
+        address = gpa + amd_offset_level_address(index, level);
+        if ( present )
+        {
+            int i;
+
+            for ( i = 0; i < *indent; i++ )
+                printk("  ");
+
+            printk("gfn: %"PRIpaddr"  mfn: %"PRIpaddr"\n",
+                   PFN_DOWN(address), PFN_DOWN(next_table_maddr));
+
+            if ( (next_table_maddr != 0) && (next_level != 0) )
+            {
+                *indent += 1;
+                amd_dump_p2m_table_level(
+                    maddr_to_page(next_table_maddr), level - 1, 
+                    address, indent);
+
+                *indent -= 1;
+            }     
+        }
+    }
+
+    unmap_domain_page(table_vaddr);
+}
+
+static void amd_dump_p2m_table(struct domain *d)
+{
+    struct hvm_iommu *hd  = domain_hvm_iommu(d);
+    int indent;
+
+    if ( !hd->root_table ) 
+        return;
+
+    indent = 0;
+    amd_dump_p2m_table_level(hd->root_table, hd->paging_mode, 0, &indent);
+}
+
 const struct iommu_ops amd_iommu_ops = {
     .init = amd_iommu_domain_init,
     .dom0_init = amd_iommu_dom0_init,
@@ -531,4 +607,5 @@ const struct iommu_ops amd_iommu_ops = {
     .resume = amd_iommu_resume,
     .share_p2m = amd_iommu_share_p2m,
     .crash_shutdown = amd_iommu_suspend,
+    .dump_p2m_table = amd_dump_p2m_table,
 };
diff -r 472fc515a463 -r f687c5580262 xen/drivers/passthrough/iommu.c
--- a/xen/drivers/passthrough/iommu.c	Tue Aug 07 18:37:31 2012 +0100
+++ b/xen/drivers/passthrough/iommu.c	Thu Aug 09 18:31:32 2012 -0700
@@ -18,11 +18,13 @@
 #include <asm/hvm/iommu.h>
 #include <xen/paging.h>
 #include <xen/guest_access.h>
+#include <xen/keyhandler.h>
 #include <xen/softirq.h>
 #include <xsm/xsm.h>
 
 static void parse_iommu_param(char *s);
 static int iommu_populate_page_table(struct domain *d);
+static void iommu_dump_p2m_table(unsigned char key);
 
 /*
  * The 'iommu' parameter enables the IOMMU.  Optional comma separated
@@ -54,6 +56,12 @@ bool_t __read_mostly amd_iommu_perdev_in
 
 DEFINE_PER_CPU(bool_t, iommu_dont_flush_iotlb);
 
+static struct keyhandler iommu_p2m_table = {
+    .diagnostic = 0,
+    .u.fn = iommu_dump_p2m_table,
+    .desc = "dump iommu p2m table"
+};
+
 static void __init parse_iommu_param(char *s)
 {
     char *ss;
@@ -119,6 +127,7 @@ void __init iommu_dom0_init(struct domai
     if ( !iommu_enabled )
         return;
 
+    register_keyhandler('o', &iommu_p2m_table);
     d->need_iommu = !!iommu_dom0_strict;
     if ( need_iommu(d) )
     {
@@ -654,6 +663,34 @@ int iommu_do_domctl(
     return ret;
 }
 
+static void iommu_dump_p2m_table(unsigned char key)
+{
+    struct domain *d;
+    const struct iommu_ops *ops;
+
+    if ( !iommu_enabled )
+    {
+        printk("IOMMU not enabled!\n");
+        return;
+    }
+
+    ops = iommu_get_ops();
+    for_each_domain(d)
+    {
+        if ( !d->domain_id )
+            continue;
+
+        if ( iommu_use_hap_pt(d) )
+        {
+            printk("\ndomain%d IOMMU p2m table shared with MMU: \n", d->domain_id);
+            continue;
+        }
+
+        printk("\ndomain%d IOMMU p2m table: \n", d->domain_id);
+        ops->dump_p2m_table(d);
+    }
+}
+
 /*
  * Local variables:
  * mode: C
diff -r 472fc515a463 -r f687c5580262 xen/drivers/passthrough/vtd/iommu.c
--- a/xen/drivers/passthrough/vtd/iommu.c	Tue Aug 07 18:37:31 2012 +0100
+++ b/xen/drivers/passthrough/vtd/iommu.c	Thu Aug 09 18:31:32 2012 -0700
@@ -31,6 +31,7 @@
 #include <xen/pci.h>
 #include <xen/pci_regs.h>
 #include <xen/keyhandler.h>
+#include <xen/softirq.h>
 #include <asm/msi.h>
 #include <asm/irq.h>
 #if defined(__i386__) || defined(__x86_64__)
@@ -2365,6 +2366,71 @@ static void vtd_resume(void)
     }
 }
 
+static void vtd_dump_p2m_table_level(paddr_t pt_maddr, int level, paddr_t gpa, 
+                                     int *indent)
+{
+    paddr_t address;
+    int i;
+    struct dma_pte *pt_vaddr, *pte;
+    int next_level;
+
+    if ( pt_maddr == 0 )
+        return;
+
+    pt_vaddr = map_vtd_domain_page(pt_maddr);
+    if ( pt_vaddr == NULL )
+    {
+        printk("Failed to map VT-D domain page %"PRIpaddr"\n", pt_maddr);
+        return;
+    }
+
+    next_level = level - 1;
+    for ( i = 0; i < PTE_NUM; i++ )
+    {
+        if ( !(i % 2) )
+            process_pending_softirqs();
+
+        pte = &pt_vaddr[i];
+        if ( dma_pte_present(*pte) )
+        {
+            int j;
+
+            for ( j = 0; j < *indent; j++ )
+                printk("  ");
+
+            address = gpa + offset_level_address(i, level);
+            printk("gfn: %"PRIpaddr" mfn: %"PRIpaddr" super=%d rd=%d wr=%d\n",
+                    address >> PAGE_SHIFT_4K, pte->val >> PAGE_SHIFT_4K,
+                    dma_pte_superpage(*pte)? 1 : 0, dma_pte_read(*pte)? 1 : 0,
+                    dma_pte_write(*pte)? 1 : 0);
+
+            if ( next_level >= 1 ) {
+                *indent += 1;
+                vtd_dump_p2m_table_level(dma_pte_addr(*pte), next_level, 
+                                         address, indent);
+
+                *indent -= 1;
+            }
+        }
+    }
+
+    unmap_vtd_domain_page(pt_vaddr);
+}
+
+static void vtd_dump_p2m_table(struct domain *d)
+{
+    struct hvm_iommu *hd;
+    int indent;
+
+    if ( list_empty(&acpi_drhd_units) )
+        return;
+
+    hd = domain_hvm_iommu(d);
+    indent = 0;
+    vtd_dump_p2m_table_level(hd->pgd_maddr, agaw_to_level(hd->agaw), 
+                             0, &indent);
+}
+
 const struct iommu_ops intel_iommu_ops = {
     .init = intel_iommu_domain_init,
     .dom0_init = intel_iommu_dom0_init,
@@ -2387,6 +2453,7 @@ const struct iommu_ops intel_iommu_ops =
     .crash_shutdown = vtd_crash_shutdown,
     .iotlb_flush = intel_iommu_iotlb_flush,
     .iotlb_flush_all = intel_iommu_iotlb_flush_all,
+    .dump_p2m_table = vtd_dump_p2m_table,
 };
 
 /*
diff -r 472fc515a463 -r f687c5580262 xen/drivers/passthrough/vtd/iommu.h
--- a/xen/drivers/passthrough/vtd/iommu.h	Tue Aug 07 18:37:31 2012 +0100
+++ b/xen/drivers/passthrough/vtd/iommu.h	Thu Aug 09 18:31:32 2012 -0700
@@ -248,6 +248,8 @@ struct context_entry {
 #define level_to_offset_bits(l) (12 + (l - 1) * LEVEL_STRIDE)
 #define address_level_offset(addr, level) \
             ((addr >> level_to_offset_bits(level)) & LEVEL_MASK)
+#define offset_level_address(offset, level) \
+            ((u64)(offset) << level_to_offset_bits(level))
 #define level_mask(l) (((u64)(-1)) << level_to_offset_bits(l))
 #define level_size(l) (1 << level_to_offset_bits(l))
 #define align_to_level(addr, l) ((addr + level_size(l) - 1) & level_mask(l))
@@ -277,6 +279,9 @@ struct dma_pte {
 #define dma_set_pte_addr(p, addr) do {\
             (p).val |= ((addr) & PAGE_MASK_4K); } while (0)
 #define dma_pte_present(p) (((p).val & 3) != 0)
+#define dma_pte_superpage(p) (((p).val & (1<<7)) != 0)
+#define dma_pte_read(p) (((p).val & DMA_PTE_READ) != 0)
+#define dma_pte_write(p) (((p).val & DMA_PTE_WRITE) != 0)
 
 /* interrupt remap entry */
 struct iremap_entry {
diff -r 472fc515a463 -r f687c5580262 xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h	Tue Aug 07 18:37:31 2012 +0100
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h	Thu Aug 09 18:31:32 2012 -0700
@@ -38,6 +38,10 @@
 #define PTE_PER_TABLE_ALLOC(entries)	\
 	PAGE_SIZE * (PTE_PER_TABLE_ALIGN(entries) >> PTE_PER_TABLE_SHIFT)
 
+#define amd_offset_level_address(offset, level) \
+      	((u64)(offset) << ((PTE_PER_TABLE_SHIFT * \
+                             (level - IOMMU_PAGING_MODE_LEVEL_1))))
+
 #define PCI_MIN_CAP_OFFSET	0x40
 #define PCI_MAX_CAP_BLOCKS	48
 #define PCI_CAP_PTR_MASK	0xFC
diff -r 472fc515a463 -r f687c5580262 xen/include/xen/iommu.h
--- a/xen/include/xen/iommu.h	Tue Aug 07 18:37:31 2012 +0100
+++ b/xen/include/xen/iommu.h	Thu Aug 09 18:31:32 2012 -0700
@@ -141,6 +141,7 @@ struct iommu_ops {
     void (*crash_shutdown)(void);
     void (*iotlb_flush)(struct domain *d, unsigned long gfn, unsigned int page_count);
     void (*iotlb_flush_all)(struct domain *d);
+    void (*dump_p2m_table)(struct domain *d);
 };
 
 void iommu_update_ire_from_apic(unsigned int apic, unsigned int reg, unsigned int value);

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] dump_p2m_table: For IOMMU
  2012-08-10  1:43 [PATCH] dump_p2m_table: For IOMMU Santosh Jodh
@ 2012-08-10  7:49 ` Jan Beulich
  2012-08-10 12:31 ` Wei Wang
  1 sibling, 0 replies; 24+ messages in thread
From: Jan Beulich @ 2012-08-10  7:49 UTC (permalink / raw)
  To: Santosh Jodh, xen-devel; +Cc: wei.wang2, tim, xiantao.zhang

>>> On 10.08.12 at 03:43, Santosh Jodh <santosh.jodh@citrix.com> wrote:
> New key handler 'o' to dump the IOMMU p2m table for each domain.
> Skips dumping table for domain0.
> Intel and AMD specific iommu_ops handler for dumping p2m table.

Looks pretty good now to me, just one minor comment below.
But it will of course need the ack of both the VT-d and AMD IOMMU
maintainers (who, while on Cc, didn't participate in the discussion
so far).

> +    amd_dump_p2m_table_level(hd->root_table, hd->paging_mode, 0, &indent);

Why do you pass a pointer here? Just pass 0, and in the recursive
call pass either +1 or +(level - next_level). (The inconsistency on
the use of next_level will need to be commented on by Wei anyway,
as already requested in an earlier reply.)

Jan

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] dump_p2m_table: For IOMMU
  2012-08-10  1:43 [PATCH] dump_p2m_table: For IOMMU Santosh Jodh
  2012-08-10  7:49 ` Jan Beulich
@ 2012-08-10 12:31 ` Wei Wang
  2012-08-10 13:02   ` Jan Beulich
  2012-08-10 15:02   ` Santosh Jodh
  1 sibling, 2 replies; 24+ messages in thread
From: Wei Wang @ 2012-08-10 12:31 UTC (permalink / raw)
  To: Santosh Jodh; +Cc: xen-devel, tim, xiantao.zhang

Hi Santosh,

On 08/10/2012 03:43 AM, Santosh Jodh wrote:
> New key handler 'o' to dump the IOMMU p2m table for each domain.
> Skips dumping table for domain0.
> Intel and AMD specific iommu_ops handler for dumping p2m table.
>
> Signed-off-by: Santosh Jodh<santosh.jodh@citrix.com>
>
> diff -r 472fc515a463 -r f687c5580262 xen/drivers/passthrough/amd/pci_amd_iommu.c
> --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c	Tue Aug 07 18:37:31 2012 +0100
> +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c	Thu Aug 09 18:31:32 2012 -0700
> @@ -22,6 +22,7 @@
>   #include<xen/pci.h>
>   #include<xen/pci_regs.h>
>   #include<xen/paging.h>
> +#include<xen/softirq.h>
>   #include<asm/hvm/iommu.h>
>   #include<asm/amd-iommu.h>
>   #include<asm/hvm/svm/amd-iommu-proto.h>
> @@ -512,6 +513,81 @@ static int amd_iommu_group_id(u16 seg, u
>
>   #include<asm/io_apic.h>
>
> +static void amd_dump_p2m_table_level(struct page_info* pg, int level,
> +                                     paddr_t gpa, int *indent)
> +{
> +    paddr_t address;
> +    void *table_vaddr, *pde;
> +    paddr_t next_table_maddr;
> +    int index, next_level, present;
> +    u32 *entry;
> +
> +    if ( level<= 1 )
> +        return;

So the L1 page table is not printed — is that by design?


> +    table_vaddr = __map_domain_page(pg);
> +    if ( table_vaddr == NULL )
> +    {
> +        printk("Failed to map IOMMU domain page %"PRIpaddr"\n",
> +                page_to_maddr(pg));
> +        return;
> +    }
> +
> +    for ( index = 0; index<  PTE_PER_TABLE_SIZE; index++ )
> +    {
> +        if ( !(index % 2) )
> +            process_pending_softirqs();
> +
> +        pde = table_vaddr + (index * IOMMU_PAGE_TABLE_ENTRY_SIZE);
> +        next_table_maddr = amd_iommu_get_next_table_from_pte(pde);
> +        entry = (u32*)pde;
> +
> +        next_level = get_field_from_reg_u32(entry[0],
> +                                            IOMMU_PDE_NEXT_LEVEL_MASK,
> +                                            IOMMU_PDE_NEXT_LEVEL_SHIFT);
> +
> +        present = get_field_from_reg_u32(entry[0],
> +                                         IOMMU_PDE_PRESENT_MASK,
> +                                         IOMMU_PDE_PRESENT_SHIFT);
> +
> +        address = gpa + amd_offset_level_address(index, level);
> +        if ( present )
> +        {
> +            int i;
> +
> +            for ( i = 0; i<  *indent; i++ )
> +                printk("  ");
> +
> +            printk("gfn: %"PRIpaddr"  mfn: %"PRIpaddr"\n",
> +                   PFN_DOWN(address), PFN_DOWN(next_table_maddr));
> +
> +            if ( (next_table_maddr != 0)&&  (next_level != 0) )
> +            {
> +                *indent += 1;
> +                amd_dump_p2m_table_level(
> +                    maddr_to_page(next_table_maddr), level - 1,
> +                    address, indent);
> +
> +                *indent -= 1;
> +            }
> +        }
> +    }
> +
> +    unmap_domain_page(table_vaddr);
> +}
> +
> +static void amd_dump_p2m_table(struct domain *d)
> +{
> +    struct hvm_iommu *hd  = domain_hvm_iommu(d);
> +    int indent;
> +
> +    if ( !hd->root_table )
> +        return;
> +
> +    indent = 0;
> +    amd_dump_p2m_table_level(hd->root_table, hd->paging_mode, 0,&indent);
> +}
> +
>   const struct iommu_ops amd_iommu_ops = {
>       .init = amd_iommu_domain_init,
>       .dom0_init = amd_iommu_dom0_init,
> @@ -531,4 +607,5 @@ const struct iommu_ops amd_iommu_ops = {
>       .resume = amd_iommu_resume,
>       .share_p2m = amd_iommu_share_p2m,
>       .crash_shutdown = amd_iommu_suspend,
> +    .dump_p2m_table = amd_dump_p2m_table,
>   };


I tested the amd part with an amd iommu system and I got output like this:

XEN) domain1 IOMMU p2m table:
(XEN) gfn: 0000000000000000  mfn: 00000000001b1fb5
(XEN)   gfn: 0000000000000000  mfn: 000000000021f80b
(XEN)   gfn: 0000000000000000  mfn: 000000000023f400
(XEN)   gfn: 0000000000000000  mfn: 000000000010a600
(XEN)   gfn: 0000000000000000  mfn: 000000000023f200
(XEN)   gfn: 0000000000000000  mfn: 000000000010a400
(XEN)   gfn: 0000000000000000  mfn: 000000000023f000
(XEN)   gfn: 0000000000000000  mfn: 000000000010ae00
(XEN)   gfn: 0000000000000000  mfn: 000000000023ee00
(XEN)   gfn: 0000000000000001  mfn: 000000000010ac00
(XEN)   gfn: 0000000000000001  mfn: 000000000023ec00
(XEN)   gfn: 0000000000000001  mfn: 000000000010aa00
(XEN)   gfn: 0000000000000001  mfn: 000000000023ea00
(XEN)   gfn: 0000000000000001  mfn: 000000000010a800
(XEN)   gfn: 0000000000000001  mfn: 000000000023e800
(XEN)   gfn: 0000000000000001  mfn: 000000000010be00
(XEN)   gfn: 0000000000000001  mfn: 000000000023e600
(XEN)   gfn: 0000000000000002  mfn: 000000000010bc00
(XEN)   gfn: 0000000000000002  mfn: 000000000023e400
(XEN)   gfn: 0000000000000002  mfn: 000000000010ba00
(XEN)   gfn: 0000000000000002  mfn: 000000000023e200
(XEN)   gfn: 0000000000000002  mfn: 000000000010b800
(XEN)   gfn: 0000000000000002  mfn: 000000000023e000
(XEN)   gfn: 0000000000000002  mfn: 000000000010b600
(XEN)   gfn: 0000000000000002  mfn: 000000000023de00
(XEN)   gfn: 0000000000000003  mfn: 000000000010b400
(XEN)   gfn: 0000000000000003  mfn: 000000000023dc00
(XEN)   gfn: 0000000000000003  mfn: 000000000010b200
(XEN)   gfn: 0000000000000003  mfn: 000000000023da00
(XEN)   gfn: 0000000000000003  mfn: 000000000010b000
(XEN)   gfn: 0000000000000003  mfn: 000000000023d800
(XEN)   gfn: 0000000000000003  mfn: 000000000010fe00
(XEN)   gfn: 0000000000000003  mfn: 000000000023d600
(XEN)   gfn: 0000000000000004  mfn: 000000000010fc00
(XEN)   gfn: 0000000000000004  mfn: 000000000023d400
(XEN)   gfn: 0000000000000004  mfn: 000000000010fa00
(XEN)   gfn: 0000000000000004  mfn: 000000000023d200
(XEN)   gfn: 0000000000000004  mfn: 000000000010f800
(XEN)   gfn: 0000000000000004  mfn: 000000000023d000
(XEN)   gfn: 0000000000000004  mfn: 000000000010f600
(XEN)   gfn: 0000000000000004  mfn: 000000000023ce00
(XEN)   gfn: 0000000000000005  mfn: 000000000010f400
(XEN)   gfn: 0000000000000005  mfn: 000000000023cc00
(XEN)   gfn: 0000000000000005  mfn: 000000000010f200
(XEN)   gfn: 0000000000000005  mfn: 000000000023ca00
(XEN)   gfn: 0000000000000005  mfn: 000000000010f000
(XEN)   gfn: 0000000000000005  mfn: 000000000023c800
(XEN)   gfn: 0000000000000005  mfn: 000000000010ee00
(XEN)   gfn: 0000000000000005  mfn: 000000000023c600
(XEN)   gfn: 0000000000000006  mfn: 000000000010ec00
(XEN)   gfn: 0000000000000006  mfn: 000000000023c400
(XEN)   gfn: 0000000000000006  mfn: 000000000010ea00
(XEN)   gfn: 0000000000000006  mfn: 000000000023c200
(XEN)   gfn: 0000000000000006  mfn: 000000000010e800
(XEN)   gfn: 0000000000000006  mfn: 000000000023c000

It looks like the same gfn has been mapped to multiple mfns. Do you want 
to output only the gfn-to-mfn mapping, or do you also want to output the 
addresses of the intermediate page tables? What do "gfn" and "mfn" stand 
for here?

Thanks,
Wei



> diff -r 472fc515a463 -r f687c5580262 xen/drivers/passthrough/iommu.c
> --- a/xen/drivers/passthrough/iommu.c	Tue Aug 07 18:37:31 2012 +0100
> +++ b/xen/drivers/passthrough/iommu.c	Thu Aug 09 18:31:32 2012 -0700
> @@ -18,11 +18,13 @@
>   #include<asm/hvm/iommu.h>
>   #include<xen/paging.h>
>   #include<xen/guest_access.h>
> +#include<xen/keyhandler.h>
>   #include<xen/softirq.h>
>   #include<xsm/xsm.h>
>
>   static void parse_iommu_param(char *s);
>   static int iommu_populate_page_table(struct domain *d);
> +static void iommu_dump_p2m_table(unsigned char key);
>
>   /*
>    * The 'iommu' parameter enables the IOMMU.  Optional comma separated
> @@ -54,6 +56,12 @@ bool_t __read_mostly amd_iommu_perdev_in
>
>   DEFINE_PER_CPU(bool_t, iommu_dont_flush_iotlb);
>
> +static struct keyhandler iommu_p2m_table = {
> +    .diagnostic = 0,
> +    .u.fn = iommu_dump_p2m_table,
> +    .desc = "dump iommu p2m table"
> +};
> +
>   static void __init parse_iommu_param(char *s)
>   {
>       char *ss;
> @@ -119,6 +127,7 @@ void __init iommu_dom0_init(struct domai
>       if ( !iommu_enabled )
>           return;
>
> +    register_keyhandler('o',&iommu_p2m_table);
>       d->need_iommu = !!iommu_dom0_strict;
>       if ( need_iommu(d) )
>       {
> @@ -654,6 +663,34 @@ int iommu_do_domctl(
>       return ret;
>   }
>
> +static void iommu_dump_p2m_table(unsigned char key)
> +{
> +    struct domain *d;
> +    const struct iommu_ops *ops;
> +
> +    if ( !iommu_enabled )
> +    {
> +        printk("IOMMU not enabled!\n");
> +        return;
> +    }
> +
> +    ops = iommu_get_ops();
> +    for_each_domain(d)
> +    {
> +        if ( !d->domain_id )
> +            continue;
> +
> +        if ( iommu_use_hap_pt(d) )
> +        {
> +            printk("\ndomain%d IOMMU p2m table shared with MMU: \n", d->domain_id);
> +            continue;
> +        }
> +
> +        printk("\ndomain%d IOMMU p2m table: \n", d->domain_id);
> +        ops->dump_p2m_table(d);
> +    }
> +}
> +
>   /*
>    * Local variables:
>    * mode: C
> diff -r 472fc515a463 -r f687c5580262 xen/drivers/passthrough/vtd/iommu.c
> --- a/xen/drivers/passthrough/vtd/iommu.c	Tue Aug 07 18:37:31 2012 +0100
> +++ b/xen/drivers/passthrough/vtd/iommu.c	Thu Aug 09 18:31:32 2012 -0700
> @@ -31,6 +31,7 @@
>   #include<xen/pci.h>
>   #include<xen/pci_regs.h>
>   #include<xen/keyhandler.h>
> +#include<xen/softirq.h>
>   #include<asm/msi.h>
>   #include<asm/irq.h>
>   #if defined(__i386__) || defined(__x86_64__)
> @@ -2365,6 +2366,71 @@ static void vtd_resume(void)
>       }
>   }
>
> +static void vtd_dump_p2m_table_level(paddr_t pt_maddr, int level, paddr_t gpa,
> +                                     int *indent)
> +{
> +    paddr_t address;
> +    int i;
> +    struct dma_pte *pt_vaddr, *pte;
> +    int next_level;
> +
> +    if ( pt_maddr == 0 )
> +        return;
> +
> +    pt_vaddr = map_vtd_domain_page(pt_maddr);
> +    if ( pt_vaddr == NULL )
> +    {
> +        printk("Failed to map VT-D domain page %"PRIpaddr"\n", pt_maddr);
> +        return;
> +    }
> +
> +    next_level = level - 1;
> +    for ( i = 0; i<  PTE_NUM; i++ )
> +    {
> +        if ( !(i % 2) )
> +            process_pending_softirqs();
> +
> +        pte =&pt_vaddr[i];
> +        if ( dma_pte_present(*pte) )
> +        {
> +            int j;
> +
> +            for ( j = 0; j<  *indent; j++ )
> +                printk("  ");
> +
> +            address = gpa + offset_level_address(i, level);
> +            printk("gfn: %"PRIpaddr" mfn: %"PRIpaddr" super=%d rd=%d wr=%d\n",
> +                    address>>  PAGE_SHIFT_4K, pte->val>>  PAGE_SHIFT_4K,
> +                    dma_pte_superpage(*pte)? 1 : 0, dma_pte_read(*pte)? 1 : 0,
> +                    dma_pte_write(*pte)? 1 : 0);
> +
> +            if ( next_level>= 1 ) {
> +                *indent += 1;
> +                vtd_dump_p2m_table_level(dma_pte_addr(*pte), next_level,
> +                                         address, indent);
> +
> +                *indent -= 1;
> +            }
> +        }
> +    }
> +
> +    unmap_vtd_domain_page(pt_vaddr);
> +}
> +
> +static void vtd_dump_p2m_table(struct domain *d)
> +{
> +    struct hvm_iommu *hd;
> +    int indent;
> +
> +    if ( list_empty(&acpi_drhd_units) )
> +        return;
> +
> +    hd = domain_hvm_iommu(d);
> +    indent = 0;
> +    vtd_dump_p2m_table_level(hd->pgd_maddr, agaw_to_level(hd->agaw),
> +                             0,&indent);
> +}
> +
>   const struct iommu_ops intel_iommu_ops = {
>       .init = intel_iommu_domain_init,
>       .dom0_init = intel_iommu_dom0_init,
> @@ -2387,6 +2453,7 @@ const struct iommu_ops intel_iommu_ops =
>       .crash_shutdown = vtd_crash_shutdown,
>       .iotlb_flush = intel_iommu_iotlb_flush,
>       .iotlb_flush_all = intel_iommu_iotlb_flush_all,
> +    .dump_p2m_table = vtd_dump_p2m_table,
>   };
>
>   /*
> diff -r 472fc515a463 -r f687c5580262 xen/drivers/passthrough/vtd/iommu.h
> --- a/xen/drivers/passthrough/vtd/iommu.h	Tue Aug 07 18:37:31 2012 +0100
> +++ b/xen/drivers/passthrough/vtd/iommu.h	Thu Aug 09 18:31:32 2012 -0700
> @@ -248,6 +248,8 @@ struct context_entry {
>   #define level_to_offset_bits(l) (12 + (l - 1) * LEVEL_STRIDE)
>   #define address_level_offset(addr, level) \
>               ((addr>>  level_to_offset_bits(level))&  LEVEL_MASK)
> +#define offset_level_address(offset, level) \
> +            ((u64)(offset)<<  level_to_offset_bits(level))
>   #define level_mask(l) (((u64)(-1))<<  level_to_offset_bits(l))
>   #define level_size(l) (1<<  level_to_offset_bits(l))
>   #define align_to_level(addr, l) ((addr + level_size(l) - 1)&  level_mask(l))
> @@ -277,6 +279,9 @@ struct dma_pte {
>   #define dma_set_pte_addr(p, addr) do {\
>               (p).val |= ((addr)&  PAGE_MASK_4K); } while (0)
>   #define dma_pte_present(p) (((p).val&  3) != 0)
> +#define dma_pte_superpage(p) (((p).val&  (1<<7)) != 0)
> +#define dma_pte_read(p) (((p).val&  DMA_PTE_READ) != 0)
> +#define dma_pte_write(p) (((p).val&  DMA_PTE_WRITE) != 0)
>
>   /* interrupt remap entry */
>   struct iremap_entry {
> diff -r 472fc515a463 -r f687c5580262 xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
> --- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h	Tue Aug 07 18:37:31 2012 +0100
> +++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h	Thu Aug 09 18:31:32 2012 -0700
> @@ -38,6 +38,10 @@
>   #define PTE_PER_TABLE_ALLOC(entries)	\
>   	PAGE_SIZE * (PTE_PER_TABLE_ALIGN(entries)>>  PTE_PER_TABLE_SHIFT)
>
> +#define amd_offset_level_address(offset, level) \
> +      	((u64)(offset)<<  ((PTE_PER_TABLE_SHIFT * \
> +                             (level - IOMMU_PAGING_MODE_LEVEL_1))))
> +
>   #define PCI_MIN_CAP_OFFSET	0x40
>   #define PCI_MAX_CAP_BLOCKS	48
>   #define PCI_CAP_PTR_MASK	0xFC
> diff -r 472fc515a463 -r f687c5580262 xen/include/xen/iommu.h
> --- a/xen/include/xen/iommu.h	Tue Aug 07 18:37:31 2012 +0100
> +++ b/xen/include/xen/iommu.h	Thu Aug 09 18:31:32 2012 -0700
> @@ -141,6 +141,7 @@ struct iommu_ops {
>       void (*crash_shutdown)(void);
>       void (*iotlb_flush)(struct domain *d, unsigned long gfn, unsigned int page_count);
>       void (*iotlb_flush_all)(struct domain *d);
> +    void (*dump_p2m_table)(struct domain *d);
>   };
>
>   void iommu_update_ire_from_apic(unsigned int apic, unsigned int reg, unsigned int value);
>

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] dump_p2m_table: For IOMMU
  2012-08-10 12:31 ` Wei Wang
@ 2012-08-10 13:02   ` Jan Beulich
  2012-08-10 15:02   ` Santosh Jodh
  1 sibling, 0 replies; 24+ messages in thread
From: Jan Beulich @ 2012-08-10 13:02 UTC (permalink / raw)
  To: Santosh Jodh; +Cc: Wei Wang, xen-devel, tim, xiantao.zhang

>>> On 10.08.12 at 14:31, Wei Wang <wei.wang2@amd.com> wrote:
> On 08/10/2012 03:43 AM, Santosh Jodh wrote:
>> +    if ( level<= 1 )
>> +        return;
> 
> So, the l1 page table is not printed, is it by design?

Yeah, that puzzled me too, but I wasn't sure what's right here.

> I tested the amd part with an amd iommu system and I got output like this:
> 
> XEN) domain1 IOMMU p2m table:
> (XEN) gfn: 0000000000000000  mfn: 00000000001b1fb5
> (XEN)   gfn: 0000000000000000  mfn: 000000000021f80b
> (XEN)   gfn: 0000000000000000  mfn: 000000000023f400
> (XEN)   gfn: 0000000000000000  mfn: 000000000010a600
> (XEN)   gfn: 0000000000000000  mfn: 000000000023f200
> (XEN)   gfn: 0000000000000000  mfn: 000000000010a400
> (XEN)   gfn: 0000000000000000  mfn: 000000000023f000
> (XEN)   gfn: 0000000000000000  mfn: 000000000010ae00
> (XEN)   gfn: 0000000000000000  mfn: 000000000023ee00
> (XEN)   gfn: 0000000000000001  mfn: 000000000010ac00
> (XEN)   gfn: 0000000000000001  mfn: 000000000023ec00
> (XEN)   gfn: 0000000000000001  mfn: 000000000010aa00
> (XEN)   gfn: 0000000000000001  mfn: 000000000023ea00
> (XEN)   gfn: 0000000000000001  mfn: 000000000010a800
> (XEN)   gfn: 0000000000000001  mfn: 000000000023e800
> (XEN)   gfn: 0000000000000001  mfn: 000000000010be00
> (XEN)   gfn: 0000000000000001  mfn: 000000000023e600
> (XEN)   gfn: 0000000000000002  mfn: 000000000010bc00
> (XEN)   gfn: 0000000000000002  mfn: 000000000023e400
> (XEN)   gfn: 0000000000000002  mfn: 000000000010ba00
> (XEN)   gfn: 0000000000000002  mfn: 000000000023e200
> (XEN)   gfn: 0000000000000002  mfn: 000000000010b800
> (XEN)   gfn: 0000000000000002  mfn: 000000000023e000
> (XEN)   gfn: 0000000000000002  mfn: 000000000010b600
> (XEN)   gfn: 0000000000000002  mfn: 000000000023de00
> (XEN)   gfn: 0000000000000003  mfn: 000000000010b400
> (XEN)   gfn: 0000000000000003  mfn: 000000000023dc00
> (XEN)   gfn: 0000000000000003  mfn: 000000000010b200
> (XEN)   gfn: 0000000000000003  mfn: 000000000023da00
> (XEN)   gfn: 0000000000000003  mfn: 000000000010b000
> (XEN)   gfn: 0000000000000003  mfn: 000000000023d800
> (XEN)   gfn: 0000000000000003  mfn: 000000000010fe00
> (XEN)   gfn: 0000000000000003  mfn: 000000000023d600
> (XEN)   gfn: 0000000000000004  mfn: 000000000010fc00
> (XEN)   gfn: 0000000000000004  mfn: 000000000023d400
> (XEN)   gfn: 0000000000000004  mfn: 000000000010fa00
> (XEN)   gfn: 0000000000000004  mfn: 000000000023d200
> (XEN)   gfn: 0000000000000004  mfn: 000000000010f800
> (XEN)   gfn: 0000000000000004  mfn: 000000000023d000
> (XEN)   gfn: 0000000000000004  mfn: 000000000010f600
> (XEN)   gfn: 0000000000000004  mfn: 000000000023ce00
> (XEN)   gfn: 0000000000000005  mfn: 000000000010f400
> (XEN)   gfn: 0000000000000005  mfn: 000000000023cc00
> (XEN)   gfn: 0000000000000005  mfn: 000000000010f200
> (XEN)   gfn: 0000000000000005  mfn: 000000000023ca00
> (XEN)   gfn: 0000000000000005  mfn: 000000000010f000
> (XEN)   gfn: 0000000000000005  mfn: 000000000023c800
> (XEN)   gfn: 0000000000000005  mfn: 000000000010ee00
> (XEN)   gfn: 0000000000000005  mfn: 000000000023c600
> (XEN)   gfn: 0000000000000006  mfn: 000000000010ec00
> (XEN)   gfn: 0000000000000006  mfn: 000000000023c400
> (XEN)   gfn: 0000000000000006  mfn: 000000000010ea00
> (XEN)   gfn: 0000000000000006  mfn: 000000000023c200
> (XEN)   gfn: 0000000000000006  mfn: 000000000010e800
> (XEN)   gfn: 0000000000000006  mfn: 000000000023c000
> 
> It looks like the same gfn has been mapped to multiple mfns. Do you want 
> to output only the gfn-to-mfn mapping, or do you also want to output the 
> addresses of the intermediate page tables? What do "gfn" and "mfn" stand 
> for here?

Indeed, apart from the apparent brokenness, printing the GFN
with the intermediate levels makes no sense. Nor does it make
sense to print with 16 digits when beyond the 10th a non-zero
one will never be observable. I realize that that's a downside
of using PRIpaddr - I was probably giving a bad recommendation
here (or at least it wasn't applicable to all cases), I'm sorry for
that; instead, when you convert to [GM]FN, you can safely cast
to (and hence print as) unsigned long.

Jan

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] dump_p2m_table: For IOMMU
  2012-08-10 12:31 ` Wei Wang
  2012-08-10 13:02   ` Jan Beulich
@ 2012-08-10 15:02   ` Santosh Jodh
  1 sibling, 0 replies; 24+ messages in thread
From: Santosh Jodh @ 2012-08-10 15:02 UTC (permalink / raw)
  To: Wei Wang; +Cc: xen-devel, Tim (Xen.org), xiantao.zhang

Thanks Wei - please see inline.

> +static void amd_dump_p2m_table_level(struct page_info* pg, int level,
> +                                     paddr_t gpa, int *indent) {
> +    paddr_t address;
> +    void *table_vaddr, *pde;
> +    paddr_t next_table_maddr;
> +    int index, next_level, present;
> +    u32 *entry;
> +
> +    if ( level<= 1 )
> +        return;

So, the l1 page table is not printed, is it by design?
[Santosh Jodh] That was a typo - it should be level < 1


I tested the amd part with an amd iommu system and I got output like this:

XEN) domain1 IOMMU p2m table:
(XEN) gfn: 0000000000000000  mfn: 00000000001b1fb5
(XEN)   gfn: 0000000000000000  mfn: 000000000021f80b
(XEN)   gfn: 0000000000000000  mfn: 000000000023f400
(XEN)   gfn: 0000000000000000  mfn: 000000000010a600
(XEN)   gfn: 0000000000000000  mfn: 000000000023f200
(XEN)   gfn: 0000000000000000  mfn: 000000000010a400
(XEN)   gfn: 0000000000000000  mfn: 000000000023f000
(XEN)   gfn: 0000000000000000  mfn: 000000000010ae00
(XEN)   gfn: 0000000000000000  mfn: 000000000023ee00
(XEN)   gfn: 0000000000000001  mfn: 000000000010ac00
(XEN)   gfn: 0000000000000001  mfn: 000000000023ec00
(XEN)   gfn: 0000000000000001  mfn: 000000000010aa00
(XEN)   gfn: 0000000000000001  mfn: 000000000023ea00
(XEN)   gfn: 0000000000000001  mfn: 000000000010a800
(XEN)   gfn: 0000000000000001  mfn: 000000000023e800
(XEN)   gfn: 0000000000000001  mfn: 000000000010be00
(XEN)   gfn: 0000000000000001  mfn: 000000000023e600
(XEN)   gfn: 0000000000000002  mfn: 000000000010bc00
(XEN)   gfn: 0000000000000002  mfn: 000000000023e400
(XEN)   gfn: 0000000000000002  mfn: 000000000010ba00
(XEN)   gfn: 0000000000000002  mfn: 000000000023e200
(XEN)   gfn: 0000000000000002  mfn: 000000000010b800
(XEN)   gfn: 0000000000000002  mfn: 000000000023e000
(XEN)   gfn: 0000000000000002  mfn: 000000000010b600
(XEN)   gfn: 0000000000000002  mfn: 000000000023de00
(XEN)   gfn: 0000000000000003  mfn: 000000000010b400
(XEN)   gfn: 0000000000000003  mfn: 000000000023dc00
(XEN)   gfn: 0000000000000003  mfn: 000000000010b200
(XEN)   gfn: 0000000000000003  mfn: 000000000023da00
(XEN)   gfn: 0000000000000003  mfn: 000000000010b000
(XEN)   gfn: 0000000000000003  mfn: 000000000023d800
(XEN)   gfn: 0000000000000003  mfn: 000000000010fe00
(XEN)   gfn: 0000000000000003  mfn: 000000000023d600
(XEN)   gfn: 0000000000000004  mfn: 000000000010fc00
(XEN)   gfn: 0000000000000004  mfn: 000000000023d400
(XEN)   gfn: 0000000000000004  mfn: 000000000010fa00
(XEN)   gfn: 0000000000000004  mfn: 000000000023d200
(XEN)   gfn: 0000000000000004  mfn: 000000000010f800
(XEN)   gfn: 0000000000000004  mfn: 000000000023d000
(XEN)   gfn: 0000000000000004  mfn: 000000000010f600
(XEN)   gfn: 0000000000000004  mfn: 000000000023ce00
(XEN)   gfn: 0000000000000005  mfn: 000000000010f400
(XEN)   gfn: 0000000000000005  mfn: 000000000023cc00
(XEN)   gfn: 0000000000000005  mfn: 000000000010f200
(XEN)   gfn: 0000000000000005  mfn: 000000000023ca00
(XEN)   gfn: 0000000000000005  mfn: 000000000010f000
(XEN)   gfn: 0000000000000005  mfn: 000000000023c800
(XEN)   gfn: 0000000000000005  mfn: 000000000010ee00
(XEN)   gfn: 0000000000000005  mfn: 000000000023c600
(XEN)   gfn: 0000000000000006  mfn: 000000000010ec00
(XEN)   gfn: 0000000000000006  mfn: 000000000023c400
(XEN)   gfn: 0000000000000006  mfn: 000000000010ea00
(XEN)   gfn: 0000000000000006  mfn: 000000000023c200
(XEN)   gfn: 0000000000000006  mfn: 000000000010e800
(XEN)   gfn: 0000000000000006  mfn: 000000000023c000

It looks like the same gfn has been mapped to multiple mfns. Do you want to output only the gfn to mfn mapping, or do you also want to output the address of intermediate page tables? What do "gfn" and "mfn" stand for here?
[Santosh Jodh] guest frame number and machine frame number. I had added the intermediate PD and PT prints after Jan Beulich suggested it - it's possible I misunderstood him. I will resend a new patch. Could you please try it as I don't have access to AMD IOMMU system.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] dump_p2m_table: For IOMMU
  2012-08-13  8:59 ` Jan Beulich
@ 2012-08-14 19:34   ` Santosh Jodh
  0 siblings, 0 replies; 24+ messages in thread
From: Santosh Jodh @ 2012-08-14 19:34 UTC (permalink / raw)
  To: Jan Beulich; +Cc: wei.wang2, Tim (Xen.org), xiantao.zhang, xen-devel


Why do you do this differently than for VT-d here? There you don't check next_table_maddr (and I see no reason you would need to). Oh, I see, there's a similar check in a different place there. But this needs to be functionally similar here then.
Specifically, ...

> +        {
> +            amd_dump_p2m_table_level(
> +                maddr_to_page(next_table_maddr), level - 1, 
> +                address, indent + 1);
> +        }
> +        else

... you'd get into the else's body if next_table_maddr was zero, which is wrong afaict. So I think flow like

    if ( next_level )
        print
    else if ( next_table_maddr )
        recurse

would be the preferable way to go if you feel that these zero checks are necessary (and if you do then, because this being the case is really a bug, this shouldn't go through silently).
[Santosh Jodh] I was basing my code on existing code in the individual files. I was just being paranoid as this is debug code and I would not want to crash the system. Anyway, I am resending a patch that structures the code in the same way for both Intel and AMD.

> +        {
> +            int i;
> +
> +            for ( i = 0; i < indent; i++ )
> +                printk("  ");

printk("%*s...", indent, "", ...);
[Santosh Jodh] Cool - got it.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] dump_p2m_table: For IOMMU
  2012-08-10 19:14 Santosh Jodh
  2012-08-13  8:59 ` Jan Beulich
@ 2012-08-13 10:31 ` Wei Wang
  1 sibling, 0 replies; 24+ messages in thread
From: Wei Wang @ 2012-08-13 10:31 UTC (permalink / raw)
  To: Santosh Jodh; +Cc: xen-devel, tim, xiantao.zhang

Hi Santosh
Please see some outputs below; the gfn still seems incorrect.
Thanks
Wei


(XEN)     gfn: 000000f0  mfn: 001023ac
(XEN)     gfn: 000000f0  mfn: 0023f83d
(XEN)     gfn: 000000f0  mfn: 001023ab
(XEN)     gfn: 000000f0  mfn: 0023f83c
(XEN)     gfn: 000000f0  mfn: 001023aa
(XEN)     gfn: 000000f0  mfn: 0023f83b
(XEN)     gfn: 000000f0  mfn: 001023a9
(XEN)     gfn: 000000f0  mfn: 0023f83a
(XEN)     gfn: 000000f0  mfn: 001023a8
(XEN)     gfn: 000000f0  mfn: 0023f839
(XEN)     gfn: 000000f0  mfn: 001023a7
(XEN)     gfn: 000000f0  mfn: 0023f838
(XEN)     gfn: 000000f0  mfn: 001023a6
(XEN)     gfn: 000000f0  mfn: 0023f837
(XEN)     gfn: 000000f0  mfn: 001023a5
(XEN)     gfn: 000000f0  mfn: 0023f836
(XEN)     gfn: 000000f0  mfn: 001023a4
(XEN)     gfn: 000000f0  mfn: 0023f835
(XEN)     gfn: 000000f0  mfn: 001023a3
(XEN)     gfn: 000000f0  mfn: 0023f834
(XEN)     gfn: 000000f0  mfn: 001023a2
(XEN)     gfn: 000000f0  mfn: 0023f833
(XEN)     gfn: 000000f0  mfn: 001023a1
(XEN)     gfn: 000000f0  mfn: 0023f832
(XEN)     gfn: 000000f0  mfn: 001023a0
(XEN)     gfn: 000000f0  mfn: 0023f831
(XEN)     gfn: 000000f0  mfn: 0010239f
(XEN)     gfn: 000000f0  mfn: 0023f830
(XEN)     gfn: 000000f0  mfn: 0010239e
(XEN)     gfn: 000000f0  mfn: 0023f82f
(XEN)     gfn: 000000f0  mfn: 0010239d
(XEN)     gfn: 000000f0  mfn: 0023f82e
(XEN)     gfn: 000000f0  mfn: 0010239c
(XEN)     gfn: 000000f0  mfn: 0023f82d
(XEN)     gfn: 000000f0  mfn: 0010239b
(XEN)     gfn: 000000f0  mfn: 0023f82c
(XEN)     gfn: 000000f0  mfn: 0010239a
(XEN)     gfn: 000000f0  mfn: 0023f82b
(XEN)     gfn: 000000f0  mfn: 00102399
(XEN)     gfn: 000000f0  mfn: 0023f82a
(XEN)     gfn: 000000f0  mfn: 00102398
(XEN)     gfn: 000000f0  mfn: 0023f829
(XEN)     gfn: 000000f0  mfn: 00102397
(XEN)     gfn: 000000f0  mfn: 0023f828
(XEN)     gfn: 000000f0  mfn: 00102396
(XEN)     gfn: 000000f0  mfn: 0023f827
(XEN)     gfn: 000000f0  mfn: 00102395
(XEN)     gfn: 000000f0  mfn: 0023f826
(XEN)     gfn: 000000f0  mfn: 00102394
(XEN)     gfn: 000000f0  mfn: 0023f825
(XEN)     gfn: 000000f0  mfn: 00102393
(XEN)     gfn: 000000f0  mfn: 0023f824
(XEN)     gfn: 000000f0  mfn: 00102392
(XEN)     gfn: 000000f0  mfn: 0023f823
(XEN)     gfn: 000000f0  mfn: 00102391
(XEN)     gfn: 000000f0  mfn: 0023f822
(XEN)     gfn: 000000f0  mfn: 00102390
(XEN)     gfn: 000000f0  mfn: 0023f821
(XEN)     gfn: 000000f0  mfn: 0010238f
(XEN)     gfn: 000000f0  mfn: 0023f820
(XEN)     gfn: 000000f0  mfn: 0010238e
(XEN)     gfn: 000000f0  mfn: 0023f81f
(XEN)     gfn: 000000f0  mfn: 0010238d
(XEN)     gfn: 000000f0  mfn: 0023f81e
(XEN)     gfn: 000000f0  mfn: 0010238c
(XEN)     gfn: 000000f0  mfn: 0023f81d
(XEN)     gfn: 000000f0  mfn: 0010238b
(XEN)     gfn: 000000f0  mfn: 0023f81c
(XEN)     gfn: 000000f0  mfn: 0010238a
(XEN)     gfn: 000000f0  mfn: 0023f81b
(XEN)     gfn: 000000f0  mfn: 00102389
(XEN)     gfn: 000000f0  mfn: 0023f81a
(XEN)     gfn: 000000f0  mfn: 00102388
(XEN)     gfn: 000000f0  mfn: 0023f819
(XEN)     gfn: 000000f0  mfn: 00102387
(XEN)     gfn: 000000f0  mfn: 0023f818
(XEN)     gfn: 000000f0  mfn: 00102386
(XEN)     gfn: 000000f0  mfn: 0023f817
(XEN)     gfn: 000000f0  mfn: 00102385
(XEN)     gfn: 000000f0  mfn: 0023f816
(XEN)     gfn: 000000f0  mfn: 00102384
(XEN)     gfn: 000000f0  mfn: 0023f815
(XEN)     gfn: 000000f0  mfn: 00102383
(XEN)     gfn: 000000f0  mfn: 0023f814
(XEN)     gfn: 000000f0  mfn: 00102382
(XEN)     gfn: 000000f0  mfn: 0023f813
(XEN)     gfn: 000000f0  mfn: 00102381
(XEN)     gfn: 000000f0  mfn: 0023f812
(XEN)     gfn: 000000f0  mfn: 00102380
(XEN)     gfn: 000000f0  mfn: 0023f811
(XEN)     gfn: 000000f0  mfn: 0010217f
(XEN)     gfn: 000000f0  mfn: 0023f810
(XEN)     gfn: 000000f0  mfn: 0010217e
(XEN)     gfn: 000000f0  mfn: 0023f80f
(XEN)     gfn: 000000f0  mfn: 0010217d
(XEN)     gfn: 000000f0  mfn: 0023f80e
(XEN)     gfn: 000000f0  mfn: 0010217c
(XEN)     gfn: 000000f0  mfn: 0023f80d
(XEN)     gfn: 000000f0  mfn: 0010217b
(XEN)     gfn: 000000f0  mfn: 0023f80c
(XEN)     gfn: 000000f0  mfn: 0010217a
(XEN)     gfn: 000000f0  mfn: 0023f80b
(XEN)     gfn: 000000f0  mfn: 00102179
(XEN)     gfn: 000000fc  mfn: 0023f806
(XEN)     gfn: 000000fc  mfn: 0023f809
(XEN)     gfn: 000000fc  mfn: 001025f1
(XEN)     gfn: 000000fc  mfn: 0023f807
(XEN)     gfn: 000000fc  mfn: 001025f0
(XEN)     gfn: 000000fc  mfn: 001025ef
(XEN)     gfn: 000000fc  mfn: 0023f805
(XEN)     gfn: 000000fc  mfn: 001025ee
(XEN)     gfn: 000000fc  mfn: 0023f804
(XEN)     gfn: 000000fc  mfn: 001025ed
(XEN)     gfn: 000000fc  mfn: 0023f803
(XEN)     gfn: 000000fc  mfn: 001025ec
(XEN)     gfn: 000000fc  mfn: 0023f802
(XEN)     gfn: 000000fc  mfn: 001025eb
(XEN)     gfn: 000000fc  mfn: 0023f801
(XEN)     gfn: 000000fc  mfn: 001025ea
(XEN)     gfn: 000000fc  mfn: 0023f800
(XEN)     gfn: 000000fe  mfn: 000812b0
(XEN)     gfn: 000000fe  mfn: 0010a085
(XEN)     gfn: 000000fe  mfn: 0021f60c
(XEN)     gfn: 000000fe  mfn: 0010a084
(XEN)     gfn: 000000fe  mfn: 0021f60b
(XEN)     gfn: 000000fe  mfn: 0010a083



On 08/10/2012 09:14 PM, Santosh Jodh wrote:
> New key handler 'o' to dump the IOMMU p2m table for each domain.
> Skips dumping table for domain0.
> Intel and AMD specific iommu_ops handler for dumping p2m table.
>
> Signed-off-by: Santosh Jodh<santosh.jodh@citrix.com>
>
> diff -r 472fc515a463 -r 9c7609a4fbc1 xen/drivers/passthrough/amd/pci_amd_iommu.c
> --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c	Tue Aug 07 18:37:31 2012 +0100
> +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c	Fri Aug 10 08:19:58 2012 -0700
> @@ -22,6 +22,7 @@
>   #include<xen/pci.h>
>   #include<xen/pci_regs.h>
>   #include<xen/paging.h>
> +#include<xen/softirq.h>
>   #include<asm/hvm/iommu.h>
>   #include<asm/amd-iommu.h>
>   #include<asm/hvm/svm/amd-iommu-proto.h>
> @@ -512,6 +513,80 @@ static int amd_iommu_group_id(u16 seg, u
>
>   #include<asm/io_apic.h>
>
> +static void amd_dump_p2m_table_level(struct page_info* pg, int level,
> +                                     paddr_t gpa, int indent)
> +{
> +    paddr_t address;
> +    void *table_vaddr, *pde;
> +    paddr_t next_table_maddr;
> +    int index, next_level, present;
> +    u32 *entry;
> +
> +    if ( level<  1 )
> +        return;
> +
> +    table_vaddr = __map_domain_page(pg);
> +    if ( table_vaddr == NULL )
> +    {
> +        printk("Failed to map IOMMU domain page %"PRIpaddr"\n",
> +                page_to_maddr(pg));
> +        return;
> +    }
> +
> +    for ( index = 0; index<  PTE_PER_TABLE_SIZE; index++ )
> +    {
> +        if ( !(index % 2) )
> +            process_pending_softirqs();
> +
> +        pde = table_vaddr + (index * IOMMU_PAGE_TABLE_ENTRY_SIZE);
> +        next_table_maddr = amd_iommu_get_next_table_from_pte(pde);
> +        entry = (u32*)pde;
> +
> +        present = get_field_from_reg_u32(entry[0],
> +                                         IOMMU_PDE_PRESENT_MASK,
> +                                         IOMMU_PDE_PRESENT_SHIFT);
> +
> +        if ( !present )
> +            continue;
> +
> +        next_level = get_field_from_reg_u32(entry[0],
> +                                            IOMMU_PDE_NEXT_LEVEL_MASK,
> +                                            IOMMU_PDE_NEXT_LEVEL_SHIFT);
> +
> +        address = gpa + amd_offset_level_address(index, level);
> +        if ( (next_table_maddr != 0)&&  (next_level != 0) )
> +        {
> +            amd_dump_p2m_table_level(
> +                maddr_to_page(next_table_maddr), level - 1,
> +                address, indent + 1);
> +        }
> +        else
> +        {
> +            int i;
> +
> +            for ( i = 0; i<  indent; i++ )
> +                printk("  ");
> +
> +            printk("gfn: %08lx  mfn: %08lx\n",
> +                   (unsigned long)PFN_DOWN(address),
> +                   (unsigned long)PFN_DOWN(next_table_maddr));
> +        }
> +    }
> +
> +    unmap_domain_page(table_vaddr);
> +}
> +
> +static void amd_dump_p2m_table(struct domain *d)
> +{
> +    struct hvm_iommu *hd  = domain_hvm_iommu(d);
> +
> +    if ( !hd->root_table )
> +        return;
> +
> +    printk("p2m table has %d levels\n", hd->paging_mode);
> +    amd_dump_p2m_table_level(hd->root_table, hd->paging_mode, 0, 0);
> +}
> +
>   const struct iommu_ops amd_iommu_ops = {
>       .init = amd_iommu_domain_init,
>       .dom0_init = amd_iommu_dom0_init,
> @@ -531,4 +606,5 @@ const struct iommu_ops amd_iommu_ops = {
>       .resume = amd_iommu_resume,
>       .share_p2m = amd_iommu_share_p2m,
>       .crash_shutdown = amd_iommu_suspend,
> +    .dump_p2m_table = amd_dump_p2m_table,
>   };
> diff -r 472fc515a463 -r 9c7609a4fbc1 xen/drivers/passthrough/iommu.c
> --- a/xen/drivers/passthrough/iommu.c	Tue Aug 07 18:37:31 2012 +0100
> +++ b/xen/drivers/passthrough/iommu.c	Fri Aug 10 08:19:58 2012 -0700
> @@ -18,11 +18,13 @@
>   #include<asm/hvm/iommu.h>
>   #include<xen/paging.h>
>   #include<xen/guest_access.h>
> +#include<xen/keyhandler.h>
>   #include<xen/softirq.h>
>   #include<xsm/xsm.h>
>
>   static void parse_iommu_param(char *s);
>   static int iommu_populate_page_table(struct domain *d);
> +static void iommu_dump_p2m_table(unsigned char key);
>
>   /*
>    * The 'iommu' parameter enables the IOMMU.  Optional comma separated
> @@ -54,6 +56,12 @@ bool_t __read_mostly amd_iommu_perdev_in
>
>   DEFINE_PER_CPU(bool_t, iommu_dont_flush_iotlb);
>
> +static struct keyhandler iommu_p2m_table = {
> +    .diagnostic = 0,
> +    .u.fn = iommu_dump_p2m_table,
> +    .desc = "dump iommu p2m table"
> +};
> +
>   static void __init parse_iommu_param(char *s)
>   {
>       char *ss;
> @@ -119,6 +127,7 @@ void __init iommu_dom0_init(struct domai
>       if ( !iommu_enabled )
>           return;
>
> +    register_keyhandler('o',&iommu_p2m_table);
>       d->need_iommu = !!iommu_dom0_strict;
>       if ( need_iommu(d) )
>       {
> @@ -654,6 +663,34 @@ int iommu_do_domctl(
>       return ret;
>   }
>
> +static void iommu_dump_p2m_table(unsigned char key)
> +{
> +    struct domain *d;
> +    const struct iommu_ops *ops;
> +
> +    if ( !iommu_enabled )
> +    {
> +        printk("IOMMU not enabled!\n");
> +        return;
> +    }
> +
> +    ops = iommu_get_ops();
> +    for_each_domain(d)
> +    {
> +        if ( !d->domain_id )
> +            continue;
> +
> +        if ( iommu_use_hap_pt(d) )
> +        {
> +            printk("\ndomain%d IOMMU p2m table shared with MMU: \n", d->domain_id);
> +            continue;
> +        }
> +
> +        printk("\ndomain%d IOMMU p2m table: \n", d->domain_id);
> +        ops->dump_p2m_table(d);
> +    }
> +}
> +
>   /*
>    * Local variables:
>    * mode: C
> diff -r 472fc515a463 -r 9c7609a4fbc1 xen/drivers/passthrough/vtd/iommu.c
> --- a/xen/drivers/passthrough/vtd/iommu.c	Tue Aug 07 18:37:31 2012 +0100
> +++ b/xen/drivers/passthrough/vtd/iommu.c	Fri Aug 10 08:19:58 2012 -0700
> @@ -31,6 +31,7 @@
>   #include<xen/pci.h>
>   #include<xen/pci_regs.h>
>   #include<xen/keyhandler.h>
> +#include<xen/softirq.h>
>   #include<asm/msi.h>
>   #include<asm/irq.h>
>   #if defined(__i386__) || defined(__x86_64__)
> @@ -2365,6 +2366,71 @@ static void vtd_resume(void)
>       }
>   }
>
> +static void vtd_dump_p2m_table_level(paddr_t pt_maddr, int level, paddr_t gpa,
> +                                     int indent)
> +{
> +    paddr_t address;
> +    int i;
> +    struct dma_pte *pt_vaddr, *pte;
> +    int next_level;
> +
> +    if ( pt_maddr == 0 )
> +        return;
> +
> +    pt_vaddr = map_vtd_domain_page(pt_maddr);
> +    if ( pt_vaddr == NULL )
> +    {
> +        printk("Failed to map VT-D domain page %"PRIpaddr"\n", pt_maddr);
> +        return;
> +    }
> +
> +    next_level = level - 1;
> +    for ( i = 0; i<  PTE_NUM; i++ )
> +    {
> +        if ( !(i % 2) )
> +            process_pending_softirqs();
> +
> +        pte =&pt_vaddr[i];
> +        if ( !dma_pte_present(*pte) )
> +            continue;
> +
> +        address = gpa + offset_level_address(i, level);
> +        if ( next_level>= 1 )
> +        {
> +            vtd_dump_p2m_table_level(dma_pte_addr(*pte), next_level,
> +                                     address, indent + 1);
> +        }
> +        else
> +        {
> +            int j;
> +
> +            for ( j = 0; j<  indent; j++ )
> +                printk("  ");
> +
> +            printk("gfn: %08lx mfn: %08lx super=%d rd=%d wr=%d\n",
> +                   (unsigned long)(address>>  PAGE_SHIFT_4K),
> +                   (unsigned long)(pte->val>>  PAGE_SHIFT_4K),
> +                   dma_pte_superpage(*pte)? 1 : 0,
> +                   dma_pte_read(*pte)? 1 : 0,
> +                   dma_pte_write(*pte)? 1 : 0);
> +        }
> +    }
> +
> +    unmap_vtd_domain_page(pt_vaddr);
> +}
> +
> +static void vtd_dump_p2m_table(struct domain *d)
> +{
> +    struct hvm_iommu *hd;
> +
> +    if ( list_empty(&acpi_drhd_units) )
> +        return;
> +
> +    hd = domain_hvm_iommu(d);
> +    printk("p2m table has %d levels\n", agaw_to_level(hd->agaw));
> +    vtd_dump_p2m_table_level(hd->pgd_maddr, agaw_to_level(hd->agaw), 0, 0);
> +}
> +
>   const struct iommu_ops intel_iommu_ops = {
>       .init = intel_iommu_domain_init,
>       .dom0_init = intel_iommu_dom0_init,
> @@ -2387,6 +2453,7 @@ const struct iommu_ops intel_iommu_ops =
>       .crash_shutdown = vtd_crash_shutdown,
>       .iotlb_flush = intel_iommu_iotlb_flush,
>       .iotlb_flush_all = intel_iommu_iotlb_flush_all,
> +    .dump_p2m_table = vtd_dump_p2m_table,
>   };
>
>   /*
> diff -r 472fc515a463 -r 9c7609a4fbc1 xen/drivers/passthrough/vtd/iommu.h
> --- a/xen/drivers/passthrough/vtd/iommu.h	Tue Aug 07 18:37:31 2012 +0100
> +++ b/xen/drivers/passthrough/vtd/iommu.h	Fri Aug 10 08:19:58 2012 -0700
> @@ -248,6 +248,8 @@ struct context_entry {
>   #define level_to_offset_bits(l) (12 + (l - 1) * LEVEL_STRIDE)
>   #define address_level_offset(addr, level) \
>               ((addr>>  level_to_offset_bits(level))&  LEVEL_MASK)
> +#define offset_level_address(offset, level) \
> +            ((u64)(offset)<<  level_to_offset_bits(level))
>   #define level_mask(l) (((u64)(-1))<<  level_to_offset_bits(l))
>   #define level_size(l) (1<<  level_to_offset_bits(l))
>   #define align_to_level(addr, l) ((addr + level_size(l) - 1)&  level_mask(l))
> @@ -277,6 +279,9 @@ struct dma_pte {
>   #define dma_set_pte_addr(p, addr) do {\
>               (p).val |= ((addr)&  PAGE_MASK_4K); } while (0)
>   #define dma_pte_present(p) (((p).val&  3) != 0)
> +#define dma_pte_superpage(p) (((p).val&  (1<<7)) != 0)
> +#define dma_pte_read(p) (((p).val&  DMA_PTE_READ) != 0)
> +#define dma_pte_write(p) (((p).val&  DMA_PTE_WRITE) != 0)
>
>   /* interrupt remap entry */
>   struct iremap_entry {
> diff -r 472fc515a463 -r 9c7609a4fbc1 xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
> --- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h	Tue Aug 07 18:37:31 2012 +0100
> +++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h	Fri Aug 10 08:19:58 2012 -0700
> @@ -38,6 +38,10 @@
>   #define PTE_PER_TABLE_ALLOC(entries)	\
>   	PAGE_SIZE * (PTE_PER_TABLE_ALIGN(entries)>>  PTE_PER_TABLE_SHIFT)
>
> +#define amd_offset_level_address(offset, level) \
> +      	((u64)(offset)<<  ((PTE_PER_TABLE_SHIFT * \
> +                             (level - IOMMU_PAGING_MODE_LEVEL_1))))
> +
>   #define PCI_MIN_CAP_OFFSET	0x40
>   #define PCI_MAX_CAP_BLOCKS	48
>   #define PCI_CAP_PTR_MASK	0xFC
> diff -r 472fc515a463 -r 9c7609a4fbc1 xen/include/xen/iommu.h
> --- a/xen/include/xen/iommu.h	Tue Aug 07 18:37:31 2012 +0100
> +++ b/xen/include/xen/iommu.h	Fri Aug 10 08:19:58 2012 -0700
> @@ -141,6 +141,7 @@ struct iommu_ops {
>       void (*crash_shutdown)(void);
>       void (*iotlb_flush)(struct domain *d, unsigned long gfn, unsigned int page_count);
>       void (*iotlb_flush_all)(struct domain *d);
> +    void (*dump_p2m_table)(struct domain *d);
>   };
>
>   void iommu_update_ire_from_apic(unsigned int apic, unsigned int reg, unsigned int value);
>

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] dump_p2m_table: For IOMMU
  2012-08-10 19:14 Santosh Jodh
@ 2012-08-13  8:59 ` Jan Beulich
  2012-08-14 19:34   ` Santosh Jodh
  2012-08-13 10:31 ` Wei Wang
  1 sibling, 1 reply; 24+ messages in thread
From: Jan Beulich @ 2012-08-13  8:59 UTC (permalink / raw)
  To: Santosh Jodh; +Cc: wei.wang2, tim, xiantao.zhang, xen-devel

>>> On 10.08.12 at 21:14, Santosh Jodh <santosh.jodh@citrix.com> wrote:
> --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c	Tue Aug 07 18:37:31 2012 +0100
> +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c	Fri Aug 10 08:19:58 2012 -0700
> @@ -512,6 +513,80 @@ static int amd_iommu_group_id(u16 seg, u
>  
>  #include <asm/io_apic.h>
>  
> +static void amd_dump_p2m_table_level(struct page_info* pg, int level, 
> +                                     paddr_t gpa, int indent)
> +{
> +    paddr_t address;
> +    void *table_vaddr, *pde;
> +    paddr_t next_table_maddr;
> +    int index, next_level, present;
> +    u32 *entry;
> +
> +    if ( level < 1 )
> +        return;
> +
> +    table_vaddr = __map_domain_page(pg);
> +    if ( table_vaddr == NULL )
> +    {
> +        printk("Failed to map IOMMU domain page %"PRIpaddr"\n", 
> +                page_to_maddr(pg));
> +        return;
> +    }
> +
> +    for ( index = 0; index < PTE_PER_TABLE_SIZE; index++ )
> +    {
> +        if ( !(index % 2) )
> +            process_pending_softirqs();
> +
> +        pde = table_vaddr + (index * IOMMU_PAGE_TABLE_ENTRY_SIZE);
> +        next_table_maddr = amd_iommu_get_next_table_from_pte(pde);
> +        entry = (u32*)pde;
> +
> +        present = get_field_from_reg_u32(entry[0],
> +                                         IOMMU_PDE_PRESENT_MASK,
> +                                         IOMMU_PDE_PRESENT_SHIFT);
> +
> +        if ( !present )
> +            continue;
> +
> +        next_level = get_field_from_reg_u32(entry[0],
> +                                            IOMMU_PDE_NEXT_LEVEL_MASK,
> +                                            IOMMU_PDE_NEXT_LEVEL_SHIFT);
> +
> +        address = gpa + amd_offset_level_address(index, level);
> +        if ( (next_table_maddr != 0) && (next_level != 0) )

Why do you do this differently than for VT-d here? There
you don't check next_table_maddr (and I see no reason you
would need to). Oh, I see, there's a similar check in a different
place there. But this needs to be functionally similar here then.
Specifically, ...

> +        {
> +            amd_dump_p2m_table_level(
> +                maddr_to_page(next_table_maddr), level - 1, 
> +                address, indent + 1);
> +        }
> +        else 

... you'd get into the else's body if next_table_maddr was zero,
which is wrong afaict. So I think flow like

    if ( next_level )
        print
    else if ( next_table_maddr )
        recurse

would be the preferable way to go if you feel that these zero
checks are necessary (and if you do then, because this being
the case is really a bug, this shouldn't go through silently).

> +        {
> +            int i;
> +
> +            for ( i = 0; i < indent; i++ )
> +                printk("  ");

printk("%*s...", indent, "", ...);

> +
> +            printk("gfn: %08lx  mfn: %08lx\n",
> +                   (unsigned long)PFN_DOWN(address), 
> +                   (unsigned long)PFN_DOWN(next_table_maddr));
> +        }
> +    }
> +
> +    unmap_domain_page(table_vaddr);
> +}
>...
> --- a/xen/drivers/passthrough/vtd/iommu.c	Tue Aug 07 18:37:31 2012 +0100
> +++ b/xen/drivers/passthrough/vtd/iommu.c	Fri Aug 10 08:19:58 2012 -0700
> @@ -2365,6 +2366,71 @@ static void vtd_resume(void)
>      }
>  }
>  
> +static void vtd_dump_p2m_table_level(paddr_t pt_maddr, int level, paddr_t gpa, 
> +                                     int indent)
> +{
> +    paddr_t address;
> +    int i;
> +    struct dma_pte *pt_vaddr, *pte;
> +    int next_level;
> +
> +    if ( pt_maddr == 0 )
> +        return;
> +
> +    pt_vaddr = map_vtd_domain_page(pt_maddr);
> +    if ( pt_vaddr == NULL )
> +    {
> +        printk("Failed to map VT-D domain page %"PRIpaddr"\n", pt_maddr);
> +        return;
> +    }
> +
> +    next_level = level - 1;
> +    for ( i = 0; i < PTE_NUM; i++ )
> +    {
> +        if ( !(i % 2) )
> +            process_pending_softirqs();
> +
> +        pte = &pt_vaddr[i];
> +        if ( !dma_pte_present(*pte) )
> +            continue;
> +
> +        address = gpa + offset_level_address(i, level);
> +        if ( next_level >= 1 ) 
> +        {
> +            vtd_dump_p2m_table_level(dma_pte_addr(*pte), next_level, 
> +                                     address, indent + 1);
> +        }
> +        else
> +        {
> +            int j;
> +
> +            for ( j = 0; j < indent; j++ )
> +                printk("  ");

See above.

Jan

> +
> +            printk("gfn: %08lx mfn: %08lx super=%d rd=%d wr=%d\n",
> +                   (unsigned long)(address >> PAGE_SHIFT_4K),
> +                   (unsigned long)(pte->val >> PAGE_SHIFT_4K),
> +                   dma_pte_superpage(*pte)? 1 : 0,
> +                   dma_pte_read(*pte)? 1 : 0,
> +                   dma_pte_write(*pte)? 1 : 0);
> +        }
> +    }
> +
> +    unmap_vtd_domain_page(pt_vaddr);
> +}

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH] dump_p2m_table: For IOMMU
@ 2012-08-10 19:14 Santosh Jodh
  2012-08-13  8:59 ` Jan Beulich
  2012-08-13 10:31 ` Wei Wang
  0 siblings, 2 replies; 24+ messages in thread
From: Santosh Jodh @ 2012-08-10 19:14 UTC (permalink / raw)
  To: xen-devel; +Cc: wei.wang2, tim, xiantao.zhang

New key handler 'o' to dump the IOMMU p2m table for each domain.
Skips dumping table for domain0.
Intel and AMD specific iommu_ops handler for dumping p2m table.

Signed-off-by: Santosh Jodh <santosh.jodh@citrix.com>

diff -r 472fc515a463 -r 9c7609a4fbc1 xen/drivers/passthrough/amd/pci_amd_iommu.c
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c	Tue Aug 07 18:37:31 2012 +0100
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c	Fri Aug 10 08:19:58 2012 -0700
@@ -22,6 +22,7 @@
 #include <xen/pci.h>
 #include <xen/pci_regs.h>
 #include <xen/paging.h>
+#include <xen/softirq.h>
 #include <asm/hvm/iommu.h>
 #include <asm/amd-iommu.h>
 #include <asm/hvm/svm/amd-iommu-proto.h>
@@ -512,6 +513,80 @@ static int amd_iommu_group_id(u16 seg, u
 
 #include <asm/io_apic.h>
 
+static void amd_dump_p2m_table_level(struct page_info* pg, int level, 
+                                     paddr_t gpa, int indent)
+{
+    paddr_t address;
+    void *table_vaddr, *pde;
+    paddr_t next_table_maddr;
+    int index, next_level, present;
+    u32 *entry;
+
+    if ( level < 1 )
+        return;
+
+    table_vaddr = __map_domain_page(pg);
+    if ( table_vaddr == NULL )
+    {
+        printk("Failed to map IOMMU domain page %"PRIpaddr"\n", 
+                page_to_maddr(pg));
+        return;
+    }
+
+    for ( index = 0; index < PTE_PER_TABLE_SIZE; index++ )
+    {
+        if ( !(index % 2) )
+            process_pending_softirqs();
+
+        pde = table_vaddr + (index * IOMMU_PAGE_TABLE_ENTRY_SIZE);
+        next_table_maddr = amd_iommu_get_next_table_from_pte(pde);
+        entry = (u32*)pde;
+
+        present = get_field_from_reg_u32(entry[0],
+                                         IOMMU_PDE_PRESENT_MASK,
+                                         IOMMU_PDE_PRESENT_SHIFT);
+
+        if ( !present )
+            continue;
+
+        next_level = get_field_from_reg_u32(entry[0],
+                                            IOMMU_PDE_NEXT_LEVEL_MASK,
+                                            IOMMU_PDE_NEXT_LEVEL_SHIFT);
+
+        address = gpa + amd_offset_level_address(index, level);
+        if ( (next_table_maddr != 0) && (next_level != 0) )
+        {
+            amd_dump_p2m_table_level(
+                maddr_to_page(next_table_maddr), level - 1, 
+                address, indent + 1);
+        }
+        else 
+        {
+            int i;
+
+            for ( i = 0; i < indent; i++ )
+                printk("  ");
+
+            printk("gfn: %08lx  mfn: %08lx\n",
+                   (unsigned long)PFN_DOWN(address), 
+                   (unsigned long)PFN_DOWN(next_table_maddr));
+        }
+    }
+
+    unmap_domain_page(table_vaddr);
+}
+
+static void amd_dump_p2m_table(struct domain *d)
+{
+    struct hvm_iommu *hd  = domain_hvm_iommu(d);
+
+    if ( !hd->root_table ) 
+        return;
+
+    printk("p2m table has %d levels\n", hd->paging_mode);
+    amd_dump_p2m_table_level(hd->root_table, hd->paging_mode, 0, 0);
+}
+
 const struct iommu_ops amd_iommu_ops = {
     .init = amd_iommu_domain_init,
     .dom0_init = amd_iommu_dom0_init,
@@ -531,4 +606,5 @@ const struct iommu_ops amd_iommu_ops = {
     .resume = amd_iommu_resume,
     .share_p2m = amd_iommu_share_p2m,
     .crash_shutdown = amd_iommu_suspend,
+    .dump_p2m_table = amd_dump_p2m_table,
 };
diff -r 472fc515a463 -r 9c7609a4fbc1 xen/drivers/passthrough/iommu.c
--- a/xen/drivers/passthrough/iommu.c	Tue Aug 07 18:37:31 2012 +0100
+++ b/xen/drivers/passthrough/iommu.c	Fri Aug 10 08:19:58 2012 -0700
@@ -18,11 +18,13 @@
 #include <asm/hvm/iommu.h>
 #include <xen/paging.h>
 #include <xen/guest_access.h>
+#include <xen/keyhandler.h>
 #include <xen/softirq.h>
 #include <xsm/xsm.h>
 
 static void parse_iommu_param(char *s);
 static int iommu_populate_page_table(struct domain *d);
+static void iommu_dump_p2m_table(unsigned char key);
 
 /*
  * The 'iommu' parameter enables the IOMMU.  Optional comma separated
@@ -54,6 +56,12 @@ bool_t __read_mostly amd_iommu_perdev_in
 
 DEFINE_PER_CPU(bool_t, iommu_dont_flush_iotlb);
 
+static struct keyhandler iommu_p2m_table = {
+    .diagnostic = 0,
+    .u.fn = iommu_dump_p2m_table,
+    .desc = "dump iommu p2m table"
+};
+
 static void __init parse_iommu_param(char *s)
 {
     char *ss;
@@ -119,6 +127,7 @@ void __init iommu_dom0_init(struct domai
     if ( !iommu_enabled )
         return;
 
+    register_keyhandler('o', &iommu_p2m_table);
     d->need_iommu = !!iommu_dom0_strict;
     if ( need_iommu(d) )
     {
@@ -654,6 +663,34 @@ int iommu_do_domctl(
     return ret;
 }
 
+static void iommu_dump_p2m_table(unsigned char key)
+{
+    struct domain *d;
+    const struct iommu_ops *ops;
+
+    if ( !iommu_enabled )
+    {
+        printk("IOMMU not enabled!\n");
+        return;
+    }
+
+    ops = iommu_get_ops();
+    for_each_domain(d)
+    {
+        if ( !d->domain_id )
+            continue;
+
+        if ( iommu_use_hap_pt(d) )
+        {
+            printk("\ndomain%d IOMMU p2m table shared with MMU: \n", d->domain_id);
+            continue;
+        }
+
+        printk("\ndomain%d IOMMU p2m table: \n", d->domain_id);
+        ops->dump_p2m_table(d);
+    }
+}
+
 /*
  * Local variables:
  * mode: C
diff -r 472fc515a463 -r 9c7609a4fbc1 xen/drivers/passthrough/vtd/iommu.c
--- a/xen/drivers/passthrough/vtd/iommu.c	Tue Aug 07 18:37:31 2012 +0100
+++ b/xen/drivers/passthrough/vtd/iommu.c	Fri Aug 10 08:19:58 2012 -0700
@@ -31,6 +31,7 @@
 #include <xen/pci.h>
 #include <xen/pci_regs.h>
 #include <xen/keyhandler.h>
+#include <xen/softirq.h>
 #include <asm/msi.h>
 #include <asm/irq.h>
 #if defined(__i386__) || defined(__x86_64__)
@@ -2365,6 +2366,71 @@ static void vtd_resume(void)
     }
 }
 
+static void vtd_dump_p2m_table_level(paddr_t pt_maddr, int level, paddr_t gpa, 
+                                     int indent)
+{
+    paddr_t address;
+    int i;
+    struct dma_pte *pt_vaddr, *pte;
+    int next_level;
+
+    if ( pt_maddr == 0 )
+        return;
+
+    pt_vaddr = map_vtd_domain_page(pt_maddr);
+    if ( pt_vaddr == NULL )
+    {
+        printk("Failed to map VT-D domain page %"PRIpaddr"\n", pt_maddr);
+        return;
+    }
+
+    next_level = level - 1;
+    for ( i = 0; i < PTE_NUM; i++ )
+    {
+        if ( !(i % 2) )
+            process_pending_softirqs();
+
+        pte = &pt_vaddr[i];
+        if ( !dma_pte_present(*pte) )
+            continue;
+
+        address = gpa + offset_level_address(i, level);
+        if ( next_level >= 1 ) 
+        {
+            vtd_dump_p2m_table_level(dma_pte_addr(*pte), next_level, 
+                                     address, indent + 1);
+        }
+        else
+        {
+            int j;
+
+            for ( j = 0; j < indent; j++ )
+                printk("  ");
+
+            printk("gfn: %08lx mfn: %08lx super=%d rd=%d wr=%d\n",
+                   (unsigned long)(address >> PAGE_SHIFT_4K),
+                   (unsigned long)(pte->val >> PAGE_SHIFT_4K),
+                   dma_pte_superpage(*pte)? 1 : 0,
+                   dma_pte_read(*pte)? 1 : 0,
+                   dma_pte_write(*pte)? 1 : 0);
+        }
+    }
+
+    unmap_vtd_domain_page(pt_vaddr);
+}
+
+static void vtd_dump_p2m_table(struct domain *d)
+{
+    struct hvm_iommu *hd;
+
+    if ( list_empty(&acpi_drhd_units) )
+        return;
+
+    hd = domain_hvm_iommu(d);
+    printk("p2m table has %d levels\n", agaw_to_level(hd->agaw));
+    vtd_dump_p2m_table_level(hd->pgd_maddr, agaw_to_level(hd->agaw), 0, 0);
+}
+
 const struct iommu_ops intel_iommu_ops = {
     .init = intel_iommu_domain_init,
     .dom0_init = intel_iommu_dom0_init,
@@ -2387,6 +2453,7 @@ const struct iommu_ops intel_iommu_ops =
     .crash_shutdown = vtd_crash_shutdown,
     .iotlb_flush = intel_iommu_iotlb_flush,
     .iotlb_flush_all = intel_iommu_iotlb_flush_all,
+    .dump_p2m_table = vtd_dump_p2m_table,
 };
 
 /*
diff -r 472fc515a463 -r 9c7609a4fbc1 xen/drivers/passthrough/vtd/iommu.h
--- a/xen/drivers/passthrough/vtd/iommu.h	Tue Aug 07 18:37:31 2012 +0100
+++ b/xen/drivers/passthrough/vtd/iommu.h	Fri Aug 10 08:19:58 2012 -0700
@@ -248,6 +248,8 @@ struct context_entry {
 #define level_to_offset_bits(l) (12 + (l - 1) * LEVEL_STRIDE)
 #define address_level_offset(addr, level) \
             ((addr >> level_to_offset_bits(level)) & LEVEL_MASK)
+#define offset_level_address(offset, level) \
+            ((u64)(offset) << level_to_offset_bits(level))
 #define level_mask(l) (((u64)(-1)) << level_to_offset_bits(l))
 #define level_size(l) (1 << level_to_offset_bits(l))
 #define align_to_level(addr, l) ((addr + level_size(l) - 1) & level_mask(l))
@@ -277,6 +279,9 @@ struct dma_pte {
 #define dma_set_pte_addr(p, addr) do {\
             (p).val |= ((addr) & PAGE_MASK_4K); } while (0)
 #define dma_pte_present(p) (((p).val & 3) != 0)
+#define dma_pte_superpage(p) (((p).val & (1<<7)) != 0)
+#define dma_pte_read(p) (((p).val & DMA_PTE_READ) != 0)
+#define dma_pte_write(p) (((p).val & DMA_PTE_WRITE) != 0)
 
 /* interrupt remap entry */
 struct iremap_entry {
diff -r 472fc515a463 -r 9c7609a4fbc1 xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h	Tue Aug 07 18:37:31 2012 +0100
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h	Fri Aug 10 08:19:58 2012 -0700
@@ -38,6 +38,10 @@
 #define PTE_PER_TABLE_ALLOC(entries)	\
 	PAGE_SIZE * (PTE_PER_TABLE_ALIGN(entries) >> PTE_PER_TABLE_SHIFT)
 
+#define amd_offset_level_address(offset, level) \
+      	((u64)(offset) << ((PTE_PER_TABLE_SHIFT * \
+                             (level - IOMMU_PAGING_MODE_LEVEL_1))))
+
 #define PCI_MIN_CAP_OFFSET	0x40
 #define PCI_MAX_CAP_BLOCKS	48
 #define PCI_CAP_PTR_MASK	0xFC
diff -r 472fc515a463 -r 9c7609a4fbc1 xen/include/xen/iommu.h
--- a/xen/include/xen/iommu.h	Tue Aug 07 18:37:31 2012 +0100
+++ b/xen/include/xen/iommu.h	Fri Aug 10 08:19:58 2012 -0700
@@ -141,6 +141,7 @@ struct iommu_ops {
     void (*crash_shutdown)(void);
     void (*iotlb_flush)(struct domain *d, unsigned long gfn, unsigned int page_count);
     void (*iotlb_flush_all)(struct domain *d);
+    void (*dump_p2m_table)(struct domain *d);
 };
 
 void iommu_update_ire_from_apic(unsigned int apic, unsigned int reg, unsigned int value);

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] dump_p2m_table: For IOMMU
  2012-08-10 13:41       ` Wei Wang
@ 2012-08-10 14:24         ` Jan Beulich
  0 siblings, 0 replies; 24+ messages in thread
From: Jan Beulich @ 2012-08-10 14:24 UTC (permalink / raw)
  To: Wei Wang; +Cc: tim, xiantao.zhang, Santosh Jodh, xen-devel

>>> On 10.08.12 at 15:41, Wei Wang <wei.wang2@amd.com> wrote:
> On 08/10/2012 02:52 PM, Jan Beulich wrote:
>>>>> On 10.08.12 at 12:50, Wei Wang<wei.wang2@amd.com>  wrote:
>>> On 08/09/2012 09:26 AM, Jan Beulich wrote:
>>>> (And similar
>>>> to the issue Santosh has already fixed here - the original
>>>> function pointlessly maps/unmaps the page when "level<= 1".
>>>> Furthermore, iommu_map.c has nice helper functions
>>>> iommu_next_level() and amd_iommu_is_pte_present() - why
>>>> aren't they in a header instead, so they could be used here,
>>>> avoiding the open coding of them?)
>>>
>>> Maybe those helps appears after the original function. I could sent a
>>> patch to clean up these:
>>> * do not map/unmap if level<= 1
>>> * move amd_iommu_is_pte_present() and iommu_next_level() to a header
>>> file. and use them in deallocate_next_page_table.
>>> * Using next_level instead of recalculation (if requested)
>>
>> Yes, please. As to using next_level - it depends, besides the above,
>>   on how bad it is if this is really wrong; an ASSERT() or BUG_ON()
>> might be in order here.
> 
> How about ASSERT(next_level <= (level - 1))?

But that would again mean levels can be skipped. If they can,
then using next_level is the way to go, and the assertion is fine.
If they can't, the assertion should say ==.

Jan

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] dump_p2m_table: For IOMMU
  2012-08-10 12:52     ` Jan Beulich
@ 2012-08-10 13:41       ` Wei Wang
  2012-08-10 14:24         ` Jan Beulich
  0 siblings, 1 reply; 24+ messages in thread
From: Wei Wang @ 2012-08-10 13:41 UTC (permalink / raw)
  To: Jan Beulich; +Cc: tim, xiantao.zhang, Santosh Jodh, xen-devel

On 08/10/2012 02:52 PM, Jan Beulich wrote:
>>>> On 10.08.12 at 12:50, Wei Wang<wei.wang2@amd.com>  wrote:
>> On 08/09/2012 09:26 AM, Jan Beulich wrote:
>>
>>> Wei - here I'm particularly worried about the use of "level - 1"
>>> instead of "next_level", which would similarly apply to the
>>> original function. If the way this is currently done is okay, then
>>> why is next_level being computed in the first place?
>>
>> I think that recalculation is to guarantee that this recursive function
>> returns. It should run at most "paging_mode" times no matter what
>> "next_level" says. But if we could assume that next level field in every
>> pde is correct, then using next level is fine to me.
>
> Especially in the dumping function we shouldn't assume too
> much. However, wasn't it that one can skip levels in your
> IOMMU implementation? That can't be handled correctly if
> always subtracting 1.

We have no skip levels yet. But since it checks (next_level != 0) before 
calling itself, it should not deallocate pages unexpectedly. But it will 
also waste some time in the loop if next_level == 0 but level > 1 (e.g. 
we have only l4, l2, l1 tables). So, yes, now using next_level with 
ASSERT also looks better to me.

>
>>> (And similar
>>> to the issue Santosh has already fixed here - the original
>>> function pointlessly maps/unmaps the page when "level<= 1".
>>> Furthermore, iommu_map.c has nice helper functions
>>> iommu_next_level() and amd_iommu_is_pte_present() - why
>>> aren't they in a header instead, so they could be used here,
>>> avoiding the open coding of them?)
>>
>> Maybe those helpers appeared after the original function. I could send a
>> patch to clean up these:
>> * do not map/unmap if level<= 1
>> * move amd_iommu_is_pte_present() and iommu_next_level() to a header
>> file. and use them in deallocate_next_page_table.
>> * Using next_level instead of recalculation (if requested)
>
> Yes, please. As to using next_level - it depends, besides the above,
>   on how bad it is if this is really wrong; an ASSERT() or BUG_ON()
> might be in order here.

How about ASSERT(next_level <= (level - 1))?

> Jan
>
>

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] dump_p2m_table: For IOMMU
  2012-08-10 10:50   ` Wei Wang
@ 2012-08-10 12:52     ` Jan Beulich
  2012-08-10 13:41       ` Wei Wang
  0 siblings, 1 reply; 24+ messages in thread
From: Jan Beulich @ 2012-08-10 12:52 UTC (permalink / raw)
  To: Wei Wang; +Cc: tim, xiantao.zhang, Santosh Jodh, xen-devel

>>> On 10.08.12 at 12:50, Wei Wang <wei.wang2@amd.com> wrote:
> On 08/09/2012 09:26 AM, Jan Beulich wrote:
> 
>> Wei - here I'm particularly worried about the use of "level - 1"
>> instead of "next_level", which would similarly apply to the
>> original function. If the way this is currently done is okay, then
>> why is next_level being computed in the first place?
> 
> I think that recalculation is to guarantee that this recursive function 
> returns. It should run at most "paging_mode" times no matter what 
> "next_level" says. But if we could assume that next level field in every 
> pde is correct, then using next level is fine to me.

Especially in the dumping function we shouldn't assume too
much. However, wasn't it that one can skip levels in your
IOMMU implementation? That can't be handled correctly if
always subtracting 1.

>> (And similar
>> to the issue Santosh has already fixed here - the original
>> function pointlessly maps/unmaps the page when "level<= 1".
>> Furthermore, iommu_map.c has nice helper functions
>> iommu_next_level() and amd_iommu_is_pte_present() - why
>> aren't they in a header instead, so they could be used here,
>> avoiding the open coding of them?)
> 
> Maybe those helpers appeared after the original function. I could send a 
> patch to clean up these:
> * do not map/unmap if level <= 1
> * move amd_iommu_is_pte_present() and iommu_next_level() to a header 
> file. and use them in deallocate_next_page_table.
> * Using next_level instead of recalculation (if requested)

Yes, please. As to using next_level - it depends, besides the above,
 on how bad it is if this is really wrong; an ASSERT() or BUG_ON()
might be in order here.

Jan

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] dump_p2m_table: For IOMMU
  2012-08-09  7:26 ` Jan Beulich
  2012-08-10  1:41   ` Santosh Jodh
@ 2012-08-10 10:50   ` Wei Wang
  2012-08-10 12:52     ` Jan Beulich
  1 sibling, 1 reply; 24+ messages in thread
From: Wei Wang @ 2012-08-10 10:50 UTC (permalink / raw)
  To: Jan Beulich; +Cc: tim, xiantao.zhang, Santosh Jodh, xen-devel

On 08/09/2012 09:26 AM, Jan Beulich wrote:

> Wei - here I'm particularly worried about the use of "level - 1"
> instead of "next_level", which would similarly apply to the
> original function. If the way this is currently done is okay, then
> why is next_level being computed in the first place?

I think that recalculation is to guarantee that this recursive function 
returns. It should run at most "paging_mode" times no matter what 
"next_level" says. But if we could assume that next level field in every 
pde is correct, then using next level is fine to me.

(And similar
> to the issue Santosh has already fixed here - the original
> function pointlessly maps/unmaps the page when "level<= 1".
> Furthermore, iommu_map.c has nice helper functions
> iommu_next_level() and amd_iommu_is_pte_present() - why
> aren't they in a header instead, so they could be used here,
> avoiding the open coding of them?)

Maybe those helpers appeared after the original function. I could send a 
patch to clean up these:
* do not map/unmap if level <= 1
* move amd_iommu_is_pte_present() and iommu_next_level() to a header 
file. and use them in deallocate_next_page_table.
* Using next_level instead of recalculation (if requested)

Thanks,
Wei

>> +        }
>> +
>> +        if ( present )
>> +        {
>> +            printk("gfn: %016"PRIx64"  mfn: %016"PRIx64"\n",
>> +                   address>>  PAGE_SHIFT, next_table_maddr>>  PAGE_SHIFT);
>
> I'd prefer you to use PFN_DOWN() here.
>
> Also, depth first, as requested by Tim, to me doesn't mean
> recursing before printing. I think you really want to print first,
> then recurse. Otherwise how would the output be made sense
> of?
>
>> +        }
>> +    }
>> +
>> +    unmap_domain_page(table_vaddr);
>> +}
>> ...
>> --- a/xen/drivers/passthrough/iommu.c	Tue Aug 07 18:37:31 2012 +0100
>> +++ b/xen/drivers/passthrough/iommu.c	Wed Aug 08 09:56:50 2012 -0700
>> @@ -54,6 +55,8 @@ bool_t __read_mostly amd_iommu_perdev_in
>>
>>   DEFINE_PER_CPU(bool_t, iommu_dont_flush_iotlb);
>>
>> +void setup_iommu_dump(void);
>> +
>
> This is completely bogus. If the function was called from another
> source file, the declaration would belong into a header file. Since
> it's only used here, it ought to be static.
>
>>   static void __init parse_iommu_param(char *s)
>>   {
>>       char *ss;
>> @@ -119,6 +122,7 @@ void __init iommu_dom0_init(struct domai
>>       if ( !iommu_enabled )
>>           return;
>>
>> +    setup_iommu_dump();
>>       d->need_iommu = !!iommu_dom0_strict;
>>       if ( need_iommu(d) )
>>       {
>> ...
>> +void __init setup_iommu_dump(void)
>> +{
>> +    register_keyhandler('o',&iommu_p2m_table);
>> +}
>
> Furthermore, there's no real need for a separate function here
> anyway. Just call register_key_handler() directly. Or
> alternatively this ought to match other code doing the same -
> using an initcall.
>
>> --- a/xen/drivers/passthrough/vtd/iommu.c	Tue Aug 07 18:37:31 2012 +0100
>> +++ b/xen/drivers/passthrough/vtd/iommu.c	Wed Aug 08 09:56:50 2012 -0700
>> +static void vtd_dump_p2m_table_level(u64 pt_maddr, int level, u64 gpa)
>> +{
>> +    u64 address;
>
> Again, both gpa and address ought to be paddr_t, and the format
> specifiers should match.
>
>> +    int i;
>> +    struct dma_pte *pt_vaddr, *pte;
>> +    int next_level;
>> +
>> +    if ( pt_maddr == 0 )
>> +        return;
>> +
>> +    pt_vaddr = (struct dma_pte *)map_vtd_domain_page(pt_maddr);
>
> Pointless cast.
>
>> +    if ( pt_vaddr == NULL )
>> +    {
>> +        printk("Failed to map VT-D domain page %016"PRIx64"\n", pt_maddr);
>> +        return;
>> +    }
>> +
>> +    next_level = level - 1;
>> +    for ( i = 0; i<  PTE_NUM; i++ )
>> +    {
>> +        if ( !(i % 2) )
>> +            process_pending_softirqs();
>> +
>> +        pte =&pt_vaddr[i];
>> +        if ( !dma_pte_present(*pte) )
>> +            continue;
>> +
>> +        address = gpa + offset_level_address(i, level);
>> +        if ( next_level>= 1 )
>> +            vtd_dump_p2m_table_level(dma_pte_addr(*pte), next_level, address);
>> +
>> +        if ( level == 1 )
>> +            printk("gfn: %016"PRIx64" mfn: %016"PRIx64" superpage=%d\n",
>> +                    address>>  PAGE_SHIFT_4K, pte->val>>  PAGE_SHIFT_4K, dma_pte_superpage(*pte)? 1 : 0);
>
> Why do you print leaf (level 1) tables here only?
>
> And the last line certainly is above 80 chars, so needs breaking up.
>
> (Also, just to avoid you needing to do another iteration: Don't
> switch to PFN_DOWN() here.)
>
> I further wonder whether "superpage" alone is enough - don't we
> have both 2M and 1G pages? Of course, that would become mute
> if higher levels got also dumped (as then this knowledge is implicit).
>
> Which reminds me to ask that both here and in the AMD code the
> recursion level should probably be reflected by indenting the
> printed strings.
>
> Jan
>

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] dump_p2m_table: For IOMMU
  2012-08-09  7:26 ` Jan Beulich
@ 2012-08-10  1:41   ` Santosh Jodh
  2012-08-10 10:50   ` Wei Wang
  1 sibling, 0 replies; 24+ messages in thread
From: Santosh Jodh @ 2012-08-10  1:41 UTC (permalink / raw)
  To: Jan Beulich, wei.wang2; +Cc: Tim (Xen.org), xiantao.zhang, xen-devel

Thanks for the detailed feedback. Please see inline:


> +    int index, next_level, present;
> +    u32 *entry;
> +
> +    if ( level <= 1 )
> +        return;
> +
> +    table_vaddr = __map_domain_page(pg);
> +    if ( table_vaddr == NULL )
> +    {
> +        printk("Failed to map IOMMU domain page %016"PRIx64"\n", 
> + page_to_maddr(pg));

We specifically have PRIpaddr for this purpose.
[Santosh Jodh] Ah - one more format specifier :-). Will change it.

> +        }
> +
> +        if ( present )
> +        {
> +            printk("gfn: %016"PRIx64"  mfn: %016"PRIx64"\n",
> +                   address >> PAGE_SHIFT, next_table_maddr >> 
> + PAGE_SHIFT);

I'd prefer you to use PFN_DOWN() here.
[Santosh Jodh] ok.

Also, depth first, as requested by Tim, to me doesn't mean recursing before printing. I think you really want to print first, then recurse. Otherwise how would the output be made sense of?
[Santosh Jodh] it gives a simple gfp -> mfn map. With nested printing, you get the PD and PT addresses printed - which are not super useful. Anyway, I will change it.

>  static void __init parse_iommu_param(char *s)  {
>      char *ss;
> @@ -119,6 +122,7 @@ void __init iommu_dom0_init(struct domai
>      if ( !iommu_enabled )
>          return;
>  
> +    setup_iommu_dump();
>      d->need_iommu = !!iommu_dom0_strict;
>      if ( need_iommu(d) )
>      {
>...
> +void __init setup_iommu_dump(void)
> +{
> +    register_keyhandler('o', &iommu_p2m_table); }

Furthermore, there's no real need for a separate function here anyway. Just call register_key_handler() directly. Or alternatively this ought to match other code doing the same - using an initcall.
[Santosh Jodh] will just call register_key_handler. I had to rearrange code in the file to avoid forward declarations. So much for trying to contain my changes to the end of the file :-)

> --- a/xen/drivers/passthrough/vtd/iommu.c	Tue Aug 07 18:37:31 2012 +0100
> +++ b/xen/drivers/passthrough/vtd/iommu.c	Wed Aug 08 09:56:50 2012 -0700
> +static void vtd_dump_p2m_table_level(u64 pt_maddr, int level, u64 
> +gpa) {
> +    u64 address;

Again, both gpa and address ought to be paddr_t, and the format specifiers should match.
[Santosh Jodh] will do.

> +    int i;
> +    struct dma_pte *pt_vaddr, *pte;
> +    int next_level;
> +
> +    if ( pt_maddr == 0 )
> +        return;
> +
> +    pt_vaddr = (struct dma_pte *)map_vtd_domain_page(pt_maddr);

Pointless cast.
[Santosh Jodh] yep - gone.

> +    if ( pt_vaddr == NULL )
> +    {
> +        printk("Failed to map VT-D domain page %016"PRIx64"\n", pt_maddr);
> +        return;
> +    }
> +
> +    next_level = level - 1;
> +    for ( i = 0; i < PTE_NUM; i++ )
> +    {
> +        if ( !(i % 2) )
> +            process_pending_softirqs();
> +
> +        pte = &pt_vaddr[i];
> +        if ( !dma_pte_present(*pte) )
> +            continue;
> +        
> +        address = gpa + offset_level_address(i, level);
> +        if ( next_level >= 1 )
> +            vtd_dump_p2m_table_level(dma_pte_addr(*pte), next_level, 
> + address);
> +
> +        if ( level == 1 )
> +            printk("gfn: %016"PRIx64" mfn: %016"PRIx64" superpage=%d\n", 
> +                    address >> PAGE_SHIFT_4K, pte->val >> 
> + PAGE_SHIFT_4K, dma_pte_superpage(*pte)? 1 : 0);

Why do you print leaf (level 1) tables here only?
[Santosh Jodh] as I said above - I was dumping gfn -> mfn. I will make it indent and print all levels.

And the last line certainly is above 80 chars, so needs breaking up.
[Santosh Jodh] yep - done.

(Also, just to avoid you needing to do another iteration: Don't switch to PFN_DOWN() here.)
[Santosh Jodh] ok

I further wonder whether "superpage" alone is enough - don't we have both 2M and 1G pages? Of course, that would become moot if higher levels got also dumped (as then this knowledge is implicit).

Which reminds me to ask that both here and in the AMD code the recursion level should probably be reflected by indenting the printed strings.
[Santosh Jodh] will print indented and all levels.

Jan

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] dump_p2m_table: For IOMMU
  2012-08-08 17:17 Santosh Jodh
@ 2012-08-09  7:26 ` Jan Beulich
  2012-08-10  1:41   ` Santosh Jodh
  2012-08-10 10:50   ` Wei Wang
  0 siblings, 2 replies; 24+ messages in thread
From: Jan Beulich @ 2012-08-09  7:26 UTC (permalink / raw)
  To: wei.wang2, Santosh Jodh; +Cc: tim, xiantao.zhang, xen-devel

>>> On 08.08.12 at 19:17, Santosh Jodh <santosh.jodh@citrix.com> wrote:
> New key handler 'o' to dump the IOMMU p2m table for each domain.
> Skips dumping table for domain0.
> Intel and AMD specific iommu_ops handler for dumping p2m table.

I'm sorry Santosh, but this is still not quite right.

> @@ -512,6 +513,69 @@ static int amd_iommu_group_id(u16 seg, u
>  
>  #include <asm/io_apic.h>
>  
> +static void amd_dump_p2m_table_level(struct page_info* pg, int level, u64 gpa)

static void amd_dump_p2m_table_level(struct page_info *pg, int level, paddr_t gpa)

> +{
> +    u64 address;

paddr_t

> +    void *table_vaddr, *pde;
> +    u64 next_table_maddr;

This ought to also be paddr_t, but I realize that the whole AMD
IOMMU code is using u64 instead. So here it's probably okay to
remain u64.

> +    int index, next_level, present;
> +    u32 *entry;
> +
> +    if ( level <= 1 )
> +        return;
> +
> +    table_vaddr = __map_domain_page(pg);
> +    if ( table_vaddr == NULL )
> +    {
> +        printk("Failed to map IOMMU domain page %016"PRIx64"\n", page_to_maddr(pg));

We specifically have PRIpaddr for this purpose.

> +        return;
> +    }
> +
> +    for ( index = 0; index < PTE_PER_TABLE_SIZE; index++ )
> +    {
> +        if ( !(index % 2) )
> +            process_pending_softirqs();
> +
> +        pde = table_vaddr + (index * IOMMU_PAGE_TABLE_ENTRY_SIZE);
> +        next_table_maddr = amd_iommu_get_next_table_from_pte(pde);
> +        entry = (u32*)pde;
> +
> +        next_level = get_field_from_reg_u32(entry[0],
> +                                            IOMMU_PDE_NEXT_LEVEL_MASK,
> +                                            IOMMU_PDE_NEXT_LEVEL_SHIFT);
> +
> +        present = get_field_from_reg_u32(entry[0],
> +                                         IOMMU_PDE_PRESENT_MASK,
> +                                         IOMMU_PDE_PRESENT_SHIFT);
> +
> +        address = gpa + amd_offset_level_address(index, level);
> +        if ( (next_table_maddr != 0) && (next_level != 0)
> +            && present )
> +        {
> +            amd_dump_p2m_table_level(
> +                maddr_to_page(next_table_maddr), level - 1, address);

I guess I see now that you started by cloning
deallocate_next_page_table(), which has almost all the same
issues I have been complaining about here.

Wei - here I'm particularly worried about the use of "level - 1"
instead of "next_level", which would similarly apply to the
original function. If the way this is currently done is okay, then
why is next_level being computed in the first place? (And similar
to the issue Santosh has already fixed here - the original
function pointlessly maps/unmaps the page when "level <= 1".
Furthermore, iommu_map.c has nice helper functions
iommu_next_level() and amd_iommu_is_pte_present() - why
aren't they in a header instead, so they could be used here,
avoiding the open coding of them?)

> +        }
> +
> +        if ( present )
> +        {
> +            printk("gfn: %016"PRIx64"  mfn: %016"PRIx64"\n",
> +                   address >> PAGE_SHIFT, next_table_maddr >> PAGE_SHIFT);

I'd prefer you to use PFN_DOWN() here.

Also, depth first, as requested by Tim, to me doesn't mean
recursing before printing. I think you really want to print first,
> then recurse. Otherwise how would the output be made sense
of?

> +        }
> +    }
> +
> +    unmap_domain_page(table_vaddr);
> +}
>...
> --- a/xen/drivers/passthrough/iommu.c	Tue Aug 07 18:37:31 2012 +0100
> +++ b/xen/drivers/passthrough/iommu.c	Wed Aug 08 09:56:50 2012 -0700
> @@ -54,6 +55,8 @@ bool_t __read_mostly amd_iommu_perdev_in
>  
>  DEFINE_PER_CPU(bool_t, iommu_dont_flush_iotlb);
>  
> +void setup_iommu_dump(void);
> +

This is completely bogus. If the function was called from another
source file, the declaration would belong into a header file. Since
it's only used here, it ought to be static.

>  static void __init parse_iommu_param(char *s)
>  {
>      char *ss;
> @@ -119,6 +122,7 @@ void __init iommu_dom0_init(struct domai
>      if ( !iommu_enabled )
>          return;
>  
> +    setup_iommu_dump();
>      d->need_iommu = !!iommu_dom0_strict;
>      if ( need_iommu(d) )
>      {
>...
> +void __init setup_iommu_dump(void)
> +{
> +    register_keyhandler('o', &iommu_p2m_table);
> +}

Furthermore, there's no real need for a separate function here
anyway. Just call register_key_handler() directly. Or
alternatively this ought to match other code doing the same -
using an initcall.

> --- a/xen/drivers/passthrough/vtd/iommu.c	Tue Aug 07 18:37:31 2012 +0100
> +++ b/xen/drivers/passthrough/vtd/iommu.c	Wed Aug 08 09:56:50 2012 -0700
> +static void vtd_dump_p2m_table_level(u64 pt_maddr, int level, u64 gpa)
> +{
> +    u64 address;

Again, both gpa and address ought to be paddr_t, and the format
specifiers should match.

> +    int i;
> +    struct dma_pte *pt_vaddr, *pte;
> +    int next_level;
> +
> +    if ( pt_maddr == 0 )
> +        return;
> +
> +    pt_vaddr = (struct dma_pte *)map_vtd_domain_page(pt_maddr);

Pointless cast.

> +    if ( pt_vaddr == NULL )
> +    {
> +        printk("Failed to map VT-D domain page %016"PRIx64"\n", pt_maddr);
> +        return;
> +    }
> +
> +    next_level = level - 1;
> +    for ( i = 0; i < PTE_NUM; i++ )
> +    {
> +        if ( !(i % 2) )
> +            process_pending_softirqs();
> +
> +        pte = &pt_vaddr[i];
> +        if ( !dma_pte_present(*pte) )
> +            continue;
> +        
> +        address = gpa + offset_level_address(i, level);
> +        if ( next_level >= 1 )
> +            vtd_dump_p2m_table_level(dma_pte_addr(*pte), next_level, address);
> +
> +        if ( level == 1 )
> +            printk("gfn: %016"PRIx64" mfn: %016"PRIx64" superpage=%d\n", 
> +                    address >> PAGE_SHIFT_4K, pte->val >> PAGE_SHIFT_4K, dma_pte_superpage(*pte)? 1 : 0);

Why do you print leaf (level 1) tables here only?

And the last line certainly is above 80 chars, so needs breaking up.

(Also, just to avoid you needing to do another iteration: Don't
switch to PFN_DOWN() here.)

I further wonder whether "superpage" alone is enough - don't we
have both 2M and 1G pages? Of course, that would become moot
if higher levels got also dumped (as then this knowledge is implicit).

Which reminds me to ask that both here and in the AMD code the
recursion level should probably be reflected by indenting the
printed strings.

Jan

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH] dump_p2m_table: For IOMMU
@ 2012-08-08 17:17 Santosh Jodh
  2012-08-09  7:26 ` Jan Beulich
  0 siblings, 1 reply; 24+ messages in thread
From: Santosh Jodh @ 2012-08-08 17:17 UTC (permalink / raw)
  To: xen-devel; +Cc: wei.wang2, tim, xiantao.zhang

New key handler 'o' to dump the IOMMU p2m table for each domain.
Skips dumping table for domain0.
Intel and AMD specific iommu_ops handler for dumping p2m table.

Signed-off-by: Santosh Jodh <santosh.jodh@citrix.com>

diff -r 472fc515a463 -r 8deb7c7a25c4 xen/drivers/passthrough/amd/pci_amd_iommu.c
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c	Tue Aug 07 18:37:31 2012 +0100
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c	Wed Aug 08 09:56:50 2012 -0700
@@ -22,6 +22,7 @@
 #include <xen/pci.h>
 #include <xen/pci_regs.h>
 #include <xen/paging.h>
+#include <xen/softirq.h>
 #include <asm/hvm/iommu.h>
 #include <asm/amd-iommu.h>
 #include <asm/hvm/svm/amd-iommu-proto.h>
@@ -512,6 +513,69 @@ static int amd_iommu_group_id(u16 seg, u
 
 #include <asm/io_apic.h>
 
+static void amd_dump_p2m_table_level(struct page_info* pg, int level, u64 gpa)
+{
+    u64 address;
+    void *table_vaddr, *pde;
+    u64 next_table_maddr;
+    int index, next_level, present;
+    u32 *entry;
+
+    if ( level <= 1 )
+        return;
+
+    table_vaddr = __map_domain_page(pg);
+    if ( table_vaddr == NULL )
+    {
+        printk("Failed to map IOMMU domain page %016"PRIx64"\n", page_to_maddr(pg));
+        return;
+    }
+
+    for ( index = 0; index < PTE_PER_TABLE_SIZE; index++ )
+    {
+        if ( !(index % 2) )
+            process_pending_softirqs();
+
+        pde = table_vaddr + (index * IOMMU_PAGE_TABLE_ENTRY_SIZE);
+        next_table_maddr = amd_iommu_get_next_table_from_pte(pde);
+        entry = (u32*)pde;
+
+        next_level = get_field_from_reg_u32(entry[0],
+                                            IOMMU_PDE_NEXT_LEVEL_MASK,
+                                            IOMMU_PDE_NEXT_LEVEL_SHIFT);
+
+        present = get_field_from_reg_u32(entry[0],
+                                         IOMMU_PDE_PRESENT_MASK,
+                                         IOMMU_PDE_PRESENT_SHIFT);
+
+        address = gpa + amd_offset_level_address(index, level);
+        if ( (next_table_maddr != 0) && (next_level != 0)
+            && present )
+        {
+            amd_dump_p2m_table_level(
+                maddr_to_page(next_table_maddr), level - 1, address);
+        }
+
+        if ( present )
+        {
+            printk("gfn: %016"PRIx64"  mfn: %016"PRIx64"\n",
+                   address >> PAGE_SHIFT, next_table_maddr >> PAGE_SHIFT);
+        }
+    }
+
+    unmap_domain_page(table_vaddr);
+}
+
+static void amd_dump_p2m_table(struct domain *d)
+{
+    struct hvm_iommu *hd  = domain_hvm_iommu(d);
+
+    if ( !hd->root_table ) 
+        return;
+
+    amd_dump_p2m_table_level(hd->root_table, hd->paging_mode, 0);
+}
+
 const struct iommu_ops amd_iommu_ops = {
     .init = amd_iommu_domain_init,
     .dom0_init = amd_iommu_dom0_init,
@@ -531,4 +595,5 @@ const struct iommu_ops amd_iommu_ops = {
     .resume = amd_iommu_resume,
     .share_p2m = amd_iommu_share_p2m,
     .crash_shutdown = amd_iommu_suspend,
+    .dump_p2m_table = amd_dump_p2m_table,
 };
diff -r 472fc515a463 -r 8deb7c7a25c4 xen/drivers/passthrough/iommu.c
--- a/xen/drivers/passthrough/iommu.c	Tue Aug 07 18:37:31 2012 +0100
+++ b/xen/drivers/passthrough/iommu.c	Wed Aug 08 09:56:50 2012 -0700
@@ -18,6 +18,7 @@
 #include <asm/hvm/iommu.h>
 #include <xen/paging.h>
 #include <xen/guest_access.h>
+#include <xen/keyhandler.h>
 #include <xen/softirq.h>
 #include <xsm/xsm.h>
 
@@ -54,6 +55,8 @@ bool_t __read_mostly amd_iommu_perdev_in
 
 DEFINE_PER_CPU(bool_t, iommu_dont_flush_iotlb);
 
+void setup_iommu_dump(void);
+
 static void __init parse_iommu_param(char *s)
 {
     char *ss;
@@ -119,6 +122,7 @@ void __init iommu_dom0_init(struct domai
     if ( !iommu_enabled )
         return;
 
+    setup_iommu_dump();
     d->need_iommu = !!iommu_dom0_strict;
     if ( need_iommu(d) )
     {
@@ -654,6 +658,46 @@ int iommu_do_domctl(
     return ret;
 }
 
+static void iommu_dump_p2m_table(unsigned char key)
+{
+    struct domain *d;
+    const struct iommu_ops *ops;
+
+    if ( !iommu_enabled )
+    {
+        printk("IOMMU not enabled!\n");
+        return;
+    }
+
+    ops = iommu_get_ops();
+    for_each_domain(d)
+    {
+        if ( !d->domain_id )
+            continue;
+
+        if ( iommu_use_hap_pt(d) )
+        {
+            printk("\ndomain%d IOMMU p2m table shared with MMU: \n", d->domain_id);
+            continue;
+        }
+
+        printk("\ndomain%d IOMMU p2m table: \n", d->domain_id);
+        ops->dump_p2m_table(d);
+    }
+}
+
+static struct keyhandler iommu_p2m_table = {
+    .diagnostic = 0,
+    .u.fn = iommu_dump_p2m_table,
+    .desc = "dump iommu p2m table"
+};
+
+void __init setup_iommu_dump(void)
+{
+    register_keyhandler('o', &iommu_p2m_table);
+}
+
+
 /*
  * Local variables:
  * mode: C
diff -r 472fc515a463 -r 8deb7c7a25c4 xen/drivers/passthrough/vtd/iommu.c
--- a/xen/drivers/passthrough/vtd/iommu.c	Tue Aug 07 18:37:31 2012 +0100
+++ b/xen/drivers/passthrough/vtd/iommu.c	Wed Aug 08 09:56:50 2012 -0700
@@ -31,6 +31,7 @@
 #include <xen/pci.h>
 #include <xen/pci_regs.h>
 #include <xen/keyhandler.h>
+#include <xen/softirq.h>
 #include <asm/msi.h>
 #include <asm/irq.h>
 #if defined(__i386__) || defined(__x86_64__)
@@ -2365,6 +2366,56 @@ static void vtd_resume(void)
     }
 }
 
+static void vtd_dump_p2m_table_level(u64 pt_maddr, int level, u64 gpa)
+{
+    u64 address;
+    int i;
+    struct dma_pte *pt_vaddr, *pte;
+    int next_level;
+
+    if ( pt_maddr == 0 )
+        return;
+
+    pt_vaddr = (struct dma_pte *)map_vtd_domain_page(pt_maddr);
+    if ( pt_vaddr == NULL )
+    {
+        printk("Failed to map VT-D domain page %016"PRIx64"\n", pt_maddr);
+        return;
+    }
+
+    next_level = level - 1;
+    for ( i = 0; i < PTE_NUM; i++ )
+    {
+        if ( !(i % 2) )
+            process_pending_softirqs();
+
+        pte = &pt_vaddr[i];
+        if ( !dma_pte_present(*pte) )
+            continue;
+        
+        address = gpa + offset_level_address(i, level);
+        if ( next_level >= 1 )
+            vtd_dump_p2m_table_level(dma_pte_addr(*pte), next_level, address);
+
+        if ( level == 1 )
+            printk("gfn: %016"PRIx64" mfn: %016"PRIx64" superpage=%d\n", 
+                    address >> PAGE_SHIFT_4K, pte->val >> PAGE_SHIFT_4K, dma_pte_superpage(*pte)? 1 : 0);
+    }
+
+    unmap_vtd_domain_page(pt_vaddr);
+}
+
+static void vtd_dump_p2m_table(struct domain *d)
+{
+    struct hvm_iommu *hd;
+
+    if ( list_empty(&acpi_drhd_units) )
+        return;
+
+    hd = domain_hvm_iommu(d);
+    vtd_dump_p2m_table_level(hd->pgd_maddr, agaw_to_level(hd->agaw), 0);
+}
+
 const struct iommu_ops intel_iommu_ops = {
     .init = intel_iommu_domain_init,
     .dom0_init = intel_iommu_dom0_init,
@@ -2387,6 +2438,7 @@ const struct iommu_ops intel_iommu_ops =
     .crash_shutdown = vtd_crash_shutdown,
     .iotlb_flush = intel_iommu_iotlb_flush,
     .iotlb_flush_all = intel_iommu_iotlb_flush_all,
+    .dump_p2m_table = vtd_dump_p2m_table,
 };
 
 /*
diff -r 472fc515a463 -r 8deb7c7a25c4 xen/drivers/passthrough/vtd/iommu.h
--- a/xen/drivers/passthrough/vtd/iommu.h	Tue Aug 07 18:37:31 2012 +0100
+++ b/xen/drivers/passthrough/vtd/iommu.h	Wed Aug 08 09:56:50 2012 -0700
@@ -248,6 +248,8 @@ struct context_entry {
 #define level_to_offset_bits(l) (12 + (l - 1) * LEVEL_STRIDE)
 #define address_level_offset(addr, level) \
             ((addr >> level_to_offset_bits(level)) & LEVEL_MASK)
+#define offset_level_address(offset, level) \
+            ((u64)(offset) << level_to_offset_bits(level))
 #define level_mask(l) (((u64)(-1)) << level_to_offset_bits(l))
 #define level_size(l) (1 << level_to_offset_bits(l))
 #define align_to_level(addr, l) ((addr + level_size(l) - 1) & level_mask(l))
@@ -277,6 +279,7 @@ struct dma_pte {
 #define dma_set_pte_addr(p, addr) do {\
             (p).val |= ((addr) & PAGE_MASK_4K); } while (0)
 #define dma_pte_present(p) (((p).val & 3) != 0)
+#define dma_pte_superpage(p) (((p).val & (1<<7)) != 0)
 
 /* interrupt remap entry */
 struct iremap_entry {
diff -r 472fc515a463 -r 8deb7c7a25c4 xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h	Tue Aug 07 18:37:31 2012 +0100
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h	Wed Aug 08 09:56:50 2012 -0700
@@ -38,6 +38,10 @@
 #define PTE_PER_TABLE_ALLOC(entries)	\
 	PAGE_SIZE * (PTE_PER_TABLE_ALIGN(entries) >> PTE_PER_TABLE_SHIFT)
 
+#define amd_offset_level_address(offset, level) \
+      	((u64)(offset) << ((PTE_PER_TABLE_SHIFT * \
+                             (level - IOMMU_PAGING_MODE_LEVEL_1))))
+
 #define PCI_MIN_CAP_OFFSET	0x40
 #define PCI_MAX_CAP_BLOCKS	48
 #define PCI_CAP_PTR_MASK	0xFC
diff -r 472fc515a463 -r 8deb7c7a25c4 xen/include/xen/iommu.h
--- a/xen/include/xen/iommu.h	Tue Aug 07 18:37:31 2012 +0100
+++ b/xen/include/xen/iommu.h	Wed Aug 08 09:56:50 2012 -0700
@@ -141,6 +141,7 @@ struct iommu_ops {
     void (*crash_shutdown)(void);
     void (*iotlb_flush)(struct domain *d, unsigned long gfn, unsigned int page_count);
     void (*iotlb_flush_all)(struct domain *d);
+    void (*dump_p2m_table)(struct domain *d);
 };
 
 void iommu_update_ire_from_apic(unsigned int apic, unsigned int reg, unsigned int value);

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] dump_p2m_table: For IOMMU
  2012-08-08 16:21 ` Jan Beulich
@ 2012-08-08 17:17   ` Santosh Jodh
  0 siblings, 0 replies; 24+ messages in thread
From: Santosh Jodh @ 2012-08-08 17:17 UTC (permalink / raw)
  To: Jan Beulich; +Cc: wei.wang2, Tim (Xen.org), xiantao.zhang, xen-devel

Thanks Jan. No - it does not compile on x86_32. Who runs x86_32 xen anyway? Just kidding...

BTW, I hate printf format specifiers...

Thanks,
Santosh

-----Original Message-----
From: Jan Beulich [mailto:JBeulich@suse.com] 
Sent: Wednesday, August 08, 2012 9:21 AM
To: Santosh Jodh
Cc: wei.wang2@amd.com; xiantao.zhang@intel.com; xen-devel; Tim (Xen.org)
Subject: Re: [Xen-devel] [PATCH] dump_p2m_table: For IOMMU

>>> On 08.08.12 at 17:56, Santosh Jodh <santosh.jodh@citrix.com> wrote:
> +    if ( table_vaddr == NULL )
> +    {
> +        printk("Failed to map IOMMU domain page %-16lx\n", 
> + page_to_maddr(pg));

Does that build on x86-32? Similar format specifier problems appear to be present elsewhere in the patch.

Jan

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] dump_p2m_table: For IOMMU
  2012-08-08 15:56 Santosh Jodh
@ 2012-08-08 16:21 ` Jan Beulich
  2012-08-08 17:17   ` Santosh Jodh
  0 siblings, 1 reply; 24+ messages in thread
From: Jan Beulich @ 2012-08-08 16:21 UTC (permalink / raw)
  To: Santosh Jodh; +Cc: wei.wang2, tim, xiantao.zhang, xen-devel

>>> On 08.08.12 at 17:56, Santosh Jodh <santosh.jodh@citrix.com> wrote:
> +    if ( table_vaddr == NULL )
> +    {
> +        printk("Failed to map IOMMU domain page %-16lx\n", page_to_maddr(pg));

Does that build on x86-32? Similar format specifier problems
appear to be present elsewhere in the patch.

Jan

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH] dump_p2m_table: For IOMMU
@ 2012-08-08 15:56 Santosh Jodh
  2012-08-08 16:21 ` Jan Beulich
  0 siblings, 1 reply; 24+ messages in thread
From: Santosh Jodh @ 2012-08-08 15:56 UTC (permalink / raw)
  To: xen-devel; +Cc: wei.wang2, tim, xiantao.zhang

New key handler 'o' to dump the IOMMU p2m table for each domain.
Skips dumping table for domain0.
Intel and AMD specific iommu_ops handler for dumping p2m table.

Signed-off-by: Santosh Jodh <santosh.jodh@citrix.com>

diff -r 472fc515a463 -r 68946c7e1d67 xen/drivers/passthrough/amd/pci_amd_iommu.c
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c	Tue Aug 07 18:37:31 2012 +0100
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c	Wed Aug 08 08:55:49 2012 -0700
@@ -22,6 +22,7 @@
 #include <xen/pci.h>
 #include <xen/pci_regs.h>
 #include <xen/paging.h>
+#include <xen/softirq.h>
 #include <asm/hvm/iommu.h>
 #include <asm/amd-iommu.h>
 #include <asm/hvm/svm/amd-iommu-proto.h>
@@ -512,6 +513,69 @@ static int amd_iommu_group_id(u16 seg, u
 
 #include <asm/io_apic.h>
 
+static void amd_dump_p2m_table_level(struct page_info* pg, int level, u64 gpa)
+{
+    u64 address;
+    void *table_vaddr, *pde;
+    u64 next_table_maddr;
+    int index, next_level, present;
+    u32 *entry;
+
+    if ( level <= 1 )
+        return;
+
+    table_vaddr = __map_domain_page(pg);
+    if ( table_vaddr == NULL )
+    {
+        printk("Failed to map IOMMU domain page %-16lx\n", page_to_maddr(pg));
+        return;
+    }
+
+    for ( index = 0; index < PTE_PER_TABLE_SIZE; index++ )
+    {
+        if ( !(index % 2) )
+            process_pending_softirqs();
+
+        pde = table_vaddr + (index * IOMMU_PAGE_TABLE_ENTRY_SIZE);
+        next_table_maddr = amd_iommu_get_next_table_from_pte(pde);
+        entry = (u32*)pde;
+
+        next_level = get_field_from_reg_u32(entry[0],
+                                            IOMMU_PDE_NEXT_LEVEL_MASK,
+                                            IOMMU_PDE_NEXT_LEVEL_SHIFT);
+
+        present = get_field_from_reg_u32(entry[0],
+                                         IOMMU_PDE_PRESENT_MASK,
+                                         IOMMU_PDE_PRESENT_SHIFT);
+
+        address = gpa + amd_offset_level_address(index, level);
+        if ( (next_table_maddr != 0) && (next_level != 0)
+            && present )
+        {
+            amd_dump_p2m_table_level(
+                maddr_to_page(next_table_maddr), level - 1, address);
+        }
+
+        if ( present )
+        {
+            printk("gfn: %-16lx  mfn: %-16lx\n",
+                   address, next_table_maddr);
+        }
+    }
+
+    unmap_domain_page(table_vaddr);
+}
+
+static void amd_dump_p2m_table(struct domain *d)
+{
+    struct hvm_iommu *hd  = domain_hvm_iommu(d);
+
+    if ( !hd->root_table ) 
+        return;
+
+    amd_dump_p2m_table_level(hd->root_table, hd->paging_mode, 0);
+}
+
 const struct iommu_ops amd_iommu_ops = {
     .init = amd_iommu_domain_init,
     .dom0_init = amd_iommu_dom0_init,
@@ -531,4 +595,5 @@ const struct iommu_ops amd_iommu_ops = {
     .resume = amd_iommu_resume,
     .share_p2m = amd_iommu_share_p2m,
     .crash_shutdown = amd_iommu_suspend,
+    .dump_p2m_table = amd_dump_p2m_table,
 };
diff -r 472fc515a463 -r 68946c7e1d67 xen/drivers/passthrough/iommu.c
--- a/xen/drivers/passthrough/iommu.c	Tue Aug 07 18:37:31 2012 +0100
+++ b/xen/drivers/passthrough/iommu.c	Wed Aug 08 08:55:49 2012 -0700
@@ -18,6 +18,7 @@
 #include <asm/hvm/iommu.h>
 #include <xen/paging.h>
 #include <xen/guest_access.h>
+#include <xen/keyhandler.h>
 #include <xen/softirq.h>
 #include <xsm/xsm.h>
 
@@ -54,6 +55,8 @@ bool_t __read_mostly amd_iommu_perdev_in
 
 DEFINE_PER_CPU(bool_t, iommu_dont_flush_iotlb);
 
+void setup_iommu_dump(void);
+
 static void __init parse_iommu_param(char *s)
 {
     char *ss;
@@ -119,6 +122,7 @@ void __init iommu_dom0_init(struct domai
     if ( !iommu_enabled )
         return;
 
+    setup_iommu_dump();
     d->need_iommu = !!iommu_dom0_strict;
     if ( need_iommu(d) )
     {
@@ -654,6 +658,46 @@ int iommu_do_domctl(
     return ret;
 }
 
+static void iommu_dump_p2m_table(unsigned char key)
+{
+    struct domain *d;
+    const struct iommu_ops *ops;
+
+    if ( !iommu_enabled )
+    {
+        printk("IOMMU not enabled!\n");
+        return;
+    }
+
+    ops = iommu_get_ops();
+    for_each_domain(d)
+    {
+        if ( !d->domain_id )
+            continue;
+
+        if ( iommu_use_hap_pt(d) )
+        {
+            printk("\ndomain%d IOMMU p2m table shared with MMU: \n", d->domain_id);
+            continue;
+        }
+
+        printk("\ndomain%d IOMMU p2m table: \n", d->domain_id);
+        ops->dump_p2m_table(d);
+    }
+}
+
+static struct keyhandler iommu_p2m_table = {
+    .diagnostic = 0,
+    .u.fn = iommu_dump_p2m_table,
+    .desc = "dump iommu p2m table"
+};
+
+void __init setup_iommu_dump(void)
+{
+    register_keyhandler('o', &iommu_p2m_table);
+}
+
+
 /*
  * Local variables:
  * mode: C
diff -r 472fc515a463 -r 68946c7e1d67 xen/drivers/passthrough/vtd/iommu.c
--- a/xen/drivers/passthrough/vtd/iommu.c	Tue Aug 07 18:37:31 2012 +0100
+++ b/xen/drivers/passthrough/vtd/iommu.c	Wed Aug 08 08:55:49 2012 -0700
@@ -31,6 +31,7 @@
 #include <xen/pci.h>
 #include <xen/pci_regs.h>
 #include <xen/keyhandler.h>
+#include <xen/softirq.h>
 #include <asm/msi.h>
 #include <asm/irq.h>
 #if defined(__i386__) || defined(__x86_64__)
@@ -2365,6 +2366,56 @@ static void vtd_resume(void)
     }
 }
 
+static void vtd_dump_p2m_table_level(u64 pt_maddr, int level, u64 gpa)
+{
+    u64 address;
+    int i;
+    struct dma_pte *pt_vaddr, *pte;
+    int next_level;
+
+    if ( pt_maddr == 0 )
+        return;
+
+    pt_vaddr = (struct dma_pte *)map_vtd_domain_page(pt_maddr);
+    if ( pt_vaddr == NULL )
+    {
+        printk("Failed to map VT-D domain page %-16lx\n", pt_maddr);
+        return;
+    }
+
+    next_level = level - 1;
+    for ( i = 0; i < PTE_NUM; i++ )
+    {
+        if ( !(i % 2) )
+            process_pending_softirqs();
+
+        pte = &pt_vaddr[i];
+        if ( !dma_pte_present(*pte) )
+            continue;
+        
+        address = gpa + offset_level_address(i, level);
+        if ( next_level >= 1 )
+            vtd_dump_p2m_table_level(dma_pte_addr(*pte), next_level, address);
+
+        if ( level == 1 )
+            printk("gfn: %-16lx mfn: %-16lx superpage=%d\n", 
+                    address >> PAGE_SHIFT_4K, pte->val >> PAGE_SHIFT_4K, dma_pte_superpage(*pte)? 1 : 0);
+    }
+
+    unmap_vtd_domain_page(pt_vaddr);
+}
+
+static void vtd_dump_p2m_table(struct domain *d)
+{
+    struct hvm_iommu *hd;
+
+    if ( list_empty(&acpi_drhd_units) )
+        return;
+
+    hd = domain_hvm_iommu(d);
+    vtd_dump_p2m_table_level(hd->pgd_maddr, agaw_to_level(hd->agaw), 0);
+}
+
 const struct iommu_ops intel_iommu_ops = {
     .init = intel_iommu_domain_init,
     .dom0_init = intel_iommu_dom0_init,
@@ -2387,6 +2438,7 @@ const struct iommu_ops intel_iommu_ops =
     .crash_shutdown = vtd_crash_shutdown,
     .iotlb_flush = intel_iommu_iotlb_flush,
     .iotlb_flush_all = intel_iommu_iotlb_flush_all,
+    .dump_p2m_table = vtd_dump_p2m_table,
 };
 
 /*
diff -r 472fc515a463 -r 68946c7e1d67 xen/drivers/passthrough/vtd/iommu.h
--- a/xen/drivers/passthrough/vtd/iommu.h	Tue Aug 07 18:37:31 2012 +0100
+++ b/xen/drivers/passthrough/vtd/iommu.h	Wed Aug 08 08:55:49 2012 -0700
@@ -248,6 +248,8 @@ struct context_entry {
 #define level_to_offset_bits(l) (12 + (l - 1) * LEVEL_STRIDE)
 #define address_level_offset(addr, level) \
             ((addr >> level_to_offset_bits(level)) & LEVEL_MASK)
+#define offset_level_address(offset, level) \
+            ((u64)(offset) << level_to_offset_bits(level))
 #define level_mask(l) (((u64)(-1)) << level_to_offset_bits(l))
 #define level_size(l) (1 << level_to_offset_bits(l))
 #define align_to_level(addr, l) ((addr + level_size(l) - 1) & level_mask(l))
@@ -277,6 +279,7 @@ struct dma_pte {
 #define dma_set_pte_addr(p, addr) do {\
             (p).val |= ((addr) & PAGE_MASK_4K); } while (0)
 #define dma_pte_present(p) (((p).val & 3) != 0)
+#define dma_pte_superpage(p) (((p).val & (1<<7)) != 0)
 
 /* interrupt remap entry */
 struct iremap_entry {
diff -r 472fc515a463 -r 68946c7e1d67 xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h	Tue Aug 07 18:37:31 2012 +0100
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h	Wed Aug 08 08:55:49 2012 -0700
@@ -38,6 +38,10 @@
 #define PTE_PER_TABLE_ALLOC(entries)	\
 	PAGE_SIZE * (PTE_PER_TABLE_ALIGN(entries) >> PTE_PER_TABLE_SHIFT)
 
+#define amd_offset_level_address(offset, level) \
+      	((u64)(offset) << ((PTE_PER_TABLE_SHIFT * \
+                             (level - IOMMU_PAGING_MODE_LEVEL_1))))
+
 #define PCI_MIN_CAP_OFFSET	0x40
 #define PCI_MAX_CAP_BLOCKS	48
 #define PCI_CAP_PTR_MASK	0xFC
diff -r 472fc515a463 -r 68946c7e1d67 xen/include/xen/iommu.h
--- a/xen/include/xen/iommu.h	Tue Aug 07 18:37:31 2012 +0100
+++ b/xen/include/xen/iommu.h	Wed Aug 08 08:55:49 2012 -0700
@@ -141,6 +141,7 @@ struct iommu_ops {
     void (*crash_shutdown)(void);
     void (*iotlb_flush)(struct domain *d, unsigned long gfn, unsigned int page_count);
     void (*iotlb_flush_all)(struct domain *d);
+    void (*dump_p2m_table)(struct domain *d);
 };
 
 void iommu_update_ire_from_apic(unsigned int apic, unsigned int reg, unsigned int value);

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] dump_p2m_table: For IOMMU
  2012-08-08  7:31 ` Jan Beulich
@ 2012-08-08 15:32   ` Santosh Jodh
  0 siblings, 0 replies; 24+ messages in thread
From: Santosh Jodh @ 2012-08-08 15:32 UTC (permalink / raw)
  To: Jan Beulich; +Cc: wei.wang2, Tim (Xen.org), xiantao.zhang, xen-devel

Thanks for catching that. I did my work in the xenserver tree (which is based off 4.1) which did not have that handler. I will look for the next free letter and resend the patch.

Thanks,
Santosh

-----Original Message-----
From: Jan Beulich [mailto:JBeulich@suse.com] 
Sent: Wednesday, August 08, 2012 12:32 AM
To: Santosh Jodh
Cc: wei.wang2@amd.com; xiantao.zhang@intel.com; xen-devel; Tim (Xen.org)
Subject: Re: [Xen-devel] [PATCH] dump_p2m_table: For IOMMU

>>> On 07.08.12 at 16:49, Santosh Jodh <santosh.jodh@citrix.com> wrote:
> +void __init setup_iommu_dump(void)
> +{
> +    register_keyhandler('I', &iommu_p2m_table); }

Are you btw aware that a handler for 'I' already exists (in xen/arch/x86/hvm/irq.c)?

Jan

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] dump_p2m_table: For IOMMU
  2012-08-07 14:49 Santosh Jodh
  2012-08-07 15:52 ` Jan Beulich
@ 2012-08-08  7:31 ` Jan Beulich
  2012-08-08 15:32   ` Santosh Jodh
  1 sibling, 1 reply; 24+ messages in thread
From: Jan Beulich @ 2012-08-08  7:31 UTC (permalink / raw)
  To: Santosh Jodh; +Cc: wei.wang2, tim, xiantao.zhang, xen-devel

>>> On 07.08.12 at 16:49, Santosh Jodh <santosh.jodh@citrix.com> wrote:
> +void __init setup_iommu_dump(void)
> +{
> +    register_keyhandler('I', &iommu_p2m_table);
> +}

Are you btw aware that a handler for 'I' already exists (in
xen/arch/x86/hvm/irq.c)?

Jan

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] dump_p2m_table: For IOMMU
  2012-08-07 15:52 ` Jan Beulich
@ 2012-08-07 16:16   ` Santosh Jodh
  0 siblings, 0 replies; 24+ messages in thread
From: Santosh Jodh @ 2012-08-07 16:16 UTC (permalink / raw)
  To: Jan Beulich; +Cc: wei.wang2, Tim (Xen.org), xiantao.zhang, xen-devel

Honestly, I don't have an AMD machine to test my code - I just wrote it for completeness' sake. I based my code on deallocate_next_page_table() in the same file. 

I agree that the map/unmap can be easily avoided.

Someone more familiar with AMD IOMMU might be able to comment more.

Thanks,
Santosh

-----Original Message-----
From: Jan Beulich [mailto:JBeulich@suse.com] 
Sent: Tuesday, August 07, 2012 8:52 AM
To: Santosh Jodh
Cc: wei.wang2@amd.com; xiantao.zhang@intel.com; xen-devel; Tim (Xen.org)
Subject: Re: [Xen-devel] [PATCH] dump_p2m_table: For IOMMU

>>> On 07.08.12 at 16:49, Santosh Jodh <santosh.jodh@citrix.com> wrote:
> --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c	Thu Aug 02 11:49:37 2012 +0200
> +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c	Tue Aug 07 07:46:14 2012 -0700
> @@ -22,6 +22,7 @@
>  #include <xen/pci.h>
>  #include <xen/pci_regs.h>
>  #include <xen/paging.h>
> +#include <xen/softirq.h>
>  #include <asm/hvm/iommu.h>
>  #include <asm/amd-iommu.h>
>  #include <asm/hvm/svm/amd-iommu-proto.h> @@ -512,6 +513,69 @@ static 
> int amd_iommu_group_id(u16 seg, u
>  
>  #include <asm/io_apic.h>
>  
> +static void amd_dump_p2m_table_level(struct page_info* pg, int level, 
> +u64 gpa) {
> +    u64 address;
> +    void *table_vaddr, *pde;
> +    u64 next_table_maddr;
> +    int index, next_level, present;
> +    u32 *entry;
> +
> +    table_vaddr = __map_domain_page(pg);
> +    if ( table_vaddr == NULL )
> +    {
> +        printk("Failed to map IOMMU domain page %-16lx\n", page_to_maddr(pg));
> +        return;
> +    }
> +
> +    if ( level > 1 )

As long as the top level call below can never pass <= 1 here and the recursive call gets gated accordingly, I don't see why you do it differently here than for VT-d, resulting in both unnecessarily deep indentation and a pointless map/unmap pair around the conditional.

Jan

> +    {
> +        for ( index = 0; index < PTE_PER_TABLE_SIZE; index++ )
> +        {
> +            if ( !(index % 2) )
> +                process_pending_softirqs();
> +
> +            pde = table_vaddr + (index * IOMMU_PAGE_TABLE_ENTRY_SIZE);
> +            next_table_maddr = amd_iommu_get_next_table_from_pte(pde);
> +            entry = (u32*)pde;
> +
> +            next_level = get_field_from_reg_u32(entry[0],
> +                                                IOMMU_PDE_NEXT_LEVEL_MASK,
> +                                                
> + IOMMU_PDE_NEXT_LEVEL_SHIFT);
> +
> +            present = get_field_from_reg_u32(entry[0],
> +                                             IOMMU_PDE_PRESENT_MASK,
> +                                             
> + IOMMU_PDE_PRESENT_SHIFT);
> +
> +            address = gpa + amd_offset_level_address(index, level);
> +            if ( (next_table_maddr != 0) && (next_level != 0)
> +                && present )
> +            {
> +                amd_dump_p2m_table_level(
> +                    maddr_to_page(next_table_maddr), level - 1, address);
> +            }
> +
> +            if ( present )
> +            {
> +                printk("gfn: %-16lx  mfn: %-16lx\n",
> +                       address, next_table_maddr);
> +            }
> +        }
> +    }
> +
> +    unmap_domain_page(table_vaddr);
> +}
> +
> +static void amd_dump_p2m_table(struct domain *d) {
> +    struct hvm_iommu *hd  = domain_hvm_iommu(d);
> +
> +    if ( !hd->root_table ) 
> +        return;
> +
> +    amd_dump_p2m_table_level(hd->root_table, hd->paging_mode, 0); }
> +
>  const struct iommu_ops amd_iommu_ops = {
>      .init = amd_iommu_domain_init,
>      .dom0_init = amd_iommu_dom0_init,

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] dump_p2m_table: For IOMMU
  2012-08-07 14:49 Santosh Jodh
@ 2012-08-07 15:52 ` Jan Beulich
  2012-08-07 16:16   ` Santosh Jodh
  2012-08-08  7:31 ` Jan Beulich
  1 sibling, 1 reply; 24+ messages in thread
From: Jan Beulich @ 2012-08-07 15:52 UTC (permalink / raw)
  To: Santosh Jodh; +Cc: wei.wang2, tim, xiantao.zhang, xen-devel

>>> On 07.08.12 at 16:49, Santosh Jodh <santosh.jodh@citrix.com> wrote:
> --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c	Thu Aug 02 11:49:37 2012 +0200
> +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c	Tue Aug 07 07:46:14 2012 -0700
> @@ -22,6 +22,7 @@
>  #include <xen/pci.h>
>  #include <xen/pci_regs.h>
>  #include <xen/paging.h>
> +#include <xen/softirq.h>
>  #include <asm/hvm/iommu.h>
>  #include <asm/amd-iommu.h>
>  #include <asm/hvm/svm/amd-iommu-proto.h>
> @@ -512,6 +513,69 @@ static int amd_iommu_group_id(u16 seg, u
>  
>  #include <asm/io_apic.h>
>  
> +static void amd_dump_p2m_table_level(struct page_info* pg, int level, u64 gpa)
> +{
> +    u64 address;
> +    void *table_vaddr, *pde;
> +    u64 next_table_maddr;
> +    int index, next_level, present;
> +    u32 *entry;
> +
> +    table_vaddr = __map_domain_page(pg);
> +    if ( table_vaddr == NULL )
> +    {
> +        printk("Failed to map IOMMU domain page %-16lx\n", page_to_maddr(pg));
> +        return;
> +    }
> +
> +    if ( level > 1 )

As long as the top level call below can never pass <= 1 here and
the recursive call gets gated accordingly, I don't see why you do
it differently here than for VT-d, resulting in both unnecessarily
deep indentation and a pointless map/unmap pair around the
conditional.

Jan

> +    {
> +        for ( index = 0; index < PTE_PER_TABLE_SIZE; index++ )
> +        {
> +            if ( !(index % 2) )
> +                process_pending_softirqs();
> +
> +            pde = table_vaddr + (index * IOMMU_PAGE_TABLE_ENTRY_SIZE);
> +            next_table_maddr = amd_iommu_get_next_table_from_pte(pde);
> +            entry = (u32*)pde;
> +
> +            next_level = get_field_from_reg_u32(entry[0],
> +                                                IOMMU_PDE_NEXT_LEVEL_MASK,
> +                                                IOMMU_PDE_NEXT_LEVEL_SHIFT);
> +
> +            present = get_field_from_reg_u32(entry[0],
> +                                             IOMMU_PDE_PRESENT_MASK,
> +                                             IOMMU_PDE_PRESENT_SHIFT);
> +
> +            address = gpa + amd_offset_level_address(index, level);
> +            if ( (next_table_maddr != 0) && (next_level != 0)
> +                && present )
> +            {
> +                amd_dump_p2m_table_level(
> +                    maddr_to_page(next_table_maddr), level - 1, address);
> +            }
> +
> +            if ( present )
> +            {
> +                printk("gfn: %-16lx  mfn: %-16lx\n",
> +                       address, next_table_maddr);
> +            }
> +        }
> +    }
> +
> +    unmap_domain_page(table_vaddr);
> +}
> +
> +static void amd_dump_p2m_table(struct domain *d)
> +{
> +    struct hvm_iommu *hd  = domain_hvm_iommu(d);
> +
> +    if ( !hd->root_table ) 
> +        return;
> +
> +    amd_dump_p2m_table_level(hd->root_table, hd->paging_mode, 0);
> +}
> +
>  const struct iommu_ops amd_iommu_ops = {
>      .init = amd_iommu_domain_init,
>      .dom0_init = amd_iommu_dom0_init,

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH] dump_p2m_table: For IOMMU
@ 2012-08-07 14:49 Santosh Jodh
  2012-08-07 15:52 ` Jan Beulich
  2012-08-08  7:31 ` Jan Beulich
  0 siblings, 2 replies; 24+ messages in thread
From: Santosh Jodh @ 2012-08-07 14:49 UTC (permalink / raw)
  To: xen-devel; +Cc: wei.wang2, tim, xiantao.zhang

New key handler 'I' to dump the IOMMU p2m table for each domain.
Skips dumping table for domain0.
Intel and AMD specific iommu_ops handler for dumping p2m table.

Signed-off-by: Santosh Jodh <santosh.jodh@citrix.com>

diff -r 3d17148e465c -r 8b634f178682 xen/drivers/passthrough/amd/pci_amd_iommu.c
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c	Thu Aug 02 11:49:37 2012 +0200
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c	Tue Aug 07 07:46:14 2012 -0700
@@ -22,6 +22,7 @@
 #include <xen/pci.h>
 #include <xen/pci_regs.h>
 #include <xen/paging.h>
+#include <xen/softirq.h>
 #include <asm/hvm/iommu.h>
 #include <asm/amd-iommu.h>
 #include <asm/hvm/svm/amd-iommu-proto.h>
@@ -512,6 +513,69 @@ static int amd_iommu_group_id(u16 seg, u
 
 #include <asm/io_apic.h>
 
+static void amd_dump_p2m_table_level(struct page_info* pg, int level, u64 gpa)
+{
+    u64 address;
+    void *table_vaddr, *pde;
+    u64 next_table_maddr;
+    int index, next_level, present;
+    u32 *entry;
+
+    table_vaddr = __map_domain_page(pg);
+    if ( table_vaddr == NULL )
+    {
+        printk("Failed to map IOMMU domain page %-16lx\n", page_to_maddr(pg));
+        return;
+    }
+
+    if ( level > 1 )
+    {
+        for ( index = 0; index < PTE_PER_TABLE_SIZE; index++ )
+        {
+            if ( !(index % 2) )
+                process_pending_softirqs();
+
+            pde = table_vaddr + (index * IOMMU_PAGE_TABLE_ENTRY_SIZE);
+            next_table_maddr = amd_iommu_get_next_table_from_pte(pde);
+            entry = (u32*)pde;
+
+            next_level = get_field_from_reg_u32(entry[0],
+                                                IOMMU_PDE_NEXT_LEVEL_MASK,
+                                                IOMMU_PDE_NEXT_LEVEL_SHIFT);
+
+            present = get_field_from_reg_u32(entry[0],
+                                             IOMMU_PDE_PRESENT_MASK,
+                                             IOMMU_PDE_PRESENT_SHIFT);
+
+            address = gpa + amd_offset_level_address(index, level);
+            if ( (next_table_maddr != 0) && (next_level != 0)
+                && present )
+            {
+                amd_dump_p2m_table_level(
+                    maddr_to_page(next_table_maddr), level - 1, address);
+            }
+
+            if ( present )
+            {
+                printk("gfn: %-16lx  mfn: %-16lx\n",
+                       address, next_table_maddr);
+            }
+        }
+    }
+
+    unmap_domain_page(table_vaddr);
+}
+
+static void amd_dump_p2m_table(struct domain *d)
+{
+    struct hvm_iommu *hd  = domain_hvm_iommu(d);
+
+    if ( !hd->root_table ) 
+        return;
+
+    amd_dump_p2m_table_level(hd->root_table, hd->paging_mode, 0);
+}
+
 const struct iommu_ops amd_iommu_ops = {
     .init = amd_iommu_domain_init,
     .dom0_init = amd_iommu_dom0_init,
@@ -531,4 +595,5 @@ const struct iommu_ops amd_iommu_ops = {
     .resume = amd_iommu_resume,
     .share_p2m = amd_iommu_share_p2m,
     .crash_shutdown = amd_iommu_suspend,
+    .dump_p2m_table = amd_dump_p2m_table,
 };
diff -r 3d17148e465c -r 8b634f178682 xen/drivers/passthrough/iommu.c
--- a/xen/drivers/passthrough/iommu.c	Thu Aug 02 11:49:37 2012 +0200
+++ b/xen/drivers/passthrough/iommu.c	Tue Aug 07 07:46:14 2012 -0700
@@ -18,6 +18,7 @@
 #include <asm/hvm/iommu.h>
 #include <xen/paging.h>
 #include <xen/guest_access.h>
+#include <xen/keyhandler.h>
 #include <xen/softirq.h>
 #include <xsm/xsm.h>
 
@@ -54,6 +55,8 @@ bool_t __read_mostly amd_iommu_perdev_in
 
 DEFINE_PER_CPU(bool_t, iommu_dont_flush_iotlb);
 
+void setup_iommu_dump(void);
+
 static void __init parse_iommu_param(char *s)
 {
     char *ss;
@@ -119,6 +122,7 @@ void __init iommu_dom0_init(struct domai
     if ( !iommu_enabled )
         return;
 
+    setup_iommu_dump();
     d->need_iommu = !!iommu_dom0_strict;
     if ( need_iommu(d) )
     {
@@ -654,6 +658,46 @@ int iommu_do_domctl(
     return ret;
 }
 
+static void iommu_dump_p2m_table(unsigned char key)
+{
+    struct domain *d;
+    const struct iommu_ops *ops;
+
+    if ( !iommu_enabled )
+    {
+        printk("IOMMU not enabled!\n");
+        return;
+    }
+
+    ops = iommu_get_ops();
+    for_each_domain(d)
+    {
+        if ( !d->domain_id )
+            continue;
+
+        if ( iommu_use_hap_pt(d) )
+        {
+            printk("\ndomain%d IOMMU p2m table shared with MMU: \n", d->domain_id);
+            continue;
+        }
+
+        printk("\ndomain%d IOMMU p2m table: \n", d->domain_id);
+        ops->dump_p2m_table(d);
+    }
+}
+
+static struct keyhandler iommu_p2m_table = {
+    .diagnostic = 0,
+    .u.fn = iommu_dump_p2m_table,
+    .desc = "dump iommu p2m table"
+};
+
+void __init setup_iommu_dump(void)
+{
+    register_keyhandler('I', &iommu_p2m_table);
+}
+
+
 /*
  * Local variables:
  * mode: C
diff -r 3d17148e465c -r 8b634f178682 xen/drivers/passthrough/vtd/iommu.c
--- a/xen/drivers/passthrough/vtd/iommu.c	Thu Aug 02 11:49:37 2012 +0200
+++ b/xen/drivers/passthrough/vtd/iommu.c	Tue Aug 07 07:46:14 2012 -0700
@@ -31,6 +31,7 @@
 #include <xen/pci.h>
 #include <xen/pci_regs.h>
 #include <xen/keyhandler.h>
+#include <xen/softirq.h>
 #include <asm/msi.h>
 #include <asm/irq.h>
 #if defined(__i386__) || defined(__x86_64__)
@@ -2356,6 +2357,56 @@ static void vtd_resume(void)
     }
 }
 
+/*
+ * Recursively dump one level of a VT-d second-level page table.
+ * pt_maddr: machine address of the page table page (0 => nothing to do).
+ * level:    paging level of this table (1 == leaf / 4K PTEs).
+ * gpa:      guest physical address covered by entry 0 of this table.
+ */
+static void vtd_dump_p2m_table_level(u64 pt_maddr, int level, u64 gpa)
+{
+    u64 address;
+    int i;
+    struct dma_pte *pt_vaddr, *pte;
+    int next_level;
+
+    if ( pt_maddr == 0 )
+        return;
+
+    pt_vaddr = (struct dma_pte *)map_vtd_domain_page(pt_maddr);
+    if ( pt_vaddr == NULL )
+    {
+        /* pt_maddr is u64: %lx would be wrong on 32-bit builds. */
+        printk("Failed to map VT-D domain page %"PRIx64"\n", pt_maddr);
+        return;
+    }
+
+    next_level = level - 1;
+    for ( i = 0; i < PTE_NUM; i++ )
+    {
+        /* Keep the system responsive during a long dump. */
+        if ( !(i % 2) )
+            process_pending_softirqs();
+
+        pte = &pt_vaddr[i];
+        if ( !dma_pte_present(*pte) )
+            continue;
+
+        address = gpa + offset_level_address(i, level);
+        /* A superpage PTE is a leaf mapping: its frame is data, not a
+         * lower-level table, so it must be printed, never recursed into. */
+        if ( next_level >= 1 && !dma_pte_superpage(*pte) )
+            vtd_dump_p2m_table_level(dma_pte_addr(*pte), next_level, address);
+        else
+            printk("gfn: %"PRIx64" mfn: %"PRIx64" superpage=%d\n",
+                   address >> PAGE_SHIFT_4K, pte->val >> PAGE_SHIFT_4K,
+                   dma_pte_superpage(*pte) ? 1 : 0);
+    }
+
+    unmap_vtd_domain_page(pt_vaddr);
+}
+
+/* Entry point for dumping domain d's VT-d p2m table: walk from the
+ * root table at the domain's AGAW-derived top paging level. */
+static void vtd_dump_p2m_table(struct domain *d)
+{
+    struct hvm_iommu *hd = domain_hvm_iommu(d);
+
+    /* Nothing to dump unless at least one DRHD unit exists. */
+    if ( !list_empty(&acpi_drhd_units) )
+        vtd_dump_p2m_table_level(hd->pgd_maddr, agaw_to_level(hd->agaw), 0);
+}
+
 const struct iommu_ops intel_iommu_ops = {
     .init = intel_iommu_domain_init,
     .dom0_init = intel_iommu_dom0_init,
@@ -2378,6 +2429,7 @@ const struct iommu_ops intel_iommu_ops =
     .crash_shutdown = vtd_crash_shutdown,
     .iotlb_flush = intel_iommu_iotlb_flush,
     .iotlb_flush_all = intel_iommu_iotlb_flush_all,
+    .dump_p2m_table = vtd_dump_p2m_table,
 };
 
 /*
diff -r 3d17148e465c -r 8b634f178682 xen/drivers/passthrough/vtd/iommu.h
--- a/xen/drivers/passthrough/vtd/iommu.h	Thu Aug 02 11:49:37 2012 +0200
+++ b/xen/drivers/passthrough/vtd/iommu.h	Tue Aug 07 07:46:14 2012 -0700
@@ -248,6 +248,8 @@ struct context_entry {
 #define level_to_offset_bits(l) (12 + (l - 1) * LEVEL_STRIDE)
 #define address_level_offset(addr, level) \
             ((addr >> level_to_offset_bits(level)) & LEVEL_MASK)
+#define offset_level_address(offset, level) \
+            ((u64)(offset) << level_to_offset_bits(level))
 #define level_mask(l) (((u64)(-1)) << level_to_offset_bits(l))
 #define level_size(l) (1 << level_to_offset_bits(l))
 #define align_to_level(addr, l) ((addr + level_size(l) - 1) & level_mask(l))
@@ -277,6 +279,7 @@ struct dma_pte {
 #define dma_set_pte_addr(p, addr) do {\
             (p).val |= ((addr) & PAGE_MASK_4K); } while (0)
 #define dma_pte_present(p) (((p).val & 3) != 0)
+#define dma_pte_superpage(p) (((p).val & (1<<7)) != 0)
 
 /* interrupt remap entry */
 struct iremap_entry {
diff -r 3d17148e465c -r 8b634f178682 xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h	Thu Aug 02 11:49:37 2012 +0200
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h	Tue Aug 07 07:46:14 2012 -0700
@@ -38,6 +38,10 @@
 #define PTE_PER_TABLE_ALLOC(entries)	\
 	PAGE_SIZE * (PTE_PER_TABLE_ALIGN(entries) >> PTE_PER_TABLE_SHIFT)
 
+#define amd_offset_level_address(offset, level) \
+      	((u64)(offset) << ((PTE_PER_TABLE_SHIFT * \
+                             (level - IOMMU_PAGING_MODE_LEVEL_1))))
+
 #define PCI_MIN_CAP_OFFSET	0x40
 #define PCI_MAX_CAP_BLOCKS	48
 #define PCI_CAP_PTR_MASK	0xFC
diff -r 3d17148e465c -r 8b634f178682 xen/include/xen/iommu.h
--- a/xen/include/xen/iommu.h	Thu Aug 02 11:49:37 2012 +0200
+++ b/xen/include/xen/iommu.h	Tue Aug 07 07:46:14 2012 -0700
@@ -141,6 +141,7 @@ struct iommu_ops {
     void (*crash_shutdown)(void);
     void (*iotlb_flush)(struct domain *d, unsigned long gfn, unsigned int page_count);
     void (*iotlb_flush_all)(struct domain *d);
+    void (*dump_p2m_table)(struct domain *d);
 };
 
 void iommu_update_ire_from_apic(unsigned int apic, unsigned int reg, unsigned int value);

^ permalink raw reply	[flat|nested] 24+ messages in thread

end of thread, other threads:[~2012-08-14 19:34 UTC | newest]

Thread overview: 24+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-08-10  1:43 [PATCH] dump_p2m_table: For IOMMU Santosh Jodh
2012-08-10  7:49 ` Jan Beulich
2012-08-10 12:31 ` Wei Wang
2012-08-10 13:02   ` Jan Beulich
2012-08-10 15:02   ` Santosh Jodh
  -- strict thread matches above, loose matches on Subject: below --
2012-08-10 19:14 Santosh Jodh
2012-08-13  8:59 ` Jan Beulich
2012-08-14 19:34   ` Santosh Jodh
2012-08-13 10:31 ` Wei Wang
2012-08-08 17:17 Santosh Jodh
2012-08-09  7:26 ` Jan Beulich
2012-08-10  1:41   ` Santosh Jodh
2012-08-10 10:50   ` Wei Wang
2012-08-10 12:52     ` Jan Beulich
2012-08-10 13:41       ` Wei Wang
2012-08-10 14:24         ` Jan Beulich
2012-08-08 15:56 Santosh Jodh
2012-08-08 16:21 ` Jan Beulich
2012-08-08 17:17   ` Santosh Jodh
2012-08-07 14:49 Santosh Jodh
2012-08-07 15:52 ` Jan Beulich
2012-08-07 16:16   ` Santosh Jodh
2012-08-08  7:31 ` Jan Beulich
2012-08-08 15:32   ` Santosh Jodh

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.