All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2 V2] iommu/amd: Fix logic for clearing the IOMMU interrupt bits
@ 2013-06-10  5:05 suravee.suthikulpanit
  2013-06-10  5:05 ` [PATCH 2/2 V2] iommu/amd: Workaround for erratum 787 suravee.suthikulpanit
                   ` (2 more replies)
  0 siblings, 3 replies; 50+ messages in thread
From: suravee.suthikulpanit @ 2013-06-10  5:05 UTC (permalink / raw)
  To: xen-devel, JBeulich; +Cc: Suravee Suthikulpanit

From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>

The IOMMU interrupt bits in the IOMMU status registers are
"read-only, and write-1-to-clear (RW1C).  Therefore, the existing
logic which reads the register, set the bit, and then writing back
the values could accidentally clear certain bits if it has been set.

The correct logic would just be writing only the value which only
set the interrupt bits, and leave the rest to zeros.

This patch also, clean up #define masks as Jan has suggested.

Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
---
V2 changes:
	- Additional fixes as pointed out by Jan.
	- Clean up unnecessary #define mask as suggested by Jan.

 xen/drivers/passthrough/amd/iommu_cmd.c      |   18 +++--
 xen/drivers/passthrough/amd/iommu_init.c     |   31 ++++-----
 xen/include/asm-x86/hvm/svm/amd-iommu-defs.h |   95 +++++++++++---------------
 3 files changed, 63 insertions(+), 81 deletions(-)

diff --git a/xen/drivers/passthrough/amd/iommu_cmd.c b/xen/drivers/passthrough/amd/iommu_cmd.c
index 4c60149..f0283d4 100644
--- a/xen/drivers/passthrough/amd/iommu_cmd.c
+++ b/xen/drivers/passthrough/amd/iommu_cmd.c
@@ -75,11 +75,9 @@ static void flush_command_buffer(struct amd_iommu *iommu)
     u32 cmd[4], status;
     int loop_count, comp_wait;
 
-    /* clear 'ComWaitInt' in status register (WIC) */
-    set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, 0,
-                         IOMMU_STATUS_COMP_WAIT_INT_MASK,
-                         IOMMU_STATUS_COMP_WAIT_INT_SHIFT, &status);
-    writel(status, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+    /* RW1C 'ComWaitInt' in status register */
+    writel((1 << IOMMU_STATUS_COMP_WAIT_INT_SHIFT),
+        iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
 
     /* send an empty COMPLETION_WAIT command to flush command buffer */
     cmd[3] = cmd[2] = 0;
@@ -96,16 +94,16 @@ static void flush_command_buffer(struct amd_iommu *iommu)
     do {
         status = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
         comp_wait = get_field_from_reg_u32(status,
-                                           IOMMU_STATUS_COMP_WAIT_INT_MASK,
-                                           IOMMU_STATUS_COMP_WAIT_INT_SHIFT);
+                                (1 << IOMMU_STATUS_COMP_WAIT_INT_SHIFT),
+                                IOMMU_STATUS_COMP_WAIT_INT_SHIFT);
         --loop_count;
     } while ( !comp_wait && loop_count );
 
     if ( comp_wait )
     {
-        /* clear 'ComWaitInt' in status register (WIC) */
-        status &= IOMMU_STATUS_COMP_WAIT_INT_MASK;
-        writel(status, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+        /* RW1C 'ComWaitInt' in status register */
+        writel((1 << IOMMU_STATUS_COMP_WAIT_INT_SHIFT),
+            iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
         return;
     }
     AMD_IOMMU_DEBUG("Warning: ComWaitInt bit did not assert!\n");
diff --git a/xen/drivers/passthrough/amd/iommu_init.c b/xen/drivers/passthrough/amd/iommu_init.c
index a939c73..a85c63f 100644
--- a/xen/drivers/passthrough/amd/iommu_init.c
+++ b/xen/drivers/passthrough/amd/iommu_init.c
@@ -439,8 +439,8 @@ static void iommu_reset_log(struct amd_iommu *iommu,
 
     ctrl_func(iommu, IOMMU_CONTROL_DISABLED);
 
-    /*clear overflow bit */
-    iommu_clear_bit(&entry, of_bit);
+    /* RW1C overflow bit */
+    iommu_set_bit(&entry, of_bit);
     writel(entry, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
 
     /*reset event log base address */
@@ -622,11 +622,9 @@ static void iommu_check_event_log(struct amd_iommu *iommu)
     if ( iommu_get_bit(entry, IOMMU_STATUS_EVENT_OVERFLOW_SHIFT) )
         iommu_reset_log(iommu, &iommu->event_log, set_iommu_event_log_control);
 
-    /* reset interrupt status bit */
-    entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
-    iommu_set_bit(&entry, IOMMU_STATUS_EVENT_LOG_INT_SHIFT);
-
-    writel(entry, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+    /* RW1C interrupt status bit */
+    writel((1 << IOMMU_STATUS_EVENT_LOG_INT_SHIFT),
+        iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
 
     spin_unlock_irqrestore(&iommu->lock, flags);
 }
@@ -692,11 +690,9 @@ static void iommu_check_ppr_log(struct amd_iommu *iommu)
     if ( iommu_get_bit(entry, IOMMU_STATUS_PPR_LOG_OVERFLOW_SHIFT) )
         iommu_reset_log(iommu, &iommu->ppr_log, set_iommu_ppr_log_control);
 
-    /* reset interrupt status bit */
-    entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
-    iommu_set_bit(&entry, IOMMU_STATUS_PPR_LOG_INT_SHIFT);
-
-    writel(entry, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+    /* RW1C interrupt status bit */
+    writel((1 << IOMMU_STATUS_PPR_LOG_INT_SHIFT),
+        iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
 
     spin_unlock_irqrestore(&iommu->lock, flags);
 }
@@ -733,10 +729,13 @@ static void iommu_interrupt_handler(int irq, void *dev_id,
 
     spin_lock_irqsave(&iommu->lock, flags);
 
-    /* Silence interrupts from both event and PPR logging */
-    entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
-    iommu_clear_bit(&entry, IOMMU_STATUS_EVENT_LOG_INT_SHIFT);
-    iommu_clear_bit(&entry, IOMMU_STATUS_PPR_LOG_INT_SHIFT);
+    /* Silence interrupts from both event and PPR
+     * by clearing the enable logging bits in the
+     * control register */
+    entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
+    iommu_clear_bit(&entry, IOMMU_CONTROL_EVENT_LOG_INT_SHIFT);
+    iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_LOG_INT_SHIFT);
+    /* RW1C status bit */
     writel(entry, iommu->mmio_base+IOMMU_STATUS_MMIO_OFFSET);
 
     spin_unlock_irqrestore(&iommu->lock, flags);
diff --git a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
index d2176d0..2f2d740 100644
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
@@ -313,31 +313,21 @@
 #define IOMMU_LOG_ENTRY_TIMEOUT                         1000
 
 /* Control Register */
-#define IOMMU_CONTROL_MMIO_OFFSET			0x18
-#define IOMMU_CONTROL_TRANSLATION_ENABLE_MASK		0x00000001
-#define IOMMU_CONTROL_TRANSLATION_ENABLE_SHIFT		0
-#define IOMMU_CONTROL_HT_TUNNEL_TRANSLATION_MASK	0x00000002
-#define IOMMU_CONTROL_HT_TUNNEL_TRANSLATION_SHIFT	1
-#define IOMMU_CONTROL_EVENT_LOG_ENABLE_MASK		0x00000004
-#define IOMMU_CONTROL_EVENT_LOG_ENABLE_SHIFT		2
-#define IOMMU_CONTROL_EVENT_LOG_INT_MASK		0x00000008
-#define IOMMU_CONTROL_EVENT_LOG_INT_SHIFT		3
-#define IOMMU_CONTROL_COMP_WAIT_INT_MASK		0x00000010
-#define IOMMU_CONTROL_COMP_WAIT_INT_SHIFT		4
-#define IOMMU_CONTROL_INVALIDATION_TIMEOUT_MASK		0x000000E0
-#define IOMMU_CONTROL_INVALIDATION_TIMEOUT_SHIFT	5
-#define IOMMU_CONTROL_PASS_POSTED_WRITE_MASK		0x00000100
-#define IOMMU_CONTROL_PASS_POSTED_WRITE_SHIFT		8
-#define IOMMU_CONTROL_RESP_PASS_POSTED_WRITE_MASK	0x00000200
-#define IOMMU_CONTROL_RESP_PASS_POSTED_WRITE_SHIFT	9
-#define IOMMU_CONTROL_COHERENT_MASK			0x00000400
-#define IOMMU_CONTROL_COHERENT_SHIFT			10
-#define IOMMU_CONTROL_ISOCHRONOUS_MASK			0x00000800
-#define IOMMU_CONTROL_ISOCHRONOUS_SHIFT			11
-#define IOMMU_CONTROL_COMMAND_BUFFER_ENABLE_MASK	0x00001000
-#define IOMMU_CONTROL_COMMAND_BUFFER_ENABLE_SHIFT	12
-#define IOMMU_CONTROL_RESTART_MASK			0x80000000
-#define IOMMU_CONTROL_RESTART_SHIFT			31
+#define IOMMU_CONTROL_MMIO_OFFSET                       0x18
+#define IOMMU_CONTROL_TRANSLATION_ENABLE_SHIFT          0
+#define IOMMU_CONTROL_HT_TUNNEL_TRANSLATION_SHIFT       1
+#define IOMMU_CONTROL_EVENT_LOG_ENABLE_SHIFT            2
+#define IOMMU_CONTROL_EVENT_LOG_INT_SHIFT               3
+#define IOMMU_CONTROL_COMP_WAIT_INT_SHIFT               4
+#define IOMMU_CONTROL_INVALIDATION_TIMEOUT_SHIFT        5
+#define IOMMU_CONTROL_PASS_POSTED_WRITE_SHIFT           8
+#define IOMMU_CONTROL_RESP_PASS_POSTED_WRITE_SHIFT      9
+#define IOMMU_CONTROL_COHERENT_SHIFT                    10
+#define IOMMU_CONTROL_ISOCHRONOUS_SHIFT                 11
+#define IOMMU_CONTROL_COMMAND_BUFFER_ENABLE_SHIFT       12
+#define IOMMU_CONTROL_PPR_LOG_ENABLE_SHIFT              13
+#define IOMMU_CONTROL_PPR_LOG_INT_SHIFT                 14
+#define IOMMU_CONTROL_RESTART_SHIFT                     31
 
 #define IOMMU_CONTROL_PPR_LOG_ENABLE_SHIFT      13
 #define IOMMU_CONTROL_PPR_INT_SHIFT             14
@@ -363,38 +353,33 @@
 #define IOMMU_EXCLUSION_LIMIT_HIGH_SHIFT	0
 
 /* Extended Feature Register*/
-#define IOMMU_EXT_FEATURE_MMIO_OFFSET                   0x30
-#define IOMMU_EXT_FEATURE_PREFSUP_SHIFT                 0x0
-#define IOMMU_EXT_FEATURE_PPRSUP_SHIFT                  0x1
-#define IOMMU_EXT_FEATURE_XTSUP_SHIFT                   0x2
-#define IOMMU_EXT_FEATURE_NXSUP_SHIFT                   0x3
-#define IOMMU_EXT_FEATURE_GTSUP_SHIFT                   0x4
-#define IOMMU_EXT_FEATURE_IASUP_SHIFT                   0x6
-#define IOMMU_EXT_FEATURE_GASUP_SHIFT                   0x7
-#define IOMMU_EXT_FEATURE_HESUP_SHIFT                   0x8
-#define IOMMU_EXT_FEATURE_PCSUP_SHIFT                   0x9
-#define IOMMU_EXT_FEATURE_HATS_SHIFT                    0x10
-#define IOMMU_EXT_FEATURE_HATS_MASK                     0x00000C00
-#define IOMMU_EXT_FEATURE_GATS_SHIFT                    0x12
-#define IOMMU_EXT_FEATURE_GATS_MASK                     0x00003000
-#define IOMMU_EXT_FEATURE_GLXSUP_SHIFT                  0x14
-#define IOMMU_EXT_FEATURE_GLXSUP_MASK                   0x0000C000
-
-#define IOMMU_EXT_FEATURE_PASMAX_SHIFT                  0x0
-#define IOMMU_EXT_FEATURE_PASMAX_MASK                   0x0000001F
+#define IOMMU_EXT_FEATURE_MMIO_OFFSET           0x30
+#define IOMMU_EXT_FEATURE_PREFSUP_SHIFT         0x0
+#define IOMMU_EXT_FEATURE_PPRSUP_SHIFT          0x1
+#define IOMMU_EXT_FEATURE_XTSUP_SHIFT           0x2
+#define IOMMU_EXT_FEATURE_NXSUP_SHIFT           0x3
+#define IOMMU_EXT_FEATURE_GTSUP_SHIFT           0x4
+#define IOMMU_EXT_FEATURE_IASUP_SHIFT           0x6
+#define IOMMU_EXT_FEATURE_GASUP_SHIFT           0x7
+#define IOMMU_EXT_FEATURE_HESUP_SHIFT           0x8
+#define IOMMU_EXT_FEATURE_PCSUP_SHIFT           0x9
+#define IOMMU_EXT_FEATURE_HATS_SHIFT            0x10
+#define IOMMU_EXT_FEATURE_HATS_MASK             0x00000C00
+#define IOMMU_EXT_FEATURE_GATS_SHIFT            0x12
+#define IOMMU_EXT_FEATURE_GATS_MASK             0x00003000
+#define IOMMU_EXT_FEATURE_GLXSUP_SHIFT          0x14
+#define IOMMU_EXT_FEATURE_GLXSUP_MASK           0x0000C000
+
+#define IOMMU_EXT_FEATURE_PASMAX_SHIFT          0x0
+#define IOMMU_EXT_FEATURE_PASMAX_MASK           0x0000001F
 
 /* Status Register*/
-#define IOMMU_STATUS_MMIO_OFFSET		0x2020
-#define IOMMU_STATUS_EVENT_OVERFLOW_MASK	0x00000001
-#define IOMMU_STATUS_EVENT_OVERFLOW_SHIFT	0
-#define IOMMU_STATUS_EVENT_LOG_INT_MASK		0x00000002
-#define IOMMU_STATUS_EVENT_LOG_INT_SHIFT	1
-#define IOMMU_STATUS_COMP_WAIT_INT_MASK		0x00000004
-#define IOMMU_STATUS_COMP_WAIT_INT_SHIFT	2
-#define IOMMU_STATUS_EVENT_LOG_RUN_MASK		0x00000008
-#define IOMMU_STATUS_EVENT_LOG_RUN_SHIFT	3
-#define IOMMU_STATUS_CMD_BUFFER_RUN_MASK	0x00000010
-#define IOMMU_STATUS_CMD_BUFFER_RUN_SHIFT	4
+#define IOMMU_STATUS_MMIO_OFFSET                0x2020
+#define IOMMU_STATUS_EVENT_OVERFLOW_SHIFT       0
+#define IOMMU_STATUS_EVENT_LOG_INT_SHIFT        1
+#define IOMMU_STATUS_COMP_WAIT_INT_SHIFT        2
+#define IOMMU_STATUS_EVENT_LOG_RUN_SHIFT        3
+#define IOMMU_STATUS_CMD_BUFFER_RUN_SHIFT       4
 #define IOMMU_STATUS_PPR_LOG_OVERFLOW_SHIFT     5
 #define IOMMU_STATUS_PPR_LOG_INT_SHIFT          6
 #define IOMMU_STATUS_PPR_LOG_RUN_SHIFT          7
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 50+ messages in thread

* [PATCH 2/2 V2] iommu/amd: Workaround for erratum 787
  2013-06-10  5:05 [PATCH 1/2 V2] iommu/amd: Fix logic for clearing the IOMMU interrupt bits suravee.suthikulpanit
@ 2013-06-10  5:05 ` suravee.suthikulpanit
  2013-06-10  9:35   ` Tim Deegan
  2013-06-10 10:59   ` [PATCH 2/2 v3] " Jan Beulich
  2013-06-10 10:05 ` [PATCH 1/2 V2] iommu/amd: Fix logic for clearing the IOMMU interrupt bits Jan Beulich
  2013-06-10 10:56 ` [PATCH 1/2 v3] " Jan Beulich
  2 siblings, 2 replies; 50+ messages in thread
From: suravee.suthikulpanit @ 2013-06-10  5:05 UTC (permalink / raw)
  To: xen-devel, JBeulich; +Cc: Suravee Suthikulpanit

From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>

The IOMMU interrupt handling in bottom half must clear the PPR log interrupt
and event log interrupt bits to re-enable the interrupt. This is done by
writing 1 to the memory mapped register to clear the bit. Due to hardware bug,
if the driver tries to clear this bit while the IOMMU hardware also setting
this bit, the conflict will result with the bit being set. If the interrupt
handling code does not make sure to clear this bit, subsequent changes in the
event/PPR logs will no longer generating interrupts, and would result if
buffer overflow. After clearing the bits, the driver must read back
the register to verify.

Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
---
V2 changes:
	- Coding style fixes

 xen/drivers/passthrough/amd/iommu_init.c |   36 ++++++++++++++++++++----------
 1 file changed, 24 insertions(+), 12 deletions(-)

diff --git a/xen/drivers/passthrough/amd/iommu_init.c b/xen/drivers/passthrough/amd/iommu_init.c
index a85c63f..048a2e6 100644
--- a/xen/drivers/passthrough/amd/iommu_init.c
+++ b/xen/drivers/passthrough/amd/iommu_init.c
@@ -616,15 +616,21 @@ static void iommu_check_event_log(struct amd_iommu *iommu)
 
     spin_lock_irqsave(&iommu->lock, flags);
     
-    /*check event overflow */
     entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+    while (entry & (1 << IOMMU_STATUS_EVENT_LOG_INT_SHIFT))
+    {
+        /* Check event overflow */
+        if ( iommu_get_bit(entry, IOMMU_STATUS_EVENT_OVERFLOW_SHIFT) )
+            iommu_reset_log(iommu, &iommu->event_log, set_iommu_event_log_control);
 
-    if ( iommu_get_bit(entry, IOMMU_STATUS_EVENT_OVERFLOW_SHIFT) )
-        iommu_reset_log(iommu, &iommu->event_log, set_iommu_event_log_control);
+        /* RW1C interrupt status bit */
+        writel((1 << IOMMU_STATUS_EVENT_LOG_INT_SHIFT),
+            iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
 
-    /* RW1C interrupt status bit */
-    writel((1 << IOMMU_STATUS_EVENT_LOG_INT_SHIFT),
-        iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+        /* Workaround for erratum787:
+         * Re-check to make sure the bit has been cleared */
+        entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+    }
 
     spin_unlock_irqrestore(&iommu->lock, flags);
 }
@@ -684,15 +690,21 @@ static void iommu_check_ppr_log(struct amd_iommu *iommu)
     
     spin_lock_irqsave(&iommu->lock, flags);
 
-    /*check event overflow */
     entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+    while ( entry & (1 << IOMMU_STATUS_PPR_LOG_INT_SHIFT ) )
+    {
+        /* Check event overflow */
+        if ( iommu_get_bit(entry, IOMMU_STATUS_PPR_LOG_OVERFLOW_SHIFT) )
+            iommu_reset_log(iommu, &iommu->ppr_log, set_iommu_ppr_log_control);
 
-    if ( iommu_get_bit(entry, IOMMU_STATUS_PPR_LOG_OVERFLOW_SHIFT) )
-        iommu_reset_log(iommu, &iommu->ppr_log, set_iommu_ppr_log_control);
+        /* RW1C interrupt status bit */
+        writel((1 << IOMMU_STATUS_PPR_LOG_INT_SHIFT),
+            iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
 
-    /* RW1C interrupt status bit */
-    writel((1 << IOMMU_STATUS_PPR_LOG_INT_SHIFT),
-        iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+        /* Workaround for erratum787:
+         * Re-check to make sure the bit has been cleared */
+        entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+    }
 
     spin_unlock_irqrestore(&iommu->lock, flags);
 }
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 50+ messages in thread

* Re: [PATCH 2/2 V2] iommu/amd: Workaround for erratum 787
  2013-06-10  5:05 ` [PATCH 2/2 V2] iommu/amd: Workaround for erratum 787 suravee.suthikulpanit
@ 2013-06-10  9:35   ` Tim Deegan
  2013-06-10  9:47     ` Jan Beulich
  2013-06-10 10:59   ` [PATCH 2/2 v3] " Jan Beulich
  1 sibling, 1 reply; 50+ messages in thread
From: Tim Deegan @ 2013-06-10  9:35 UTC (permalink / raw)
  To: suravee.suthikulpanit; +Cc: JBeulich, xen-devel

At 00:05 -0500 on 10 Jun (1370822751), suravee.suthikulpanit@amd.com wrote:
> From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
> 
> The IOMMU interrupt handling in bottom half must clear the PPR log interrupt
> and event log interrupt bits to re-enable the interrupt. This is done by
> writing 1 to the memory mapped register to clear the bit. Due to hardware bug,
> if the driver tries to clear this bit while the IOMMU hardware also setting
> this bit, the conflict will result with the bit being set. If the interrupt
> handling code does not make sure to clear this bit, subsequent changes in the
> event/PPR logs will no longer generating interrupts, and would result if
> buffer overflow. After clearing the bits, the driver must read back
> the register to verify.

Is there a risk of livelock here?  That is, if some device is causing a
lot of IOMMU faults, a CPU could get stuck in this loop re-enabling
interrupts as fast as they are raised.

The solution suggested in the erratum seems better: if the bit is set
after clearing, process the interrupts again (i.e. run/schedule the
top-half handler).  That way the bottom-half handler will definitely
terminate and the system can make some progress.

Cheers,

Tim.

> Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
> ---
> V2 changes:
> 	- Coding style fixes
> 
>  xen/drivers/passthrough/amd/iommu_init.c |   36 ++++++++++++++++++++----------
>  1 file changed, 24 insertions(+), 12 deletions(-)
> 
> diff --git a/xen/drivers/passthrough/amd/iommu_init.c b/xen/drivers/passthrough/amd/iommu_init.c
> index a85c63f..048a2e6 100644
> --- a/xen/drivers/passthrough/amd/iommu_init.c
> +++ b/xen/drivers/passthrough/amd/iommu_init.c
> @@ -616,15 +616,21 @@ static void iommu_check_event_log(struct amd_iommu *iommu)
>  
>      spin_lock_irqsave(&iommu->lock, flags);
>      
> -    /*check event overflow */
>      entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
> +    while (entry & (1 << IOMMU_STATUS_EVENT_LOG_INT_SHIFT))
> +    {
> +        /* Check event overflow */
> +        if ( iommu_get_bit(entry, IOMMU_STATUS_EVENT_OVERFLOW_SHIFT) )
> +            iommu_reset_log(iommu, &iommu->event_log, set_iommu_event_log_control);
>  
> -    if ( iommu_get_bit(entry, IOMMU_STATUS_EVENT_OVERFLOW_SHIFT) )
> -        iommu_reset_log(iommu, &iommu->event_log, set_iommu_event_log_control);
> +        /* RW1C interrupt status bit */
> +        writel((1 << IOMMU_STATUS_EVENT_LOG_INT_SHIFT),
> +            iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
>  
> -    /* RW1C interrupt status bit */
> -    writel((1 << IOMMU_STATUS_EVENT_LOG_INT_SHIFT),
> -        iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
> +        /* Workaround for erratum787:
> +         * Re-check to make sure the bit has been cleared */
> +        entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
> +    }
>  
>      spin_unlock_irqrestore(&iommu->lock, flags);
>  }
> @@ -684,15 +690,21 @@ static void iommu_check_ppr_log(struct amd_iommu *iommu)
>      
>      spin_lock_irqsave(&iommu->lock, flags);
>  
> -    /*check event overflow */
>      entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
> +    while ( entry & (1 << IOMMU_STATUS_PPR_LOG_INT_SHIFT ) )
> +    {
> +        /* Check event overflow */
> +        if ( iommu_get_bit(entry, IOMMU_STATUS_PPR_LOG_OVERFLOW_SHIFT) )
> +            iommu_reset_log(iommu, &iommu->ppr_log, set_iommu_ppr_log_control);
>  
> -    if ( iommu_get_bit(entry, IOMMU_STATUS_PPR_LOG_OVERFLOW_SHIFT) )
> -        iommu_reset_log(iommu, &iommu->ppr_log, set_iommu_ppr_log_control);
> +        /* RW1C interrupt status bit */
> +        writel((1 << IOMMU_STATUS_PPR_LOG_INT_SHIFT),
> +            iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
>  
> -    /* RW1C interrupt status bit */
> -    writel((1 << IOMMU_STATUS_PPR_LOG_INT_SHIFT),
> -        iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
> +        /* Workaround for erratum787:
> +         * Re-check to make sure the bit has been cleared */
> +        entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
> +    }
>  
>      spin_unlock_irqrestore(&iommu->lock, flags);
>  }
> -- 
> 1.7.10.4
> 
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 2/2 V2] iommu/amd: Workaround for erratum 787
  2013-06-10  9:35   ` Tim Deegan
@ 2013-06-10  9:47     ` Jan Beulich
  2013-06-10 10:40       ` Tim Deegan
  0 siblings, 1 reply; 50+ messages in thread
From: Jan Beulich @ 2013-06-10  9:47 UTC (permalink / raw)
  To: suravee.suthikulpanit, Tim Deegan; +Cc: xen-devel

>>> On 10.06.13 at 11:35, Tim Deegan <tim@xen.org> wrote:
> At 00:05 -0500 on 10 Jun (1370822751), suravee.suthikulpanit@amd.com wrote:
>> From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
>> 
>> The IOMMU interrupt handling in bottom half must clear the PPR log interrupt
>> and event log interrupt bits to re-enable the interrupt. This is done by
>> writing 1 to the memory mapped register to clear the bit. Due to hardware 
> bug,
>> if the driver tries to clear this bit while the IOMMU hardware also setting
>> this bit, the conflict will result with the bit being set. If the interrupt
>> handling code does not make sure to clear this bit, subsequent changes in 
> the
>> event/PPR logs will no longer generating interrupts, and would result if
>> buffer overflow. After clearing the bits, the driver must read back
>> the register to verify.
> 
> Is there a risk of livelock here?  That is, if some device is causing a
> lot of IOMMU faults, a CPU could get stuck in this loop re-enabling
> interrupts as fast as they are raised.
> 
> The solution suggested in the erratum seems better: if the bit is set
> after clearing, process the interrupts again (i.e. run/schedule the
> top-half handler).  That way the bottom-half handler will definitely
> terminate and the system can make some progress.

That's what's being done really: The actual interrupt handler disables
the interrupt sources, and the tasklet re-enables them (or at least is
supposed to do so - patch 1 isn't really correct in the respect).

The only thing that I think is wrong (but again already in patch 1) is
that the status bit should get cleared before an interrupt source
gets re-enabled.

I started cleaning up patch 1 anyway, so I'll post a v3 once done.

Jan

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/2 V2] iommu/amd: Fix logic for clearing the IOMMU interrupt bits
  2013-06-10  5:05 [PATCH 1/2 V2] iommu/amd: Fix logic for clearing the IOMMU interrupt bits suravee.suthikulpanit
  2013-06-10  5:05 ` [PATCH 2/2 V2] iommu/amd: Workaround for erratum 787 suravee.suthikulpanit
@ 2013-06-10 10:05 ` Jan Beulich
  2013-06-10 10:56 ` [PATCH 1/2 v3] " Jan Beulich
  2 siblings, 0 replies; 50+ messages in thread
From: Jan Beulich @ 2013-06-10 10:05 UTC (permalink / raw)
  To: suravee.suthikulpanit; +Cc: xen-devel

>>> On 10.06.13 at 07:05, <suravee.suthikulpanit@amd.com> wrote:
> From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
> 
> The IOMMU interrupt bits in the IOMMU status registers are
> "read-only, and write-1-to-clear (RW1C).  Therefore, the existing
> logic which reads the register, set the bit, and then writing back
> the values could accidentally clear certain bits if it has been set.
> 
> The correct logic would just be writing only the value which only
> set the interrupt bits, and leave the rest to zeros.
> 
> This patch also, clean up #define masks as Jan has suggested.

This went to far, and imo in the wrong direction - the mask values
are what ultimately should stay, since the shift values can be
computed from them. And this cleanup should really be done only
when [gs]et_field_in_reg_u32() get the redundant shift parameter
eliminated (i.e. in a separate, post-4.3 patch).

But as said already in the response to Tim's reply on patch 2 - this
patch also had a few other issues, so I'm going to reply with a v3
once I'm done with the fixing/cleanup.

Jan

> Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
> ---
> V2 changes:
> 	- Additional fixes as pointed out by Jan.
> 	- Clean up unnecessary #define mask as suggested by Jan.
> 
>  xen/drivers/passthrough/amd/iommu_cmd.c      |   18 +++--
>  xen/drivers/passthrough/amd/iommu_init.c     |   31 ++++-----
>  xen/include/asm-x86/hvm/svm/amd-iommu-defs.h |   95 +++++++++++---------------
>  3 files changed, 63 insertions(+), 81 deletions(-)
> 
> diff --git a/xen/drivers/passthrough/amd/iommu_cmd.c 
> b/xen/drivers/passthrough/amd/iommu_cmd.c
> index 4c60149..f0283d4 100644
> --- a/xen/drivers/passthrough/amd/iommu_cmd.c
> +++ b/xen/drivers/passthrough/amd/iommu_cmd.c
> @@ -75,11 +75,9 @@ static void flush_command_buffer(struct amd_iommu *iommu)
>      u32 cmd[4], status;
>      int loop_count, comp_wait;
>  
> -    /* clear 'ComWaitInt' in status register (WIC) */
> -    set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, 0,
> -                         IOMMU_STATUS_COMP_WAIT_INT_MASK,
> -                         IOMMU_STATUS_COMP_WAIT_INT_SHIFT, &status);
> -    writel(status, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
> +    /* RW1C 'ComWaitInt' in status register */
> +    writel((1 << IOMMU_STATUS_COMP_WAIT_INT_SHIFT),
> +        iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
>  
>      /* send an empty COMPLETION_WAIT command to flush command buffer */
>      cmd[3] = cmd[2] = 0;
> @@ -96,16 +94,16 @@ static void flush_command_buffer(struct amd_iommu *iommu)
>      do {
>          status = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
>          comp_wait = get_field_from_reg_u32(status,
> -                                           IOMMU_STATUS_COMP_WAIT_INT_MASK,
> -                                           
> IOMMU_STATUS_COMP_WAIT_INT_SHIFT);
> +                                (1 << IOMMU_STATUS_COMP_WAIT_INT_SHIFT),
> +                                IOMMU_STATUS_COMP_WAIT_INT_SHIFT);
>          --loop_count;
>      } while ( !comp_wait && loop_count );
>  
>      if ( comp_wait )
>      {
> -        /* clear 'ComWaitInt' in status register (WIC) */
> -        status &= IOMMU_STATUS_COMP_WAIT_INT_MASK;
> -        writel(status, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
> +        /* RW1C 'ComWaitInt' in status register */
> +        writel((1 << IOMMU_STATUS_COMP_WAIT_INT_SHIFT),
> +            iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
>          return;
>      }
>      AMD_IOMMU_DEBUG("Warning: ComWaitInt bit did not assert!\n");
> diff --git a/xen/drivers/passthrough/amd/iommu_init.c 
> b/xen/drivers/passthrough/amd/iommu_init.c
> index a939c73..a85c63f 100644
> --- a/xen/drivers/passthrough/amd/iommu_init.c
> +++ b/xen/drivers/passthrough/amd/iommu_init.c
> @@ -439,8 +439,8 @@ static void iommu_reset_log(struct amd_iommu *iommu,
>  
>      ctrl_func(iommu, IOMMU_CONTROL_DISABLED);
>  
> -    /*clear overflow bit */
> -    iommu_clear_bit(&entry, of_bit);
> +    /* RW1C overflow bit */
> +    iommu_set_bit(&entry, of_bit);
>      writel(entry, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
>  
>      /*reset event log base address */
> @@ -622,11 +622,9 @@ static void iommu_check_event_log(struct amd_iommu 
> *iommu)
>      if ( iommu_get_bit(entry, IOMMU_STATUS_EVENT_OVERFLOW_SHIFT) )
>          iommu_reset_log(iommu, &iommu->event_log, 
> set_iommu_event_log_control);
>  
> -    /* reset interrupt status bit */
> -    entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
> -    iommu_set_bit(&entry, IOMMU_STATUS_EVENT_LOG_INT_SHIFT);
> -
> -    writel(entry, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
> +    /* RW1C interrupt status bit */
> +    writel((1 << IOMMU_STATUS_EVENT_LOG_INT_SHIFT),
> +        iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
>  
>      spin_unlock_irqrestore(&iommu->lock, flags);
>  }
> @@ -692,11 +690,9 @@ static void iommu_check_ppr_log(struct amd_iommu *iommu)
>      if ( iommu_get_bit(entry, IOMMU_STATUS_PPR_LOG_OVERFLOW_SHIFT) )
>          iommu_reset_log(iommu, &iommu->ppr_log, set_iommu_ppr_log_control);
>  
> -    /* reset interrupt status bit */
> -    entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
> -    iommu_set_bit(&entry, IOMMU_STATUS_PPR_LOG_INT_SHIFT);
> -
> -    writel(entry, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
> +    /* RW1C interrupt status bit */
> +    writel((1 << IOMMU_STATUS_PPR_LOG_INT_SHIFT),
> +        iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
>  
>      spin_unlock_irqrestore(&iommu->lock, flags);
>  }
> @@ -733,10 +729,13 @@ static void iommu_interrupt_handler(int irq, void 
> *dev_id,
>  
>      spin_lock_irqsave(&iommu->lock, flags);
>  
> -    /* Silence interrupts from both event and PPR logging */
> -    entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
> -    iommu_clear_bit(&entry, IOMMU_STATUS_EVENT_LOG_INT_SHIFT);
> -    iommu_clear_bit(&entry, IOMMU_STATUS_PPR_LOG_INT_SHIFT);
> +    /* Silence interrupts from both event and PPR
> +     * by clearing the enable logging bits in the
> +     * control register */
> +    entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
> +    iommu_clear_bit(&entry, IOMMU_CONTROL_EVENT_LOG_INT_SHIFT);
> +    iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_LOG_INT_SHIFT);
> +    /* RW1C status bit */
>      writel(entry, iommu->mmio_base+IOMMU_STATUS_MMIO_OFFSET);
>  
>      spin_unlock_irqrestore(&iommu->lock, flags);
> diff --git a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h 
> b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
> index d2176d0..2f2d740 100644
> --- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
> +++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
> @@ -313,31 +313,21 @@
>  #define IOMMU_LOG_ENTRY_TIMEOUT                         1000
>  
>  /* Control Register */
> -#define IOMMU_CONTROL_MMIO_OFFSET			0x18
> -#define IOMMU_CONTROL_TRANSLATION_ENABLE_MASK		0x00000001
> -#define IOMMU_CONTROL_TRANSLATION_ENABLE_SHIFT		0
> -#define IOMMU_CONTROL_HT_TUNNEL_TRANSLATION_MASK	0x00000002
> -#define IOMMU_CONTROL_HT_TUNNEL_TRANSLATION_SHIFT	1
> -#define IOMMU_CONTROL_EVENT_LOG_ENABLE_MASK		0x00000004
> -#define IOMMU_CONTROL_EVENT_LOG_ENABLE_SHIFT		2
> -#define IOMMU_CONTROL_EVENT_LOG_INT_MASK		0x00000008
> -#define IOMMU_CONTROL_EVENT_LOG_INT_SHIFT		3
> -#define IOMMU_CONTROL_COMP_WAIT_INT_MASK		0x00000010
> -#define IOMMU_CONTROL_COMP_WAIT_INT_SHIFT		4
> -#define IOMMU_CONTROL_INVALIDATION_TIMEOUT_MASK		0x000000E0
> -#define IOMMU_CONTROL_INVALIDATION_TIMEOUT_SHIFT	5
> -#define IOMMU_CONTROL_PASS_POSTED_WRITE_MASK		0x00000100
> -#define IOMMU_CONTROL_PASS_POSTED_WRITE_SHIFT		8
> -#define IOMMU_CONTROL_RESP_PASS_POSTED_WRITE_MASK	0x00000200
> -#define IOMMU_CONTROL_RESP_PASS_POSTED_WRITE_SHIFT	9
> -#define IOMMU_CONTROL_COHERENT_MASK			0x00000400
> -#define IOMMU_CONTROL_COHERENT_SHIFT			10
> -#define IOMMU_CONTROL_ISOCHRONOUS_MASK			0x00000800
> -#define IOMMU_CONTROL_ISOCHRONOUS_SHIFT			11
> -#define IOMMU_CONTROL_COMMAND_BUFFER_ENABLE_MASK	0x00001000
> -#define IOMMU_CONTROL_COMMAND_BUFFER_ENABLE_SHIFT	12
> -#define IOMMU_CONTROL_RESTART_MASK			0x80000000
> -#define IOMMU_CONTROL_RESTART_SHIFT			31
> +#define IOMMU_CONTROL_MMIO_OFFSET                       0x18
> +#define IOMMU_CONTROL_TRANSLATION_ENABLE_SHIFT          0
> +#define IOMMU_CONTROL_HT_TUNNEL_TRANSLATION_SHIFT       1
> +#define IOMMU_CONTROL_EVENT_LOG_ENABLE_SHIFT            2
> +#define IOMMU_CONTROL_EVENT_LOG_INT_SHIFT               3
> +#define IOMMU_CONTROL_COMP_WAIT_INT_SHIFT               4
> +#define IOMMU_CONTROL_INVALIDATION_TIMEOUT_SHIFT        5
> +#define IOMMU_CONTROL_PASS_POSTED_WRITE_SHIFT           8
> +#define IOMMU_CONTROL_RESP_PASS_POSTED_WRITE_SHIFT      9
> +#define IOMMU_CONTROL_COHERENT_SHIFT                    10
> +#define IOMMU_CONTROL_ISOCHRONOUS_SHIFT                 11
> +#define IOMMU_CONTROL_COMMAND_BUFFER_ENABLE_SHIFT       12
> +#define IOMMU_CONTROL_PPR_LOG_ENABLE_SHIFT              13
> +#define IOMMU_CONTROL_PPR_LOG_INT_SHIFT                 14
> +#define IOMMU_CONTROL_RESTART_SHIFT                     31
>  
>  #define IOMMU_CONTROL_PPR_LOG_ENABLE_SHIFT      13
>  #define IOMMU_CONTROL_PPR_INT_SHIFT             14
> @@ -363,38 +353,33 @@
>  #define IOMMU_EXCLUSION_LIMIT_HIGH_SHIFT	0
>  
>  /* Extended Feature Register*/
> -#define IOMMU_EXT_FEATURE_MMIO_OFFSET                   0x30
> -#define IOMMU_EXT_FEATURE_PREFSUP_SHIFT                 0x0
> -#define IOMMU_EXT_FEATURE_PPRSUP_SHIFT                  0x1
> -#define IOMMU_EXT_FEATURE_XTSUP_SHIFT                   0x2
> -#define IOMMU_EXT_FEATURE_NXSUP_SHIFT                   0x3
> -#define IOMMU_EXT_FEATURE_GTSUP_SHIFT                   0x4
> -#define IOMMU_EXT_FEATURE_IASUP_SHIFT                   0x6
> -#define IOMMU_EXT_FEATURE_GASUP_SHIFT                   0x7
> -#define IOMMU_EXT_FEATURE_HESUP_SHIFT                   0x8
> -#define IOMMU_EXT_FEATURE_PCSUP_SHIFT                   0x9
> -#define IOMMU_EXT_FEATURE_HATS_SHIFT                    0x10
> -#define IOMMU_EXT_FEATURE_HATS_MASK                     0x00000C00
> -#define IOMMU_EXT_FEATURE_GATS_SHIFT                    0x12
> -#define IOMMU_EXT_FEATURE_GATS_MASK                     0x00003000
> -#define IOMMU_EXT_FEATURE_GLXSUP_SHIFT                  0x14
> -#define IOMMU_EXT_FEATURE_GLXSUP_MASK                   0x0000C000
> -
> -#define IOMMU_EXT_FEATURE_PASMAX_SHIFT                  0x0
> -#define IOMMU_EXT_FEATURE_PASMAX_MASK                   0x0000001F
> +#define IOMMU_EXT_FEATURE_MMIO_OFFSET           0x30
> +#define IOMMU_EXT_FEATURE_PREFSUP_SHIFT         0x0
> +#define IOMMU_EXT_FEATURE_PPRSUP_SHIFT          0x1
> +#define IOMMU_EXT_FEATURE_XTSUP_SHIFT           0x2
> +#define IOMMU_EXT_FEATURE_NXSUP_SHIFT           0x3
> +#define IOMMU_EXT_FEATURE_GTSUP_SHIFT           0x4
> +#define IOMMU_EXT_FEATURE_IASUP_SHIFT           0x6
> +#define IOMMU_EXT_FEATURE_GASUP_SHIFT           0x7
> +#define IOMMU_EXT_FEATURE_HESUP_SHIFT           0x8
> +#define IOMMU_EXT_FEATURE_PCSUP_SHIFT           0x9
> +#define IOMMU_EXT_FEATURE_HATS_SHIFT            0x10
> +#define IOMMU_EXT_FEATURE_HATS_MASK             0x00000C00
> +#define IOMMU_EXT_FEATURE_GATS_SHIFT            0x12
> +#define IOMMU_EXT_FEATURE_GATS_MASK             0x00003000
> +#define IOMMU_EXT_FEATURE_GLXSUP_SHIFT          0x14
> +#define IOMMU_EXT_FEATURE_GLXSUP_MASK           0x0000C000
> +
> +#define IOMMU_EXT_FEATURE_PASMAX_SHIFT          0x0
> +#define IOMMU_EXT_FEATURE_PASMAX_MASK           0x0000001F
>  
>  /* Status Register*/
> -#define IOMMU_STATUS_MMIO_OFFSET		0x2020
> -#define IOMMU_STATUS_EVENT_OVERFLOW_MASK	0x00000001
> -#define IOMMU_STATUS_EVENT_OVERFLOW_SHIFT	0
> -#define IOMMU_STATUS_EVENT_LOG_INT_MASK		0x00000002
> -#define IOMMU_STATUS_EVENT_LOG_INT_SHIFT	1
> -#define IOMMU_STATUS_COMP_WAIT_INT_MASK		0x00000004
> -#define IOMMU_STATUS_COMP_WAIT_INT_SHIFT	2
> -#define IOMMU_STATUS_EVENT_LOG_RUN_MASK		0x00000008
> -#define IOMMU_STATUS_EVENT_LOG_RUN_SHIFT	3
> -#define IOMMU_STATUS_CMD_BUFFER_RUN_MASK	0x00000010
> -#define IOMMU_STATUS_CMD_BUFFER_RUN_SHIFT	4
> +#define IOMMU_STATUS_MMIO_OFFSET                0x2020
> +#define IOMMU_STATUS_EVENT_OVERFLOW_SHIFT       0
> +#define IOMMU_STATUS_EVENT_LOG_INT_SHIFT        1
> +#define IOMMU_STATUS_COMP_WAIT_INT_SHIFT        2
> +#define IOMMU_STATUS_EVENT_LOG_RUN_SHIFT        3
> +#define IOMMU_STATUS_CMD_BUFFER_RUN_SHIFT       4
>  #define IOMMU_STATUS_PPR_LOG_OVERFLOW_SHIFT     5
>  #define IOMMU_STATUS_PPR_LOG_INT_SHIFT          6
>  #define IOMMU_STATUS_PPR_LOG_RUN_SHIFT          7
> -- 
> 1.7.10.4

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 2/2 V2] iommu/amd: Workaround for erratum 787
  2013-06-10  9:47     ` Jan Beulich
@ 2013-06-10 10:40       ` Tim Deegan
  2013-06-10 10:53         ` Jan Beulich
  0 siblings, 1 reply; 50+ messages in thread
From: Tim Deegan @ 2013-06-10 10:40 UTC (permalink / raw)
  To: Jan Beulich; +Cc: suravee.suthikulpanit, xen-devel

At 10:47 +0100 on 10 Jun (1370861263), Jan Beulich wrote:
> >>> On 10.06.13 at 11:35, Tim Deegan <tim@xen.org> wrote:
> > At 00:05 -0500 on 10 Jun (1370822751), suravee.suthikulpanit@amd.com wrote:
> >> From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
> >> 
> >> The IOMMU interrupt handling in bottom half must clear the PPR log interrupt
> >> and event log interrupt bits to re-enable the interrupt. This is done by
> >> writing 1 to the memory mapped register to clear the bit. Due to hardware 
> > bug,
> >> if the driver tries to clear this bit while the IOMMU hardware also setting
> >> this bit, the conflict will result with the bit being set. If the interrupt
> >> handling code does not make sure to clear this bit, subsequent changes in 
> > the
> >> event/PPR logs will no longer generating interrupts, and would result if
> >> buffer overflow. After clearing the bits, the driver must read back
> >> the register to verify.
> > 
> > Is there a risk of livelock here?  That is, if some device is causing a
> > lot of IOMMU faults, a CPU could get stuck in this loop re-enabling
> > interrupts as fast as they are raised.
> > 
> > The solution suggested in the erratum seems better: if the bit is set
> > after clearing, process the interrupts again (i.e. run/schedule the
> > top-half handler).  That way the bottom-half handler will definitely
> > terminate and the system can make some progress.
> 
> That's what's being done really: The actual interrupt handler disables
> the interrupt sources, and the tasklet re-enables them (or at least is
> supposed to do so - patch 1 isn't really correct in the respect).

Oh I see, the idea is that we use the control register to mask
interrupts instead of relying on the status register?  That seems
better.  But doesn't this IOMMU already mask further interrupts when the
pending bit in the status register is set?  I can't see any wording
about that in the IOMMU doc but the erratum implies it.  Suravee, do you
know if this is the case?

If that _is_ the case then the correct handling logic is much simpler:
don't touch any IOMMU registers at all in the interrupt handler; clear
the interrupt-pending bits in the tasklet.

In either case, the while () loop worries me; I think it would be better
to schedule the tasklet again if we see the bit is set; a 'while
(pending is set) { clear pending bit; }' loop might never exit the
tasklet at all.

Cheers,

Tim.

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 2/2 V2] iommu/amd: Workaround for erratum 787
  2013-06-10 10:40       ` Tim Deegan
@ 2013-06-10 10:53         ` Jan Beulich
  2013-06-10 12:43           ` Tim Deegan
  2013-06-10 13:53           ` Suravee Suthikulanit
  0 siblings, 2 replies; 50+ messages in thread
From: Jan Beulich @ 2013-06-10 10:53 UTC (permalink / raw)
  To: Tim Deegan; +Cc: suravee.suthikulpanit, xen-devel

>>> On 10.06.13 at 12:40, Tim Deegan <tim@xen.org> wrote:
> At 10:47 +0100 on 10 Jun (1370861263), Jan Beulich wrote:
>> >>> On 10.06.13 at 11:35, Tim Deegan <tim@xen.org> wrote:
>> > At 00:05 -0500 on 10 Jun (1370822751), suravee.suthikulpanit@amd.com wrote:
>> >> From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
>> >> 
>> >> The IOMMU interrupt handling in bottom half must clear the PPR log 
> interrupt
>> >> and event log interrupt bits to re-enable the interrupt. This is done by
>> >> writing 1 to the memory mapped register to clear the bit. Due to hardware 
>> > bug,
>> >> if the driver tries to clear this bit while the IOMMU hardware also setting
>> >> this bit, the conflict will result with the bit being set. If the interrupt
>> >> handling code does not make sure to clear this bit, subsequent changes in 
>> > the
>> >> event/PPR logs will no longer generating interrupts, and would result if
>> >> buffer overflow. After clearing the bits, the driver must read back
>> >> the register to verify.
>> > 
>> > Is there a risk of livelock here?  That is, if some device is causing a
>> > lot of IOMMU faults, a CPU could get stuck in this loop re-enabling
>> > interrupts as fast as they are raised.
>> > 
>> > The solution suggested in the erratum seems better: if the bit is set
>> > after clearing, process the interrupts again (i.e. run/schedule the
>> > top-half handler).  That way the bottom-half handler will definitely
>> > terminate and the system can make some progress.
>> 
>> That's what's being done really: The actual interrupt handler disables
>> the interrupt sources, and the tasklet re-enables them (or at least is
>> supposed to do so - patch 1 isn't really correct in the respect).
> 
> Oh I see, the idea is that we use the control register to mask
> interrupts instead of relying on the status register?  That seems
> better.  But doesn't this IOMMU already mask further interrupts when the
> pending bit in the status register is set?  I can't see any wording
> about that in the IOMMU doc but the erratum implies it.  Suravee, do you
> know if this is the case?

Actually, the documentation has a subtle but perhaps important
difference int the wording here: For EventLogInt and ComWaitInt
is read "An interrupt is generated when <status bit> = 1b and MMIO
Offset 0018h[<control bit>] = 1b", where as for PPRInt and GAInt
it says "An interrupt is generated when <status bit> changes from 0b
to 1b and MMIO Offset 0018h[<control bit>] = 1b".

So I'd like to be one the safe side and assume further interrupts can
be generated in all cases - see also the emulation behavior in
iommu_guest.c which - afaict - always raises an interrupt, not just on
a 0 -> 1 transition.

> If that _is_ the case then the correct handling logic is much simpler:
> don't touch any IOMMU registers at all in the interrupt handler; clear
> the interrupt-pending bits in the tasklet.
> 
> In either case, the while () loop worries me; I think it would be better
> to schedule the tasklet again if we see the bit is set; a 'while
> (pending is set) { clear pending bit; }' loop might never exit the
> tasklet at all.

That could only be due to a hardware bug worse than the one we're
working around here, and I don't think is worth dealing with. Actually,
re-scheduling the tasklet would likely make analyzing the issue (if it
ever really happened in practice) more difficult than just having the
NMI watchdog point cleanly at the spinning code here.

Jan

^ permalink raw reply	[flat|nested] 50+ messages in thread

* [PATCH 1/2 v3] iommu/amd: Fix logic for clearing the IOMMU interrupt bits
  2013-06-10  5:05 [PATCH 1/2 V2] iommu/amd: Fix logic for clearing the IOMMU interrupt bits suravee.suthikulpanit
  2013-06-10  5:05 ` [PATCH 2/2 V2] iommu/amd: Workaround for erratum 787 suravee.suthikulpanit
  2013-06-10 10:05 ` [PATCH 1/2 V2] iommu/amd: Fix logic for clearing the IOMMU interrupt bits Jan Beulich
@ 2013-06-10 10:56 ` Jan Beulich
  2013-06-10 11:02   ` Jan Beulich
  2013-06-10 12:18   ` Tim Deegan
  2 siblings, 2 replies; 50+ messages in thread
From: Jan Beulich @ 2013-06-10 10:56 UTC (permalink / raw)
  To: xen-devel; +Cc: Tim Deegan, suravee.suthikulpanit

[-- Attachment #1: Type: text/plain, Size: 9377 bytes --]

The IOMMU interrupt bits in the IOMMU status registers are
"read-only, and write-1-to-clear (RW1C).  Therefore, the existing
logic which reads the register, set the bit, and then writing back
the values could accidentally clear certain bits if it has been set.

The correct logic would just be writing only the value which only
set the interrupt bits, and leave the rest to zeros.

This patch also, clean up #define masks as Jan has suggested.

Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>

With iommu_interrupt_handler() properly having got switched its readl()
from status to control register, the subsequent writel() needed to be
switched too (and the RW1C comment there was bogus).

Further, with iommu_interrupt_handler() now actually disabling the
interrupt sources, they also need to get re-enabled by the tasklet once
it finished processing the respective log.

Some of the cleanup went too far - undone.

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- 2013-05-13.orig/xen/drivers/passthrough/amd/iommu_cmd.c	2013-06-10 11:28:23.000000000 +0200
+++ 2013-05-13/xen/drivers/passthrough/amd/iommu_cmd.c	2013-06-10 12:16:15.000000000 +0200
@@ -75,11 +75,9 @@ static void flush_command_buffer(struct 
     u32 cmd[4], status;
     int loop_count, comp_wait;
 
-    /* clear 'ComWaitInt' in status register (WIC) */
-    set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, 0,
-                         IOMMU_STATUS_COMP_WAIT_INT_MASK,
-                         IOMMU_STATUS_COMP_WAIT_INT_SHIFT, &status);
-    writel(status, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+    /* RW1C 'ComWaitInt' in status register */
+    writel(IOMMU_STATUS_COMP_WAIT_INT_MASK,
+           iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
 
     /* send an empty COMPLETION_WAIT command to flush command buffer */
     cmd[3] = cmd[2] = 0;
@@ -103,9 +101,9 @@ static void flush_command_buffer(struct 
 
     if ( comp_wait )
     {
-        /* clear 'ComWaitInt' in status register (WIC) */
-        status &= IOMMU_STATUS_COMP_WAIT_INT_MASK;
-        writel(status, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+        /* RW1C 'ComWaitInt' in status register */
+        writel(IOMMU_STATUS_COMP_WAIT_INT_MASK,
+               iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
         return;
     }
     AMD_IOMMU_DEBUG("Warning: ComWaitInt bit did not assert!\n");
--- 2013-05-13.orig/xen/drivers/passthrough/amd/iommu_guest.c	2012-02-10 11:25:54.000000000 +0100
+++ 2013-05-13/xen/drivers/passthrough/amd/iommu_guest.c	2013-06-10 12:32:00.000000000 +0200
@@ -754,7 +754,14 @@ static void guest_iommu_mmio_write64(str
         u64_to_reg(&iommu->ppr_log.reg_tail, val);
         break;
     case IOMMU_STATUS_MMIO_OFFSET:
-        u64_to_reg(&iommu->reg_status, val);
+        val &= IOMMU_STATUS_EVENT_OVERFLOW_MASK |
+               IOMMU_STATUS_EVENT_LOG_INT_MASK |
+               IOMMU_STATUS_COMP_WAIT_INT_MASK |
+               IOMMU_STATUS_PPR_LOG_OVERFLOW_MASK |
+               IOMMU_STATUS_PPR_LOG_INT_MASK |
+               IOMMU_STATUS_GAPIC_LOG_OVERFLOW_MASK |
+               IOMMU_STATUS_GAPIC_LOG_INT_MASK;
+        u64_to_reg(&iommu->reg_status, reg_to_u64(iommu->reg_status) ^ val);
         break;
 
     default:
--- 2013-05-13.orig/xen/drivers/passthrough/amd/iommu_init.c	2013-06-10 11:28:23.000000000 +0200
+++ 2013-05-13/xen/drivers/passthrough/amd/iommu_init.c	2013-06-10 12:17:00.000000000 +0200
@@ -344,13 +344,13 @@ static void set_iommu_ppr_log_control(st
         writeq(0, iommu->mmio_base + IOMMU_PPR_LOG_TAIL_OFFSET);
 
         iommu_set_bit(&entry, IOMMU_CONTROL_PPR_ENABLE_SHIFT);
-        iommu_set_bit(&entry, IOMMU_CONTROL_PPR_INT_SHIFT);
+        iommu_set_bit(&entry, IOMMU_CONTROL_PPR_LOG_INT_SHIFT);
         iommu_set_bit(&entry, IOMMU_CONTROL_PPR_LOG_ENABLE_SHIFT);
     }
     else
     {
         iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_ENABLE_SHIFT);
-        iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_INT_SHIFT);
+        iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_LOG_INT_SHIFT);
         iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_LOG_ENABLE_SHIFT);
     }
 
@@ -439,8 +439,8 @@ static void iommu_reset_log(struct amd_i
 
     ctrl_func(iommu, IOMMU_CONTROL_DISABLED);
 
-    /*clear overflow bit */
-    iommu_clear_bit(&entry, of_bit);
+    /* RW1C overflow bit */
+    iommu_set_bit(&entry, of_bit);
     writel(entry, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
 
     /*reset event log base address */
@@ -619,14 +619,18 @@ static void iommu_check_event_log(struct
     /*check event overflow */
     entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
 
+    /* RW1C interrupt status bit */
+    writel(IOMMU_STATUS_EVENT_LOG_INT_MASK,
+           iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+
     if ( iommu_get_bit(entry, IOMMU_STATUS_EVENT_OVERFLOW_SHIFT) )
         iommu_reset_log(iommu, &iommu->event_log, set_iommu_event_log_control);
-
-    /* reset interrupt status bit */
-    entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
-    iommu_set_bit(&entry, IOMMU_STATUS_EVENT_LOG_INT_SHIFT);
-
-    writel(entry, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+    else
+    {
+        entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
+        iommu_set_bit(&entry, IOMMU_CONTROL_EVENT_LOG_INT_SHIFT);
+        writel(entry, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
+    }
 
     spin_unlock_irqrestore(&iommu->lock, flags);
 }
@@ -689,14 +693,18 @@ static void iommu_check_ppr_log(struct a
     /*check event overflow */
     entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
 
+    /* RW1C interrupt status bit */
+    writel(IOMMU_STATUS_PPR_LOG_INT_MASK,
+           iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+
     if ( iommu_get_bit(entry, IOMMU_STATUS_PPR_LOG_OVERFLOW_SHIFT) )
         iommu_reset_log(iommu, &iommu->ppr_log, set_iommu_ppr_log_control);
-
-    /* reset interrupt status bit */
-    entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
-    iommu_set_bit(&entry, IOMMU_STATUS_PPR_LOG_INT_SHIFT);
-
-    writel(entry, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+    else
+    {
+        entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
+        iommu_set_bit(&entry, IOMMU_CONTROL_PPR_LOG_INT_SHIFT);
+        writel(entry, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
+    }
 
     spin_unlock_irqrestore(&iommu->lock, flags);
 }
@@ -733,11 +741,14 @@ static void iommu_interrupt_handler(int 
 
     spin_lock_irqsave(&iommu->lock, flags);
 
-    /* Silence interrupts from both event and PPR logging */
-    entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
-    iommu_clear_bit(&entry, IOMMU_STATUS_EVENT_LOG_INT_SHIFT);
-    iommu_clear_bit(&entry, IOMMU_STATUS_PPR_LOG_INT_SHIFT);
-    writel(entry, iommu->mmio_base+IOMMU_STATUS_MMIO_OFFSET);
+    /*
+     * Silence interrupts from both event and PPR by clearing the
+     * enable logging bits in the control register
+     */
+    entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
+    iommu_clear_bit(&entry, IOMMU_CONTROL_EVENT_LOG_INT_SHIFT);
+    iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_LOG_INT_SHIFT);
+    writel(entry, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
 
     spin_unlock_irqrestore(&iommu->lock, flags);
 
--- 2013-05-13.orig/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h	2013-03-11 14:57:19.000000000 +0100
+++ 2013-05-13/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h	2013-06-10 12:35:55.000000000 +0200
@@ -336,14 +336,13 @@
 #define IOMMU_CONTROL_ISOCHRONOUS_SHIFT			11
 #define IOMMU_CONTROL_COMMAND_BUFFER_ENABLE_MASK	0x00001000
 #define IOMMU_CONTROL_COMMAND_BUFFER_ENABLE_SHIFT	12
+#define IOMMU_CONTROL_PPR_LOG_ENABLE_SHIFT		13
+#define IOMMU_CONTROL_PPR_LOG_INT_SHIFT			14
+#define IOMMU_CONTROL_PPR_ENABLE_SHIFT			15
+#define IOMMU_CONTROL_GT_ENABLE_SHIFT			16
 #define IOMMU_CONTROL_RESTART_MASK			0x80000000
 #define IOMMU_CONTROL_RESTART_SHIFT			31
 
-#define IOMMU_CONTROL_PPR_LOG_ENABLE_SHIFT      13
-#define IOMMU_CONTROL_PPR_INT_SHIFT             14
-#define IOMMU_CONTROL_PPR_ENABLE_SHIFT          15
-#define IOMMU_CONTROL_GT_ENABLE_SHIFT           16
-
 /* Exclusion Register */
 #define IOMMU_EXCLUSION_BASE_LOW_OFFSET		0x20
 #define IOMMU_EXCLUSION_BASE_HIGH_OFFSET	0x24
@@ -395,9 +394,18 @@
 #define IOMMU_STATUS_EVENT_LOG_RUN_SHIFT	3
 #define IOMMU_STATUS_CMD_BUFFER_RUN_MASK	0x00000010
 #define IOMMU_STATUS_CMD_BUFFER_RUN_SHIFT	4
+#define IOMMU_STATUS_PPR_LOG_OVERFLOW_MASK      0x00000020
 #define IOMMU_STATUS_PPR_LOG_OVERFLOW_SHIFT     5
+#define IOMMU_STATUS_PPR_LOG_INT_MASK           0x00000040
 #define IOMMU_STATUS_PPR_LOG_INT_SHIFT          6
+#define IOMMU_STATUS_PPR_LOG_RUN_MASK           0x00000080
 #define IOMMU_STATUS_PPR_LOG_RUN_SHIFT          7
+#define IOMMU_STATUS_GAPIC_LOG_OVERFLOW_MASK    0x00000100
+#define IOMMU_STATUS_GAPIC_LOG_OVERFLOW_SHIFT   8
+#define IOMMU_STATUS_GAPIC_LOG_INT_MASK         0x00000200
+#define IOMMU_STATUS_GAPIC_LOG_INT_SHIFT        9
+#define IOMMU_STATUS_GAPIC_LOG_RUN_MASK         0x00000400
+#define IOMMU_STATUS_GAPIC_LOG_RUN_SHIFT        10
 
 /* I/O Page Table */
 #define IOMMU_PAGE_TABLE_ENTRY_SIZE	8



[-- Attachment #2: AMD-IOMMU-correct-rw1c-handling --]
[-- Type: application/octet-stream, Size: 9222 bytes --]

iommu/amd: Fix logic for clearing the IOMMU interrupt bits

The IOMMU interrupt bits in the IOMMU status registers are
"read-only, and write-1-to-clear (RW1C).  Therefore, the existing
logic which reads the register, set the bit, and then writing back
the values could accidentally clear certain bits if it has been set.

The correct logic would just be writing only the value which only
set the interrupt bits, and leave the rest to zeros.

This patch also, clean up #define masks as Jan has suggested.

Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>

With iommu_interrupt_handler() properly having got switched its readl()
from status to control register, the subsequent writel() needed to be
switched too (and the RW1C comment there was bogus).

Further, with iommu_interrupt_handler() now actually disabling the
interrupt sources, they also need to get re-enabled by the tasklet once
it finished processing the respective log.

Some of the cleanup went too far - undone.

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- 2013-05-13.orig/xen/drivers/passthrough/amd/iommu_cmd.c	2013-06-10 11:28:23.000000000 +0200
+++ 2013-05-13/xen/drivers/passthrough/amd/iommu_cmd.c	2013-06-10 12:16:15.000000000 +0200
@@ -75,11 +75,9 @@ static void flush_command_buffer(struct 
     u32 cmd[4], status;
     int loop_count, comp_wait;
 
-    /* clear 'ComWaitInt' in status register (WIC) */
-    set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, 0,
-                         IOMMU_STATUS_COMP_WAIT_INT_MASK,
-                         IOMMU_STATUS_COMP_WAIT_INT_SHIFT, &status);
-    writel(status, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+    /* RW1C 'ComWaitInt' in status register */
+    writel(IOMMU_STATUS_COMP_WAIT_INT_MASK,
+           iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
 
     /* send an empty COMPLETION_WAIT command to flush command buffer */
     cmd[3] = cmd[2] = 0;
@@ -103,9 +101,9 @@ static void flush_command_buffer(struct 
 
     if ( comp_wait )
     {
-        /* clear 'ComWaitInt' in status register (WIC) */
-        status &= IOMMU_STATUS_COMP_WAIT_INT_MASK;
-        writel(status, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+        /* RW1C 'ComWaitInt' in status register */
+        writel(IOMMU_STATUS_COMP_WAIT_INT_MASK,
+               iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
         return;
     }
     AMD_IOMMU_DEBUG("Warning: ComWaitInt bit did not assert!\n");
--- 2013-05-13.orig/xen/drivers/passthrough/amd/iommu_guest.c	2012-02-10 11:25:54.000000000 +0100
+++ 2013-05-13/xen/drivers/passthrough/amd/iommu_guest.c	2013-06-10 12:32:00.000000000 +0200
@@ -754,7 +754,14 @@ static void guest_iommu_mmio_write64(str
         u64_to_reg(&iommu->ppr_log.reg_tail, val);
         break;
     case IOMMU_STATUS_MMIO_OFFSET:
-        u64_to_reg(&iommu->reg_status, val);
+        val &= IOMMU_STATUS_EVENT_OVERFLOW_MASK |
+               IOMMU_STATUS_EVENT_LOG_INT_MASK |
+               IOMMU_STATUS_COMP_WAIT_INT_MASK |
+               IOMMU_STATUS_PPR_LOG_OVERFLOW_MASK |
+               IOMMU_STATUS_PPR_LOG_INT_MASK |
+               IOMMU_STATUS_GAPIC_LOG_OVERFLOW_MASK |
+               IOMMU_STATUS_GAPIC_LOG_INT_MASK;
+        u64_to_reg(&iommu->reg_status, reg_to_u64(iommu->reg_status) ^ val);
         break;
 
     default:
--- 2013-05-13.orig/xen/drivers/passthrough/amd/iommu_init.c	2013-06-10 11:28:23.000000000 +0200
+++ 2013-05-13/xen/drivers/passthrough/amd/iommu_init.c	2013-06-10 12:17:00.000000000 +0200
@@ -344,13 +344,13 @@ static void set_iommu_ppr_log_control(st
         writeq(0, iommu->mmio_base + IOMMU_PPR_LOG_TAIL_OFFSET);
 
         iommu_set_bit(&entry, IOMMU_CONTROL_PPR_ENABLE_SHIFT);
-        iommu_set_bit(&entry, IOMMU_CONTROL_PPR_INT_SHIFT);
+        iommu_set_bit(&entry, IOMMU_CONTROL_PPR_LOG_INT_SHIFT);
         iommu_set_bit(&entry, IOMMU_CONTROL_PPR_LOG_ENABLE_SHIFT);
     }
     else
     {
         iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_ENABLE_SHIFT);
-        iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_INT_SHIFT);
+        iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_LOG_INT_SHIFT);
         iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_LOG_ENABLE_SHIFT);
     }
 
@@ -439,8 +439,8 @@ static void iommu_reset_log(struct amd_i
 
     ctrl_func(iommu, IOMMU_CONTROL_DISABLED);
 
-    /*clear overflow bit */
-    iommu_clear_bit(&entry, of_bit);
+    /* RW1C overflow bit */
+    iommu_set_bit(&entry, of_bit);
     writel(entry, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
 
     /*reset event log base address */
@@ -619,14 +619,18 @@ static void iommu_check_event_log(struct
     /*check event overflow */
     entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
 
+    /* RW1C interrupt status bit */
+    writel(IOMMU_STATUS_EVENT_LOG_INT_MASK,
+           iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+
     if ( iommu_get_bit(entry, IOMMU_STATUS_EVENT_OVERFLOW_SHIFT) )
         iommu_reset_log(iommu, &iommu->event_log, set_iommu_event_log_control);
-
-    /* reset interrupt status bit */
-    entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
-    iommu_set_bit(&entry, IOMMU_STATUS_EVENT_LOG_INT_SHIFT);
-
-    writel(entry, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+    else
+    {
+        entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
+        iommu_set_bit(&entry, IOMMU_CONTROL_EVENT_LOG_INT_SHIFT);
+        writel(entry, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
+    }
 
     spin_unlock_irqrestore(&iommu->lock, flags);
 }
@@ -689,14 +693,18 @@ static void iommu_check_ppr_log(struct a
     /*check event overflow */
     entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
 
+    /* RW1C interrupt status bit */
+    writel(IOMMU_STATUS_PPR_LOG_INT_MASK,
+           iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+
     if ( iommu_get_bit(entry, IOMMU_STATUS_PPR_LOG_OVERFLOW_SHIFT) )
         iommu_reset_log(iommu, &iommu->ppr_log, set_iommu_ppr_log_control);
-
-    /* reset interrupt status bit */
-    entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
-    iommu_set_bit(&entry, IOMMU_STATUS_PPR_LOG_INT_SHIFT);
-
-    writel(entry, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+    else
+    {
+        entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
+        iommu_set_bit(&entry, IOMMU_CONTROL_PPR_LOG_INT_SHIFT);
+        writel(entry, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
+    }
 
     spin_unlock_irqrestore(&iommu->lock, flags);
 }
@@ -733,11 +741,14 @@ static void iommu_interrupt_handler(int 
 
     spin_lock_irqsave(&iommu->lock, flags);
 
-    /* Silence interrupts from both event and PPR logging */
-    entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
-    iommu_clear_bit(&entry, IOMMU_STATUS_EVENT_LOG_INT_SHIFT);
-    iommu_clear_bit(&entry, IOMMU_STATUS_PPR_LOG_INT_SHIFT);
-    writel(entry, iommu->mmio_base+IOMMU_STATUS_MMIO_OFFSET);
+    /*
+     * Silence interrupts from both event and PPR by clearing the
+     * enable logging bits in the control register
+     */
+    entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
+    iommu_clear_bit(&entry, IOMMU_CONTROL_EVENT_LOG_INT_SHIFT);
+    iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_LOG_INT_SHIFT);
+    writel(entry, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
 
     spin_unlock_irqrestore(&iommu->lock, flags);
 
--- 2013-05-13.orig/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h	2013-03-11 14:57:19.000000000 +0100
+++ 2013-05-13/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h	2013-06-10 12:35:55.000000000 +0200
@@ -336,14 +336,13 @@
 #define IOMMU_CONTROL_ISOCHRONOUS_SHIFT			11
 #define IOMMU_CONTROL_COMMAND_BUFFER_ENABLE_MASK	0x00001000
 #define IOMMU_CONTROL_COMMAND_BUFFER_ENABLE_SHIFT	12
+#define IOMMU_CONTROL_PPR_LOG_ENABLE_SHIFT		13
+#define IOMMU_CONTROL_PPR_LOG_INT_SHIFT			14
+#define IOMMU_CONTROL_PPR_ENABLE_SHIFT			15
+#define IOMMU_CONTROL_GT_ENABLE_SHIFT			16
 #define IOMMU_CONTROL_RESTART_MASK			0x80000000
 #define IOMMU_CONTROL_RESTART_SHIFT			31
 
-#define IOMMU_CONTROL_PPR_LOG_ENABLE_SHIFT      13
-#define IOMMU_CONTROL_PPR_INT_SHIFT             14
-#define IOMMU_CONTROL_PPR_ENABLE_SHIFT          15
-#define IOMMU_CONTROL_GT_ENABLE_SHIFT           16
-
 /* Exclusion Register */
 #define IOMMU_EXCLUSION_BASE_LOW_OFFSET		0x20
 #define IOMMU_EXCLUSION_BASE_HIGH_OFFSET	0x24
@@ -395,9 +394,18 @@
 #define IOMMU_STATUS_EVENT_LOG_RUN_SHIFT	3
 #define IOMMU_STATUS_CMD_BUFFER_RUN_MASK	0x00000010
 #define IOMMU_STATUS_CMD_BUFFER_RUN_SHIFT	4
+#define IOMMU_STATUS_PPR_LOG_OVERFLOW_MASK      0x00000020
 #define IOMMU_STATUS_PPR_LOG_OVERFLOW_SHIFT     5
+#define IOMMU_STATUS_PPR_LOG_INT_MASK           0x00000040
 #define IOMMU_STATUS_PPR_LOG_INT_SHIFT          6
+#define IOMMU_STATUS_PPR_LOG_RUN_MASK           0x00000080
 #define IOMMU_STATUS_PPR_LOG_RUN_SHIFT          7
+#define IOMMU_STATUS_GAPIC_LOG_OVERFLOW_MASK    0x00000100
+#define IOMMU_STATUS_GAPIC_LOG_OVERFLOW_SHIFT   8
+#define IOMMU_STATUS_GAPIC_LOG_INT_MASK         0x00000200
+#define IOMMU_STATUS_GAPIC_LOG_INT_SHIFT        9
+#define IOMMU_STATUS_GAPIC_LOG_RUN_MASK         0x00000400
+#define IOMMU_STATUS_GAPIC_LOG_RUN_SHIFT        10
 
 /* I/O Page Table */
 #define IOMMU_PAGE_TABLE_ENTRY_SIZE	8

[-- Attachment #3: Type: text/plain, Size: 126 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 50+ messages in thread

* [PATCH 2/2 v3] iommu/amd: Workaround for erratum 787
  2013-06-10  5:05 ` [PATCH 2/2 V2] iommu/amd: Workaround for erratum 787 suravee.suthikulpanit
  2013-06-10  9:35   ` Tim Deegan
@ 2013-06-10 10:59   ` Jan Beulich
  2013-06-11  6:47     ` [PATCH 2/2 v5] " Jan Beulich
  1 sibling, 1 reply; 50+ messages in thread
From: Jan Beulich @ 2013-06-10 10:59 UTC (permalink / raw)
  To: xen-devel; +Cc: Tim Deegan, suravee.suthikulpanit

[-- Attachment #1: Type: text/plain, Size: 2973 bytes --]

The IOMMU interrupt handling in bottom half must clear the PPR log interrupt
and event log interrupt bits to re-enable the interrupt. This is done by
writing 1 to the memory mapped register to clear the bit. Due to hardware bug,
if the driver tries to clear this bit while the IOMMU hardware also setting
this bit, the conflict will result with the bit being set. If the interrupt
handling code does not make sure to clear this bit, subsequent changes in the
event/PPR logs will no longer generating interrupts, and would result if
buffer overflow. After clearing the bits, the driver must read back
the register to verify.

Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>

Adjust to apply on top of heavily modified patch 1. Adjust flow to get away
with a single readl() in each instance of the status register checks.

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/drivers/passthrough/amd/iommu_init.c
+++ b/xen/drivers/passthrough/amd/iommu_init.c
@@ -616,13 +616,18 @@ static void iommu_check_event_log(struct
 
     spin_lock_irqsave(&iommu->lock, flags);
     
-    /*check event overflow */
-    entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
-
-    /* RW1C interrupt status bit */
-    writel(IOMMU_STATUS_EVENT_LOG_INT_MASK,
-           iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+    do {
+        /* RW1C interrupt status bit */
+        writel(IOMMU_STATUS_EVENT_LOG_INT_MASK,
+               iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+        /*
+         * Workaround for erratum787:
+         * Re-check to make sure the bit has been cleared.
+         */
+        entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+    } while ( entry & IOMMU_STATUS_EVENT_LOG_INT_MASK );
 
+    /* Check event overflow. */
     if ( iommu_get_bit(entry, IOMMU_STATUS_EVENT_OVERFLOW_SHIFT) )
         iommu_reset_log(iommu, &iommu->event_log, set_iommu_event_log_control);
     else
@@ -690,13 +695,18 @@ static void iommu_check_ppr_log(struct a
     
     spin_lock_irqsave(&iommu->lock, flags);
 
-    /*check event overflow */
-    entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
-
-    /* RW1C interrupt status bit */
-    writel(IOMMU_STATUS_PPR_LOG_INT_MASK,
-           iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+    do {
+        /* RW1C interrupt status bit */
+        writel(IOMMU_STATUS_PPR_LOG_INT_MASK,
+               iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+        /*
+         * Workaround for erratum787:
+         * Re-check to make sure the bit has been cleared.
+         */
+        entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+    } while ( entry & IOMMU_STATUS_PPR_LOG_INT_MASK );
 
+    /* Check event overflow. */
     if ( iommu_get_bit(entry, IOMMU_STATUS_PPR_LOG_OVERFLOW_SHIFT) )
         iommu_reset_log(iommu, &iommu->ppr_log, set_iommu_ppr_log_control);
     else




[-- Attachment #2: AMD-IOMMU-erratum-787-workaround --]
[-- Type: application/octet-stream, Size: 2937 bytes --]

iommu/amd: Workaround for erratum 787

The IOMMU interrupt handling in bottom half must clear the PPR log interrupt
and event log interrupt bits to re-enable the interrupt. This is done by
writing 1 to the memory mapped register to clear the bit. Due to hardware bug,
if the driver tries to clear this bit while the IOMMU hardware also setting
this bit, the conflict will result with the bit being set. If the interrupt
handling code does not make sure to clear this bit, subsequent changes in the
event/PPR logs will no longer generating interrupts, and would result if
buffer overflow. After clearing the bits, the driver must read back
the register to verify.

Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>

Adjust to apply on top of heavily modified patch 1. Adjust flow to get away
with a single readl() in each instance of the status register checks.

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/drivers/passthrough/amd/iommu_init.c
+++ b/xen/drivers/passthrough/amd/iommu_init.c
@@ -616,13 +616,18 @@ static void iommu_check_event_log(struct
 
     spin_lock_irqsave(&iommu->lock, flags);
     
-    /*check event overflow */
-    entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
-
-    /* RW1C interrupt status bit */
-    writel(IOMMU_STATUS_EVENT_LOG_INT_MASK,
-           iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+    do {
+        /* RW1C interrupt status bit */
+        writel(IOMMU_STATUS_EVENT_LOG_INT_MASK,
+               iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+        /*
+         * Workaround for erratum787:
+         * Re-check to make sure the bit has been cleared.
+         */
+        entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+    } while ( entry & IOMMU_STATUS_EVENT_LOG_INT_MASK );
 
+    /* Check event overflow. */
     if ( iommu_get_bit(entry, IOMMU_STATUS_EVENT_OVERFLOW_SHIFT) )
         iommu_reset_log(iommu, &iommu->event_log, set_iommu_event_log_control);
     else
@@ -690,13 +695,18 @@ static void iommu_check_ppr_log(struct a
     
     spin_lock_irqsave(&iommu->lock, flags);
 
-    /*check event overflow */
-    entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
-
-    /* RW1C interrupt status bit */
-    writel(IOMMU_STATUS_PPR_LOG_INT_MASK,
-           iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+    do {
+        /* RW1C interrupt status bit */
+        writel(IOMMU_STATUS_PPR_LOG_INT_MASK,
+               iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+        /*
+         * Workaround for erratum787:
+         * Re-check to make sure the bit has been cleared.
+         */
+        entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+    } while ( entry & IOMMU_STATUS_PPR_LOG_INT_MASK );
 
+    /* Check event overflow. */
     if ( iommu_get_bit(entry, IOMMU_STATUS_PPR_LOG_OVERFLOW_SHIFT) )
         iommu_reset_log(iommu, &iommu->ppr_log, set_iommu_ppr_log_control);
     else

[-- Attachment #3: Type: text/plain, Size: 126 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/2 v3] iommu/amd: Fix logic for clearing the IOMMU interrupt bits
  2013-06-10 10:56 ` [PATCH 1/2 v3] " Jan Beulich
@ 2013-06-10 11:02   ` Jan Beulich
  2013-06-10 12:18   ` Tim Deegan
  1 sibling, 0 replies; 50+ messages in thread
From: Jan Beulich @ 2013-06-10 11:02 UTC (permalink / raw)
  To: xen-devel; +Cc: Tim Deegan, suravee.suthikulpanit

>>> On 10.06.13 at 12:56, "Jan Beulich" <JBeulich@suse.com> wrote:
> The IOMMU interrupt bits in the IOMMU status registers are
> "read-only, and write-1-to-clear (RW1C).  Therefore, the existing
> logic which reads the register, set the bit, and then writing back
> the values could accidentally clear certain bits if it has been set.
> 
> The correct logic would just be writing only the value which only
> set the interrupt bits, and leave the rest to zeros.
> 
> This patch also, clean up #define masks as Jan has suggested.
> 
> Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
> 
> With iommu_interrupt_handler() properly having got switched its readl()
> from status to control register, the subsequent writel() needed to be
> switched too (and the RW1C comment there was bogus).
> 
> Further, with iommu_interrupt_handler() now actually disabling the
> interrupt sources, they also need to get re-enabled by the tasklet once
> it finished processing the respective log.

Finally, guest write emulation to the status register needs to be done
with the RW1C (and RO for all other bits) semantics in mind too.

> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Jan

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/2 v3] iommu/amd: Fix logic for clearing the IOMMU interrupt bits
  2013-06-10 10:56 ` [PATCH 1/2 v3] " Jan Beulich
  2013-06-10 11:02   ` Jan Beulich
@ 2013-06-10 12:18   ` Tim Deegan
  2013-06-10 12:31     ` Jan Beulich
  2013-06-10 12:41     ` [PATCH 1/2 v4] " Jan Beulich
  1 sibling, 2 replies; 50+ messages in thread
From: Tim Deegan @ 2013-06-10 12:18 UTC (permalink / raw)
  To: Jan Beulich; +Cc: suravee.suthikulpanit, xen-devel

At 11:56 +0100 on 10 Jun (1370865376), Jan Beulich wrote:
> --- 2013-05-13.orig/xen/drivers/passthrough/amd/iommu_guest.c	2012-02-10 11:25:54.000000000 +0100
> +++ 2013-05-13/xen/drivers/passthrough/amd/iommu_guest.c	2013-06-10 12:32:00.000000000 +0200
> @@ -754,7 +754,14 @@ static void guest_iommu_mmio_write64(str
>          u64_to_reg(&iommu->ppr_log.reg_tail, val);
>          break;
>      case IOMMU_STATUS_MMIO_OFFSET:
> -        u64_to_reg(&iommu->reg_status, val);
> +        val &= IOMMU_STATUS_EVENT_OVERFLOW_MASK |
> +               IOMMU_STATUS_EVENT_LOG_INT_MASK |
> +               IOMMU_STATUS_COMP_WAIT_INT_MASK |
> +               IOMMU_STATUS_PPR_LOG_OVERFLOW_MASK |
> +               IOMMU_STATUS_PPR_LOG_INT_MASK |
> +               IOMMU_STATUS_GAPIC_LOG_OVERFLOW_MASK |
> +               IOMMU_STATUS_GAPIC_LOG_INT_MASK;
> +        u64_to_reg(&iommu->reg_status, reg_to_u64(iommu->reg_status) ^ val);

'^ val'?  Surely '& ~val' or it will set unset bits too.

> @@ -439,8 +439,8 @@ static void iommu_reset_log(struct amd_i
>  
>      ctrl_func(iommu, IOMMU_CONTROL_DISABLED);
>  
> -    /*clear overflow bit */
> -    iommu_clear_bit(&entry, of_bit);
> +    /* RW1C overflow bit */
> +    iommu_set_bit(&entry, of_bit);
>      writel(entry, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);

Won't this clear all other status bits as well?  

Tim.

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/2 v3] iommu/amd: Fix logic for clearing the IOMMU interrupt bits
  2013-06-10 12:18   ` Tim Deegan
@ 2013-06-10 12:31     ` Jan Beulich
  2013-06-10 13:58       ` Suravee Suthikulanit
  2013-06-10 12:41     ` [PATCH 1/2 v4] " Jan Beulich
  1 sibling, 1 reply; 50+ messages in thread
From: Jan Beulich @ 2013-06-10 12:31 UTC (permalink / raw)
  To: Tim Deegan; +Cc: suravee.suthikulpanit, xen-devel

>>> On 10.06.13 at 14:18, Tim Deegan <tim@xen.org> wrote:
> At 11:56 +0100 on 10 Jun (1370865376), Jan Beulich wrote:
>> --- 2013-05-13.orig/xen/drivers/passthrough/amd/iommu_guest.c	2012-02-10 
> 11:25:54.000000000 +0100
>> +++ 2013-05-13/xen/drivers/passthrough/amd/iommu_guest.c	2013-06-10 
> 12:32:00.000000000 +0200
>> @@ -754,7 +754,14 @@ static void guest_iommu_mmio_write64(str
>>          u64_to_reg(&iommu->ppr_log.reg_tail, val);
>>          break;
>>      case IOMMU_STATUS_MMIO_OFFSET:
>> -        u64_to_reg(&iommu->reg_status, val);
>> +        val &= IOMMU_STATUS_EVENT_OVERFLOW_MASK |
>> +               IOMMU_STATUS_EVENT_LOG_INT_MASK |
>> +               IOMMU_STATUS_COMP_WAIT_INT_MASK |
>> +               IOMMU_STATUS_PPR_LOG_OVERFLOW_MASK |
>> +               IOMMU_STATUS_PPR_LOG_INT_MASK |
>> +               IOMMU_STATUS_GAPIC_LOG_OVERFLOW_MASK |
>> +               IOMMU_STATUS_GAPIC_LOG_INT_MASK;
>> +        u64_to_reg(&iommu->reg_status, reg_to_u64(iommu->reg_status) ^ val);
> 
> '^ val'?  Surely '& ~val' or it will set unset bits too.

Oh, yes, of course.

>> @@ -439,8 +439,8 @@ static void iommu_reset_log(struct amd_i
>>  
>>      ctrl_func(iommu, IOMMU_CONTROL_DISABLED);
>>  
>> -    /*clear overflow bit */
>> -    iommu_clear_bit(&entry, of_bit);
>> +    /* RW1C overflow bit */
>> +    iommu_set_bit(&entry, of_bit);
>>      writel(entry, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
> 
> Won't this clear all other status bits as well?  

Yes it will, albeit not because of this patch. But I guess we should
fix this as we go. Expect v4 soon...

Jan

^ permalink raw reply	[flat|nested] 50+ messages in thread

* [PATCH 1/2 v4] iommu/amd: Fix logic for clearing the IOMMU interrupt bits
  2013-06-10 12:18   ` Tim Deegan
  2013-06-10 12:31     ` Jan Beulich
@ 2013-06-10 12:41     ` Jan Beulich
  2013-06-10 12:46       ` Tim Deegan
                         ` (2 more replies)
  1 sibling, 3 replies; 50+ messages in thread
From: Jan Beulich @ 2013-06-10 12:41 UTC (permalink / raw)
  To: xen-devel; +Cc: Tim Deegan, suravee.suthikulpanit

[-- Attachment #1: Type: text/plain, Size: 10270 bytes --]

The IOMMU interrupt bits in the IOMMU status registers are
"read-only, and write-1-to-clear (RW1C).  Therefore, the existing
logic which reads the register, set the bit, and then writing back
the values could accidentally clear certain bits if it has been set.

The correct logic would just be writing only the value which only
set the interrupt bits, and leave the rest to zeros.

This patch also, clean up #define masks as Jan has suggested.

Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>

With iommu_interrupt_handler() properly having got switched its readl()
from status to control register, the subsequent writel() needed to be
switched too (and the RW1C comment there was bogus).

Some of the cleanup went too far - undone.

Further, with iommu_interrupt_handler() now actually disabling the
interrupt sources, they also need to get re-enabled by the tasklet once
it finished processing the respective log.

Finally, guest write emulation to the status register needs to be done
with the RW1C (and RO for all other bits) semantics in mind too.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v4: Clear _only_ the respective overflow bit in iommu_reset_log(). Fix
    logic error in adjustment to guest_iommu_mmio_write64(). (Both
    pointed out by Tim - Thanks!)

--- a/xen/drivers/passthrough/amd/iommu_cmd.c
+++ b/xen/drivers/passthrough/amd/iommu_cmd.c
@@ -75,11 +75,9 @@ static void flush_command_buffer(struct 
     u32 cmd[4], status;
     int loop_count, comp_wait;
 
-    /* clear 'ComWaitInt' in status register (WIC) */
-    set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, 0,
-                         IOMMU_STATUS_COMP_WAIT_INT_MASK,
-                         IOMMU_STATUS_COMP_WAIT_INT_SHIFT, &status);
-    writel(status, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+    /* RW1C 'ComWaitInt' in status register */
+    writel(IOMMU_STATUS_COMP_WAIT_INT_MASK,
+           iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
 
     /* send an empty COMPLETION_WAIT command to flush command buffer */
     cmd[3] = cmd[2] = 0;
@@ -103,9 +101,9 @@ static void flush_command_buffer(struct 
 
     if ( comp_wait )
     {
-        /* clear 'ComWaitInt' in status register (WIC) */
-        status &= IOMMU_STATUS_COMP_WAIT_INT_MASK;
-        writel(status, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+        /* RW1C 'ComWaitInt' in status register */
+        writel(IOMMU_STATUS_COMP_WAIT_INT_MASK,
+               iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
         return;
     }
     AMD_IOMMU_DEBUG("Warning: ComWaitInt bit did not assert!\n");
--- a/xen/drivers/passthrough/amd/iommu_guest.c
+++ b/xen/drivers/passthrough/amd/iommu_guest.c
@@ -754,7 +754,14 @@ static void guest_iommu_mmio_write64(str
         u64_to_reg(&iommu->ppr_log.reg_tail, val);
         break;
     case IOMMU_STATUS_MMIO_OFFSET:
-        u64_to_reg(&iommu->reg_status, val);
+        val &= IOMMU_STATUS_EVENT_OVERFLOW_MASK |
+               IOMMU_STATUS_EVENT_LOG_INT_MASK |
+               IOMMU_STATUS_COMP_WAIT_INT_MASK |
+               IOMMU_STATUS_PPR_LOG_OVERFLOW_MASK |
+               IOMMU_STATUS_PPR_LOG_INT_MASK |
+               IOMMU_STATUS_GAPIC_LOG_OVERFLOW_MASK |
+               IOMMU_STATUS_GAPIC_LOG_INT_MASK;
+        u64_to_reg(&iommu->reg_status, reg_to_u64(iommu->reg_status) & ~val);
         break;
 
     default:
--- a/xen/drivers/passthrough/amd/iommu_init.c
+++ b/xen/drivers/passthrough/amd/iommu_init.c
@@ -344,13 +344,13 @@ static void set_iommu_ppr_log_control(st
         writeq(0, iommu->mmio_base + IOMMU_PPR_LOG_TAIL_OFFSET);
 
         iommu_set_bit(&entry, IOMMU_CONTROL_PPR_ENABLE_SHIFT);
-        iommu_set_bit(&entry, IOMMU_CONTROL_PPR_INT_SHIFT);
+        iommu_set_bit(&entry, IOMMU_CONTROL_PPR_LOG_INT_SHIFT);
         iommu_set_bit(&entry, IOMMU_CONTROL_PPR_LOG_ENABLE_SHIFT);
     }
     else
     {
         iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_ENABLE_SHIFT);
-        iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_INT_SHIFT);
+        iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_LOG_INT_SHIFT);
         iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_LOG_ENABLE_SHIFT);
     }
 
@@ -410,7 +410,7 @@ static void iommu_reset_log(struct amd_i
                             void (*ctrl_func)(struct amd_iommu *iommu, int))
 {
     u32 entry;
-    int log_run, run_bit, of_bit;
+    int log_run, run_bit;
     int loop_count = 1000;
 
     BUG_ON(!iommu || ((log != &iommu->event_log) && (log != &iommu->ppr_log)));
@@ -419,10 +419,6 @@ static void iommu_reset_log(struct amd_i
         IOMMU_STATUS_EVENT_LOG_RUN_SHIFT :
         IOMMU_STATUS_PPR_LOG_RUN_SHIFT;
 
-    of_bit = ( log == &iommu->event_log ) ?
-        IOMMU_STATUS_EVENT_OVERFLOW_SHIFT :
-        IOMMU_STATUS_PPR_LOG_OVERFLOW_SHIFT;
-
     /* wait until EventLogRun bit = 0 */
     do {
         entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
@@ -439,9 +435,10 @@ static void iommu_reset_log(struct amd_i
 
     ctrl_func(iommu, IOMMU_CONTROL_DISABLED);
 
-    /*clear overflow bit */
-    iommu_clear_bit(&entry, of_bit);
-    writel(entry, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+    /* RW1C overflow bit */
+    writel(log == &iommu->event_log ? IOMMU_STATUS_EVENT_OVERFLOW_MASK
+                                    : IOMMU_STATUS_PPR_LOG_OVERFLOW_MASK,
+           iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
 
     /*reset event log base address */
     log->head = 0;
@@ -619,14 +616,18 @@ static void iommu_check_event_log(struct
     /*check event overflow */
     entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
 
+    /* RW1C interrupt status bit */
+    writel(IOMMU_STATUS_EVENT_LOG_INT_MASK,
+           iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+
     if ( iommu_get_bit(entry, IOMMU_STATUS_EVENT_OVERFLOW_SHIFT) )
         iommu_reset_log(iommu, &iommu->event_log, set_iommu_event_log_control);
-
-    /* reset interrupt status bit */
-    entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
-    iommu_set_bit(&entry, IOMMU_STATUS_EVENT_LOG_INT_SHIFT);
-
-    writel(entry, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+    else
+    {
+        entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
+        iommu_set_bit(&entry, IOMMU_CONTROL_EVENT_LOG_INT_SHIFT);
+        writel(entry, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
+    }
 
     spin_unlock_irqrestore(&iommu->lock, flags);
 }
@@ -689,14 +690,18 @@ static void iommu_check_ppr_log(struct a
     /*check event overflow */
     entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
 
+    /* RW1C interrupt status bit */
+    writel(IOMMU_STATUS_PPR_LOG_INT_MASK,
+           iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+
     if ( iommu_get_bit(entry, IOMMU_STATUS_PPR_LOG_OVERFLOW_SHIFT) )
         iommu_reset_log(iommu, &iommu->ppr_log, set_iommu_ppr_log_control);
-
-    /* reset interrupt status bit */
-    entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
-    iommu_set_bit(&entry, IOMMU_STATUS_PPR_LOG_INT_SHIFT);
-
-    writel(entry, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+    else
+    {
+        entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
+        iommu_set_bit(&entry, IOMMU_CONTROL_PPR_LOG_INT_SHIFT);
+        writel(entry, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
+    }
 
     spin_unlock_irqrestore(&iommu->lock, flags);
 }
@@ -733,11 +738,14 @@ static void iommu_interrupt_handler(int 
 
     spin_lock_irqsave(&iommu->lock, flags);
 
-    /* Silence interrupts from both event and PPR logging */
-    entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
-    iommu_clear_bit(&entry, IOMMU_STATUS_EVENT_LOG_INT_SHIFT);
-    iommu_clear_bit(&entry, IOMMU_STATUS_PPR_LOG_INT_SHIFT);
-    writel(entry, iommu->mmio_base+IOMMU_STATUS_MMIO_OFFSET);
+    /*
+     * Silence interrupts from both event and PPR by clearing the
+     * enable logging bits in the control register
+     */
+    entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
+    iommu_clear_bit(&entry, IOMMU_CONTROL_EVENT_LOG_INT_SHIFT);
+    iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_LOG_INT_SHIFT);
+    writel(entry, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
 
     spin_unlock_irqrestore(&iommu->lock, flags);
 
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
@@ -336,14 +336,13 @@
 #define IOMMU_CONTROL_ISOCHRONOUS_SHIFT			11
 #define IOMMU_CONTROL_COMMAND_BUFFER_ENABLE_MASK	0x00001000
 #define IOMMU_CONTROL_COMMAND_BUFFER_ENABLE_SHIFT	12
+#define IOMMU_CONTROL_PPR_LOG_ENABLE_SHIFT		13
+#define IOMMU_CONTROL_PPR_LOG_INT_SHIFT			14
+#define IOMMU_CONTROL_PPR_ENABLE_SHIFT			15
+#define IOMMU_CONTROL_GT_ENABLE_SHIFT			16
 #define IOMMU_CONTROL_RESTART_MASK			0x80000000
 #define IOMMU_CONTROL_RESTART_SHIFT			31
 
-#define IOMMU_CONTROL_PPR_LOG_ENABLE_SHIFT      13
-#define IOMMU_CONTROL_PPR_INT_SHIFT             14
-#define IOMMU_CONTROL_PPR_ENABLE_SHIFT          15
-#define IOMMU_CONTROL_GT_ENABLE_SHIFT           16
-
 /* Exclusion Register */
 #define IOMMU_EXCLUSION_BASE_LOW_OFFSET		0x20
 #define IOMMU_EXCLUSION_BASE_HIGH_OFFSET	0x24
@@ -395,9 +394,18 @@
 #define IOMMU_STATUS_EVENT_LOG_RUN_SHIFT	3
 #define IOMMU_STATUS_CMD_BUFFER_RUN_MASK	0x00000010
 #define IOMMU_STATUS_CMD_BUFFER_RUN_SHIFT	4
+#define IOMMU_STATUS_PPR_LOG_OVERFLOW_MASK      0x00000020
 #define IOMMU_STATUS_PPR_LOG_OVERFLOW_SHIFT     5
+#define IOMMU_STATUS_PPR_LOG_INT_MASK           0x00000040
 #define IOMMU_STATUS_PPR_LOG_INT_SHIFT          6
+#define IOMMU_STATUS_PPR_LOG_RUN_MASK           0x00000080
 #define IOMMU_STATUS_PPR_LOG_RUN_SHIFT          7
+#define IOMMU_STATUS_GAPIC_LOG_OVERFLOW_MASK    0x00000100
+#define IOMMU_STATUS_GAPIC_LOG_OVERFLOW_SHIFT   8
+#define IOMMU_STATUS_GAPIC_LOG_INT_MASK         0x00000200
+#define IOMMU_STATUS_GAPIC_LOG_INT_SHIFT        9
+#define IOMMU_STATUS_GAPIC_LOG_RUN_MASK         0x00000400
+#define IOMMU_STATUS_GAPIC_LOG_RUN_SHIFT        10
 
 /* I/O Page Table */
 #define IOMMU_PAGE_TABLE_ENTRY_SIZE	8



[-- Attachment #2: AMD-IOMMU-correct-rw1c-handling --]
[-- Type: application/octet-stream, Size: 10085 bytes --]

iommu/amd: Fix logic for clearing the IOMMU interrupt bits

The IOMMU interrupt bits in the IOMMU status registers are
"read-only, and write-1-to-clear (RW1C).  Therefore, the existing
logic which reads the register, set the bit, and then writing back
the values could accidentally clear certain bits if it has been set.

The correct logic would just be writing only the value which only
set the interrupt bits, and leave the rest to zeros.

This patch also, clean up #define masks as Jan has suggested.

Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>

With iommu_interrupt_handler() properly having got switched its readl()
from status to control register, the subsequent writel() needed to be
switched too (and the RW1C comment there was bogus).

Some of the cleanup went too far - undone.

Further, with iommu_interrupt_handler() now actually disabling the
interrupt sources, they also need to get re-enabled by the tasklet once
it finished processing the respective log.

Finally, guest write emulation to the status register needs to be done
with the RW1C (and RO for all other bits) semantics in mind too.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v4: Clear _only_ the respective overflow bit in iommu_reset_log(). Fix
    logic error in adjustment to guest_iommu_mmio_write64(). (Both
    pointed out by Tim - Thanks!)

--- a/xen/drivers/passthrough/amd/iommu_cmd.c
+++ b/xen/drivers/passthrough/amd/iommu_cmd.c
@@ -75,11 +75,9 @@ static void flush_command_buffer(struct 
     u32 cmd[4], status;
     int loop_count, comp_wait;
 
-    /* clear 'ComWaitInt' in status register (WIC) */
-    set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, 0,
-                         IOMMU_STATUS_COMP_WAIT_INT_MASK,
-                         IOMMU_STATUS_COMP_WAIT_INT_SHIFT, &status);
-    writel(status, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+    /* RW1C 'ComWaitInt' in status register */
+    writel(IOMMU_STATUS_COMP_WAIT_INT_MASK,
+           iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
 
     /* send an empty COMPLETION_WAIT command to flush command buffer */
     cmd[3] = cmd[2] = 0;
@@ -103,9 +101,9 @@ static void flush_command_buffer(struct 
 
     if ( comp_wait )
     {
-        /* clear 'ComWaitInt' in status register (WIC) */
-        status &= IOMMU_STATUS_COMP_WAIT_INT_MASK;
-        writel(status, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+        /* RW1C 'ComWaitInt' in status register */
+        writel(IOMMU_STATUS_COMP_WAIT_INT_MASK,
+               iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
         return;
     }
     AMD_IOMMU_DEBUG("Warning: ComWaitInt bit did not assert!\n");
--- a/xen/drivers/passthrough/amd/iommu_guest.c
+++ b/xen/drivers/passthrough/amd/iommu_guest.c
@@ -754,7 +754,14 @@ static void guest_iommu_mmio_write64(str
         u64_to_reg(&iommu->ppr_log.reg_tail, val);
         break;
     case IOMMU_STATUS_MMIO_OFFSET:
-        u64_to_reg(&iommu->reg_status, val);
+        val &= IOMMU_STATUS_EVENT_OVERFLOW_MASK |
+               IOMMU_STATUS_EVENT_LOG_INT_MASK |
+               IOMMU_STATUS_COMP_WAIT_INT_MASK |
+               IOMMU_STATUS_PPR_LOG_OVERFLOW_MASK |
+               IOMMU_STATUS_PPR_LOG_INT_MASK |
+               IOMMU_STATUS_GAPIC_LOG_OVERFLOW_MASK |
+               IOMMU_STATUS_GAPIC_LOG_INT_MASK;
+        u64_to_reg(&iommu->reg_status, reg_to_u64(iommu->reg_status) & ~val);
         break;
 
     default:
--- a/xen/drivers/passthrough/amd/iommu_init.c
+++ b/xen/drivers/passthrough/amd/iommu_init.c
@@ -344,13 +344,13 @@ static void set_iommu_ppr_log_control(st
         writeq(0, iommu->mmio_base + IOMMU_PPR_LOG_TAIL_OFFSET);
 
         iommu_set_bit(&entry, IOMMU_CONTROL_PPR_ENABLE_SHIFT);
-        iommu_set_bit(&entry, IOMMU_CONTROL_PPR_INT_SHIFT);
+        iommu_set_bit(&entry, IOMMU_CONTROL_PPR_LOG_INT_SHIFT);
         iommu_set_bit(&entry, IOMMU_CONTROL_PPR_LOG_ENABLE_SHIFT);
     }
     else
     {
         iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_ENABLE_SHIFT);
-        iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_INT_SHIFT);
+        iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_LOG_INT_SHIFT);
         iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_LOG_ENABLE_SHIFT);
     }
 
@@ -410,7 +410,7 @@ static void iommu_reset_log(struct amd_i
                             void (*ctrl_func)(struct amd_iommu *iommu, int))
 {
     u32 entry;
-    int log_run, run_bit, of_bit;
+    int log_run, run_bit;
     int loop_count = 1000;
 
     BUG_ON(!iommu || ((log != &iommu->event_log) && (log != &iommu->ppr_log)));
@@ -419,10 +419,6 @@ static void iommu_reset_log(struct amd_i
         IOMMU_STATUS_EVENT_LOG_RUN_SHIFT :
         IOMMU_STATUS_PPR_LOG_RUN_SHIFT;
 
-    of_bit = ( log == &iommu->event_log ) ?
-        IOMMU_STATUS_EVENT_OVERFLOW_SHIFT :
-        IOMMU_STATUS_PPR_LOG_OVERFLOW_SHIFT;
-
     /* wait until EventLogRun bit = 0 */
     do {
         entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
@@ -439,9 +435,10 @@ static void iommu_reset_log(struct amd_i
 
     ctrl_func(iommu, IOMMU_CONTROL_DISABLED);
 
-    /*clear overflow bit */
-    iommu_clear_bit(&entry, of_bit);
-    writel(entry, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+    /* RW1C overflow bit */
+    writel(log == &iommu->event_log ? IOMMU_STATUS_EVENT_OVERFLOW_MASK
+                                    : IOMMU_STATUS_PPR_LOG_OVERFLOW_MASK,
+           iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
 
     /*reset event log base address */
     log->head = 0;
@@ -619,14 +616,18 @@ static void iommu_check_event_log(struct
     /*check event overflow */
     entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
 
+    /* RW1C interrupt status bit */
+    writel(IOMMU_STATUS_EVENT_LOG_INT_MASK,
+           iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+
     if ( iommu_get_bit(entry, IOMMU_STATUS_EVENT_OVERFLOW_SHIFT) )
         iommu_reset_log(iommu, &iommu->event_log, set_iommu_event_log_control);
-
-    /* reset interrupt status bit */
-    entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
-    iommu_set_bit(&entry, IOMMU_STATUS_EVENT_LOG_INT_SHIFT);
-
-    writel(entry, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+    else
+    {
+        entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
+        iommu_set_bit(&entry, IOMMU_CONTROL_EVENT_LOG_INT_SHIFT);
+        writel(entry, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
+    }
 
     spin_unlock_irqrestore(&iommu->lock, flags);
 }
@@ -689,14 +690,18 @@ static void iommu_check_ppr_log(struct a
     /*check event overflow */
     entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
 
+    /* RW1C interrupt status bit */
+    writel(IOMMU_STATUS_PPR_LOG_INT_MASK,
+           iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+
     if ( iommu_get_bit(entry, IOMMU_STATUS_PPR_LOG_OVERFLOW_SHIFT) )
         iommu_reset_log(iommu, &iommu->ppr_log, set_iommu_ppr_log_control);
-
-    /* reset interrupt status bit */
-    entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
-    iommu_set_bit(&entry, IOMMU_STATUS_PPR_LOG_INT_SHIFT);
-
-    writel(entry, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+    else
+    {
+        entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
+        iommu_set_bit(&entry, IOMMU_CONTROL_PPR_LOG_INT_SHIFT);
+        writel(entry, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
+    }
 
     spin_unlock_irqrestore(&iommu->lock, flags);
 }
@@ -733,11 +738,14 @@ static void iommu_interrupt_handler(int 
 
     spin_lock_irqsave(&iommu->lock, flags);
 
-    /* Silence interrupts from both event and PPR logging */
-    entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
-    iommu_clear_bit(&entry, IOMMU_STATUS_EVENT_LOG_INT_SHIFT);
-    iommu_clear_bit(&entry, IOMMU_STATUS_PPR_LOG_INT_SHIFT);
-    writel(entry, iommu->mmio_base+IOMMU_STATUS_MMIO_OFFSET);
+    /*
+     * Silence interrupts from both event and PPR by clearing the
+     * enable logging bits in the control register
+     */
+    entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
+    iommu_clear_bit(&entry, IOMMU_CONTROL_EVENT_LOG_INT_SHIFT);
+    iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_LOG_INT_SHIFT);
+    writel(entry, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
 
     spin_unlock_irqrestore(&iommu->lock, flags);
 
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
@@ -336,14 +336,13 @@
 #define IOMMU_CONTROL_ISOCHRONOUS_SHIFT			11
 #define IOMMU_CONTROL_COMMAND_BUFFER_ENABLE_MASK	0x00001000
 #define IOMMU_CONTROL_COMMAND_BUFFER_ENABLE_SHIFT	12
+#define IOMMU_CONTROL_PPR_LOG_ENABLE_SHIFT		13
+#define IOMMU_CONTROL_PPR_LOG_INT_SHIFT			14
+#define IOMMU_CONTROL_PPR_ENABLE_SHIFT			15
+#define IOMMU_CONTROL_GT_ENABLE_SHIFT			16
 #define IOMMU_CONTROL_RESTART_MASK			0x80000000
 #define IOMMU_CONTROL_RESTART_SHIFT			31
 
-#define IOMMU_CONTROL_PPR_LOG_ENABLE_SHIFT      13
-#define IOMMU_CONTROL_PPR_INT_SHIFT             14
-#define IOMMU_CONTROL_PPR_ENABLE_SHIFT          15
-#define IOMMU_CONTROL_GT_ENABLE_SHIFT           16
-
 /* Exclusion Register */
 #define IOMMU_EXCLUSION_BASE_LOW_OFFSET		0x20
 #define IOMMU_EXCLUSION_BASE_HIGH_OFFSET	0x24
@@ -395,9 +394,18 @@
 #define IOMMU_STATUS_EVENT_LOG_RUN_SHIFT	3
 #define IOMMU_STATUS_CMD_BUFFER_RUN_MASK	0x00000010
 #define IOMMU_STATUS_CMD_BUFFER_RUN_SHIFT	4
+#define IOMMU_STATUS_PPR_LOG_OVERFLOW_MASK      0x00000020
 #define IOMMU_STATUS_PPR_LOG_OVERFLOW_SHIFT     5
+#define IOMMU_STATUS_PPR_LOG_INT_MASK           0x00000040
 #define IOMMU_STATUS_PPR_LOG_INT_SHIFT          6
+#define IOMMU_STATUS_PPR_LOG_RUN_MASK           0x00000080
 #define IOMMU_STATUS_PPR_LOG_RUN_SHIFT          7
+#define IOMMU_STATUS_GAPIC_LOG_OVERFLOW_MASK    0x00000100
+#define IOMMU_STATUS_GAPIC_LOG_OVERFLOW_SHIFT   8
+#define IOMMU_STATUS_GAPIC_LOG_INT_MASK         0x00000200
+#define IOMMU_STATUS_GAPIC_LOG_INT_SHIFT        9
+#define IOMMU_STATUS_GAPIC_LOG_RUN_MASK         0x00000400
+#define IOMMU_STATUS_GAPIC_LOG_RUN_SHIFT        10
 
 /* I/O Page Table */
 #define IOMMU_PAGE_TABLE_ENTRY_SIZE	8

[-- Attachment #3: Type: text/plain, Size: 126 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 2/2 V2] iommu/amd: Workaround for erratum 787
  2013-06-10 10:53         ` Jan Beulich
@ 2013-06-10 12:43           ` Tim Deegan
  2013-06-10 13:23             ` Jan Beulich
                               ` (2 more replies)
  2013-06-10 13:53           ` Suravee Suthikulanit
  1 sibling, 3 replies; 50+ messages in thread
From: Tim Deegan @ 2013-06-10 12:43 UTC (permalink / raw)
  To: Jan Beulich; +Cc: suravee.suthikulpanit, xen-devel

At 11:53 +0100 on 10 Jun (1370865209), Jan Beulich wrote:
> > Oh I see, the idea is that we use the control register to mask
> > interrupts instead of relying on the status register?  That seems
> > better.  But doesn't this IOMMU already mask further interrupts when the
> > pending bit in the status register is set?  I can't see any wording
> > about that in the IOMMU doc but the erratum implies it.  Suravee, do you
> > know if this is the case?
> 
> Actually, the documentation has a subtle but perhaps important
> difference int the wording here: For EventLogInt and ComWaitInt
> is read "An interrupt is generated when <status bit> = 1b and MMIO
> Offset 0018h[<control bit>] = 1b", where as for PPRInt and GAInt
> it says "An interrupt is generated when <status bit> changes from 0b
> to 1b and MMIO Offset 0018h[<control bit>] = 1b".
> 
> So I'd like to be one the safe side and assume further interrupts can
> be generated in all cases.

Fair enough.

 - see also the emulation behavior in
> iommu_guest.c which - afaict - always raises an interrupt, not just on
> a 0 -> 1 transition.

Well, if it were a documented beahviour, we ought to change that. :)

> > In either case, the while () loop worries me; I think it would be better
> > to schedule the tasklet again if we see the bit is set; a 'while
> > (pending is set) { clear pending bit; }' loop might never exit the
> > tasklet at all.
> 
> That could only be due to a hardware bug worse than the one we're
> working around here, and I don't think is worth dealing with.

Well, there's runaway guest hardware.  If we reschedule the tasklet then
pci_check_disable_device() will eventually catch and suppress the
misbehaviour; if we spin here it won't get a chance.  I guess the
argument is that it will eventually overflow the log buffer and stop
setting the log-interrupt bit -- that needs at least a comment.

I was also a bit worried about the erratum-787 event getting delayed
(since there's no interrupt to cause us to actually process it), but I
just realised that's a more general problem: we ought to be resetting
the 'pending' bits _before_ scanning the log, or any new entries that
arrive between the log scan and the RW1C write won't be seen until the
_next_ log entry causes an interrupt.

How about:
 write-to-clear status.pending
 process the log
 if (status.pending)
    reschedule the tasklet
 else
    unmask the interrupt.

Since we have to do the extra read anyway for erratum 787, we might as
well save ourselves the bother of taking an interrupt in the other case.

Tim.

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/2 v4] iommu/amd: Fix logic for clearing the IOMMU interrupt bits
  2013-06-10 12:41     ` [PATCH 1/2 v4] " Jan Beulich
@ 2013-06-10 12:46       ` Tim Deegan
  2013-06-10 13:49       ` George Dunlap
  2013-06-11  6:47       ` [PATCH 1/2 v5] " Jan Beulich
  2 siblings, 0 replies; 50+ messages in thread
From: Tim Deegan @ 2013-06-10 12:46 UTC (permalink / raw)
  To: Jan Beulich; +Cc: suravee.suthikulpanit, xen-devel

At 13:41 +0100 on 10 Jun (1370871672), Jan Beulich wrote:
> The IOMMU interrupt bits in the IOMMU status registers are
> "read-only, and write-1-to-clear (RW1C).  Therefore, the existing
> logic which reads the register, set the bit, and then writing back
> the values could accidentally clear certain bits if it has been set.
> 
> The correct logic would just be writing only the value which only
> set the interrupt bits, and leave the rest to zeros.
> 
> This patch also, clean up #define masks as Jan has suggested.
> 
> Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
> 
> With iommu_interrupt_handler() properly having got switched its readl()
> from status to control register, the subsequent writel() needed to be
> switched too (and the RW1C comment there was bogus).
> 
> Some of the cleanup went too far - undone.
> 
> Further, with iommu_interrupt_handler() now actually disabling the
> interrupt sources, they also need to get re-enabled by the tasklet once
> it finished processing the respective log.
> 
> Finally, guest write emulation to the status register needs to be done
> with the RW1C (and RO for all other bits) semantics in mind too.
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Reviewed-by: Tim Deegan <tim@xen.org>

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 2/2 V2] iommu/amd: Workaround for erratum 787
  2013-06-10 12:43           ` Tim Deegan
@ 2013-06-10 13:23             ` Jan Beulich
  2013-06-10 13:41             ` Jan Beulich
  2013-06-10 13:55             ` Jan Beulich
  2 siblings, 0 replies; 50+ messages in thread
From: Jan Beulich @ 2013-06-10 13:23 UTC (permalink / raw)
  To: Tim Deegan; +Cc: suravee.suthikulpanit, xen-devel

>>> On 10.06.13 at 14:43, Tim Deegan <tim@xen.org> wrote:
> At 11:53 +0100 on 10 Jun (1370865209), Jan Beulich wrote:
>> > Oh I see, the idea is that we use the control register to mask
>> > interrupts instead of relying on the status register?  That seems
>> > better.  But doesn't this IOMMU already mask further interrupts when the
>> > pending bit in the status register is set?  I can't see any wording
>> > about that in the IOMMU doc but the erratum implies it.  Suravee, do you
>> > know if this is the case?
>> 
>> Actually, the documentation has a subtle but perhaps important
>> difference int the wording here: For EventLogInt and ComWaitInt
>> is read "An interrupt is generated when <status bit> = 1b and MMIO
>> Offset 0018h[<control bit>] = 1b", where as for PPRInt and GAInt
>> it says "An interrupt is generated when <status bit> changes from 0b
>> to 1b and MMIO Offset 0018h[<control bit>] = 1b".
>> 
>> So I'd like to be one the safe side and assume further interrupts can
>> be generated in all cases.
> 
> Fair enough.
> 
>  - see also the emulation behavior in
>> iommu_guest.c which - afaict - always raises an interrupt, not just on
>> a 0 -> 1 transition.
> 
> Well, if it were a documented beahviour, we ought to change that. :)

I'll leave that to Suravee though, pending clarification on whether
the difference in wording is actually meaningful.

>> > In either case, the while () loop worries me; I think it would be better
>> > to schedule the tasklet again if we see the bit is set; a 'while
>> > (pending is set) { clear pending bit; }' loop might never exit the
>> > tasklet at all.
>> 
>> That could only be due to a hardware bug worse than the one we're
>> working around here, and I don't think is worth dealing with.
> 
> Well, there's runaway guest hardware.  If we reschedule the tasklet then
> pci_check_disable_device() will eventually catch and suppress the
> misbehaviour; if we spin here it won't get a chance.  I guess the
> argument is that it will eventually overflow the log buffer and stop
> setting the log-interrupt bit -- that needs at least a comment.
> 
> I was also a bit worried about the erratum-787 event getting delayed
> (since there's no interrupt to cause us to actually process it), but I
> just realised that's a more general problem: we ought to be resetting
> the 'pending' bits _before_ scanning the log, or any new entries that
> arrive between the log scan and the RW1C write won't be seen until the
> _next_ log entry causes an interrupt.
> 
> How about:
>  write-to-clear status.pending
>  process the log
>  if (status.pending)
>     reschedule the tasklet
>  else
>     unmask the interrupt.
> 
> Since we have to do the extra read anyway for erratum 787, we might as
> well save ourselves the bother of taking an interrupt in the other case.

Yes, that's a very reasonable approach. Will be a v4 then here soon
too, except that the locking there looks bogus too (and hence may
need fixing along the way)...

Jan

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 2/2 V2] iommu/amd: Workaround for erratum 787
  2013-06-10 12:43           ` Tim Deegan
  2013-06-10 13:23             ` Jan Beulich
@ 2013-06-10 13:41             ` Jan Beulich
  2013-06-10 13:56               ` Tim Deegan
  2013-06-10 13:55             ` Jan Beulich
  2 siblings, 1 reply; 50+ messages in thread
From: Jan Beulich @ 2013-06-10 13:41 UTC (permalink / raw)
  To: Tim Deegan; +Cc: suravee.suthikulpanit, xen-devel

>>> On 10.06.13 at 14:43, Tim Deegan <tim@xen.org> wrote:
> How about:
>  write-to-clear status.pending
>  process the log
>  if (status.pending)
>     reschedule the tasklet
>  else
>     unmask the interrupt.

Actually I think this is leaving a window for improperly handled
log entries too: There's also no guarantee that re-enabling the
interrupt would cause an interrupt to be raised when the buffer
became non-empty between the status.pending check and the
re-enable. Therefore I think we need

  write-to-clear status.pending
  process the log
  unmask the interrupt.
  if (status.pending)
     reschedule the tasklet

Jan

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/2 v4] iommu/amd: Fix logic for clearing the IOMMU interrupt bits
  2013-06-10 12:41     ` [PATCH 1/2 v4] " Jan Beulich
  2013-06-10 12:46       ` Tim Deegan
@ 2013-06-10 13:49       ` George Dunlap
  2013-06-10 14:08         ` Jan Beulich
  2013-06-11  6:47       ` [PATCH 1/2 v5] " Jan Beulich
  2 siblings, 1 reply; 50+ messages in thread
From: George Dunlap @ 2013-06-10 13:49 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Tim Deegan, Suravee Suthikulpanit, xen-devel

On Mon, Jun 10, 2013 at 1:41 PM, Jan Beulich <JBeulich@suse.com> wrote:
> The IOMMU interrupt bits in the IOMMU status registers are
> "read-only, and write-1-to-clear (RW1C).  Therefore, the existing
> logic which reads the register, set the bit, and then writing back
> the values could accidentally clear certain bits if it has been set.
>
> The correct logic would just be writing only the value which only
> set the interrupt bits, and leave the rest to zeros.
>
> This patch also, clean up #define masks as Jan has suggested.
>
> Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
>
> With iommu_interrupt_handler() properly having got switched its readl()
> from status to control register, the subsequent writel() needed to be
> switched too (and the RW1C comment there was bogus).
>
> Some of the cleanup went too far - undone.
>
> Further, with iommu_interrupt_handler() now actually disabling the
> interrupt sources, they also need to get re-enabled by the tasklet once
> it finished processing the respective log.
>
> Finally, guest write emulation to the status register needs to be done
> with the RW1C (and RO for all other bits) semantics in mind too.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

What's the impact of this as a bug?

This looks like it has a fairly high risk of introducing regressions
in existing working system.  So unless it has a pretty wide impact, I
think we should wait and include this in 4.3.1.

 -George

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 2/2 V2] iommu/amd: Workaround for erratum 787
  2013-06-10 10:53         ` Jan Beulich
  2013-06-10 12:43           ` Tim Deegan
@ 2013-06-10 13:53           ` Suravee Suthikulanit
  2013-06-10 13:59             ` Jan Beulich
  1 sibling, 1 reply; 50+ messages in thread
From: Suravee Suthikulanit @ 2013-06-10 13:53 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Tim Deegan, xen-devel

On 6/10/2013 5:53 AM, Jan Beulich wrote:
>>>> On 10.06.13 at 12:40, Tim Deegan <tim@xen.org> wrote:
>> At 10:47 +0100 on 10 Jun (1370861263), Jan Beulich wrote:
>>>>>> On 10.06.13 at 11:35, Tim Deegan <tim@xen.org> wrote:
>>>> At 00:05 -0500 on 10 Jun (1370822751), suravee.suthikulpanit@amd.com wrote:
>>>>> From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
>>>>>
>>>>> The IOMMU interrupt handling in bottom half must clear the PPR log
>> interrupt
>>>>> and event log interrupt bits to re-enable the interrupt. This is done by
>>>>> writing 1 to the memory mapped register to clear the bit. Due to hardware
>>>> bug,
>>>>> if the driver tries to clear this bit while the IOMMU hardware also setting
>>>>> this bit, the conflict will result with the bit being set. If the interrupt
>>>>> handling code does not make sure to clear this bit, subsequent changes in
>>>> the
>>>>> event/PPR logs will no longer generating interrupts, and would result if
>>>>> buffer overflow. After clearing the bits, the driver must read back
>>>>> the register to verify.
>>>> Is there a risk of livelock here?  That is, if some device is causing a
>>>> lot of IOMMU faults, a CPU could get stuck in this loop re-enabling
>>>> interrupts as fast as they are raised.
>>>>
>>>> The solution suggested in the erratum seems better: if the bit is set
>>>> after clearing, process the interrupts again (i.e. run/schedule the
>>>> top-half handler).  That way the bottom-half handler will definitely
>>>> terminate and the system can make some progress.
>>> That's what's being done really: The actual interrupt handler disables
>>> the interrupt sources, and the tasklet re-enables them (or at least is
>>> supposed to do so - patch 1 isn't really correct in the respect).
>> Oh I see, the idea is that we use the control register to mask
>> interrupts instead of relying on the status register?  That seems
>> better.  But doesn't this IOMMU already mask further interrupts when the
>> pending bit in the status register is set?  I can't see any wording
>> about that in the IOMMU doc but the erratum implies it.  Suravee, do you
>> know if this is the case?
> Actually, the documentation has a subtle but perhaps important
> difference int the wording here: For EventLogInt and ComWaitInt
> is read "An interrupt is generated when <status bit> = 1b and MMIO
> Offset 0018h[<control bit>] = 1b", where as for PPRInt and GAInt
> it says "An interrupt is generated when <status bit> changes from 0b
> to 1b and MMIO Offset 0018h[<control bit>] = 1b".
>
> So I'd like to be one the safe side and assume further interrupts can
> be generated in all cases - see also the emulation behavior in
> iommu_guest.c which - afaict - always raises an interrupt, not just on
> a 0 -> 1 transition.

The behavior should be that the interrupt will be generated if the "xxxInt"
bit is 0.  Once generated, it will be set to "1" by the hardware.  If this
bit is 1, events will be added to the log but interrupt will not be generated.

Suravee

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 2/2 V2] iommu/amd: Workaround for erratum 787
  2013-06-10 12:43           ` Tim Deegan
  2013-06-10 13:23             ` Jan Beulich
  2013-06-10 13:41             ` Jan Beulich
@ 2013-06-10 13:55             ` Jan Beulich
  2013-06-10 15:03               ` Jan Beulich
  2 siblings, 1 reply; 50+ messages in thread
From: Jan Beulich @ 2013-06-10 13:55 UTC (permalink / raw)
  To: Tim Deegan; +Cc: suravee.suthikulpanit, xen-devel

>>> On 10.06.13 at 14:43, Tim Deegan <tim@xen.org> wrote:
> How about:
>  write-to-clear status.pending
>  process the log
>  if (status.pending)
>     reschedule the tasklet
>  else
>     unmask the interrupt.

Actually, I don't think that's right: Clearing the pending bit with
the respective interrupt source disabled doesn't allow the
pending bit to become set again upon arrival of a new log entry,
and hence we might still be leaving entries unhandled for an
indefinite period of time. So I now think we need to do

  write-to-clear status.pending
  process the log
  unmask the interrupt
  process the log again
  if (status.pending)
     reschedule the tasklet

Of course we could skip the unmask and parse-again if the
interrupt wasn't masked in the first place, which is possible since
it gets masked only for any IOMMU that had its interrupt handler
executed before the tasklet gets busy on it.

Jan

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 2/2 V2] iommu/amd: Workaround for erratum 787
  2013-06-10 13:41             ` Jan Beulich
@ 2013-06-10 13:56               ` Tim Deegan
  0 siblings, 0 replies; 50+ messages in thread
From: Tim Deegan @ 2013-06-10 13:56 UTC (permalink / raw)
  To: Jan Beulich; +Cc: suravee.suthikulpanit, xen-devel

At 14:41 +0100 on 10 Jun (1370875314), Jan Beulich wrote:
> >>> On 10.06.13 at 14:43, Tim Deegan <tim@xen.org> wrote:
> > How about:
> >  write-to-clear status.pending
> >  process the log
> >  if (status.pending)
> >     reschedule the tasklet
> >  else
> >     unmask the interrupt.
> 
> Actually I think this is leaving a window for improperly handled
> log entries too: There's also no guarantee that re-enabling the
> interrupt would cause an interrupt to be raised when the buffer
> became non-empty between the status.pending check and the
> re-enable.

Oh.  Yes, I suppose that depends on whether the interrupt is
triggered on (edge(pending) && enabled) or edge(pending && enabled), or
something else.

> Therefore I think we need
> 
>   write-to-clear status.pending
>   process the log
>   unmask the interrupt.
>   if (status.pending)
>      reschedule the tasklet

Yes, that looks better.

Tim.

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/2 v3] iommu/amd: Fix logic for clearing the IOMMU interrupt bits
  2013-06-10 12:31     ` Jan Beulich
@ 2013-06-10 13:58       ` Suravee Suthikulanit
  0 siblings, 0 replies; 50+ messages in thread
From: Suravee Suthikulanit @ 2013-06-10 13:58 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Tim Deegan, xen-devel

On 6/10/2013 7:31 AM, Jan Beulich wrote:
>>> @@ -439,8 +439,8 @@ static void iommu_reset_log(struct amd_i
>>> >>  
>>> >>      ctrl_func(iommu, IOMMU_CONTROL_DISABLED);
>>> >>  
>>> >>-    /*clear overflow bit */
>>> >>-    iommu_clear_bit(&entry, of_bit);
>>> >>+    /* RW1C overflow bit */
>>> >>+    iommu_set_bit(&entry, of_bit);
>>> >>      writel(entry, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
>> >
>> >Won't this clear all other status bits as well?
> Yes it will, albeit not because of this patch. But I guess we should
> fix this as we go. Expect v4 soon...
Sorry, I missed one case.

Suravee

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 2/2 V2] iommu/amd: Workaround for erratum 787
  2013-06-10 13:53           ` Suravee Suthikulanit
@ 2013-06-10 13:59             ` Jan Beulich
  2013-06-10 15:11               ` Suravee Suthikulanit
  0 siblings, 1 reply; 50+ messages in thread
From: Jan Beulich @ 2013-06-10 13:59 UTC (permalink / raw)
  To: Suravee Suthikulanit; +Cc: Tim Deegan, xen-devel

>>> On 10.06.13 at 15:53, Suravee Suthikulanit <suravee.suthikulpanit@amd.com>
wrote:
> On 6/10/2013 5:53 AM, Jan Beulich wrote:
>>>>> On 10.06.13 at 12:40, Tim Deegan <tim@xen.org> wrote:
>>> At 10:47 +0100 on 10 Jun (1370861263), Jan Beulich wrote:
>>>>>>> On 10.06.13 at 11:35, Tim Deegan <tim@xen.org> wrote:
>>>>> At 00:05 -0500 on 10 Jun (1370822751), suravee.suthikulpanit@amd.com wrote:
>>>>>> From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
>>>>>>
>>>>>> The IOMMU interrupt handling in bottom half must clear the PPR log
>>> interrupt
>>>>>> and event log interrupt bits to re-enable the interrupt. This is done by
>>>>>> writing 1 to the memory mapped register to clear the bit. Due to hardware
>>>>> bug,
>>>>>> if the driver tries to clear this bit while the IOMMU hardware also setting
>>>>>> this bit, the conflict will result with the bit being set. If the interrupt
>>>>>> handling code does not make sure to clear this bit, subsequent changes in
>>>>> the
>>>>>> event/PPR logs will no longer generating interrupts, and would result if
>>>>>> buffer overflow. After clearing the bits, the driver must read back
>>>>>> the register to verify.
>>>>> Is there a risk of livelock here?  That is, if some device is causing a
>>>>> lot of IOMMU faults, a CPU could get stuck in this loop re-enabling
>>>>> interrupts as fast as they are raised.
>>>>>
>>>>> The solution suggested in the erratum seems better: if the bit is set
>>>>> after clearing, process the interrupts again (i.e. run/schedule the
>>>>> top-half handler).  That way the bottom-half handler will definitely
>>>>> terminate and the system can make some progress.
>>>> That's what's being done really: The actual interrupt handler disables
>>>> the interrupt sources, and the tasklet re-enables them (or at least is
>>>> supposed to do so - patch 1 isn't really correct in the respect).
>>> Oh I see, the idea is that we use the control register to mask
>>> interrupts instead of relying on the status register?  That seems
>>> better.  But doesn't this IOMMU already mask further interrupts when the
>>> pending bit in the status register is set?  I can't see any wording
>>> about that in the IOMMU doc but the erratum implies it.  Suravee, do you
>>> know if this is the case?
>> Actually, the documentation has a subtle but perhaps important
>> difference int the wording here: For EventLogInt and ComWaitInt
>> is read "An interrupt is generated when <status bit> = 1b and MMIO
>> Offset 0018h[<control bit>] = 1b", where as for PPRInt and GAInt
>> it says "An interrupt is generated when <status bit> changes from 0b
>> to 1b and MMIO Offset 0018h[<control bit>] = 1b".
>>
>> So I'd like to be one the safe side and assume further interrupts can
>> be generated in all cases - see also the emulation behavior in
>> iommu_guest.c which - afaict - always raises an interrupt, not just on
>> a 0 -> 1 transition.
> 
> The behavior should be that the interrupt will be generated if the "xxxInt"
> bit is 0.  Once generated, it will be set to "1" by the hardware.  If this
> bit is 1, events will be added to the log but interrupt will not be 
> generated.

"Should be" isn't enough here, even more so given the mentioned
wording differences in your documentation. We need to know how
actual (past, current, and future) hardware behaves.

Jan

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/2 v4] iommu/amd: Fix logic for clearing the IOMMU interrupt bits
  2013-06-10 13:49       ` George Dunlap
@ 2013-06-10 14:08         ` Jan Beulich
  0 siblings, 0 replies; 50+ messages in thread
From: Jan Beulich @ 2013-06-10 14:08 UTC (permalink / raw)
  To: George Dunlap; +Cc: Tim Deegan, Suravee Suthikulpanit, xen-devel

>>> On 10.06.13 at 15:49, George Dunlap <George.Dunlap@eu.citrix.com> wrote:
> On Mon, Jun 10, 2013 at 1:41 PM, Jan Beulich <JBeulich@suse.com> wrote:
>> The IOMMU interrupt bits in the IOMMU status registers are
>> "read-only, and write-1-to-clear (RW1C).  Therefore, the existing
>> logic which reads the register, set the bit, and then writing back
>> the values could accidentally clear certain bits if it has been set.
>>
>> The correct logic would just be writing only the value which only
>> set the interrupt bits, and leave the rest to zeros.
>>
>> This patch also, clean up #define masks as Jan has suggested.
>>
>> Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
>>
>> With iommu_interrupt_handler() properly having got switched its readl()
>> from status to control register, the subsequent writel() needed to be
>> switched too (and the RW1C comment there was bogus).
>>
>> Some of the cleanup went too far - undone.
>>
>> Further, with iommu_interrupt_handler() now actually disabling the
>> interrupt sources, they also need to get re-enabled by the tasklet once
>> it finished processing the respective log.
>>
>> Finally, guest write emulation to the status register needs to be done
>> with the RW1C (and RO for all other bits) semantics in mind too.
>>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> 
> What's the impact of this as a bug?

The primary issue really comes very close to a security one: The
workaround to the live lock (observed on VT-d iirc) consisting of
disabling the interrupt and deferring the actual handling to a
tasklet might have been completely broken. Whether that's
actually the case very much depends on how close documentation
documents actual hardware: If the wording there is precise, we'd
be susceptible to this on the event log side, but be safe from this
for the PPR log.

> This looks like it has a fairly high risk of introducing regressions
> in existing working system.  So unless it has a pretty wide impact, I
> think we should wait and include this in 4.3.1.

If it turns out the documentation is imprecise in the way that we
would hope for, I'm fine with postponing this, as then the only real
problem would be that we might be seeing too few interrupts (i.e.
there might be a silently non-functional device somewhere in the
system because of this, but there wouldn't be any risk to the
system as a whole). So when to apply this depends heavily on AMD
clarifying actual hardware behavior vs. documentation.

But of course you also need to view this in the context of patch
2, which is something I think we want to have in 4.3 (but which
will [minimally] need reworking if this patch it to be deferred).

Jan

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 2/2 V2] iommu/amd: Workaround for erratum 787
  2013-06-10 13:55             ` Jan Beulich
@ 2013-06-10 15:03               ` Jan Beulich
  2013-06-10 16:31                 ` Tim Deegan
  0 siblings, 1 reply; 50+ messages in thread
From: Jan Beulich @ 2013-06-10 15:03 UTC (permalink / raw)
  To: Tim Deegan; +Cc: suravee.suthikulpanit, xen-devel

>>> On 10.06.13 at 15:55, "Jan Beulich" <JBeulich@suse.com> wrote:
>>>> On 10.06.13 at 14:43, Tim Deegan <tim@xen.org> wrote:
>> How about:
>>  write-to-clear status.pending
>>  process the log
>>  if (status.pending)
>>     reschedule the tasklet
>>  else
>>     unmask the interrupt.
> 
> Actually, I don't think that's right: Clearing the pending bit with
> the respective interrupt source disabled doesn't allow the
> pending bit to become set again upon arrival of a new log entry,
> and hence we might still be leaving entries unhandled for an
> indefinite period of time. So I now think we need to do
> 
>   write-to-clear status.pending
>   process the log
>   unmask the interrupt
>   process the log again
>   if (status.pending)
>      reschedule the tasklet
> 
> Of course we could skip the unmask and parse-again if the
> interrupt wasn't masked in the first place, which is possible since
> it gets masked only for any IOMMU that had its interrupt handler
> executed before the tasklet gets busy on it.

So with this done I now realized that all of these transformations
don't really belong in this erratum workaround patch. They either
should be a prereq patch (or folded into patch 1), or a follow-up
one, with the one here then nevertheless going back to the
original simple loop approach. Do you view this any different?

Here is what one of the two functions now looks like, just for
reference:

static void iommu_check_event_log(struct amd_iommu *iommu)
{
    u32 entry;
    unsigned long flags;

    for ( ; ; )
    {
        /* RW1C interrupt status bit */
        writel(IOMMU_STATUS_EVENT_LOG_INT_MASK,
               iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);

        iommu_read_log(iommu, &iommu->event_log,
                       sizeof(event_entry_t), parse_event_log_entry);

        spin_lock_irqsave(&iommu->lock, flags);

        /* Check event overflow. */
        entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
        if ( entry & IOMMU_STATUS_EVENT_OVERFLOW_MASK )
            iommu_reset_log(iommu, &iommu->event_log,
                            set_iommu_event_log_control);
        else
        {
            entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
            if ( !(entry & IOMMU_CONTROL_EVENT_LOG_INT_MASK) )
            {
                entry |= IOMMU_CONTROL_EVENT_LOG_INT_MASK;
                writel(entry, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
                spin_unlock_irqrestore(&iommu->lock, flags);
                continue;
            }
        }

        break;
    }

    /*
     * Workaround for erratum787:
     * Re-check to make sure the bit has been cleared.
     */
    entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
    if ( entry & IOMMU_STATUS_EVENT_LOG_INT_MASK )
        tasklet_schedule(&amd_iommu_irq_tasklet);

    spin_unlock_irqrestore(&iommu->lock, flags);
}

You'll note that even here we're having a loop again, which you
presumably won't like much. The only alternative I see is to run
iommu_read_log() with iommu->lock held, which has the caveat of
the function itself taking a lock (albeit - without having done any
proving yet - I think that lock is taken completely pointlessly).

In any case - the erratum workaround is really just the comment
and three following lines. Everything else belongs elsewhere imo.

Jan

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 2/2 V2] iommu/amd: Workaround for erratum 787
  2013-06-10 13:59             ` Jan Beulich
@ 2013-06-10 15:11               ` Suravee Suthikulanit
  2013-06-10 15:21                 ` Jan Beulich
  0 siblings, 1 reply; 50+ messages in thread
From: Suravee Suthikulanit @ 2013-06-10 15:11 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Tim Deegan, xen-devel

On 6/10/2013 8:59 AM, Jan Beulich wrote:
>>>> On 10.06.13 at 15:53, Suravee Suthikulanit <suravee.suthikulpanit@amd.com>
> wrote:
>> On 6/10/2013 5:53 AM, Jan Beulich wrote:
>>>>>> On 10.06.13 at 12:40, Tim Deegan <tim@xen.org> wrote:
>>>> At 10:47 +0100 on 10 Jun (1370861263), Jan Beulich wrote:
>>>>>>>> On 10.06.13 at 11:35, Tim Deegan <tim@xen.org> wrote:
>>>>>> At 00:05 -0500 on 10 Jun (1370822751), suravee.suthikulpanit@amd.com wrote:
>>>>>>> From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
>>>>>>>
>>>>>>> The IOMMU interrupt handling in bottom half must clear the PPR log
>>>> interrupt
>>>>>>> and event log interrupt bits to re-enable the interrupt. This is done by
>>>>>>> writing 1 to the memory mapped register to clear the bit. Due to hardware
>>>>>> bug,
>>>>>>> if the driver tries to clear this bit while the IOMMU hardware also setting
>>>>>>> this bit, the conflict will result with the bit being set. If the interrupt
>>>>>>> handling code does not make sure to clear this bit, subsequent changes in
>>>>>> the
>>>>>>> event/PPR logs will no longer generating interrupts, and would result if
>>>>>>> buffer overflow. After clearing the bits, the driver must read back
>>>>>>> the register to verify.
>>>>>> Is there a risk of livelock here?  That is, if some device is causing a
>>>>>> lot of IOMMU faults, a CPU could get stuck in this loop re-enabling
>>>>>> interrupts as fast as they are raised.
>>>>>>
>>>>>> The solution suggested in the erratum seems better: if the bit is set
>>>>>> after clearing, process the interrupts again (i.e. run/schedule the
>>>>>> top-half handler).  That way the bottom-half handler will definitely
>>>>>> terminate and the system can make some progress.
>>>>> That's what's being done really: The actual interrupt handler disables
>>>>> the interrupt sources, and the tasklet re-enables them (or at least is
>>>>> supposed to do so - patch 1 isn't really correct in the respect).
>>>> Oh I see, the idea is that we use the control register to mask
>>>> interrupts instead of relying on the status register?  That seems
>>>> better.  But doesn't this IOMMU already mask further interrupts when the
>>>> pending bit in the status register is set?  I can't see any wording
>>>> about that in the IOMMU doc but the erratum implies it.  Suravee, do you
>>>> know if this is the case?
>>> Actually, the documentation has a subtle but perhaps important
>>> difference int the wording here: For EventLogInt and ComWaitInt
>>> is read "An interrupt is generated when <status bit> = 1b and MMIO
>>> Offset 0018h[<control bit>] = 1b", where as for PPRInt and GAInt
>>> it says "An interrupt is generated when <status bit> changes from 0b
>>> to 1b and MMIO Offset 0018h[<control bit>] = 1b".
>>>
>>> So I'd like to be one the safe side and assume further interrupts can
>>> be generated in all cases - see also the emulation behavior in
>>> iommu_guest.c which - afaict - always raises an interrupt, not just on
>>> a 0 -> 1 transition.
>> The behavior should be that the interrupt will be generated if the "xxxInt"
>> bit is 0.  Once generated, it will be set to "1" by the hardware.  If this
>> bit is 1, events will be added to the log but interrupt will not be
>> generated.
> "Should be" isn't enough here, even more so given the mentioned
> wording differences in your documentation. We need to know how
> actual (past, current, and future) hardware behaves.
>
> Jan
>
I am sure this is what the behavior of the hardware.  Besides, only the 
hardware can set this bit.
I have also tested by not clearing the bit, and basically I did not see 
additional interrupts.

Suravee

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 2/2 V2] iommu/amd: Workaround for erratum 787
  2013-06-10 15:11               ` Suravee Suthikulanit
@ 2013-06-10 15:21                 ` Jan Beulich
  0 siblings, 0 replies; 50+ messages in thread
From: Jan Beulich @ 2013-06-10 15:21 UTC (permalink / raw)
  To: Suravee Suthikulanit; +Cc: Tim Deegan, xen-devel

>>> On 10.06.13 at 17:11, Suravee Suthikulanit <suravee.suthikulpanit@amd.com>
wrote:
> On 6/10/2013 8:59 AM, Jan Beulich wrote:
>>>>> On 10.06.13 at 15:53, Suravee Suthikulanit <suravee.suthikulpanit@amd.com>
>> wrote:
>>> On 6/10/2013 5:53 AM, Jan Beulich wrote:
>>>>>>> On 10.06.13 at 12:40, Tim Deegan <tim@xen.org> wrote:
>>>>> At 10:47 +0100 on 10 Jun (1370861263), Jan Beulich wrote:
>>>>>>>>> On 10.06.13 at 11:35, Tim Deegan <tim@xen.org> wrote:
>>>>>>> At 00:05 -0500 on 10 Jun (1370822751), suravee.suthikulpanit@amd.com wrote:
>>>>>>>> From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
>>>>>>>>
>>>>>>>> The IOMMU interrupt handling in bottom half must clear the PPR log
>>>>> interrupt
>>>>>>>> and event log interrupt bits to re-enable the interrupt. This is done by
>>>>>>>> writing 1 to the memory mapped register to clear the bit. Due to hardware
>>>>>>> bug,
>>>>>>>> if the driver tries to clear this bit while the IOMMU hardware also setting
>>>>>>>> this bit, the conflict will result with the bit being set. If the interrupt
>>>>>>>> handling code does not make sure to clear this bit, subsequent changes in
>>>>>>> the
>>>>>>>> event/PPR logs will no longer generating interrupts, and would result if
>>>>>>>> buffer overflow. After clearing the bits, the driver must read back
>>>>>>>> the register to verify.
>>>>>>> Is there a risk of livelock here?  That is, if some device is causing a
>>>>>>> lot of IOMMU faults, a CPU could get stuck in this loop re-enabling
>>>>>>> interrupts as fast as they are raised.
>>>>>>>
>>>>>>> The solution suggested in the erratum seems better: if the bit is set
>>>>>>> after clearing, process the interrupts again (i.e. run/schedule the
>>>>>>> top-half handler).  That way the bottom-half handler will definitely
>>>>>>> terminate and the system can make some progress.
>>>>>> That's what's being done really: The actual interrupt handler disables
>>>>>> the interrupt sources, and the tasklet re-enables them (or at least is
>>>>>> supposed to do so - patch 1 isn't really correct in the respect).
>>>>> Oh I see, the idea is that we use the control register to mask
>>>>> interrupts instead of relying on the status register?  That seems
>>>>> better.  But doesn't this IOMMU already mask further interrupts when the
>>>>> pending bit in the status register is set?  I can't see any wording
>>>>> about that in the IOMMU doc but the erratum implies it.  Suravee, do you
>>>>> know if this is the case?
>>>> Actually, the documentation has a subtle but perhaps important
>>>> difference int the wording here: For EventLogInt and ComWaitInt
>>>> is read "An interrupt is generated when <status bit> = 1b and MMIO
>>>> Offset 0018h[<control bit>] = 1b", where as for PPRInt and GAInt
>>>> it says "An interrupt is generated when <status bit> changes from 0b
>>>> to 1b and MMIO Offset 0018h[<control bit>] = 1b".
>>>>
>>>> So I'd like to be one the safe side and assume further interrupts can
>>>> be generated in all cases - see also the emulation behavior in
>>>> iommu_guest.c which - afaict - always raises an interrupt, not just on
>>>> a 0 -> 1 transition.
>>> The behavior should be that the interrupt will be generated if the "xxxInt"
>>> bit is 0.  Once generated, it will be set to "1" by the hardware.  If this
>>> bit is 1, events will be added to the log but interrupt will not be
>>> generated.
>> "Should be" isn't enough here, even more so given the mentioned
>> wording differences in your documentation. We need to know how
>> actual (past, current, and future) hardware behaves.
>>
> I am sure this is what the behavior of the hardware.  Besides, only the 
> hardware can set this bit.
> I have also tested by not clearing the bit, and basically I did not see 
> additional interrupts.

Which would allow us to simplify patch 1 quite a bit - there's then
no need to clear the two interrupt-enable bits in the interrupt
handler, and iommu_check_*_log() then also wouldn't need to set
them again. We would just need to find the right point in time
when to clear the corresponding status flag. And - as written in a
response to Tim already - probably some parts of what we
discussed for patch 2 really would need to move to patch 1.

Jan

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 2/2 V2] iommu/amd: Workaround for erratum 787
  2013-06-10 15:03               ` Jan Beulich
@ 2013-06-10 16:31                 ` Tim Deegan
  2013-06-10 23:13                   ` Suravee Suthikulanit
  2013-06-11  6:40                   ` Jan Beulich
  0 siblings, 2 replies; 50+ messages in thread
From: Tim Deegan @ 2013-06-10 16:31 UTC (permalink / raw)
  To: Jan Beulich; +Cc: suravee.suthikulpanit, xen-devel

At 16:03 +0100 on 10 Jun (1370880211), Jan Beulich wrote:
> >>> On 10.06.13 at 15:55, "Jan Beulich" <JBeulich@suse.com> wrote:
> >>>> On 10.06.13 at 14:43, Tim Deegan <tim@xen.org> wrote:
> >> How about:
> >>  write-to-clear status.pending
> >>  process the log
> >>  if (status.pending)
> >>     reschedule the tasklet
> >>  else
> >>     unmask the interrupt.
> > 
> > Actually, I don't think that's right: Clearing the pending bit with
> > the respective interrupt source disabled doesn't allow the
> > pending bit to become set again upon arrival of a new log entry,

>From my reading of the IOMMU spec the pending bit gets set whether the
enable bit is set or not:

  PPRInt: peripheral page service request interrupt. Revision 1:
  RO. Reset 0b. Reserved.  Revision 2: RW1C. Reset 0b. 1=PPR request
  entry written to the PPR log by the IOMMU. 0=No PPR entry written to
  the PPR log by the IOMMU. An interrupt is generated when PPRInt=1b and
  MMIO Offset 0018h[PPRIntEn]=1b.

and

  EventLogInt: event log interrupt. RW1C. Reset 0b. 1=Event entry
  written to the event log by the IOMMU. 0=No event entry written to the
  event log by the IOMMU. An interrupt is generated when EventLogInt=1b
  and MMIO Offset 0018h[EventIntEn]=1b.

so we should be OK without a second pass if the pending bit is still
clear after unmasking the interrupt.

> So with this done I now realized that all of these transformations
> don't really belong in this erratum workaround patch.

Agreed.  I think this reshuffle to avoid lost entries should be its own
patch, and the erratum 787 one should follow it -- unless the logic we
end up with handles erratum 787 as a side-effect. :)

> static void iommu_check_event_log(struct amd_iommu *iommu)
> {
>     u32 entry;
>     unsigned long flags;
> 
>     for ( ; ; )
>     {
>         /* RW1C interrupt status bit */
>         writel(IOMMU_STATUS_EVENT_LOG_INT_MASK,
>                iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
> 
>         iommu_read_log(iommu, &iommu->event_log,
>                        sizeof(event_entry_t), parse_event_log_entry);
> 
>         spin_lock_irqsave(&iommu->lock, flags);
> 
>         /* Check event overflow. */
>         entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
>         if ( entry & IOMMU_STATUS_EVENT_OVERFLOW_MASK )
>             iommu_reset_log(iommu, &iommu->event_log,
>                             set_iommu_event_log_control);
>         else
>         {
>             entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
>             if ( !(entry & IOMMU_CONTROL_EVENT_LOG_INT_MASK) )
>             {
>                 entry |= IOMMU_CONTROL_EVENT_LOG_INT_MASK;
>                 writel(entry, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
>                 spin_unlock_irqrestore(&iommu->lock, flags);
>                 continue;
>             }
>         }
> 
>         break;
>     }
> 
>     /*
>      * Workaround for erratum787:
>      * Re-check to make sure the bit has been cleared.
>      */
>     entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
>     if ( entry & IOMMU_STATUS_EVENT_LOG_INT_MASK )
>         tasklet_schedule(&amd_iommu_irq_tasklet);
> 
>     spin_unlock_irqrestore(&iommu->lock, flags);
> }
> 
> You'll note that even here we're having a loop again, which you
> presumably won't like much.

Well, this time the event handling is inside the loop, so it ought to
catch and disable bad passthrough devices.  I'd still prefer having the
tasklet schedule itself and terminate, but I'm happy to defer to your
taste.

> The only alternative I see is to run
> iommu_read_log() with iommu->lock held, which has the caveat of
> the function itself taking a lock (albeit - without having done any
> proving yet - I think that lock is taken completely pointlessly).
> 
> In any case - the erratum workaround is really just the comment
> and three following lines. Everything else belongs elsewhere imo.

Yep.

Tim.

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 2/2 V2] iommu/amd: Workaround for erratum 787
  2013-06-10 16:31                 ` Tim Deegan
@ 2013-06-10 23:13                   ` Suravee Suthikulanit
  2013-06-11  6:45                     ` Jan Beulich
  2013-06-11  6:40                   ` Jan Beulich
  1 sibling, 1 reply; 50+ messages in thread
From: Suravee Suthikulanit @ 2013-06-10 23:13 UTC (permalink / raw)
  To: Tim Deegan; +Cc: Jan Beulich, xen-devel

Hi All,

We should also check if the EventLogInt and PPRInt bits are set before actually
going into the log processing code.  Also, I agree with Jan that we should not need
to disable the Event log and the PPR log in the IOMMU control register.
This could be handled simply through the status register.

Also, I think we can further simplify the logic for the workaround by having only
one loop instead of two.  Here is the newly proposed changes for the patch.  However,
I am still not sure if we should reschedule the tasklet instead of just using the
while loop here.

Thank you,

Suravee


diff --git a/xen/drivers/passthrough/amd/iommu_init.c b/xen/drivers/passthrough/amd/iommu_init.c
index b5a39a9..bd9913f 100644
--- a/xen/drivers/passthrough/amd/iommu_init.c
+++ b/xen/drivers/passthrough/amd/iommu_init.c
@@ -615,19 +615,8 @@ static void iommu_check_event_log(struct amd_iommu *iommu)
      
      /*check event overflow */
      entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
-
-    /* RW1C interrupt status bit */
-    writel(IOMMU_STATUS_EVENT_LOG_INT_MASK,
-           iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
-
      if ( iommu_get_bit(entry, IOMMU_STATUS_EVENT_OVERFLOW_SHIFT) )
          iommu_reset_log(iommu, &iommu->event_log, set_iommu_event_log_control);
-    else
-    {
-        entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
-        iommu_set_bit(&entry, IOMMU_CONTROL_EVENT_LOG_INT_SHIFT);
-        writel(entry, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
-    }
  
      spin_unlock_irqrestore(&iommu->lock, flags);
  }
@@ -689,26 +678,20 @@ static void iommu_check_ppr_log(struct amd_iommu *iommu)
  
      /*check event overflow */
      entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
-
-    /* RW1C interrupt status bit */
-    writel(IOMMU_STATUS_PPR_LOG_INT_MASK,
-           iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
-
      if ( iommu_get_bit(entry, IOMMU_STATUS_PPR_LOG_OVERFLOW_SHIFT) )
          iommu_reset_log(iommu, &iommu->ppr_log, set_iommu_ppr_log_control);
-    else
-    {
-        entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
-        iommu_set_bit(&entry, IOMMU_CONTROL_PPR_LOG_INT_SHIFT);
-        writel(entry, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
-    }
  
      spin_unlock_irqrestore(&iommu->lock, flags);
  }
  
+#define IOMMU_INT_PENDING(x)	( (x & IOMMU_STATUS_EVENT_LOG_INT_MASK) || \
+				  (x & IOMMU_STATUS_PPR_LOG_INT_MASK) )
+
  static void do_amd_iommu_irq(unsigned long data)
  {
      struct amd_iommu *iommu;
+    u32 status, entry;
+    unsigned long flags;
  
      if ( !iommu_found() )
      {
@@ -722,33 +705,47 @@ static void do_amd_iommu_irq(unsigned long data)
       * tasklet (instead of one per each IOMMUs).
       */
      for_each_amd_iommu ( iommu ) {
-        iommu_check_event_log(iommu);
+        /* Get the IOMMU status register */
+        spin_lock_irqsave(&iommu->lock, flags);
+        status = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+        spin_unlock_irqrestore(&iommu->lock, flags);
+
+        while ( IOMMU_INT_PENDING(status) )
+        {
+            entry = 0;
+
+            if ( status & IOMMU_STATUS_EVENT_LOG_INT_MASK )
+            {
+                iommu_check_event_log(iommu);
+                iommu_set_bit(&entry, IOMMU_STATUS_EVENT_LOG_INT_SHIFT);
+	    }
  
-        if ( iommu->ppr_log.buffer != NULL )
-            iommu_check_ppr_log(iommu);
+            if ( (iommu->ppr_log.buffer != NULL)
+            && (status & IOMMU_STATUS_PPR_LOG_INT_MASK) )
+            {
+                iommu_check_ppr_log(iommu);
+                iommu_set_bit(&entry, IOMMU_STATUS_PPR_LOG_INT_SHIFT);
+            }
+
+            spin_lock_irqsave(&iommu->lock, flags);
+
+            /* RW1C interrupt status bit */
+            writel(entry, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+
+            /*
+             * Workaround for erratum787:
+             * Re-check to make sure the bit has been cleared.
+             */
+            status = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+
+            spin_unlock_irqrestore(&iommu->lock, flags);
+        }
      }
  }
  
  static void iommu_interrupt_handler(int irq, void *dev_id,
                                      struct cpu_user_regs *regs)
  {
-    u32 entry;
-    unsigned long flags;
-    struct amd_iommu *iommu = dev_id;
-
-    spin_lock_irqsave(&iommu->lock, flags);
-
-    /*
-     * Silence interrupts from both event and PPR by clearing the
-     * enable logging bits in the control register
-     */
-    entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
-    iommu_clear_bit(&entry, IOMMU_CONTROL_EVENT_LOG_INT_SHIFT);
-    iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_LOG_INT_SHIFT);
-    writel(entry, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
-
-    spin_unlock_irqrestore(&iommu->lock, flags);
-
      /* It is the tasklet that will clear the logs and re-enable interrupts */
      tasklet_schedule(&amd_iommu_irq_tasklet);
  }

^ permalink raw reply related	[flat|nested] 50+ messages in thread

* Re: [PATCH 2/2 V2] iommu/amd: Workaround for erratum 787
  2013-06-10 16:31                 ` Tim Deegan
  2013-06-10 23:13                   ` Suravee Suthikulanit
@ 2013-06-11  6:40                   ` Jan Beulich
  2013-06-11  8:53                     ` Tim Deegan
  1 sibling, 1 reply; 50+ messages in thread
From: Jan Beulich @ 2013-06-11  6:40 UTC (permalink / raw)
  To: Tim Deegan; +Cc: suravee.suthikulpanit, xen-devel

>>> On 10.06.13 at 18:31, Tim Deegan <tim@xen.org> wrote:
> At 16:03 +0100 on 10 Jun (1370880211), Jan Beulich wrote:
> From my reading of the IOMMU spec the pending bit gets set whether the
> enable bit is set or not:
> 
>   PPRInt: peripheral page service request interrupt. Revision 1:
>   RO. Reset 0b. Reserved.  Revision 2: RW1C. Reset 0b. 1=PPR request
>   entry written to the PPR log by the IOMMU. 0=No PPR entry written to
>   the PPR log by the IOMMU. An interrupt is generated when PPRInt=1b and
>   MMIO Offset 0018h[PPRIntEn]=1b.
> 
> and
> 
>   EventLogInt: event log interrupt. RW1C. Reset 0b. 1=Event entry
>   written to the event log by the IOMMU. 0=No event entry written to the
>   event log by the IOMMU. An interrupt is generated when EventLogInt=1b
>   and MMIO Offset 0018h[EventIntEn]=1b.
> 
> so we should be OK without a second pass if the pending bit is still
> clear after unmasking the interrupt.

Do we want to rely on this? I don't think so, as the names of the
bits suggest otherwise. I'll send v5 in a minute, and I think this will
work for both possible cases.

>> So with this done I now realized that all of these transformations
>> don't really belong in this erratum workaround patch.
> 
> Agreed.  I think this reshuffle to avoid lost entries should be its own
> patch, and the erratum 787 one should follow it -- unless the logic we
> end up with handles erratum 787 as a side-effect. :)

Actually merging it into patch 1 seemed the more natural thing in
the end, as some of the effects of the patch before this re-shuffle
really require these further adjustments to be done at once.

>> You'll note that even here we're having a loop again, which you
>> presumably won't like much.
> 
> Well, this time the event handling is inside the loop, so it ought to
> catch and disable bad passthrough devices.  I'd still prefer having the
> tasklet schedule itself and terminate, but I'm happy to defer to your
> taste.

I came to the same conclusion - it'll re-schedule the tasklet now
both for the interrupt re-enabling case as well as (in patch 2) for
the interrupt-bit-still-or-again-set one.

Jan

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 2/2 V2] iommu/amd: Workaround for erratum 787
  2013-06-10 23:13                   ` Suravee Suthikulanit
@ 2013-06-11  6:45                     ` Jan Beulich
  0 siblings, 0 replies; 50+ messages in thread
From: Jan Beulich @ 2013-06-11  6:45 UTC (permalink / raw)
  To: Suravee Suthikulanit, Tim Deegan; +Cc: xen-devel

>>> On 11.06.13 at 01:13, Suravee Suthikulanit <suravee.suthikulpanit@amd.com> wrote:
> We should also check if the EventLogInt and PPRInt bits are set before 
> actually going into the log processing code.

No, I'd prefer to keep that aspect as is, again to be one the safe
side.

> Also, I agree with Jan that we should not need
> to disable the Event log and the PPR log in the IOMMU control register.
> This could be handled simply through the status register.

I also didn't switch that aspect, keeping in mind that your
documentation says otherwise (and, however minor, that the virtual
IOMMU emulation code is implemented with the opposite behavior).
Safest will be to not depend on questionable aspects.

> Also, I think we can further simplify the logic for the workaround by having 
> only
> one loop instead of two.  Here is the newly proposed changes for the patch.  
> However,
> I am still not sure if we should reschedule the tasklet instead of just 
> using the while loop here.

It's re-scheduling the tasklet for all recovery purposes now. Please
take a look at v5 (to be sent right after this mail).

Jan

^ permalink raw reply	[flat|nested] 50+ messages in thread

* [PATCH 1/2 v5] iommu/amd: Fix logic for clearing the IOMMU interrupt bits
  2013-06-10 12:41     ` [PATCH 1/2 v4] " Jan Beulich
  2013-06-10 12:46       ` Tim Deegan
  2013-06-10 13:49       ` George Dunlap
@ 2013-06-11  6:47       ` Jan Beulich
  2013-06-11 23:03         ` Suravee Suthikulanit
  2 siblings, 1 reply; 50+ messages in thread
From: Jan Beulich @ 2013-06-11  6:47 UTC (permalink / raw)
  To: xen-devel; +Cc: Tim Deegan, suravee.suthikulpanit

[-- Attachment #1: Type: text/plain, Size: 11755 bytes --]

The IOMMU interrupt bits in the IOMMU status registers are
"read-only, and write-1-to-clear (RW1C).  Therefore, the existing
logic which reads the register, set the bit, and then writing back
the values could accidentally clear certain bits if it has been set.

The correct logic would just be writing only the value which only
set the interrupt bits, and leave the rest to zeros.

This patch also, clean up #define masks as Jan has suggested.

Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>

With iommu_interrupt_handler() properly having got switched its readl()
from status to control register, the subsequent writel() needed to be
switched too (and the RW1C comment there was bogus).

Some of the cleanup went too far - undone.

Further, with iommu_interrupt_handler() now actually disabling the
interrupt sources, they also need to get re-enabled by the tasklet once
it finished processing the respective log. This also implies re-running
the tasklet so that log entries added between reading the log and re-
enabling the interrupt will get handled in a timely manner.

Finally, guest write emulation to the status register needs to be done
with the RW1C (and RO for all other bits) semantics in mind too.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v5: Move most of what accumulated in patch 2 here. Re-schedule the
    tasklet when re-enabling a previously disabled interrupt source.

--- a/xen/drivers/passthrough/amd/iommu_cmd.c
+++ b/xen/drivers/passthrough/amd/iommu_cmd.c
@@ -75,11 +75,9 @@ static void flush_command_buffer(struct 
     u32 cmd[4], status;
     int loop_count, comp_wait;
 
-    /* clear 'ComWaitInt' in status register (WIC) */
-    set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, 0,
-                         IOMMU_STATUS_COMP_WAIT_INT_MASK,
-                         IOMMU_STATUS_COMP_WAIT_INT_SHIFT, &status);
-    writel(status, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+    /* RW1C 'ComWaitInt' in status register */
+    writel(IOMMU_STATUS_COMP_WAIT_INT_MASK,
+           iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
 
     /* send an empty COMPLETION_WAIT command to flush command buffer */
     cmd[3] = cmd[2] = 0;
@@ -103,9 +101,9 @@ static void flush_command_buffer(struct 
 
     if ( comp_wait )
     {
-        /* clear 'ComWaitInt' in status register (WIC) */
-        status &= IOMMU_STATUS_COMP_WAIT_INT_MASK;
-        writel(status, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+        /* RW1C 'ComWaitInt' in status register */
+        writel(IOMMU_STATUS_COMP_WAIT_INT_MASK,
+               iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
         return;
     }
     AMD_IOMMU_DEBUG("Warning: ComWaitInt bit did not assert!\n");
--- a/xen/drivers/passthrough/amd/iommu_guest.c
+++ b/xen/drivers/passthrough/amd/iommu_guest.c
@@ -754,7 +754,14 @@ static void guest_iommu_mmio_write64(str
         u64_to_reg(&iommu->ppr_log.reg_tail, val);
         break;
     case IOMMU_STATUS_MMIO_OFFSET:
-        u64_to_reg(&iommu->reg_status, val);
+        val &= IOMMU_STATUS_EVENT_OVERFLOW_MASK |
+               IOMMU_STATUS_EVENT_LOG_INT_MASK |
+               IOMMU_STATUS_COMP_WAIT_INT_MASK |
+               IOMMU_STATUS_PPR_LOG_OVERFLOW_MASK |
+               IOMMU_STATUS_PPR_LOG_INT_MASK |
+               IOMMU_STATUS_GAPIC_LOG_OVERFLOW_MASK |
+               IOMMU_STATUS_GAPIC_LOG_INT_MASK;
+        u64_to_reg(&iommu->reg_status, reg_to_u64(iommu->reg_status) & ~val);
         break;
 
     default:
--- a/xen/drivers/passthrough/amd/iommu_init.c
+++ b/xen/drivers/passthrough/amd/iommu_init.c
@@ -344,13 +344,13 @@ static void set_iommu_ppr_log_control(st
         writeq(0, iommu->mmio_base + IOMMU_PPR_LOG_TAIL_OFFSET);
 
         iommu_set_bit(&entry, IOMMU_CONTROL_PPR_ENABLE_SHIFT);
-        iommu_set_bit(&entry, IOMMU_CONTROL_PPR_INT_SHIFT);
+        iommu_set_bit(&entry, IOMMU_CONTROL_PPR_LOG_INT_SHIFT);
         iommu_set_bit(&entry, IOMMU_CONTROL_PPR_LOG_ENABLE_SHIFT);
     }
     else
     {
         iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_ENABLE_SHIFT);
-        iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_INT_SHIFT);
+        iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_LOG_INT_SHIFT);
         iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_LOG_ENABLE_SHIFT);
     }
 
@@ -410,7 +410,7 @@ static void iommu_reset_log(struct amd_i
                             void (*ctrl_func)(struct amd_iommu *iommu, int))
 {
     u32 entry;
-    int log_run, run_bit, of_bit;
+    int log_run, run_bit;
     int loop_count = 1000;
 
     BUG_ON(!iommu || ((log != &iommu->event_log) && (log != &iommu->ppr_log)));
@@ -419,10 +419,6 @@ static void iommu_reset_log(struct amd_i
         IOMMU_STATUS_EVENT_LOG_RUN_SHIFT :
         IOMMU_STATUS_PPR_LOG_RUN_SHIFT;
 
-    of_bit = ( log == &iommu->event_log ) ?
-        IOMMU_STATUS_EVENT_OVERFLOW_SHIFT :
-        IOMMU_STATUS_PPR_LOG_OVERFLOW_SHIFT;
-
     /* wait until EventLogRun bit = 0 */
     do {
         entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
@@ -439,9 +435,10 @@ static void iommu_reset_log(struct amd_i
 
     ctrl_func(iommu, IOMMU_CONTROL_DISABLED);
 
-    /*clear overflow bit */
-    iommu_clear_bit(&entry, of_bit);
-    writel(entry, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+    /* RW1C overflow bit */
+    writel(log == &iommu->event_log ? IOMMU_STATUS_EVENT_OVERFLOW_MASK
+                                    : IOMMU_STATUS_PPR_LOG_OVERFLOW_MASK,
+           iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
 
     /*reset event log base address */
     log->head = 0;
@@ -611,22 +608,33 @@ static void iommu_check_event_log(struct
     u32 entry;
     unsigned long flags;
 
+    /* RW1C interrupt status bit */
+    writel(IOMMU_STATUS_EVENT_LOG_INT_MASK,
+           iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+
     iommu_read_log(iommu, &iommu->event_log,
                    sizeof(event_entry_t), parse_event_log_entry);
 
     spin_lock_irqsave(&iommu->lock, flags);
     
-    /*check event overflow */
+    /* Check event overflow. */
     entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
-
     if ( iommu_get_bit(entry, IOMMU_STATUS_EVENT_OVERFLOW_SHIFT) )
         iommu_reset_log(iommu, &iommu->event_log, set_iommu_event_log_control);
-
-    /* reset interrupt status bit */
-    entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
-    iommu_set_bit(&entry, IOMMU_STATUS_EVENT_LOG_INT_SHIFT);
-
-    writel(entry, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+    else
+    {
+        entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
+        if ( !(entry & IOMMU_CONTROL_EVENT_LOG_INT_MASK) )
+        {
+            entry |= IOMMU_CONTROL_EVENT_LOG_INT_MASK;
+            writel(entry, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
+            /*
+             * Re-schedule the tasklet to handle eventual log entries added
+             * between reading the log above and re-enabling the interrupt.
+             */
+            tasklet_schedule(&amd_iommu_irq_tasklet);
+        }
+    }
 
     spin_unlock_irqrestore(&iommu->lock, flags);
 }
@@ -681,22 +689,33 @@ static void iommu_check_ppr_log(struct a
     u32 entry;
     unsigned long flags;
 
+    /* RW1C interrupt status bit */
+    writel(IOMMU_STATUS_PPR_LOG_INT_MASK,
+           iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+
     iommu_read_log(iommu, &iommu->ppr_log,
                    sizeof(ppr_entry_t), parse_ppr_log_entry);
     
     spin_lock_irqsave(&iommu->lock, flags);
 
-    /*check event overflow */
+    /* Check event overflow. */
     entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
-
     if ( iommu_get_bit(entry, IOMMU_STATUS_PPR_LOG_OVERFLOW_SHIFT) )
         iommu_reset_log(iommu, &iommu->ppr_log, set_iommu_ppr_log_control);
-
-    /* reset interrupt status bit */
-    entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
-    iommu_set_bit(&entry, IOMMU_STATUS_PPR_LOG_INT_SHIFT);
-
-    writel(entry, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+    else
+    {
+        entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
+        if ( !(entry & IOMMU_CONTROL_PPR_LOG_INT_MASK) )
+        {
+            entry |= IOMMU_CONTROL_PPR_LOG_INT_MASK;
+            writel(entry, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
+            /*
+             * Re-schedule the tasklet to handle eventual log entries added
+             * between reading the log above and re-enabling the interrupt.
+             */
+            tasklet_schedule(&amd_iommu_irq_tasklet);
+        }
+    }
 
     spin_unlock_irqrestore(&iommu->lock, flags);
 }
@@ -733,11 +752,14 @@ static void iommu_interrupt_handler(int 
 
     spin_lock_irqsave(&iommu->lock, flags);
 
-    /* Silence interrupts from both event and PPR logging */
-    entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
-    iommu_clear_bit(&entry, IOMMU_STATUS_EVENT_LOG_INT_SHIFT);
-    iommu_clear_bit(&entry, IOMMU_STATUS_PPR_LOG_INT_SHIFT);
-    writel(entry, iommu->mmio_base+IOMMU_STATUS_MMIO_OFFSET);
+    /*
+     * Silence interrupts from both event and PPR by clearing the
+     * enable logging bits in the control register
+     */
+    entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
+    iommu_clear_bit(&entry, IOMMU_CONTROL_EVENT_LOG_INT_SHIFT);
+    iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_LOG_INT_SHIFT);
+    writel(entry, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
 
     spin_unlock_irqrestore(&iommu->lock, flags);
 
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
@@ -336,14 +336,17 @@
 #define IOMMU_CONTROL_ISOCHRONOUS_SHIFT			11
 #define IOMMU_CONTROL_COMMAND_BUFFER_ENABLE_MASK	0x00001000
 #define IOMMU_CONTROL_COMMAND_BUFFER_ENABLE_SHIFT	12
+#define IOMMU_CONTROL_PPR_LOG_ENABLE_MASK		0x00002000
+#define IOMMU_CONTROL_PPR_LOG_ENABLE_SHIFT		13
+#define IOMMU_CONTROL_PPR_LOG_INT_MASK			0x00004000
+#define IOMMU_CONTROL_PPR_LOG_INT_SHIFT			14
+#define IOMMU_CONTROL_PPR_ENABLE_MASK			0x00008000
+#define IOMMU_CONTROL_PPR_ENABLE_SHIFT			15
+#define IOMMU_CONTROL_GT_ENABLE_MASK			0x00010000
+#define IOMMU_CONTROL_GT_ENABLE_SHIFT			16
 #define IOMMU_CONTROL_RESTART_MASK			0x80000000
 #define IOMMU_CONTROL_RESTART_SHIFT			31
 
-#define IOMMU_CONTROL_PPR_LOG_ENABLE_SHIFT      13
-#define IOMMU_CONTROL_PPR_INT_SHIFT             14
-#define IOMMU_CONTROL_PPR_ENABLE_SHIFT          15
-#define IOMMU_CONTROL_GT_ENABLE_SHIFT           16
-
 /* Exclusion Register */
 #define IOMMU_EXCLUSION_BASE_LOW_OFFSET		0x20
 #define IOMMU_EXCLUSION_BASE_HIGH_OFFSET	0x24
@@ -395,9 +398,18 @@
 #define IOMMU_STATUS_EVENT_LOG_RUN_SHIFT	3
 #define IOMMU_STATUS_CMD_BUFFER_RUN_MASK	0x00000010
 #define IOMMU_STATUS_CMD_BUFFER_RUN_SHIFT	4
+#define IOMMU_STATUS_PPR_LOG_OVERFLOW_MASK      0x00000020
 #define IOMMU_STATUS_PPR_LOG_OVERFLOW_SHIFT     5
+#define IOMMU_STATUS_PPR_LOG_INT_MASK           0x00000040
 #define IOMMU_STATUS_PPR_LOG_INT_SHIFT          6
+#define IOMMU_STATUS_PPR_LOG_RUN_MASK           0x00000080
 #define IOMMU_STATUS_PPR_LOG_RUN_SHIFT          7
+#define IOMMU_STATUS_GAPIC_LOG_OVERFLOW_MASK    0x00000100
+#define IOMMU_STATUS_GAPIC_LOG_OVERFLOW_SHIFT   8
+#define IOMMU_STATUS_GAPIC_LOG_INT_MASK         0x00000200
+#define IOMMU_STATUS_GAPIC_LOG_INT_SHIFT        9
+#define IOMMU_STATUS_GAPIC_LOG_RUN_MASK         0x00000400
+#define IOMMU_STATUS_GAPIC_LOG_RUN_SHIFT        10
 
 /* I/O Page Table */
 #define IOMMU_PAGE_TABLE_ENTRY_SIZE	8



[-- Attachment #2: AMD-IOMMU-correct-rw1c-handling --]
[-- Type: application/octet-stream, Size: 11531 bytes --]

iommu/amd: Fix logic for clearing the IOMMU interrupt bits

The IOMMU interrupt bits in the IOMMU status registers are
"read-only, and write-1-to-clear (RW1C).  Therefore, the existing
logic which reads the register, set the bit, and then writing back
the values could accidentally clear certain bits if it has been set.

The correct logic would just be writing only the value which only
set the interrupt bits, and leave the rest to zeros.

This patch also, clean up #define masks as Jan has suggested.

Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>

With iommu_interrupt_handler() properly having got switched its readl()
from status to control register, the subsequent writel() needed to be
switched too (and the RW1C comment there was bogus).

Some of the cleanup went too far - undone.

Further, with iommu_interrupt_handler() now actually disabling the
interrupt sources, they also need to get re-enabled by the tasklet once
it finished processing the respective log. This also implies re-running
the tasklet so that log entries added between reading the log and re-
enabling the interrupt will get handled in a timely manner.

Finally, guest write emulation to the status register needs to be done
with the RW1C (and RO for all other bits) semantics in mind too.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v5: Move most of what accumulated in patch 2 here. Re-schedule the
    tasklet when re-enabling a previously disabled interrupt source.

--- a/xen/drivers/passthrough/amd/iommu_cmd.c
+++ b/xen/drivers/passthrough/amd/iommu_cmd.c
@@ -75,11 +75,9 @@ static void flush_command_buffer(struct 
     u32 cmd[4], status;
     int loop_count, comp_wait;
 
-    /* clear 'ComWaitInt' in status register (WIC) */
-    set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, 0,
-                         IOMMU_STATUS_COMP_WAIT_INT_MASK,
-                         IOMMU_STATUS_COMP_WAIT_INT_SHIFT, &status);
-    writel(status, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+    /* RW1C 'ComWaitInt' in status register */
+    writel(IOMMU_STATUS_COMP_WAIT_INT_MASK,
+           iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
 
     /* send an empty COMPLETION_WAIT command to flush command buffer */
     cmd[3] = cmd[2] = 0;
@@ -103,9 +101,9 @@ static void flush_command_buffer(struct 
 
     if ( comp_wait )
     {
-        /* clear 'ComWaitInt' in status register (WIC) */
-        status &= IOMMU_STATUS_COMP_WAIT_INT_MASK;
-        writel(status, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+        /* RW1C 'ComWaitInt' in status register */
+        writel(IOMMU_STATUS_COMP_WAIT_INT_MASK,
+               iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
         return;
     }
     AMD_IOMMU_DEBUG("Warning: ComWaitInt bit did not assert!\n");
--- a/xen/drivers/passthrough/amd/iommu_guest.c
+++ b/xen/drivers/passthrough/amd/iommu_guest.c
@@ -754,7 +754,14 @@ static void guest_iommu_mmio_write64(str
         u64_to_reg(&iommu->ppr_log.reg_tail, val);
         break;
     case IOMMU_STATUS_MMIO_OFFSET:
-        u64_to_reg(&iommu->reg_status, val);
+        val &= IOMMU_STATUS_EVENT_OVERFLOW_MASK |
+               IOMMU_STATUS_EVENT_LOG_INT_MASK |
+               IOMMU_STATUS_COMP_WAIT_INT_MASK |
+               IOMMU_STATUS_PPR_LOG_OVERFLOW_MASK |
+               IOMMU_STATUS_PPR_LOG_INT_MASK |
+               IOMMU_STATUS_GAPIC_LOG_OVERFLOW_MASK |
+               IOMMU_STATUS_GAPIC_LOG_INT_MASK;
+        u64_to_reg(&iommu->reg_status, reg_to_u64(iommu->reg_status) & ~val);
         break;
 
     default:
--- a/xen/drivers/passthrough/amd/iommu_init.c
+++ b/xen/drivers/passthrough/amd/iommu_init.c
@@ -344,13 +344,13 @@ static void set_iommu_ppr_log_control(st
         writeq(0, iommu->mmio_base + IOMMU_PPR_LOG_TAIL_OFFSET);
 
         iommu_set_bit(&entry, IOMMU_CONTROL_PPR_ENABLE_SHIFT);
-        iommu_set_bit(&entry, IOMMU_CONTROL_PPR_INT_SHIFT);
+        iommu_set_bit(&entry, IOMMU_CONTROL_PPR_LOG_INT_SHIFT);
         iommu_set_bit(&entry, IOMMU_CONTROL_PPR_LOG_ENABLE_SHIFT);
     }
     else
     {
         iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_ENABLE_SHIFT);
-        iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_INT_SHIFT);
+        iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_LOG_INT_SHIFT);
         iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_LOG_ENABLE_SHIFT);
     }
 
@@ -410,7 +410,7 @@ static void iommu_reset_log(struct amd_i
                             void (*ctrl_func)(struct amd_iommu *iommu, int))
 {
     u32 entry;
-    int log_run, run_bit, of_bit;
+    int log_run, run_bit;
     int loop_count = 1000;
 
     BUG_ON(!iommu || ((log != &iommu->event_log) && (log != &iommu->ppr_log)));
@@ -419,10 +419,6 @@ static void iommu_reset_log(struct amd_i
         IOMMU_STATUS_EVENT_LOG_RUN_SHIFT :
         IOMMU_STATUS_PPR_LOG_RUN_SHIFT;
 
-    of_bit = ( log == &iommu->event_log ) ?
-        IOMMU_STATUS_EVENT_OVERFLOW_SHIFT :
-        IOMMU_STATUS_PPR_LOG_OVERFLOW_SHIFT;
-
     /* wait until EventLogRun bit = 0 */
     do {
         entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
@@ -439,9 +435,10 @@ static void iommu_reset_log(struct amd_i
 
     ctrl_func(iommu, IOMMU_CONTROL_DISABLED);
 
-    /*clear overflow bit */
-    iommu_clear_bit(&entry, of_bit);
-    writel(entry, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+    /* RW1C overflow bit */
+    writel(log == &iommu->event_log ? IOMMU_STATUS_EVENT_OVERFLOW_MASK
+                                    : IOMMU_STATUS_PPR_LOG_OVERFLOW_MASK,
+           iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
 
     /*reset event log base address */
     log->head = 0;
@@ -611,22 +608,33 @@ static void iommu_check_event_log(struct
     u32 entry;
     unsigned long flags;
 
+    /* RW1C interrupt status bit */
+    writel(IOMMU_STATUS_EVENT_LOG_INT_MASK,
+           iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+
     iommu_read_log(iommu, &iommu->event_log,
                    sizeof(event_entry_t), parse_event_log_entry);
 
     spin_lock_irqsave(&iommu->lock, flags);
     
-    /*check event overflow */
+    /* Check event overflow. */
     entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
-
     if ( iommu_get_bit(entry, IOMMU_STATUS_EVENT_OVERFLOW_SHIFT) )
         iommu_reset_log(iommu, &iommu->event_log, set_iommu_event_log_control);
-
-    /* reset interrupt status bit */
-    entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
-    iommu_set_bit(&entry, IOMMU_STATUS_EVENT_LOG_INT_SHIFT);
-
-    writel(entry, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+    else
+    {
+        entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
+        if ( !(entry & IOMMU_CONTROL_EVENT_LOG_INT_MASK) )
+        {
+            entry |= IOMMU_CONTROL_EVENT_LOG_INT_MASK;
+            writel(entry, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
+            /*
+             * Re-schedule the tasklet to handle eventual log entries added
+             * between reading the log above and re-enabling the interrupt.
+             */
+            tasklet_schedule(&amd_iommu_irq_tasklet);
+        }
+    }
 
     spin_unlock_irqrestore(&iommu->lock, flags);
 }
@@ -681,22 +689,33 @@ static void iommu_check_ppr_log(struct a
     u32 entry;
     unsigned long flags;
 
+    /* RW1C interrupt status bit */
+    writel(IOMMU_STATUS_PPR_LOG_INT_MASK,
+           iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+
     iommu_read_log(iommu, &iommu->ppr_log,
                    sizeof(ppr_entry_t), parse_ppr_log_entry);
     
     spin_lock_irqsave(&iommu->lock, flags);
 
-    /*check event overflow */
+    /* Check event overflow. */
     entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
-
     if ( iommu_get_bit(entry, IOMMU_STATUS_PPR_LOG_OVERFLOW_SHIFT) )
         iommu_reset_log(iommu, &iommu->ppr_log, set_iommu_ppr_log_control);
-
-    /* reset interrupt status bit */
-    entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
-    iommu_set_bit(&entry, IOMMU_STATUS_PPR_LOG_INT_SHIFT);
-
-    writel(entry, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+    else
+    {
+        entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
+        if ( !(entry & IOMMU_CONTROL_PPR_LOG_INT_MASK) )
+        {
+            entry |= IOMMU_CONTROL_PPR_LOG_INT_MASK;
+            writel(entry, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
+            /*
+             * Re-schedule the tasklet to handle eventual log entries added
+             * between reading the log above and re-enabling the interrupt.
+             */
+            tasklet_schedule(&amd_iommu_irq_tasklet);
+        }
+    }
 
     spin_unlock_irqrestore(&iommu->lock, flags);
 }
@@ -733,11 +752,14 @@ static void iommu_interrupt_handler(int 
 
     spin_lock_irqsave(&iommu->lock, flags);
 
-    /* Silence interrupts from both event and PPR logging */
-    entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
-    iommu_clear_bit(&entry, IOMMU_STATUS_EVENT_LOG_INT_SHIFT);
-    iommu_clear_bit(&entry, IOMMU_STATUS_PPR_LOG_INT_SHIFT);
-    writel(entry, iommu->mmio_base+IOMMU_STATUS_MMIO_OFFSET);
+    /*
+     * Silence interrupts from both event and PPR by clearing the
+     * enable logging bits in the control register
+     */
+    entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
+    iommu_clear_bit(&entry, IOMMU_CONTROL_EVENT_LOG_INT_SHIFT);
+    iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_LOG_INT_SHIFT);
+    writel(entry, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
 
     spin_unlock_irqrestore(&iommu->lock, flags);
 
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
@@ -336,14 +336,17 @@
 #define IOMMU_CONTROL_ISOCHRONOUS_SHIFT			11
 #define IOMMU_CONTROL_COMMAND_BUFFER_ENABLE_MASK	0x00001000
 #define IOMMU_CONTROL_COMMAND_BUFFER_ENABLE_SHIFT	12
+#define IOMMU_CONTROL_PPR_LOG_ENABLE_MASK		0x00002000
+#define IOMMU_CONTROL_PPR_LOG_ENABLE_SHIFT		13
+#define IOMMU_CONTROL_PPR_LOG_INT_MASK			0x00004000
+#define IOMMU_CONTROL_PPR_LOG_INT_SHIFT			14
+#define IOMMU_CONTROL_PPR_ENABLE_MASK			0x00008000
+#define IOMMU_CONTROL_PPR_ENABLE_SHIFT			15
+#define IOMMU_CONTROL_GT_ENABLE_MASK			0x00010000
+#define IOMMU_CONTROL_GT_ENABLE_SHIFT			16
 #define IOMMU_CONTROL_RESTART_MASK			0x80000000
 #define IOMMU_CONTROL_RESTART_SHIFT			31
 
-#define IOMMU_CONTROL_PPR_LOG_ENABLE_SHIFT      13
-#define IOMMU_CONTROL_PPR_INT_SHIFT             14
-#define IOMMU_CONTROL_PPR_ENABLE_SHIFT          15
-#define IOMMU_CONTROL_GT_ENABLE_SHIFT           16
-
 /* Exclusion Register */
 #define IOMMU_EXCLUSION_BASE_LOW_OFFSET		0x20
 #define IOMMU_EXCLUSION_BASE_HIGH_OFFSET	0x24
@@ -395,9 +398,18 @@
 #define IOMMU_STATUS_EVENT_LOG_RUN_SHIFT	3
 #define IOMMU_STATUS_CMD_BUFFER_RUN_MASK	0x00000010
 #define IOMMU_STATUS_CMD_BUFFER_RUN_SHIFT	4
+#define IOMMU_STATUS_PPR_LOG_OVERFLOW_MASK      0x00000020
 #define IOMMU_STATUS_PPR_LOG_OVERFLOW_SHIFT     5
+#define IOMMU_STATUS_PPR_LOG_INT_MASK           0x00000040
 #define IOMMU_STATUS_PPR_LOG_INT_SHIFT          6
+#define IOMMU_STATUS_PPR_LOG_RUN_MASK           0x00000080
 #define IOMMU_STATUS_PPR_LOG_RUN_SHIFT          7
+#define IOMMU_STATUS_GAPIC_LOG_OVERFLOW_MASK    0x00000100
+#define IOMMU_STATUS_GAPIC_LOG_OVERFLOW_SHIFT   8
+#define IOMMU_STATUS_GAPIC_LOG_INT_MASK         0x00000200
+#define IOMMU_STATUS_GAPIC_LOG_INT_SHIFT        9
+#define IOMMU_STATUS_GAPIC_LOG_RUN_MASK         0x00000400
+#define IOMMU_STATUS_GAPIC_LOG_RUN_SHIFT        10
 
 /* I/O Page Table */
 #define IOMMU_PAGE_TABLE_ENTRY_SIZE	8

[-- Attachment #3: Type: text/plain, Size: 126 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 50+ messages in thread

* [PATCH 2/2 v5] iommu/amd: Workaround for erratum 787
  2013-06-10 10:59   ` [PATCH 2/2 v3] " Jan Beulich
@ 2013-06-11  6:47     ` Jan Beulich
  2013-06-17 18:57       ` Suravee Suthikulanit
  0 siblings, 1 reply; 50+ messages in thread
From: Jan Beulich @ 2013-06-11  6:47 UTC (permalink / raw)
  To: xen-devel; +Cc: Tim Deegan, suravee.suthikulpanit

[-- Attachment #1: Type: text/plain, Size: 2049 bytes --]

The IOMMU interrupt handling in bottom half must clear the PPR log interrupt
and event log interrupt bits to re-enable the interrupt. This is done by
writing 1 to the memory mapped register to clear the bit. Due to hardware bug,
if the driver tries to clear this bit while the IOMMU hardware also setting
this bit, the conflict will result with the bit being set. If the interrupt
handling code does not make sure to clear this bit, subsequent changes in the
event/PPR logs will no longer generating interrupts, and would result if
buffer overflow. After clearing the bits, the driver must read back
the register to verify.

Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>

Adjust to apply on top of heavily modified patch 1. Adjust flow to get away
with a single readl() in each instance of the status register checks.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
v5: Moved most of what accumulated here into patch 1. Rather than looping,
    re-schedule the tasklet to work around the erratum.
    (skipped v4 to remain in sync with patch 1)

--- a/xen/drivers/passthrough/amd/iommu_init.c
+++ b/xen/drivers/passthrough/amd/iommu_init.c
@@ -636,6 +636,14 @@ static void iommu_check_event_log(struct
         }
     }
 
+    /*
+     * Workaround for erratum787:
+     * Re-check to make sure the bit has been cleared.
+     */
+    entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+    if ( entry & IOMMU_STATUS_EVENT_LOG_INT_MASK )
+        tasklet_schedule(&amd_iommu_irq_tasklet);
+
     spin_unlock_irqrestore(&iommu->lock, flags);
 }
 
@@ -717,6 +725,14 @@ static void iommu_check_ppr_log(struct a
         }
     }
 
+    /*
+     * Workaround for erratum787:
+     * Re-check to make sure the bit has been cleared.
+     */
+    entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+    if ( entry & IOMMU_STATUS_PPR_LOG_INT_MASK )
+        tasklet_schedule(&amd_iommu_irq_tasklet);
+
     spin_unlock_irqrestore(&iommu->lock, flags);
 }
 




[-- Attachment #2: AMD-IOMMU-erratum-787-workaround --]
[-- Type: application/octet-stream, Size: 2030 bytes --]

iommu/amd: Workaround for erratum 787

The IOMMU interrupt handling in bottom half must clear the PPR log interrupt
and event log interrupt bits to re-enable the interrupt. This is done by
writing 1 to the memory mapped register to clear the bit. Due to hardware bug,
if the driver tries to clear this bit while the IOMMU hardware also setting
this bit, the conflict will result with the bit being set. If the interrupt
handling code does not make sure to clear this bit, subsequent changes in the
event/PPR logs will no longer generating interrupts, and would result if
buffer overflow. After clearing the bits, the driver must read back
the register to verify.

Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>

Adjust to apply on top of heavily modified patch 1. Adjust flow to get away
with a single readl() in each instance of the status register checks.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
v5: Moved most of what accumulated here into patch 1. Rather than looping,
    re-schedule the tasklet to work around the erratum.
    (skipped v4 to remain in sync with patch 1)

--- a/xen/drivers/passthrough/amd/iommu_init.c
+++ b/xen/drivers/passthrough/amd/iommu_init.c
@@ -636,6 +636,14 @@ static void iommu_check_event_log(struct
         }
     }
 
+    /*
+     * Workaround for erratum787:
+     * Re-check to make sure the bit has been cleared.
+     */
+    entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+    if ( entry & IOMMU_STATUS_EVENT_LOG_INT_MASK )
+        tasklet_schedule(&amd_iommu_irq_tasklet);
+
     spin_unlock_irqrestore(&iommu->lock, flags);
 }
 
@@ -717,6 +725,14 @@ static void iommu_check_ppr_log(struct a
         }
     }
 
+    /*
+     * Workaround for erratum787:
+     * Re-check to make sure the bit has been cleared.
+     */
+    entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+    if ( entry & IOMMU_STATUS_PPR_LOG_INT_MASK )
+        tasklet_schedule(&amd_iommu_irq_tasklet);
+
     spin_unlock_irqrestore(&iommu->lock, flags);
 }
 

[-- Attachment #3: Type: text/plain, Size: 126 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 2/2 V2] iommu/amd: Workaround for erratum 787
  2013-06-11  6:40                   ` Jan Beulich
@ 2013-06-11  8:53                     ` Tim Deegan
  0 siblings, 0 replies; 50+ messages in thread
From: Tim Deegan @ 2013-06-11  8:53 UTC (permalink / raw)
  To: Jan Beulich; +Cc: suravee.suthikulpanit, xen-devel

At 07:40 +0100 on 11 Jun (1370936413), Jan Beulich wrote:
> >>> On 10.06.13 at 18:31, Tim Deegan <tim@xen.org> wrote:
> > At 16:03 +0100 on 10 Jun (1370880211), Jan Beulich wrote:
> > From my reading of the IOMMU spec the pending bit gets set whether the
> > enable bit is set or not:
> > 
> >   PPRInt: peripheral page service request interrupt. Revision 1:
> >   RO. Reset 0b. Reserved.  Revision 2: RW1C. Reset 0b. 1=PPR request
> >   entry written to the PPR log by the IOMMU. 0=No PPR entry written to
> >   the PPR log by the IOMMU. An interrupt is generated when PPRInt=1b and
> >   MMIO Offset 0018h[PPRIntEn]=1b.
> > 
> > and
> > 
> >   EventLogInt: event log interrupt. RW1C. Reset 0b. 1=Event entry
> >   written to the event log by the IOMMU. 0=No event entry written to the
> >   event log by the IOMMU. An interrupt is generated when EventLogInt=1b
> >   and MMIO Offset 0018h[EventIntEn]=1b.
> > 
> > so we should be OK without a second pass if the pending bit is still
> > clear after unmasking the interrupt.
> 
> Do we want to rely on this? I don't think so, as the names of the
> bits suggest otherwise. I'll send v5 in a minute, and I think this will
> work for both possible cases.

I think it's pretty clear from the text that the bit gets set when an
entry is written, even if an interrupt doesn't get raised.  To me, that
means we can replace the mandatory re-run on interrupt enable with a
check of the status register (i.e. the check we'll do for erratum 787
anyway).

But if you're not comfortable with that, the v5 you just posted looks
correct to me, and (at least until we have restartable IOMMU faults)
that path won't be hot enough to worry about the efficiency.  So both v5
patches are Reviewed-by: Tim Deegan <tim@xen.org>.

Cheers,

Tim.

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/2 v5] iommu/amd: Fix logic for clearing the IOMMU interrupt bits
  2013-06-11  6:47       ` [PATCH 1/2 v5] " Jan Beulich
@ 2013-06-11 23:03         ` Suravee Suthikulanit
  2013-06-12  6:24           ` Jan Beulich
  0 siblings, 1 reply; 50+ messages in thread
From: Suravee Suthikulanit @ 2013-06-11 23:03 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Tim Deegan, xen-devel

On 6/11/2013 1:47 AM, Jan Beulich wrote:
> @@ -611,22 +608,33 @@ static void iommu_check_event_log(struct
>       u32 entry;
>       unsigned long flags;
>   
> +    /* RW1C interrupt status bit */
> +    writel(IOMMU_STATUS_EVENT_LOG_INT_MASK,
> +           iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
> +
>       iommu_read_log(iommu, &iommu->event_log,
>                      sizeof(event_entry_t), parse_event_log_entry);
>   
>       spin_lock_irqsave(&iommu->lock, flags);
>       
> -    /*check event overflow */
> +    /* Check event overflow. */
>       entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
> -
>       if ( iommu_get_bit(entry, IOMMU_STATUS_EVENT_OVERFLOW_SHIFT) )
>           iommu_reset_log(iommu, &iommu->event_log, set_iommu_event_log_control);
> -
> -    /* reset interrupt status bit */
> -    entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
> -    iommu_set_bit(&entry, IOMMU_STATUS_EVENT_LOG_INT_SHIFT);
> -
> -    writel(entry, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
> +    else
> +    {
> +        entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
> +        if ( !(entry & IOMMU_CONTROL_EVENT_LOG_INT_MASK) )
> +        {
> +            entry |= IOMMU_CONTROL_EVENT_LOG_INT_MASK;
> +            writel(entry, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
> +            /*
> +             * Re-schedule the tasklet to handle eventual log entries added
> +             * between reading the log above and re-enabling the interrupt.
> +             */
> +            tasklet_schedule(&amd_iommu_irq_tasklet);
> +        }
> +    }
If more entries are added to the event log during the time that event 
log interrupt is disabled (in the control register),
the IOMMU hardware will generate interrupt once the the interrupt enable 
bit in the control register changes from 0 to 1 and set the status 
register.  Since the "iommu_interrupt_handler" code is already calling 
"schedule_tasklet",  we should not need to "re-schedule" tasklet here.  
I have confirmed the hardware behavior described with the hardware 
designer.  This is also the same on the PPR log.

Suravee

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/2 v5] iommu/amd: Fix logic for clearing the IOMMU interrupt bits
  2013-06-11 23:03         ` Suravee Suthikulanit
@ 2013-06-12  6:24           ` Jan Beulich
  2013-06-12 22:37             ` Suravee Suthikulpanit
  2013-06-17 18:59             ` [PATCH 1/2 v5] iommu/amd: Fix logic for clearing the IOMMU interrupt bits Suravee Suthikulanit
  0 siblings, 2 replies; 50+ messages in thread
From: Jan Beulich @ 2013-06-12  6:24 UTC (permalink / raw)
  To: Suravee Suthikulanit; +Cc: Tim Deegan, xen-devel

>>> On 12.06.13 at 01:03, Suravee Suthikulanit <suravee.suthikulpanit@amd.com>
wrote:
> On 6/11/2013 1:47 AM, Jan Beulich wrote:
>> @@ -611,22 +608,33 @@ static void iommu_check_event_log(struct
>>       u32 entry;
>>       unsigned long flags;
>>   
>> +    /* RW1C interrupt status bit */
>> +    writel(IOMMU_STATUS_EVENT_LOG_INT_MASK,
>> +           iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
>> +
>>       iommu_read_log(iommu, &iommu->event_log,
>>                      sizeof(event_entry_t), parse_event_log_entry);
>>   
>>       spin_lock_irqsave(&iommu->lock, flags);
>>       
>> -    /*check event overflow */
>> +    /* Check event overflow. */
>>       entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
>> -
>>       if ( iommu_get_bit(entry, IOMMU_STATUS_EVENT_OVERFLOW_SHIFT) )
>>           iommu_reset_log(iommu, &iommu->event_log, 
> set_iommu_event_log_control);
>> -
>> -    /* reset interrupt status bit */
>> -    entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
>> -    iommu_set_bit(&entry, IOMMU_STATUS_EVENT_LOG_INT_SHIFT);
>> -
>> -    writel(entry, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
>> +    else
>> +    {
>> +        entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
>> +        if ( !(entry & IOMMU_CONTROL_EVENT_LOG_INT_MASK) )
>> +        {
>> +            entry |= IOMMU_CONTROL_EVENT_LOG_INT_MASK;
>> +            writel(entry, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
>> +            /*
>> +             * Re-schedule the tasklet to handle eventual log entries added
>> +             * between reading the log above and re-enabling the interrupt.
>> +             */
>> +            tasklet_schedule(&amd_iommu_irq_tasklet);
>> +        }
>> +    }
> If more entries are added to the event log during the time that event 
> log interrupt is disabled (in the control register),
> the IOMMU hardware will generate interrupt once the the interrupt enable 
> bit in the control register changes from 0 to 1 and set the status 
> register.  Since the "iommu_interrupt_handler" code is already calling 
> "schedule_tasklet",  we should not need to "re-schedule" tasklet here.  
> I have confirmed the hardware behavior described with the hardware 
> designer.  This is also the same on the PPR log.

And also the same between v1 and v2 hardware? Again, I'd like to
be on the safe side, and rather do a reschedule too much than one
too little. And in any case, having your documentation made more
precise in these respects would be much appreciated.

Jan

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/2 v5] iommu/amd: Fix logic for clearing the IOMMU interrupt bits
  2013-06-12  6:24           ` Jan Beulich
@ 2013-06-12 22:37             ` Suravee Suthikulpanit
  2013-06-13  1:44               ` Suravee Suthikulpanit
  2013-06-17 18:59             ` [PATCH 1/2 v5] iommu/amd: Fix logic for clearing the IOMMU interrupt bits Suravee Suthikulanit
  1 sibling, 1 reply; 50+ messages in thread
From: Suravee Suthikulpanit @ 2013-06-12 22:37 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Hurwitz, Sherry, Tim Deegan, xen-devel

On 6/12/2013 1:24 AM, Jan Beulich wrote:
>> If more entries are added to the event log during the time that event
>> log interrupt is disabled (in the control register),
>> the IOMMU hardware will generate interrupt once the the interrupt enable
>> bit in the control register changes from 0 to 1 and set the status
>> register.  Since the "iommu_interrupt_handler" code is already calling
>> "schedule_tasklet",  we should not need to "re-schedule" tasklet here.
>> I have confirmed the hardware behavior described with the hardware
>> designer.  This is also the same on the PPR log.
> And also the same between v1 and v2 hardware? Again, I'd like to
> be on the safe side, and rather do a reschedule too much than one
> too little. And in any case, having your documentation made more
> precise in these respects would be much appreciated.
>
> Jan
>
>
Understand.  I apologize if the AMD IOMMU specification does not 
describe the behavior quite clearly.  Let me know if I could help 
clarifing any issues with the hardware designers.

Since we are modifying the IOMMU interrupt enabling/disabling, I have 
been doing some more testing on the IOMMU interrupt handling. I found 
that IOMMU MSI interrupt is currently broken, but I think this is 
because of some older changes.  I am still tracking down the issue, and 
will update my findings.

Thank you,

Suravee

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/2 v5] iommu/amd: Fix logic for clearing the IOMMU interrupt bits
  2013-06-12 22:37             ` Suravee Suthikulpanit
@ 2013-06-13  1:44               ` Suravee Suthikulpanit
  2013-06-13  7:54                 ` Jan Beulich
                                   ` (2 more replies)
  0 siblings, 3 replies; 50+ messages in thread
From: Suravee Suthikulpanit @ 2013-06-13  1:44 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Shin, Jacob, xen-devel, Tim Deegan, Hurwitz, Sherry

On 6/12/2013 5:37 PM, Suravee Suthikulpanit wrote:
> On 6/12/2013 1:24 AM, Jan Beulich wrote:
>>> If more entries are added to the event log during the time that event
>>> log interrupt is disabled (in the control register),
>>> the IOMMU hardware will generate interrupt once the the interrupt 
>>> enable
>>> bit in the control register changes from 0 to 1 and set the status
>>> register.  Since the "iommu_interrupt_handler" code is already calling
>>> "schedule_tasklet",  we should not need to "re-schedule" tasklet here.
>>> I have confirmed the hardware behavior described with the hardware
>>> designer.  This is also the same on the PPR log.
>> And also the same between v1 and v2 hardware? Again, I'd like to
>> be on the safe side, and rather do a reschedule too much than one
>> too little. And in any case, having your documentation made more
>> precise in these respects would be much appreciated.
>>
>> Jan
>>
>>
> Understand.  I apologize if the AMD IOMMU specification does not 
> describe the behavior quite clearly.  Let me know if I could help 
> clarifing any issues with the hardware designers.
>
> Since we are modifying the IOMMU interrupt enabling/disabling, I have 
> been doing some more testing on the IOMMU interrupt handling. I found 
> that IOMMU MSI interrupt is currently broken, but I think this is 
> because of some older changes.  I am still tracking down the issue, 
> and will update my findings.
>
> Thank you,
>
> Suravee

The following commit broke the IOMMU MSI interrupt:

2012-11-28    899110e3f6d2a191638e8b50a981c551eeec49e6 AMD IOMMU: 
include IOMMU interrupt information in 'M' debug key output
(http://xenbits.xen.org/gitweb/?p=xen.git;a=commit;h=899110e3f6d2a191638e8b50a981c551eeec49e6)

This patch also need the following patch to resolve kernel panic:

c759fee45bf44f2947a3480d54c03ff7e028c39e AMD IOMMU: add locking missing 
from c/s 26198:ba90ecb0231f
(http://xenbits.xen.org/gitweb/?p=xen.git;a=commit;h=c759fee45bf44f2947a3480d54c03ff7e028c39e)

I'll update once I root cause the issue.

Suravee

>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel
>

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/2 v5] iommu/amd: Fix logic for clearing the IOMMU interrupt bits
  2013-06-13  1:44               ` Suravee Suthikulpanit
@ 2013-06-13  7:54                 ` Jan Beulich
  2013-06-13 13:48                   ` Suravee Suthikulpanit
  2013-06-13 14:20                 ` George Dunlap
  2013-06-13 15:58                 ` Jan Beulich
  2 siblings, 1 reply; 50+ messages in thread
From: Jan Beulich @ 2013-06-13  7:54 UTC (permalink / raw)
  To: Suravee Suthikulpanit, George Dunlap
  Cc: Tim Deegan, xen-devel, Jacob Shin, Sherry Hurwitz

>>> On 13.06.13 at 03:44, Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> wrote:
> On 6/12/2013 5:37 PM, Suravee Suthikulpanit wrote:
>> Since we are modifying the IOMMU interrupt enabling/disabling, I have 
>> been doing some more testing on the IOMMU interrupt handling. I found 
>> that IOMMU MSI interrupt is currently broken, but I think this is 
>> because of some older changes.  I am still tracking down the issue, 
>> and will update my findings.
> 
> The following commit broke the IOMMU MSI interrupt:
> 
> 2012-11-28    899110e3f6d2a191638e8b50a981c551eeec49e6 AMD IOMMU: 
> include IOMMU interrupt information in 'M' debug key output
> (http://xenbits.xen.org/gitweb/?p=xen.git;a=commit;h=899110e3f6d2a191638e8b5 
> 0a981c551eeec49e6)
> 
> This patch also need the following patch to resolve kernel panic:
> 
> c759fee45bf44f2947a3480d54c03ff7e028c39e AMD IOMMU: add locking missing 
> from c/s 26198:ba90ecb0231f
> (http://xenbits.xen.org/gitweb/?p=xen.git;a=commit;h=c759fee45bf44f2947a3480 
> d54c03ff7e028c39e)
> 
> I'll update once I root cause the issue.

I'll also take a look, but it would help to know in what way it is
broken: Some sort of crash, no interrupt delivered, ... This surely
needs to be resolved before 4.3 can go out - George, can you put
this on your bug list, please?

Jan

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/2 v5] iommu/amd: Fix logic for clearing the IOMMU interrupt bits
  2013-06-13  7:54                 ` Jan Beulich
@ 2013-06-13 13:48                   ` Suravee Suthikulpanit
  0 siblings, 0 replies; 50+ messages in thread
From: Suravee Suthikulpanit @ 2013-06-13 13:48 UTC (permalink / raw)
  To: Jan Beulich
  Cc: George Dunlap, Tim Deegan, xen-devel, Jacob Shin, Sherry Hurwitz


[-- Attachment #1.1: Type: text/plain, Size: 1866 bytes --]

On 6/13/2013 2:54 AM, Jan Beulich wrote:
>>>> On 13.06.13 at 03:44, Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> wrote:
>> On 6/12/2013 5:37 PM, Suravee Suthikulpanit wrote:
>>> Since we are modifying the IOMMU interrupt enabling/disabling, I have
>>> been doing some more testing on the IOMMU interrupt handling. I found
>>> that IOMMU MSI interrupt is currently broken, but I think this is
>>> because of some older changes.  I am still tracking down the issue,
>>> and will update my findings.
>> The following commit broke the IOMMU MSI interrupt:
>>
>> 2012-11-28    899110e3f6d2a191638e8b50a981c551eeec49e6 AMD IOMMU:
>> include IOMMU interrupt information in 'M' debug key output
>> (http://xenbits.xen.org/gitweb/?p=xen.git;a=commit;h=899110e3f6d2a191638e8b5
>> 0a981c551eeec49e6)
>>
>> This patch also need the following patch to resolve kernel panic:
>>
>> c759fee45bf44f2947a3480d54c03ff7e028c39e AMD IOMMU: add locking missing
>> from c/s 26198:ba90ecb0231f
>> (http://xenbits.xen.org/gitweb/?p=xen.git;a=commit;h=c759fee45bf44f2947a3480
>> d54c03ff7e028c39e)
>>
>> I'll update once I root cause the issue.
> I'll also take a look, but it would help to know in what way it is
> broken: Some sort of crash, no interrupt delivered, ... This surely
> needs to be resolved before 4.3 can go out - George, can you put
> this on your bug list, please?
>
> Jan
>
BasicallyI amnot seeing any  interrupts coming in from the IOMMU.   I was testing it by setting the
ComWaitIntEn bit of the IOMMU control register to let the IOMMU hardware generate MSI interrupts for
the COMPLETION_WAIT command.  I am not seeing any interrupts (i.e. The interrupt handler and
tasklet handler function is never executed).  I have double checked this in the latest Xen-4.2.x, and they
are working fine.  The same MSI interrupt is also used for PPR and Event Log.

Suravee


[-- Attachment #1.2: Type: text/html, Size: 3099 bytes --]

[-- Attachment #2: Type: text/plain, Size: 126 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/2 v5] iommu/amd: Fix logic for clearing the IOMMU interrupt bits
  2013-06-13  1:44               ` Suravee Suthikulpanit
  2013-06-13  7:54                 ` Jan Beulich
@ 2013-06-13 14:20                 ` George Dunlap
  2013-06-13 14:30                   ` Processed: " xen
  2013-06-13 15:58                 ` Jan Beulich
  2 siblings, 1 reply; 50+ messages in thread
From: George Dunlap @ 2013-06-13 14:20 UTC (permalink / raw)
  To: Suravee Suthikulpanit
  Cc: Tim Deegan, Hurwitz, Sherry, Shin, Jacob, Jan Beulich, xen-devel

create ^
title MSI interrupts broken with IOMMU after c/s 899110e
thanks

On Thu, Jun 13, 2013 at 2:44 AM, Suravee Suthikulpanit
<suravee.suthikulpanit@amd.com> wrote:
> On 6/12/2013 5:37 PM, Suravee Suthikulpanit wrote:
>>
>> On 6/12/2013 1:24 AM, Jan Beulich wrote:
>>>>
>>>> If more entries are added to the event log during the time that event
>>>> log interrupt is disabled (in the control register),
>>>> the IOMMU hardware will generate interrupt once the the interrupt enable
>>>> bit in the control register changes from 0 to 1 and set the status
>>>> register.  Since the "iommu_interrupt_handler" code is already calling
>>>> "schedule_tasklet",  we should not need to "re-schedule" tasklet here.
>>>> I have confirmed the hardware behavior described with the hardware
>>>> designer.  This is also the same on the PPR log.
>>>
>>> And also the same between v1 and v2 hardware? Again, I'd like to
>>> be on the safe side, and rather do a reschedule too much than one
>>> too little. And in any case, having your documentation made more
>>> precise in these respects would be much appreciated.
>>>
>>> Jan
>>>
>>>
>> Understand.  I apologize if the AMD IOMMU specification does not describe
>> the behavior quite clearly.  Let me know if I could help clarifing any
>> issues with the hardware designers.
>>
>> Since we are modifying the IOMMU interrupt enabling/disabling, I have been
>> doing some more testing on the IOMMU interrupt handling. I found that IOMMU
>> MSI interrupt is currently broken, but I think this is because of some older
>> changes.  I am still tracking down the issue, and will update my findings.
>>
>> Thank you,
>>
>> Suravee
>
>
> The following commit broke the IOMMU MSI interrupt:
>
> 2012-11-28    899110e3f6d2a191638e8b50a981c551eeec49e6 AMD IOMMU: include
> IOMMU interrupt information in 'M' debug key output
> (http://xenbits.xen.org/gitweb/?p=xen.git;a=commit;h=899110e3f6d2a191638e8b50a981c551eeec49e6)
>
> This patch also need the following patch to resolve kernel panic:
>
> c759fee45bf44f2947a3480d54c03ff7e028c39e AMD IOMMU: add locking missing from
> c/s 26198:ba90ecb0231f
> (http://xenbits.xen.org/gitweb/?p=xen.git;a=commit;h=c759fee45bf44f2947a3480d54c03ff7e028c39e)
>
> I'll update once I root cause the issue.
>
>
> Suravee
>
>>
>> _______________________________________________
>> Xen-devel mailing list
>> Xen-devel@lists.xen.org
>> http://lists.xen.org/xen-devel
>>
>
>
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Processed: Re: [PATCH 1/2 v5] iommu/amd: Fix logic for clearing the IOMMU interrupt bits
  2013-06-13 14:20                 ` George Dunlap
@ 2013-06-13 14:30                   ` xen
  0 siblings, 0 replies; 50+ messages in thread
From: xen @ 2013-06-13 14:30 UTC (permalink / raw)
  To: George Dunlap, xen-devel

Processing commands for xen@bugs.xenproject.org:

> create ^
Command failed: Addresses: dunlapg@umich.edu are not permitted to use `create' at /srv/xen-devel-bugs/lib/emesinae/control.pl line 171, <M> line 48.
Stop processing here.

---
Xen Hypervisor Bug Tracker
See http://wiki.xen.org/wiki/Reporting_Bugs_against_Xen for information on reporting bugs
Contact xen-bugs-owner@bugs.xenproject.org with any infrastructure issues

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/2 v5] iommu/amd: Fix logic for clearing the IOMMU interrupt bits
  2013-06-13  1:44               ` Suravee Suthikulpanit
  2013-06-13  7:54                 ` Jan Beulich
  2013-06-13 14:20                 ` George Dunlap
@ 2013-06-13 15:58                 ` Jan Beulich
  2013-06-13 16:34                   ` Suravee Suthikulanit
  2 siblings, 1 reply; 50+ messages in thread
From: Jan Beulich @ 2013-06-13 15:58 UTC (permalink / raw)
  To: Suravee Suthikulpanit; +Cc: Tim Deegan, xen-devel, Jacob Shin, Sherry Hurwitz

>>> On 13.06.13 at 03:44, Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> wrote:
> The following commit broke the IOMMU MSI interrupt:
> 
> 2012-11-28    899110e3f6d2a191638e8b50a981c551eeec49e6 AMD IOMMU: 
> include IOMMU interrupt information in 'M' debug key output
> (http://xenbits.xen.org/gitweb/?p=xen.git;a=commit;h=899110e3f6d2a191638e8b5 
> 0a981c551eeec49e6)

Having gone over the changes again, this still looks pretty innocent/
mechanical to me - I can't see what may have got broken.
Considering that this is the change adding respective information to
'M' output - what does 'M' show for the IOMMU entry/entries?

Jan

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/2 v5] iommu/amd: Fix logic for clearing the IOMMU interrupt bits
  2013-06-13 15:58                 ` Jan Beulich
@ 2013-06-13 16:34                   ` Suravee Suthikulanit
  2013-06-14  6:27                     ` Jan Beulich
  0 siblings, 1 reply; 50+ messages in thread
From: Suravee Suthikulanit @ 2013-06-13 16:34 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Tim Deegan, xen-devel, Jacob Shin, Sherry Hurwitz

On 6/13/2013 10:58 AM, Jan Beulich wrote:
>>>> On 13.06.13 at 03:44, Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> wrote:
>> The following commit broke the IOMMU MSI interrupt:
>>
>> 2012-11-28    899110e3f6d2a191638e8b50a981c551eeec49e6 AMD IOMMU:
>> include IOMMU interrupt information in 'M' debug key output
>> (http://xenbits.xen.org/gitweb/?p=xen.git;a=commit;h=899110e3f6d2a191638e8b5
>> 0a981c551eeec49e6)
> Having gone over the changes again, this still looks pretty innocent/
> mechanical to me - I can't see what may have got broken.
> Considering that this is the change adding respective information to
> 'M' output - what does 'M' show for the IOMMU entry/entries?
>
> Jan
>
>
Basically, the only different is this line that only appears in the "Bad" version.

     (XEN)  MSI     56 vec=28  fixed  edge deassert phys    cpu dest=00000001 mask=0/0/1

"xl debug-key i" also show the following information

     (XEN)    IRQ:  56 affinity:1 vec:28 type=AMD-IOMMU-MSI   status=00000000 mapped, unbound

Not sure what "status=0" means.
   
Before: (Good)
(XEN) MSI information:
(XEN)  MSI     57 vec=c0 lowest  edge   assert  log lowest dest=00000001 mask=0/1/1
(XEN)  MSI     58 vec=c8 lowest  edge   assert  log lowest dest=00000001 mask=0/1/1
(XEN)  MSI     59 vec=d0 lowest  edge   assert  log lowest dest=00000001 mask=0/1/1
(XEN)  MSI     60 vec=d8 lowest  edge   assert  log lowest dest=00000001 mask=0/1/1
(XEN)  MSI     61 vec=29 lowest  edge   assert  log lowest dest=00000001 mask=0/1/1
(XEN)  MSI-X   62 vec=31 lowest  edge   assert  log lowest dest=00000001 mask=1/0/0
(XEN)  MSI-X   63 vec=39 lowest  edge   assert  log lowest dest=00000001 mask=1/0/0
(XEN)  MSI-X   64 vec=41 lowest  edge   assert  log lowest dest=00000001 mask=1/0/0
(XEN)  MSI-X   65 vec=49 lowest  edge   assert  log lowest dest=00000001 mask=1/0/0
(XEN)  MSI-X   66 vec=51 lowest  edge   assert  log lowest dest=00000001 mask=1/0/0
(XEN)  MSI-X   67 vec=59 lowest  edge   assert  log lowest dest=00000001 mask=1/0/0
(XEN)  MSI     68 vec=69 lowest  edge   assert  log lowest dest=00000001 mask=0/1/1
(XEN)  MSI-X   69 vec=79 lowest  edge   assert  log lowest dest=00000003 mask=1/1/1
(XEN)  MSI-X   70 vec=81 lowest  edge   assert  log lowest dest=00000003 mask=1/1/1
(XEN)  MSI-X   71 vec=89 lowest  edge   assert  log lowest dest=00000003 mask=1/1/1
(XEN)  MSI-X   72 vec=99 lowest  edge   assert  log lowest dest=00000001 mask=1/0/0
(XEN)  MSI-X   73 vec=a1 lowest  edge   assert  log lowest dest=00000002 mask=1/0/0
(XEN)  MSI-X   74 vec=a9 lowest  edge   assert  log lowest dest=00000001 mask=1/0/0
(XEN)  MSI     75 vec=b9 lowest  edge   assert  log lowest dest=00000001 mask=0/1/1
(XEN)  MSI     76 vec=c1 lowest  edge   assert  log lowest dest=00000001 mask=0/1/1


After: (Bad)
(XEN) MSI information:
(XEN)  MSI     56 vec=28  fixed  edge deassert phys    cpu dest=00000001 mask=0/0/1
(XEN)  MSI     57 vec=c0 lowest  edge   assert  log lowest dest=00000001 mask=0/1/1
(XEN)  MSI     58 vec=c8 lowest  edge   assert  log lowest dest=00000001 mask=0/1/1
(XEN)  MSI     59 vec=d0 lowest  edge   assert  log lowest dest=00000001 mask=0/1/1
(XEN)  MSI     60 vec=d8 lowest  edge   assert  log lowest dest=00000001 mask=0/1/1
(XEN)  MSI     61 vec=29 lowest  edge   assert  log lowest dest=00000001 mask=0/1/1
(XEN)  MSI-X   62 vec=31 lowest  edge   assert  log lowest dest=00000001 mask=1/0/0
(XEN)  MSI-X   63 vec=39 lowest  edge   assert  log lowest dest=00000001 mask=1/0/0
(XEN)  MSI-X   64 vec=41 lowest  edge   assert  log lowest dest=00000001 mask=1/0/0
(XEN)  MSI-X   65 vec=49 lowest  edge   assert  log lowest dest=00000001 mask=1/0/0
(XEN)  MSI-X   66 vec=51 lowest  edge   assert  log lowest dest=00000001 mask=1/0/0
(XEN)  MSI-X   67 vec=59 lowest  edge   assert  log lowest dest=00000001 mask=1/0/0
(XEN)  MSI     68 vec=71 lowest  edge   assert  log lowest dest=00000001 mask=0/1/1
(XEN)  MSI-X   69 vec=79 lowest  edge   assert  log lowest dest=00000003 mask=1/1/1
(XEN)  MSI-X   70 vec=81 lowest  edge   assert  log lowest dest=00000003 mask=1/1/1
(XEN)  MSI-X   71 vec=89 lowest  edge   assert  log lowest dest=00000003 mask=1/1/1
(XEN)  MSI-X   72 vec=99 lowest  edge   assert  log lowest dest=00000001 mask=1/0/0
(XEN)  MSI-X   73 vec=a1 lowest  edge   assert  log lowest dest=00000002 mask=1/0/0
(XEN)  MSI-X   74 vec=a9 lowest  edge   assert  log lowest dest=00000001 mask=1/0/0
(XEN)  MSI     75 vec=b9 lowest  edge   assert  log lowest dest=00000001 mask=0/1/1
(XEN)  MSI     76 vec=c1 lowest  edge   assert  log lowest dest=00000001 mask=0/1/1

Suravee

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/2 v5] iommu/amd: Fix logic for clearing the IOMMU interrupt bits
  2013-06-13 16:34                   ` Suravee Suthikulanit
@ 2013-06-14  6:27                     ` Jan Beulich
  2013-06-14  6:40                       ` Jan Beulich
  0 siblings, 1 reply; 50+ messages in thread
From: Jan Beulich @ 2013-06-14  6:27 UTC (permalink / raw)
  To: Suravee Suthikulanit; +Cc: Tim Deegan, xen-devel, Jacob Shin, Sherry Hurwitz

>>> On 13.06.13 at 18:34, Suravee Suthikulanit <suravee.suthikulpanit@amd.com> wrote:
> Basically, the only different is this line that only appears in the "Bad" 
> version.
> 
>      (XEN)  MSI     56 vec=28  fixed  edge deassert phys    cpu dest=00000001 mask=0/0/1
> 
> "xl debug-key i" also show the following information
> 
>      (XEN)    IRQ:  56 affinity:1 vec:28 type=AMD-IOMMU-MSI   status=00000000 mapped, unbound

There are several questionable things here:
- the interrupt being of "deassert" kind
- destination mode being physical (while all others are logical)
- the interrupt being unbound (i.e. there's no action associated
  with it, and hence no handler would ever get called)

> Not sure what "status=0" means.

This says that none of the IRQ_* flags are set in desc->status,
which isn't in itself problematic for non-guest interrupts.

Jan

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/2 v5] iommu/amd: Fix logic for clearing the IOMMU interrupt bits
  2013-06-14  6:27                     ` Jan Beulich
@ 2013-06-14  6:40                       ` Jan Beulich
  2013-06-14  7:14                         ` [PATCH] AMD IOMMU: make interrupt work again Jan Beulich
  0 siblings, 1 reply; 50+ messages in thread
From: Jan Beulich @ 2013-06-14  6:40 UTC (permalink / raw)
  To: Suravee Suthikulanit; +Cc: Tim Deegan, xen-devel, Jacob Shin, Sherry Hurwitz

>>> On 14.06.13 at 08:27, "Jan Beulich" <JBeulich@suse.com> wrote:
>>>> On 13.06.13 at 18:34, Suravee Suthikulanit <suravee.suthikulpanit@amd.com> 
> wrote:
>> Basically, the only different is this line that only appears in the "Bad" 
>> version.
>> 
>>      (XEN)  MSI     56 vec=28  fixed  edge deassert phys    cpu 
> dest=00000001 mask=0/0/1
>> 
>> "xl debug-key i" also show the following information
>> 
>>      (XEN)    IRQ:  56 affinity:1 vec:28 type=AMD-IOMMU-MSI   status=00000000 
> mapped, unbound
> 
> There are several questionable things here:
> - the interrupt being of "deassert" kind
> - destination mode being physical (while all others are logical)
> - the interrupt being unbound (i.e. there's no action associated
>   with it, and hence no handler would ever get called)

And I think I see now at least part of what's missing: Neither
msi_compose_msg() nor write_msi_msg() get (indirectly) called
from set_iommu_interrupt_handler(). Which was that (wrong)
way already before said c/s, but got masked by
iommu_msi_set_affinity() doing a full setup rather than
incremental modification as set_msi_affinity() does. In order to
fix this reasonably cleanly I'd like to do a little bit of code
re-structuring, so it'll be more than a couple of minutes until I
could send you a patch.

Plus that - to me - doesn't explain the NULL action pointer yet.
But wait - was that log perhaps taken with the tree rewound to
said broken commit? I.e. missing commit d739470b? That would
explain it.

Jan

^ permalink raw reply	[flat|nested] 50+ messages in thread

* [PATCH] AMD IOMMU: make interrupt work again
  2013-06-14  6:40                       ` Jan Beulich
@ 2013-06-14  7:14                         ` Jan Beulich
  2013-06-14 16:10                           ` Suravee Suthikulanit
  0 siblings, 1 reply; 50+ messages in thread
From: Jan Beulich @ 2013-06-14  7:14 UTC (permalink / raw)
  To: Suravee Suthikulanit; +Cc: Tim Deegan, xen-devel, Jacob Shin, Sherry Hurwitz

[-- Attachment #1: Type: text/plain, Size: 5953 bytes --]

>>> On 14.06.13 at 08:40, "Jan Beulich" <JBeulich@suse.com> wrote:
>>>> On 14.06.13 at 08:27, "Jan Beulich" <JBeulich@suse.com> wrote:
>>>>> On 13.06.13 at 18:34, Suravee Suthikulanit <suravee.suthikulpanit@amd.com> wrote:
>>> Basically, the only different is this line that only appears in the "Bad" 
>>> version.
>>> 
>>>      (XEN)  MSI     56 vec=28  fixed  edge deassert phys    cpu 
>> dest=00000001 mask=0/0/1
>>> 
>>> "xl debug-key i" also show the following information
>>> 
>>>      (XEN)    IRQ:  56 affinity:1 vec:28 type=AMD-IOMMU-MSI   status=00000000 
>> mapped, unbound
>> 
>> There are several questionable things here:
>> - the interrupt being of "deassert" kind
>> - destination mode being physical (while all others are logical)
>> - the interrupt being unbound (i.e. there's no action associated
>>   with it, and hence no handler would ever get called)
> 
> And I think I see now at least part of what's missing: Neither
> msi_compose_msg() nor write_msi_msg() get (indirectly) called
> from set_iommu_interrupt_handler(). Which was that (wrong)
> way already before said c/s, but got masked by
> iommu_msi_set_affinity() doing a full setup rather than
> incremental modification as set_msi_affinity() does. In order to
> fix this reasonably cleanly I'd like to do a little bit of code
> re-structuring, so it'll be more than a couple of minutes until I
> could send you a patch.

So here's a first try.

Jan

Commit 899110e3 ("AMD IOMMU: include IOMMU interrupt information in 'M'
debug key output") made the AMD IOMMU MSI setup code use more of the
generic MSI setup code (as other than for VT-d this is an ordinary MSI-
capable PCI device), but failed to notice that till now interrupt setup
there _required_ the subsequent affinity setup to be done, as that was
the only point where the MSI message would get written. The generic MSI
affinity setting routine, however, does only an incremental change,
i.e. relies on this setup to have been done before.

In order to not make the code even more clumsy, introduce a new low
level helper routine __setup_msi_irq(), thus eliminating the need for
the AMD IOMMU code to directly fiddle with the IRQ descriptor.

Reported-by: Suravee Suthikulanit <suravee.suthikulpanit@amd.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/msi.c
+++ b/xen/arch/x86/msi.c
@@ -472,11 +472,18 @@ static struct msi_desc* alloc_msi_entry(
 
 int setup_msi_irq(struct irq_desc *desc, struct msi_desc *msidesc)
 {
+    return __setup_msi_irq(desc, msidesc,
+                           msi_maskable_irq(msidesc) ? &pci_msi_maskable
+                                                     : &pci_msi_nonmaskable);
+}
+
+int __setup_msi_irq(struct irq_desc *desc, struct msi_desc *msidesc,
+                    hw_irq_controller *handler)
+{
     struct msi_msg msg;
 
     desc->msi_desc = msidesc;
-    desc->handler = msi_maskable_irq(msidesc) ? &pci_msi_maskable
-                                              : &pci_msi_nonmaskable;
+    desc->handler = handler;
     msi_compose_msg(desc, &msg);
     return write_msi_msg(msidesc, &msg);
 }
--- a/xen/drivers/passthrough/amd/iommu_init.c
+++ b/xen/drivers/passthrough/amd/iommu_init.c
@@ -748,7 +748,7 @@ static void iommu_interrupt_handler(int 
 static bool_t __init set_iommu_interrupt_handler(struct amd_iommu *iommu)
 {
     int irq, ret;
-    struct irq_desc *desc;
+    hw_irq_controller *handler;
     unsigned long flags;
     u16 control;
 
@@ -759,7 +759,6 @@ static bool_t __init set_iommu_interrupt
         return 0;
     }
 
-    desc = irq_to_desc(irq);
     spin_lock_irqsave(&pcidevs_lock, flags);
     iommu->msi.dev = pci_get_pdev(iommu->seg, PCI_BUS(iommu->bdf),
                                   PCI_DEVFN2(iommu->bdf));
@@ -771,7 +770,6 @@ static bool_t __init set_iommu_interrupt
                         PCI_SLOT(iommu->bdf), PCI_FUNC(iommu->bdf));
         return 0;
     }
-    desc->msi_desc = &iommu->msi;
     control = pci_conf_read16(iommu->seg, PCI_BUS(iommu->bdf),
                               PCI_SLOT(iommu->bdf), PCI_FUNC(iommu->bdf),
                               iommu->msi.msi_attrib.pos + PCI_MSI_FLAGS);
@@ -781,14 +779,15 @@ static bool_t __init set_iommu_interrupt
         iommu->msi.msi_attrib.maskbit = 1;
         iommu->msi.msi.mpos = msi_mask_bits_reg(iommu->msi.msi_attrib.pos,
                                                 is_64bit_address(control));
-        desc->handler = &iommu_maskable_msi_type;
+        handler = &iommu_maskable_msi_type;
     }
     else
-        desc->handler = &iommu_msi_type;
-    ret = request_irq(irq, iommu_interrupt_handler, 0, "amd_iommu", iommu);
+        handler = &iommu_msi_type;
+    ret = __setup_msi_irq(irq_to_desc(irq), &iommu->msi, handler);
+    if ( !ret )
+        ret = request_irq(irq, iommu_interrupt_handler, 0, "amd_iommu", iommu);
     if ( ret )
     {
-        desc->handler = &no_irq_type;
         destroy_irq(irq);
         AMD_IOMMU_DEBUG("can't request irq\n");
         return 0;
--- a/xen/include/asm-x86/msi.h
+++ b/xen/include/asm-x86/msi.h
@@ -72,6 +72,7 @@ struct msi_msg {
 };
 
 struct irq_desc;
+struct hw_interrupt_type;
 struct msi_desc;
 /* Helper functions */
 extern int pci_enable_msi(struct msi_info *msi, struct msi_desc **desc);
@@ -79,6 +80,8 @@ extern void pci_disable_msi(struct msi_d
 extern int pci_prepare_msix(u16 seg, u8 bus, u8 devfn, bool_t off);
 extern void pci_cleanup_msi(struct pci_dev *pdev);
 extern int setup_msi_irq(struct irq_desc *, struct msi_desc *);
+extern int __setup_msi_irq(struct irq_desc *, struct msi_desc *,
+                           const struct hw_interrupt_type *);
 extern void teardown_msi_irq(int irq);
 extern int msi_free_vector(struct msi_desc *entry);
 extern int pci_restore_msi_state(struct pci_dev *pdev);



[-- Attachment #2: AMD-IOMMU-fix-IRQ.patch --]
[-- Type: text/plain, Size: 4523 bytes --]

AMD IOMMU: make interrupt work again

Commit 899110e3 ("AMD IOMMU: include IOMMU interrupt information in 'M'
debug key output") made the AMD IOMMU MSI setup code use more of the
generic MSI setup code (as other than for VT-d this is an ordinary MSI-
capable PCI device), but failed to notice that till now interrupt setup
there _required_ the subsequent affinity setup to be done, as that was
the only point where the MSI message would get written. The generic MSI
affinity setting routine, however, does only an incremental change,
i.e. relies on this setup to have been done before.

In order to not make the code even more clumsy, introduce a new low
level helper routine __setup_msi_irq(), thus eliminating the need for
the AMD IOMMU code to directly fiddle with the IRQ descriptor.

Reported-by: Suravee Suthikulanit <suravee.suthikulpanit@amd.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/msi.c
+++ b/xen/arch/x86/msi.c
@@ -472,11 +472,18 @@ static struct msi_desc* alloc_msi_entry(
 
 int setup_msi_irq(struct irq_desc *desc, struct msi_desc *msidesc)
 {
+    return __setup_msi_irq(desc, msidesc,
+                           msi_maskable_irq(msidesc) ? &pci_msi_maskable
+                                                     : &pci_msi_nonmaskable);
+}
+
+int __setup_msi_irq(struct irq_desc *desc, struct msi_desc *msidesc,
+                    hw_irq_controller *handler)
+{
     struct msi_msg msg;
 
     desc->msi_desc = msidesc;
-    desc->handler = msi_maskable_irq(msidesc) ? &pci_msi_maskable
-                                              : &pci_msi_nonmaskable;
+    desc->handler = handler;
     msi_compose_msg(desc, &msg);
     return write_msi_msg(msidesc, &msg);
 }
--- a/xen/drivers/passthrough/amd/iommu_init.c
+++ b/xen/drivers/passthrough/amd/iommu_init.c
@@ -748,7 +748,7 @@ static void iommu_interrupt_handler(int 
 static bool_t __init set_iommu_interrupt_handler(struct amd_iommu *iommu)
 {
     int irq, ret;
-    struct irq_desc *desc;
+    hw_irq_controller *handler;
     unsigned long flags;
     u16 control;
 
@@ -759,7 +759,6 @@ static bool_t __init set_iommu_interrupt
         return 0;
     }
 
-    desc = irq_to_desc(irq);
     spin_lock_irqsave(&pcidevs_lock, flags);
     iommu->msi.dev = pci_get_pdev(iommu->seg, PCI_BUS(iommu->bdf),
                                   PCI_DEVFN2(iommu->bdf));
@@ -771,7 +770,6 @@ static bool_t __init set_iommu_interrupt
                         PCI_SLOT(iommu->bdf), PCI_FUNC(iommu->bdf));
         return 0;
     }
-    desc->msi_desc = &iommu->msi;
     control = pci_conf_read16(iommu->seg, PCI_BUS(iommu->bdf),
                               PCI_SLOT(iommu->bdf), PCI_FUNC(iommu->bdf),
                               iommu->msi.msi_attrib.pos + PCI_MSI_FLAGS);
@@ -781,14 +779,15 @@ static bool_t __init set_iommu_interrupt
         iommu->msi.msi_attrib.maskbit = 1;
         iommu->msi.msi.mpos = msi_mask_bits_reg(iommu->msi.msi_attrib.pos,
                                                 is_64bit_address(control));
-        desc->handler = &iommu_maskable_msi_type;
+        handler = &iommu_maskable_msi_type;
     }
     else
-        desc->handler = &iommu_msi_type;
-    ret = request_irq(irq, iommu_interrupt_handler, 0, "amd_iommu", iommu);
+        handler = &iommu_msi_type;
+    ret = __setup_msi_irq(irq_to_desc(irq), &iommu->msi, handler);
+    if ( !ret )
+        ret = request_irq(irq, iommu_interrupt_handler, 0, "amd_iommu", iommu);
     if ( ret )
     {
-        desc->handler = &no_irq_type;
         destroy_irq(irq);
         AMD_IOMMU_DEBUG("can't request irq\n");
         return 0;
--- a/xen/include/asm-x86/msi.h
+++ b/xen/include/asm-x86/msi.h
@@ -72,6 +72,7 @@ struct msi_msg {
 };
 
 struct irq_desc;
+struct hw_interrupt_type;
 struct msi_desc;
 /* Helper functions */
 extern int pci_enable_msi(struct msi_info *msi, struct msi_desc **desc);
@@ -79,6 +80,8 @@ extern void pci_disable_msi(struct msi_d
 extern int pci_prepare_msix(u16 seg, u8 bus, u8 devfn, bool_t off);
 extern void pci_cleanup_msi(struct pci_dev *pdev);
 extern int setup_msi_irq(struct irq_desc *, struct msi_desc *);
+extern int __setup_msi_irq(struct irq_desc *, struct msi_desc *,
+                           const struct hw_interrupt_type *);
 extern void teardown_msi_irq(int irq);
 extern int msi_free_vector(struct msi_desc *entry);
 extern int pci_restore_msi_state(struct pci_dev *pdev);

[-- Attachment #3: Type: text/plain, Size: 126 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH] AMD IOMMU: make interrupt work again
  2013-06-14  7:14                         ` [PATCH] AMD IOMMU: make interrupt work again Jan Beulich
@ 2013-06-14 16:10                           ` Suravee Suthikulanit
  0 siblings, 0 replies; 50+ messages in thread
From: Suravee Suthikulanit @ 2013-06-14 16:10 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Tim Deegan, xen-devel, Shin, Jacob, Hurwitz, Sherry

On 6/14/2013 2:14 AM, Jan Beulich wrote:
>>>> On 14.06.13 at 08:40, "Jan Beulich" <JBeulich@suse.com> wrote:
>>>>> On 14.06.13 at 08:27, "Jan Beulich" <JBeulich@suse.com> wrote:
>>>>>> On 13.06.13 at 18:34, Suravee Suthikulanit <suravee.suthikulpanit@amd.com> wrote:
>>>> Basically, the only different is this line that only appears in the "Bad"
>>>> version.
>>>>
>>>>       (XEN)  MSI     56 vec=28  fixed  edge deassert phys    cpu
>>> dest=00000001 mask=0/0/1
>>>> "xl debug-key i" also show the following information
>>>>
>>>>       (XEN)    IRQ:  56 affinity:1 vec:28 type=AMD-IOMMU-MSI   status=00000000
>>> mapped, unbound
>>>
>>> There are several questionable things here:
>>> - the interrupt being of "deassert" kind
>>> - destination mode being physical (while all others are logical)
>>> - the interrupt being unbound (i.e. there's no action associated
>>>    with it, and hence no handler would ever get called)
>> And I think I see now at least part of what's missing: Neither
>> msi_compose_msg() nor write_msi_msg() get (indirectly) called
>> from set_iommu_interrupt_handler(). Which was that (wrong)
>> way already before said c/s, but got masked by
>> iommu_msi_set_affinity() doing a full setup rather than
>> incremental modification as set_msi_affinity() does. In order to
>> fix this reasonably cleanly I'd like to do a little bit of code
>> re-structuring, so it'll be more than a couple of minutes until I
>> could send you a patch.
> So here's a first try.
>
> Jan
This fixes the issue.  Here is the output from the xl debug-key i and M.

(XEN)  MSI     56 vec=28 lowest  edge   assert  log lowest dest=00000001 
mask=0/0/?

XEN)    IRQ:  56 affinity:1 vec:28 type=AMD-IOMMU-MSI status=00000000 
iommu_interrupt_handler+0/0x66

Acked: - Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>

Thank you Jan.

Suravee

>
> Commit 899110e3 ("AMD IOMMU: include IOMMU interrupt information in 'M'
> debug key output") made the AMD IOMMU MSI setup code use more of the
> generic MSI setup code (as other than for VT-d this is an ordinary MSI-
> capable PCI device), but failed to notice that till now interrupt setup
> there _required_ the subsequent affinity setup to be done, as that was
> the only point where the MSI message would get written. The generic MSI
> affinity setting routine, however, does only an incremental change,
> i.e. relies on this setup to have been done before.
>
> In order to not make the code even more clumsy, introduce a new low
> level helper routine __setup_msi_irq(), thus eliminating the need for
> the AMD IOMMU code to directly fiddle with the IRQ descriptor.
>
> Reported-by: Suravee Suthikulanit <suravee.suthikulpanit@amd.com>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>
> --- a/xen/arch/x86/msi.c
> +++ b/xen/arch/x86/msi.c
> @@ -472,11 +472,18 @@ static struct msi_desc* alloc_msi_entry(
>   
>   int setup_msi_irq(struct irq_desc *desc, struct msi_desc *msidesc)
>   {
> +    return __setup_msi_irq(desc, msidesc,
> +                           msi_maskable_irq(msidesc) ? &pci_msi_maskable
> +                                                     : &pci_msi_nonmaskable);
> +}
> +
> +int __setup_msi_irq(struct irq_desc *desc, struct msi_desc *msidesc,
> +                    hw_irq_controller *handler)
> +{
>       struct msi_msg msg;
>   
>       desc->msi_desc = msidesc;
> -    desc->handler = msi_maskable_irq(msidesc) ? &pci_msi_maskable
> -                                              : &pci_msi_nonmaskable;
> +    desc->handler = handler;
>       msi_compose_msg(desc, &msg);
>       return write_msi_msg(msidesc, &msg);
>   }
> --- a/xen/drivers/passthrough/amd/iommu_init.c
> +++ b/xen/drivers/passthrough/amd/iommu_init.c
> @@ -748,7 +748,7 @@ static void iommu_interrupt_handler(int
>   static bool_t __init set_iommu_interrupt_handler(struct amd_iommu *iommu)
>   {
>       int irq, ret;
> -    struct irq_desc *desc;
> +    hw_irq_controller *handler;
>       unsigned long flags;
>       u16 control;
>   
> @@ -759,7 +759,6 @@ static bool_t __init set_iommu_interrupt
>           return 0;
>       }
>   
> -    desc = irq_to_desc(irq);
>       spin_lock_irqsave(&pcidevs_lock, flags);
>       iommu->msi.dev = pci_get_pdev(iommu->seg, PCI_BUS(iommu->bdf),
>                                     PCI_DEVFN2(iommu->bdf));
> @@ -771,7 +770,6 @@ static bool_t __init set_iommu_interrupt
>                           PCI_SLOT(iommu->bdf), PCI_FUNC(iommu->bdf));
>           return 0;
>       }
> -    desc->msi_desc = &iommu->msi;
>       control = pci_conf_read16(iommu->seg, PCI_BUS(iommu->bdf),
>                                 PCI_SLOT(iommu->bdf), PCI_FUNC(iommu->bdf),
>                                 iommu->msi.msi_attrib.pos + PCI_MSI_FLAGS);
> @@ -781,14 +779,15 @@ static bool_t __init set_iommu_interrupt
>           iommu->msi.msi_attrib.maskbit = 1;
>           iommu->msi.msi.mpos = msi_mask_bits_reg(iommu->msi.msi_attrib.pos,
>                                                   is_64bit_address(control));
> -        desc->handler = &iommu_maskable_msi_type;
> +        handler = &iommu_maskable_msi_type;
>       }
>       else
> -        desc->handler = &iommu_msi_type;
> -    ret = request_irq(irq, iommu_interrupt_handler, 0, "amd_iommu", iommu);
> +        handler = &iommu_msi_type;
> +    ret = __setup_msi_irq(irq_to_desc(irq), &iommu->msi, handler);
> +    if ( !ret )
> +        ret = request_irq(irq, iommu_interrupt_handler, 0, "amd_iommu", iommu);
>       if ( ret )
>       {
> -        desc->handler = &no_irq_type;
>           destroy_irq(irq);
>           AMD_IOMMU_DEBUG("can't request irq\n");
>           return 0;
> --- a/xen/include/asm-x86/msi.h
> +++ b/xen/include/asm-x86/msi.h
> @@ -72,6 +72,7 @@ struct msi_msg {
>   };
>   
>   struct irq_desc;
> +struct hw_interrupt_type;
>   struct msi_desc;
>   /* Helper functions */
>   extern int pci_enable_msi(struct msi_info *msi, struct msi_desc **desc);
> @@ -79,6 +80,8 @@ extern void pci_disable_msi(struct msi_d
>   extern int pci_prepare_msix(u16 seg, u8 bus, u8 devfn, bool_t off);
>   extern void pci_cleanup_msi(struct pci_dev *pdev);
>   extern int setup_msi_irq(struct irq_desc *, struct msi_desc *);
> +extern int __setup_msi_irq(struct irq_desc *, struct msi_desc *,
> +                           const struct hw_interrupt_type *);
>   extern void teardown_msi_irq(int irq);
>   extern int msi_free_vector(struct msi_desc *entry);
>   extern int pci_restore_msi_state(struct pci_dev *pdev);
>
>

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 2/2 v5] iommu/amd: Workaround for erratum 787
  2013-06-11  6:47     ` [PATCH 2/2 v5] " Jan Beulich
@ 2013-06-17 18:57       ` Suravee Suthikulanit
  0 siblings, 0 replies; 50+ messages in thread
From: Suravee Suthikulanit @ 2013-06-17 18:57 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Tim Deegan, xen-devel

On 6/11/2013 1:47 AM, Jan Beulich wrote:
> The IOMMU interrupt handling in bottom half must clear the PPR log interrupt
> and event log interrupt bits to re-enable the interrupt. This is done by
> writing 1 to the memory mapped register to clear the bit. Due to hardware bug,
> if the driver tries to clear this bit while the IOMMU hardware also setting
> this bit, the conflict will result with the bit being set. If the interrupt
> handling code does not make sure to clear this bit, subsequent changes in the
> event/PPR logs will no longer generating interrupts, and would result if
> buffer overflow. After clearing the bits, the driver must read back
> the register to verify.
>
> Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
>
> Adjust to apply on top of heavily modified patch 1. Adjust flow to get away
> with a single readl() in each instance of the status register checks.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> v5: Moved most of what accumulated here into patch 1. Rather than looping,
>      re-schedule the tasklet to work around the erratum.
>      (skipped v4 to remain in sync with patch 1)
>
> --- a/xen/drivers/passthrough/amd/iommu_init.c
> +++ b/xen/drivers/passthrough/amd/iommu_init.c
> @@ -636,6 +636,14 @@ static void iommu_check_event_log(struct
>           }
>       }
>   
> +    /*
> +     * Workaround for erratum787:
> +     * Re-check to make sure the bit has been cleared.
> +     */
> +    entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
> +    if ( entry & IOMMU_STATUS_EVENT_LOG_INT_MASK )
> +        tasklet_schedule(&amd_iommu_irq_tasklet);
> +
>       spin_unlock_irqrestore(&iommu->lock, flags);
>   }
>   
> @@ -717,6 +725,14 @@ static void iommu_check_ppr_log(struct a
>           }
>       }
>   
> +    /*
> +     * Workaround for erratum787:
> +     * Re-check to make sure the bit has been cleared.
> +     */
> +    entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
> +    if ( entry & IOMMU_STATUS_PPR_LOG_INT_MASK )
> +        tasklet_schedule(&amd_iommu_irq_tasklet);
> +
>       spin_unlock_irqrestore(&iommu->lock, flags);
>   }
>   
>
>
>
Acked: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH 1/2 v5] iommu/amd: Fix logic for clearing the IOMMU interrupt bits
  2013-06-12  6:24           ` Jan Beulich
  2013-06-12 22:37             ` Suravee Suthikulpanit
@ 2013-06-17 18:59             ` Suravee Suthikulanit
  1 sibling, 0 replies; 50+ messages in thread
From: Suravee Suthikulanit @ 2013-06-17 18:59 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Tim Deegan, xen-devel

On 6/12/2013 1:24 AM, Jan Beulich wrote:
>>>> On 12.06.13 at 01:03, Suravee Suthikulanit <suravee.suthikulpanit@amd.com>
> wrote:
>> On 6/11/2013 1:47 AM, Jan Beulich wrote:
>>> @@ -611,22 +608,33 @@ static void iommu_check_event_log(struct
>>>        u32 entry;
>>>        unsigned long flags;
>>>    
>>> +    /* RW1C interrupt status bit */
>>> +    writel(IOMMU_STATUS_EVENT_LOG_INT_MASK,
>>> +           iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
>>> +
>>>        iommu_read_log(iommu, &iommu->event_log,
>>>                       sizeof(event_entry_t), parse_event_log_entry);
>>>    
>>>        spin_lock_irqsave(&iommu->lock, flags);
>>>        
>>> -    /*check event overflow */
>>> +    /* Check event overflow. */
>>>        entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
>>> -
>>>        if ( iommu_get_bit(entry, IOMMU_STATUS_EVENT_OVERFLOW_SHIFT) )
>>>            iommu_reset_log(iommu, &iommu->event_log,
>> set_iommu_event_log_control);
>>> -
>>> -    /* reset interrupt status bit */
>>> -    entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
>>> -    iommu_set_bit(&entry, IOMMU_STATUS_EVENT_LOG_INT_SHIFT);
>>> -
>>> -    writel(entry, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
>>> +    else
>>> +    {
>>> +        entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
>>> +        if ( !(entry & IOMMU_CONTROL_EVENT_LOG_INT_MASK) )
>>> +        {
>>> +            entry |= IOMMU_CONTROL_EVENT_LOG_INT_MASK;
>>> +            writel(entry, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
>>> +            /*
>>> +             * Re-schedule the tasklet to handle eventual log entries added
>>> +             * between reading the log above and re-enabling the interrupt.
>>> +             */
>>> +            tasklet_schedule(&amd_iommu_irq_tasklet);
>>> +        }
>>> +    }
>> If more entries are added to the event log during the time that event
>> log interrupt is disabled (in the control register),
>> the IOMMU hardware will generate interrupt once the the interrupt enable
>> bit in the control register changes from 0 to 1 and set the status
>> register.  Since the "iommu_interrupt_handler" code is already calling
>> "schedule_tasklet",  we should not need to "re-schedule" tasklet here.
>> I have confirmed the hardware behavior described with the hardware
>> designer.  This is also the same on the PPR log.
> And also the same between v1 and v2 hardware? Again, I'd like to
> be on the safe side, and rather do a reschedule too much than one
> too little. And in any case, having your documentation made more
> precise in these respects would be much appreciated.
>
> Jan
>
>
Acked: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>

^ permalink raw reply	[flat|nested] 50+ messages in thread

end of thread, other threads:[~2013-06-17 18:59 UTC | newest]

Thread overview: 50+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-06-10  5:05 [PATCH 1/2 V2] iommu/amd: Fix logic for clearing the IOMMU interrupt bits suravee.suthikulpanit
2013-06-10  5:05 ` [PATCH 2/2 V2] iommu/amd: Workaround for erratum 787 suravee.suthikulpanit
2013-06-10  9:35   ` Tim Deegan
2013-06-10  9:47     ` Jan Beulich
2013-06-10 10:40       ` Tim Deegan
2013-06-10 10:53         ` Jan Beulich
2013-06-10 12:43           ` Tim Deegan
2013-06-10 13:23             ` Jan Beulich
2013-06-10 13:41             ` Jan Beulich
2013-06-10 13:56               ` Tim Deegan
2013-06-10 13:55             ` Jan Beulich
2013-06-10 15:03               ` Jan Beulich
2013-06-10 16:31                 ` Tim Deegan
2013-06-10 23:13                   ` Suravee Suthikulanit
2013-06-11  6:45                     ` Jan Beulich
2013-06-11  6:40                   ` Jan Beulich
2013-06-11  8:53                     ` Tim Deegan
2013-06-10 13:53           ` Suravee Suthikulanit
2013-06-10 13:59             ` Jan Beulich
2013-06-10 15:11               ` Suravee Suthikulanit
2013-06-10 15:21                 ` Jan Beulich
2013-06-10 10:59   ` [PATCH 2/2 v3] " Jan Beulich
2013-06-11  6:47     ` [PATCH 2/2 v5] " Jan Beulich
2013-06-17 18:57       ` Suravee Suthikulanit
2013-06-10 10:05 ` [PATCH 1/2 V2] iommu/amd: Fix logic for clearing the IOMMU interrupt bits Jan Beulich
2013-06-10 10:56 ` [PATCH 1/2 v3] " Jan Beulich
2013-06-10 11:02   ` Jan Beulich
2013-06-10 12:18   ` Tim Deegan
2013-06-10 12:31     ` Jan Beulich
2013-06-10 13:58       ` Suravee Suthikulanit
2013-06-10 12:41     ` [PATCH 1/2 v4] " Jan Beulich
2013-06-10 12:46       ` Tim Deegan
2013-06-10 13:49       ` George Dunlap
2013-06-10 14:08         ` Jan Beulich
2013-06-11  6:47       ` [PATCH 1/2 v5] " Jan Beulich
2013-06-11 23:03         ` Suravee Suthikulanit
2013-06-12  6:24           ` Jan Beulich
2013-06-12 22:37             ` Suravee Suthikulpanit
2013-06-13  1:44               ` Suravee Suthikulpanit
2013-06-13  7:54                 ` Jan Beulich
2013-06-13 13:48                   ` Suravee Suthikulpanit
2013-06-13 14:20                 ` George Dunlap
2013-06-13 14:30                   ` Processed: " xen
2013-06-13 15:58                 ` Jan Beulich
2013-06-13 16:34                   ` Suravee Suthikulanit
2013-06-14  6:27                     ` Jan Beulich
2013-06-14  6:40                       ` Jan Beulich
2013-06-14  7:14                         ` [PATCH] AMD IOMMU: make interrupt work again Jan Beulich
2013-06-14 16:10                           ` Suravee Suthikulanit
2013-06-17 18:59             ` [PATCH 1/2 v5] iommu/amd: Fix logic for clearing the IOMMU interrupt bits Suravee Suthikulanit

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.