All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] x86/hvm: implement save/restore for posted interrupts
@ 2014-07-03 15:09 Olaf Hering
  2014-07-03 15:27 ` Jan Beulich
                   ` (2 more replies)
  0 siblings, 3 replies; 60+ messages in thread
From: Olaf Hering @ 2014-07-03 15:09 UTC (permalink / raw)
  To: xen-devel
  Cc: Kevin Tian, Keir Fraser, Jan Beulich, Eddie Dong, Olaf Hering,
	Dongxiao Xu, Jun Nakajima

Saving and restoring a PVonHVM guest on a host which has the VMX
"Posted Interrupt Processing" feature enabled will fail because the
xen-platform-pci device does not receive interrupts anymore after
restore. The reason is that the IRQs are not maintained in APIC_IRR,
but in a separate PIR array. This info is lost during the save
operation.

Syncing the PIR state into IRR during save, and restoring the state
later will fix 'xm save/restore' of a PVonHVM guest.

Signed-off-by: Olaf Hering <olaf@aepfle.de>
---

Jan suggested to use the TMR flag to set 'trig' for vlapic_set_irq.
We are not sure if thats the correct approach, instead of forcing
trig=1. 
Comments are welcome.


 xen/arch/x86/hvm/vlapic.c        |  4 ++++
 xen/arch/x86/hvm/vmx/vmx.c       | 14 ++++++++++++++
 xen/include/asm-x86/hvm/hvm.h    |  1 +
 xen/include/asm-x86/hvm/vlapic.h |  2 ++
 4 files changed, 21 insertions(+)

diff --git a/xen/arch/x86/hvm/vlapic.c b/xen/arch/x86/hvm/vlapic.c
index cd7e872..7201544 100644
--- a/xen/arch/x86/hvm/vlapic.c
+++ b/xen/arch/x86/hvm/vlapic.c
@@ -1179,6 +1179,8 @@ static int lapic_save_regs(struct domain *d, hvm_domain_context_t *h)
 
     for_each_vcpu ( d, v )
     {
+        if ( hvm_funcs.sync_pir_to_irr )
+            hvm_funcs.sync_pir_to_irr(v);
         s = vcpu_vlapic(v);
         if ( (rc = hvm_save_entry(LAPIC_REGS, v->vcpu_id, h, s->regs)) != 0 )
             break;
@@ -1230,6 +1232,8 @@ static int lapic_load_regs(struct domain *d, hvm_domain_context_t *h)
     if ( hvm_load_entry(LAPIC_REGS, h, s->regs) != 0 ) 
         return -EINVAL;
 
+    if ( hvm_funcs.sync_irr_to_pir )
+        hvm_funcs.sync_irr_to_pir(v);
     if ( hvm_funcs.process_isr )
         hvm_funcs.process_isr(vlapic_find_highest_isr(s), v);
 
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 2caa04a..85df77d 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -1654,6 +1654,18 @@ static void vmx_sync_pir_to_irr(struct vcpu *v)
         vlapic_set_vector(i, &vlapic->regs->data[APIC_IRR]);
 }
 
+static void vmx_sync_irr_to_pir(struct vcpu *v)
+{
+    struct vlapic *vlapic = vcpu_vlapic(v);
+    unsigned int vector;
+
+    for ( vector = 0; vector < NR_VECTORS; vector++ ) {
+        if ( vlapic_test_vector(vector, &vlapic->regs->data[APIC_IRR]) )
+            vlapic_set_irq(vlapic, vector,
+                    vlapic_test_vector(vector, &vlapic->regs->data[APIC_TMR]));
+    }
+}
+
 static void vmx_handle_eoi(u8 vector)
 {
     unsigned long status;
@@ -1737,6 +1749,7 @@ static struct hvm_function_table __initdata vmx_function_table = {
     .process_isr          = vmx_process_isr,
     .deliver_posted_intr  = vmx_deliver_posted_intr,
     .sync_pir_to_irr      = vmx_sync_pir_to_irr,
+    .sync_irr_to_pir      = vmx_sync_irr_to_pir,
     .handle_eoi           = vmx_handle_eoi,
     .nhvm_hap_walk_L1_p2m = nvmx_hap_walk_L1_p2m,
     .hypervisor_cpuid_leaf = vmx_hypervisor_cpuid_leaf,
@@ -1783,6 +1796,7 @@ const struct hvm_function_table * __init start_vmx(void)
     {
         vmx_function_table.deliver_posted_intr = NULL;
         vmx_function_table.sync_pir_to_irr = NULL;
+        vmx_function_table.sync_irr_to_pir = NULL;
     }
 
     if ( cpu_has_vmx_ept
diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
index 0ebd478..c762f1e 100644
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -194,6 +194,7 @@ struct hvm_function_table {
     void (*process_isr)(int isr, struct vcpu *v);
     void (*deliver_posted_intr)(struct vcpu *v, u8 vector);
     void (*sync_pir_to_irr)(struct vcpu *v);
+    void (*sync_irr_to_pir)(struct vcpu *v);
     void (*handle_eoi)(u8 vector);
 
     /*Walk nested p2m  */
diff --git a/xen/include/asm-x86/hvm/vlapic.h b/xen/include/asm-x86/hvm/vlapic.h
index 66f0aff..2a83a1e 100644
--- a/xen/include/asm-x86/hvm/vlapic.h
+++ b/xen/include/asm-x86/hvm/vlapic.h
@@ -58,6 +58,8 @@
 
 #define VEC_POS(v) ((v) % 32)
 #define REG_POS(v) (((v) / 32) * 0x10)
+#define vlapic_test_vector(vec, bitmap)                         \
+    test_bit(VEC_POS(vec), (uint32_t *)((bitmap) + REG_POS(vec)))
 #define vlapic_test_and_set_vector(vec, bitmap)                         \
     test_and_set_bit(VEC_POS(vec), (uint32_t *)((bitmap) + REG_POS(vec)))
 #define vlapic_test_and_clear_vector(vec, bitmap)                       \

^ permalink raw reply related	[flat|nested] 60+ messages in thread

* Re: [PATCH] x86/hvm: implement save/restore for posted interrupts
  2014-07-03 15:09 [PATCH] x86/hvm: implement save/restore for posted interrupts Olaf Hering
@ 2014-07-03 15:27 ` Jan Beulich
  2014-07-09 11:24 ` Zhang, Yang Z
  2014-07-25  9:40 ` Jan Beulich
  2 siblings, 0 replies; 60+ messages in thread
From: Jan Beulich @ 2014-07-03 15:27 UTC (permalink / raw)
  To: Olaf Hering
  Cc: Kevin Tian, Keir Fraser, Eddie Dong, xen-devel, Dongxiao Xu,
	Jun Nakajima

>>> On 03.07.14 at 17:09, <olaf@aepfle.de> wrote:
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -1654,6 +1654,18 @@ static void vmx_sync_pir_to_irr(struct vcpu *v)
>          vlapic_set_vector(i, &vlapic->regs->data[APIC_IRR]);
>  }
>  
> +static void vmx_sync_irr_to_pir(struct vcpu *v)
> +{
> +    struct vlapic *vlapic = vcpu_vlapic(v);
> +    unsigned int vector;
> +
> +    for ( vector = 0; vector < NR_VECTORS; vector++ ) {

Coding style.

> +        if ( vlapic_test_vector(vector, &vlapic->regs->data[APIC_IRR]) )
> +            vlapic_set_irq(vlapic, vector,
> +                    vlapic_test_vector(vector, &vlapic->regs->data[APIC_TMR]));

You would get away with using vlapic_test_and_clear_vector() (i.e.
without introducing vlapic_test_vector()), since vlapic_set_irq() will
set the flag again if it was found set here.

And still the question remains whether to really use vlapic_set_irq()
here, or rather open code the pieces of it that we actually need (at
once allowing to eliminate two indirect function calls).

Let's see what the VMX maintainers have to say.

Jan

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH] x86/hvm: implement save/restore for posted interrupts
  2014-07-03 15:09 [PATCH] x86/hvm: implement save/restore for posted interrupts Olaf Hering
  2014-07-03 15:27 ` Jan Beulich
@ 2014-07-09 11:24 ` Zhang, Yang Z
  2014-07-09 21:14   ` Tian, Kevin
  2014-07-25  9:40 ` Jan Beulich
  2 siblings, 1 reply; 60+ messages in thread
From: Zhang, Yang Z @ 2014-07-09 11:24 UTC (permalink / raw)
  To: Olaf Hering, xen-devel
  Cc: Tian, Kevin, Keir Fraser, Jan Beulich, Dong, Eddie, Xu, Dongxiao,
	Nakajima, Jun

Olaf Hering wrote on 2014-07-03:
> Saving and restoring a PVonHVM guest on a host which has the VMX "Posted
> Interrupt Processing" feature enabled will fail because the xen-platform-pci
> device does not receive interrupts anymore after restore. The reason is that
> the IRQs are not maintained in APIC_IRR, but in a separate PIR array. This info
> is lost during the save operation.
> 
> Syncing the PIR state into IRR during save, and restoring the state later will fix
> 'xm save/restore' of a PVonHVM guest.
> 
> Signed-off-by: Olaf Hering <olaf@aepfle.de>
> ---
> 
> Jan suggested to use the TMR flag to set 'trig' for vlapic_set_irq.
> We are not sure if thats the correct approach, instead of forcing trig=1.
> Comments are welcome.
> 
> 
>  xen/arch/x86/hvm/vlapic.c        |  4 ++++
>  xen/arch/x86/hvm/vmx/vmx.c       | 14 ++++++++++++++
>  xen/include/asm-x86/hvm/hvm.h    |  1 +
>  xen/include/asm-x86/hvm/vlapic.h |  2 ++
>  4 files changed, 21 insertions(+)
> 
> diff --git a/xen/arch/x86/hvm/vlapic.c b/xen/arch/x86/hvm/vlapic.c index
> cd7e872..7201544 100644
> --- a/xen/arch/x86/hvm/vlapic.c
> +++ b/xen/arch/x86/hvm/vlapic.c
> @@ -1179,6 +1179,8 @@ static int lapic_save_regs(struct domain *d,
> hvm_domain_context_t *h)
> 
>      for_each_vcpu ( d, v )
>      {
> +        if ( hvm_funcs.sync_pir_to_irr )
> +            hvm_funcs.sync_pir_to_irr(v);
>          s = vcpu_vlapic(v);
>          if ( (rc = hvm_save_entry(LAPIC_REGS, v->vcpu_id, h, s->regs)) != 0 )
>              break;
> @@ -1230,6 +1232,8 @@ static int lapic_load_regs(struct domain *d,
> hvm_domain_context_t *h)
>      if ( hvm_load_entry(LAPIC_REGS, h, s->regs) != 0 )
>          return -EINVAL;
> 
> +    if ( hvm_funcs.sync_irr_to_pir )
> +        hvm_funcs.sync_irr_to_pir(v);

This is redundant. Interrupt pending in irr will be injected to guest correctly, so there is no need to sync it to pir.

>      if ( hvm_funcs.process_isr )
>          hvm_funcs.process_isr(vlapic_find_highest_isr(s), v);
> 
> diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c index
> 2caa04a..85df77d 100644
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -1654,6 +1654,18 @@ static void vmx_sync_pir_to_irr(struct vcpu *v)
>          vlapic_set_vector(i, &vlapic->regs->data[APIC_IRR]);  }
> 
> +static void vmx_sync_irr_to_pir(struct vcpu *v) {
> +    struct vlapic *vlapic = vcpu_vlapic(v);
> +    unsigned int vector;
> +
> +    for ( vector = 0; vector < NR_VECTORS; vector++ ) {
> +        if ( vlapic_test_vector(vector, &vlapic->regs->data[APIC_IRR]) )
> +            vlapic_set_irq(vlapic, vector,
> +                    vlapic_test_vector(vector,
> &vlapic->regs->data[APIC_TMR]));
> +    }
> +}
> +
>  static void vmx_handle_eoi(u8 vector)
>  {
>      unsigned long status;
> @@ -1737,6 +1749,7 @@ static struct hvm_function_table __initdata
> vmx_function_table = {
>      .process_isr          = vmx_process_isr,
>      .deliver_posted_intr  = vmx_deliver_posted_intr,
>      .sync_pir_to_irr      = vmx_sync_pir_to_irr,
> +    .sync_irr_to_pir      = vmx_sync_irr_to_pir,
>      .handle_eoi           = vmx_handle_eoi,
>      .nhvm_hap_walk_L1_p2m = nvmx_hap_walk_L1_p2m,
>      .hypervisor_cpuid_leaf = vmx_hypervisor_cpuid_leaf, @@ -1783,6
> +1796,7 @@ const struct hvm_function_table * __init start_vmx(void)
>      {
>          vmx_function_table.deliver_posted_intr = NULL;
>          vmx_function_table.sync_pir_to_irr = NULL;
> +        vmx_function_table.sync_irr_to_pir = NULL;
>      }
> 
>      if ( cpu_has_vmx_ept
> diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
> index 0ebd478..c762f1e 100644
> --- a/xen/include/asm-x86/hvm/hvm.h
> +++ b/xen/include/asm-x86/hvm/hvm.h
> @@ -194,6 +194,7 @@ struct hvm_function_table {
>      void (*process_isr)(int isr, struct vcpu *v);
>      void (*deliver_posted_intr)(struct vcpu *v, u8 vector);
>      void (*sync_pir_to_irr)(struct vcpu *v);
> +    void (*sync_irr_to_pir)(struct vcpu *v);
>      void (*handle_eoi)(u8 vector);
> 
>      /*Walk nested p2m  */
> diff --git a/xen/include/asm-x86/hvm/vlapic.h
> b/xen/include/asm-x86/hvm/vlapic.h
> index 66f0aff..2a83a1e 100644
> --- a/xen/include/asm-x86/hvm/vlapic.h
> +++ b/xen/include/asm-x86/hvm/vlapic.h
> @@ -58,6 +58,8 @@
> 
>  #define VEC_POS(v) ((v) % 32)
>  #define REG_POS(v) (((v) / 32) * 0x10)
> +#define vlapic_test_vector(vec, bitmap)                         \
> +    test_bit(VEC_POS(vec), (uint32_t *)((bitmap) + REG_POS(vec)))
>  #define vlapic_test_and_set_vector(vec, bitmap)
> \
>      test_and_set_bit(VEC_POS(vec), (uint32_t *)((bitmap) + REG_POS(vec)))
>  #define vlapic_test_and_clear_vector(vec, bitmap)
> \
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel

Best regards,
Yang

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH] x86/hvm: implement save/restore for posted interrupts
  2014-07-09 11:24 ` Zhang, Yang Z
@ 2014-07-09 21:14   ` Tian, Kevin
  2014-07-16 14:28     ` Olaf Hering
  0 siblings, 1 reply; 60+ messages in thread
From: Tian, Kevin @ 2014-07-09 21:14 UTC (permalink / raw)
  To: Zhang, Yang Z, Olaf Hering, xen-devel
  Cc: Dong, Eddie, Xu, Dongxiao, Keir Fraser, Nakajima, Jun, Jan Beulich

> From: Zhang, Yang Z
> Sent: Wednesday, July 09, 2014 4:25 AM
> 
> Olaf Hering wrote on 2014-07-03:
> > Saving and restoring a PVonHVM guest on a host which has the VMX "Posted
> > Interrupt Processing" feature enabled will fail because the xen-platform-pci
> > device does not receive interrupts anymore after restore. The reason is that
> > the IRQs are not maintained in APIC_IRR, but in a separate PIR array. This
> info
> > is lost during the save operation.
> >
> > Syncing the PIR state into IRR during save, and restoring the state later will
> fix
> > 'xm save/restore' of a PVonHVM guest.
> >
> > Signed-off-by: Olaf Hering <olaf@aepfle.de>
> > ---
> >
> > Jan suggested to use the TMR flag to set 'trig' for vlapic_set_irq.
> > We are not sure if thats the correct approach, instead of forcing trig=1.
> > Comments are welcome.
> >
> >
> >  xen/arch/x86/hvm/vlapic.c        |  4 ++++
> >  xen/arch/x86/hvm/vmx/vmx.c       | 14 ++++++++++++++
> >  xen/include/asm-x86/hvm/hvm.h    |  1 +
> >  xen/include/asm-x86/hvm/vlapic.h |  2 ++
> >  4 files changed, 21 insertions(+)
> >
> > diff --git a/xen/arch/x86/hvm/vlapic.c b/xen/arch/x86/hvm/vlapic.c index
> > cd7e872..7201544 100644
> > --- a/xen/arch/x86/hvm/vlapic.c
> > +++ b/xen/arch/x86/hvm/vlapic.c
> > @@ -1179,6 +1179,8 @@ static int lapic_save_regs(struct domain *d,
> > hvm_domain_context_t *h)
> >
> >      for_each_vcpu ( d, v )
> >      {
> > +        if ( hvm_funcs.sync_pir_to_irr )
> > +            hvm_funcs.sync_pir_to_irr(v);
> >          s = vcpu_vlapic(v);
> >          if ( (rc = hvm_save_entry(LAPIC_REGS, v->vcpu_id, h, s->regs)) !=
> 0 )
> >              break;
> > @@ -1230,6 +1232,8 @@ static int lapic_load_regs(struct domain *d,
> > hvm_domain_context_t *h)
> >      if ( hvm_load_entry(LAPIC_REGS, h, s->regs) != 0 )
> >          return -EINVAL;
> >
> > +    if ( hvm_funcs.sync_irr_to_pir )
> > +        hvm_funcs.sync_irr_to_pir(v);
> 
> This is redundant. Interrupt pending in irr will be injected to guest correctly, so
> there is no need to sync it to pir.
> 

agree. pending irr has to be injected somewhere after restore, and at that point
irr->pir has already been covered. otherwise it'd be a more general issue.

could you try whether only adding sync_pir_to_irr is enough?

Thanks
Kevin

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH] x86/hvm: implement save/restore for posted interrupts
  2014-07-09 21:14   ` Tian, Kevin
@ 2014-07-16 14:28     ` Olaf Hering
  2014-07-16 16:11       ` Tian, Kevin
  0 siblings, 1 reply; 60+ messages in thread
From: Olaf Hering @ 2014-07-16 14:28 UTC (permalink / raw)
  To: Tian, Kevin
  Cc: Keir Fraser, Nakajima, Jun, Dong, Eddie, xen-devel, Xu, Dongxiao,
	Jan Beulich, Zhang, Yang Z

On Wed, Jul 09, Tian, Kevin wrote:

> > From: Zhang, Yang Z
> > > +++ b/xen/arch/x86/hvm/vlapic.c
> > > @@ -1179,6 +1179,8 @@ static int lapic_save_regs(struct domain *d,
> > > hvm_domain_context_t *h)
> > >
> > >      for_each_vcpu ( d, v )
> > >      {
> > > +        if ( hvm_funcs.sync_pir_to_irr )
> > > +            hvm_funcs.sync_pir_to_irr(v);
> > >          s = vcpu_vlapic(v);
> > >          if ( (rc = hvm_save_entry(LAPIC_REGS, v->vcpu_id, h, s->regs)) !=
> > 0 )
> > >              break;
> > > @@ -1230,6 +1232,8 @@ static int lapic_load_regs(struct domain *d,
> > > hvm_domain_context_t *h)
> > >      if ( hvm_load_entry(LAPIC_REGS, h, s->regs) != 0 )
> > >          return -EINVAL;
> > >
> > > +    if ( hvm_funcs.sync_irr_to_pir )
> > > +        hvm_funcs.sync_irr_to_pir(v);
> > This is redundant. Interrupt pending in irr will be injected to guest correctly, so
> > there is no need to sync it to pir.
> agree. pending irr has to be injected somewhere after restore, and at that point
> irr->pir has already been covered. otherwise it'd be a more general issue.
> could you try whether only adding sync_pir_to_irr is enough?

It continues to fail if I change just lapic_save_regs.

Olaf

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH] x86/hvm: implement save/restore for posted interrupts
  2014-07-16 14:28     ` Olaf Hering
@ 2014-07-16 16:11       ` Tian, Kevin
  2014-07-17  7:11         ` Olaf Hering
  0 siblings, 1 reply; 60+ messages in thread
From: Tian, Kevin @ 2014-07-16 16:11 UTC (permalink / raw)
  To: Olaf Hering
  Cc: Keir Fraser, Nakajima, Jun, Dong, Eddie, xen-devel, Xu, Dongxiao,
	Jan Beulich, Zhang, Yang Z

> From: Olaf Hering [mailto:olaf@aepfle.de]
> Sent: Wednesday, July 16, 2014 7:29 AM
> 
> On Wed, Jul 09, Tian, Kevin wrote:
> 
> > > From: Zhang, Yang Z
> > > > +++ b/xen/arch/x86/hvm/vlapic.c
> > > > @@ -1179,6 +1179,8 @@ static int lapic_save_regs(struct domain *d,
> > > > hvm_domain_context_t *h)
> > > >
> > > >      for_each_vcpu ( d, v )
> > > >      {
> > > > +        if ( hvm_funcs.sync_pir_to_irr )
> > > > +            hvm_funcs.sync_pir_to_irr(v);
> > > >          s = vcpu_vlapic(v);
> > > >          if ( (rc = hvm_save_entry(LAPIC_REGS, v->vcpu_id, h,
> s->regs)) !=
> > > 0 )
> > > >              break;
> > > > @@ -1230,6 +1232,8 @@ static int lapic_load_regs(struct domain *d,
> > > > hvm_domain_context_t *h)
> > > >      if ( hvm_load_entry(LAPIC_REGS, h, s->regs) != 0 )
> > > >          return -EINVAL;
> > > >
> > > > +    if ( hvm_funcs.sync_irr_to_pir )
> > > > +        hvm_funcs.sync_irr_to_pir(v);
> > > This is redundant. Interrupt pending in irr will be injected to guest correctly,
> so
> > > there is no need to sync it to pir.
> > agree. pending irr has to be injected somewhere after restore, and at that
> point
> > irr->pir has already been covered. otherwise it'd be a more general issue.
> > could you try whether only adding sync_pir_to_irr is enough?
> 
> It continues to fail if I change just lapic_save_regs.
> 

could you figure out the reason? irr->pir propagation should happen automatically
when injecting a pending irr, which is supposed to be triggered in the restore path.
Otherwise I don't understand how it's handled even w/o pir concept...

Thanks
Kevin

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH] x86/hvm: implement save/restore for posted interrupts
  2014-07-16 16:11       ` Tian, Kevin
@ 2014-07-17  7:11         ` Olaf Hering
  2014-07-17  9:29           ` Zhang, Yang Z
  0 siblings, 1 reply; 60+ messages in thread
From: Olaf Hering @ 2014-07-17  7:11 UTC (permalink / raw)
  To: Tian, Kevin
  Cc: Keir Fraser, Nakajima, Jun, Dong, Eddie, xen-devel, Xu, Dongxiao,
	Jan Beulich, Zhang, Yang Z

On Wed, Jul 16, Tian, Kevin wrote:

> could you figure out the reason? irr->pir propagation should happen automatically
> when injecting a pending irr, which is supposed to be triggered in the restore path.
> Otherwise I don't understand how it's handled even w/o pir concept...

I can probably do that. And I think initially I just changed
lapic_save_regs, which was not enough. I think in the end the reason is
that with posted irqs the code paths are different and just having IRR
set is not enough.

Olaf

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH] x86/hvm: implement save/restore for posted interrupts
  2014-07-17  7:11         ` Olaf Hering
@ 2014-07-17  9:29           ` Zhang, Yang Z
  2014-07-18 13:58             ` Olaf Hering
  2014-07-25 21:38             ` Tian, Kevin
  0 siblings, 2 replies; 60+ messages in thread
From: Zhang, Yang Z @ 2014-07-17  9:29 UTC (permalink / raw)
  To: Olaf Hering, Tian, Kevin
  Cc: Keir Fraser, Jan Beulich, Dong, Eddie, xen-devel, Xu, Dongxiao,
	Nakajima, Jun

Olaf Hering wrote on 2014-07-17:
> On Wed, Jul 16, Tian, Kevin wrote:
> 
>> could you figure out the reason? irr->pir propagation should happen
>> automatically when injecting a pending irr, which is supposed to be
>> triggered in the restore path. Otherwise I don't understand how it's
>> handled even w/o pir concept...
> 
> I can probably do that. And I think initially I just changed
> lapic_save_regs, which was not enough. I think in the end the reason
> is that with posted irqs the code paths are different and just having IRR set is not enough.

It doesn't make sense. PIR is more like a tmp_IRR. All interrupts pending in PIR will finally sync to IRR by hardware. In your case, you already record it in IRR, it should be enough.
The problem I am seeing is that the eoi_exit_bitmap need to be setup correct after migration, please try attached patch to see whether it solves your problem:

diff --git a/xen/arch/x86/hvm/vlapic.c b/xen/arch/x86/hvm/vlapic.c
index cd7e872..d9368a7 100644
--- a/xen/arch/x86/hvm/vlapic.c
+++ b/xen/arch/x86/hvm/vlapic.c
@@ -131,7 +131,7 @@ void vlapic_set_irq(struct vlapic *vlapic, uint8_t vec, uint8_t trig)
         vlapic_set_vector(vec, &vlapic->regs->data[APIC_TMR]);
 
     if ( hvm_funcs.update_eoi_exit_bitmap )
-        hvm_funcs.update_eoi_exit_bitmap(target, vec, trig);
+        hvm_funcs.update_eoi_exit_bitmap(target, vec, trig, 0);
 
     if ( hvm_funcs.deliver_posted_intr )
         hvm_funcs.deliver_posted_intr(target, vec);
@@ -1179,6 +1179,9 @@ static int lapic_save_regs(struct domain *d, hvm_domain_context_t *h)
 
     for_each_vcpu ( d, v )
     {
+        if ( hvm_funcs.sync_pir_to_irr )
+            hvm_funcs.sync_pir_to_irr(v);
+
         s = vcpu_vlapic(v);
         if ( (rc = hvm_save_entry(LAPIC_REGS, v->vcpu_id, h, s->regs)) != 0 )
             break;
@@ -1230,6 +1233,9 @@ static int lapic_load_regs(struct domain *d, hvm_domain_context_t *h)
     if ( hvm_load_entry(LAPIC_REGS, h, s->regs) != 0 ) 
         return -EINVAL;
 
+    if ( hvm_funcs.update_eoi_exit_bitmap )
+        hvm_funcs.update_eoi_exit_bitmap(v, 0, 0, 1);
+
     if ( hvm_funcs.process_isr )
         hvm_funcs.process_isr(vlapic_find_highest_isr(s), v);
 
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 2caa04a..9a2dfd6 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -1567,12 +1567,28 @@ static void vmx_set_info_guest(struct vcpu *v)
     vmx_vmcs_exit(v);
 }
 
-static void vmx_update_eoi_exit_bitmap(struct vcpu *v, u8 vector, u8 trig)
+static void vmx_rebuild_eoi_exit_bitmap(struct vcpu *v)
 {
-    if ( trig )
-        vmx_set_eoi_exit_bitmap(v, vector);
+    struct vlapic *vlapic = vcpu_vlapic(v);
+    unsigned int vector;
+
+    for ( vector = 0; vector < NR_VECTORS; vector++ )
+        if ( vlapic_test_vector(vector, &vlapic->regs->data[APIC_IRR]) )
+            set_bit(vector, v->arch.hvm_vmx.eoi_exit_bitmap);
+}
+
+static void vmx_update_eoi_exit_bitmap(struct vcpu *v, u8 vector,
+                                       u8 trig, bool_t rebuild)
+{
+    if ( rebuild )
+        vmx_rebuild_eoi_exit_bitmap(v);
     else
-        vmx_clear_eoi_exit_bitmap(v, vector);
+    {
+        if ( trig )
+            vmx_set_eoi_exit_bitmap(v, vector);
+        else
+            vmx_clear_eoi_exit_bitmap(v, vector);
+    }
 }
 
 static int vmx_virtual_intr_delivery_enabled(void)
diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
index 0ebd478..3e2e365 100644
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -189,7 +189,8 @@ struct hvm_function_table {
     void (*nhvm_domain_relinquish_resources)(struct domain *d);
 
     /* Virtual interrupt delivery */
-    void (*update_eoi_exit_bitmap)(struct vcpu *v, u8 vector, u8 trig);
+    void (*update_eoi_exit_bitmap)(struct vcpu *v, u8 vector,
+                                   u8 trig, bool_t rebuild);
     int (*virtual_intr_delivery_enabled)(void);
     void (*process_isr)(int isr, struct vcpu *v);
     void (*deliver_posted_intr)(struct vcpu *v, u8 vector);
diff --git a/xen/include/asm-x86/hvm/vlapic.h b/xen/include/asm-x86/hvm/vlapic.h
index 66f0aff..2a83a1e 100644
--- a/xen/include/asm-x86/hvm/vlapic.h
+++ b/xen/include/asm-x86/hvm/vlapic.h
@@ -58,6 +58,8 @@
 
 #define VEC_POS(v) ((v) % 32)
 #define REG_POS(v) (((v) / 32) * 0x10)
+#define vlapic_test_vector(vec, bitmap)                         \
+    test_bit(VEC_POS(vec), (uint32_t *)((bitmap) + REG_POS(vec)))
 #define vlapic_test_and_set_vector(vec, bitmap)                         \
     test_and_set_bit(VEC_POS(vec), (uint32_t *)((bitmap) + REG_POS(vec)))
 #define vlapic_test_and_clear_vector(vec, bitmap)

Best regards,
Yang

^ permalink raw reply related	[flat|nested] 60+ messages in thread

* Re: [PATCH] x86/hvm: implement save/restore for posted interrupts
  2014-07-17  9:29           ` Zhang, Yang Z
@ 2014-07-18 13:58             ` Olaf Hering
  2014-07-22 23:06               ` Zhang, Yang Z
  2014-07-25 21:38             ` Tian, Kevin
  1 sibling, 1 reply; 60+ messages in thread
From: Olaf Hering @ 2014-07-18 13:58 UTC (permalink / raw)
  To: Zhang, Yang Z
  Cc: Tian, Kevin, Keir Fraser, Nakajima, Jun, Dong, Eddie, xen-devel,
	Xu, Dongxiao, Jan Beulich

On Thu, Jul 17, Zhang, Yang Z wrote:

> Olaf Hering wrote on 2014-07-17:
> > I can probably do that. And I think initially I just changed
> > lapic_save_regs, which was not enough. I think in the end the reason
> > is that with posted irqs the code paths are different and just
> > having IRR set is not enough.
> It doesn't make sense. PIR is more like a tmp_IRR. All interrupts
> pending in PIR will finally sync to IRR by hardware. In your case, you
> already record it in IRR, it should be enough.  The problem I am
> seeing is that the eoi_exit_bitmap need to be setup correct after
> migration, please try attached patch to see whether it solves your
> problem:

No, this patch does not work for me. Does it work for you?

Olaf

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH] x86/hvm: implement save/restore for posted interrupts
  2014-07-18 13:58             ` Olaf Hering
@ 2014-07-22 23:06               ` Zhang, Yang Z
  2014-07-23  8:03                 ` Olaf Hering
  0 siblings, 1 reply; 60+ messages in thread
From: Zhang, Yang Z @ 2014-07-22 23:06 UTC (permalink / raw)
  To: Olaf Hering
  Cc: Tian, Kevin, Keir Fraser, Nakajima, Jun, Dong, Eddie, xen-devel,
	Xu, Dongxiao, Jan Beulich

Olaf Hering wrote on 2014-07-18:
> On Thu, Jul 17, Zhang, Yang Z wrote:
> 
>> Olaf Hering wrote on 2014-07-17:
>>> I can probably do that. And I think initially I just changed
>>> lapic_save_regs, which was not enough. I think in the end the
>>> reason is that with posted irqs the code paths are different and
>>> just having IRR set is not enough.
>> It doesn't make sense. PIR is more like a tmp_IRR. All interrupts
>> pending in PIR will finally sync to IRR by hardware. In your case,
>> you already record it in IRR, it should be enough.  The problem I am
>> seeing is that the eoi_exit_bitmap need to be setup correct after
>> migration, please try attached patch to see whether it solves your
>> problem:
> 
> No, this patch does not work for me. Does it work for you?

I cannot reproduce this issue in my side. It still need your help to figure out it.

> 
> Olaf


Best regards,
Yang

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH] x86/hvm: implement save/restore for posted interrupts
  2014-07-22 23:06               ` Zhang, Yang Z
@ 2014-07-23  8:03                 ` Olaf Hering
  0 siblings, 0 replies; 60+ messages in thread
From: Olaf Hering @ 2014-07-23  8:03 UTC (permalink / raw)
  To: Zhang, Yang Z
  Cc: Tian, Kevin, Keir Fraser, Nakajima, Jun, Dong, Eddie, xen-devel,
	Xu, Dongxiao, Jan Beulich

On Tue, Jul 22, Zhang, Yang Z wrote:

> I cannot reproduce this issue in my side. It still need your help to
> figure out it.

Please try the sles11sp3-dvd1.iso, assign it to a HVM guest and boot
into the installer with 'start_shell netsetup=dhcp'. Then save/restore
the guest. "xenstore-ls -f | sort | grep state" will show "4" before
save and "2" after restore.

Olaf

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH] x86/hvm: implement save/restore for posted interrupts
  2014-07-03 15:09 [PATCH] x86/hvm: implement save/restore for posted interrupts Olaf Hering
  2014-07-03 15:27 ` Jan Beulich
  2014-07-09 11:24 ` Zhang, Yang Z
@ 2014-07-25  9:40 ` Jan Beulich
  2014-07-25 12:49   ` Tian, Kevin
  2 siblings, 1 reply; 60+ messages in thread
From: Jan Beulich @ 2014-07-25  9:40 UTC (permalink / raw)
  To: Kevin Tian, Yang Z Zhang
  Cc: Olaf Hering, Keir Fraser, Eddie Dong, Donald D Dugger, xen-devel,
	Dongxiao Xu, Jun Nakajima

>>> On 03.07.14 at 17:09, <olaf@aepfle.de> wrote:
> Saving and restoring a PVonHVM guest on a host which has the VMX
> "Posted Interrupt Processing" feature enabled will fail because the
> xen-platform-pci device does not receive interrupts anymore after
> restore. The reason is that the IRQs are not maintained in APIC_IRR,
> but in a separate PIR array. This info is lost during the save
> operation.
> 
> Syncing the PIR state into IRR during save, and restoring the state
> later will fix 'xm save/restore' of a PVonHVM guest.
> 
> Signed-off-by: Olaf Hering <olaf@aepfle.de>

Guys, where are we with this? Afaict all simplification suggestions
you provided so far yielded a non-working patch. Are you actively
working on getting a working alternative, or can we get an ack on
what Olaf submitted?

Thanks, Jan

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH] x86/hvm: implement save/restore for posted interrupts
  2014-07-25  9:40 ` Jan Beulich
@ 2014-07-25 12:49   ` Tian, Kevin
  2014-07-25 13:59     ` Jan Beulich
  0 siblings, 1 reply; 60+ messages in thread
From: Tian, Kevin @ 2014-07-25 12:49 UTC (permalink / raw)
  To: Jan Beulich, Zhang, Yang Z
  Cc: Olaf Hering, Keir Fraser, Dong, Eddie, Dugger, Donald D,
	xen-devel, Xu, Dongxiao, Nakajima, Jun

> From: Jan Beulich [mailto:JBeulich@suse.com]
> Sent: Friday, July 25, 2014 2:41 AM
> 
> >>> On 03.07.14 at 17:09, <olaf@aepfle.de> wrote:
> > Saving and restoring a PVonHVM guest on a host which has the VMX
> > "Posted Interrupt Processing" feature enabled will fail because the
> > xen-platform-pci device does not receive interrupts anymore after
> > restore. The reason is that the IRQs are not maintained in APIC_IRR,
> > but in a separate PIR array. This info is lost during the save
> > operation.
> >
> > Syncing the PIR state into IRR during save, and restoring the state
> > later will fix 'xm save/restore' of a PVonHVM guest.
> >
> > Signed-off-by: Olaf Hering <olaf@aepfle.de>
> 
> Guys, where are we with this? Afaict all simplification suggestions
> you provided so far yielded a non-working patch. Are you actively
> working on getting a working alternative, or can we get an ack on
> what Olaf submitted?
> 

I hope Olaf can figure out why sync_irr_to_pir is actually required before
I can ack.

Thanks
Kevin

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH] x86/hvm: implement save/restore for posted interrupts
  2014-07-25 12:49   ` Tian, Kevin
@ 2014-07-25 13:59     ` Jan Beulich
  2014-07-25 21:31       ` Tian, Kevin
  0 siblings, 1 reply; 60+ messages in thread
From: Jan Beulich @ 2014-07-25 13:59 UTC (permalink / raw)
  To: Kevin Tian
  Cc: Olaf Hering, Keir Fraser, Eddie Dong, Donald D Dugger, xen-devel,
	Dongxiao Xu, Jun Nakajima, Yang Z Zhang

>>> On 25.07.14 at 14:49, <kevin.tian@intel.com> wrote:
>>  From: Jan Beulich [mailto:JBeulich@suse.com]
>> Sent: Friday, July 25, 2014 2:41 AM
>> 
>> >>> On 03.07.14 at 17:09, <olaf@aepfle.de> wrote:
>> > Saving and restoring a PVonHVM guest on a host which has the VMX
>> > "Posted Interrupt Processing" feature enabled will fail because the
>> > xen-platform-pci device does not receive interrupts anymore after
>> > restore. The reason is that the IRQs are not maintained in APIC_IRR,
>> > but in a separate PIR array. This info is lost during the save
>> > operation.
>> >
>> > Syncing the PIR state into IRR during save, and restoring the state
>> > later will fix 'xm save/restore' of a PVonHVM guest.
>> >
>> > Signed-off-by: Olaf Hering <olaf@aepfle.de>
>> 
>> Guys, where are we with this? Afaict all simplification suggestions
>> you provided so far yielded a non-working patch. Are you actively
>> working on getting a working alternative, or can we get an ack on
>> what Olaf submitted?
>> 
> 
> I hope Olaf can figure out why sync_irr_to_pir is actually required before
> I can ack.

I was thinking it would be the other way around: You suggesting it's
not needed (which is wrong according to observations) would offer
an explanation and/or a working alternative fix. I don't think you can
expect Olaf to explain why what you suggest doesn't work, even
more so with _you_ being the VMX maintainer.

Jan

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH] x86/hvm: implement save/restore for posted interrupts
  2014-07-25 13:59     ` Jan Beulich
@ 2014-07-25 21:31       ` Tian, Kevin
  2014-07-28  6:46         ` Jan Beulich
  0 siblings, 1 reply; 60+ messages in thread
From: Tian, Kevin @ 2014-07-25 21:31 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Olaf Hering, Keir Fraser, Dong, Eddie, Dugger, Donald D,
	xen-devel, Xu, Dongxiao, Nakajima, Jun, Zhang, Yang Z

> From: Jan Beulich [mailto:JBeulich@suse.com]
> Sent: Friday, July 25, 2014 7:00 AM
> 
> >>> On 25.07.14 at 14:49, <kevin.tian@intel.com> wrote:
> >>  From: Jan Beulich [mailto:JBeulich@suse.com]
> >> Sent: Friday, July 25, 2014 2:41 AM
> >>
> >> >>> On 03.07.14 at 17:09, <olaf@aepfle.de> wrote:
> >> > Saving and restoring a PVonHVM guest on a host which has the VMX
> >> > "Posted Interrupt Processing" feature enabled will fail because the
> >> > xen-platform-pci device does not receive interrupts anymore after
> >> > restore. The reason is that the IRQs are not maintained in APIC_IRR,
> >> > but in a separate PIR array. This info is lost during the save
> >> > operation.
> >> >
> >> > Syncing the PIR state into IRR during save, and restoring the state
> >> > later will fix 'xm save/restore' of a PVonHVM guest.
> >> >
> >> > Signed-off-by: Olaf Hering <olaf@aepfle.de>
> >>
> >> Guys, where are we with this? Afaict all simplification suggestions
> >> you provided so far yielded a non-working patch. Are you actively
> >> working on getting a working alternative, or can we get an ack on
> >> what Olaf submitted?
> >>
> >
> > I hope Olaf can figure out why sync_irr_to_pir is actually required before
> > I can ack.
> 
> I was thinking it would be the other way around: You suggesting it's
> not needed (which is wrong according to observations) would offer
> an explanation and/or a working alternative fix. I don't think you can
> expect Olaf to explain why what you suggest doesn't work, even
> more so with _you_ being the VMX maintainer.
> 

Well, my read of this patch is that it hides some problem other place by
forcing posted injection at restore. As Yang has pointed out, it is not
necessary once the notification has been synced to IRR it's done (will 
be noted before vmentry). So at restore path, it's just about how pending 
IRR is handled. Now the problem is that Yang can't reproduce the problem 
locally (let's see any change with Olaf's further information), so we need 
Olaf's help to figure out the real culprit with our input.

Thanks
Kevin

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH] x86/hvm: implement save/restore for posted interrupts
  2014-07-17  9:29           ` Zhang, Yang Z
  2014-07-18 13:58             ` Olaf Hering
@ 2014-07-25 21:38             ` Tian, Kevin
  2014-07-28  6:52               ` Olaf Hering
  1 sibling, 1 reply; 60+ messages in thread
From: Tian, Kevin @ 2014-07-25 21:38 UTC (permalink / raw)
  To: Zhang, Yang Z, Olaf Hering
  Cc: Keir Fraser, Jan Beulich, Dong, Eddie, xen-devel, Xu, Dongxiao,
	Nakajima, Jun

> From: Zhang, Yang Z
> Sent: Thursday, July 17, 2014 2:30 AM
> 
> Olaf Hering wrote on 2014-07-17:
> > On Wed, Jul 16, Tian, Kevin wrote:
> >
> >> could you figure out the reason? irr->pir propagation should happen
> >> automatically when injecting a pending irr, which is supposed to be
> >> triggered in the restore path. Otherwise I don't understand how it's
> >> handled even w/o pir concept...
> >
> > I can probably do that. And I think initially I just changed
> > lapic_save_regs, which was not enough. I think in the end the reason
> > is that with posted irqs the code paths are different and just having IRR set is
> not enough.
> 
> It doesn't make sense. PIR is more like a tmp_IRR. All interrupts pending in PIR
> will finally sync to IRR by hardware. In your case, you already record it in IRR,
> it should be enough.
> The problem I am seeing is that the eoi_exit_bitmap need to be setup correct
> after migration, please try attached patch to see whether it solves your
> problem:

Hi, Yang,

I'm wondering whether eoi_exit_bitmap really matters here (though necessary),
because it is not about injection?

Olaf, could you help with one experiment, to check any pending IRR at restore
when disabling posted interrupt? I'm thinking whether this would be a more
general problem (though unlikely)...

btw, did you run any workload cross save/restore? does it only happen with
xen-platform-pci device?

Thanks
Kevin

> 
> diff --git a/xen/arch/x86/hvm/vlapic.c b/xen/arch/x86/hvm/vlapic.c
> index cd7e872..d9368a7 100644
> --- a/xen/arch/x86/hvm/vlapic.c
> +++ b/xen/arch/x86/hvm/vlapic.c
> @@ -131,7 +131,7 @@ void vlapic_set_irq(struct vlapic *vlapic, uint8_t vec,
> uint8_t trig)
>          vlapic_set_vector(vec, &vlapic->regs->data[APIC_TMR]);
> 
>      if ( hvm_funcs.update_eoi_exit_bitmap )
> -        hvm_funcs.update_eoi_exit_bitmap(target, vec, trig);
> +        hvm_funcs.update_eoi_exit_bitmap(target, vec, trig, 0);
> 
>      if ( hvm_funcs.deliver_posted_intr )
>          hvm_funcs.deliver_posted_intr(target, vec);
> @@ -1179,6 +1179,9 @@ static int lapic_save_regs(struct domain *d,
> hvm_domain_context_t *h)
> 
>      for_each_vcpu ( d, v )
>      {
> +        if ( hvm_funcs.sync_pir_to_irr )
> +            hvm_funcs.sync_pir_to_irr(v);
> +
>          s = vcpu_vlapic(v);
>          if ( (rc = hvm_save_entry(LAPIC_REGS, v->vcpu_id, h, s->regs)) !=
> 0 )
>              break;
> @@ -1230,6 +1233,9 @@ static int lapic_load_regs(struct domain *d,
> hvm_domain_context_t *h)
>      if ( hvm_load_entry(LAPIC_REGS, h, s->regs) != 0 )
>          return -EINVAL;
> 
> +    if ( hvm_funcs.update_eoi_exit_bitmap )
> +        hvm_funcs.update_eoi_exit_bitmap(v, 0, 0, 1);
> +
>      if ( hvm_funcs.process_isr )
>          hvm_funcs.process_isr(vlapic_find_highest_isr(s), v);
> 
> diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
> index 2caa04a..9a2dfd6 100644
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -1567,12 +1567,28 @@ static void vmx_set_info_guest(struct vcpu *v)
>      vmx_vmcs_exit(v);
>  }
> 
> -static void vmx_update_eoi_exit_bitmap(struct vcpu *v, u8 vector, u8 trig)
> +static void vmx_rebuild_eoi_exit_bitmap(struct vcpu *v)
>  {
> -    if ( trig )
> -        vmx_set_eoi_exit_bitmap(v, vector);
> +    struct vlapic *vlapic = vcpu_vlapic(v);
> +    unsigned int vector;
> +
> +    for ( vector = 0; vector < NR_VECTORS; vector++ )
> +        if ( vlapic_test_vector(vector, &vlapic->regs->data[APIC_IRR]) )
> +            set_bit(vector, v->arch.hvm_vmx.eoi_exit_bitmap);
> +}
> +
> +static void vmx_update_eoi_exit_bitmap(struct vcpu *v, u8 vector,
> +                                       u8 trig, bool_t rebuild)
> +{
> +    if ( rebuild )
> +        vmx_rebuild_eoi_exit_bitmap(v);
>      else
> -        vmx_clear_eoi_exit_bitmap(v, vector);
> +    {
> +        if ( trig )
> +            vmx_set_eoi_exit_bitmap(v, vector);
> +        else
> +            vmx_clear_eoi_exit_bitmap(v, vector);
> +    }
>  }
> 
>  static int vmx_virtual_intr_delivery_enabled(void)
> diff --git a/xen/include/asm-x86/hvm/hvm.h
> b/xen/include/asm-x86/hvm/hvm.h
> index 0ebd478..3e2e365 100644
> --- a/xen/include/asm-x86/hvm/hvm.h
> +++ b/xen/include/asm-x86/hvm/hvm.h
> @@ -189,7 +189,8 @@ struct hvm_function_table {
>      void (*nhvm_domain_relinquish_resources)(struct domain *d);
> 
>      /* Virtual interrupt delivery */
> -    void (*update_eoi_exit_bitmap)(struct vcpu *v, u8 vector, u8 trig);
> +    void (*update_eoi_exit_bitmap)(struct vcpu *v, u8 vector,
> +                                   u8 trig, bool_t rebuild);
>      int (*virtual_intr_delivery_enabled)(void);
>      void (*process_isr)(int isr, struct vcpu *v);
>      void (*deliver_posted_intr)(struct vcpu *v, u8 vector);
> diff --git a/xen/include/asm-x86/hvm/vlapic.h
> b/xen/include/asm-x86/hvm/vlapic.h
> index 66f0aff..2a83a1e 100644
> --- a/xen/include/asm-x86/hvm/vlapic.h
> +++ b/xen/include/asm-x86/hvm/vlapic.h
> @@ -58,6 +58,8 @@
> 
>  #define VEC_POS(v) ((v) % 32)
>  #define REG_POS(v) (((v) / 32) * 0x10)
> +#define vlapic_test_vector(vec, bitmap)                         \
> +    test_bit(VEC_POS(vec), (uint32_t *)((bitmap) + REG_POS(vec)))
>  #define vlapic_test_and_set_vector(vec, bitmap)
> \
>      test_and_set_bit(VEC_POS(vec), (uint32_t *)((bitmap) + REG_POS(vec)))
>  #define vlapic_test_and_clear_vector(vec, bitmap)
> 
> Best regards,
> Yang
> 

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH] x86/hvm: implement save/restore for posted interrupts
  2014-07-25 21:31       ` Tian, Kevin
@ 2014-07-28  6:46         ` Jan Beulich
  2014-07-28  7:52           ` Wu, Feng
  2014-07-28  8:17           ` Zhang, Yang Z
  0 siblings, 2 replies; 60+ messages in thread
From: Jan Beulich @ 2014-07-28  6:46 UTC (permalink / raw)
  To: Kevin Tian
  Cc: Olaf Hering, Keir Fraser, Eddie Dong, Donald D Dugger, xen-devel,
	Dongxiao Xu, Jun Nakajima, Yang Z Zhang

>>> On 25.07.14 at 23:31, <kevin.tian@intel.com> wrote:
> Well, my read of this patch is that it hides some problem other place by
> forcing posted injection at restore. As Yang has pointed out, it is not
> necessary once the notification has been synced to IRR it's done (will 
> be noted before vmentry). So at restore path, it's just about how pending 
> IRR is handled. Now the problem is that Yang can't reproduce the problem 
> locally (let's see any change with Olaf's further information), so we need 
> Olaf's help to figure out the real culprit with our input.

Searching the SDM I can't find any reference to IRR uses during
interrupt recognition. The only reference I can find is that during
delivery the IRR bit gets cleared and the new RVI determined by
scanning IRR. Hence I wonder whether setting RVI post-restore
instead of sync-ing IRR to PIR is what is needed?

Jan

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH] x86/hvm: implement save/restore for posted interrupts
  2014-07-25 21:38             ` Tian, Kevin
@ 2014-07-28  6:52               ` Olaf Hering
  2014-07-29  7:48                 ` Zhang, Yang Z
  0 siblings, 1 reply; 60+ messages in thread
From: Olaf Hering @ 2014-07-28  6:52 UTC (permalink / raw)
  To: Tian, Kevin
  Cc: Keir Fraser, Nakajima, Jun, Dong, Eddie, xen-devel, Xu, Dongxiao,
	Jan Beulich, Zhang, Yang Z

On Fri, Jul 25, Tian, Kevin wrote:

> btw, did you run any workload cross save/restore? does it only happen with
> xen-platform-pci device?

Yes, only with xen-platform-pci. And in my testing only when the
interface provided by netfront is up (ip link set up dev eth0).

Olaf

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH] x86/hvm: implement save/restore for posted interrupts
  2014-07-28  6:46         ` Jan Beulich
@ 2014-07-28  7:52           ` Wu, Feng
  2014-07-28  8:17           ` Zhang, Yang Z
  1 sibling, 0 replies; 60+ messages in thread
From: Wu, Feng @ 2014-07-28  7:52 UTC (permalink / raw)
  To: Jan Beulich, Tian, Kevin
  Cc: Olaf Hering, Keir Fraser, Dong, Eddie, Dugger, Donald D,
	xen-devel, Xu, Dongxiao, Nakajima, Jun, Zhang, Yang Z



> -----Original Message-----
> From: xen-devel-bounces@lists.xen.org
> [mailto:xen-devel-bounces@lists.xen.org] On Behalf Of Jan Beulich
> Sent: Monday, July 28, 2014 2:47 PM
> To: Tian, Kevin
> Cc: Olaf Hering; Keir Fraser; Dong, Eddie; Dugger, Donald D;
> xen-devel@lists.xen.org; Xu, Dongxiao; Nakajima, Jun; Zhang, Yang Z
> Subject: Re: [Xen-devel] [PATCH] x86/hvm: implement save/restore for posted
> interrupts
> 
> >>> On 25.07.14 at 23:31, <kevin.tian@intel.com> wrote:
> > Well, my read of this patch is that it hides some problem other place by
> > forcing posted injection at restore. As Yang has pointed out, it is not
> > necessary once the notification has been synced to IRR it's done (will
> > be noted before vmentry). So at restore path, it's just about how pending
> > IRR is handled. Now the problem is that Yang can't reproduce the problem
> > locally (let's see any change with Olaf's further information), so we need
> > Olaf's help to figure out the real culprit with our input.
> 
> Searching the SDM I can't find any reference to IRR uses during
> interrupt recognition. The only reference I can find is that during
> delivery the IRR bit gets cleared and the new RVI determined by
> scanning IRR. Hence I wonder whether setting RVI post-restore
> instead of sync-ing IRR to PIR is what is needed?

If RVI needs to be restored, what about SVI?

Thanks,
Feng

> 
> Jan
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH] x86/hvm: implement save/restore for posted interrupts
  2014-07-28  6:46         ` Jan Beulich
  2014-07-28  7:52           ` Wu, Feng
@ 2014-07-28  8:17           ` Zhang, Yang Z
  2014-07-28  9:02             ` Jan Beulich
  1 sibling, 1 reply; 60+ messages in thread
From: Zhang, Yang Z @ 2014-07-28  8:17 UTC (permalink / raw)
  To: Jan Beulich, Tian, Kevin
  Cc: Olaf Hering, Keir Fraser, Dong, Eddie, Dugger, Donald D,
	xen-devel, Xu, Dongxiao, Nakajima, Jun

Jan Beulich wrote on 2014-07-28:
>>>> On 25.07.14 at 23:31, <kevin.tian@intel.com> wrote:
>> Well, my read of this patch is that it hides some problem other
>> place by forcing posted injection at restore. As Yang has pointed
>> out, it is not necessary once the notification has been synced to
>> IRR it's done (will be noted before vmentry). So at restore path,
>> it's just about how pending IRR is handled. Now the problem is that
>> Yang can't reproduce the problem locally (let's see any change with
>> Olaf's further information), so we need Olaf's help to figure out
>> the real culprit with
> our input.
> 
> Searching the SDM I can't find any reference to IRR uses during
> interrupt recognition. The only reference I can find is that during
> delivery the IRR bit gets cleared and the new RVI determined by
> scanning IRR. Hence I wonder whether setting RVI post-restore instead of sync-ing IRR to PIR is what is needed?

vmx_intr_assist() will do it.

> 
> Jan


Best regards,
Yang

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH] x86/hvm: implement save/restore for posted interrupts
  2014-07-28  8:17           ` Zhang, Yang Z
@ 2014-07-28  9:02             ` Jan Beulich
  2014-07-28  9:50               ` Liuqiming (John)
  0 siblings, 1 reply; 60+ messages in thread
From: Jan Beulich @ 2014-07-28  9:02 UTC (permalink / raw)
  To: Olaf Hering
  Cc: Kevin Tian, Keir Fraser, Eddie Dong, Donald D Dugger, xen-devel,
	Dongxiao Xu, Jun Nakajima, Yang Z Zhang

>>> On 28.07.14 at 10:17, <yang.z.zhang@intel.com> wrote:
> Jan Beulich wrote on 2014-07-28:
>>>>> On 25.07.14 at 23:31, <kevin.tian@intel.com> wrote:
>>> Well, my read of this patch is that it hides some problem other
>>> place by forcing posted injection at restore. As Yang has pointed
>>> out, it is not necessary once the notification has been synced to
>>> IRR it's done (will be noted before vmentry). So at restore path,
>>> it's just about how pending IRR is handled. Now the problem is that
>>> Yang can't reproduce the problem locally (let's see any change with
>>> Olaf's further information), so we need Olaf's help to figure out
>>> the real culprit with
>> our input.
>> 
>> Searching the SDM I can't find any reference to IRR uses during
>> interrupt recognition. The only reference I can find is that during
>> delivery the IRR bit gets cleared and the new RVI determined by
>> scanning IRR. Hence I wonder whether setting RVI post-restore instead of 
> sync-ing IRR to PIR is what is needed?
> 
> vmx_intr_assist() will do it.

Olaf,

any chance you could check that this is actually happening?

Jan

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH] x86/hvm: implement save/restore for posted interrupts
  2014-07-28  9:02             ` Jan Beulich
@ 2014-07-28  9:50               ` Liuqiming (John)
  0 siblings, 0 replies; 60+ messages in thread
From: Liuqiming (John) @ 2014-07-28  9:50 UTC (permalink / raw)
  To: Jan Beulich, Olaf Hering
  Cc: Kevin Tian, Keir Fraser, Luonengjun, Yanqiangjun, Eddie Dong,
	Donald D Dugger, xen-devel, Dongxiao Xu, Fanhenglong,
	Jun Nakajima, Yang Z Zhang

Hi Olaf,

I think I had meet the same problem when I tried to backport apic-v to xen 4.1.2.

When apic-v enabled, suse11 guest will hang after restore. And I tried other OSs, the result like this

Suse11 sp1,suse11 sp2, suse11 sp3 have this problem
Ubuntu12, suse10sp1,redhat 5.5 do not have this problem

And here is the odd thing:
If you disable all network device before save, then suse11 will not hang either.

So I think it is a pvdriver issue, and after some investigation, I pretty sure it is eventchannel handle issue in certain kernel version,
after this commit https://git.kernel.org/cgit/linux/kernel/git/stable/linux-stable.git/commit/?id=76d2160147f43f982dfe881404cfde9fd0a9da21

I don't understand what happens extractly, it seems that when pvdriver try to suspend device it will disable irq, but because above kernel commit, the disable irq function not work.
So in vioapic route table of guest, remote_irr field of this irq is not cleared, and after restore xen will not deliver event channel irq to this guest because remote_irr has pending irq.

I'm not sure is this the same problem, but you can try my patch. This patch is base on xen 4.1.2, and it is a temporary solution. Hope it helps.

diff --git a/xen/arch/x86/hvm/irq.c b/xen/arch/x86/hvm/irq.c
index acc9197..0d064db 100644
--- a/xen/arch/x86/hvm/irq.c
+++ b/xen/arch/x86/hvm/irq.c
@@ -270,6 +270,9 @@ void hvm_set_callback_via(struct domain *d, uint64_t via)
     unsigned int gsi=0, pdev=0, pintx=0;
     uint8_t via_type;

+    struct hvm_hw_vioapic *vioapic = domain_vioapic(d);
+    union vioapic_redir_entry *ent = NULL;
+
     via_type = (uint8_t)(via >> 56) + 1;
     if ( ((via_type == HVMIRQ_callback_gsi) && (via == 0)) ||
          (via_type > HVMIRQ_callback_vector) )
@@ -290,6 +293,14 @@ void hvm_set_callback_via(struct domain *d, uint64_t via)
         case HVMIRQ_callback_pci_intx:
             pdev  = hvm_irq->callback_via.pci.dev;
             pintx = hvm_irq->callback_via.pci.intx;
+
+            gsi = hvm_pci_intx_gsi(pdev, pintx);
+            ent = &vioapic->redirtbl[gsi];
+            if ( !ent->fields.mask && ent->fields.trig_mode != VIOAPIC_EDGE_TRIG ){
+                printk("clear remote_irr when set HVMIRQ callback\n");
+                ent->fields.remote_irr = 0;
+            }
+
             __hvm_pci_intx_deassert(d, pdev, pintx);
             break;
         default:


> -----Original Message-----
> From: xen-devel-bounces@lists.xen.org
> [mailto:xen-devel-bounces@lists.xen.org] On Behalf Of Jan Beulich
> Sent: Monday, July 28, 2014 5:02 PM
> To: Olaf Hering
> Cc: Kevin Tian; Keir Fraser; Eddie Dong; Donald D Dugger;
> xen-devel@lists.xen.org; Dongxiao Xu; Jun Nakajima; Yang Z Zhang
> Subject: Re: [Xen-devel] [PATCH] x86/hvm: implement save/restore for
> posted interrupts
> 
> >>> On 28.07.14 at 10:17, <yang.z.zhang@intel.com> wrote:
> > Jan Beulich wrote on 2014-07-28:
> >>>>> On 25.07.14 at 23:31, <kevin.tian@intel.com> wrote:
> >>> Well, my read of this patch is that it hides some problem other
> >>> place by forcing posted injection at restore. As Yang has pointed
> >>> out, it is not necessary once the notification has been synced to
> >>> IRR it's done (will be noted before vmentry). So at restore path,
> >>> it's just about how pending IRR is handled. Now the problem is that
> >>> Yang can't reproduce the problem locally (let's see any change with
> >>> Olaf's further information), so we need Olaf's help to figure out
> >>> the real culprit with
> >> our input.
> >>
> >> Searching the SDM I can't find any reference to IRR uses during
> >> interrupt recognition. The only reference I can find is that during
> >> delivery the IRR bit gets cleared and the new RVI determined by
> >> scanning IRR. Hence I wonder whether setting RVI post-restore instead of
> > sync-ing IRR to PIR is what is needed?
> >
> > vmx_intr_assist() will do it.
> 
> Olaf,
> 
> any chance you could check that this is actually happening?
> 
> Jan
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel

^ permalink raw reply related	[flat|nested] 60+ messages in thread

* Re: [PATCH] x86/hvm: implement save/restore for posted interrupts
  2014-07-28  6:52               ` Olaf Hering
@ 2014-07-29  7:48                 ` Zhang, Yang Z
  2014-07-29  8:27                   ` Jan Beulich
  2014-08-02 10:59                   ` Olaf Hering
  0 siblings, 2 replies; 60+ messages in thread
From: Zhang, Yang Z @ 2014-07-29  7:48 UTC (permalink / raw)
  To: Olaf Hering, Tian, Kevin
  Cc: Keir Fraser, Jan Beulich, Dong, Eddie, xen-devel, Xu, Dongxiao,
	Nakajima, Jun

Olaf Hering wrote on 2014-07-28:
> On Fri, Jul 25, Tian, Kevin wrote:
> 
>> btw, did you run any workload cross save/restore? does it only
>> happen with xen-platform-pci device?
> 
> Yes, only with xen-platform-pci. And in my testing only when the
> interface provided by netfront is up (ip link set up dev eth0).
> 
> Olaf

Hi Olaf, can you try the following patch? I expect this patch will fix the issue.

diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index f6409d6..f0a371b 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -1517,6 +1517,8 @@ static void vmx_process_isr(int isr, struct vcpu *v)
 {
     unsigned long status;
     u8 old;
+    int vector;
+    struct vlapic *s = vcpu_vlapic(v);
 
     if ( isr < 0 )
         isr = 0;
@@ -1530,6 +1532,14 @@ static void vmx_process_isr(int isr, struct vcpu *v)
         status |= isr << VMX_GUEST_INTR_STATUS_SVI_OFFSET;
         __vmwrite(GUEST_INTR_STATUS, status);
     }
+    for ( vector = 0; vector < NR_VECTORS; vector++ )
+        if (vlapic_test_vector(vector, &s->regs->data[APIC_ISR]))
+            set_bit(vector,  v->arch.hvm_vmx.eoi_exit_bitmap);
+        
+    __vmwrite(0x201c, v->arch.hvm_vmx.eoi_exit_bitmap[0]);
+    __vmwrite(0x201e, v->arch.hvm_vmx.eoi_exit_bitmap[1]);
+    __vmwrite(0x2020, v->arch.hvm_vmx.eoi_exit_bitmap[2]);
+    __vmwrite(0x2022, v->arch.hvm_vmx.eoi_exit_bitmap[3]);
     vmx_vmcs_exit(v);
 }
 
diff --git a/xen/include/asm-x86/hvm/vlapic.h b/xen/include/asm-x86/hvm/vlapic.h
index 66f0aff..2a83a1e 100644
--- a/xen/include/asm-x86/hvm/vlapic.h
+++ b/xen/include/asm-x86/hvm/vlapic.h
@@ -58,6 +58,8 @@
 
 #define VEC_POS(v) ((v) % 32)
 #define REG_POS(v) (((v) / 32) * 0x10)
+#define vlapic_test_vector(vec, bitmap)                         \
+    test_bit(VEC_POS(vec), (uint32_t *)((bitmap) + REG_POS(vec)))
 #define vlapic_test_and_set_vector(vec, bitmap)                         \
     test_and_set_bit(VEC_POS(vec), (uint32_t *)((bitmap) + REG_POS(vec)))
 #define vlapic_test_and_clear_vector(vec, bitmap)

Best regards,
Yang

^ permalink raw reply related	[flat|nested] 60+ messages in thread

* Re: [PATCH] x86/hvm: implement save/restore for posted interrupts
  2014-07-29  7:48                 ` Zhang, Yang Z
@ 2014-07-29  8:27                   ` Jan Beulich
  2014-08-02 10:59                   ` Olaf Hering
  1 sibling, 0 replies; 60+ messages in thread
From: Jan Beulich @ 2014-07-29  8:27 UTC (permalink / raw)
  To: Yang Z Zhang
  Cc: Kevin Tian, Keir Fraser, Eddie Dong, Olaf Hering, xen-devel,
	Dongxiao Xu, Jun Nakajima

>>> On 29.07.14 at 09:48, <yang.z.zhang@intel.com> wrote:
> @@ -1530,6 +1532,14 @@ static void vmx_process_isr(int isr, struct vcpu *v)
>          status |= isr << VMX_GUEST_INTR_STATUS_SVI_OFFSET;
>          __vmwrite(GUEST_INTR_STATUS, status);
>      }
> +    for ( vector = 0; vector < NR_VECTORS; vector++ )
> +        if (vlapic_test_vector(vector, &s->regs->data[APIC_ISR]))
> +            set_bit(vector,  v->arch.hvm_vmx.eoi_exit_bitmap);
> +        
> +    __vmwrite(0x201c, v->arch.hvm_vmx.eoi_exit_bitmap[0]);
> +    __vmwrite(0x201e, v->arch.hvm_vmx.eoi_exit_bitmap[1]);
> +    __vmwrite(0x2020, v->arch.hvm_vmx.eoi_exit_bitmap[2]);
> +    __vmwrite(0x2022, v->arch.hvm_vmx.eoi_exit_bitmap[3]);

But that gets executed always, not just once post-migration.

Jan

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH] x86/hvm: implement save/restore for posted interrupts
  2014-07-29  7:48                 ` Zhang, Yang Z
  2014-07-29  8:27                   ` Jan Beulich
@ 2014-08-02 10:59                   ` Olaf Hering
  2014-08-04  1:08                     ` Zhang, Yang Z
  1 sibling, 1 reply; 60+ messages in thread
From: Olaf Hering @ 2014-08-02 10:59 UTC (permalink / raw)
  To: Zhang, Yang Z
  Cc: Tian, Kevin, Keir Fraser, Nakajima, Jun, Dong, Eddie, xen-devel,
	Xu, Dongxiao, Jan Beulich

On Tue, Jul 29, Zhang, Yang Z wrote:

> Olaf Hering wrote on 2014-07-28:
> > On Fri, Jul 25, Tian, Kevin wrote:
> >> btw, did you run any workload cross save/restore? does it only
> >> happen with xen-platform-pci device?
> > Yes, only with xen-platform-pci. And in my testing only when the
> > interface provided by netfront is up (ip link set up dev eth0).
> Hi Olaf, can you try the following patch? I expect this patch will fix the issue.

No, that did not help. I tried it on top of Xen 4.2 from sles11sp3
update channel.

Olaf

> diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
> index f6409d6..f0a371b 100644
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -1517,6 +1517,8 @@ static void vmx_process_isr(int isr, struct vcpu *v)
>  {
>      unsigned long status;
>      u8 old;
> +    int vector;
> +    struct vlapic *s = vcpu_vlapic(v);
>  
>      if ( isr < 0 )
>          isr = 0;
> @@ -1530,6 +1532,14 @@ static void vmx_process_isr(int isr, struct vcpu *v)
>          status |= isr << VMX_GUEST_INTR_STATUS_SVI_OFFSET;
>          __vmwrite(GUEST_INTR_STATUS, status);
>      }
> +    for ( vector = 0; vector < NR_VECTORS; vector++ )
> +        if (vlapic_test_vector(vector, &s->regs->data[APIC_ISR]))
> +            set_bit(vector,  v->arch.hvm_vmx.eoi_exit_bitmap);
> +        
> +    __vmwrite(0x201c, v->arch.hvm_vmx.eoi_exit_bitmap[0]);
> +    __vmwrite(0x201e, v->arch.hvm_vmx.eoi_exit_bitmap[1]);
> +    __vmwrite(0x2020, v->arch.hvm_vmx.eoi_exit_bitmap[2]);
> +    __vmwrite(0x2022, v->arch.hvm_vmx.eoi_exit_bitmap[3]);
>      vmx_vmcs_exit(v);
>  }
>  
> diff --git a/xen/include/asm-x86/hvm/vlapic.h b/xen/include/asm-x86/hvm/vlapic.h
> index 66f0aff..2a83a1e 100644
> --- a/xen/include/asm-x86/hvm/vlapic.h
> +++ b/xen/include/asm-x86/hvm/vlapic.h
> @@ -58,6 +58,8 @@
>  
>  #define VEC_POS(v) ((v) % 32)
>  #define REG_POS(v) (((v) / 32) * 0x10)
> +#define vlapic_test_vector(vec, bitmap)                         \
> +    test_bit(VEC_POS(vec), (uint32_t *)((bitmap) + REG_POS(vec)))
>  #define vlapic_test_and_set_vector(vec, bitmap)                         \
>      test_and_set_bit(VEC_POS(vec), (uint32_t *)((bitmap) + REG_POS(vec)))
>  #define vlapic_test_and_clear_vector(vec, bitmap)

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH] x86/hvm: implement save/restore for posted interrupts
  2014-08-02 10:59                   ` Olaf Hering
@ 2014-08-04  1:08                     ` Zhang, Yang Z
  2014-08-04  7:50                       ` Olaf Hering
  0 siblings, 1 reply; 60+ messages in thread
From: Zhang, Yang Z @ 2014-08-04  1:08 UTC (permalink / raw)
  To: Olaf Hering
  Cc: Tian, Kevin, Keir Fraser, Nakajima, Jun, Dong, Eddie, xen-devel,
	Xu, Dongxiao, Jan Beulich

Olaf Hering wrote on 2014-08-02:
> On Tue, Jul 29, Zhang, Yang Z wrote:
> 
>> Olaf Hering wrote on 2014-07-28:
>>> On Fri, Jul 25, Tian, Kevin wrote:
>>>> btw, did you run any workload cross save/restore? does it only
>>>> happen with xen-platform-pci device?
>>> Yes, only with xen-platform-pci. And in my testing only when the
>>> interface provided by netfront is up (ip link set up dev eth0).
>> Hi Olaf, can you try the following patch? I expect this patch will fix the issue.
> 
> No, that did not help. I tried it on top of Xen 4.2 from sles11sp3 update channel.

This is really strange. With this patch, i cannot reproduce the issue with sles11sp3 on latest Xen. 

Besides, according the response from John, the issue they observed is exactly the eoi bitmap update issue.

BTW, my patch is based on your sync_pir_to_irr changes. I am not sure whether you applied the following changes when doing testing. But you should see the failure ratio is reduced obviously even without it, right?

@@ -1179,6 +1179,9 @@ static int lapic_save_regs(struct domain *d, hvm_domain_context_t *h)
 
     for_each_vcpu ( d, v ) 
     {   
+        if ( hvm_funcs.sync_pir_to_irr )
+            hvm_funcs.sync_pir_to_irr(v);
+
         s = vcpu_vlapic(v);
         if ( (rc = hvm_save_entry(LAPIC_REGS, v->vcpu_id, h, s->regs)) != 0 ) 
             break;


> 
> Olaf
> 
>> diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
>> index f6409d6..f0a371b 100644
>> --- a/xen/arch/x86/hvm/vmx/vmx.c
>> +++ b/xen/arch/x86/hvm/vmx/vmx.c
>> @@ -1517,6 +1517,8 @@ static void vmx_process_isr(int isr, struct
>> vcpu
>> *v)  {
>>      unsigned long status;
>>      u8 old;
>> +    int vector;
>> +    struct vlapic *s = vcpu_vlapic(v);
>> 
>>      if ( isr < 0 )
>>          isr = 0;
>> @@ -1530,6 +1532,14 @@ static void vmx_process_isr(int isr, struct
>> vcpu
> *v)
>>          status |= isr << VMX_GUEST_INTR_STATUS_SVI_OFFSET;
>>          __vmwrite(GUEST_INTR_STATUS, status);
>>      }
>> +    for ( vector = 0; vector < NR_VECTORS; vector++ )
>> +        if (vlapic_test_vector(vector, &s->regs->data[APIC_ISR]))
>> +            set_bit(vector,  v->arch.hvm_vmx.eoi_exit_bitmap);
>> +
>> +    __vmwrite(0x201c, v->arch.hvm_vmx.eoi_exit_bitmap[0]);
>> +    __vmwrite(0x201e, v->arch.hvm_vmx.eoi_exit_bitmap[1]);
>> +    __vmwrite(0x2020, v->arch.hvm_vmx.eoi_exit_bitmap[2]);
>> +    __vmwrite(0x2022, v->arch.hvm_vmx.eoi_exit_bitmap[3]);
>>      vmx_vmcs_exit(v);
>>  }
>> diff --git a/xen/include/asm-x86/hvm/vlapic.h
>> b/xen/include/asm-x86/hvm/vlapic.h
>> index 66f0aff..2a83a1e 100644
>> --- a/xen/include/asm-x86/hvm/vlapic.h
>> +++ b/xen/include/asm-x86/hvm/vlapic.h
>> @@ -58,6 +58,8 @@
>> 
>>  #define VEC_POS(v) ((v) % 32)
>>  #define REG_POS(v) (((v) / 32) * 0x10)
>> +#define vlapic_test_vector(vec, bitmap)                         \
>> +    test_bit(VEC_POS(vec), (uint32_t *)((bitmap) + REG_POS(vec)))
>>  #define vlapic_test_and_set_vector(vec, bitmap)
> \
>>      test_and_set_bit(VEC_POS(vec), (uint32_t *)((bitmap) +
>> REG_POS(vec)))  #define vlapic_test_and_clear_vector(vec, bitmap)


Best regards,
Yang

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH] x86/hvm: implement save/restore for posted interrupts
  2014-08-04  1:08                     ` Zhang, Yang Z
@ 2014-08-04  7:50                       ` Olaf Hering
  2014-08-04  7:57                         ` Zhang, Yang Z
  0 siblings, 1 reply; 60+ messages in thread
From: Olaf Hering @ 2014-08-04  7:50 UTC (permalink / raw)
  To: Zhang, Yang Z
  Cc: Tian, Kevin, Keir Fraser, Nakajima, Jun, Dong, Eddie, xen-devel,
	Xu, Dongxiao, Jan Beulich

On Mon, Aug 04, Zhang, Yang Z wrote:

> This is really strange. With this patch, i cannot reproduce the issue with sles11sp3 on latest Xen. 
> 
> Besides, according the response from John, the issue they observed is exactly the eoi bitmap update issue.
> 
> BTW, my patch is based on your sync_pir_to_irr changes. I am not sure
> whether you applied the following changes when doing testing. But you
> should see the failure ratio is reduced obviously even without it,
> right?

I have used this patch now, the sync_pir_to_irr part was indeed missing.
But, still no joy for me. This is what I used:


---
 xen/arch/x86/hvm/vlapic.c        |    3 +++
 xen/arch/x86/hvm/vmx/vmx.c       |   10 ++++++++++
 xen/include/asm-x86/hvm/vlapic.h |    2 ++
 3 files changed, 15 insertions(+)

Index: xen-4.2.4-testing/xen/arch/x86/hvm/vlapic.c
===================================================================
--- xen-4.2.4-testing.orig/xen/arch/x86/hvm/vlapic.c
+++ xen-4.2.4-testing/xen/arch/x86/hvm/vlapic.c
@@ -1164,6 +1164,9 @@ static int lapic_save_regs(struct domain
 
     for_each_vcpu ( d, v )
     {
+        if ( hvm_funcs.sync_pir_to_irr )
+            hvm_funcs.sync_pir_to_irr(v);
+
         s = vcpu_vlapic(v);
         if ( (rc = hvm_save_entry(LAPIC_REGS, v->vcpu_id, h, s->regs)) != 0 )
             break;
Index: xen-4.2.4-testing/xen/arch/x86/hvm/vmx/vmx.c
===================================================================
--- xen-4.2.4-testing.orig/xen/arch/x86/hvm/vmx/vmx.c
+++ xen-4.2.4-testing/xen/arch/x86/hvm/vmx/vmx.c
@@ -1606,6 +1606,8 @@ static void vmx_process_isr(int isr, str
 {
     unsigned long status;
     u8 old;
+    int vector;
+    struct vlapic *s = vcpu_vlapic(v);
 
     if ( !cpu_has_vmx_virtual_intr_delivery )
         return;
@@ -1622,6 +1624,14 @@ static void vmx_process_isr(int isr, str
         status |= isr << VMX_GUEST_INTR_STATUS_SVI_OFFSET;
         __vmwrite(GUEST_INTR_STATUS, status);
     }
+    for ( vector = 0; vector < NR_VECTORS; vector++ )
+        if (vlapic_test_vector(vector, &s->regs->data[APIC_ISR]))
+            set_bit(vector,  v->arch.hvm_vmx.eoi_exit_bitmap);
+
+    __vmwrite(0x201c, v->arch.hvm_vmx.eoi_exit_bitmap[0]);
+    __vmwrite(0x201e, v->arch.hvm_vmx.eoi_exit_bitmap[1]);
+    __vmwrite(0x2020, v->arch.hvm_vmx.eoi_exit_bitmap[2]);
+    __vmwrite(0x2022, v->arch.hvm_vmx.eoi_exit_bitmap[3]);
     vmx_vmcs_exit(v);
 }
 
Index: xen-4.2.4-testing/xen/include/asm-x86/hvm/vlapic.h
===================================================================
--- xen-4.2.4-testing.orig/xen/include/asm-x86/hvm/vlapic.h
+++ xen-4.2.4-testing/xen/include/asm-x86/hvm/vlapic.h
@@ -60,6 +60,8 @@
 
 #define VEC_POS(v) ((v) % 32)
 #define REG_POS(v) (((v) / 32) * 0x10)
+#define vlapic_test_vector(vec, bitmap)                         \
+    test_bit(VEC_POS(vec), (uint32_t *)((bitmap) + REG_POS(vec)))
 #define vlapic_test_and_set_vector(vec, bitmap)                         \
     test_and_set_bit(VEC_POS(vec), (uint32_t *)((bitmap) + REG_POS(vec)))
 #define vlapic_test_and_clear_vector(vec, bitmap)                       \


Olaf

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH] x86/hvm: implement save/restore for posted interrupts
  2014-08-04  7:50                       ` Olaf Hering
@ 2014-08-04  7:57                         ` Zhang, Yang Z
  2014-08-05 10:32                           ` Olaf Hering
  0 siblings, 1 reply; 60+ messages in thread
From: Zhang, Yang Z @ 2014-08-04  7:57 UTC (permalink / raw)
  To: Olaf Hering
  Cc: Tian, Kevin, Keir Fraser, Nakajima, Jun, Dong, Eddie, xen-devel,
	Xu, Dongxiao, Jan Beulich

Olaf Hering wrote on 2014-08-04:
> On Mon, Aug 04, Zhang, Yang Z wrote:
> 
>> This is really strange. With this patch, i cannot reproduce the issue
>> with sles11sp3 on latest Xen.
>> 
>> Besides, according the response from John, the issue they observed is
>> exactly the eoi bitmap update issue.
>> 
>> BTW, my patch is based on your sync_pir_to_irr changes. I am not
>> sure whether you applied the following changes when doing testing.
>> But you should see the failure ratio is reduced obviously even
>> without it, right?
> 
> I have used this patch now, the sync_pir_to_irr part was indeed missing.
> But, still no joy for me. This is what I used:

Have you tired latest Xen? With this patch, i didn't see any issue after more than 20 rounds migration testing. Without it, the problem is observed with less than 10 rounds migration.

> 
> 
> ---
>  xen/arch/x86/hvm/vlapic.c        |    3 +++
>  xen/arch/x86/hvm/vmx/vmx.c       |   10 ++++++++++
>  xen/include/asm-x86/hvm/vlapic.h |    2 ++
>  3 files changed, 15 insertions(+)
> Index: xen-4.2.4-testing/xen/arch/x86/hvm/vlapic.c
> ================================================================ === ---
> xen-4.2.4-testing.orig/xen/arch/x86/hvm/vlapic.c +++
> xen-4.2.4-testing/xen/arch/x86/hvm/vlapic.c @@ -1164,6 +1164,9 @@ static
> int lapic_save_regs(struct domain
> 
>      for_each_vcpu ( d, v )
>      {
> +        if ( hvm_funcs.sync_pir_to_irr )
> +            hvm_funcs.sync_pir_to_irr(v);
> +
>          s = vcpu_vlapic(v);
>          if ( (rc = hvm_save_entry(LAPIC_REGS, v->vcpu_id, h, s->regs)) != 0 )
>              break;
> Index: xen-4.2.4-testing/xen/arch/x86/hvm/vmx/vmx.c
> ================================================================ === ---
> xen-4.2.4-testing.orig/xen/arch/x86/hvm/vmx/vmx.c +++
> xen-4.2.4-testing/xen/arch/x86/hvm/vmx/vmx.c @@ -1606,6 +1606,8 @@
> static void vmx_process_isr(int isr, str  {
>      unsigned long status;
>      u8 old;
> +    int vector;
> +    struct vlapic *s = vcpu_vlapic(v);
> 
>      if ( !cpu_has_vmx_virtual_intr_delivery )
>          return; @@ -1622,6 +1624,14 @@ static void vmx_process_isr(int
>          isr, str status |= isr << VMX_GUEST_INTR_STATUS_SVI_OFFSET;
>          __vmwrite(GUEST_INTR_STATUS, status);
>      }
> +    for ( vector = 0; vector < NR_VECTORS; vector++ ) +        if
> (vlapic_test_vector(vector, &s->regs->data[APIC_ISR])) +           
> set_bit(vector,  v->arch.hvm_vmx.eoi_exit_bitmap); + +   
> __vmwrite(0x201c, v->arch.hvm_vmx.eoi_exit_bitmap[0]); +   
> __vmwrite(0x201e, v->arch.hvm_vmx.eoi_exit_bitmap[1]); +   
> __vmwrite(0x2020, v->arch.hvm_vmx.eoi_exit_bitmap[2]); +   
> __vmwrite(0x2022, v->arch.hvm_vmx.eoi_exit_bitmap[3]);
>      vmx_vmcs_exit(v);
>  }
> Index: xen-4.2.4-testing/xen/include/asm-x86/hvm/vlapic.h
> ================================================================ === ---
> xen-4.2.4-testing.orig/xen/include/asm-x86/hvm/vlapic.h +++
> xen-4.2.4-testing/xen/include/asm-x86/hvm/vlapic.h @@ -60,6 +60,8 @@
> 
>  #define VEC_POS(v) ((v) % 32)
>  #define REG_POS(v) (((v) / 32) * 0x10)
> +#define vlapic_test_vector(vec, bitmap)                         \
> +    test_bit(VEC_POS(vec), (uint32_t *)((bitmap) + REG_POS(vec)))
>  #define vlapic_test_and_set_vector(vec, bitmap) \
>      test_and_set_bit(VEC_POS(vec), (uint32_t *)((bitmap) +
> REG_POS(vec)))  #define vlapic_test_and_clear_vector(vec, bitmap) \
> 
> 
> Olaf


Best regards,
Yang

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH] x86/hvm: implement save/restore for posted interrupts
  2014-08-04  7:57                         ` Zhang, Yang Z
@ 2014-08-05 10:32                           ` Olaf Hering
  2014-08-08  0:18                             ` Zhang, Yang Z
                                               ` (2 more replies)
  0 siblings, 3 replies; 60+ messages in thread
From: Olaf Hering @ 2014-08-05 10:32 UTC (permalink / raw)
  To: Zhang, Yang Z
  Cc: Tian, Kevin, Keir Fraser, Nakajima, Jun, Dong, Eddie, xen-devel,
	Xu, Dongxiao, Jan Beulich

On Mon, Aug 04, Zhang, Yang Z wrote:

> Have you tired latest Xen? With this patch, i didn't see any issue
> after more than 20 rounds migration testing. Without it, the problem
> is observed with less than 10 rounds migration.

I use save/restore, and even with the change the 'state' remains "2"
right away. My base is a7d8ce7 ("x86/gdbsx: security audit of
{,un}pausevcpu and domstatus hypercalls").

Olaf

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH] x86/hvm: implement save/restore for posted interrupts
  2014-08-05 10:32                           ` Olaf Hering
@ 2014-08-08  0:18                             ` Zhang, Yang Z
  2014-09-01  6:44                             ` Zhang, Yang Z
  2014-09-18  3:29                             ` Zhang, Yang Z
  2 siblings, 0 replies; 60+ messages in thread
From: Zhang, Yang Z @ 2014-08-08  0:18 UTC (permalink / raw)
  To: Olaf Hering
  Cc: Tian, Kevin, Keir Fraser, Nakajima, Jun, Dong, Eddie, xen-devel,
	Xu, Dongxiao, Jan Beulich

Olaf Hering wrote on 2014-08-05:
> On Mon, Aug 04, Zhang, Yang Z wrote:
> 
>> Have you tired latest Xen? With this patch, i didn't see any issue
>> after more than 20 rounds migration testing. Without it, the problem
>> is observed with less than 10 rounds migration.
> 
> I use save/restore, and even with the change the 'state' remains "2"
> right away. My base is a7d8ce7 ("x86/gdbsx: security audit of
> {,un}pausevcpu and domstatus hypercalls").

Can you tell how you reproduce it? I am following the steps you told me previously, but I cannot reproduce it with this fixing.

> 
> Olaf


Best regards,
Yang

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH] x86/hvm: implement save/restore for posted interrupts
  2014-08-05 10:32                           ` Olaf Hering
  2014-08-08  0:18                             ` Zhang, Yang Z
@ 2014-09-01  6:44                             ` Zhang, Yang Z
  2014-09-18  3:29                             ` Zhang, Yang Z
  2 siblings, 0 replies; 60+ messages in thread
From: Zhang, Yang Z @ 2014-09-01  6:44 UTC (permalink / raw)
  To: Olaf Hering
  Cc: Tian, Kevin, Keir Fraser, Jan Beulich, Dong, Eddie, xen-devel,
	Nakajima, Jun

Zhang, Yang Z wrote on 2014-08-08:
> Olaf Hering wrote on 2014-08-05:
>> On Mon, Aug 04, Zhang, Yang Z wrote:
>> 
>>> Have you tired latest Xen? With this patch, i didn't see any issue
>>> after more than 20 rounds migration testing. Without it, the
>>> problem is observed with less than 10 rounds migration.
>> 
>> I use save/restore, and even with the change the 'state' remains "2"
>> right away. My base is a7d8ce7 ("x86/gdbsx: security audit of
>> {,un}pausevcpu and domstatus hypercalls").
> 
> Can you tell how you reproduce it? I am following the steps you told
> me previously, but I cannot reproduce it with this fixing.

Any update on this?

> 
>> 
>> Olaf
> 
> 
> Best regards,
> Yang
>


Best regards,
Yang

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH] x86/hvm: implement save/restore for posted interrupts
  2014-08-05 10:32                           ` Olaf Hering
  2014-08-08  0:18                             ` Zhang, Yang Z
  2014-09-01  6:44                             ` Zhang, Yang Z
@ 2014-09-18  3:29                             ` Zhang, Yang Z
  2014-09-19 13:32                               ` Olaf Hering
  2 siblings, 1 reply; 60+ messages in thread
From: Zhang, Yang Z @ 2014-09-18  3:29 UTC (permalink / raw)
  To: Olaf Hering
  Cc: Tian, Kevin, Keir Fraser, Jan Beulich, Dong, Eddie, xen-devel,
	Nakajima, Jun

Zhang, Yang Z wrote on 2014-09-01:
> Zhang, Yang Z wrote on 2014-08-08:
>> Olaf Hering wrote on 2014-08-05:
>>> On Mon, Aug 04, Zhang, Yang Z wrote:
>>> 
>>>> Have you tired latest Xen? With this patch, i didn't see any issue
>>>> after more than 20 rounds migration testing. Without it, the
>>>> problem is observed with less than 10 rounds migration.
>>> 
>>> I use save/restore, and even with the change the 'state' remains "2"
>>> right away. My base is a7d8ce7 ("x86/gdbsx: security audit of
>>> {,un}pausevcpu and domstatus hypercalls").
>> 
>> Can you tell how you reproduce it? I am following the steps you told
>> me previously, but I cannot reproduce it with this fixing.
> 
> Any update on this?

Ping again.

> 
>> 
>>> 
>>> Olaf
>> 
>> 
>> Best regards,
>> Yang
>> 
> 
> 
> Best regards,
> Yang


Best regards,
Yang

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH] x86/hvm: implement save/restore for posted interrupts
  2014-09-18  3:29                             ` Zhang, Yang Z
@ 2014-09-19 13:32                               ` Olaf Hering
  2014-09-19 13:39                                 ` Jan Beulich
  2014-09-30 16:19                                 ` Jan Beulich
  0 siblings, 2 replies; 60+ messages in thread
From: Olaf Hering @ 2014-09-19 13:32 UTC (permalink / raw)
  To: Zhang, Yang Z
  Cc: Tian, Kevin, Keir Fraser, Jan Beulich, Dong, Eddie, xen-devel,
	Nakajima, Jun

On Thu, Sep 18, Zhang, Yang Z wrote:

> Zhang, Yang Z wrote on 2014-09-01:
> > Zhang, Yang Z wrote on 2014-08-08:
> >> Can you tell how you reproduce it? I am following the steps you told
> >> me previously, but I cannot reproduce it with this fixing.
> > Any update on this?
> Ping again.

I will have a look at this issue next week.

Olaf

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH] x86/hvm: implement save/restore for posted interrupts
  2014-09-19 13:32                               ` Olaf Hering
@ 2014-09-19 13:39                                 ` Jan Beulich
  2014-09-19 17:51                                   ` Andrew Cooper
  2014-09-30 16:19                                 ` Jan Beulich
  1 sibling, 1 reply; 60+ messages in thread
From: Jan Beulich @ 2014-09-19 13:39 UTC (permalink / raw)
  To: Olaf Hering, Yang Z Zhang
  Cc: Keir Fraser, Kevin Tian, Eddie Dong, Jun Nakajima, xen-devel

>>> On 19.09.14 at 15:32, <olaf@aepfle.de> wrote:
> On Thu, Sep 18, Zhang, Yang Z wrote:
> 
>> Zhang, Yang Z wrote on 2014-09-01:
>> > Zhang, Yang Z wrote on 2014-08-08:
>> >> Can you tell how you reproduce it? I am following the steps you told
>> >> me previously, but I cannot reproduce it with this fixing.
>> > Any update on this?
>> Ping again.
> 
> I will have a look at this issue next week.

Btw., Andrew Cooper hinted at the possibility of this series

http://lists.xenproject.org/archives/html/xen-devel/2014-09/msg02753.html

(particularly patch 2) having an effect on migration problems
between APICV-capable and -incapable systems, i.e. perhaps
worth a try in this context too.

Jan

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH] x86/hvm: implement save/restore for posted interrupts
  2014-09-19 13:39                                 ` Jan Beulich
@ 2014-09-19 17:51                                   ` Andrew Cooper
  2014-09-22 22:43                                     ` Tian, Kevin
  0 siblings, 1 reply; 60+ messages in thread
From: Andrew Cooper @ 2014-09-19 17:51 UTC (permalink / raw)
  To: Jan Beulich, Olaf Hering, Yang Z Zhang
  Cc: Eddie Dong, Kevin Tian, Keir Fraser, Jun Nakajima, xen-devel

On 19/09/2014 14:39, Jan Beulich wrote:
>>>> On 19.09.14 at 15:32, <olaf@aepfle.de> wrote:
>> On Thu, Sep 18, Zhang, Yang Z wrote:
>>
>>> Zhang, Yang Z wrote on 2014-09-01:
>>>> Zhang, Yang Z wrote on 2014-08-08:
>>>>> Can you tell how you reproduce it? I am following the steps you told
>>>>> me previously, but I cannot reproduce it with this fixing.
>>>> Any update on this?
>>> Ping again.
>> I will have a look at this issue next week.
> Btw., Andrew Cooper hinted at the possibility of this series
>
> http://lists.xenproject.org/archives/html/xen-devel/2014-09/msg02753.html
>
> (particularly patch 2) having an effect on migration problems
> between APICV-capable and -incapable systems, i.e. perhaps
> worth a try in this context too.
>
> Jan

The issue XenServer observes is that problems occur even when migrating 
between two identical machines, one with APICV enabled, and one disabled.

Given the now-discovered issues with Xen's x2apic handling, it is 
entirely possible that the differences between the emulated and real 
APICs was enough to cause issues.   And yet, it could be a mixture of 
all of the issues identified so far.

~Andrew

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH] x86/hvm: implement save/restore for posted interrupts
  2014-09-19 17:51                                   ` Andrew Cooper
@ 2014-09-22 22:43                                     ` Tian, Kevin
  2014-09-24 16:26                                       ` Malcolm Crossley
  0 siblings, 1 reply; 60+ messages in thread
From: Tian, Kevin @ 2014-09-22 22:43 UTC (permalink / raw)
  To: Andrew Cooper, Jan Beulich, Olaf Hering, Zhang, Yang Z
  Cc: Dong, Eddie, Keir Fraser, Nakajima, Jun, xen-devel

> From: Andrew Cooper
> Sent: Friday, September 19, 2014 10:52 AM
> 
> On 19/09/2014 14:39, Jan Beulich wrote:
> >>>> On 19.09.14 at 15:32, <olaf@aepfle.de> wrote:
> >> On Thu, Sep 18, Zhang, Yang Z wrote:
> >>
> >>> Zhang, Yang Z wrote on 2014-09-01:
> >>>> Zhang, Yang Z wrote on 2014-08-08:
> >>>>> Can you tell how you reproduce it? I am following the steps you told
> >>>>> me previously, but I cannot reproduce it with this fixing.
> >>>> Any update on this?
> >>> Ping again.
> >> I will have a look at this issue next week.
> > Btw., Andrew Cooper hinted at the possibility of this series
> >
> > http://lists.xenproject.org/archives/html/xen-devel/2014-09/msg02753.html
> >
> > (particularly patch 2) having an effect on migration problems
> > between APICV-capable and -incapable systems, i.e. perhaps
> > worth a try in this context too.
> >
> > Jan
> 
> The issue XenServer observes is that problems occur even when migrating
> between two identical machines, one with APICV enabled, and one disabled.
> 
> Given the now-discovered issues with Xen's x2apic handling, it is
> entirely possible that the differences between the emulated and real
> APICs was enough to cause issues.   And yet, it could be a mixture of
> all of the issues identified so far.
> 

Have to say I didn't see any explicit message that this issue is related to 
two machines with different capabilities (one with APICv enabled, and
one disabled).

by reading original commit message:
----
Saving and restoring a PVonHVM guest on a host which has the VMX
"Posted Interrupt Processing" feature enabled will fail because the
xen-platform-pci device does not receive interrupts anymore after
restore. The reason is that the IRQs are not maintained in APIC_IRR,
but in a separate PIR array. This info is lost during the save
operation.
----

I really thought it's related to migration between two machines which
both have APICv enabled. Not sure whether Yang realized this point.

Thanks
Kevin

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH] x86/hvm: implement save/restore for posted interrupts
  2014-09-22 22:43                                     ` Tian, Kevin
@ 2014-09-24 16:26                                       ` Malcolm Crossley
  0 siblings, 0 replies; 60+ messages in thread
From: Malcolm Crossley @ 2014-09-24 16:26 UTC (permalink / raw)
  To: xen-devel

On 22/09/14 23:43, Tian, Kevin wrote:
>> From: Andrew Cooper
>> Sent: Friday, September 19, 2014 10:52 AM
>>
>> On 19/09/2014 14:39, Jan Beulich wrote:
>>>>>> On 19.09.14 at 15:32, <olaf@aepfle.de> wrote:
>>>> On Thu, Sep 18, Zhang, Yang Z wrote:
>>>>
>>>>> Zhang, Yang Z wrote on 2014-09-01:
>>>>>> Zhang, Yang Z wrote on 2014-08-08:
>>>>>>> Can you tell how you reproduce it? I am following the steps you told
>>>>>>> me previously, but I cannot reproduce it with this fixing.
>>>>>> Any update on this?
>>>>> Ping again.
>>>> I will have a look at this issue next week.
>>> Btw., Andrew Cooper hinted at the possibility of this series
>>>
>>> http://lists.xenproject.org/archives/html/xen-devel/2014-09/msg02753.html
>>>
>>> (particularly patch 2) having an effect on migration problems
>>> between APICV-capable and -incapable systems, i.e. perhaps
>>> worth a try in this context too.
>>>
>>> Jan
>>
>> The issue XenServer observes is that problems occur even when migrating
>> between two identical machines, one with APICV enabled, and one disabled.
>>
>> Given the now-discovered issues with Xen's x2apic handling, it is
>> entirely possible that the differences between the emulated and real
>> APICs was enough to cause issues.   And yet, it could be a mixture of
>> all of the issues identified so far.
>>
> 
> Have to say I didn't see any explicit message that this issue is related to 
> two machines with different capabilities (one with APICv enabled, and
> one disabled).
> 
> by reading original commit message:
> ----
> Saving and restoring a PVonHVM guest on a host which has the VMX
> "Posted Interrupt Processing" feature enabled will fail because the
> xen-platform-pci device does not receive interrupts anymore after
> restore. The reason is that the IRQs are not maintained in APIC_IRR,
> but in a separate PIR array. This info is lost during the save
> operation.
> ----
> 
> I really thought it's related to migration between two machines which
> both have APICv enabled. Not sure whether Yang realized this point.
> 

We have done some internal XenServer testing on this issue:

We install Citrix Windows PV drivers into a Windows Server 2008 32bit
VM, this is effectively a PVHVM guest.

After migrating the VM between two Haswell-EP servers, the Windows PV
drivers stop working and we are unable to access storage or network.

When we apply Olaf original patch then we don't lose storage or network
accesss via the PV drivers.

We are continuing to test but I thought we'd share that Olaf's original
patch resolves the PVHVM APIC-v issue for Xenserver.

Malcolm


> Thanks
> Kevin
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel
> 

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH] x86/hvm: implement save/restore for posted interrupts
  2014-09-19 13:32                               ` Olaf Hering
  2014-09-19 13:39                                 ` Jan Beulich
@ 2014-09-30 16:19                                 ` Jan Beulich
  2014-10-01  8:07                                   ` Olaf Hering
  1 sibling, 1 reply; 60+ messages in thread
From: Jan Beulich @ 2014-09-30 16:19 UTC (permalink / raw)
  To: Olaf Hering; +Cc: Yang Z Zhang, Kevin Tian, Eddie Dong, Jun Nakajima, xen-devel

>>> On 19.09.14 at 15:32, <olaf@aepfle.de> wrote:
> On Thu, Sep 18, Zhang, Yang Z wrote:
> 
>> Zhang, Yang Z wrote on 2014-09-01:
>> > Zhang, Yang Z wrote on 2014-08-08:
>> >> Can you tell how you reproduce it? I am following the steps you told
>> >> me previously, but I cannot reproduce it with this fixing.
>> > Any update on this?
>> Ping again.
> 
> I will have a look at this issue next week.

Any update here? It would really be nice to get this settled,
considering for how long this has been pending. Yet I think without
Intel having a way to repro we're not going to make any progress.

Jan

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH] x86/hvm: implement save/restore for posted interrupts
  2014-09-30 16:19                                 ` Jan Beulich
@ 2014-10-01  8:07                                   ` Olaf Hering
  2014-10-01  8:29                                     ` Jan Beulich
  0 siblings, 1 reply; 60+ messages in thread
From: Olaf Hering @ 2014-10-01  8:07 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Yang Z Zhang, Kevin Tian, Eddie Dong, Jun Nakajima, xen-devel

On Tue, Sep 30, Jan Beulich wrote:

> >>> On 19.09.14 at 15:32, <olaf@aepfle.de> wrote:
> > On Thu, Sep 18, Zhang, Yang Z wrote:
> > 
> >> Zhang, Yang Z wrote on 2014-09-01:
> >> > Zhang, Yang Z wrote on 2014-08-08:
> >> >> Can you tell how you reproduce it? I am following the steps you told
> >> >> me previously, but I cannot reproduce it with this fixing.
> >> > Any update on this?
> >> Ping again.
> > I will have a look at this issue next week.
> Any update here? It would really be nice to get this settled,
> considering for how long this has been pending. Yet I think without
> Intel having a way to repro we're not going to make any progress.

I have this still on my radar. Just the testsystem is still busy. There
are just very few hosts available with the required hardware features.

Olaf

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH] x86/hvm: implement save/restore for posted interrupts
  2014-10-01  8:07                                   ` Olaf Hering
@ 2014-10-01  8:29                                     ` Jan Beulich
  2014-10-01 20:13                                       ` Olaf Hering
  0 siblings, 1 reply; 60+ messages in thread
From: Jan Beulich @ 2014-10-01  8:29 UTC (permalink / raw)
  To: Olaf Hering; +Cc: Yang Z Zhang, Kevin Tian, Eddie Dong, Jun Nakajima, xen-devel

>>> On 01.10.14 at 10:07, <olaf@aepfle.de> wrote:
> On Tue, Sep 30, Jan Beulich wrote:
> 
>> >>> On 19.09.14 at 15:32, <olaf@aepfle.de> wrote:
>> > On Thu, Sep 18, Zhang, Yang Z wrote:
>> > 
>> >> Zhang, Yang Z wrote on 2014-09-01:
>> >> > Zhang, Yang Z wrote on 2014-08-08:
>> >> >> Can you tell how you reproduce it? I am following the steps you told
>> >> >> me previously, but I cannot reproduce it with this fixing.
>> >> > Any update on this?
>> >> Ping again.
>> > I will have a look at this issue next week.
>> Any update here? It would really be nice to get this settled,
>> considering for how long this has been pending. Yet I think without
>> Intel having a way to repro we're not going to make any progress.
> 
> I have this still on my radar. Just the testsystem is still busy. There
> are just very few hosts available with the required hardware features.

But I though it's not so much that you need a test system than
you describing to Intel folks the precise conditions needed for
reproduction.

Jan

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH] x86/hvm: implement save/restore for posted interrupts
  2014-10-01  8:29                                     ` Jan Beulich
@ 2014-10-01 20:13                                       ` Olaf Hering
  2014-10-02  6:51                                         ` Jan Beulich
  2014-10-08  7:37                                         ` Zhang, Yang Z
  0 siblings, 2 replies; 60+ messages in thread
From: Olaf Hering @ 2014-10-01 20:13 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Yang Z Zhang, Kevin Tian, Eddie Dong, Jun Nakajima, xen-devel

On Wed, Oct 01, Jan Beulich wrote:

> But I though it's not so much that you need a test system than
> you describing to Intel folks the precise conditions needed for
> reproduction.

Finally I got access to a system, perhaps I can use it for a while.

It has sles11sp3 installed. So I installed the latest kernel and xen
packages from sles11sp3, and used this domU.cfg:


name="sles11sp3_full_bug866902_nfsroot"
uuid="071cdcce-4c2c-48ba-b09a-6984ac91692b"
memory=512
vcpus=1
on_poweroff="destroy"
on_reboot="restart"
on_crash="coredump-destroy"
localtime=0
keymap="de"

builder="hvm"
device_model="/usr/lib/xen/bin/qemu-dm"
kernel="/usr/lib/xen/boot/hvmloader"
boot="d"
disk=[ 
        'file:/nfsmnt/bug866902/vdisk-sles11sp3_full_bug866902_nfsroot-disk0,hda,w',
        'file:/nfsmnt/iso/SLES-11-SP3-DVD-x86_64-GM-DVD1.iso,hdc:cdrom,r'
]
vif=[ 'bridge=br0,type=netfront,mac=00:16:3e:4e:7c:4b', ]
stdvga=0
vnc=1
vncunused=1
viridian=0
acpi=1
pae=1
apic=1

serial="pty"


Then these commands to boot the HVM guest into the provided iso image:

xm new ~/ohering/sles11sp3_full_bug866902_nfsroot.cfg
xm start sles11sp3_full_bug866902_nfsroot
xm vnc sles11sp3_full_bug866902_nfsroot &

 -> select "Rescue system"
 -> set cmdline to "netsetup=dhcp sysrq_always_enabled"
 -> RETURN
 -> wait until login prompt appears, login as root with no password
 -> run dmesg, its nearly empty

xm sysrq sles11sp3_full_bug866902_nfsroot s
 
 -> run dmesg, should show the sysrequest
 -> remember dmesg timestamp

xm save -f sles11sp3_full_bug866902_nfsroot /dev/shm/sles11sp3_full_bug866902_nfsroot.dump
xm restore /dev/shm/sles11sp3_full_bug866902_nfsroot.dump
xm sysrq sles11sp3_full_bug866902_nfsroot s
xm vnc sles11sp3_full_bug866902_nfsroot &

 -> run dmesg, there is suspend/resume output but just a single sysrequest
 -> expected are two sysrequest entries

xm destroy sles11sp3_full_bug866902_nfsroot


If I use openSUSE-13.1-DVD-x86_64.iso instead of the sles11sp3.iso the
guest is frozen, no cursor blinking, no input possible. Not sure why that happens.


Then I tried todays staging branch + v5 of my --prefix series:

insserv -r xendomains
insserv -r xencommons
rpm -Uvh xen-upstream.rpm
cp -avLt /boot --parents /opt/xen/staging-upstream/boot/xen.gz
vi /boot/grub/menu.lst
/opt/xen/staging-upstream/etc/init.d/xencommons start
/opt/xen/staging-upstream/sbin/xl -vvvv create -ddddd -V ~/ohering/sles11sp3_full_bug866902_nfsroot.cfg

 -> select "Rescue system"
 -> set cmdline to "netsetup=dhcp sysrq_always_enabled"
 -> RETURN
 -> wait until login prompt appears, login as root with no password
 -> run dmesg, its nearly empty

/opt/xen/staging-upstream/sbin/xl -vvvv sysrq sles11sp3_full_bug866902_nfsroot s

 -> run dmesg, should show the sysrequest
 -> remember dmesg timestamp

/opt/xen/staging-upstream/sbin/xl -vvvv save -f sles11sp3_full_bug866902_nfsroot /dev/shm/sles11sp3_full_bug866902_nfsroot.dump
/opt/xen/staging-upstream/sbin/xl -vvvv restore /dev/shm/sles11sp3_full_bug866902_nfsroot.dump
/opt/xen/staging-upstream/sbin/xl -vvvv sysrq sles11sp3_full_bug866902_nfsroot s
/opt/xen/staging-upstream/sbin/xl -vvvv vnc sles11sp3_full_bug866902_nfsroot &

 -> run dmesg, there is suspend/resume output but just a single sysrequest
 -> expected are two sysrequest entries

/opt/xen/staging-upstream/sbin/xl -vvvv destroy sles11sp3_full_bug866902_nfsroot


So this is my repro case. Hopefully this will work for folks at Intel.

Tomorrow I will try if any of the suggested patches will fix xen-upsteam.


One thing I'm not sure about is the firmware of such IvyBridge system.
The firmware of the system I can use for testing dumps alot to serial
during startup. Not sure if its EFI or not. At least it boots into
grub1, so I suspect it has a legacy BIOS. If an EFI system is used,
would that change anything? The initial report I got was appearently
also with legacy BIOS. Did Intel test with EFI by any chance?

What I have is some Intel Romley box with "Intel(R) Xeon(R) CPU E5-2697
v2 @ 2.70GHz", dmesg has:
<7>DMI: Intel Corporation S2600CP/S2600CP, BIOS RMLSDP.86I.R3.27.D685.1305151734 05/15/2013


Olaf

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH] x86/hvm: implement save/restore for posted interrupts
  2014-10-01 20:13                                       ` Olaf Hering
@ 2014-10-02  6:51                                         ` Jan Beulich
  2014-10-02  8:10                                           ` Andrew Cooper
  2014-10-08  7:37                                         ` Zhang, Yang Z
  1 sibling, 1 reply; 60+ messages in thread
From: Jan Beulich @ 2014-10-02  6:51 UTC (permalink / raw)
  To: Olaf Hering; +Cc: Yang Z Zhang, Kevin Tian, Eddie Dong, Jun Nakajima, xen-devel

>>> On 01.10.14 at 22:13, <olaf@aepfle.de> wrote:
> One thing I'm not sure about is the firmware of such IvyBridge system.
> The firmware of the system I can use for testing dumps alot to serial
> during startup. Not sure if its EFI or not. At least it boots into
> grub1, so I suspect it has a legacy BIOS. If an EFI system is used,
> would that change anything? The initial report I got was appearently
> also with legacy BIOS. Did Intel test with EFI by any chance?

I don't think firmware should matter here at all. It should merely be
the exact CPU model that would likely matter.

Jan

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH] x86/hvm: implement save/restore for posted interrupts
  2014-10-02  6:51                                         ` Jan Beulich
@ 2014-10-02  8:10                                           ` Andrew Cooper
  2014-10-02  8:20                                             ` Olaf Hering
  0 siblings, 1 reply; 60+ messages in thread
From: Andrew Cooper @ 2014-10-02  8:10 UTC (permalink / raw)
  To: Jan Beulich, Olaf Hering
  Cc: Yang Z Zhang, Kevin Tian, Eddie Dong, Jun Nakajima, xen-devel

On 02/10/2014 07:51, Jan Beulich wrote:
>>>> On 01.10.14 at 22:13, <olaf@aepfle.de> wrote:
>> One thing I'm not sure about is the firmware of such IvyBridge system.
>> The firmware of the system I can use for testing dumps alot to serial
>> during startup. Not sure if its EFI or not. At least it boots into
>> grub1, so I suspect it has a legacy BIOS. If an EFI system is used,
>> would that change anything? The initial report I got was appearently
>> also with legacy BIOS. Did Intel test with EFI by any chance?
> I don't think firmware should matter here at all. It should merely be
> the exact CPU model that would likely matter.

XenServer can replicate this problem on all APICV capable hardware we
own, and observe the problem being fixed in all cases using v5 of the
patch from this series.

There are some unrelated issues in the heterogeneous pooling case which
cause problems, but any two homogeneous systems, one with APICV enabled
and one with APICV disabled will reliably demonstrate the issue.

~Andrew

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH] x86/hvm: implement save/restore for posted interrupts
  2014-10-02  8:10                                           ` Andrew Cooper
@ 2014-10-02  8:20                                             ` Olaf Hering
  0 siblings, 0 replies; 60+ messages in thread
From: Olaf Hering @ 2014-10-02  8:20 UTC (permalink / raw)
  To: Andrew Cooper
  Cc: Kevin Tian, Jan Beulich, Eddie Dong, xen-devel, Jun Nakajima,
	Yang Z Zhang

On Thu, Oct 02, Andrew Cooper wrote:

> On 02/10/2014 07:51, Jan Beulich wrote:
> >>>> On 01.10.14 at 22:13, <olaf@aepfle.de> wrote:
> >> One thing I'm not sure about is the firmware of such IvyBridge system.
> >> The firmware of the system I can use for testing dumps alot to serial
> >> during startup. Not sure if its EFI or not. At least it boots into
> >> grub1, so I suspect it has a legacy BIOS. If an EFI system is used,
> >> would that change anything? The initial report I got was appearently
> >> also with legacy BIOS. Did Intel test with EFI by any chance?
> > I don't think firmware should matter here at all. It should merely be
> > the exact CPU model that would likely matter.
> 
> XenServer can replicate this problem on all APICV capable hardware we
> own, and observe the problem being fixed in all cases using v5 of the
> patch from this series.

What patch is that? URL?


Olaf

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH] x86/hvm: implement save/restore for posted interrupts
  2014-10-01 20:13                                       ` Olaf Hering
  2014-10-02  6:51                                         ` Jan Beulich
@ 2014-10-08  7:37                                         ` Zhang, Yang Z
  2014-10-08  7:40                                           ` Olaf Hering
  2014-10-08  8:28                                           ` Olaf Hering
  1 sibling, 2 replies; 60+ messages in thread
From: Zhang, Yang Z @ 2014-10-08  7:37 UTC (permalink / raw)
  To: Olaf Hering, Jan Beulich
  Cc: Tian, Kevin, Dong, Eddie, Nakajima, Jun, xen-devel

Olaf Hering wrote on 2014-10-02:
> On Wed, Oct 01, Jan Beulich wrote:
> 
> > But I though it's not so much that you need a test system than you
> > describing to Intel folks the precise conditions needed for
> > reproduction.
> 
> Finally I got access to a system, perhaps I can use it for a while.
> 
> It has sles11sp3 installed. So I installed the latest kernel and xen packages from
> sles11sp3, and used this domU.cfg:
> 
> 
> name="sles11sp3_full_bug866902_nfsroot"
> uuid="071cdcce-4c2c-48ba-b09a-6984ac91692b"
> memory=512
> vcpus=1
> on_poweroff="destroy"
> on_reboot="restart"
> on_crash="coredump-destroy"
> localtime=0
> keymap="de"
> 
> builder="hvm"
> device_model="/usr/lib/xen/bin/qemu-dm"
> kernel="/usr/lib/xen/boot/hvmloader"
> boot="d"
> disk=[
> 
> 'file:/nfsmnt/bug866902/vdisk-sles11sp3_full_bug866902_nfsroot-disk0,hda,w'
> ,
> 
> 'file:/nfsmnt/iso/SLES-11-SP3-DVD-x86_64-GM-DVD1.iso,hdc:cdrom,r'
> ]
> vif=[ 'bridge=br0,type=netfront,mac=00:16:3e:4e:7c:4b', ]
> stdvga=0
> vnc=1
> vncunused=1
> viridian=0
> acpi=1
> pae=1
> apic=1
> 
> serial="pty"
> 
> 
> Then these commands to boot the HVM guest into the provided iso image:
> 
> xm new ~/ohering/sles11sp3_full_bug866902_nfsroot.cfg
> xm start sles11sp3_full_bug866902_nfsroot xm vnc
> sles11sp3_full_bug866902_nfsroot &
> 
>  -> select "Rescue system"
>  -> set cmdline to "netsetup=dhcp sysrq_always_enabled"
>  -> RETURN
>  -> wait until login prompt appears, login as root with no password  -> run
> dmesg, its nearly empty
> 
> xm sysrq sles11sp3_full_bug866902_nfsroot s
> 
>  -> run dmesg, should show the sysrequest  -> remember dmesg timestamp
> 
> xm save -f sles11sp3_full_bug866902_nfsroot
> /dev/shm/sles11sp3_full_bug866902_nfsroot.dump
> xm restore /dev/shm/sles11sp3_full_bug866902_nfsroot.dump
> xm sysrq sles11sp3_full_bug866902_nfsroot s xm vnc
> sles11sp3_full_bug866902_nfsroot &
> 
>  -> run dmesg, there is suspend/resume output but just a single sysrequest  ->
> expected are two sysrequest entries
> 
> xm destroy sles11sp3_full_bug866902_nfsroot
> 
> 
> If I use openSUSE-13.1-DVD-x86_64.iso instead of the sles11sp3.iso the guest
> is frozen, no cursor blinking, no input possible. Not sure why that happens.
> 
> 
> Then I tried todays staging branch + v5 of my --prefix series:

What's the v5 patch?

> 
> insserv -r xendomains
> insserv -r xencommons
> rpm -Uvh xen-upstream.rpm
> cp -avLt /boot --parents /opt/xen/staging-upstream/boot/xen.gz
> vi /boot/grub/menu.lst
> /opt/xen/staging-upstream/etc/init.d/xencommons start
> /opt/xen/staging-upstream/sbin/xl -vvvv create -ddddd -V
> ~/ohering/sles11sp3_full_bug866902_nfsroot.cfg
> 
>  -> select "Rescue system"
>  -> set cmdline to "netsetup=dhcp sysrq_always_enabled"
>  -> RETURN
>  -> wait until login prompt appears, login as root with no password  -> run
> dmesg, its nearly empty
> 
> /opt/xen/staging-upstream/sbin/xl -vvvv sysrq
> sles11sp3_full_bug866902_nfsroot s
> 
>  -> run dmesg, should show the sysrequest  -> remember dmesg timestamp
> 
> /opt/xen/staging-upstream/sbin/xl -vvvv save -f
> sles11sp3_full_bug866902_nfsroot
> /dev/shm/sles11sp3_full_bug866902_nfsroot.dump
> /opt/xen/staging-upstream/sbin/xl -vvvv restore
> /dev/shm/sles11sp3_full_bug866902_nfsroot.dump
> /opt/xen/staging-upstream/sbin/xl -vvvv sysrq
> sles11sp3_full_bug866902_nfsroot s /opt/xen/staging-upstream/sbin/xl -vvvv
> vnc sles11sp3_full_bug866902_nfsroot &
> 
>  -> run dmesg, there is suspend/resume output but just a single sysrequest  ->
> expected are two sysrequest entries
> 
> /opt/xen/staging-upstream/sbin/xl -vvvv destroy
> sles11sp3_full_bug866902_nfsroot
> 
> 
> So this is my repro case. Hopefully this will work for folks at Intel.

Can you reproduce it with APICv disabled? You can add apicv=0 into your grub to disable it.

> 
> Tomorrow I will try if any of the suggested patches will fix xen-upsteam.
> 
> 
> One thing I'm not sure about is the firmware of such IvyBridge system.
> The firmware of the system I can use for testing dumps alot to serial during
> startup. Not sure if its EFI or not. At least it boots into grub1, so I suspect it has
> a legacy BIOS. If an EFI system is used, would that change anything? The initial
> report I got was appearently also with legacy BIOS. Did Intel test with EFI by
> any chance?
> 
> What I have is some Intel Romley box with "Intel(R) Xeon(R) CPU E5-2697
> v2 @ 2.70GHz", dmesg has:
> <7>DMI: Intel Corporation S2600CP/S2600CP, BIOS
> RMLSDP.86I.R3.27.D685.1305151734 05/15/2013
> 
> 
> Olaf

Best regards,
Yang

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH] x86/hvm: implement save/restore for posted interrupts
  2014-10-08  7:37                                         ` Zhang, Yang Z
@ 2014-10-08  7:40                                           ` Olaf Hering
  2014-10-08  7:55                                             ` Zhang, Yang Z
  2014-10-08  8:28                                           ` Olaf Hering
  1 sibling, 1 reply; 60+ messages in thread
From: Olaf Hering @ 2014-10-08  7:40 UTC (permalink / raw)
  To: Zhang, Yang Z
  Cc: Tian, Kevin, Dong, Eddie, Nakajima, Jun, Jan Beulich, xen-devel

On Wed, Oct 08, Zhang, Yang Z wrote:

> What's the v5 patch?

The current state of staging, and also master because a push just
happend.

> > So this is my repro case. Hopefully this will work for folks at Intel.
> 
> Can you reproduce it with APICv disabled? You can add apicv=0 into your grub to disable it.

I will check with current staging tree, have the system still accessible.

Olaf

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH] x86/hvm: implement save/restore for posted interrupts
  2014-10-08  7:40                                           ` Olaf Hering
@ 2014-10-08  7:55                                             ` Zhang, Yang Z
  0 siblings, 0 replies; 60+ messages in thread
From: Zhang, Yang Z @ 2014-10-08  7:55 UTC (permalink / raw)
  To: Olaf Hering
  Cc: Tian, Kevin, Dong, Eddie, Nakajima, Jun, Jan Beulich, xen-devel

Olaf Hering wrote on 2014-10-08:
> On Wed, Oct 08, Zhang, Yang Z wrote:
> 
>> What's the v5 patch?
> 
> The current state of staging, and also master because a push just happend.

Ok. I see it. So it has nothing to do with APICv. But Andrew said it fixed APICv migration issue. Strange!

> 
>>> So this is my repro case. Hopefully this will work for folks at Intel.
>> 
>> Can you reproduce it with APICv disabled? You can add apicv=0 into
>> your grub
> to disable it.
> 
> I will check with current staging tree, have the system still accessible.

I remember you told me it is able to observer the issue via xenstore-ls: 
http://lists.xenproject.org/archives/html/xen-devel/2014-07/msg02881.html

I was able to reproduce it with Xen upstream. And it is disappearing with my fixing patch( http://lists.xenproject.org/archives/html/xen-devel/2014-07/msg03729.html )
Does it work for you?

> 
> Olaf


Best regards,
Yang

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH] x86/hvm: implement save/restore for posted interrupts
  2014-10-08  7:37                                         ` Zhang, Yang Z
  2014-10-08  7:40                                           ` Olaf Hering
@ 2014-10-08  8:28                                           ` Olaf Hering
  2014-10-08  8:35                                             ` Zhang, Yang Z
  1 sibling, 1 reply; 60+ messages in thread
From: Olaf Hering @ 2014-10-08  8:28 UTC (permalink / raw)
  To: Zhang, Yang Z
  Cc: Tian, Kevin, Dong, Eddie, Nakajima, Jun, Jan Beulich, xen-devel

On Wed, Oct 08, Zhang, Yang Z wrote:

> Can you reproduce it with APICv disabled? You can add apicv=0 into
> your grub to disable it.

Using 'apicv=0' with staging helps, the guest resumes properly.

Will now try the patch you mentioned in the other mail.

Olaf

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH] x86/hvm: implement save/restore for posted interrupts
  2014-10-08  8:28                                           ` Olaf Hering
@ 2014-10-08  8:35                                             ` Zhang, Yang Z
  2014-10-08  8:54                                               ` Olaf Hering
  0 siblings, 1 reply; 60+ messages in thread
From: Zhang, Yang Z @ 2014-10-08  8:35 UTC (permalink / raw)
  To: Olaf Hering
  Cc: Tian, Kevin, Dong, Eddie, Nakajima, Jun, Jan Beulich, xen-devel

Olaf Hering wrote on 2014-10-08:
> On Wed, Oct 08, Zhang, Yang Z wrote:
> 
>> Can you reproduce it with APICv disabled? You can add apicv=0 into
>> your grub to disable it.
> 
> Using 'apicv=0' with staging helps, the guest resumes properly.

Does 'resumes properly' mean there are two sysrq in dmesg after resume?

> 
> Will now try the patch you mentioned in the other mail.

Thanks. I am set upping the environment for testing now.

> 
> Olaf


Best regards,
Yang

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH] x86/hvm: implement save/restore for posted interrupts
  2014-10-08  8:35                                             ` Zhang, Yang Z
@ 2014-10-08  8:54                                               ` Olaf Hering
  2014-10-09  7:24                                                 ` Zhang, Yang Z
  0 siblings, 1 reply; 60+ messages in thread
From: Olaf Hering @ 2014-10-08  8:54 UTC (permalink / raw)
  To: Zhang, Yang Z
  Cc: Tian, Kevin, Dong, Eddie, Nakajima, Jun, Jan Beulich, xen-devel

On Wed, Oct 08, Zhang, Yang Z wrote:

> Olaf Hering wrote on 2014-10-08:
> > On Wed, Oct 08, Zhang, Yang Z wrote:
> >> Can you reproduce it with APICv disabled? You can add apicv=0 into
> >> your grub to disable it.
> > Using 'apicv=0' with staging helps, the guest resumes properly.
> Does 'resumes properly' mean there are two sysrq in dmesg after resume?

Yes.

> > Will now try the patch you mentioned in the other mail.
> Thanks. I am set upping the environment for testing now.

That patch (Message-ID: <20140804075007.GA11609@aepfle.de>) does not help.

Olaf

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH] x86/hvm: implement save/restore for posted interrupts
  2014-10-08  8:54                                               ` Olaf Hering
@ 2014-10-09  7:24                                                 ` Zhang, Yang Z
  2014-10-09  8:41                                                   ` Fabio Fantoni
                                                                     ` (3 more replies)
  0 siblings, 4 replies; 60+ messages in thread
From: Zhang, Yang Z @ 2014-10-09  7:24 UTC (permalink / raw)
  To: Olaf Hering
  Cc: Tian, Kevin, Dong, Eddie, Nakajima, Jun, Jan Beulich, xen-devel

[-- Attachment #1: Type: text/plain, Size: 3111 bytes --]

Olaf Hering wrote on 2014-10-08:
> On Wed, Oct 08, Zhang, Yang Z wrote:
> 
>> Olaf Hering wrote on 2014-10-08:
>>> On Wed, Oct 08, Zhang, Yang Z wrote:
>>>> Can you reproduce it with APICv disabled? You can add apicv=0
>>>> into your grub to disable it.
>>> Using 'apicv=0' with staging helps, the guest resumes properly.
>> Does 'resumes properly' mean there are two sysrq in dmesg after resume?
> 
> Yes.
> 
>>> Will now try the patch you mentioned in the other mail.
>> Thanks. I am set upping the environment for testing now.
> 
> That patch (Message-ID: <20140804075007.GA11609@aepfle.de>) does not
> help.

Sorry. I forget to tell that that patch must combine with one fixing from your patch.
I rebased it based on latest Xen and the attached patch includes all fixings. Could you have a try? It works on my side, hope it helps.

diff --git a/xen/arch/x86/hvm/vlapic.c b/xen/arch/x86/hvm/vlapic.c
index 99ae1be..e702ed3 100644
--- a/xen/arch/x86/hvm/vlapic.c
+++ b/xen/arch/x86/hvm/vlapic.c
@@ -1259,6 +1259,9 @@ static int lapic_save_regs(struct domain *d, hvm_domain_context_t *h)
 
     for_each_vcpu ( d, v )
     {
+        if ( hvm_funcs.sync_pir_to_irr )
+            hvm_funcs.sync_pir_to_irr(v);
+
         s = vcpu_vlapic(v);
         if ( (rc = hvm_save_entry(LAPIC_REGS, v->vcpu_id, h, s->regs)) != 0 )
             break;
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 304aeea..7c4d796 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -1584,6 +1584,8 @@ static void vmx_process_isr(int isr, struct vcpu *v)
 {
     unsigned long status;
     u8 old;
+    int vector;
+    struct vlapic *s = vcpu_vlapic(v);
 
     if ( isr < 0 )
         isr = 0;
@@ -1597,6 +1599,14 @@ static void vmx_process_isr(int isr, struct vcpu *v)
         status |= isr << VMX_GUEST_INTR_STATUS_SVI_OFFSET;
         __vmwrite(GUEST_INTR_STATUS, status);
     }
+    for ( vector = 0; vector < NR_VECTORS; vector++ )
+        if (vlapic_test_vector(vector, &s->regs->data[APIC_TMR]))
+            set_bit(vector,  v->arch.hvm_vmx.eoi_exit_bitmap);
+        
+    __vmwrite(0x201c, v->arch.hvm_vmx.eoi_exit_bitmap[0]);
+    __vmwrite(0x201e, v->arch.hvm_vmx.eoi_exit_bitmap[1]);
+    __vmwrite(0x2020, v->arch.hvm_vmx.eoi_exit_bitmap[2]);
+    __vmwrite(0x2022, v->arch.hvm_vmx.eoi_exit_bitmap[3]);
     vmx_vmcs_exit(v);
 }
 
diff --git a/xen/include/asm-x86/hvm/vlapic.h b/xen/include/asm-x86/hvm/vlapic.h
index bf59b95..fc8d131 100644
--- a/xen/include/asm-x86/hvm/vlapic.h
+++ b/xen/include/asm-x86/hvm/vlapic.h
@@ -61,6 +61,8 @@
 
 #define VEC_POS(v) ((v) % 32)
 #define REG_POS(v) (((v) / 32) * 0x10)
+#define vlapic_test_vector(vec, bitmap)                         \
+    test_bit(VEC_POS(vec), (uint32_t *)((bitmap) + REG_POS(vec)))
 #define vlapic_test_and_set_vector(vec, bitmap)                         \
     test_and_set_bit(VEC_POS(vec), (uint32_t *)((bitmap) + REG_POS(vec)))
 #define vlapic_test_and_clear_vector(vec, bitmap)  

> 
> Olaf


Best regards,
Yang



[-- Attachment #2: 0001-fix-migration-with-apicv.patch --]
[-- Type: application/octet-stream, Size: 2632 bytes --]

From 7b931f05033dfd0bf9c8b7b08a286d82c55ef2ef Mon Sep 17 00:00:00 2001
From: Yang Zhang <yang.z.zhang@Intel.com>
Date: Thu, 9 Oct 2014 13:23:06 -0400
Subject: [PATCH] fix migration with apicv

Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com>
---
 xen/arch/x86/hvm/vlapic.c        |    3 +++
 xen/arch/x86/hvm/vmx/vmx.c       |   10 ++++++++++
 xen/include/asm-x86/hvm/vlapic.h |    2 ++
 3 files changed, 15 insertions(+), 0 deletions(-)

diff --git a/xen/arch/x86/hvm/vlapic.c b/xen/arch/x86/hvm/vlapic.c
index 99ae1be..e702ed3 100644
--- a/xen/arch/x86/hvm/vlapic.c
+++ b/xen/arch/x86/hvm/vlapic.c
@@ -1259,6 +1259,9 @@ static int lapic_save_regs(struct domain *d, hvm_domain_context_t *h)
 
     for_each_vcpu ( d, v )
     {
+        if ( hvm_funcs.sync_pir_to_irr )
+            hvm_funcs.sync_pir_to_irr(v);
+
         s = vcpu_vlapic(v);
         if ( (rc = hvm_save_entry(LAPIC_REGS, v->vcpu_id, h, s->regs)) != 0 )
             break;
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 304aeea..7c4d796 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -1584,6 +1584,8 @@ static void vmx_process_isr(int isr, struct vcpu *v)
 {
     unsigned long status;
     u8 old;
+    int vector;
+    struct vlapic *s = vcpu_vlapic(v);
 
     if ( isr < 0 )
         isr = 0;
@@ -1597,6 +1599,14 @@ static void vmx_process_isr(int isr, struct vcpu *v)
         status |= isr << VMX_GUEST_INTR_STATUS_SVI_OFFSET;
         __vmwrite(GUEST_INTR_STATUS, status);
     }
+    for ( vector = 0; vector < NR_VECTORS; vector++ )
+        if (vlapic_test_vector(vector, &s->regs->data[APIC_TMR]))
+            set_bit(vector,  v->arch.hvm_vmx.eoi_exit_bitmap);
+        
+    __vmwrite(0x201c, v->arch.hvm_vmx.eoi_exit_bitmap[0]);
+    __vmwrite(0x201e, v->arch.hvm_vmx.eoi_exit_bitmap[1]);
+    __vmwrite(0x2020, v->arch.hvm_vmx.eoi_exit_bitmap[2]);
+    __vmwrite(0x2022, v->arch.hvm_vmx.eoi_exit_bitmap[3]);
     vmx_vmcs_exit(v);
 }
 
diff --git a/xen/include/asm-x86/hvm/vlapic.h b/xen/include/asm-x86/hvm/vlapic.h
index bf59b95..fc8d131 100644
--- a/xen/include/asm-x86/hvm/vlapic.h
+++ b/xen/include/asm-x86/hvm/vlapic.h
@@ -61,6 +61,8 @@
 
 #define VEC_POS(v) ((v) % 32)
 #define REG_POS(v) (((v) / 32) * 0x10)
+#define vlapic_test_vector(vec, bitmap)                         \
+    test_bit(VEC_POS(vec), (uint32_t *)((bitmap) + REG_POS(vec)))
 #define vlapic_test_and_set_vector(vec, bitmap)                         \
     test_and_set_bit(VEC_POS(vec), (uint32_t *)((bitmap) + REG_POS(vec)))
 #define vlapic_test_and_clear_vector(vec, bitmap)                       \
-- 
1.7.6.3


[-- Attachment #3: Type: text/plain, Size: 126 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply related	[flat|nested] 60+ messages in thread

* Re: [PATCH] x86/hvm: implement save/restore for posted interrupts
  2014-10-09  7:24                                                 ` Zhang, Yang Z
@ 2014-10-09  8:41                                                   ` Fabio Fantoni
  2014-10-09  8:43                                                     ` Zhang, Yang Z
  2014-10-09 11:35                                                   ` Malcolm Crossley
                                                                     ` (2 subsequent siblings)
  3 siblings, 1 reply; 60+ messages in thread
From: Fabio Fantoni @ 2014-10-09  8:41 UTC (permalink / raw)
  To: Zhang, Yang Z, Olaf Hering
  Cc: Tian, Kevin, Dong, Eddie, Jan Beulich, Nakajima, Jun, xen-devel


[-- Attachment #1.1: Type: text/plain, Size: 3719 bytes --]

Il 09/10/2014 09:24, Zhang, Yang Z ha scritto:
> Olaf Hering wrote on 2014-10-08:
>> On Wed, Oct 08, Zhang, Yang Z wrote:
>>
>>> Olaf Hering wrote on 2014-10-08:
>>>> On Wed, Oct 08, Zhang, Yang Z wrote:
>>>>> Can you reproduce it with APICv disabled? You can add apicv=0
>>>>> into your grub to disable it.
>>>> Using 'apicv=0' with staging helps, the guest resumes properly.
>>> Does 'resumes properly' mean there are two sysrq in dmesg after resume?
>> Yes.
>>
>>>> Will now try the patch you mentioned in the other mail.
>>> Thanks. I am set upping the environment for testing now.
>> That patch (Message-ID: <20140804075007.GA11609@aepfle.de>) does not
>> help.
> Sorry. I forget to tell that that patch must combine with one fixing from your patch.
> I rebased it based on latest Xen and the attached patch includes all fixings. Could you have a try? It works on my side, hope it helps.

Hi, I want test this fix to see if it solves one strange save/restore 
bug. Before this should be applied also another patch or can be applied 
only that on xen-unstable? I ask because I not see "x86/hvm: implement 
save/restore for posted interrupts" patch in xen-unstable that I suppose 
is required for this.

Thanks for any reply and sorry for my bad english.

>
> diff --git a/xen/arch/x86/hvm/vlapic.c b/xen/arch/x86/hvm/vlapic.c
> index 99ae1be..e702ed3 100644
> --- a/xen/arch/x86/hvm/vlapic.c
> +++ b/xen/arch/x86/hvm/vlapic.c
> @@ -1259,6 +1259,9 @@ static int lapic_save_regs(struct domain *d, hvm_domain_context_t *h)
>   
>       for_each_vcpu ( d, v )
>       {
> +        if ( hvm_funcs.sync_pir_to_irr )
> +            hvm_funcs.sync_pir_to_irr(v);
> +
>           s = vcpu_vlapic(v);
>           if ( (rc = hvm_save_entry(LAPIC_REGS, v->vcpu_id, h, s->regs)) != 0 )
>               break;
> diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
> index 304aeea..7c4d796 100644
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -1584,6 +1584,8 @@ static void vmx_process_isr(int isr, struct vcpu *v)
>   {
>       unsigned long status;
>       u8 old;
> +    int vector;
> +    struct vlapic *s = vcpu_vlapic(v);
>   
>       if ( isr < 0 )
>           isr = 0;
> @@ -1597,6 +1599,14 @@ static void vmx_process_isr(int isr, struct vcpu *v)
>           status |= isr << VMX_GUEST_INTR_STATUS_SVI_OFFSET;
>           __vmwrite(GUEST_INTR_STATUS, status);
>       }
> +    for ( vector = 0; vector < NR_VECTORS; vector++ )
> +        if (vlapic_test_vector(vector, &s->regs->data[APIC_TMR]))
> +            set_bit(vector,  v->arch.hvm_vmx.eoi_exit_bitmap);
> +
> +    __vmwrite(0x201c, v->arch.hvm_vmx.eoi_exit_bitmap[0]);
> +    __vmwrite(0x201e, v->arch.hvm_vmx.eoi_exit_bitmap[1]);
> +    __vmwrite(0x2020, v->arch.hvm_vmx.eoi_exit_bitmap[2]);
> +    __vmwrite(0x2022, v->arch.hvm_vmx.eoi_exit_bitmap[3]);
>       vmx_vmcs_exit(v);
>   }
>   
> diff --git a/xen/include/asm-x86/hvm/vlapic.h b/xen/include/asm-x86/hvm/vlapic.h
> index bf59b95..fc8d131 100644
> --- a/xen/include/asm-x86/hvm/vlapic.h
> +++ b/xen/include/asm-x86/hvm/vlapic.h
> @@ -61,6 +61,8 @@
>   
>   #define VEC_POS(v) ((v) % 32)
>   #define REG_POS(v) (((v) / 32) * 0x10)
> +#define vlapic_test_vector(vec, bitmap)                         \
> +    test_bit(VEC_POS(vec), (uint32_t *)((bitmap) + REG_POS(vec)))
>   #define vlapic_test_and_set_vector(vec, bitmap)                         \
>       test_and_set_bit(VEC_POS(vec), (uint32_t *)((bitmap) + REG_POS(vec)))
>   #define vlapic_test_and_clear_vector(vec, bitmap)
>
>> Olaf
>
> Best regards,
> Yang
>
>
>
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel


[-- Attachment #1.2: Type: text/html, Size: 5286 bytes --]

[-- Attachment #2: Type: text/plain, Size: 126 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH] x86/hvm: implement save/restore for posted interrupts
  2014-10-09  8:41                                                   ` Fabio Fantoni
@ 2014-10-09  8:43                                                     ` Zhang, Yang Z
  2014-10-09 12:49                                                       ` Fabio Fantoni
  0 siblings, 1 reply; 60+ messages in thread
From: Zhang, Yang Z @ 2014-10-09  8:43 UTC (permalink / raw)
  To: Fabio Fantoni, Olaf Hering
  Cc: Tian, Kevin, Dong, Eddie, Jan Beulich, Nakajima, Jun, xen-devel

Fabio Fantoni wrote on 2014-10-09:
> Il 09/10/2014 09:24, Zhang, Yang Z ha scritto:
> 
> 
> 	Olaf Hering wrote on 2014-10-08:
> 
> 		On Wed, Oct 08, Zhang, Yang Z wrote:
> 
> 
> 			Olaf Hering wrote on 2014-10-08:
> 
> 				On Wed, Oct 08, Zhang, Yang Z wrote:
> 
> 					Can you reproduce it with APICv disabled? You can add apicv=0
> 					into your grub to disable it.
> 
> 				Using 'apicv=0' with staging helps, the guest resumes properly.
> 
> 			Does 'resumes properly' mean there are two sysrq in dmesg after
> resume?
> 
> 
> 		Yes.
> 
> 
> 				Will now try the patch you mentioned in the other mail.
> 
> 			Thanks. I am set upping the environment for testing now.
> 
> 
> 		That patch (Message-ID: <20140804075007.GA11609@aepfle.de>
> <mailto:20140804075007.GA11609@aepfle.de> ) does not
> 		help.
> 
> 
> 	Sorry. I forget to tell that that patch must combine with one fixing
> from your patch.
> 	I rebased it based on latest Xen and the attached patch includes all fixings.
> Could you have a try? It works on my side, hope it helps.
> 
> 
> Hi, I want test this fix to see if it solves one strange save/restore
> bug. Before this should be applied also another patch or can be
> applied only that on xen-unstable? I ask because I not see "x86/hvm:
> implement save/restore for posted interrupts" patch in xen-unstable that I suppose is required for this.

This patch includes all fixings that I know. So just apply it into latest Xen and have a try. 

> 
> Thanks for any reply and sorry for my bad english.
> 
> 
> 
> 
> 
> 	diff --git a/xen/arch/x86/hvm/vlapic.c b/xen/arch/x86/hvm/vlapic.c
> 	index 99ae1be..e702ed3 100644
> 	--- a/xen/arch/x86/hvm/vlapic.c
> 	+++ b/xen/arch/x86/hvm/vlapic.c
> 	@@ -1259,6 +1259,9 @@ static int lapic_save_regs(struct domain *d,
> hvm_domain_context_t *h)
> 
> 	     for_each_vcpu ( d, v ) 	     { 	+        if (
> hvm_funcs.sync_pir_to_irr ) 	+            hvm_funcs.sync_pir_to_irr(v);
> 	+ 	         s = vcpu_vlapic(v); 	         if ( (rc =
> hvm_save_entry(LAPIC_REGS, v->vcpu_id, h, s->regs)) != 0 ) 	            
> break; 	diff --git a/xen/arch/x86/hvm/vmx/vmx.c
> b/xen/arch/x86/hvm/vmx/vmx.c 	index 304aeea..7c4d796 100644 	---
> a/xen/arch/x86/hvm/vmx/vmx.c 	+++ b/xen/arch/x86/hvm/vmx/vmx.c 	@@
> -1584,6 +1584,8 @@ static void vmx_process_isr(int isr, struct vcpu *v)
> 	 { 	     unsigned long status; 	     u8 old; 	+    int vector; 	+   
> struct vlapic *s = vcpu_vlapic(v);
> 
> 	     if ( isr < 0 ) 	         isr = 0; 	@@ -1597,6 +1599,14 @@ static
> void vmx_process_isr(int isr, struct vcpu *v) 	         status |= isr <<
> VMX_GUEST_INTR_STATUS_SVI_OFFSET; 	         __vmwrite(GUEST_INTR_STATUS,
> status); 	     } 	+    for ( vector = 0; vector < NR_VECTORS; vector++ )
> 	+        if (vlapic_test_vector(vector, &s->regs->data[APIC_TMR])) 	+  
>          set_bit(vector,  v->arch.hvm_vmx.eoi_exit_bitmap); 	+ 	+   
> __vmwrite(0x201c, v->arch.hvm_vmx.eoi_exit_bitmap[0]); 	+   
> __vmwrite(0x201e, v->arch.hvm_vmx.eoi_exit_bitmap[1]); 	+   
> __vmwrite(0x2020, v->arch.hvm_vmx.eoi_exit_bitmap[2]); 	+   
> __vmwrite(0x2022, v->arch.hvm_vmx.eoi_exit_bitmap[3]); 	    
> vmx_vmcs_exit(v); 	 }
> 
> 	diff --git a/xen/include/asm-x86/hvm/vlapic.h
> b/xen/include/asm-x86/hvm/vlapic.h 	index bf59b95..fc8d131 100644 	---
> a/xen/include/asm-x86/hvm/vlapic.h 	+++
> b/xen/include/asm-x86/hvm/vlapic.h 	@@ -61,6 +61,8 @@
> 
> 	 #define VEC_POS(v) ((v) % 32) 	 #define REG_POS(v) (((v) / 32) * 0x10)
> 	+#define vlapic_test_vector(vec, bitmap)                         \ 	+  
>  test_bit(VEC_POS(vec), (uint32_t *)((bitmap) + REG_POS(vec))) 	 #define
> vlapic_test_and_set_vector(vec, bitmap) \ 	    
> test_and_set_bit(VEC_POS(vec), (uint32_t *)((bitmap) + REG_POS(vec))) 	
> #define vlapic_test_and_clear_vector(vec, bitmap)
> 
> 
> 
> 		Olaf
> 
> 
> 
> 	Best regards,
> 	Yang
> 
> 
> 
> 
> 
> 	_______________________________________________
> 	Xen-devel mailing list
> 	Xen-devel@lists.xen.org
> 	http://lists.xen.org/xen-devel
>


Best regards,
Yang

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH] x86/hvm: implement save/restore for posted interrupts
  2014-10-09  7:24                                                 ` Zhang, Yang Z
  2014-10-09  8:41                                                   ` Fabio Fantoni
@ 2014-10-09 11:35                                                   ` Malcolm Crossley
  2014-10-09 13:19                                                   ` Andrew Cooper
  2014-10-09 13:31                                                   ` Olaf Hering
  3 siblings, 0 replies; 60+ messages in thread
From: Malcolm Crossley @ 2014-10-09 11:35 UTC (permalink / raw)
  To: Zhang, Yang Z, Olaf Hering
  Cc: Tian, Kevin, Dong, Eddie, Jan Beulich, Nakajima, Jun, xen-devel

On 09/10/14 08:24, Zhang, Yang Z wrote:
> Olaf Hering wrote on 2014-10-08:
>> On Wed, Oct 08, Zhang, Yang Z wrote:
>>
>>> Olaf Hering wrote on 2014-10-08:
>>>> On Wed, Oct 08, Zhang, Yang Z wrote:
>>>>> Can you reproduce it with APICv disabled? You can add apicv=0
>>>>> into your grub to disable it.
>>>> Using 'apicv=0' with staging helps, the guest resumes properly.
>>> Does 'resumes properly' mean there are two sysrq in dmesg after resume?
>>
>> Yes.
>>
>>>> Will now try the patch you mentioned in the other mail.
>>> Thanks. I am set upping the environment for testing now.
>>
>> That patch (Message-ID: <20140804075007.GA11609@aepfle.de>) does not
>> help.
> 
> Sorry. I forget to tell that that patch must combine with one fixing from your patch.
> I rebased it based on latest Xen and the attached patch includes all fixings. Could you have a try? It works on my side, hope it helps.
> 
I can confirm that the patch below fixes the VM migration issue's
Xenserver has seen with Windows VM's running with PV drivers.

Tested-by: Malcolm Crossley <malcolm.crossley@citrix.com>

Malcolm

> diff --git a/xen/arch/x86/hvm/vlapic.c b/xen/arch/x86/hvm/vlapic.c
> index 99ae1be..e702ed3 100644
> --- a/xen/arch/x86/hvm/vlapic.c
> +++ b/xen/arch/x86/hvm/vlapic.c
> @@ -1259,6 +1259,9 @@ static int lapic_save_regs(struct domain *d, hvm_domain_context_t *h)
>  
>      for_each_vcpu ( d, v )
>      {
> +        if ( hvm_funcs.sync_pir_to_irr )
> +            hvm_funcs.sync_pir_to_irr(v);
> +
>          s = vcpu_vlapic(v);
>          if ( (rc = hvm_save_entry(LAPIC_REGS, v->vcpu_id, h, s->regs)) != 0 )
>              break;
> diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
> index 304aeea..7c4d796 100644
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -1584,6 +1584,8 @@ static void vmx_process_isr(int isr, struct vcpu *v)
>  {
>      unsigned long status;
>      u8 old;
> +    int vector;
> +    struct vlapic *s = vcpu_vlapic(v);
>  
>      if ( isr < 0 )
>          isr = 0;
> @@ -1597,6 +1599,14 @@ static void vmx_process_isr(int isr, struct vcpu *v)
>          status |= isr << VMX_GUEST_INTR_STATUS_SVI_OFFSET;
>          __vmwrite(GUEST_INTR_STATUS, status);
>      }
> +    for ( vector = 0; vector < NR_VECTORS; vector++ )
> +        if (vlapic_test_vector(vector, &s->regs->data[APIC_TMR]))
> +            set_bit(vector,  v->arch.hvm_vmx.eoi_exit_bitmap);
> +        
> +    __vmwrite(0x201c, v->arch.hvm_vmx.eoi_exit_bitmap[0]);
> +    __vmwrite(0x201e, v->arch.hvm_vmx.eoi_exit_bitmap[1]);
> +    __vmwrite(0x2020, v->arch.hvm_vmx.eoi_exit_bitmap[2]);
> +    __vmwrite(0x2022, v->arch.hvm_vmx.eoi_exit_bitmap[3]);
>      vmx_vmcs_exit(v);
>  }
>  
> diff --git a/xen/include/asm-x86/hvm/vlapic.h b/xen/include/asm-x86/hvm/vlapic.h
> index bf59b95..fc8d131 100644
> --- a/xen/include/asm-x86/hvm/vlapic.h
> +++ b/xen/include/asm-x86/hvm/vlapic.h
> @@ -61,6 +61,8 @@
>  
>  #define VEC_POS(v) ((v) % 32)
>  #define REG_POS(v) (((v) / 32) * 0x10)
> +#define vlapic_test_vector(vec, bitmap)                         \
> +    test_bit(VEC_POS(vec), (uint32_t *)((bitmap) + REG_POS(vec)))
>  #define vlapic_test_and_set_vector(vec, bitmap)                         \
>      test_and_set_bit(VEC_POS(vec), (uint32_t *)((bitmap) + REG_POS(vec)))
>  #define vlapic_test_and_clear_vector(vec, bitmap)  
> 
>>
>> Olaf
> 
> 
> Best regards,
> Yang
> 
> 
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel
> 

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH] x86/hvm: implement save/restore for posted interrupts
  2014-10-09  8:43                                                     ` Zhang, Yang Z
@ 2014-10-09 12:49                                                       ` Fabio Fantoni
  0 siblings, 0 replies; 60+ messages in thread
From: Fabio Fantoni @ 2014-10-09 12:49 UTC (permalink / raw)
  To: Zhang, Yang Z, Olaf Hering
  Cc: Tian, Kevin, Dong, Eddie, Jan Beulich, Nakajima, Jun, xen-devel

Il 09/10/2014 10:43, Zhang, Yang Z ha scritto:
> Fabio Fantoni wrote on 2014-10-09:
>> Il 09/10/2014 09:24, Zhang, Yang Z ha scritto:
>>
>>
>> 	Olaf Hering wrote on 2014-10-08:
>>
>> 		On Wed, Oct 08, Zhang, Yang Z wrote:
>>
>>
>> 			Olaf Hering wrote on 2014-10-08:
>>
>> 				On Wed, Oct 08, Zhang, Yang Z wrote:
>>
>> 					Can you reproduce it with APICv disabled? You can add apicv=0
>> 					into your grub to disable it.
>>
>> 				Using 'apicv=0' with staging helps, the guest resumes properly.
>>
>> 			Does 'resumes properly' mean there are two sysrq in dmesg after
>> resume?
>>
>>
>> 		Yes.
>>
>>
>> 				Will now try the patch you mentioned in the other mail.
>>
>> 			Thanks. I am set upping the environment for testing now.
>>
>>
>> 		That patch (Message-ID: <20140804075007.GA11609@aepfle.de>
>> <mailto:20140804075007.GA11609@aepfle.de> ) does not
>> 		help.
>>
>>
>> 	Sorry. I forget to tell that that patch must combine with one fixing
>> from your patch.
>> 	I rebased it based on latest Xen and the attached patch includes all fixings.
>> Could you have a try? It works on my side, hope it helps.
>>
>>
>> Hi, I want test this fix to see if it solves one strange save/restore
>> bug. Before this should be applied also another patch or can be
>> applied only that on xen-unstable? I ask because I not see "x86/hvm:
>> implement save/restore for posted interrupts" patch in xen-unstable that I suppose is required for this.
> This patch includes all fixings that I know. So just apply it into latest Xen and have a try.

Thanks for reply.
I tested the patch with windows 7 domUs with newer pv drivers, 
unfortunately does not solve the problem after the save/restore with qxl 
vga but at least I haven't noticed any regression.
Initial post about my save/restore problem is here if someone want take 
a look:
http://lists.xen.org/archives/html/xen-devel/2014-07/msg01021.html
Latest xen-unstable remove the warning I saw previously in xl dmesg, so 
I not have any useful debug informations to help to solves it :(

Thanks for any reply and sorry for my bad english.

>
>> Thanks for any reply and sorry for my bad english.
>>
>>
>>
>>
>>
>> 	diff --git a/xen/arch/x86/hvm/vlapic.c b/xen/arch/x86/hvm/vlapic.c
>> 	index 99ae1be..e702ed3 100644
>> 	--- a/xen/arch/x86/hvm/vlapic.c
>> 	+++ b/xen/arch/x86/hvm/vlapic.c
>> 	@@ -1259,6 +1259,9 @@ static int lapic_save_regs(struct domain *d,
>> hvm_domain_context_t *h)
>>
>> 	     for_each_vcpu ( d, v ) 	     { 	+        if (
>> hvm_funcs.sync_pir_to_irr ) 	+            hvm_funcs.sync_pir_to_irr(v);
>> 	+ 	         s = vcpu_vlapic(v); 	         if ( (rc =
>> hvm_save_entry(LAPIC_REGS, v->vcpu_id, h, s->regs)) != 0 ) 	
>> break; 	diff --git a/xen/arch/x86/hvm/vmx/vmx.c
>> b/xen/arch/x86/hvm/vmx/vmx.c 	index 304aeea..7c4d796 100644 	---
>> a/xen/arch/x86/hvm/vmx/vmx.c 	+++ b/xen/arch/x86/hvm/vmx/vmx.c 	@@
>> -1584,6 +1584,8 @@ static void vmx_process_isr(int isr, struct vcpu *v)
>> 	 { 	     unsigned long status; 	     u8 old; 	+    int vector; 	+
>> struct vlapic *s = vcpu_vlapic(v);
>>
>> 	     if ( isr < 0 ) 	         isr = 0; 	@@ -1597,6 +1599,14 @@ static
>> void vmx_process_isr(int isr, struct vcpu *v) 	         status |= isr <<
>> VMX_GUEST_INTR_STATUS_SVI_OFFSET; 	         __vmwrite(GUEST_INTR_STATUS,
>> status); 	     } 	+    for ( vector = 0; vector < NR_VECTORS; vector++ )
>> 	+        if (vlapic_test_vector(vector, &s->regs->data[APIC_TMR])) 	+
>>           set_bit(vector,  v->arch.hvm_vmx.eoi_exit_bitmap); 	+ 	+
>> __vmwrite(0x201c, v->arch.hvm_vmx.eoi_exit_bitmap[0]); 	+
>> __vmwrite(0x201e, v->arch.hvm_vmx.eoi_exit_bitmap[1]); 	+
>> __vmwrite(0x2020, v->arch.hvm_vmx.eoi_exit_bitmap[2]); 	+
>> __vmwrite(0x2022, v->arch.hvm_vmx.eoi_exit_bitmap[3]); 	
>> vmx_vmcs_exit(v); 	 }
>>
>> 	diff --git a/xen/include/asm-x86/hvm/vlapic.h
>> b/xen/include/asm-x86/hvm/vlapic.h 	index bf59b95..fc8d131 100644 	---
>> a/xen/include/asm-x86/hvm/vlapic.h 	+++
>> b/xen/include/asm-x86/hvm/vlapic.h 	@@ -61,6 +61,8 @@
>>
>> 	 #define VEC_POS(v) ((v) % 32) 	 #define REG_POS(v) (((v) / 32) * 0x10)
>> 	+#define vlapic_test_vector(vec, bitmap)                         \ 	+
>>   test_bit(VEC_POS(vec), (uint32_t *)((bitmap) + REG_POS(vec))) 	 #define
>> vlapic_test_and_set_vector(vec, bitmap) \ 	
>> test_and_set_bit(VEC_POS(vec), (uint32_t *)((bitmap) + REG_POS(vec))) 	
>> #define vlapic_test_and_clear_vector(vec, bitmap)
>>
>>
>>
>> 		Olaf
>>
>>
>>
>> 	Best regards,
>> 	Yang
>>
>>
>>
>>
>>
>> 	_______________________________________________
>> 	Xen-devel mailing list
>> 	Xen-devel@lists.xen.org
>> 	http://lists.xen.org/xen-devel
>>
>
> Best regards,
> Yang
>
>

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH] x86/hvm: implement save/restore for posted interrupts
  2014-10-09  7:24                                                 ` Zhang, Yang Z
  2014-10-09  8:41                                                   ` Fabio Fantoni
  2014-10-09 11:35                                                   ` Malcolm Crossley
@ 2014-10-09 13:19                                                   ` Andrew Cooper
  2014-10-10  0:48                                                     ` Zhang, Yang Z
  2014-10-09 13:31                                                   ` Olaf Hering
  3 siblings, 1 reply; 60+ messages in thread
From: Andrew Cooper @ 2014-10-09 13:19 UTC (permalink / raw)
  To: Zhang, Yang Z, Olaf Hering
  Cc: Tian, Kevin, Dong, Eddie, Jan Beulich, Nakajima, Jun, xen-devel


[-- Attachment #1.1: Type: text/plain, Size: 3850 bytes --]

On 09/10/14 08:24, Zhang, Yang Z wrote:
> Olaf Hering wrote on 2014-10-08:
>> On Wed, Oct 08, Zhang, Yang Z wrote:
>>
>>> Olaf Hering wrote on 2014-10-08:
>>>> On Wed, Oct 08, Zhang, Yang Z wrote:
>>>>> Can you reproduce it with APICv disabled? You can add apicv=0
>>>>> into your grub to disable it.
>>>> Using 'apicv=0' with staging helps, the guest resumes properly.
>>> Does 'resumes properly' mean there are two sysrq in dmesg after resume?
>> Yes.
>>
>>>> Will now try the patch you mentioned in the other mail.
>>> Thanks. I am set upping the environment for testing now.
>> That patch (Message-ID: <20140804075007.GA11609@aepfle.de>) does not
>> help.
> Sorry. I forget to tell that that patch must combine with one fixing from your patch.
> I rebased it based on latest Xen and the attached patch includes all fixings. Could you have a try? It works on my side, hope it helps.
>
> diff --git a/xen/arch/x86/hvm/vlapic.c b/xen/arch/x86/hvm/vlapic.c
> index 99ae1be..e702ed3 100644
> --- a/xen/arch/x86/hvm/vlapic.c
> +++ b/xen/arch/x86/hvm/vlapic.c
> @@ -1259,6 +1259,9 @@ static int lapic_save_regs(struct domain *d, hvm_domain_context_t *h)
>  
>      for_each_vcpu ( d, v )
>      {
> +        if ( hvm_funcs.sync_pir_to_irr )
> +            hvm_funcs.sync_pir_to_irr(v);
> +
>          s = vcpu_vlapic(v);
>          if ( (rc = hvm_save_entry(LAPIC_REGS, v->vcpu_id, h, s->regs)) != 0 )
>              break;
> diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
> index 304aeea..7c4d796 100644
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -1584,6 +1584,8 @@ static void vmx_process_isr(int isr, struct vcpu *v)
>  {
>      unsigned long status;
>      u8 old;
> +    int vector;

unsigned vector; especially as it is used in some pointer arithmetic
hidden inside the vlapic_test_vector() macro.

> +    struct vlapic *s = vcpu_vlapic(v);

's' seems to be a strange choice of variable name.  Prevailing use in
this file is "struct vlapic *vlapic = vcpu_vlapic(v);"

>  
>      if ( isr < 0 )
>          isr = 0;
> @@ -1597,6 +1599,14 @@ static void vmx_process_isr(int isr, struct vcpu *v)
>          status |= isr << VMX_GUEST_INTR_STATUS_SVI_OFFSET;
>          __vmwrite(GUEST_INTR_STATUS, status);
>      }
> +    for ( vector = 0; vector < NR_VECTORS; vector++ )

Vectors 0 to 0x0f are strictly reserved.  Any reason not to start vector
at 0x10 ?

> +        if (vlapic_test_vector(vector, &s->regs->data[APIC_TMR]))
> +            set_bit(vector,  v->arch.hvm_vmx.eoi_exit_bitmap);

Can't this loop be optimised somewhat to using word-sized |= operations?

~Andrew

> +        
> +    __vmwrite(0x201c, v->arch.hvm_vmx.eoi_exit_bitmap[0]);
> +    __vmwrite(0x201e, v->arch.hvm_vmx.eoi_exit_bitmap[1]);
> +    __vmwrite(0x2020, v->arch.hvm_vmx.eoi_exit_bitmap[2]);
> +    __vmwrite(0x2022, v->arch.hvm_vmx.eoi_exit_bitmap[3]);
>      vmx_vmcs_exit(v);
>  }
>  
> diff --git a/xen/include/asm-x86/hvm/vlapic.h b/xen/include/asm-x86/hvm/vlapic.h
> index bf59b95..fc8d131 100644
> --- a/xen/include/asm-x86/hvm/vlapic.h
> +++ b/xen/include/asm-x86/hvm/vlapic.h
> @@ -61,6 +61,8 @@
>  
>  #define VEC_POS(v) ((v) % 32)
>  #define REG_POS(v) (((v) / 32) * 0x10)
> +#define vlapic_test_vector(vec, bitmap)                         \
> +    test_bit(VEC_POS(vec), (uint32_t *)((bitmap) + REG_POS(vec)))
>  #define vlapic_test_and_set_vector(vec, bitmap)                         \
>      test_and_set_bit(VEC_POS(vec), (uint32_t *)((bitmap) + REG_POS(vec)))
>  #define vlapic_test_and_clear_vector(vec, bitmap)  
>
>> Olaf
>
> Best regards,
> Yang
>
>
>
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel


[-- Attachment #1.2: Type: text/html, Size: 5886 bytes --]

[-- Attachment #2: Type: text/plain, Size: 126 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH] x86/hvm: implement save/restore for posted interrupts
  2014-10-09  7:24                                                 ` Zhang, Yang Z
                                                                     ` (2 preceding siblings ...)
  2014-10-09 13:19                                                   ` Andrew Cooper
@ 2014-10-09 13:31                                                   ` Olaf Hering
  2014-10-09 14:11                                                     ` Olaf Hering
  3 siblings, 1 reply; 60+ messages in thread
From: Olaf Hering @ 2014-10-09 13:31 UTC (permalink / raw)
  To: Zhang, Yang Z
  Cc: Tian, Kevin, Dong, Eddie, Nakajima, Jun, Jan Beulich, xen-devel

On Thu, Oct 09, Zhang, Yang Z wrote:

> +    for ( vector = 0; vector < NR_VECTORS; vector++ )
> +        if (vlapic_test_vector(vector, &s->regs->data[APIC_TMR]))
> +            set_bit(vector,  v->arch.hvm_vmx.eoi_exit_bitmap);

Did you send out the wrong version a few weeks ago? I tested the old
patch at that time. Now I notice the new version has this change:

-        if (vlapic_test_vector(vector, &s->regs->data[APIC_ISR]))$
+        if (vlapic_test_vector(vector, &s->regs->data[APIC_TMR]))$

I will try the new version.

Olaf

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH] x86/hvm: implement save/restore for posted interrupts
  2014-10-09 13:31                                                   ` Olaf Hering
@ 2014-10-09 14:11                                                     ` Olaf Hering
  2014-10-10  0:52                                                       ` Zhang, Yang Z
  0 siblings, 1 reply; 60+ messages in thread
From: Olaf Hering @ 2014-10-09 14:11 UTC (permalink / raw)
  To: Zhang, Yang Z
  Cc: Tian, Kevin, Dong, Eddie, Nakajima, Jun, Jan Beulich, xen-devel

On Thu, Oct 09, Olaf Hering wrote:

> On Thu, Oct 09, Zhang, Yang Z wrote:
> 
> > +    for ( vector = 0; vector < NR_VECTORS; vector++ )
> > +        if (vlapic_test_vector(vector, &s->regs->data[APIC_TMR]))
> > +            set_bit(vector,  v->arch.hvm_vmx.eoi_exit_bitmap);
> 
> Did you send out the wrong version a few weeks ago? I tested the old
> patch at that time. Now I notice the new version has this change:
> 
> -        if (vlapic_test_vector(vector, &s->regs->data[APIC_ISR]))$
> +        if (vlapic_test_vector(vector, &s->regs->data[APIC_TMR]))$
> 
> I will try the new version.

Yes, this change finally worked also for me. Thanks!


Olaf

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH] x86/hvm: implement save/restore for posted interrupts
  2014-10-09 13:19                                                   ` Andrew Cooper
@ 2014-10-10  0:48                                                     ` Zhang, Yang Z
  0 siblings, 0 replies; 60+ messages in thread
From: Zhang, Yang Z @ 2014-10-10  0:48 UTC (permalink / raw)
  To: Andrew Cooper, Olaf Hering
  Cc: Tian, Kevin, Dong, Eddie, Jan Beulich, Nakajima, Jun, xen-devel

Andrew Cooper wrote on 2014-10-09:
> On 09/10/14 08:24, Zhang, Yang Z wrote:
> 
> 
> 	Olaf Hering wrote on 2014-10-08:
> 
> 		On Wed, Oct 08, Zhang, Yang Z wrote:
> 
> 
> 			Olaf Hering wrote on 2014-10-08:
> 
> 				On Wed, Oct 08, Zhang, Yang Z wrote:
> 
> 					Can you reproduce it with APICv disabled? You can add apicv=0
> 					into your grub to disable it.
> 
> 				Using 'apicv=0' with staging helps, the guest resumes properly.
> 
> 			Does 'resumes properly' mean there are two sysrq in dmesg after
> resume?
> 
> 
> 		Yes.
> 
> 
> 				Will now try the patch you mentioned in the other mail.
> 
> 			Thanks. I am set upping the environment for testing now.
> 
> 
> 		That patch (Message-ID: <20140804075007.GA11609@aepfle.de>
> <mailto:20140804075007.GA11609@aepfle.de> ) does not
> 		help.
> 
> 
> 	Sorry. I forget to tell that that patch must combine with one fixing
> from your patch.
> 	I rebased it based on latest Xen and the attached patch includes all fixings.
> Could you have a try? It works on my side, hope it helps.
> 
> 	diff --git a/xen/arch/x86/hvm/vlapic.c b/xen/arch/x86/hvm/vlapic.c
> 	index 99ae1be..e702ed3 100644
> 	--- a/xen/arch/x86/hvm/vlapic.c
> 	+++ b/xen/arch/x86/hvm/vlapic.c
> 	@@ -1259,6 +1259,9 @@ static int lapic_save_regs(struct domain *d,
> hvm_domain_context_t *h)
> 
> 	     for_each_vcpu ( d, v ) 	     { 	+        if (
> hvm_funcs.sync_pir_to_irr ) 	+            hvm_funcs.sync_pir_to_irr(v);
> 	+ 	         s = vcpu_vlapic(v); 	         if ( (rc =
> hvm_save_entry(LAPIC_REGS, v->vcpu_id, h, s->regs)) != 0 ) 	            
> break; 	diff --git a/xen/arch/x86/hvm/vmx/vmx.c
> b/xen/arch/x86/hvm/vmx/vmx.c 	index 304aeea..7c4d796 100644 	---
> a/xen/arch/x86/hvm/vmx/vmx.c 	+++ b/xen/arch/x86/hvm/vmx/vmx.c 	@@
> -1584,6 +1584,8 @@ static void vmx_process_isr(int isr, struct vcpu *v)
> 	 { 	     unsigned long status; 	     u8 old; 	+    int vector;
> 
> 
> unsigned vector; especially as it is used in some pointer arithmetic
> hidden inside the vlapic_test_vector() macro.
> 
> 
> 
> 
> 	+    struct vlapic *s = vcpu_vlapic(v);
> 
> 
> 's' seems to be a strange choice of variable name.  Prevailing use in
> this file is "struct vlapic *vlapic = vcpu_vlapic(v);"
> 
> 
> 
> 
> 
> 	     if ( isr < 0 ) 	         isr = 0; 	@@ -1597,6 +1599,14 @@ static
> void vmx_process_isr(int isr, struct vcpu *v) 	         status |= isr <<
> VMX_GUEST_INTR_STATUS_SVI_OFFSET; 	         __vmwrite(GUEST_INTR_STATUS,
> status); 	     } 	+    for ( vector = 0; vector < NR_VECTORS; vector++ )
> 
> 
> Vectors 0 to 0x0f are strictly reserved.  Any reason not to start
> vector at
> 0x10 ?
> 
> 
> 
> 
> 	+        if (vlapic_test_vector(vector, &s->regs->data[APIC_TMR]))
> 	+            set_bit(vector,  v->arch.hvm_vmx.eoi_exit_bitmap);
> 
> 
> Can't this loop be optimised somewhat to using word-sized |= operations?

This is just a test patch to see whether it is able to fix the problem. I will refine it once get the confirm from Olaf. 

> 
> ~Andrew
> 
> 
> 
> 
> 	+
> 	+    __vmwrite(0x201c, v->arch.hvm_vmx.eoi_exit_bitmap[0]);
> 	+    __vmwrite(0x201e, v->arch.hvm_vmx.eoi_exit_bitmap[1]);
> 	+    __vmwrite(0x2020, v->arch.hvm_vmx.eoi_exit_bitmap[2]);
> 	+    __vmwrite(0x2022, v->arch.hvm_vmx.eoi_exit_bitmap[3]);
> 	     vmx_vmcs_exit(v);
> 	 }
> 
> 	diff --git a/xen/include/asm-x86/hvm/vlapic.h
> b/xen/include/asm-x86/hvm/vlapic.h 	index bf59b95..fc8d131 100644 	---
> a/xen/include/asm-x86/hvm/vlapic.h 	+++
> b/xen/include/asm-x86/hvm/vlapic.h 	@@ -61,6 +61,8 @@
> 
> 	 #define VEC_POS(v) ((v) % 32) 	 #define REG_POS(v) (((v) / 32) * 0x10)
> 	+#define vlapic_test_vector(vec, bitmap)                         \ 	+  
>  test_bit(VEC_POS(vec), (uint32_t *)((bitmap) + REG_POS(vec))) 	 #define
> vlapic_test_and_set_vector(vec, bitmap) \ 	    
> test_and_set_bit(VEC_POS(vec), (uint32_t *)((bitmap) + REG_POS(vec))) 	
> #define vlapic_test_and_clear_vector(vec, bitmap)
> 
> 
> 
> 		Olaf
> 
> 
> 
> 	Best regards,
> 	Yang
> 
> 
> 
> 
> 
> 	_______________________________________________
> 	Xen-devel mailing list
> 	Xen-devel@lists.xen.org
> 	http://lists.xen.org/xen-devel
>


Best regards,
Yang

^ permalink raw reply	[flat|nested] 60+ messages in thread

* Re: [PATCH] x86/hvm: implement save/restore for posted interrupts
  2014-10-09 14:11                                                     ` Olaf Hering
@ 2014-10-10  0:52                                                       ` Zhang, Yang Z
  0 siblings, 0 replies; 60+ messages in thread
From: Zhang, Yang Z @ 2014-10-10  0:52 UTC (permalink / raw)
  To: Olaf Hering
  Cc: Tian, Kevin, Dong, Eddie, Nakajima, Jun, Jan Beulich, xen-devel

Olaf Hering wrote on 2014-10-09:
> On Thu, Oct 09, Olaf Hering wrote:
> 
>> On Thu, Oct 09, Zhang, Yang Z wrote:
>> 
>>> +    for ( vector = 0; vector < NR_VECTORS; vector++ )
>>> +        if (vlapic_test_vector(vector, &s->regs->data[APIC_TMR]))
>>> +            set_bit(vector,  v->arch.hvm_vmx.eoi_exit_bitmap);
>> 
>> Did you send out the wrong version a few weeks ago? I tested the old

Indeed, the previous version is wrong.:( That also explains why it always fails to work on your side but works to me.

>> patch at that time. Now I notice the new version has this change:
>> 
>> -        if (vlapic_test_vector(vector, &s->regs->data[APIC_ISR]))$
>> +        if (vlapic_test_vector(vector, &s->regs->data[APIC_TMR]))$
>> 
>> I will try the new version.
> 
> Yes, this change finally worked also for me. Thanks!

Great! I will refine it and send out the formal version.

> 
> 
> Olaf


Best regards,
Yang

^ permalink raw reply	[flat|nested] 60+ messages in thread

end of thread, other threads:[~2014-10-10  0:52 UTC | newest]

Thread overview: 60+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-07-03 15:09 [PATCH] x86/hvm: implement save/restore for posted interrupts Olaf Hering
2014-07-03 15:27 ` Jan Beulich
2014-07-09 11:24 ` Zhang, Yang Z
2014-07-09 21:14   ` Tian, Kevin
2014-07-16 14:28     ` Olaf Hering
2014-07-16 16:11       ` Tian, Kevin
2014-07-17  7:11         ` Olaf Hering
2014-07-17  9:29           ` Zhang, Yang Z
2014-07-18 13:58             ` Olaf Hering
2014-07-22 23:06               ` Zhang, Yang Z
2014-07-23  8:03                 ` Olaf Hering
2014-07-25 21:38             ` Tian, Kevin
2014-07-28  6:52               ` Olaf Hering
2014-07-29  7:48                 ` Zhang, Yang Z
2014-07-29  8:27                   ` Jan Beulich
2014-08-02 10:59                   ` Olaf Hering
2014-08-04  1:08                     ` Zhang, Yang Z
2014-08-04  7:50                       ` Olaf Hering
2014-08-04  7:57                         ` Zhang, Yang Z
2014-08-05 10:32                           ` Olaf Hering
2014-08-08  0:18                             ` Zhang, Yang Z
2014-09-01  6:44                             ` Zhang, Yang Z
2014-09-18  3:29                             ` Zhang, Yang Z
2014-09-19 13:32                               ` Olaf Hering
2014-09-19 13:39                                 ` Jan Beulich
2014-09-19 17:51                                   ` Andrew Cooper
2014-09-22 22:43                                     ` Tian, Kevin
2014-09-24 16:26                                       ` Malcolm Crossley
2014-09-30 16:19                                 ` Jan Beulich
2014-10-01  8:07                                   ` Olaf Hering
2014-10-01  8:29                                     ` Jan Beulich
2014-10-01 20:13                                       ` Olaf Hering
2014-10-02  6:51                                         ` Jan Beulich
2014-10-02  8:10                                           ` Andrew Cooper
2014-10-02  8:20                                             ` Olaf Hering
2014-10-08  7:37                                         ` Zhang, Yang Z
2014-10-08  7:40                                           ` Olaf Hering
2014-10-08  7:55                                             ` Zhang, Yang Z
2014-10-08  8:28                                           ` Olaf Hering
2014-10-08  8:35                                             ` Zhang, Yang Z
2014-10-08  8:54                                               ` Olaf Hering
2014-10-09  7:24                                                 ` Zhang, Yang Z
2014-10-09  8:41                                                   ` Fabio Fantoni
2014-10-09  8:43                                                     ` Zhang, Yang Z
2014-10-09 12:49                                                       ` Fabio Fantoni
2014-10-09 11:35                                                   ` Malcolm Crossley
2014-10-09 13:19                                                   ` Andrew Cooper
2014-10-10  0:48                                                     ` Zhang, Yang Z
2014-10-09 13:31                                                   ` Olaf Hering
2014-10-09 14:11                                                     ` Olaf Hering
2014-10-10  0:52                                                       ` Zhang, Yang Z
2014-07-25  9:40 ` Jan Beulich
2014-07-25 12:49   ` Tian, Kevin
2014-07-25 13:59     ` Jan Beulich
2014-07-25 21:31       ` Tian, Kevin
2014-07-28  6:46         ` Jan Beulich
2014-07-28  7:52           ` Wu, Feng
2014-07-28  8:17           ` Zhang, Yang Z
2014-07-28  9:02             ` Jan Beulich
2014-07-28  9:50               ` Liuqiming (John)

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.