* [PATCH v4 1/2] arm: read/write rank->vcpu atomically
@ 2017-02-11  2:05 Stefano Stabellini
From: Stefano Stabellini @ 2017-02-11  2:05 UTC
  To: xen-devel; +Cc: julien.grall, sstabellini

Read and write rank->vcpu atomically: we don't need a lock in
vgic_get_target_vcpu anymore. This solves the following lock inversion
bug: the rank lock should be taken first, then the vgic lock. However,
gic_update_one_lr is called with the vgic lock held, and it calls
vgic_get_target_vcpu, which tries to obtain the rank lock.
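
For illustration only, the inversion described above boils down to two
paths taking the same pair of locks in opposite orders. A minimal,
self-contained sketch with plain pthread mutexes and made-up path names
follows (this is not Xen code; the real locks are the per-rank lock and
the per-vcpu vgic lock):

/*
 * Hypothetical illustration of the lock inversion, not Xen code.
 * Path A mirrors the MMIO register handlers: rank lock, then vgic lock.
 * Path B mirrors gic_update_one_lr before this patch: vgic lock held,
 * then vgic_get_target_vcpu takes the rank lock -- the inverted order.
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t rank_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t vgic_lock = PTHREAD_MUTEX_INITIALIZER;

static void *mmio_write_path(void *arg)          /* rank -> vgic */
{
    (void)arg;
    pthread_mutex_lock(&rank_lock);
    pthread_mutex_lock(&vgic_lock);
    pthread_mutex_unlock(&vgic_lock);
    pthread_mutex_unlock(&rank_lock);
    return NULL;
}

static void *lr_update_path(void *arg)           /* vgic -> rank: inversion */
{
    (void)arg;
    pthread_mutex_lock(&vgic_lock);
    pthread_mutex_lock(&rank_lock);
    pthread_mutex_unlock(&rank_lock);
    pthread_mutex_unlock(&vgic_lock);
    return NULL;
}

int main(void)
{
    pthread_t a, b;
    pthread_create(&a, NULL, mmio_write_path, NULL);
    pthread_create(&b, NULL, lr_update_path, NULL);
    pthread_join(a, NULL);
    pthread_join(b, NULL);
    printf("done (the two paths can deadlock if they interleave)\n");
    return 0;
}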

Coverity-ID: 1381855
Coverity-ID: 1381853

Signed-off-by: Stefano Stabellini <sstabellini@kernel.org>
---
 xen/arch/arm/vgic-v2.c |  6 +++---
 xen/arch/arm/vgic-v3.c |  6 +++---
 xen/arch/arm/vgic.c    | 27 +++++----------------------
 3 files changed, 11 insertions(+), 28 deletions(-)

diff --git a/xen/arch/arm/vgic-v2.c b/xen/arch/arm/vgic-v2.c
index 3dbcfe8..b30379e 100644
--- a/xen/arch/arm/vgic-v2.c
+++ b/xen/arch/arm/vgic-v2.c
@@ -79,7 +79,7 @@ static uint32_t vgic_fetch_itargetsr(struct vgic_irq_rank *rank,
     offset &= ~(NR_TARGETS_PER_ITARGETSR - 1);
 
     for ( i = 0; i < NR_TARGETS_PER_ITARGETSR; i++, offset++ )
-        reg |= (1 << rank->vcpu[offset]) << (i * NR_BITS_PER_TARGET);
+        reg |= (1 << read_atomic(&rank->vcpu[offset])) << (i * NR_BITS_PER_TARGET);
 
     return reg;
 }
@@ -152,7 +152,7 @@ static void vgic_store_itargetsr(struct domain *d, struct vgic_irq_rank *rank,
         /* The vCPU ID always starts from 0 */
         new_target--;
 
-        old_target = rank->vcpu[offset];
+        old_target = read_atomic(&rank->vcpu[offset]);
 
         /* Only migrate the vIRQ if the target vCPU has changed */
         if ( new_target != old_target )
@@ -162,7 +162,7 @@ static void vgic_store_itargetsr(struct domain *d, struct vgic_irq_rank *rank,
                              virq);
         }
 
-        rank->vcpu[offset] = new_target;
+        write_atomic(&rank->vcpu[offset], new_target);
     }
 }
 
diff --git a/xen/arch/arm/vgic-v3.c b/xen/arch/arm/vgic-v3.c
index d61479d..7dc9b6f 100644
--- a/xen/arch/arm/vgic-v3.c
+++ b/xen/arch/arm/vgic-v3.c
@@ -108,7 +108,7 @@ static uint64_t vgic_fetch_irouter(struct vgic_irq_rank *rank,
     /* Get the index in the rank */
     offset &= INTERRUPT_RANK_MASK;
 
-    return vcpuid_to_vaffinity(rank->vcpu[offset]);
+    return vcpuid_to_vaffinity(read_atomic(&rank->vcpu[offset]));
 }
 
 /*
@@ -136,7 +136,7 @@ static void vgic_store_irouter(struct domain *d, struct vgic_irq_rank *rank,
     offset &= virq & INTERRUPT_RANK_MASK;
 
     new_vcpu = vgic_v3_irouter_to_vcpu(d, irouter);
-    old_vcpu = d->vcpu[rank->vcpu[offset]];
+    old_vcpu = d->vcpu[read_atomic(&rank->vcpu[offset])];
 
     /*
      * From the spec (see 8.9.13 in IHI 0069A), any write with an
@@ -154,7 +154,7 @@ static void vgic_store_irouter(struct domain *d, struct vgic_irq_rank *rank,
     if ( new_vcpu != old_vcpu )
         vgic_migrate_irq(old_vcpu, new_vcpu, virq);
 
-    rank->vcpu[offset] = new_vcpu->vcpu_id;
+    write_atomic(&rank->vcpu[offset], new_vcpu->vcpu_id);
 }
 
 static inline bool vgic_reg64_check_access(struct hsr_dabt dabt)
diff --git a/xen/arch/arm/vgic.c b/xen/arch/arm/vgic.c
index 364d5f0..3dd9044 100644
--- a/xen/arch/arm/vgic.c
+++ b/xen/arch/arm/vgic.c
@@ -85,7 +85,7 @@ static void vgic_rank_init(struct vgic_irq_rank *rank, uint8_t index,
     rank->index = index;
 
     for ( i = 0; i < NR_INTERRUPT_PER_RANK; i++ )
-        rank->vcpu[i] = vcpu;
+        write_atomic(&rank->vcpu[i], vcpu);
 }
 
 int domain_vgic_register(struct domain *d, int *mmio_count)
@@ -218,28 +218,11 @@ int vcpu_vgic_free(struct vcpu *v)
     return 0;
 }
 
-/* The function should be called by rank lock taken. */
-static struct vcpu *__vgic_get_target_vcpu(struct vcpu *v, unsigned int virq)
-{
-    struct vgic_irq_rank *rank = vgic_rank_irq(v, virq);
-
-    ASSERT(spin_is_locked(&rank->lock));
-
-    return v->domain->vcpu[rank->vcpu[virq & INTERRUPT_RANK_MASK]];
-}
-
-/* takes the rank lock */
 struct vcpu *vgic_get_target_vcpu(struct vcpu *v, unsigned int virq)
 {
-    struct vcpu *v_target;
     struct vgic_irq_rank *rank = vgic_rank_irq(v, virq);
-    unsigned long flags;
-
-    vgic_lock_rank(v, rank, flags);
-    v_target = __vgic_get_target_vcpu(v, virq);
-    vgic_unlock_rank(v, rank, flags);
-
-    return v_target;
+    int target = read_atomic(&rank->vcpu[virq & INTERRUPT_RANK_MASK]);
+    return v->domain->vcpu[target];
 }
 
 static int vgic_get_virq_priority(struct vcpu *v, unsigned int virq)
@@ -326,7 +309,7 @@ void vgic_disable_irqs(struct vcpu *v, uint32_t r, int n)
 
     while ( (i = find_next_bit(&mask, 32, i)) < 32 ) {
         irq = i + (32 * n);
-        v_target = __vgic_get_target_vcpu(v, irq);
+        v_target = vgic_get_target_vcpu(v, irq);
         p = irq_to_pending(v_target, irq);
         clear_bit(GIC_IRQ_GUEST_ENABLED, &p->status);
         gic_remove_from_queues(v_target, irq);
@@ -368,7 +351,7 @@ void vgic_enable_irqs(struct vcpu *v, uint32_t r, int n)
 
     while ( (i = find_next_bit(&mask, 32, i)) < 32 ) {
         irq = i + (32 * n);
-        v_target = __vgic_get_target_vcpu(v, irq);
+        v_target = vgic_get_target_vcpu(v, irq);
         p = irq_to_pending(v_target, irq);
         set_bit(GIC_IRQ_GUEST_ENABLED, &p->status);
         spin_lock_irqsave(&v_target->arch.vgic.lock, flags);
-- 
1.9.1



* [PATCH v4 2/2] arm: proper ordering for correct execution of gic_update_one_lr and vgic_store_itargetsr
From: Stefano Stabellini @ 2017-02-11  2:05 UTC
  To: xen-devel; +Cc: julien.grall, sstabellini

Concurrent execution of gic_update_one_lr and vgic_store_itargetsr can
result in the wrong pcpu being set as irq target, see
http://marc.info/?l=xen-devel&m=148218667104072.

To solve the issue, add barriers: remove an irq from the inflight
queue only after the affinity has been set. On the other side, write
the new vcpu target before checking GIC_IRQ_GUEST_MIGRATING and inflight.
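
For readability, here is a sketch of the ordering the two barriers are
meant to enforce, reconstructed from the hunks below (names as in the
patch; surrounding code abridged, so this is not a literal excerpt):

/* gic_update_one_lr: make the affinity update visible before the irq
 * disappears from the inflight queue. */
if ( test_and_clear_bit(GIC_IRQ_GUEST_MIGRATING, &p->status) )
{
    struct vcpu *v_target = vgic_get_target_vcpu(v, irq);
    irq_set_affinity(p->desc, cpumask_of(v_target->processor));
}
smp_mb();
list_del_init(&p->inflight);

/* vgic_store_itargetsr: make the new target visible before
 * vgic_migrate_irq looks at the inflight state. */
write_atomic(&rank->vcpu[offset], new_target);
smp_mb();
if ( new_target != old_target )
    vgic_migrate_irq(d->vcpu[old_target], d->vcpu[new_target], virq);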

Signed-off-by: Stefano Stabellini <sstabellini@kernel.org>
---
 xen/arch/arm/gic.c     | 3 ++-
 xen/arch/arm/vgic-v2.c | 4 ++--
 xen/arch/arm/vgic-v3.c | 4 +++-
 3 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/xen/arch/arm/gic.c b/xen/arch/arm/gic.c
index a5348f2..bb52959 100644
--- a/xen/arch/arm/gic.c
+++ b/xen/arch/arm/gic.c
@@ -503,12 +503,13 @@ static void gic_update_one_lr(struct vcpu *v, int i)
              !test_bit(GIC_IRQ_GUEST_MIGRATING, &p->status) )
             gic_raise_guest_irq(v, irq, p->priority);
         else {
-            list_del_init(&p->inflight);
             if ( test_and_clear_bit(GIC_IRQ_GUEST_MIGRATING, &p->status) )
             {
                 struct vcpu *v_target = vgic_get_target_vcpu(v, irq);
                 irq_set_affinity(p->desc, cpumask_of(v_target->processor));
             }
+            smp_mb();
+            list_del_init(&p->inflight);
         }
     }
 }
diff --git a/xen/arch/arm/vgic-v2.c b/xen/arch/arm/vgic-v2.c
index b30379e..f47286e 100644
--- a/xen/arch/arm/vgic-v2.c
+++ b/xen/arch/arm/vgic-v2.c
@@ -153,6 +153,8 @@ static void vgic_store_itargetsr(struct domain *d, struct vgic_irq_rank *rank,
         new_target--;
 
         old_target = read_atomic(&rank->vcpu[offset]);
+        write_atomic(&rank->vcpu[offset], new_target);
+        smp_mb();
 
         /* Only migrate the vIRQ if the target vCPU has changed */
         if ( new_target != old_target )
@@ -161,8 +163,6 @@ static void vgic_store_itargetsr(struct domain *d, struct vgic_irq_rank *rank,
                              d->vcpu[new_target],
                              virq);
         }
-
-        write_atomic(&rank->vcpu[offset], new_target);
     }
 }
 
diff --git a/xen/arch/arm/vgic-v3.c b/xen/arch/arm/vgic-v3.c
index 7dc9b6f..e826666 100644
--- a/xen/arch/arm/vgic-v3.c
+++ b/xen/arch/arm/vgic-v3.c
@@ -150,11 +150,13 @@ static void vgic_store_irouter(struct domain *d, struct vgic_irq_rank *rank,
     if ( !new_vcpu )
         return;
 
+    write_atomic(&rank->vcpu[offset], new_vcpu->vcpu_id);
+    smp_mb();
+
     /* Only migrate the IRQ if the target vCPU has changed */
     if ( new_vcpu != old_vcpu )
         vgic_migrate_irq(old_vcpu, new_vcpu, virq);
 
-    write_atomic(&rank->vcpu[offset], new_vcpu->vcpu_id);
 }
 
 static inline bool vgic_reg64_check_access(struct hsr_dabt dabt)
-- 
1.9.1



* Re: [PATCH v4 2/2] arm: proper ordering for correct execution of gic_update_one_lr and vgic_store_itargetsr
From: Stefano Stabellini @ 2017-02-14  0:29 UTC
  To: Stefano Stabellini; +Cc: xen-devel, julien.grall

On Fri, 10 Feb 2017, Stefano Stabellini wrote:
> Concurrent execution of gic_update_one_lr and vgic_store_itargetsr can
> result in the wrong pcpu being set as irq target, see
> http://marc.info/?l=xen-devel&m=148218667104072.
> 
> To solve the issue, add barriers, remove an irq from the inflight
> queue, only after the affinity has been set. On the other end, write the
> new vcpu target, before checking GIC_IRQ_GUEST_MIGRATING and inflight.
> 
> Signed-off-by: Stefano Stabellini <sstabellini@kernel.org>

For reference, with this patch, gic_update_one_lr and vgic_store_itargetsr
can still run simultaneously on two cpus without any lock protection. The
following scenario can happen:

  <GIC_IRQ_GUEST_MIGRATING is set>      <GIC_IRQ_GUEST_MIGRATING is set>
  CPU0: gic_update_one_lr               CPU1: vgic_store_itargetsr
  ----------------------------------------------------------------------
  clear GIC_IRQ_GUEST_MIGRATING
                                        set rank->vcpu (final)
                                        vgic_migrate_irq
                                          if (inflight) set GIC_IRQ_GUEST_MIGRATING
  read rank->vcpu (final)
  irq_set_affinity (final)
  remove from inflight

At this point, the affinity is correctly set to the right pcpu and
everything else is OK too, except that GIC_IRQ_GUEST_MIGRATING is still
set, while inflight has been cleared.

What does that mean? The next time gic_update_one_lr is called to clean
up this interrupt, the GIC_IRQ_GUEST_MIGRATING path is taken again,
even though it's not necessary. irq_set_affinity is called one more
time, but that should be harmless. GIC_IRQ_GUEST_QUEUED is also
ignored, but I think that's acceptable: it is only set at this stage if
a second interrupt came through while the first one was active (not
possible for level interrupts) and the vcpu wasn't current. In that
case, the second interrupt will be lost, but the next time we get an
interrupt it will be injected as usual.
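
To make the above concrete, this is the post-patch else-branch of
gic_update_one_lr (reconstructed from the diff in the patch, with added
comments describing the second, spurious pass; not a literal excerpt):

else
{
    /* On the later pass MIGRATING is, wrongly, still set, so this
     * test-and-clear succeeds once more ... */
    if ( test_and_clear_bit(GIC_IRQ_GUEST_MIGRATING, &p->status) )
    {
        struct vcpu *v_target = vgic_get_target_vcpu(v, irq);
        /* ... and irq_set_affinity re-applies an affinity that is
         * already correct, which is harmless. */
        irq_set_affinity(p->desc, cpumask_of(v_target->processor));
    }
    smp_mb();
    list_del_init(&p->inflight);
}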

If that's not acceptable, we can avoid this scenario by using this patch

http://marc.info/?l=xen-devel&m=148237295920492


> ---
>  xen/arch/arm/gic.c     | 3 ++-
>  xen/arch/arm/vgic-v2.c | 4 ++--
>  xen/arch/arm/vgic-v3.c | 4 +++-
>  3 files changed, 7 insertions(+), 4 deletions(-)
> 
> diff --git a/xen/arch/arm/gic.c b/xen/arch/arm/gic.c
> index a5348f2..bb52959 100644
> --- a/xen/arch/arm/gic.c
> +++ b/xen/arch/arm/gic.c
> @@ -503,12 +503,13 @@ static void gic_update_one_lr(struct vcpu *v, int i)
>               !test_bit(GIC_IRQ_GUEST_MIGRATING, &p->status) )
>              gic_raise_guest_irq(v, irq, p->priority);
>          else {
> -            list_del_init(&p->inflight);
>              if ( test_and_clear_bit(GIC_IRQ_GUEST_MIGRATING, &p->status) )
>              {
>                  struct vcpu *v_target = vgic_get_target_vcpu(v, irq);
>                  irq_set_affinity(p->desc, cpumask_of(v_target->processor));
>              }
> +            smp_mb();
> +            list_del_init(&p->inflight);
>          }
>      }
>  }
> diff --git a/xen/arch/arm/vgic-v2.c b/xen/arch/arm/vgic-v2.c
> index b30379e..f47286e 100644
> --- a/xen/arch/arm/vgic-v2.c
> +++ b/xen/arch/arm/vgic-v2.c
> @@ -153,6 +153,8 @@ static void vgic_store_itargetsr(struct domain *d, struct vgic_irq_rank *rank,
>          new_target--;
>  
>          old_target = read_atomic(&rank->vcpu[offset]);
> +        write_atomic(&rank->vcpu[offset], new_target);
> +        smp_mb();
>  
>          /* Only migrate the vIRQ if the target vCPU has changed */
>          if ( new_target != old_target )
> @@ -161,8 +163,6 @@ static void vgic_store_itargetsr(struct domain *d, struct vgic_irq_rank *rank,
>                               d->vcpu[new_target],
>                               virq);
>          }
> -
> -        write_atomic(&rank->vcpu[offset], new_target);
>      }
>  }
>  
> diff --git a/xen/arch/arm/vgic-v3.c b/xen/arch/arm/vgic-v3.c
> index 7dc9b6f..e826666 100644
> --- a/xen/arch/arm/vgic-v3.c
> +++ b/xen/arch/arm/vgic-v3.c
> @@ -150,11 +150,13 @@ static void vgic_store_irouter(struct domain *d, struct vgic_irq_rank *rank,
>      if ( !new_vcpu )
>          return;
>  
> +    write_atomic(&rank->vcpu[offset], new_vcpu->vcpu_id);
> +    smp_mb();
> +
>      /* Only migrate the IRQ if the target vCPU has changed */
>      if ( new_vcpu != old_vcpu )
>          vgic_migrate_irq(old_vcpu, new_vcpu, virq);
>  
> -    write_atomic(&rank->vcpu[offset], new_vcpu->vcpu_id);
>  }
>  
>  static inline bool vgic_reg64_check_access(struct hsr_dabt dabt)
> -- 
> 1.9.1
> 


* Re: [PATCH v4 1/2] arm: read/write rank->vcpu atomically
From: Julien Grall @ 2017-02-16 18:55 UTC
  To: Stefano Stabellini, xen-devel; +Cc: nd

Hi Stefano,

On 11/02/17 02:05, Stefano Stabellini wrote:
> We don't need a lock in vgic_get_target_vcpu anymore, solving the
> following lock inversion bug: the rank lock should be taken first, then
> the vgic lock. However, gic_update_one_lr is called with the vgic lock
> held, and it calls vgic_get_target_vcpu, which tries to obtain the rank
> lock.
>
> Coverity-ID: 1381855
> Coverity-ID: 1381853
>
> Signed-off-by: Stefano Stabellini <sstabellini@kernel.org>
> ---
>  xen/arch/arm/vgic-v2.c |  6 +++---
>  xen/arch/arm/vgic-v3.c |  6 +++---
>  xen/arch/arm/vgic.c    | 27 +++++----------------------
>  3 files changed, 11 insertions(+), 28 deletions(-)
>
> diff --git a/xen/arch/arm/vgic-v2.c b/xen/arch/arm/vgic-v2.c
> index 3dbcfe8..b30379e 100644
> --- a/xen/arch/arm/vgic-v2.c
> +++ b/xen/arch/arm/vgic-v2.c
> @@ -79,7 +79,7 @@ static uint32_t vgic_fetch_itargetsr(struct vgic_irq_rank *rank,
>      offset &= ~(NR_TARGETS_PER_ITARGETSR - 1);
>
>      for ( i = 0; i < NR_TARGETS_PER_ITARGETSR; i++, offset++ )
> -        reg |= (1 << rank->vcpu[offset]) << (i * NR_BITS_PER_TARGET);
> +        reg |= (1 << read_atomic(&rank->vcpu[offset])) << (i * NR_BITS_PER_TARGET);

I was about to suggest turning vcpu into an atomic_t to catch
potential misuse. But unfortunately atomic_t is an int.

So I would probably add a comment on top of the vcpu field in
vgic_irq_rank explaining that it should be read/written using atomic
operations.

With that:

Reviewed-by: Julien Grall <julien.grall@arm.com>

Cheers,

-- 
Julien Grall


* Re: [PATCH v4 2/2] arm: proper ordering for correct execution of gic_update_one_lr and vgic_store_itargetsr
From: Julien Grall @ 2017-02-16 19:36 UTC
  To: Stefano Stabellini, xen-devel; +Cc: nd

Hi Stefano,

On 11/02/17 02:05, Stefano Stabellini wrote:
> Concurrent execution of gic_update_one_lr and vgic_store_itargetsr can
> result in the wrong pcpu being set as irq target, see
> http://marc.info/?l=xen-devel&m=148218667104072.
>
> To solve the issue, add barriers, remove an irq from the inflight
> queue, only after the affinity has been set. On the other end, write the
> new vcpu target, before checking GIC_IRQ_GUEST_MIGRATING and inflight.
>
> Signed-off-by: Stefano Stabellini <sstabellini@kernel.org>
> ---
>  xen/arch/arm/gic.c     | 3 ++-
>  xen/arch/arm/vgic-v2.c | 4 ++--
>  xen/arch/arm/vgic-v3.c | 4 +++-
>  3 files changed, 7 insertions(+), 4 deletions(-)
>
> diff --git a/xen/arch/arm/gic.c b/xen/arch/arm/gic.c
> index a5348f2..bb52959 100644
> --- a/xen/arch/arm/gic.c
> +++ b/xen/arch/arm/gic.c
> @@ -503,12 +503,13 @@ static void gic_update_one_lr(struct vcpu *v, int i)
>               !test_bit(GIC_IRQ_GUEST_MIGRATING, &p->status) )
>              gic_raise_guest_irq(v, irq, p->priority);
>          else {
> -            list_del_init(&p->inflight);
>              if ( test_and_clear_bit(GIC_IRQ_GUEST_MIGRATING, &p->status) )
>              {
>                  struct vcpu *v_target = vgic_get_target_vcpu(v, irq);
>                  irq_set_affinity(p->desc, cpumask_of(v_target->processor));
>              }
> +            smp_mb();
> +            list_del_init(&p->inflight);

I don't understand why you remove from the inflight list afterwards. If 
you do that you introduce that same problem as discussed in
<7a78c859-fa6f-ba10-b574-d8edd46ea934@arm.com>

As long as the interrupt is routed to the pCPU running 
gic_update_one_lr, the interrupt cannot fire because the interrupts are 
masked. However, as soon as irq_set_affinity is called the interrupt may 
fire on the other pCPU.

However, list_del_init is not atomic and not protected by any lock. So 
vgic_vcpu_inject_irq may see a corrupted version of {p,n}->inflight.
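
To spell out why (a simplified rendering of the Linux-style list
helpers Xen uses; details such as poisoning omitted), neither side is a
single atomic access:

static inline void __list_del(struct list_head *prev, struct list_head *next)
{
    next->prev = prev;        /* two separate stores: a concurrent     */
    prev->next = next;        /* reader can observe the window between */
}

static inline void list_del_init(struct list_head *entry)
{
    __list_del(entry->prev, entry->next);
    INIT_LIST_HEAD(entry);    /* entry->next = entry->prev = entry */
}

static inline int list_empty(const struct list_head *head)
{
    return head->next == head; /* plain load, nothing orders it */
}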

Did I miss anything?

Cheers,

-- 
Julien Grall


* Re: [PATCH v4 1/2] arm: read/write rank->vcpu atomically
From: Stefano Stabellini @ 2017-02-16 19:59 UTC
  To: Julien Grall; +Cc: xen-devel, nd, Stefano Stabellini

On Thu, 16 Feb 2017, Julien Grall wrote:
> Hi Stefano,
> 
> On 11/02/17 02:05, Stefano Stabellini wrote:
> > We don't need a lock in vgic_get_target_vcpu anymore, solving the
> > following lock inversion bug: the rank lock should be taken first, then
> > the vgic lock. However, gic_update_one_lr is called with the vgic lock
> > held, and it calls vgic_get_target_vcpu, which tries to obtain the rank
> > lock.
> > 
> > Coverity-ID: 1381855
> > Coverity-ID: 1381853
> > 
> > Signed-off-by: Stefano Stabellini <sstabellini@kernel.org>
> > ---
> >  xen/arch/arm/vgic-v2.c |  6 +++---
> >  xen/arch/arm/vgic-v3.c |  6 +++---
> >  xen/arch/arm/vgic.c    | 27 +++++----------------------
> >  3 files changed, 11 insertions(+), 28 deletions(-)
> > 
> > diff --git a/xen/arch/arm/vgic-v2.c b/xen/arch/arm/vgic-v2.c
> > index 3dbcfe8..b30379e 100644
> > --- a/xen/arch/arm/vgic-v2.c
> > +++ b/xen/arch/arm/vgic-v2.c
> > @@ -79,7 +79,7 @@ static uint32_t vgic_fetch_itargetsr(struct vgic_irq_rank
> > *rank,
> >      offset &= ~(NR_TARGETS_PER_ITARGETSR - 1);
> > 
> >      for ( i = 0; i < NR_TARGETS_PER_ITARGETSR; i++, offset++ )
> > -        reg |= (1 << rank->vcpu[offset]) << (i * NR_BITS_PER_TARGET);
> > +        reg |= (1 << read_atomic(&rank->vcpu[offset])) << (i *
> > NR_BITS_PER_TARGET);
> 
> I was about to suggest turning vcpu into an atomic_t to catch potential
> misuse. But unfortunately atomic_t is int.

Indeed


> So I would probably add a comment on top of the field vcpu in vgic_irq_rank
> explaining that vcpu should be read using atomic.
>
> With that:
> 
> Reviewed-by: Julien Grall <julien.grall@arm.com>

Thank you, I added:

+     * Use atomic operations to read/write the vcpu fields to avoid
+     * taking the rank lock.

and committed.


* Re: [PATCH v4 2/2] arm: proper ordering for correct execution of gic_update_one_lr and vgic_store_itargetsr
From: Stefano Stabellini @ 2017-02-16 22:10 UTC
  To: Julien Grall; +Cc: xen-devel, nd, Stefano Stabellini

On Thu, 16 Feb 2017, Julien Grall wrote:
> Hi Stefano,
> 
> On 11/02/17 02:05, Stefano Stabellini wrote:
> > Concurrent execution of gic_update_one_lr and vgic_store_itargetsr can
> > result in the wrong pcpu being set as irq target, see
> > http://marc.info/?l=xen-devel&m=148218667104072.
> > 
> > To solve the issue, add barriers, remove an irq from the inflight
> > queue, only after the affinity has been set. On the other end, write the
> > new vcpu target, before checking GIC_IRQ_GUEST_MIGRATING and inflight.
> > 
> > Signed-off-by: Stefano Stabellini <sstabellini@kernel.org>
> > ---
> >  xen/arch/arm/gic.c     | 3 ++-
> >  xen/arch/arm/vgic-v2.c | 4 ++--
> >  xen/arch/arm/vgic-v3.c | 4 +++-
> >  3 files changed, 7 insertions(+), 4 deletions(-)
> > 
> > diff --git a/xen/arch/arm/gic.c b/xen/arch/arm/gic.c
> > index a5348f2..bb52959 100644
> > --- a/xen/arch/arm/gic.c
> > +++ b/xen/arch/arm/gic.c
> > @@ -503,12 +503,13 @@ static void gic_update_one_lr(struct vcpu *v, int i)
> >               !test_bit(GIC_IRQ_GUEST_MIGRATING, &p->status) )
> >              gic_raise_guest_irq(v, irq, p->priority);
> >          else {
> > -            list_del_init(&p->inflight);
> >              if ( test_and_clear_bit(GIC_IRQ_GUEST_MIGRATING, &p->status) )
> >              {
> >                  struct vcpu *v_target = vgic_get_target_vcpu(v, irq);
> >                  irq_set_affinity(p->desc, cpumask_of(v_target->processor));
> >              }
> > +            smp_mb();
> > +            list_del_init(&p->inflight);
> 
> I don't understand why you remove from the inflight list afterwards. If you do
> that you introduce that same problem as discussed in
> <7a78c859-fa6f-ba10-b574-d8edd46ea934@arm.com>
>
> As long as the interrupt is routed to the pCPU running gic_update_one_lr, the
> interrupt cannot fire because the interrupts are masked.

This is not accurate: it is possible to receive a second interrupt
notification while the first one is still active.


> However, as soon as irq_set_affinity is called the interrupt may fire
> on the other pCPU.

This is true.


> However, list_del_init is not atomic and not protected by any lock. So
> vgic_vcpu_inject_irq may see a corrupted version of {p,n}->inflight.
> 
> Did I miss anything?

Moving list_del_init later ensures that there are no conflicts between
gic_update_one_lr and vgic_store_itargetsr (more specifically,
vgic_migrate_irq). If you look at the implementation of
vgic_migrate_irq, all checks depend on list_empty(&p->inflight). If we
don't move list_del_init later, given that vgic_migrate_irq can be
called with a different vgic lock taken than gic_update_one_lr, the
following scenario can happen:


  <GIC_IRQ_GUEST_MIGRATING is set>      <GIC_IRQ_GUEST_MIGRATING is set>
  CPU0: gic_update_one_lr               CPU1: vgic_store_itargetsr
  ----------------------------------------------------------------------
  remove from inflight
  clear GIC_IRQ_GUEST_MIGRATING
  read rank->vcpu (intermediate)
                                        set rank->vcpu (final)
                                        vgic_migrate_irq
                                          if (!inflight) irq_set_affinity (final)
  irq_set_affinity (intermediate)


As a result, the irq affinity is set to the wrong cpu. With this patch,
this problem doesn't occur.

However, you are right that both in the case of gic_update_one_lr and
vgic_migrate_irq, as well as the case of gic_update_one_lr and
vgic_vcpu_inject_irq that you mentioned, list_del_init (from
gic_update_one_lr) is potentially run at the same time as list_empty
(from vgic_migrate_irq or from vgic_vcpu_inject_irq), and they are not
atomic.

Also see this other potential issue: http://marc.info/?l=xen-devel&m=148703220714075 

All these concurrent accesses are difficult to understand and to deal
with. This is why my original suggestion was to use the old vcpu vgic
lock, rather than try to ensure safe concurrent accesses everywhere.
That option is still open and would solve both problems.

We only need to:

- store the vcpu to which an irq is currently injected
http://marc.info/?l=xen-devel&m=148237295020488
- check the new irq->vcpu field, and take the right vgic lock
something like http://marc.info/?l=xen-devel&m=148237295920492&w=2, but
would need improvements

Much simpler, right?
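
As a very rough sketch of that direction (hypothetical field and helper
names; the actual proposals are only in the links above and are not
reproduced here), the idea is to record which vcpu a pending_irq is
currently injected to and then lock that vcpu's vgic lock, re-checking
after acquisition:

/* Hypothetical sketch, not the linked patches. */
static struct vcpu *pending_irq_vcpu_lock(struct pending_irq *p,
                                          unsigned long *flags)
{
    struct vcpu *v;

    for ( ; ; )
    {
        v = read_atomic(&p->vcpu);                 /* hypothetical field    */
        spin_lock_irqsave(&v->arch.vgic.lock, *flags);
        if ( v == read_atomic(&p->vcpu) )          /* still the right vcpu? */
            return v;                              /* correct vgic lock held */
        spin_unlock_irqrestore(&v->arch.vgic.lock, *flags);
    }
}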


* Re: [PATCH v4 2/2] arm: proper ordering for correct execution of gic_update_one_lr and vgic_store_itargetsr
From: Julien Grall @ 2017-02-16 23:12 UTC
  To: Stefano Stabellini; +Cc: xen-devel, nd



On 16/02/2017 22:10, Stefano Stabellini wrote:
> On Thu, 16 Feb 2017, Julien Grall wrote:
>> Hi Stefano,
>>
>> On 11/02/17 02:05, Stefano Stabellini wrote:
>>> Concurrent execution of gic_update_one_lr and vgic_store_itargetsr can
>>> result in the wrong pcpu being set as irq target, see
>>> http://marc.info/?l=xen-devel&m=148218667104072.
>>>
>>> To solve the issue, add barriers, remove an irq from the inflight
>>> queue, only after the affinity has been set. On the other end, write the
>>> new vcpu target, before checking GIC_IRQ_GUEST_MIGRATING and inflight.
>>>
>>> Signed-off-by: Stefano Stabellini <sstabellini@kernel.org>
>>> ---
>>>  xen/arch/arm/gic.c     | 3 ++-
>>>  xen/arch/arm/vgic-v2.c | 4 ++--
>>>  xen/arch/arm/vgic-v3.c | 4 +++-
>>>  3 files changed, 7 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/xen/arch/arm/gic.c b/xen/arch/arm/gic.c
>>> index a5348f2..bb52959 100644
>>> --- a/xen/arch/arm/gic.c
>>> +++ b/xen/arch/arm/gic.c
>>> @@ -503,12 +503,13 @@ static void gic_update_one_lr(struct vcpu *v, int i)
>>>               !test_bit(GIC_IRQ_GUEST_MIGRATING, &p->status) )
>>>              gic_raise_guest_irq(v, irq, p->priority);
>>>          else {
>>> -            list_del_init(&p->inflight);
>>>              if ( test_and_clear_bit(GIC_IRQ_GUEST_MIGRATING, &p->status) )
>>>              {
>>>                  struct vcpu *v_target = vgic_get_target_vcpu(v, irq);
>>>                  irq_set_affinity(p->desc, cpumask_of(v_target->processor));
>>>              }
>>> +            smp_mb();
>>> +            list_del_init(&p->inflight);
>>
>> I don't understand why you remove from the inflight list afterwards. If you do
>> that you introduce that same problem as discussed in
>> <7a78c859-fa6f-ba10-b574-d8edd46ea934@arm.com>
>>
>> As long as the interrupt is routed to the pCPU running gic_update_one_lr, the
>> interrupt cannot fire because the interrupts are masked.
>
> This is not accurate: it is possible to receive a second interrupt
> notification while the first one is still active.

Can you give more detail here? Because if you look at how gic_update_one_lr is 
called from gic_clear_lrs, interrupts are masked.

So it cannot be received by Xen while you are in gic_update_one_lr and 
before irq_set_affinity is called.

>
>
>> However, as soon as irq_set_affinity is called the interrupt may fire
>> on the other pCPU.
>
> This is true.
>
>
>> However, list_del_init is not atomic and not protected by any lock. So
>> vgic_vcpu_inject_irq may see a corrupted version of {p,n}->inflight.
>>
>> Did I miss anything?
>
> Moving list_del_init later ensures that there are no conflicts between
> gic_update_one_lr and vgic_store_itargetsr (more specifically,
> vgic_migrate_irq). If you look at the implementation of
> vgic_migrate_irq, all checks depends on list_empty(&p->inflight). If we
> don't move list_del_init later, given that vgic_migrate_irq can be
> called with a different vgic lock taken than gic_update_one_lr, the
> following scenario can happen:
>
>
>   <GIC_IRQ_GUEST_MIGRATING is set>      <GIC_IRQ_GUEST_MIGRATING is set>
>   CPU0: gic_update_one_lr               CPU1: vgic_store_itargetsr
>   ----------------------------------------------------------------------
>   remove from inflight
>   clear GIC_IRQ_GUEST_MIGRATING
>   read rank->vcpu (intermediate)

This is only true if vgic_store_itargetsr tests GIC_IRQ_GUEST_MIGRATING
at this point and finds it already clear.

>                                         set rank->vcpu (final)
>                                         vgic_migrate_irq
>                                           if (!inflight) irq_set_affinity (final)
>   irq_set_affinity (intermediate)
>
>
> As a result, the irq affinity is set to the wrong cpu. With this patch,
> this problem doesn't occur.
>
> However, you are right that both in the case of gic_update_one_lr and
> vgic_migrate_irq, as well as the case of gic_update_one_lr and
> vgic_vcpu_inject_irq that you mentioned, list_del_init (from
> gic_update_one_lr) is potentially run at the same time as list_empty
> (from vgic_migrate_irq or from vgic_vcpu_inject_irq), and they are not
> atomic.
>
> Also see this other potential issue: http://marc.info/?l=xen-devel&m=148703220714075
>
> All these concurrent accesses are difficult to understand and to deal
> with. This is why my original suggestion was to use the old vcpu vgic
> lock, rather than try to ensure safe concurrent accesses everywhere.
> That option is still open and would solve both problems.
> We only need to:
>
> - store the vcpu to which an irq is currently injected
> http://marc.info/?l=xen-devel&m=148237295020488
> - check the new irq->vcpu field, and take the right vgic lock
> something like http://marc.info/?l=xen-devel&m=148237295920492&w=2, but
> would need improvements
>
> Much simpler, right?

Wouldn't it be easier to just take the desc->lock to protect the 
concurrent access?

Cheers,

-- 
Julien Grall


* Re: [PATCH v4 2/2] arm: proper ordering for correct execution of gic_update_one_lr and vgic_store_itargetsr
From: Stefano Stabellini @ 2017-02-16 23:39 UTC
  To: Julien Grall; +Cc: xen-devel, nd, Stefano Stabellini

On Thu, 16 Feb 2017, Julien Grall wrote:
> On 16/02/2017 22:10, Stefano Stabellini wrote:
> > On Thu, 16 Feb 2017, Julien Grall wrote:
> > > Hi Stefano,
> > > 
> > > On 11/02/17 02:05, Stefano Stabellini wrote:
> > > > Concurrent execution of gic_update_one_lr and vgic_store_itargetsr can
> > > > result in the wrong pcpu being set as irq target, see
> > > > http://marc.info/?l=xen-devel&m=148218667104072.
> > > > 
> > > > To solve the issue, add barriers, remove an irq from the inflight
> > > > queue, only after the affinity has been set. On the other end, write the
> > > > new vcpu target, before checking GIC_IRQ_GUEST_MIGRATING and inflight.
> > > > 
> > > > Signed-off-by: Stefano Stabellini <sstabellini@kernel.org>
> > > > ---
> > > >  xen/arch/arm/gic.c     | 3 ++-
> > > >  xen/arch/arm/vgic-v2.c | 4 ++--
> > > >  xen/arch/arm/vgic-v3.c | 4 +++-
> > > >  3 files changed, 7 insertions(+), 4 deletions(-)
> > > > 
> > > > diff --git a/xen/arch/arm/gic.c b/xen/arch/arm/gic.c
> > > > index a5348f2..bb52959 100644
> > > > --- a/xen/arch/arm/gic.c
> > > > +++ b/xen/arch/arm/gic.c
> > > > @@ -503,12 +503,13 @@ static void gic_update_one_lr(struct vcpu *v, int
> > > > i)
> > > >               !test_bit(GIC_IRQ_GUEST_MIGRATING, &p->status) )
> > > >              gic_raise_guest_irq(v, irq, p->priority);
> > > >          else {
> > > > -            list_del_init(&p->inflight);
> > > >              if ( test_and_clear_bit(GIC_IRQ_GUEST_MIGRATING,
> > > > &p->status) )
> > > >              {
> > > >                  struct vcpu *v_target = vgic_get_target_vcpu(v, irq);
> > > >                  irq_set_affinity(p->desc,
> > > > cpumask_of(v_target->processor));
> > > >              }
> > > > +            smp_mb();
> > > > +            list_del_init(&p->inflight);
> > > 
> > > I don't understand why you remove from the inflight list afterwards. If
> > > you do
> > > that you introduce that same problem as discussed in
> > > <7a78c859-fa6f-ba10-b574-d8edd46ea934@arm.com>
> > > 
> > > As long as the interrupt is routed to the pCPU running gic_update_one_lr,
> > > the
> > > interrupt cannot fire because the interrupts are masked.
> > 
> > This is not accurate: it is possible to receive a second interrupt
> > notification while the first one is still active.
> 
> Can you detail here? Because if you look at how gic_update_one_lr is called
> from gic_clear_lrs, interrupts are masked.
> 
> So it cannot be received by Xen while you are in gic_update_one_lr and before
> irq_set_affinity is called.

Yes, you are right, I meant generally. In this case, it is as you say.


> > > However, as soon as irq_set_affinity is called the interrupt may fire
> > > on the other pCPU.
> > 
> > This is true.
> > 
> > 
> > > However, list_del_init is not atomic and not protected by any lock. So
> > > vgic_vcpu_inject_irq may see a corrupted version of {p,n}->inflight.
> > > 
> > > Did I miss anything?
> > 
> > Moving list_del_init later ensures that there are no conflicts between
> > gic_update_one_lr and vgic_store_itargetsr (more specifically,
> > vgic_migrate_irq). If you look at the implementation of
> > vgic_migrate_irq, all checks depend on list_empty(&p->inflight). If we
> > don't move list_del_init later, given that vgic_migrate_irq can be
> > called with a different vgic lock taken than gic_update_one_lr, the
> > following scenario can happen:
> > 
> > 
> >   <GIC_IRQ_GUEST_MIGRATING is set>      <GIC_IRQ_GUEST_MIGRATING is set>
> >   CPU0: gic_update_one_lr               CPU1: vgic_store_itargetsr
> >   ----------------------------------------------------------------------
> >   remove from inflight
> >   clear GIC_IRQ_GUEST_MIGRATING
> >   read rank->vcpu (intermediate)
> 
> It is only true if vgic_store_itargetsr is testing GIC_IRQ_GUEST_MIGRATING
> here and it was clear.

Right, that's the scenario, see the right column. If you meant to say
something else, I couldn't understand, sorry.

I have been playing with rearranging these few lines of code in
gic_update_one_lr and vgic_store_itargetsr/vgic_migrate_irq quite a bit,
but I couldn't figure out a way to solve all races. This patch is one of
the best outcomes I found. If you can figure out a way to rearrange this
code to not be racy, but still lockless, let me know!


> >                                         set rank->vcpu (final)
> >                                         vgic_migrate_irq
> >                                           if (!inflight) irq_set_affinity
> > (final)
> >   irq_set_affinity (intermediate)
> > 
> > 
> > As a result, the irq affinity is set to the wrong cpu. With this patch,
> > this problem doesn't occur.
> > 
> > However, you are right that both in the case of gic_update_one_lr and
> > vgic_migrate_irq, as well as the case of gic_update_one_lr and
> > vgic_vcpu_inject_irq that you mentioned, list_del_init (from
> > gic_update_one_lr) is potentially run as the same time as list_empty
> > (from vgic_migrate_irq or from vgic_vcpu_inject_irq), and they are not
> > atomic.
> > 
> > Also see this other potential issue:
> > http://marc.info/?l=xen-devel&m=148703220714075
> > 
> > All these concurrent accesses are difficult to understand and to deal
> > with. This is why my original suggestion was to use the old vcpu vgic
> > lock, rather then try to ensure safe concurrent accesses everywhere.
> > That option is still open and would solve both problems.
> > We only need to:
> > 
> > - store the vcpu to which an irq is currently injected
> > http://marc.info/?l=xen-devel&m=148237295020488
> > - check the new irq->vcpu field, and take the right vgic lock
> > something like http://marc.info/?l=xen-devel&m=148237295920492&w=2, but
> > would need improvements
> > 
> > Much simpler, right?
> 
> Wouldn't it be easier to just take the desc->lock to protect the concurrent
> access?

One more lock is almost never easier :) Things are going to get more
entangled. Also given your accurate observation that
vgic_vcpu_inject_irq wouldn't be a problem if we place
list_del_init(&p->inflight) before irq_set_affinity, I think the problem
is rather limited and could be solved easily with the lock we have.
