All of lore.kernel.org
 help / color / mirror / Atom feed
* passthrough: improve interrupt injection locking
@ 2015-10-23 11:05 David Vrabel
  2015-10-23 11:05 ` [PATCHv1 1/2] passthrough: use per-interrupt lock when injecting an interrupt David Vrabel
                   ` (2 more replies)
  0 siblings, 3 replies; 13+ messages in thread
From: David Vrabel @ 2015-10-23 11:05 UTC (permalink / raw)
  To: xen-devel; +Cc: David Vrabel, Jan Beulich, Ian Campbell

When injecting an interrupt for a passthrough device into a guest, the
per-domain event_lock is held, reducing performance when a guest has
many VCPUs and high interrupt rates.

By using a per-interrupt lock in the hot paths, this contention is
eliminated and performance improves (a bit).

For testing, a 32 VCPU guest with an NVME device assigned to it was
used.  Continual reads with small (512 B) blocks were performed on all
32 hardware queues simultaneously.

* Lock profiling:

Before (elapsed: 60 s):

(XEN) [ 3321.143155] Domain 1 event_lock:
(XEN) [ 3321.143158]   lock:    14411627(00000005:90714AEF),
                      block:     6658599(00000003:709F82BD)

After (elapsed: 60 s):

(XEN) [ 1253.921427] Domain 2 event_lock:
(XEN) [ 1253.921429]   lock:        8287(00000000:01AE517C),
                      block:          67(00000000:000D4C3A)

* Aggregate performance:

        MB/s
Before  60.8
After   68.4

David

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCHv1 1/2] passthrough: use per-interrupt lock when injecting an interrupt
  2015-10-23 11:05 passthrough: improve interrupt injection locking David Vrabel
@ 2015-10-23 11:05 ` David Vrabel
  2015-10-27 11:56   ` Jan Beulich
  2015-10-23 11:05 ` [PATCHv1 2/2] passthrough: improve locking when iterating over interrupts bound to VMs David Vrabel
  2015-10-23 12:37 ` passthrough: improve interrupt injection locking Ian Campbell
  2 siblings, 1 reply; 13+ messages in thread
From: David Vrabel @ 2015-10-23 11:05 UTC (permalink / raw)
  To: xen-devel; +Cc: David Vrabel, Jan Beulich, Ian Campbell

The use of the per-domain event_lock in hvm_dirq_assist() does not scale
with many VCPUs or interrupts.

Add a per-interrupt lock to reduce contention.  When an interrupt for a
passthrough device is being set up or torn down, we must take both
the event_lock and this new lock.

Signed-off-by: David Vrabel <david.vrabel@citrix.com>
---
 xen/drivers/passthrough/io.c | 34 +++++++++++++++++++++++-----------
 xen/include/xen/hvm/irq.h    |  1 +
 2 files changed, 24 insertions(+), 11 deletions(-)

diff --git a/xen/drivers/passthrough/io.c b/xen/drivers/passthrough/io.c
index bda9374..7c86c20 100644
--- a/xen/drivers/passthrough/io.c
+++ b/xen/drivers/passthrough/io.c
@@ -106,7 +106,7 @@ static void pt_pirq_softirq_reset(struct hvm_pirq_dpci *pirq_dpci)
 {
     struct domain *d = pirq_dpci->dom;
 
-    ASSERT(spin_is_locked(&d->event_lock));
+    ASSERT(spin_is_locked(&pirq_dpci->lock));
 
     switch ( cmpxchg(&pirq_dpci->state, 1 << STATE_SCHED, 0) )
     {
@@ -209,7 +209,6 @@ int pt_irq_create_bind(
     if ( pirq < 0 || pirq >= d->nr_pirqs )
         return -EINVAL;
 
- restart:
     spin_lock(&d->event_lock);
 
     hvm_irq_dpci = domain_get_irq_dpci(d);
@@ -237,6 +236,8 @@ int pt_irq_create_bind(
     }
     pirq_dpci = pirq_dpci(info);
 
+    spin_lock(&pirq_dpci->lock);
+
     /*
      * A crude 'while' loop with us dropping the spinlock and giving
      * the softirq_dpci a chance to run.
@@ -245,11 +246,11 @@ int pt_irq_create_bind(
      * would have spun forever and would do the same thing (wait to flush out
      * outstanding hvm_dirq_assist calls.
      */
-    if ( pt_pirq_softirq_active(pirq_dpci) )
+    while ( pt_pirq_softirq_active(pirq_dpci) )
     {
-        spin_unlock(&d->event_lock);
+        spin_unlock(&pirq_dpci->lock);
         cpu_relax();
-        goto restart;
+        spin_lock(&pirq_dpci->lock);
     }
 
     switch ( pt_irq_bind->irq_type )
@@ -301,6 +302,7 @@ int pt_irq_create_bind(
                 pirq_dpci->dom = NULL;
                 pirq_dpci->flags = 0;
                 pirq_cleanup_check(info, d);
+                spin_unlock(&pirq_dpci->lock);
                 spin_unlock(&d->event_lock);
                 return rc;
             }
@@ -311,6 +313,7 @@ int pt_irq_create_bind(
 
             if ( (pirq_dpci->flags & mask) != mask )
             {
+                spin_unlock(&pirq_dpci->lock);
                 spin_unlock(&d->event_lock);
                 return -EBUSY;
             }
@@ -331,6 +334,7 @@ int pt_irq_create_bind(
         dest_mode = !!(pirq_dpci->gmsi.gflags & VMSI_DM_MASK);
         dest_vcpu_id = hvm_girq_dest_2_vcpu_id(d, dest, dest_mode);
         pirq_dpci->gmsi.dest_vcpu_id = dest_vcpu_id;
+        spin_unlock(&pirq_dpci->lock);
         spin_unlock(&d->event_lock);
         if ( dest_vcpu_id >= 0 )
             hvm_migrate_pirqs(d->vcpu[dest_vcpu_id]);
@@ -351,6 +355,7 @@ int pt_irq_create_bind(
 
         if ( !digl || !girq )
         {
+            spin_unlock(&pirq_dpci->lock);
             spin_unlock(&d->event_lock);
             xfree(girq);
             xfree(digl);
@@ -412,6 +417,7 @@ int pt_irq_create_bind(
                 hvm_irq_dpci->link_cnt[link]--;
                 pirq_dpci->flags = 0;
                 pirq_cleanup_check(info, d);
+                spin_unlock(&pirq_dpci->lock);
                 spin_unlock(&d->event_lock);
                 xfree(girq);
                 xfree(digl);
@@ -419,6 +425,7 @@ int pt_irq_create_bind(
             }
         }
 
+        spin_unlock(&pirq_dpci->lock);
         spin_unlock(&d->event_lock);
 
         if ( iommu_verbose )
@@ -430,6 +437,7 @@ int pt_irq_create_bind(
     }
 
     default:
+        spin_unlock(&pirq_dpci->lock);
         spin_unlock(&d->event_lock);
         return -EOPNOTSUPP;
     }
@@ -481,6 +489,8 @@ int pt_irq_destroy_bind(
     pirq = pirq_info(d, machine_gsi);
     pirq_dpci = pirq_dpci(pirq);
 
+    spin_lock(&pirq_dpci->lock);
+
     if ( pt_irq_bind->irq_type != PT_IRQ_TYPE_MSI )
     {
         unsigned int bus = pt_irq_bind->u.pci.bus;
@@ -549,6 +559,7 @@ int pt_irq_destroy_bind(
         pirq_cleanup_check(pirq, d);
     }
 
+    spin_unlock(&pirq_dpci->lock);
     spin_unlock(&d->event_lock);
 
     if ( what && iommu_verbose )
@@ -566,6 +577,7 @@ int pt_irq_destroy_bind(
 
 void pt_pirq_init(struct domain *d, struct hvm_pirq_dpci *dpci)
 {
+    spin_lock_init(&dpci->lock);
     INIT_LIST_HEAD(&dpci->digl_list);
     dpci->gmsi.dest_vcpu_id = -1;
 }
@@ -621,7 +633,7 @@ int hvm_do_IRQ_dpci(struct domain *d, struct pirq *pirq)
     return 1;
 }
 
-/* called with d->event_lock held */
+/* called with pirq_dpci->lock held */
 static void __msi_pirq_eoi(struct hvm_pirq_dpci *pirq_dpci)
 {
     irq_desc_t *desc;
@@ -675,7 +687,7 @@ static void hvm_dirq_assist(struct domain *d, struct hvm_pirq_dpci *pirq_dpci)
 {
     ASSERT(d->arch.hvm_domain.irq.dpci);
 
-    spin_lock(&d->event_lock);
+    spin_lock(&pirq_dpci->lock);
     if ( test_and_clear_bool(pirq_dpci->masked) )
     {
         struct pirq *pirq = dpci_pirq(pirq_dpci);
@@ -687,7 +699,7 @@ static void hvm_dirq_assist(struct domain *d, struct hvm_pirq_dpci *pirq_dpci)
 
             if ( pirq_dpci->flags & HVM_IRQ_DPCI_GUEST_MSI )
             {
-                spin_unlock(&d->event_lock);
+                spin_unlock(&pirq_dpci->lock);
                 return;
             }
         }
@@ -695,7 +707,7 @@ static void hvm_dirq_assist(struct domain *d, struct hvm_pirq_dpci *pirq_dpci)
         if ( pirq_dpci->flags & HVM_IRQ_DPCI_GUEST_MSI )
         {
             vmsi_deliver_pirq(d, pirq_dpci);
-            spin_unlock(&d->event_lock);
+            spin_unlock(&pirq_dpci->lock);
             return;
         }
 
@@ -709,7 +721,7 @@ static void hvm_dirq_assist(struct domain *d, struct hvm_pirq_dpci *pirq_dpci)
         {
             /* for translated MSI to INTx interrupt, eoi as early as possible */
             __msi_pirq_eoi(pirq_dpci);
-            spin_unlock(&d->event_lock);
+            spin_unlock(&pirq_dpci->lock);
             return;
         }
 
@@ -723,7 +735,7 @@ static void hvm_dirq_assist(struct domain *d, struct hvm_pirq_dpci *pirq_dpci)
         ASSERT(pt_irq_need_timer(pirq_dpci->flags));
         set_timer(&pirq_dpci->timer, NOW() + PT_IRQ_TIME_OUT);
     }
-    spin_unlock(&d->event_lock);
+    spin_unlock(&pirq_dpci->lock);
 }
 
 static void __hvm_dpci_eoi(struct domain *d,
diff --git a/xen/include/xen/hvm/irq.h b/xen/include/xen/hvm/irq.h
index 4c9cb20..8b8e461 100644
--- a/xen/include/xen/hvm/irq.h
+++ b/xen/include/xen/hvm/irq.h
@@ -91,6 +91,7 @@ struct hvm_irq_dpci {
 
 /* Machine IRQ to guest device/intx mapping. */
 struct hvm_pirq_dpci {
+    spinlock_t lock;
     uint32_t flags;
     unsigned int state;
     bool_t masked;
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCHv1 2/2] passthrough: improve locking when iterating over interrupts bound to VMs
  2015-10-23 11:05 passthrough: improve interrupt injection locking David Vrabel
  2015-10-23 11:05 ` [PATCHv1 1/2] passthrough: use per-interrupt lock when injecting an interrupt David Vrabel
@ 2015-10-23 11:05 ` David Vrabel
  2015-10-27 12:44   ` Jan Beulich
  2015-10-28 20:42   ` Konrad Rzeszutek Wilk
  2015-10-23 12:37 ` passthrough: improve interrupt injection locking Ian Campbell
  2 siblings, 2 replies; 13+ messages in thread
From: David Vrabel @ 2015-10-23 11:05 UTC (permalink / raw)
  To: xen-devel; +Cc: David Vrabel, Jan Beulich, Ian Campbell

radix_tree_gang_lookup() only requires a RCU read lock, not the
per-domain event_lock.

Introduce a new RCU read lock and take the per-interrupt lock before
calling the callback instead.

This eliminates all contention on the event_lock when injecting
interrupts from passthrough devices.

Signed-off-by: David Vrabel <david.vrabel@citrix.com>
---
 xen/drivers/passthrough/io.c | 10 +++++++---
 xen/include/xen/sched.h      |  1 +
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/xen/drivers/passthrough/io.c b/xen/drivers/passthrough/io.c
index 7c86c20..9b51ef0 100644
--- a/xen/drivers/passthrough/io.c
+++ b/xen/drivers/passthrough/io.c
@@ -601,7 +601,7 @@ int pt_pirq_iterate(struct domain *d,
     unsigned int pirq = 0, n, i;
     struct pirq *pirqs[8];
 
-    ASSERT(spin_is_locked(&d->event_lock));
+    rcu_read_lock(&d->pirq_rcu_lock);
 
     do {
         n = radix_tree_gang_lookup(&d->pirq_tree, (void **)pirqs, pirq,
@@ -610,12 +610,18 @@ int pt_pirq_iterate(struct domain *d,
         {
             struct hvm_pirq_dpci *pirq_dpci = pirq_dpci(pirqs[i]);
 
+            spin_lock(&pirq_dpci->lock);
+
             pirq = pirqs[i]->pirq;
             if ( (pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) )
                 rc = cb(d, pirq_dpci, arg);
+
+            spin_unlock(&pirq_dpci->lock);
         }
     } while ( !rc && ++pirq < d->nr_pirqs && n == ARRAY_SIZE(pirqs) );
 
+    rcu_read_unlock(&d->pirq_rcu_lock);
+
     return rc;
 }
 
@@ -678,9 +684,7 @@ void hvm_dpci_msi_eoi(struct domain *d, int vector)
     if ( !iommu_enabled || !d->arch.hvm_domain.irq.dpci )
        return;
 
-    spin_lock(&d->event_lock);
     pt_pirq_iterate(d, _hvm_dpci_msi_eoi, (void *)(long)vector);
-    spin_unlock(&d->event_lock);
 }
 
 static void hvm_dirq_assist(struct domain *d, struct hvm_pirq_dpci *pirq_dpci)
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 3729b0f..ae98c1e 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -355,6 +355,7 @@ struct domain
      */
     struct radix_tree_root pirq_tree;
     unsigned int     nr_pirqs;
+    rcu_read_lock_t pirq_rcu_lock;
 
     enum guest_type guest_type;
 
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: passthrough: improve interrupt injection locking
  2015-10-23 11:05 passthrough: improve interrupt injection locking David Vrabel
  2015-10-23 11:05 ` [PATCHv1 1/2] passthrough: use per-interrupt lock when injecting an interrupt David Vrabel
  2015-10-23 11:05 ` [PATCHv1 2/2] passthrough: improve locking when iterating over interrupts bound to VMs David Vrabel
@ 2015-10-23 12:37 ` Ian Campbell
  2015-10-23 12:38   ` David Vrabel
  2 siblings, 1 reply; 13+ messages in thread
From: Ian Campbell @ 2015-10-23 12:37 UTC (permalink / raw)
  To: David Vrabel, xen-devel; +Cc: Jan Beulich

On Fri, 2015-10-23 at 12:05 +0100, David Vrabel wrote:
> When injecting an interrupt for a passthrough device into a guest, the
> per-domain event_lock is held, reducing performance when a guest has
> many VCPUs and high interrupt rates.

Did you CC me due to a possible impact on ARM? If so then I think since ARM
lacks this "dpci" stuff none of these changes should have any impact on
that arch.

If you think I've missed something or you CCd me for some other reason
please let me know.

Thanks,
Ian.

> 
> By using a per-interrupt lock in the hot paths, this contention is
> eliminated and performance improves (a bit).
> 
> For testing, a 32 VCPU guest with an NVME device assigned to it was
> used.  Continual reads with small (512 B) blocks were performed on all
> 32 hardware queues simultaneously.
> 
> * Lock profiling:
> 
> Before (elapsed: 60 s):
> 
> (XEN) [ 3321.143155] Domain 1 event_lock:
> (XEN) [ 3321.143158]   lock:    14411627(00000005:90714AEF),
>                       block:     6658599(00000003:709F82BD)
> 
> After (elapsed: 60 s):
> 
> (XEN) [ 1253.921427] Domain 2 event_lock:
> (XEN) [ 1253.921429]   lock:        8287(00000000:01AE517C),
>                       block:          67(00000000:000D4C3A)
> 
> * Aggregate performance:
> 
>         MB/s
> Before  60.8
> After   68.4
> 
> David
> 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: passthrough: improve interrupt injection locking
  2015-10-23 12:37 ` passthrough: improve interrupt injection locking Ian Campbell
@ 2015-10-23 12:38   ` David Vrabel
  2015-10-23 12:45     ` Ian Campbell
  0 siblings, 1 reply; 13+ messages in thread
From: David Vrabel @ 2015-10-23 12:38 UTC (permalink / raw)
  To: Ian Campbell, xen-devel; +Cc: Jan Beulich

On 23/10/15 13:37, Ian Campbell wrote:
> On Fri, 2015-10-23 at 12:05 +0100, David Vrabel wrote:
>> When injecting an interrupt for a passthrough device into a guest, the
>> per-domain event_lock is held, reducing performance when a guest has
>> many VCPUs and high interrupt rates.
> 
> Did you CC me due to a possible impact on ARM? If so then I think since ARM
> lacks this "dpci" stuff none of these changes should have any impact on
> that arch.
> 
> If you think I've missed something or you CCd me for some other reason
> please let me know.

This series seems to fall into "THE REST" category.

David

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: passthrough: improve interrupt injection locking
  2015-10-23 12:38   ` David Vrabel
@ 2015-10-23 12:45     ` Ian Campbell
  0 siblings, 0 replies; 13+ messages in thread
From: Ian Campbell @ 2015-10-23 12:45 UTC (permalink / raw)
  To: David Vrabel, xen-devel; +Cc: Jan Beulich

On Fri, 2015-10-23 at 13:38 +0100, David Vrabel wrote:
> On 23/10/15 13:37, Ian Campbell wrote:
> > On Fri, 2015-10-23 at 12:05 +0100, David Vrabel wrote:
> > > When injecting an interrupt for a passthrough device into a guest,
> > > the
> > > per-domain event_lock is held, reducing performance when a guest has
> > > many VCPUs and high interrupt rates.
> > 
> > Did you CC me due to a possible impact on ARM? If so then I think since
> > ARM
> > lacks this "dpci" stuff none of these changes should have any impact on
> > that arch.
> > 
> > If you think I've missed something or you CCd me for some other reason
> > please let me know.
> 
> This series seems to fall into "THE REST" category.

Ah, only Jan and I were CCd so I didn't think of that.

I think despite the paths this ends up being x86 specific, so I'll leave it
to Jan.

Ian.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCHv1 1/2] passthrough: use per-interrupt lock when injecting an interrupt
  2015-10-23 11:05 ` [PATCHv1 1/2] passthrough: use per-interrupt lock when injecting an interrupt David Vrabel
@ 2015-10-27 11:56   ` Jan Beulich
  2015-10-28 20:18     ` Konrad Rzeszutek Wilk
  0 siblings, 1 reply; 13+ messages in thread
From: Jan Beulich @ 2015-10-27 11:56 UTC (permalink / raw)
  To: David Vrabel, Konrad Rzeszutek Wilk; +Cc: xen-devel, Ian Campbell

>>> On 23.10.15 at 13:05, <david.vrabel@citrix.com> wrote:
> The use of the per-domain event_lock in hvm_dirq_assist() does not scale
> with many VCPUs or interrupts.
> 
> Add a per-interrupt lock to reduce contention.  When an interrupt for a
> passthrough device is being set up or torn down, we must take both
> the event_lock and this new lock.
> 
> Signed-off-by: David Vrabel <david.vrabel@citrix.com>

Konrad, could you please take a look too?

> @@ -245,11 +246,11 @@ int pt_irq_create_bind(
>       * would have spun forever and would do the same thing (wait to flush out
>       * outstanding hvm_dirq_assist calls.
>       */
> -    if ( pt_pirq_softirq_active(pirq_dpci) )
> +    while ( pt_pirq_softirq_active(pirq_dpci) )
>      {
> -        spin_unlock(&d->event_lock);
> +        spin_unlock(&pirq_dpci->lock);
>          cpu_relax();
> -        goto restart;
> +        spin_lock(&pirq_dpci->lock);
>      }

The comment ahead of pt_pirq_softirq_active() (mentioning
event_lock) will thus need updating.

> @@ -481,6 +489,8 @@ int pt_irq_destroy_bind(
>      pirq = pirq_info(d, machine_gsi);
>      pirq_dpci = pirq_dpci(pirq);
>  
> +    spin_lock(&pirq_dpci->lock);

Considering that code further down in this function checks
pirq_dpci to be non-NULL, this doesn't look safe (or else those
checks should go away, as after this addition they would be
likely to trigger e.g. Coverity warnings).

> @@ -621,7 +633,7 @@ int hvm_do_IRQ_dpci(struct domain *d, struct pirq *pirq)
>      return 1;
>  }
>  
> -/* called with d->event_lock held */
> +/* called with pirq_dpci->lock held */
>  static void __msi_pirq_eoi(struct hvm_pirq_dpci *pirq_dpci)

The fact that this is a safe change to the locking model imo needs
to be spelled out explicitly in the commit message. Afaict it's safe
only because desc_guest_eoi() uses pirq for nothing else than to
(atomically!) clear pirq->masked.

> @@ -675,7 +687,7 @@ static void hvm_dirq_assist(struct domain *d, struct hvm_pirq_dpci *pirq_dpci)
>  {
>      ASSERT(d->arch.hvm_domain.irq.dpci);
>  
> -    spin_lock(&d->event_lock);
> +    spin_lock(&pirq_dpci->lock);
>      if ( test_and_clear_bool(pirq_dpci->masked) )
>      {
>          struct pirq *pirq = dpci_pirq(pirq_dpci);

Along the same lines - it's not obvious that the uses of pirq here are
safe with event_lock no longer held. In particular I wonder about
send_guest_pirq() - despite the other use in __do_IRQ_guest() not
doing any locking either I'm not convinced this is correct.

Jan

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCHv1 2/2] passthrough: improve locking when iterating over interrupts bound to VMs
  2015-10-23 11:05 ` [PATCHv1 2/2] passthrough: improve locking when iterating over interrupts bound to VMs David Vrabel
@ 2015-10-27 12:44   ` Jan Beulich
  2015-10-27 13:11     ` David Vrabel
  2015-10-28 20:42   ` Konrad Rzeszutek Wilk
  1 sibling, 1 reply; 13+ messages in thread
From: Jan Beulich @ 2015-10-27 12:44 UTC (permalink / raw)
  To: David Vrabel; +Cc: xen-devel, Ian Campbell

>>> On 23.10.15 at 13:05, <david.vrabel@citrix.com> wrote:
> radix_tree_gang_lookup() only requires a RCU read lock, not the
> per-domain event_lock.

... when not caring about a consistent snapshot.

> @@ -678,9 +684,7 @@ void hvm_dpci_msi_eoi(struct domain *d, int vector)
>      if ( !iommu_enabled || !d->arch.hvm_domain.irq.dpci )
>         return;
>  
> -    spin_lock(&d->event_lock);
>      pt_pirq_iterate(d, _hvm_dpci_msi_eoi, (void *)(long)vector);
> -    spin_unlock(&d->event_lock);
>  }

How about other callers of pt_pirq_iterate()?

Jan

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCHv1 2/2] passthrough: improve locking when iterating over interrupts bound to VMs
  2015-10-27 12:44   ` Jan Beulich
@ 2015-10-27 13:11     ` David Vrabel
  0 siblings, 0 replies; 13+ messages in thread
From: David Vrabel @ 2015-10-27 13:11 UTC (permalink / raw)
  To: Jan Beulich, David Vrabel; +Cc: xen-devel, Ian Campbell

On 27/10/15 12:44, Jan Beulich wrote:
>>>> On 23.10.15 at 13:05, <david.vrabel@citrix.com> wrote:
>> radix_tree_gang_lookup() only requires a RCU read lock, not the
>> per-domain event_lock.
> 
> ... when not caring about a consistent snapshot.
> 
>> @@ -678,9 +684,7 @@ void hvm_dpci_msi_eoi(struct domain *d, int vector)
>>      if ( !iommu_enabled || !d->arch.hvm_domain.irq.dpci )
>>         return;
>>  
>> -    spin_lock(&d->event_lock);
>>      pt_pirq_iterate(d, _hvm_dpci_msi_eoi, (void *)(long)vector);
>> -    spin_unlock(&d->event_lock);
>>  }
> 
> How about other callers of pt_pirq_iterate()?

They weren't hot so I didn't look at them.

David

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCHv1 1/2] passthrough: use per-interrupt lock when injecting an interrupt
  2015-10-27 11:56   ` Jan Beulich
@ 2015-10-28 20:18     ` Konrad Rzeszutek Wilk
  2015-10-29  9:11       ` Jan Beulich
  0 siblings, 1 reply; 13+ messages in thread
From: Konrad Rzeszutek Wilk @ 2015-10-28 20:18 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel, David Vrabel, Ian Campbell

.snip..
> > @@ -481,6 +489,8 @@ int pt_irq_destroy_bind(
> >      pirq = pirq_info(d, machine_gsi);
> >      pirq_dpci = pirq_dpci(pirq);
> >  
> > +    spin_lock(&pirq_dpci->lock);
> 
> Considering that code further down in this function checks
> pirq_dpci to be non-NULL, this doesn't look safe (or else those
> checks should go away, as after this addition they would be
> likely to trigger e.g. Coverity warnings).

?  The checks are for pirq_dpci->dom.

> 
> > @@ -621,7 +633,7 @@ int hvm_do_IRQ_dpci(struct domain *d, struct pirq *pirq)
> >      return 1;
> >  }
> >  
> > -/* called with d->event_lock held */
> > +/* called with pirq_dpci->lock held */
> >  static void __msi_pirq_eoi(struct hvm_pirq_dpci *pirq_dpci)
> 
> The fact that this is a safe change to the locking model imo needs
> to be spelled out explicitly in the commit message. Afaict it's safe
> only because desc_guest_eoi() uses pirq for nothing else than to
> (atomically!) clear pirq->masked.
> 
> > @@ -675,7 +687,7 @@ static void hvm_dirq_assist(struct domain *d, struct hvm_pirq_dpci *pirq_dpci)
> >  {
> >      ASSERT(d->arch.hvm_domain.irq.dpci);
> >  
> > -    spin_lock(&d->event_lock);
> > +    spin_lock(&pirq_dpci->lock);
> >      if ( test_and_clear_bool(pirq_dpci->masked) )
> >      {
> >          struct pirq *pirq = dpci_pirq(pirq_dpci);
> 
> Along the same lines - it's not obvious that the uses of pirq here are
> safe with event_lock no longer held. In particular I wonder about
> send_guest_pirq() - despite the other use in __do_IRQ_guest() not
> doing any locking either I'm not convinced this is correct.


It seems that the event channel mechanism only uses the event channel
lock when expanding and initializing (FIFO). For the old mechanism
it was for binding, closing (uhuh), status, reset, and set_priority.

The 'closing' is interesting. What if the guest was closing the port
while we are to notify it of an interrupt (so inside hvm_dirq_assist).

evtchn_close will take the event_lock call pirq_cleanup_check
and calls pt_pirq_cleanup_check. We check for STATE_RUN (still set
as dpci_softirq hasn't finished calling hvm_dirq_assist) so it returns
zero .. and does not call radix_tree_delete.

Then evtchn_close goes to 'unlink_pirq_port' and then follows up with
'unmap_domain_pirq_emuirq'. That sets emuirq to IRQ_UNBOUND.

OK, now hvm_dirq_assist is now inside the first 'if' and checks:

684         if ( hvm_domain_use_pirq(d, pirq) )

which would have been true, but now is false (as emuirq is now
IRQ_UNBOUND). 

Lets also assume this is for an MSI.

The 'pt_irq_need_timer' returns 0:

142     return !(flags & (HVM_IRQ_DPCI_GUEST_MSI | HVM_IRQ_DPCI_TRANSLATE));

which hits this:

723         ASSERT(pt_irq_need_timer(pirq_dpci->flags));

and blows up.

If we are not compiled with 'debug=y' then we would set a timer which
8 ms later calls 'pt_irq_time_out'. It then clears the STATE_RUN and
the softirq is done.

So lets switch back to 'evtchn_close'.. which unlocks the event_lock
and is done.

The 'pt_irq_time_out' is pretty much a nop - takes the event_lock,
does not iterate over the list as it is an MSI one so no legacy interrupts.
Calls pt_irq_guest_eoi over the radix tree - and pt_irq_guest_eoi does nothing
as the _HVM_IRQ_DPCI_EOI_LATCH_SHIFT is not set.

I think that is the only issue - when using the legacy event channels
and the guest fiddling with the pirq. I suppose that means anybody
using 'unmap_domain_pirq_emuirq' will hit this ASSERT(). That means:

 arch_domain_soft_reset
 physdev_unmap_pirq
 evtchn_close

Thought you could 'fix' this by doing:


diff --git a/xen/drivers/passthrough/io.c b/xen/drivers/passthrough/io.c
index bda9374..6ea59db 100644
--- a/xen/drivers/passthrough/io.c
+++ b/xen/drivers/passthrough/io.c
@@ -720,8 +720,8 @@ static void hvm_dirq_assist(struct domain *d, struct hvm_pirq_dpci *pirq_dpci)
          * guest will never deal with the irq, then the physical interrupt line
          * will never be deasserted.
          */
-        ASSERT(pt_irq_need_timer(pirq_dpci->flags));
-        set_timer(&pirq_dpci->timer, NOW() + PT_IRQ_TIME_OUT);
+        if (pt_irq_need_timer(pirq_dpci->flags))
+            set_timer(&pirq_dpci->timer, NOW() + PT_IRQ_TIME_OUT);
     }
     spin_unlock(&d->event_lock);
 }






> 
> Jan
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCHv1 2/2] passthrough: improve locking when iterating over interrupts bound to VMs
  2015-10-23 11:05 ` [PATCHv1 2/2] passthrough: improve locking when iterating over interrupts bound to VMs David Vrabel
  2015-10-27 12:44   ` Jan Beulich
@ 2015-10-28 20:42   ` Konrad Rzeszutek Wilk
  1 sibling, 0 replies; 13+ messages in thread
From: Konrad Rzeszutek Wilk @ 2015-10-28 20:42 UTC (permalink / raw)
  To: David Vrabel; +Cc: xen-devel, Ian Campbell, Jan Beulich

On Fri, Oct 23, 2015 at 12:05:22PM +0100, David Vrabel wrote:
> radix_tree_gang_lookup() only requires a RCU read lock, not the
> per-domain event_lock.

Don't you need to make some form of 'spin_lock_init' call?

> 
> Introduce a new RCU read lock and take the per-interrupt lock before
> calling the callback instead.
> 
> This eliminates all contention on the event_lock when injecting
> interrupts from passthrough devices.

This pt_pirq_iterate is called from:

hvm_migrate_pirqs (which only uses irq_desc lock) - so you could
also drop the event_lock there.

pt_irq_time_out (which  is only called for legacy interrupts and
only at 8ms). Uses event_lock.

pci_clean_dpci_irqs (during domain shutdown, uses event_lock).

Also should this rcu lock be put in 'pirq_guest_unmask' (which oddly
enough has no lock?!)?

Reading the radix-tree.h t says: "
..
The notable exceptions to this rule are the following functions:
 ...
  radix_tree_gang_lookup

The first 7 functions are able to be called locklessly, using RCU. The
caller must ensure calls to these functions are made within rcu_read_lock()
regions."

Great, so that is what your patch does. Though it would imply that
everybody using radix_tree_gang_lookup on pirq_tree needs to use RCU?

> 
> Signed-off-by: David Vrabel <david.vrabel@citrix.com>
> ---
>  xen/drivers/passthrough/io.c | 10 +++++++---
>  xen/include/xen/sched.h      |  1 +
>  2 files changed, 8 insertions(+), 3 deletions(-)
> 
> diff --git a/xen/drivers/passthrough/io.c b/xen/drivers/passthrough/io.c
> index 7c86c20..9b51ef0 100644
> --- a/xen/drivers/passthrough/io.c
> +++ b/xen/drivers/passthrough/io.c
> @@ -601,7 +601,7 @@ int pt_pirq_iterate(struct domain *d,
>      unsigned int pirq = 0, n, i;
>      struct pirq *pirqs[8];
>  
> -    ASSERT(spin_is_locked(&d->event_lock));
> +    rcu_read_lock(&d->pirq_rcu_lock);
>  
>      do {
>          n = radix_tree_gang_lookup(&d->pirq_tree, (void **)pirqs, pirq,
> @@ -610,12 +610,18 @@ int pt_pirq_iterate(struct domain *d,
>          {
>              struct hvm_pirq_dpci *pirq_dpci = pirq_dpci(pirqs[i]);
>  
> +            spin_lock(&pirq_dpci->lock);
> +
>              pirq = pirqs[i]->pirq;
>              if ( (pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) )
>                  rc = cb(d, pirq_dpci, arg);
> +
> +            spin_unlock(&pirq_dpci->lock);
>          }
>      } while ( !rc && ++pirq < d->nr_pirqs && n == ARRAY_SIZE(pirqs) );
>  
> +    rcu_read_unlock(&d->pirq_rcu_lock);
> +
>      return rc;
>  }
>  
> @@ -678,9 +684,7 @@ void hvm_dpci_msi_eoi(struct domain *d, int vector)
>      if ( !iommu_enabled || !d->arch.hvm_domain.irq.dpci )
>         return;
>  
> -    spin_lock(&d->event_lock);
>      pt_pirq_iterate(d, _hvm_dpci_msi_eoi, (void *)(long)vector);
> -    spin_unlock(&d->event_lock);
>  }
>  
>  static void hvm_dirq_assist(struct domain *d, struct hvm_pirq_dpci *pirq_dpci)
> diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
> index 3729b0f..ae98c1e 100644
> --- a/xen/include/xen/sched.h
> +++ b/xen/include/xen/sched.h
> @@ -355,6 +355,7 @@ struct domain
>       */
>      struct radix_tree_root pirq_tree;
>      unsigned int     nr_pirqs;
> +    rcu_read_lock_t pirq_rcu_lock;
>  
>      enum guest_type guest_type;
>  
> -- 
> 2.1.4
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCHv1 1/2] passthrough: use per-interrupt lock when injecting an interrupt
  2015-10-28 20:18     ` Konrad Rzeszutek Wilk
@ 2015-10-29  9:11       ` Jan Beulich
  2015-10-30 14:45         ` Konrad Rzeszutek Wilk
  0 siblings, 1 reply; 13+ messages in thread
From: Jan Beulich @ 2015-10-29  9:11 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk; +Cc: xen-devel, David Vrabel, Ian Campbell

>>> On 28.10.15 at 21:18, <konrad.wilk@oracle.com> wrote:
>> > @@ -481,6 +489,8 @@ int pt_irq_destroy_bind(
>> >      pirq = pirq_info(d, machine_gsi);
>> >      pirq_dpci = pirq_dpci(pirq);
>> >  
>> > +    spin_lock(&pirq_dpci->lock);
>> 
>> Considering that code further down in this function checks
>> pirq_dpci to be non-NULL, this doesn't look safe (or else those
>> checks should go away, as after this addition they would be
>> likely to trigger e.g. Coverity warnings).
> 
> ?  The checks are for pirq_dpci->dom.

What about

        /* clear the mirq info */
        if ( pirq_dpci && (pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) )

and

    if ( pirq_dpci && (pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) &&
         list_empty(&pirq_dpci->digl_list) )

? In fact I can't spot any access to pirq_dpci->dom in this function.

>> > @@ -675,7 +687,7 @@ static void hvm_dirq_assist(struct domain *d, struct hvm_pirq_dpci *pirq_dpci)
>> >  {
>> >      ASSERT(d->arch.hvm_domain.irq.dpci);
>> >  
>> > -    spin_lock(&d->event_lock);
>> > +    spin_lock(&pirq_dpci->lock);
>> >      if ( test_and_clear_bool(pirq_dpci->masked) )
>> >      {
>> >          struct pirq *pirq = dpci_pirq(pirq_dpci);
>> 
>> Along the same lines - it's not obvious that the uses of pirq here are
>> safe with event_lock no longer held. In particular I wonder about
>> send_guest_pirq() - despite the other use in __do_IRQ_guest() not
>> doing any locking either I'm not convinced this is correct.
> 
> 
> It seems that the event channel mechanism only uses the event channel
> lock when expanding and initializing (FIFO). For the old mechanism
> it was for binding, closing (uhuh), status, reset, and set_priority.

Well, the event lock is also used for some pIRQ management iirc.

Jan

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCHv1 1/2] passthrough: use per-interrupt lock when injecting an interrupt
  2015-10-29  9:11       ` Jan Beulich
@ 2015-10-30 14:45         ` Konrad Rzeszutek Wilk
  0 siblings, 0 replies; 13+ messages in thread
From: Konrad Rzeszutek Wilk @ 2015-10-30 14:45 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel, David Vrabel, Ian Campbell

On Thu, Oct 29, 2015 at 03:11:39AM -0600, Jan Beulich wrote:
> >>> On 28.10.15 at 21:18, <konrad.wilk@oracle.com> wrote:
> >> > @@ -481,6 +489,8 @@ int pt_irq_destroy_bind(
> >> >      pirq = pirq_info(d, machine_gsi);
> >> >      pirq_dpci = pirq_dpci(pirq);
> >> >  
> >> > +    spin_lock(&pirq_dpci->lock);
> >> 
> >> Considering that code further down in this function checks
> >> pirq_dpci to be non-NULL, this doesn't look safe (or else those
> >> checks should go away, as after this addition they would be
> >> likely to trigger e.g. Coverity warnings).
> > 
> > ?  The checks are for pirq_dpci->dom.
> 
> What about
> 
>         /* clear the mirq info */
>         if ( pirq_dpci && (pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) )
> 
> and
> 
>     if ( pirq_dpci && (pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) &&
>          list_empty(&pirq_dpci->digl_list) )

Yes. I was looking at the function code (pt_irq_create_bind) not the
pt_irq_destroy_bind!
> 
> ? In fact I can't spot any access to pirq_dpci->dom in this function.
> 
> >> > @@ -675,7 +687,7 @@ static void hvm_dirq_assist(struct domain *d, struct hvm_pirq_dpci *pirq_dpci)
> >> >  {
> >> >      ASSERT(d->arch.hvm_domain.irq.dpci);
> >> >  
> >> > -    spin_lock(&d->event_lock);
> >> > +    spin_lock(&pirq_dpci->lock);
> >> >      if ( test_and_clear_bool(pirq_dpci->masked) )
> >> >      {
> >> >          struct pirq *pirq = dpci_pirq(pirq_dpci);
> >> 
> >> Along the same lines - it's not obvious that the uses of pirq here are
> >> safe with event_lock no longer held. In particular I wonder about
> >> send_guest_pirq() - despite the other use in __do_IRQ_guest() not
> >> doing any locking either I'm not convinced this is correct.
> > 
> > 
> > It seems that the event channel mechanism only uses the event channel
> > lock when expanding and initializing (FIFO). For the old mechanism
> > it was for binding, closing (uhuh), status, reset, and set_priority.
> 
> Well, the event lock is also used for some pIRQ management iirc.

Correct - pirq_guest_bind, pirq_guest_unbind, map_domain_pirq,
map_domain_emuirq_pirq, unmap_domain_pirq_emuirq, and so on.

> 
> Jan
> 

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2015-10-30 14:45 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-10-23 11:05 passthrough: improve interrupt injection locking David Vrabel
2015-10-23 11:05 ` [PATCHv1 1/2] passthrough: use per-interrupt lock when injecting an interrupt David Vrabel
2015-10-27 11:56   ` Jan Beulich
2015-10-28 20:18     ` Konrad Rzeszutek Wilk
2015-10-29  9:11       ` Jan Beulich
2015-10-30 14:45         ` Konrad Rzeszutek Wilk
2015-10-23 11:05 ` [PATCHv1 2/2] passthrough: improve locking when iterating over interrupts bound to VMs David Vrabel
2015-10-27 12:44   ` Jan Beulich
2015-10-27 13:11     ` David Vrabel
2015-10-28 20:42   ` Konrad Rzeszutek Wilk
2015-10-23 12:37 ` passthrough: improve interrupt injection locking Ian Campbell
2015-10-23 12:38   ` David Vrabel
2015-10-23 12:45     ` Ian Campbell

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.