* [PATCH v5 0/6] evtchn: (not so) recent XSAs follow-on
@ 2021-01-27  8:13 Jan Beulich
  2021-01-27  8:15 ` [PATCH v5 1/6] evtchn: use per-channel lock where possible Jan Beulich
                   ` (6 more replies)
  0 siblings, 7 replies; 15+ messages in thread
From: Jan Beulich @ 2021-01-27  8:13 UTC (permalink / raw)
  To: xen-devel
  Cc: Andrew Cooper, George Dunlap, Ian Jackson, Julien Grall, Wei Liu,
	Stefano Stabellini

These are grouped into a series largely because of their origin,
not so much because there are (heavy) dependencies among them.
The main change from v4 is the dropping of the two patches trying
to do away with the double event lock acquires in interdomain
channel handling. See also the individual patches.

1: use per-channel lock where possible
2: convert domain event lock to an r/w one
3: slightly defer lock acquire where possible
4: add helper for port_is_valid() + evtchn_from_port()
5: type adjustments
6: drop acquiring of per-channel lock from send_guest_{global,vcpu}_virq()

Jan



* [PATCH v5 1/6] evtchn: use per-channel lock where possible
  2021-01-27  8:13 [PATCH v5 0/6] evtchn: (not so) recent XSAs follow-on Jan Beulich
@ 2021-01-27  8:15 ` Jan Beulich
  2021-01-27  8:16 ` [PATCH v5 2/6] evtchn: convert domain event lock to an r/w one Jan Beulich
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 15+ messages in thread
From: Jan Beulich @ 2021-01-27  8:15 UTC (permalink / raw)
  To: xen-devel
  Cc: Andrew Cooper, George Dunlap, Ian Jackson, Julien Grall, Wei Liu,
	Stefano Stabellini

Neither evtchn_status() nor domain_dump_evtchn_info() nor
flask_get_peer_sid() needs to hold the per-domain lock - they all only
read a single channel's state (one channel at a time, in the dump case).

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v4: New.
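
A minimal, self-contained illustration of the rule the dump path below
has to follow (plain C with pthreads; all names are stand-ins, none of
this is Xen code): the per-channel lock nests inside the per-domain
event lock, so the channel lock has to be dropped before the domain
lock may be taken, and a keyhandler-style path only ever try-locks so
it can't block.

    #include <pthread.h>
    #include <stdio.h>

    /* Outer, coarse lock (stand-in for d->event_lock). */
    static pthread_mutex_t domain_lock = PTHREAD_MUTEX_INITIALIZER;
    /* Inner, per-object lock (stand-in for a channel's lock). */
    static pthread_rwlock_t chn_lock = PTHREAD_RWLOCK_INITIALIZER;

    static void dump_one(void)
    {
        if ( pthread_rwlock_tryrdlock(&chn_lock) )
        {
            puts("    in flux");            /* channel busy: skip it */
            return;
        }
        /* ... read per-channel state here ... */
        pthread_rwlock_unlock(&chn_lock);   /* let go of the inner lock */

        /* Only now may the outer lock be (try-)taken. */
        if ( !pthread_mutex_trylock(&domain_lock) )
        {
            /* ... read domain-wide state (e.g. pirq -> IRQ) ... */
            pthread_mutex_unlock(&domain_lock);
        }
        else
            puts("    i=?");                /* best effort, as in the dump */
    }

    int main(void)
    {
        dump_one();
        return 0;
    }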

--- a/xen/common/event_channel.c
+++ b/xen/common/event_channel.c
@@ -974,15 +974,16 @@ int evtchn_status(evtchn_status_t *statu
     if ( d == NULL )
         return -ESRCH;
 
-    spin_lock(&d->event_lock);
-
     if ( !port_is_valid(d, port) )
     {
-        rc = -EINVAL;
-        goto out;
+        rcu_unlock_domain(d);
+        return -EINVAL;
     }
 
     chn = evtchn_from_port(d, port);
+
+    evtchn_read_lock(chn);
+
     if ( consumer_is_xen(chn) )
     {
         rc = -EACCES;
@@ -1027,7 +1028,7 @@ int evtchn_status(evtchn_status_t *statu
     status->vcpu = chn->notify_vcpu_id;
 
  out:
-    spin_unlock(&d->event_lock);
+    evtchn_read_unlock(chn);
     rcu_unlock_domain(d);
 
     return rc;
@@ -1582,22 +1583,32 @@ void evtchn_move_pirqs(struct vcpu *v)
 static void domain_dump_evtchn_info(struct domain *d)
 {
     unsigned int port;
-    int irq;
 
     printk("Event channel information for domain %d:\n"
            "Polling vCPUs: {%*pbl}\n"
            "    port [p/m/s]\n", d->domain_id, d->max_vcpus, d->poll_mask);
 
-    spin_lock(&d->event_lock);
-
     for ( port = 1; port_is_valid(d, port); ++port )
     {
-        const struct evtchn *chn;
+        struct evtchn *chn;
         char *ssid;
 
+        if ( !(port & 0x3f) )
+            process_pending_softirqs();
+
         chn = evtchn_from_port(d, port);
+
+        if ( !evtchn_read_trylock(chn) )
+        {
+            printk("    %4u in flux\n", port);
+            continue;
+        }
+
         if ( chn->state == ECS_FREE )
+        {
+            evtchn_read_unlock(chn);
             continue;
+        }
 
         printk("    %4u [%d/%d/",
                port,
@@ -1607,26 +1618,49 @@ static void domain_dump_evtchn_info(stru
         printk("]: s=%d n=%d x=%d",
                chn->state, chn->notify_vcpu_id, chn->xen_consumer);
 
+        ssid = xsm_show_security_evtchn(d, chn);
+
         switch ( chn->state )
         {
         case ECS_UNBOUND:
             printk(" d=%d", chn->u.unbound.remote_domid);
             break;
+
         case ECS_INTERDOMAIN:
             printk(" d=%d p=%d",
                    chn->u.interdomain.remote_dom->domain_id,
                    chn->u.interdomain.remote_port);
             break;
-        case ECS_PIRQ:
-            irq = domain_pirq_to_irq(d, chn->u.pirq.irq);
-            printk(" p=%d i=%d", chn->u.pirq.irq, irq);
+
+        case ECS_PIRQ: {
+            unsigned int pirq = chn->u.pirq.irq;
+
+            /*
+             * The per-channel locks nest inside the per-domain one, so we
+             * can't acquire the latter without first letting go of the former.
+             */
+            evtchn_read_unlock(chn);
+            chn = NULL;
+            if ( spin_trylock(&d->event_lock) )
+            {
+                int irq = domain_pirq_to_irq(d, pirq);
+
+                spin_unlock(&d->event_lock);
+                printk(" p=%u i=%d", pirq, irq);
+            }
+            else
+                printk(" p=%u i=?", pirq);
             break;
+        }
+
         case ECS_VIRQ:
             printk(" v=%d", chn->u.virq);
             break;
         }
 
-        ssid = xsm_show_security_evtchn(d, chn);
+        if ( chn )
+            evtchn_read_unlock(chn);
+
         if (ssid) {
             printk(" Z=%s\n", ssid);
             xfree(ssid);
@@ -1634,8 +1668,6 @@ static void domain_dump_evtchn_info(stru
             printk("\n");
         }
     }
-
-    spin_unlock(&d->event_lock);
 }
 
 static void dump_evtchn_info(unsigned char key)
--- a/xen/xsm/flask/flask_op.c
+++ b/xen/xsm/flask/flask_op.c
@@ -555,12 +555,13 @@ static int flask_get_peer_sid(struct xen
     struct evtchn *chn;
     struct domain_security_struct *dsec;
 
-    spin_lock(&d->event_lock);
-
     if ( !port_is_valid(d, arg->evtchn) )
-        goto out;
+        return -EINVAL;
 
     chn = evtchn_from_port(d, arg->evtchn);
+
+    evtchn_read_lock(chn);
+
     if ( chn->state != ECS_INTERDOMAIN )
         goto out;
 
@@ -573,7 +574,7 @@ static int flask_get_peer_sid(struct xen
     rv = 0;
 
  out:
-    spin_unlock(&d->event_lock);
+    evtchn_read_unlock(chn);
     return rv;
 }
 




* [PATCH v5 2/6] evtchn: convert domain event lock to an r/w one
  2021-01-27  8:13 [PATCH v5 0/6] evtchn: (not so) recent XSAs follow-on Jan Beulich
  2021-01-27  8:15 ` [PATCH v5 1/6] evtchn: use per-channel lock where possible Jan Beulich
@ 2021-01-27  8:16 ` Jan Beulich
  2021-05-27 11:01   ` Roger Pau Monné
  2022-07-07 18:00   ` Julien Grall
  2021-01-27  8:16 ` [PATCH v5 3/6] evtchn: slightly defer lock acquire where possible Jan Beulich
                   ` (4 subsequent siblings)
  6 siblings, 2 replies; 15+ messages in thread
From: Jan Beulich @ 2021-01-27  8:16 UTC (permalink / raw)
  To: xen-devel
  Cc: Andrew Cooper, George Dunlap, Ian Jackson, Julien Grall, Wei Liu,
	Stefano Stabellini, Roger Pau Monné,
	Kevin Tian

Especially for the use in evtchn_move_pirqs() (called when moving a vCPU
across pCPUs) and the uses in the EOI handling of the PCI pass-through
code, serializing perhaps an entire domain isn't helpful when no state
changes that isn't e.g. further protected by the per-channel lock.

Unfortunately this implies dropping lock profiling for this lock until
r/w locks gain support for such functionality.

While ->notify_vcpu_id is now meant to be consistently updated with the
per-channel lock held, an extension applies to ECS_PIRQ: The field is
also guaranteed not to change while the per-domain event lock is held
for writing. Therefore the link_pirq_port() call from evtchn_bind_pirq()
could in principle be moved out of the per-channel locked regions, but
this further code churn didn't seem worth it.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v5: Re-base, also over dropped earlier patch.
v4: Re-base, in particular over new earlier patches. Acquire both
    per-domain locks for writing in evtchn_close(). Adjust
    spin_barrier() related comments.
v3: Re-base.
v2: Consistently lock for writing in evtchn_reset(). Fix error path in
    pci_clean_dpci_irqs(). Lock for writing in pt_irq_time_out(),
    hvm_dirq_assist(), hvm_dpci_eoi(), and hvm_dpci_isairq_eoi(). Move
    rw_barrier() introduction here. Re-base over changes earlier in the
    series.
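
For illustration, the intended effect of the conversion in a
self-contained pthread sketch (not Xen code; names are made up):
read-mostly paths such as evtchn_move_pirqs() or the EOI handlers can
run concurrently once the lock is an r/w one, while mutating paths
retain full mutual exclusion.

    #include <pthread.h>

    static pthread_rwlock_t event_lock = PTHREAD_RWLOCK_INITIALIZER;
    static int pirq_to_irq[16];          /* stand-in for the mapping */

    /* Read-only path: many of these may now run in parallel. */
    static int lookup_irq(unsigned int pirq)
    {
        int irq;

        pthread_rwlock_rdlock(&event_lock);
        irq = pirq_to_irq[pirq];
        pthread_rwlock_unlock(&event_lock);

        return irq;
    }

    /* Mutating path: still serialized against readers and writers. */
    static void map_irq(unsigned int pirq, int irq)
    {
        pthread_rwlock_wrlock(&event_lock);
        pirq_to_irq[pirq] = irq;
        pthread_rwlock_unlock(&event_lock);
    }

    int main(void)
    {
        map_irq(3, 42);
        return lookup_irq(3) != 42;
    }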

--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -903,7 +903,7 @@ int arch_domain_soft_reset(struct domain
     if ( !is_hvm_domain(d) )
         return -EINVAL;
 
-    spin_lock(&d->event_lock);
+    write_lock(&d->event_lock);
     for ( i = 0; i < d->nr_pirqs ; i++ )
     {
         if ( domain_pirq_to_emuirq(d, i) != IRQ_UNBOUND )
@@ -913,7 +913,7 @@ int arch_domain_soft_reset(struct domain
                 break;
         }
     }
-    spin_unlock(&d->event_lock);
+    write_unlock(&d->event_lock);
 
     if ( ret )
         return ret;
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -528,9 +528,9 @@ void hvm_migrate_pirqs(struct vcpu *v)
     if ( !is_iommu_enabled(d) || !hvm_domain_irq(d)->dpci )
        return;
 
-    spin_lock(&d->event_lock);
+    read_lock(&d->event_lock);
     pt_pirq_iterate(d, migrate_pirq, v);
-    spin_unlock(&d->event_lock);
+    read_unlock(&d->event_lock);
 }
 
 static bool hvm_get_pending_event(struct vcpu *v, struct x86_event *info)
--- a/xen/arch/x86/hvm/irq.c
+++ b/xen/arch/x86/hvm/irq.c
@@ -404,9 +404,9 @@ int hvm_inject_msi(struct domain *d, uin
             {
                 int rc;
 
-                spin_lock(&d->event_lock);
+                write_lock(&d->event_lock);
                 rc = map_domain_emuirq_pirq(d, pirq, IRQ_MSI_EMU);
-                spin_unlock(&d->event_lock);
+                write_unlock(&d->event_lock);
                 if ( rc )
                     return rc;
                 info = pirq_info(d, pirq);
--- a/xen/arch/x86/hvm/vioapic.c
+++ b/xen/arch/x86/hvm/vioapic.c
@@ -203,9 +203,9 @@ static int vioapic_hwdom_map_gsi(unsigne
     {
         gprintk(XENLOG_WARNING, "vioapic: error binding GSI %u: %d\n",
                 gsi, ret);
-        spin_lock(&currd->event_lock);
+        write_lock(&currd->event_lock);
         unmap_domain_pirq(currd, pirq);
-        spin_unlock(&currd->event_lock);
+        write_unlock(&currd->event_lock);
     }
     pcidevs_unlock();
 
--- a/xen/arch/x86/hvm/vmsi.c
+++ b/xen/arch/x86/hvm/vmsi.c
@@ -465,7 +465,7 @@ int msixtbl_pt_register(struct domain *d
     int r = -EINVAL;
 
     ASSERT(pcidevs_locked());
-    ASSERT(spin_is_locked(&d->event_lock));
+    ASSERT(rw_is_write_locked(&d->event_lock));
 
     if ( !msixtbl_initialised(d) )
         return -ENODEV;
@@ -535,7 +535,7 @@ void msixtbl_pt_unregister(struct domain
     struct msixtbl_entry *entry;
 
     ASSERT(pcidevs_locked());
-    ASSERT(spin_is_locked(&d->event_lock));
+    ASSERT(rw_is_write_locked(&d->event_lock));
 
     if ( !msixtbl_initialised(d) )
         return;
@@ -589,13 +589,13 @@ void msixtbl_pt_cleanup(struct domain *d
     if ( !msixtbl_initialised(d) )
         return;
 
-    spin_lock(&d->event_lock);
+    write_lock(&d->event_lock);
 
     list_for_each_entry_safe( entry, temp,
                               &d->arch.hvm.msixtbl_list, list )
         del_msixtbl_entry(entry);
 
-    spin_unlock(&d->event_lock);
+    write_unlock(&d->event_lock);
 }
 
 void msix_write_completion(struct vcpu *v)
@@ -809,9 +809,9 @@ static void vpci_msi_disable(const struc
         ASSERT(!rc);
     }
 
-    spin_lock(&pdev->domain->event_lock);
+    write_lock(&pdev->domain->event_lock);
     unmap_domain_pirq(pdev->domain, pirq);
-    spin_unlock(&pdev->domain->event_lock);
+    write_unlock(&pdev->domain->event_lock);
     pcidevs_unlock();
 }
 
--- a/xen/arch/x86/io_apic.c
+++ b/xen/arch/x86/io_apic.c
@@ -2413,10 +2413,10 @@ int ioapic_guest_write(unsigned long phy
     }
     if ( pirq >= 0 )
     {
-        spin_lock(&hardware_domain->event_lock);
+        write_lock(&hardware_domain->event_lock);
         ret = map_domain_pirq(hardware_domain, pirq, irq,
                               MAP_PIRQ_TYPE_GSI, NULL);
-        spin_unlock(&hardware_domain->event_lock);
+        write_unlock(&hardware_domain->event_lock);
         if ( ret < 0 )
             return ret;
     }
--- a/xen/arch/x86/irq.c
+++ b/xen/arch/x86/irq.c
@@ -1552,7 +1552,7 @@ int pirq_guest_bind(struct vcpu *v, stru
     unsigned int        max_nr_guests = will_share ? irq_max_guests : 1;
     int                 rc = 0;
 
-    WARN_ON(!spin_is_locked(&v->domain->event_lock));
+    WARN_ON(!rw_is_write_locked(&v->domain->event_lock));
     BUG_ON(!local_irq_is_enabled());
 
  retry:
@@ -1766,7 +1766,7 @@ void pirq_guest_unbind(struct domain *d,
     struct irq_desc *desc;
     int irq = 0;
 
-    WARN_ON(!spin_is_locked(&d->event_lock));
+    WARN_ON(!rw_is_write_locked(&d->event_lock));
 
     BUG_ON(!local_irq_is_enabled());
     desc = pirq_spin_lock_irq_desc(pirq, NULL);
@@ -1803,7 +1803,7 @@ static bool pirq_guest_force_unbind(stru
     unsigned int i;
     bool bound = false;
 
-    WARN_ON(!spin_is_locked(&d->event_lock));
+    WARN_ON(!rw_is_write_locked(&d->event_lock));
 
     BUG_ON(!local_irq_is_enabled());
     desc = pirq_spin_lock_irq_desc(pirq, NULL);
@@ -2045,7 +2045,7 @@ int get_free_pirq(struct domain *d, int
 {
     int i;
 
-    ASSERT(spin_is_locked(&d->event_lock));
+    ASSERT(rw_is_write_locked(&d->event_lock));
 
     if ( type == MAP_PIRQ_TYPE_GSI )
     {
@@ -2070,7 +2070,7 @@ int get_free_pirqs(struct domain *d, uns
 {
     unsigned int i, found = 0;
 
-    ASSERT(spin_is_locked(&d->event_lock));
+    ASSERT(rw_is_write_locked(&d->event_lock));
 
     for ( i = d->nr_pirqs - 1; i >= nr_irqs_gsi; --i )
         if ( is_free_pirq(d, pirq_info(d, i)) )
@@ -2098,7 +2098,7 @@ int map_domain_pirq(
     DECLARE_BITMAP(prepared, MAX_MSI_IRQS) = {};
     DECLARE_BITMAP(granted, MAX_MSI_IRQS) = {};
 
-    ASSERT(spin_is_locked(&d->event_lock));
+    ASSERT(rw_is_write_locked(&d->event_lock));
 
     if ( !irq_access_permitted(current->domain, irq))
         return -EPERM;
@@ -2317,7 +2317,7 @@ int unmap_domain_pirq(struct domain *d,
         return -EINVAL;
 
     ASSERT(pcidevs_locked());
-    ASSERT(spin_is_locked(&d->event_lock));
+    ASSERT(rw_is_write_locked(&d->event_lock));
 
     info = pirq_info(d, pirq);
     if ( !info || (irq = info->arch.irq) <= 0 )
@@ -2444,13 +2444,13 @@ void free_domain_pirqs(struct domain *d)
     int i;
 
     pcidevs_lock();
-    spin_lock(&d->event_lock);
+    write_lock(&d->event_lock);
 
     for ( i = 0; i < d->nr_pirqs; i++ )
         if ( domain_pirq_to_irq(d, i) > 0 )
             unmap_domain_pirq(d, i);
 
-    spin_unlock(&d->event_lock);
+    write_unlock(&d->event_lock);
     pcidevs_unlock();
 }
 
@@ -2693,7 +2693,7 @@ int map_domain_emuirq_pirq(struct domain
     int old_emuirq = IRQ_UNBOUND, old_pirq = IRQ_UNBOUND;
     struct pirq *info;
 
-    ASSERT(spin_is_locked(&d->event_lock));
+    ASSERT(rw_is_write_locked(&d->event_lock));
 
     if ( !is_hvm_domain(d) )
         return -EINVAL;
@@ -2759,7 +2759,7 @@ int unmap_domain_pirq_emuirq(struct doma
     if ( (pirq < 0) || (pirq >= d->nr_pirqs) )
         return -EINVAL;
 
-    ASSERT(spin_is_locked(&d->event_lock));
+    ASSERT(rw_is_write_locked(&d->event_lock));
 
     emuirq = domain_pirq_to_emuirq(d, pirq);
     if ( emuirq == IRQ_UNBOUND )
@@ -2807,7 +2807,7 @@ static int allocate_pirq(struct domain *
 {
     int current_pirq;
 
-    ASSERT(spin_is_locked(&d->event_lock));
+    ASSERT(rw_is_write_locked(&d->event_lock));
     current_pirq = domain_irq_to_pirq(d, irq);
     if ( pirq < 0 )
     {
@@ -2879,7 +2879,7 @@ int allocate_and_map_gsi_pirq(struct dom
     }
 
     /* Verify or get pirq. */
-    spin_lock(&d->event_lock);
+    write_lock(&d->event_lock);
     pirq = allocate_pirq(d, index, *pirq_p, irq, MAP_PIRQ_TYPE_GSI, NULL);
     if ( pirq < 0 )
     {
@@ -2892,7 +2892,7 @@ int allocate_and_map_gsi_pirq(struct dom
         *pirq_p = pirq;
 
  done:
-    spin_unlock(&d->event_lock);
+    write_unlock(&d->event_lock);
 
     return ret;
 }
@@ -2933,7 +2933,7 @@ int allocate_and_map_msi_pirq(struct dom
 
     pcidevs_lock();
     /* Verify or get pirq. */
-    spin_lock(&d->event_lock);
+    write_lock(&d->event_lock);
     pirq = allocate_pirq(d, index, *pirq_p, irq, type, &msi->entry_nr);
     if ( pirq < 0 )
     {
@@ -2946,7 +2946,7 @@ int allocate_and_map_msi_pirq(struct dom
         *pirq_p = pirq;
 
  done:
-    spin_unlock(&d->event_lock);
+    write_unlock(&d->event_lock);
     pcidevs_unlock();
     if ( ret )
     {
--- a/xen/arch/x86/physdev.c
+++ b/xen/arch/x86/physdev.c
@@ -34,7 +34,7 @@ static int physdev_hvm_map_pirq(
 
     ASSERT(!is_hardware_domain(d));
 
-    spin_lock(&d->event_lock);
+    write_lock(&d->event_lock);
     switch ( type )
     {
     case MAP_PIRQ_TYPE_GSI: {
@@ -84,7 +84,7 @@ static int physdev_hvm_map_pirq(
         break;
     }
 
-    spin_unlock(&d->event_lock);
+    write_unlock(&d->event_lock);
     return ret;
 }
 
@@ -154,18 +154,18 @@ int physdev_unmap_pirq(domid_t domid, in
 
     if ( is_hvm_domain(d) && has_pirq(d) )
     {
-        spin_lock(&d->event_lock);
+        write_lock(&d->event_lock);
         if ( domain_pirq_to_emuirq(d, pirq) != IRQ_UNBOUND )
             ret = unmap_domain_pirq_emuirq(d, pirq);
-        spin_unlock(&d->event_lock);
+        write_unlock(&d->event_lock);
         if ( domid == DOMID_SELF || ret )
             goto free_domain;
     }
 
     pcidevs_lock();
-    spin_lock(&d->event_lock);
+    write_lock(&d->event_lock);
     ret = unmap_domain_pirq(d, pirq);
-    spin_unlock(&d->event_lock);
+    write_unlock(&d->event_lock);
     pcidevs_unlock();
 
  free_domain:
@@ -192,10 +192,10 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
         ret = -EINVAL;
         if ( eoi.irq >= currd->nr_pirqs )
             break;
-        spin_lock(&currd->event_lock);
+        read_lock(&currd->event_lock);
         pirq = pirq_info(currd, eoi.irq);
         if ( !pirq ) {
-            spin_unlock(&currd->event_lock);
+            read_unlock(&currd->event_lock);
             break;
         }
         if ( currd->arch.auto_unmask )
@@ -214,7 +214,7 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
                     && hvm_irq->gsi_assert_count[gsi] )
                 send_guest_pirq(currd, pirq);
         }
-        spin_unlock(&currd->event_lock);
+        read_unlock(&currd->event_lock);
         ret = 0;
         break;
     }
@@ -626,7 +626,7 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
         if ( copy_from_guest(&out, arg, 1) != 0 )
             break;
 
-        spin_lock(&currd->event_lock);
+        write_lock(&currd->event_lock);
 
         ret = get_free_pirq(currd, out.type);
         if ( ret >= 0 )
@@ -639,7 +639,7 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
                 ret = -ENOMEM;
         }
 
-        spin_unlock(&currd->event_lock);
+        write_unlock(&currd->event_lock);
 
         if ( ret >= 0 )
         {
--- a/xen/arch/x86/pv/shim.c
+++ b/xen/arch/x86/pv/shim.c
@@ -448,7 +448,7 @@ static long pv_shim_event_channel_op(int
         if ( rc )                                                           \
             break;                                                          \
                                                                             \
-        spin_lock(&d->event_lock);                                          \
+        write_lock(&d->event_lock);                                         \
         rc = evtchn_allocate_port(d, op.port_field);                        \
         if ( rc )                                                           \
         {                                                                   \
@@ -457,7 +457,7 @@ static long pv_shim_event_channel_op(int
         }                                                                   \
         else                                                                \
             evtchn_reserve(d, op.port_field);                               \
-        spin_unlock(&d->event_lock);                                        \
+        write_unlock(&d->event_lock);                                       \
                                                                             \
         if ( !rc && __copy_to_guest(arg, &op, 1) )                          \
             rc = -EFAULT;                                                   \
@@ -585,11 +585,11 @@ static long pv_shim_event_channel_op(int
         if ( rc )
             break;
 
-        spin_lock(&d->event_lock);
+        write_lock(&d->event_lock);
         rc = evtchn_allocate_port(d, ipi.port);
         if ( rc )
         {
-            spin_unlock(&d->event_lock);
+            write_unlock(&d->event_lock);
 
             close.port = ipi.port;
             BUG_ON(xen_hypercall_event_channel_op(EVTCHNOP_close, &close));
@@ -598,7 +598,7 @@ static long pv_shim_event_channel_op(int
 
         evtchn_assign_vcpu(d, ipi.port, ipi.vcpu);
         evtchn_reserve(d, ipi.port);
-        spin_unlock(&d->event_lock);
+        write_unlock(&d->event_lock);
 
         if ( __copy_to_guest(arg, &ipi, 1) )
             rc = -EFAULT;
--- a/xen/common/event_channel.c
+++ b/xen/common/event_channel.c
@@ -294,7 +294,7 @@ static long evtchn_alloc_unbound(evtchn_
     if ( d == NULL )
         return -ESRCH;
 
-    spin_lock(&d->event_lock);
+    write_lock(&d->event_lock);
 
     if ( (port = get_free_port(d)) < 0 )
         ERROR_EXIT_DOM(port, d);
@@ -317,7 +317,7 @@ static long evtchn_alloc_unbound(evtchn_
 
  out:
     check_free_port(d, port);
-    spin_unlock(&d->event_lock);
+    write_unlock(&d->event_lock);
     rcu_unlock_domain(d);
 
     return rc;
@@ -355,14 +355,14 @@ static long evtchn_bind_interdomain(evtc
     /* Avoid deadlock by first acquiring lock of domain with smaller id. */
     if ( ld < rd )
     {
-        spin_lock(&ld->event_lock);
-        spin_lock(&rd->event_lock);
+        write_lock(&ld->event_lock);
+        write_lock(&rd->event_lock);
     }
     else
     {
         if ( ld != rd )
-            spin_lock(&rd->event_lock);
-        spin_lock(&ld->event_lock);
+            write_lock(&rd->event_lock);
+        write_lock(&ld->event_lock);
     }
 
     if ( (lport = get_free_port(ld)) < 0 )
@@ -403,9 +403,9 @@ static long evtchn_bind_interdomain(evtc
 
  out:
     check_free_port(ld, lport);
-    spin_unlock(&ld->event_lock);
+    write_unlock(&ld->event_lock);
     if ( ld != rd )
-        spin_unlock(&rd->event_lock);
+        write_unlock(&rd->event_lock);
     
     rcu_unlock_domain(rd);
 
@@ -436,7 +436,7 @@ int evtchn_bind_virq(evtchn_bind_virq_t
     if ( (v = domain_vcpu(d, vcpu)) == NULL )
         return -ENOENT;
 
-    spin_lock(&d->event_lock);
+    write_lock(&d->event_lock);
 
     if ( read_atomic(&v->virq_to_evtchn[virq]) )
         ERROR_EXIT(-EEXIST);
@@ -477,7 +477,7 @@ int evtchn_bind_virq(evtchn_bind_virq_t
     write_atomic(&v->virq_to_evtchn[virq], port);
 
  out:
-    spin_unlock(&d->event_lock);
+    write_unlock(&d->event_lock);
 
     return rc;
 }
@@ -493,7 +493,7 @@ static long evtchn_bind_ipi(evtchn_bind_
     if ( domain_vcpu(d, vcpu) == NULL )
         return -ENOENT;
 
-    spin_lock(&d->event_lock);
+    write_lock(&d->event_lock);
 
     if ( (port = get_free_port(d)) < 0 )
         ERROR_EXIT(port);
@@ -511,7 +511,7 @@ static long evtchn_bind_ipi(evtchn_bind_
     bind->port = port;
 
  out:
-    spin_unlock(&d->event_lock);
+    write_unlock(&d->event_lock);
 
     return rc;
 }
@@ -557,7 +557,7 @@ static long evtchn_bind_pirq(evtchn_bind
     if ( !is_hvm_domain(d) && !pirq_access_permitted(d, pirq) )
         return -EPERM;
 
-    spin_lock(&d->event_lock);
+    write_lock(&d->event_lock);
 
     if ( pirq_to_evtchn(d, pirq) != 0 )
         ERROR_EXIT(-EEXIST);
@@ -597,7 +597,7 @@ static long evtchn_bind_pirq(evtchn_bind
 
  out:
     check_free_port(d, port);
-    spin_unlock(&d->event_lock);
+    write_unlock(&d->event_lock);
 
     return rc;
 }
@@ -611,7 +611,7 @@ int evtchn_close(struct domain *d1, int
     long           rc = 0;
 
  again:
-    spin_lock(&d1->event_lock);
+    write_lock(&d1->event_lock);
 
     if ( !port_is_valid(d1, port1) )
     {
@@ -682,13 +682,11 @@ int evtchn_close(struct domain *d1, int
             rcu_lock_domain(d2);
 
             if ( d1 < d2 )
-            {
-                spin_lock(&d2->event_lock);
-            }
+                write_lock(&d2->event_lock);
             else if ( d1 != d2 )
             {
-                spin_unlock(&d1->event_lock);
-                spin_lock(&d2->event_lock);
+                write_unlock(&d1->event_lock);
+                write_lock(&d2->event_lock);
                 goto again;
             }
         }
@@ -735,11 +733,11 @@ int evtchn_close(struct domain *d1, int
     if ( d2 != NULL )
     {
         if ( d1 != d2 )
-            spin_unlock(&d2->event_lock);
+            write_unlock(&d2->event_lock);
         rcu_unlock_domain(d2);
     }
 
-    spin_unlock(&d1->event_lock);
+    write_unlock(&d1->event_lock);
 
     return rc;
 }
@@ -1046,7 +1044,7 @@ long evtchn_bind_vcpu(unsigned int port,
     if ( (v = domain_vcpu(d, vcpu_id)) == NULL )
         return -ENOENT;
 
-    spin_lock(&d->event_lock);
+    write_lock(&d->event_lock);
 
     if ( !port_is_valid(d, port) )
     {
@@ -1090,7 +1088,7 @@ long evtchn_bind_vcpu(unsigned int port,
     }
 
  out:
-    spin_unlock(&d->event_lock);
+    write_unlock(&d->event_lock);
 
     return rc;
 }
@@ -1136,7 +1134,7 @@ int evtchn_reset(struct domain *d, bool
     if ( d != current->domain && !d->controller_pause_count )
         return -EINVAL;
 
-    spin_lock(&d->event_lock);
+    write_lock(&d->event_lock);
 
     /*
      * If we are resuming, then start where we stopped. Otherwise, check
@@ -1147,7 +1145,7 @@ int evtchn_reset(struct domain *d, bool
     if ( i > d->next_evtchn )
         d->next_evtchn = i;
 
-    spin_unlock(&d->event_lock);
+    write_unlock(&d->event_lock);
 
     if ( !i )
         return -EBUSY;
@@ -1159,14 +1157,14 @@ int evtchn_reset(struct domain *d, bool
         /* NB: Choice of frequency is arbitrary. */
         if ( !(i & 0x3f) && hypercall_preempt_check() )
         {
-            spin_lock(&d->event_lock);
+            write_lock(&d->event_lock);
             d->next_evtchn = i;
-            spin_unlock(&d->event_lock);
+            write_unlock(&d->event_lock);
             return -ERESTART;
         }
     }
 
-    spin_lock(&d->event_lock);
+    write_lock(&d->event_lock);
 
     d->next_evtchn = 0;
 
@@ -1179,7 +1177,7 @@ int evtchn_reset(struct domain *d, bool
         evtchn_2l_init(d);
     }
 
-    spin_unlock(&d->event_lock);
+    write_unlock(&d->event_lock);
 
     return rc;
 }
@@ -1369,7 +1367,7 @@ int alloc_unbound_xen_event_channel(
     struct evtchn *chn;
     int            port, rc;
 
-    spin_lock(&ld->event_lock);
+    write_lock(&ld->event_lock);
 
     port = rc = get_free_port(ld);
     if ( rc < 0 )
@@ -1397,7 +1395,7 @@ int alloc_unbound_xen_event_channel(
 
  out:
     check_free_port(ld, port);
-    spin_unlock(&ld->event_lock);
+    write_unlock(&ld->event_lock);
 
     return rc < 0 ? rc : port;
 }
@@ -1408,7 +1406,7 @@ void free_xen_event_channel(struct domai
     {
         /*
          * Make sure ->is_dying is read /after/ ->valid_evtchns, pairing
-         * with the spin_barrier() and BUG_ON() in evtchn_destroy().
+         * with the kind-of-barrier and BUG_ON() in evtchn_destroy().
          */
         smp_rmb();
         BUG_ON(!d->is_dying);
@@ -1428,7 +1426,7 @@ void notify_via_xen_event_channel(struct
     {
         /*
          * Make sure ->is_dying is read /after/ ->valid_evtchns, pairing
-         * with the spin_barrier() and BUG_ON() in evtchn_destroy().
+         * with the kind-of-barrier and BUG_ON() in evtchn_destroy().
          */
         smp_rmb();
         ASSERT(ld->is_dying);
@@ -1485,7 +1483,8 @@ int evtchn_init(struct domain *d, unsign
         return -ENOMEM;
     d->valid_evtchns = EVTCHNS_PER_BUCKET;
 
-    spin_lock_init_prof(d, event_lock);
+    rwlock_init(&d->event_lock);
+
     if ( get_free_port(d) != 0 )
     {
         free_evtchn_bucket(d, d->evtchn);
@@ -1510,9 +1509,10 @@ int evtchn_destroy(struct domain *d)
 {
     unsigned int i;
 
-    /* After this barrier no new event-channel allocations can occur. */
+    /* After this kind-of-barrier no new event-channel allocations can occur. */
     BUG_ON(!d->is_dying);
-    spin_barrier(&d->event_lock);
+    read_lock(&d->event_lock);
+    read_unlock(&d->event_lock);
 
     /* Close all existing event channels. */
     for ( i = d->valid_evtchns; --i; )
@@ -1570,13 +1570,13 @@ void evtchn_move_pirqs(struct vcpu *v)
     unsigned int port;
     struct evtchn *chn;
 
-    spin_lock(&d->event_lock);
+    read_lock(&d->event_lock);
     for ( port = v->pirq_evtchn_head; port; port = chn->u.pirq.next_port )
     {
         chn = evtchn_from_port(d, port);
         pirq_set_affinity(d, chn->u.pirq.irq, mask);
     }
-    spin_unlock(&d->event_lock);
+    read_unlock(&d->event_lock);
 }
 
 
@@ -1641,11 +1641,11 @@ static void domain_dump_evtchn_info(stru
              */
             evtchn_read_unlock(chn);
             chn = NULL;
-            if ( spin_trylock(&d->event_lock) )
+            if ( read_trylock(&d->event_lock) )
             {
                 int irq = domain_pirq_to_irq(d, pirq);
 
-                spin_unlock(&d->event_lock);
+                read_unlock(&d->event_lock);
                 printk(" p=%u i=%d", pirq, irq);
             }
             else
--- a/xen/common/event_fifo.c
+++ b/xen/common/event_fifo.c
@@ -600,7 +600,7 @@ int evtchn_fifo_init_control(struct evtc
     if ( offset & (8 - 1) )
         return -EINVAL;
 
-    spin_lock(&d->event_lock);
+    write_lock(&d->event_lock);
 
     /*
      * If this is the first control block, setup an empty event array
@@ -636,13 +636,13 @@ int evtchn_fifo_init_control(struct evtc
     else
         rc = map_control_block(v, gfn, offset);
 
-    spin_unlock(&d->event_lock);
+    write_unlock(&d->event_lock);
 
     return rc;
 
  error:
     evtchn_fifo_destroy(d);
-    spin_unlock(&d->event_lock);
+    write_unlock(&d->event_lock);
     return rc;
 }
 
@@ -695,9 +695,9 @@ int evtchn_fifo_expand_array(const struc
     if ( !d->evtchn_fifo )
         return -EOPNOTSUPP;
 
-    spin_lock(&d->event_lock);
+    write_lock(&d->event_lock);
     rc = add_page_to_event_array(d, expand_array->array_gfn);
-    spin_unlock(&d->event_lock);
+    write_unlock(&d->event_lock);
 
     return rc;
 }
--- a/xen/drivers/passthrough/vtd/x86/hvm.c
+++ b/xen/drivers/passthrough/vtd/x86/hvm.c
@@ -54,7 +54,7 @@ void hvm_dpci_isairq_eoi(struct domain *
     if ( !is_iommu_enabled(d) )
         return;
 
-    spin_lock(&d->event_lock);
+    write_lock(&d->event_lock);
 
     dpci = domain_get_irq_dpci(d);
 
@@ -63,5 +63,5 @@ void hvm_dpci_isairq_eoi(struct domain *
         /* Multiple mirq may be mapped to one isa irq */
         pt_pirq_iterate(d, _hvm_dpci_isairq_eoi, (void *)(long)isairq);
     }
-    spin_unlock(&d->event_lock);
+    write_unlock(&d->event_lock);
 }
--- a/xen/drivers/passthrough/x86/hvm.c
+++ b/xen/drivers/passthrough/x86/hvm.c
@@ -105,7 +105,7 @@ static void pt_pirq_softirq_reset(struct
 {
     struct domain *d = pirq_dpci->dom;
 
-    ASSERT(spin_is_locked(&d->event_lock));
+    ASSERT(rw_is_write_locked(&d->event_lock));
 
     switch ( cmpxchg(&pirq_dpci->state, 1 << STATE_SCHED, 0) )
     {
@@ -162,7 +162,7 @@ static void pt_irq_time_out(void *data)
     const struct hvm_irq_dpci *dpci;
     const struct dev_intx_gsi_link *digl;
 
-    spin_lock(&irq_map->dom->event_lock);
+    write_lock(&irq_map->dom->event_lock);
 
     if ( irq_map->flags & HVM_IRQ_DPCI_IDENTITY_GSI )
     {
@@ -177,7 +177,7 @@ static void pt_irq_time_out(void *data)
         hvm_gsi_deassert(irq_map->dom, dpci_pirq(irq_map)->pirq);
         irq_map->flags |= HVM_IRQ_DPCI_EOI_LATCH;
         pt_irq_guest_eoi(irq_map->dom, irq_map, NULL);
-        spin_unlock(&irq_map->dom->event_lock);
+        write_unlock(&irq_map->dom->event_lock);
         return;
     }
 
@@ -185,7 +185,7 @@ static void pt_irq_time_out(void *data)
     if ( unlikely(!dpci) )
     {
         ASSERT_UNREACHABLE();
-        spin_unlock(&irq_map->dom->event_lock);
+        write_unlock(&irq_map->dom->event_lock);
         return;
     }
     list_for_each_entry ( digl, &irq_map->digl_list, list )
@@ -204,7 +204,7 @@ static void pt_irq_time_out(void *data)
 
     pt_pirq_iterate(irq_map->dom, pt_irq_guest_eoi, NULL);
 
-    spin_unlock(&irq_map->dom->event_lock);
+    write_unlock(&irq_map->dom->event_lock);
 }
 
 struct hvm_irq_dpci *domain_get_irq_dpci(const struct domain *d)
@@ -288,7 +288,7 @@ int pt_irq_create_bind(
         return -EINVAL;
 
  restart:
-    spin_lock(&d->event_lock);
+    write_lock(&d->event_lock);
 
     hvm_irq_dpci = domain_get_irq_dpci(d);
     if ( !hvm_irq_dpci && !is_hardware_domain(d) )
@@ -304,7 +304,7 @@ int pt_irq_create_bind(
         hvm_irq_dpci = xzalloc(struct hvm_irq_dpci);
         if ( hvm_irq_dpci == NULL )
         {
-            spin_unlock(&d->event_lock);
+            write_unlock(&d->event_lock);
             return -ENOMEM;
         }
         for ( i = 0; i < NR_HVM_DOMU_IRQS; i++ )
@@ -316,7 +316,7 @@ int pt_irq_create_bind(
     info = pirq_get_info(d, pirq);
     if ( !info )
     {
-        spin_unlock(&d->event_lock);
+        write_unlock(&d->event_lock);
         return -ENOMEM;
     }
     pirq_dpci = pirq_dpci(info);
@@ -331,7 +331,7 @@ int pt_irq_create_bind(
      */
     if ( pt_pirq_softirq_active(pirq_dpci) )
     {
-        spin_unlock(&d->event_lock);
+        write_unlock(&d->event_lock);
         cpu_relax();
         goto restart;
     }
@@ -389,7 +389,7 @@ int pt_irq_create_bind(
                 pirq_dpci->dom = NULL;
                 pirq_dpci->flags = 0;
                 pirq_cleanup_check(info, d);
-                spin_unlock(&d->event_lock);
+                write_unlock(&d->event_lock);
                 return rc;
             }
         }
@@ -399,7 +399,7 @@ int pt_irq_create_bind(
 
             if ( (pirq_dpci->flags & mask) != mask )
             {
-                spin_unlock(&d->event_lock);
+                write_unlock(&d->event_lock);
                 return -EBUSY;
             }
 
@@ -423,7 +423,7 @@ int pt_irq_create_bind(
 
         dest_vcpu_id = hvm_girq_dest_2_vcpu_id(d, dest, dest_mode);
         pirq_dpci->gmsi.dest_vcpu_id = dest_vcpu_id;
-        spin_unlock(&d->event_lock);
+        write_unlock(&d->event_lock);
 
         pirq_dpci->gmsi.posted = false;
         vcpu = (dest_vcpu_id >= 0) ? d->vcpu[dest_vcpu_id] : NULL;
@@ -483,7 +483,7 @@ int pt_irq_create_bind(
 
             if ( !digl || !girq )
             {
-                spin_unlock(&d->event_lock);
+                write_unlock(&d->event_lock);
                 xfree(girq);
                 xfree(digl);
                 return -ENOMEM;
@@ -510,7 +510,7 @@ int pt_irq_create_bind(
             if ( pt_irq_bind->irq_type != PT_IRQ_TYPE_PCI ||
                  pirq >= hvm_domain_irq(d)->nr_gsis )
             {
-                spin_unlock(&d->event_lock);
+                write_unlock(&d->event_lock);
 
                 return -EINVAL;
             }
@@ -546,7 +546,7 @@ int pt_irq_create_bind(
 
                     if ( mask < 0 || trigger_mode < 0 )
                     {
-                        spin_unlock(&d->event_lock);
+                        write_unlock(&d->event_lock);
 
                         ASSERT_UNREACHABLE();
                         return -EINVAL;
@@ -594,14 +594,14 @@ int pt_irq_create_bind(
                 }
                 pirq_dpci->flags = 0;
                 pirq_cleanup_check(info, d);
-                spin_unlock(&d->event_lock);
+                write_unlock(&d->event_lock);
                 xfree(girq);
                 xfree(digl);
                 return rc;
             }
         }
 
-        spin_unlock(&d->event_lock);
+        write_unlock(&d->event_lock);
 
         if ( iommu_verbose )
         {
@@ -619,7 +619,7 @@ int pt_irq_create_bind(
     }
 
     default:
-        spin_unlock(&d->event_lock);
+        write_unlock(&d->event_lock);
         return -EOPNOTSUPP;
     }
 
@@ -672,13 +672,13 @@ int pt_irq_destroy_bind(
         return -EOPNOTSUPP;
     }
 
-    spin_lock(&d->event_lock);
+    write_lock(&d->event_lock);
 
     hvm_irq_dpci = domain_get_irq_dpci(d);
 
     if ( !hvm_irq_dpci && !is_hardware_domain(d) )
     {
-        spin_unlock(&d->event_lock);
+        write_unlock(&d->event_lock);
         return -EINVAL;
     }
 
@@ -711,7 +711,7 @@ int pt_irq_destroy_bind(
 
         if ( girq )
         {
-            spin_unlock(&d->event_lock);
+            write_unlock(&d->event_lock);
             return -EINVAL;
         }
 
@@ -755,7 +755,7 @@ int pt_irq_destroy_bind(
         pirq_cleanup_check(pirq, d);
     }
 
-    spin_unlock(&d->event_lock);
+    write_unlock(&d->event_lock);
 
     if ( what && iommu_verbose )
     {
@@ -799,7 +799,7 @@ int pt_pirq_iterate(struct domain *d,
     unsigned int pirq = 0, n, i;
     struct pirq *pirqs[8];
 
-    ASSERT(spin_is_locked(&d->event_lock));
+    ASSERT(rw_is_locked(&d->event_lock));
 
     do {
         n = radix_tree_gang_lookup(&d->pirq_tree, (void **)pirqs, pirq,
@@ -880,9 +880,9 @@ void hvm_dpci_msi_eoi(struct domain *d,
          (!hvm_domain_irq(d)->dpci && !is_hardware_domain(d)) )
        return;
 
-    spin_lock(&d->event_lock);
+    read_lock(&d->event_lock);
     pt_pirq_iterate(d, _hvm_dpci_msi_eoi, (void *)(long)vector);
-    spin_unlock(&d->event_lock);
+    read_unlock(&d->event_lock);
 }
 
 static void hvm_dirq_assist(struct domain *d, struct hvm_pirq_dpci *pirq_dpci)
@@ -893,7 +893,7 @@ static void hvm_dirq_assist(struct domai
         return;
     }
 
-    spin_lock(&d->event_lock);
+    write_lock(&d->event_lock);
     if ( test_and_clear_bool(pirq_dpci->masked) )
     {
         struct pirq *pirq = dpci_pirq(pirq_dpci);
@@ -947,7 +947,7 @@ static void hvm_dirq_assist(struct domai
     }
 
  out:
-    spin_unlock(&d->event_lock);
+    write_unlock(&d->event_lock);
 }
 
 static void hvm_pirq_eoi(struct pirq *pirq)
@@ -1007,7 +1007,7 @@ void hvm_dpci_eoi(struct domain *d, unsi
 
     if ( is_hardware_domain(d) )
     {
-        spin_lock(&d->event_lock);
+        write_lock(&d->event_lock);
         hvm_gsi_eoi(d, guest_gsi);
         goto unlock;
     }
@@ -1018,7 +1018,7 @@ void hvm_dpci_eoi(struct domain *d, unsi
         return;
     }
 
-    spin_lock(&d->event_lock);
+    write_lock(&d->event_lock);
     hvm_irq_dpci = domain_get_irq_dpci(d);
 
     if ( !hvm_irq_dpci )
@@ -1028,7 +1028,7 @@ void hvm_dpci_eoi(struct domain *d, unsi
         __hvm_dpci_eoi(d, girq);
 
 unlock:
-    spin_unlock(&d->event_lock);
+    write_unlock(&d->event_lock);
 }
 
 static int pci_clean_dpci_irq(struct domain *d,
@@ -1066,7 +1066,7 @@ int arch_pci_clean_pirqs(struct domain *
     if ( !is_hvm_domain(d) )
         return 0;
 
-    spin_lock(&d->event_lock);
+    write_lock(&d->event_lock);
     hvm_irq_dpci = domain_get_irq_dpci(d);
     if ( hvm_irq_dpci != NULL )
     {
@@ -1074,14 +1074,14 @@ int arch_pci_clean_pirqs(struct domain *
 
         if ( ret )
         {
-            spin_unlock(&d->event_lock);
+            write_unlock(&d->event_lock);
             return ret;
         }
 
         hvm_domain_irq(d)->dpci = NULL;
         free_hvm_irq_dpci(hvm_irq_dpci);
     }
-    spin_unlock(&d->event_lock);
+    write_unlock(&d->event_lock);
 
     return 0;
 }
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -377,7 +377,7 @@ struct domain
     unsigned int     xen_evtchns;
     /* Port to resume from in evtchn_reset(), when in a continuation. */
     unsigned int     next_evtchn;
-    spinlock_t       event_lock;
+    rwlock_t         event_lock;
     const struct evtchn_port_ops *evtchn_port_ops;
     struct evtchn_fifo_domain *evtchn_fifo;
 




* [PATCH v5 3/6] evtchn: slightly defer lock acquire where possible
  2021-01-27  8:13 [PATCH v5 0/6] evtchn: (not so) recent XSAs follow-on Jan Beulich
  2021-01-27  8:15 ` [PATCH v5 1/6] evtchn: use per-channel lock where possible Jan Beulich
  2021-01-27  8:16 ` [PATCH v5 2/6] evtchn: convert domain event lock to an r/w one Jan Beulich
@ 2021-01-27  8:16 ` Jan Beulich
  2021-01-27  8:16 ` [PATCH v5 4/6] evtchn: add helper for port_is_valid() + evtchn_from_port() Jan Beulich
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 15+ messages in thread
From: Jan Beulich @ 2021-01-27  8:16 UTC (permalink / raw)
  To: xen-devel
  Cc: Andrew Cooper, George Dunlap, Ian Jackson, Julien Grall, Wei Liu,
	Stefano Stabellini

port_is_valid() and evtchn_from_port() are fine to use without holding
any locks. Accordingly acquire the per-domain lock slightly later in
evtchn_close() and evtchn_bind_vcpu().

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v4: New.
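
The shape of the transformation, as a self-contained sketch (plain C,
illustrative names only): since validation is lockless, the error path
no longer pays for a lock/unlock pair, and the locked region shrinks to
just the state update.

    #include <pthread.h>

    #define NPORTS 64

    static pthread_rwlock_t event_lock = PTHREAD_RWLOCK_INITIALIZER;
    static int state[NPORTS];

    static int bind_port(unsigned int port, int val)
    {
        if ( port >= NPORTS )     /* lockless check, as in the patch */
            return -1;

        pthread_rwlock_wrlock(&event_lock);  /* success path only */
        state[port] = val;
        pthread_rwlock_unlock(&event_lock);

        return 0;
    }

    int main(void)
    {
        return !(bind_port(3, 1) == 0 && bind_port(99, 1) == -1);
    }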

--- a/xen/common/event_channel.c
+++ b/xen/common/event_channel.c
@@ -610,17 +610,14 @@ int evtchn_close(struct domain *d1, int
     int            port2;
     long           rc = 0;
 
- again:
-    write_lock(&d1->event_lock);
-
     if ( !port_is_valid(d1, port1) )
-    {
-        rc = -EINVAL;
-        goto out;
-    }
+        return -EINVAL;
 
     chn1 = evtchn_from_port(d1, port1);
 
+ again:
+    write_lock(&d1->event_lock);
+
     /* Guest cannot close a Xen-attached event channel. */
     if ( unlikely(consumer_is_xen(chn1)) && guest )
     {
@@ -1044,16 +1041,13 @@ long evtchn_bind_vcpu(unsigned int port,
     if ( (v = domain_vcpu(d, vcpu_id)) == NULL )
         return -ENOENT;
 
-    write_lock(&d->event_lock);
-
     if ( !port_is_valid(d, port) )
-    {
-        rc = -EINVAL;
-        goto out;
-    }
+        return -EINVAL;
 
     chn = evtchn_from_port(d, port);
 
+    write_lock(&d->event_lock);
+
     /* Guest cannot re-bind a Xen-attached event channel. */
     if ( unlikely(consumer_is_xen(chn)) )
     {




* [PATCH v5 4/6] evtchn: add helper for port_is_valid() + evtchn_from_port()
  2021-01-27  8:13 [PATCH v5 0/6] evtchn: (not so) recent XSAs follow-on Jan Beulich
                   ` (2 preceding siblings ...)
  2021-01-27  8:16 ` [PATCH v5 3/6] evtchn: slightly defer lock acquire where possible Jan Beulich
@ 2021-01-27  8:16 ` Jan Beulich
  2021-01-27  8:17 ` [PATCH v5 5/6] evtchn: type adjustments Jan Beulich
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 15+ messages in thread
From: Jan Beulich @ 2021-01-27  8:16 UTC (permalink / raw)
  To: xen-devel
  Cc: Andrew Cooper, George Dunlap, Ian Jackson, Julien Grall, Wei Liu,
	Stefano Stabellini

The combination is pretty common, so adding a simple local helper seems
worthwhile. Make it const- and type-correct, in turn requiring the two
called functions to also be const-correct (and on this occasion also
make them type-correct).

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Julien Grall <jgrall@amazon.com>
---
v4: New.
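
The gist of the helper, reduced to a self-contained sketch (illustrative
names, not the Xen functions themselves): folding the validity check
into the lookup lets callers collapse "check, then fetch" into a single
NULL test.

    #include <stddef.h>

    #define NPORTS 64

    struct channel { int state; };
    static struct channel channels[NPORTS];

    /* Analogue of _evtchn_from_port(): NULL signals an invalid port. */
    static struct channel *chn_from_port(unsigned int port)
    {
        return port < NPORTS ? &channels[port] : NULL;
    }

    int main(void)
    {
        struct channel *chn = chn_from_port(3);

        if ( !chn )               /* was: if ( !port_is_valid(...) ) */
            return 1;
        chn->state = 1;

        return chn_from_port(1000) != NULL;  /* out of range -> NULL */
    }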

--- a/xen/common/event_channel.c
+++ b/xen/common/event_channel.c
@@ -147,6 +147,11 @@ static bool virq_is_global(unsigned int
     return true;
 }
 
+static struct evtchn *_evtchn_from_port(const struct domain *d,
+                                        evtchn_port_t port)
+{
+    return port_is_valid(d, port) ? evtchn_from_port(d, port) : NULL;
+}
 
 static struct evtchn *alloc_evtchn_bucket(struct domain *d, unsigned int port)
 {
@@ -369,9 +374,9 @@ static long evtchn_bind_interdomain(evtc
         ERROR_EXIT(lport);
     lchn = evtchn_from_port(ld, lport);
 
-    if ( !port_is_valid(rd, rport) )
+    rchn = _evtchn_from_port(rd, rport);
+    if ( !rchn )
         ERROR_EXIT_DOM(-EINVAL, rd);
-    rchn = evtchn_from_port(rd, rport);
     if ( (rchn->state != ECS_UNBOUND) ||
          (rchn->u.unbound.remote_domid != ld->domain_id) )
         ERROR_EXIT_DOM(-EINVAL, rd);
@@ -606,15 +611,12 @@ static long evtchn_bind_pirq(evtchn_bind
 int evtchn_close(struct domain *d1, int port1, bool guest)
 {
     struct domain *d2 = NULL;
-    struct evtchn *chn1, *chn2;
-    int            port2;
+    struct evtchn *chn1 = _evtchn_from_port(d1, port1), *chn2;
     long           rc = 0;
 
-    if ( !port_is_valid(d1, port1) )
+    if ( !chn1 )
         return -EINVAL;
 
-    chn1 = evtchn_from_port(d1, port1);
-
  again:
     write_lock(&d1->event_lock);
 
@@ -700,10 +702,8 @@ int evtchn_close(struct domain *d1, int
             goto out;
         }
 
-        port2 = chn1->u.interdomain.remote_port;
-        BUG_ON(!port_is_valid(d2, port2));
-
-        chn2 = evtchn_from_port(d2, port2);
+        chn2 = _evtchn_from_port(d2, chn1->u.interdomain.remote_port);
+        BUG_ON(!chn2);
         BUG_ON(chn2->state != ECS_INTERDOMAIN);
         BUG_ON(chn2->u.interdomain.remote_dom != d1);
 
@@ -741,15 +741,13 @@ int evtchn_close(struct domain *d1, int
 
 int evtchn_send(struct domain *ld, unsigned int lport)
 {
-    struct evtchn *lchn, *rchn;
+    struct evtchn *lchn = _evtchn_from_port(ld, lport), *rchn;
     struct domain *rd;
     int            rport, ret = 0;
 
-    if ( !port_is_valid(ld, lport) )
+    if ( !lchn )
         return -EINVAL;
 
-    lchn = evtchn_from_port(ld, lport);
-
     evtchn_read_lock(lchn);
 
     /* Guest cannot send via a Xen-attached event channel. */
@@ -961,7 +959,6 @@ int evtchn_status(evtchn_status_t *statu
 {
     struct domain   *d;
     domid_t          dom = status->dom;
-    int              port = status->port;
     struct evtchn   *chn;
     long             rc = 0;
 
@@ -969,14 +966,13 @@ int evtchn_status(evtchn_status_t *statu
     if ( d == NULL )
         return -ESRCH;
 
-    if ( !port_is_valid(d, port) )
+    chn = _evtchn_from_port(d, status->port);
+    if ( !chn )
     {
         rcu_unlock_domain(d);
         return -EINVAL;
     }
 
-    chn = evtchn_from_port(d, port);
-
     evtchn_read_lock(chn);
 
     if ( consumer_is_xen(chn) )
@@ -1041,11 +1037,10 @@ long evtchn_bind_vcpu(unsigned int port,
     if ( (v = domain_vcpu(d, vcpu_id)) == NULL )
         return -ENOENT;
 
-    if ( !port_is_valid(d, port) )
+    chn = _evtchn_from_port(d, port);
+    if ( !chn )
         return -EINVAL;
 
-    chn = evtchn_from_port(d, port);
-
     write_lock(&d->event_lock);
 
     /* Guest cannot re-bind a Xen-attached event channel. */
@@ -1091,13 +1086,11 @@ long evtchn_bind_vcpu(unsigned int port,
 int evtchn_unmask(unsigned int port)
 {
     struct domain *d = current->domain;
-    struct evtchn *evtchn;
+    struct evtchn *evtchn = _evtchn_from_port(d, port);
 
-    if ( unlikely(!port_is_valid(d, port)) )
+    if ( unlikely(!evtchn) )
         return -EINVAL;
 
-    evtchn = evtchn_from_port(d, port);
-
     evtchn_read_lock(evtchn);
 
     evtchn_port_unmask(d, evtchn);
@@ -1180,14 +1173,12 @@ static long evtchn_set_priority(const st
 {
     struct domain *d = current->domain;
     unsigned int port = set_priority->port;
-    struct evtchn *chn;
+    struct evtchn *chn = _evtchn_from_port(d, port);
     long ret;
 
-    if ( !port_is_valid(d, port) )
+    if ( !chn )
         return -EINVAL;
 
-    chn = evtchn_from_port(d, port);
-
     evtchn_read_lock(chn);
 
     ret = evtchn_port_set_priority(d, chn, set_priority->priority);
@@ -1413,10 +1404,10 @@ void free_xen_event_channel(struct domai
 
 void notify_via_xen_event_channel(struct domain *ld, int lport)
 {
-    struct evtchn *lchn, *rchn;
+    struct evtchn *lchn = _evtchn_from_port(ld, lport), *rchn;
     struct domain *rd;
 
-    if ( !port_is_valid(ld, lport) )
+    if ( !lchn )
     {
         /*
          * Make sure ->is_dying is read /after/ ->valid_evtchns, pairing
@@ -1427,8 +1418,6 @@ void notify_via_xen_event_channel(struct
         return;
     }
 
-    lchn = evtchn_from_port(ld, lport);
-
     if ( !evtchn_read_trylock(lchn) )
         return;
 
@@ -1582,16 +1571,17 @@ static void domain_dump_evtchn_info(stru
            "Polling vCPUs: {%*pbl}\n"
            "    port [p/m/s]\n", d->domain_id, d->max_vcpus, d->poll_mask);
 
-    for ( port = 1; port_is_valid(d, port); ++port )
+    for ( port = 1; ; ++port )
     {
-        struct evtchn *chn;
+        struct evtchn *chn = _evtchn_from_port(d, port);
         char *ssid;
 
+        if ( !chn )
+            break;
+
         if ( !(port & 0x3f) )
             process_pending_softirqs();
 
-        chn = evtchn_from_port(d, port);
-
         if ( !evtchn_read_trylock(chn) )
         {
             printk("    %4u in flux\n", port);
--- a/xen/include/xen/event.h
+++ b/xen/include/xen/event.h
@@ -120,7 +120,7 @@ static inline void evtchn_read_unlock(st
     read_unlock(&evtchn->lock);
 }
 
-static inline bool_t port_is_valid(struct domain *d, unsigned int p)
+static inline bool port_is_valid(const struct domain *d, evtchn_port_t p)
 {
     if ( p >= read_atomic(&d->valid_evtchns) )
         return false;
@@ -135,7 +135,8 @@ static inline bool_t port_is_valid(struc
     return true;
 }
 
-static inline struct evtchn *evtchn_from_port(struct domain *d, unsigned int p)
+static inline struct evtchn *evtchn_from_port(const struct domain *d,
+                                              evtchn_port_t p)
 {
     if ( p < EVTCHNS_PER_BUCKET )
         return &d->evtchn[array_index_nospec(p, EVTCHNS_PER_BUCKET)];




* [PATCH v5 5/6] evtchn: type adjustments
  2021-01-27  8:13 [PATCH v5 0/6] evtchn: (not so) recent XSAs follow-on Jan Beulich
                   ` (3 preceding siblings ...)
  2021-01-27  8:16 ` [PATCH v5 4/6] evtchn: add helper for port_is_valid() + evtchn_from_port() Jan Beulich
@ 2021-01-27  8:17 ` Jan Beulich
  2021-01-27  8:17 ` [PATCH v5 6/6] evtchn: drop acquiring of per-channel lock from send_guest_{global,vcpu}_virq() Jan Beulich
  2021-04-21 15:23 ` Ping: [PATCH v5 0/6] evtchn: (not so) recent XSAs follow-on Jan Beulich
  6 siblings, 0 replies; 15+ messages in thread
From: Jan Beulich @ 2021-01-27  8:17 UTC (permalink / raw)
  To: xen-devel
  Cc: Andrew Cooper, George Dunlap, Ian Jackson, Julien Grall, Wei Liu,
	Stefano Stabellini

First of all, avoid "long" when "int" suffices, in particular when
merely conveying error codes. 32-bit values are slightly cheaper to
deal with on x86, and their processing is at least no more expensive on
Arm. Where possible, use evtchn_port_t for port numbers and unsigned int
for other unsigned quantities in adjacent code. In evtchn_set_priority(),
eliminate a local variable altogether instead of changing its type.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v4: New.
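
A small self-contained illustration of the two type points (the
evtchn_port_t definition below matches Xen's public
xen/include/public/event_channel.h; the rest is made up for the
example): plain int comfortably holds errno-style codes, and an
unsigned port type lets a single bounds compare subsume the old
"pirq < 0" style checks.

    #include <errno.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef uint32_t evtchn_port_t;  /* fixed-width, part of the ABI */

    /* "int" suffices for error codes; 32-bit operations also avoid
     * REX.W prefixes on x86-64, making them marginally cheaper. */
    static int check_port(evtchn_port_t port, unsigned int nr_ports)
    {
        /* Unsigned compare rejects would-be negative values, too. */
        return port < nr_ports ? 0 : -EINVAL;
    }

    int main(void)
    {
        printf("%d %d\n", check_port(3, 64), check_port(70, 64));
        return 0;
    }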

--- a/xen/common/event_channel.c
+++ b/xen/common/event_channel.c
@@ -287,13 +287,12 @@ void evtchn_free(struct domain *d, struc
     xsm_evtchn_close_post(chn);
 }
 
-static long evtchn_alloc_unbound(evtchn_alloc_unbound_t *alloc)
+static int evtchn_alloc_unbound(evtchn_alloc_unbound_t *alloc)
 {
     struct evtchn *chn;
     struct domain *d;
-    int            port;
+    int            port, rc;
     domid_t        dom = alloc->dom;
-    long           rc;
 
     d = rcu_lock_domain_by_any_id(dom);
     if ( d == NULL )
@@ -346,13 +345,13 @@ static void double_evtchn_unlock(struct
     evtchn_write_unlock(rchn);
 }
 
-static long evtchn_bind_interdomain(evtchn_bind_interdomain_t *bind)
+static int evtchn_bind_interdomain(evtchn_bind_interdomain_t *bind)
 {
     struct evtchn *lchn, *rchn;
     struct domain *ld = current->domain, *rd;
-    int            lport, rport = bind->remote_port;
+    int            lport, rc;
+    evtchn_port_t  rport = bind->remote_port;
     domid_t        rdom = bind->remote_dom;
-    long           rc;
 
     if ( (rd = rcu_lock_domain_by_any_id(rdom)) == NULL )
         return -ESRCH;
@@ -488,12 +487,12 @@ int evtchn_bind_virq(evtchn_bind_virq_t
 }
 
 
-static long evtchn_bind_ipi(evtchn_bind_ipi_t *bind)
+static int evtchn_bind_ipi(evtchn_bind_ipi_t *bind)
 {
     struct evtchn *chn;
     struct domain *d = current->domain;
-    int            port, vcpu = bind->vcpu;
-    long           rc = 0;
+    int            port, rc = 0;
+    unsigned int   vcpu = bind->vcpu;
 
     if ( domain_vcpu(d, vcpu) == NULL )
         return -ENOENT;
@@ -547,16 +546,16 @@ static void unlink_pirq_port(struct evtc
 }
 
 
-static long evtchn_bind_pirq(evtchn_bind_pirq_t *bind)
+static int evtchn_bind_pirq(evtchn_bind_pirq_t *bind)
 {
     struct evtchn *chn;
     struct domain *d = current->domain;
     struct vcpu   *v = d->vcpu[0];
     struct pirq   *info;
-    int            port = 0, pirq = bind->pirq;
-    long           rc;
+    int            port = 0, rc;
+    unsigned int   pirq = bind->pirq;
 
-    if ( (pirq < 0) || (pirq >= d->nr_pirqs) )
+    if ( pirq >= d->nr_pirqs )
         return -EINVAL;
 
     if ( !is_hvm_domain(d) && !pirq_access_permitted(d, pirq) )
@@ -612,7 +611,7 @@ int evtchn_close(struct domain *d1, int
 {
     struct domain *d2 = NULL;
     struct evtchn *chn1 = _evtchn_from_port(d1, port1), *chn2;
-    long           rc = 0;
+    int            rc = 0;
 
     if ( !chn1 )
         return -EINVAL;
@@ -960,7 +959,7 @@ int evtchn_status(evtchn_status_t *statu
     struct domain   *d;
     domid_t          dom = status->dom;
     struct evtchn   *chn;
-    long             rc = 0;
+    int              rc = 0;
 
     d = rcu_lock_domain_by_any_id(dom);
     if ( d == NULL )
@@ -1026,11 +1025,11 @@ int evtchn_status(evtchn_status_t *statu
 }
 
 
-long evtchn_bind_vcpu(unsigned int port, unsigned int vcpu_id)
+int evtchn_bind_vcpu(evtchn_port_t port, unsigned int vcpu_id)
 {
     struct domain *d = current->domain;
     struct evtchn *chn;
-    long           rc = 0;
+    int           rc = 0;
     struct vcpu   *v;
 
     /* Use the vcpu info to prevent speculative out-of-bound accesses */
@@ -1169,12 +1168,11 @@ int evtchn_reset(struct domain *d, bool
     return rc;
 }
 
-static long evtchn_set_priority(const struct evtchn_set_priority *set_priority)
+static int evtchn_set_priority(const struct evtchn_set_priority *set_priority)
 {
     struct domain *d = current->domain;
-    unsigned int port = set_priority->port;
-    struct evtchn *chn = _evtchn_from_port(d, port);
-    long ret;
+    struct evtchn *chn = _evtchn_from_port(d, set_priority->port);
+    int ret;
 
     if ( !chn )
         return -EINVAL;
@@ -1190,7 +1188,7 @@ static long evtchn_set_priority(const st
 
 long do_event_channel_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
 {
-    long rc;
+    int rc;
 
     switch ( cmd )
     {
--- a/xen/include/xen/event.h
+++ b/xen/include/xen/event.h
@@ -54,7 +54,7 @@ void send_guest_pirq(struct domain *, co
 int evtchn_send(struct domain *d, unsigned int lport);
 
 /* Bind a local event-channel port to the specified VCPU. */
-long evtchn_bind_vcpu(unsigned int port, unsigned int vcpu_id);
+int evtchn_bind_vcpu(evtchn_port_t port, unsigned int vcpu_id);
 
 /* Bind a VIRQ. */
 int evtchn_bind_virq(evtchn_bind_virq_t *bind, evtchn_port_t port);




* [PATCH v5 6/6] evtchn: drop acquiring of per-channel lock from send_guest_{global,vcpu}_virq()
  2021-01-27  8:13 [PATCH v5 0/6] evtchn: (not so) recent XSAs follow-on Jan Beulich
                   ` (4 preceding siblings ...)
  2021-01-27  8:17 ` [PATCH v5 5/6] evtchn: type adjustments Jan Beulich
@ 2021-01-27  8:17 ` Jan Beulich
  2021-04-21 15:23 ` Ping: [PATCH v5 0/6] evtchn: (not so) recent XSAs follow-on Jan Beulich
  6 siblings, 0 replies; 15+ messages in thread
From: Jan Beulich @ 2021-01-27  8:17 UTC (permalink / raw)
  To: xen-devel
  Cc: Andrew Cooper, George Dunlap, Ian Jackson, Julien Grall, Wei Liu,
	Stefano Stabellini

The per-vCPU virq_lock, which is being held anyway, together with the
fact that evtchn_port_set_pending() is never called when
v->virq_to_evtchn[] is zero, provides sufficient guarantees. Undo the
lock addition done for XSA-343 (commit e045199c7c9c "evtchn: address
races with evtchn_reset()"). Update the description next to struct
evtchn_port_ops accordingly.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v4: Move to end of series, for being the most controversial change.
v3: Re-base.
v2: New.
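
The argument, as a self-contained pthread sketch (illustrative names;
Xen's real ordering additionally relies on read_atomic()/write_atomic()
accesses to virq_to_evtchn[]): the sender holds the virq lock for
reading and only acts on a non-zero port, while teardown clears the
port under the write lock, so the binding cannot go away while a send
is in progress.

    #include <pthread.h>
    #include <stdatomic.h>

    static pthread_rwlock_t virq_lock = PTHREAD_RWLOCK_INITIALIZER;
    static atomic_uint virq_to_evtchn;   /* 0 == not bound */

    static void deliver(unsigned int port)
    {
        (void)port;  /* evtchn_port_set_pending() would run here */
    }

    /* Sender: no per-channel lock needed under this scheme. */
    static void send_virq(void)
    {
        unsigned int port;

        pthread_rwlock_rdlock(&virq_lock);
        port = atomic_load(&virq_to_evtchn);
        if ( port )
            deliver(port);
        pthread_rwlock_unlock(&virq_lock);
    }

    /* Teardown: once this returns, no sender can still see the port. */
    static void unbind_virq(void)
    {
        pthread_rwlock_wrlock(&virq_lock);
        atomic_store(&virq_to_evtchn, 0);
        pthread_rwlock_unlock(&virq_lock);
    }

    int main(void)
    {
        atomic_store(&virq_to_evtchn, 5);
        send_virq();
        unbind_virq();
        send_virq();   /* now a no-op: port reads as 0 */
        return 0;
    }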

--- a/xen/common/event_channel.c
+++ b/xen/common/event_channel.c
@@ -812,7 +812,6 @@ void send_guest_vcpu_virq(struct vcpu *v
     unsigned long flags;
     int port;
     struct domain *d;
-    struct evtchn *chn;
 
     ASSERT(!virq_is_global(virq));
 
@@ -823,12 +822,7 @@ void send_guest_vcpu_virq(struct vcpu *v
         goto out;
 
     d = v->domain;
-    chn = evtchn_from_port(d, port);
-    if ( evtchn_read_trylock(chn) )
-    {
-        evtchn_port_set_pending(d, v->vcpu_id, chn);
-        evtchn_read_unlock(chn);
-    }
+    evtchn_port_set_pending(d, v->vcpu_id, evtchn_from_port(d, port));
 
  out:
     read_unlock_irqrestore(&v->virq_lock, flags);
@@ -857,11 +851,7 @@ void send_guest_global_virq(struct domai
         goto out;
 
     chn = evtchn_from_port(d, port);
-    if ( evtchn_read_trylock(chn) )
-    {
-        evtchn_port_set_pending(d, chn->notify_vcpu_id, chn);
-        evtchn_read_unlock(chn);
-    }
+    evtchn_port_set_pending(d, chn->notify_vcpu_id, chn);
 
  out:
     read_unlock_irqrestore(&v->virq_lock, flags);
--- a/xen/include/xen/event.h
+++ b/xen/include/xen/event.h
@@ -193,9 +193,16 @@ int evtchn_reset(struct domain *d, bool
  * Low-level event channel port ops.
  *
  * All hooks have to be called with a lock held which prevents the channel
- * from changing state. This may be the domain event lock, the per-channel
- * lock, or in the case of sending interdomain events also the other side's
- * per-channel lock. Exceptions apply in certain cases for the PV shim.
+ * from changing state. This may be
+ * - the domain event lock,
+ * - the per-channel lock,
+ * - in the case of sending interdomain events the other side's per-channel
+ *   lock,
+ * - in the case of sending non-global vIRQ-s the per-vCPU virq_lock (in
+ *   combination with the ordering enforced through how the vCPU's
+ *   virq_to_evtchn[] gets updated),
+ * - in the case of sending global vIRQ-s vCPU 0's virq_lock.
+ * Exceptions apply in certain cases for the PV shim.
  */
 struct evtchn_port_ops {
     void (*init)(struct domain *d, struct evtchn *evtchn);




* Ping: [PATCH v5 0/6] evtchn: (not so) recent XSAs follow-on
  2021-01-27  8:13 [PATCH v5 0/6] evtchn: (not so) recent XSAs follow-on Jan Beulich
                   ` (5 preceding siblings ...)
  2021-01-27  8:17 ` [PATCH v5 6/6] evtchn: drop acquiring of per-channel lock from send_guest_{global,vcpu}_virq() Jan Beulich
@ 2021-04-21 15:23 ` Jan Beulich
  2021-04-21 15:56   ` Julien Grall
  6 siblings, 1 reply; 15+ messages in thread
From: Jan Beulich @ 2021-04-21 15:23 UTC (permalink / raw)
  To: xen-devel
  Cc: Andrew Cooper, George Dunlap, Ian Jackson, Julien Grall, Wei Liu,
	Stefano Stabellini

On 27.01.2021 09:13, Jan Beulich wrote:
> These are grouped into a series largely because of their origin,
> not so much because there are (heavy) dependencies among them.
> The main change from v4 is the dropping of the two patches trying
> to do away with the double event lock acquires in interdomain
> channel handling. See also the individual patches.
> 
> 1: use per-channel lock where possible
> 2: convert domain event lock to an r/w one
> 3: slightly defer lock acquire where possible
> 4: add helper for port_is_valid() + evtchn_from_port()
> 5: type adjustments
> 6: drop acquiring of per-channel lock from send_guest_{global,vcpu}_virq()

Only patch 4 here has got an ack so far - may I ask for clear feedback
as to at least some of these being acceptable (I can see the last one
being controversial, and if this was the only one left I probably
wouldn't even ping, despite thinking that it helps reduce unnecessary
overhead).

Jan



* Re: Ping: [PATCH v5 0/6] evtchn: (not so) recent XSAs follow-on
  2021-04-21 15:23 ` Ping: [PATCH v5 0/6] evtchn: (not so) recent XSAs follow-on Jan Beulich
@ 2021-04-21 15:56   ` Julien Grall
  2021-04-22  8:53     ` Jan Beulich
  0 siblings, 1 reply; 15+ messages in thread
From: Julien Grall @ 2021-04-21 15:56 UTC (permalink / raw)
  To: Jan Beulich, xen-devel
  Cc: Andrew Cooper, George Dunlap, Ian Jackson, Wei Liu, Stefano Stabellini



On 21/04/2021 16:23, Jan Beulich wrote:
> On 27.01.2021 09:13, Jan Beulich wrote:
>> These are grouped into a series largely because of their origin,
>> not so much because there are (heavy) dependencies among them.
>> The main change from v4 is the dropping of the two patches trying
>> to do away with the double event lock acquires in interdomain
>> channel handling. See also the individual patches.
>>
>> 1: use per-channel lock where possible
>> 2: convert domain event lock to an r/w one
>> 3: slightly defer lock acquire where possible
>> 4: add helper for port_is_valid() + evtchn_from_port()
>> 5: type adjustments
>> 6: drop acquiring of per-channel lock from send_guest_{global,vcpu}_virq()
> 
> Only patch 4 here has got an ack so far - may I ask for clear feedback
> as to at least some of these being acceptable (I can see the last one
> being controversial, and if this was the only one left I probably
>> wouldn't even ping, despite thinking that it helps reduce unnecessary
> overhead).

I left feedback on the previous version of the series (see [1]). It 
would have been nice if this had been mentioned somewhere, as it is 
still unresolved.

The locking in the event channel code is already quite fragile (see 
the recent XSAs, follow-up bugs...). Even if the pattern is already 
used somewhere (as you suggested), I don't think it is a good idea to 
perpetuate it.

To be clear, I am not saying I don't care about performance. Rather, I 
am trying to find a trade-off between code maintainability and 
performance. So far, I haven't seen any data justifying that the extra 
performance is worth the risk of making the code more fragile.

Cheers,

[1] <3c393170-09f9-6d31-c227-b599f8769e35@xen.org>

> 
> Jan
> 

-- 
Julien Grall



* Re: Ping: [PATCH v5 0/6] evtchn: (not so) recent XSAs follow-on
  2021-04-21 15:56   ` Julien Grall
@ 2021-04-22  8:53     ` Jan Beulich
  2021-05-14 15:29       ` Roger Pau Monné
  0 siblings, 1 reply; 15+ messages in thread
From: Jan Beulich @ 2021-04-22  8:53 UTC (permalink / raw)
  To: Julien Grall
  Cc: Andrew Cooper, George Dunlap, Ian Jackson, Wei Liu,
	Stefano Stabellini, xen-devel

On 21.04.2021 17:56, Julien Grall wrote:
> 
> 
> On 21/04/2021 16:23, Jan Beulich wrote:
>> On 27.01.2021 09:13, Jan Beulich wrote:
>>> These are grouped into a series largely because of their origin,
>>> not so much because there are (heavy) dependencies among them.
>>> The main change from v4 is the dropping of the two patches trying
>>> to do away with the double event lock acquires in interdomain
>>> channel handling. See also the individual patches.
>>>
>>> 1: use per-channel lock where possible
>>> 2: convert domain event lock to an r/w one
>>> 3: slightly defer lock acquire where possible
>>> 4: add helper for port_is_valid() + evtchn_from_port()
>>> 5: type adjustments
>>> 6: drop acquiring of per-channel lock from send_guest_{global,vcpu}_virq()
>>
>> Only patch 4 here has got an ack so far - may I ask for clear feedback
>> as to at least some of these being acceptable (I can see the last one
>> being controversial, and if this was the only one left I probably
> >> wouldn't even ping, despite thinking that it helps reduce unnecessary
>> overhead).
> 
> I left feedback on the previous version of the series (see [1]). It 
> would have been nice if this had been mentioned somewhere, as it is 
> still unresolved.

I will admit I forgot about the controversy on patch 1. I did, however,
reply to your concerns. What didn't happen is the feedback from others
that you did ask for.

And of course there are 4 more patches here (one of them having an ack,
yes) which could do with feedback. I'm certainly willing, where possible,
to further re-order the series such that controversial changes are at its
end.

Jan



* Re: Ping: [PATCH v5 0/6] evtchn: (not so) recent XSAs follow-on
  2021-04-22  8:53     ` Jan Beulich
@ 2021-05-14 15:29       ` Roger Pau Monné
  2021-05-17  7:15         ` Jan Beulich
  0 siblings, 1 reply; 15+ messages in thread
From: Roger Pau Monné @ 2021-05-14 15:29 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Julien Grall, Andrew Cooper, George Dunlap, Ian Jackson, Wei Liu,
	Stefano Stabellini, xen-devel

On Thu, Apr 22, 2021 at 10:53:05AM +0200, Jan Beulich wrote:
> On 21.04.2021 17:56, Julien Grall wrote:
> > 
> > 
> > On 21/04/2021 16:23, Jan Beulich wrote:
> >> On 27.01.2021 09:13, Jan Beulich wrote:
> >>> These are grouped into a series largely because of their origin,
> >>> not so much because there are (heavy) dependencies among them.
> >>> The main change from v4 is the dropping of the two patches trying
> >>> to do away with the double event lock acquires in interdomain
> >>> channel handling. See also the individual patches.
> >>>
> >>> 1: use per-channel lock where possible
> >>> 2: convert domain event lock to an r/w one
> >>> 3: slightly defer lock acquire where possible
> >>> 4: add helper for port_is_valid() + evtchn_from_port()
> >>> 5: type adjustments
> >>> 6: drop acquiring of per-channel lock from send_guest_{global,vcpu}_virq()
> >>
> >> Only patch 4 here has got an ack so far - may I ask for clear feedback
> >> as to at least some of these being acceptable (I can see the last one
> >> being controversial, and if this was the only one left I probably
> >> wouldn't even ping, despite thinking that it helps reduce unnecessary
> >> overhead).
> > 
> > I left feedback on the previous version of the series (see [1]). It 
> > would have been nice if this had been mentioned somewhere, as it is 
> > still unresolved.
> 
> I will admit I forgot about the controversy on patch 1. I did, however,
> reply to your concerns. What didn't happen is the feedback from others
> that you did ask for.
> 
> And of course there are 4 more patches here (one of them having an ack,
> yes) which could do with feedback. I'm certainly willing, where possible,
> to further re-order the series such that controversial changes are at its
> end.

I think it would be easier to figure out whether the changes are
correct if we had some documentation about what the per-domain
event_lock and the per-event locks protect and how they are supposed
to be used. I don't seem to be able to find any comments regarding
this.

Regarding the changes themselves in patch 1 (which I think have caused
part of the controversy here), I'm unsure they are worth it, because
the functions modified all seem to be non-performance-critical:
evtchn_status, domain_dump_evtchn_info, flask_get_peer_sid.

So I would say that unless we have clear rules written down for what
the per-domain event_lock protects, I would be hesitant to change any
of the logic, especially for critical paths.

Thanks, Roger.



* Re: Ping: [PATCH v5 0/6] evtchn: (not so) recent XSAs follow-on
  2021-05-14 15:29       ` Roger Pau Monné
@ 2021-05-17  7:15         ` Jan Beulich
  0 siblings, 0 replies; 15+ messages in thread
From: Jan Beulich @ 2021-05-17  7:15 UTC (permalink / raw)
  To: Roger Pau Monné
  Cc: Julien Grall, Andrew Cooper, George Dunlap, Ian Jackson, Wei Liu,
	Stefano Stabellini, xen-devel

On 14.05.2021 17:29, Roger Pau Monné wrote:
> On Thu, Apr 22, 2021 at 10:53:05AM +0200, Jan Beulich wrote:
>> On 21.04.2021 17:56, Julien Grall wrote:
>>>
>>>
>>> On 21/04/2021 16:23, Jan Beulich wrote:
>>>> On 27.01.2021 09:13, Jan Beulich wrote:
>>>>> These are grouped into a series largely because of their origin,
>>>>> not so much because there are (heavy) dependencies among them.
>>>>> The main change from v4 is the dropping of the two patches trying
>>>>> to do away with the double event lock acquires in interdomain
>>>>> channel handling. See also the individual patches.
>>>>>
>>>>> 1: use per-channel lock where possible
>>>>> 2: convert domain event lock to an r/w one
>>>>> 3: slightly defer lock acquire where possible
>>>>> 4: add helper for port_is_valid() + evtchn_from_port()
>>>>> 5: type adjustments
>>>>> 6: drop acquiring of per-channel lock from send_guest_{global,vcpu}_virq()
>>>>
>>>> Only patch 4 here has got an ack so far - may I ask for clear feedback
>>>> as to at least some of these being acceptable (I can see the last one
>>>> being controversial, and if this was the only one left I probably
>>>> wouldn't even ping, despite thinking that it helps reduce unnecessary
>>>> overhead).
>>>
>>> I left feedback on the previous version of the series (see [1]). It 
>>> would have been nice if this had been mentioned somewhere, as it is 
>>> still unresolved.
>>
>> I will admit I forgot about the controversy on patch 1. I did, however,
>> reply to your concerns. What didn't happen is the feedback from others
>> that you did ask for.
>>
>> And of course there are 4 more patches here (one of them having an ack,
>> yes) which could do with feedback. I'm certainly willing, where possible,
>> to further re-order the series such that controversial changes are at its
>> end.
> 
> I think it would be easier to figure out whether the changes are
> correct if we had some documentation about what the per-domain
> event_lock and the per-event locks protect and how they are supposed
> to be used. I don't seem to be able to find any comments regarding
> this.

I think especially in pass-through code there are a number of cases
where the per-domain lock really is being abused, simply for being
available without much further thought. I'm not convinced documenting
such abuse is going to help the situation. Yet of course I can see
that having documentation would make review easier ...

> Regarding the changes themselves in patch 1 (which I think have
> caused part of the controversy here), I'm unsure they are worth it,
> because the functions modified all seem to be non-performance-critical:
> evtchn_status, domain_dump_evtchn_info, flask_get_peer_sid.
> 
> So I would say that unless we have clear rules written down for what
> the per-domain event_lock protects, I would be hesitant to change any
> of the logic, especially for critical paths.

Okay, I'll drop patch 1 and also patch 6 for being overly controversial.
Some of the other patches still look worthwhile to me, though. I'll
also consider moving the spin->r/w lock conversion last in the series.

Jan



* Re: [PATCH v5 2/6] evtchn: convert domain event lock to an r/w one
  2021-01-27  8:16 ` [PATCH v5 2/6] evtchn: convert domain event lock to an r/w one Jan Beulich
@ 2021-05-27 11:01   ` Roger Pau Monné
  2021-05-27 11:16     ` Jan Beulich
  2022-07-07 18:00   ` Julien Grall
  1 sibling, 1 reply; 15+ messages in thread
From: Roger Pau Monné @ 2021-05-27 11:01 UTC (permalink / raw)
  To: Jan Beulich
  Cc: xen-devel, Andrew Cooper, George Dunlap, Ian Jackson,
	Julien Grall, Wei Liu, Stefano Stabellini, Kevin Tian

On Wed, Jan 27, 2021 at 09:16:07AM +0100, Jan Beulich wrote:
> Especially for the use in evtchn_move_pirqs() (called when moving a vCPU
> across pCPU-s) and the ones in EOI handling in PCI pass-through code,
> serializing perhaps an entire domain isn't helpful when no state (which
> isn't e.g. further protected by the per-channel lock) changes.

I'm unsure this move is good from a performance PoV, as the operations
switched to use the lock in read mode are a very small subset, and then
the remaining operations get a performance penalty when compared to
using a plain spin lock.

> Unfortunately this implies dropping of lock profiling for this lock,
> until r/w locks may get enabled for such functionality.
> 
> While ->notify_vcpu_id is now meant to be consistently updated with the
> per-channel lock held, an extension applies to ECS_PIRQ: The field is
> also guaranteed to not change with the per-domain event lock held for
> writing. Therefore the link_pirq_port() call from evtchn_bind_pirq()
> could in principle be moved out of the per-channel locked regions, but
> this further code churn didn't seem worth it.
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> @@ -1510,9 +1509,10 @@ int evtchn_destroy(struct domain *d)
>  {
>      unsigned int i;
>  
> -    /* After this barrier no new event-channel allocations can occur. */
> +    /* After this kind-of-barrier no new event-channel allocations can occur. */
>      BUG_ON(!d->is_dying);
> -    spin_barrier(&d->event_lock);
> +    read_lock(&d->event_lock);
> +    read_unlock(&d->event_lock);

Don't you want to use write mode here to ensure there are no readers
that took the lock before is_dying was set, and thus could make wrong
assumptions?

As I understand it, the point of the barrier here is to ensure there
are no lockers carried over from before is_dying was set.

Thanks, Roger.



* Re: [PATCH v5 2/6] evtchn: convert domain event lock to an r/w one
  2021-05-27 11:01   ` Roger Pau Monné
@ 2021-05-27 11:16     ` Jan Beulich
  0 siblings, 0 replies; 15+ messages in thread
From: Jan Beulich @ 2021-05-27 11:16 UTC (permalink / raw)
  To: Roger Pau Monné
  Cc: xen-devel, Andrew Cooper, George Dunlap, Ian Jackson,
	Julien Grall, Wei Liu, Stefano Stabellini, Kevin Tian

On 27.05.2021 13:01, Roger Pau Monné wrote:
> On Wed, Jan 27, 2021 at 09:16:07AM +0100, Jan Beulich wrote:
>> Especially for the use in evtchn_move_pirqs() (called when moving a vCPU
>> across pCPU-s) and the ones in EOI handling in PCI pass-through code,
>> serializing perhaps an entire domain isn't helpful when no state (which
>> isn't e.g. further protected by the per-channel lock) changes.
> 
> I'm unsure this move is good from a performance PoV, as the operations
> switched to use the lock in read mode is a very small subset, and then
> the remaining operations get a performance penalty when compared to
> using a plain spin lock.

Well, yes, unfortunately review of earlier versions has resulted in
there being far fewer read_lock() uses now than I (mistakenly) had
originally. There are a few worthwhile conversions, but on the whole
maybe I should indeed drop this change.

>> @@ -1510,9 +1509,10 @@ int evtchn_destroy(struct domain *d)
>>  {
>>      unsigned int i;
>>  
>> -    /* After this barrier no new event-channel allocations can occur. */
>> +    /* After this kind-of-barrier no new event-channel allocations can occur. */
>>      BUG_ON(!d->is_dying);
>> -    spin_barrier(&d->event_lock);
>> +    read_lock(&d->event_lock);
>> +    read_unlock(&d->event_lock);
> 
> Don't you want to use write mode here to ensure there are no readers
> that took the lock before is_dying was set, and thus could make wrong
> assumptions?
> 
> As I understand it, the point of the barrier here is to ensure there
> are no lockers carried over from before is_dying was set.

The purpose is, as the comment says, to ensure that no new event
channel allocations can occur. Those happen under the write lock, so a
read-lock-based barrier is enough here afaict.
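
Roughly, as a sketch (simplified; in the actual code the is_dying
check sits in get_free_port(), called with the lock already held):

    /* Allocation path - always takes the lock in write mode. */
    write_lock(&d->event_lock);
    rc = d->is_dying ? -EINVAL /* no new ports once dying */
                     : get_free_port(d);
    write_unlock(&d->event_lock);

    /* evtchn_destroy() - taking the lock even in read mode waits for
     * any such writer still in flight, so past this pair no allocation
     * started before is_dying was set can still be in progress. */
    read_lock(&d->event_lock);
    read_unlock(&d->event_lock);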

Jan



* Re: [PATCH v5 2/6] evtchn: convert domain event lock to an r/w one
  2021-01-27  8:16 ` [PATCH v5 2/6] evtchn: convert domain event lock to an r/w one Jan Beulich
  2021-05-27 11:01   ` Roger Pau Monné
@ 2022-07-07 18:00   ` Julien Grall
  1 sibling, 0 replies; 15+ messages in thread
From: Julien Grall @ 2022-07-07 18:00 UTC (permalink / raw)
  To: Jan Beulich, xen-devel
  Cc: Andrew Cooper, George Dunlap, Ian Jackson, Wei Liu,
	Stefano Stabellini, Roger Pau Monné,
	Kevin Tian

Hi Jan,

As discussed in [1], I think it would be good to revive this patch.

AFAICT, this patch was dropped because the performance benefit was 
thought to be minimal. However, I think it would be a better way to 
resolve the problem that one is trying to address [1].

So I will do another review of this patch.

On 27/01/2021 08:16, Jan Beulich wrote:
> Especially for the use in evtchn_move_pirqs() (called when moving a vCPU
> across pCPU-s) and the ones in EOI handling in PCI pass-through code,
> serializing perhaps an entire domain isn't helpful when no state (which
> isn't e.g. further protected by the per-channel lock) changes.
> 
> Unfortunately this implies dropping of lock profiling for this lock,
> until r/w locks may get enabled for such functionality.
> 
> While ->notify_vcpu_id is now meant to be consistently updated with the
> per-channel lock held, an extension applies to ECS_PIRQ: The field is
> also guaranteed to not change with the per-domain event lock held for
> writing. Therefore the link_pirq_port() call from evtchn_bind_pirq()
> could in principle be moved out of the per-channel locked regions, but
> this further code churn didn't seem worth it.

This doesn't seem to apply on upstream anymore. Would you be able to 
respin it?

I have looked at the places where you use read_lock() rather than 
write_lock(). They all look fine to me, so I would be happy to give my 
reviewed-by on the next version (assuming there is nothing wrong with 
the rebase :)).

Cheers,

[1] 
https://lore.kernel.org/xen-devel/acd0dfae-b045-8505-3f6c-30ce72653660@suse.com/

-- 
Julien Grall


