All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jan Beulich <JBeulich@suse.com>
To: "xen-devel@lists.xenproject.org" <xen-devel@lists.xenproject.org>
Cc: "Andrew Cooper" <andrew.cooper3@citrix.com>,
	"Wei Liu" <wl@xen.org>, "Roger Pau Monné" <roger.pau@citrix.com>
Subject: [Xen-devel] [PATCH v4 05/13] x86/IRQ: fix locking around vector management
Date: Tue, 16 Jul 2019 07:39:58 +0000	[thread overview]
Message-ID: <590e074c-f3b2-91ea-acaa-5a0b0409d749@suse.com> (raw)
In-Reply-To: <5cda711a-b417-76e9-d113-ea838463f225@suse.com>

All of __{assign,bind,clear}_irq_vector() manipulate struct irq_desc
fields, and hence ought to be called with the descriptor lock held in
addition to vector_lock. This is currently the case for only
set_desc_affinity() (in the common case) and destroy_irq(), which also
clarifies what the nesting behavior between the locks has to be.
Reflect the new expectation by having these functions all take a
descriptor as parameter instead of an interrupt number.

Also take care of the two special cases of calls to set_desc_affinity():
set_ioapic_affinity_irq() and VT-d's dma_msi_set_affinity() get called
directly as well, and in these cases the descriptor locks were not being
acquired until now. For set_ioapic_affinity_irq() this means acquiring /
releasing of the IO-APIC lock can then be plain spin_{,un}lock().

Drop one of the two leading underscores from all three functions at
the same time.

There's one case left where descriptors get manipulated with just
vector_lock held: setup_vector_irq() expects its caller to have acquired
vector_lock, and hence can't itself acquire the descriptor locks (doing
so would invert the lock order). I don't currently see how to address this.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com> [VT-d]
Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>
Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
v4: Adjust comment ahead of setup_vector_irq().
v3: Also drop one leading underscore from a comment. Re-base.
v2: Also adjust set_ioapic_affinity_irq() and VT-d's
     dma_msi_set_affinity().

--- a/xen/arch/x86/io_apic.c
+++ b/xen/arch/x86/io_apic.c
@@ -550,14 +550,14 @@ static void clear_IO_APIC (void)
  static void
  set_ioapic_affinity_irq(struct irq_desc *desc, const cpumask_t *mask)
  {
-    unsigned long flags;
      unsigned int dest;
      int pin, irq;
      struct irq_pin_list *entry;
  
      irq = desc->irq;
  
-    spin_lock_irqsave(&ioapic_lock, flags);
+    spin_lock(&ioapic_lock);
+
      dest = set_desc_affinity(desc, mask);
      if (dest != BAD_APICID) {
          if ( !x2apic_enabled )
@@ -580,8 +580,8 @@ set_ioapic_affinity_irq(struct irq_desc
              entry = irq_2_pin + entry->next;
          }
      }
-    spin_unlock_irqrestore(&ioapic_lock, flags);
  
+    spin_unlock(&ioapic_lock);
  }
  
  /*
@@ -674,16 +674,19 @@ void /*__init*/ setup_ioapic_dest(void)
      for (ioapic = 0; ioapic < nr_ioapics; ioapic++) {
          for (pin = 0; pin < nr_ioapic_entries[ioapic]; pin++) {
              struct irq_desc *desc;
+            unsigned long flags;
  
              irq_entry = find_irq_entry(ioapic, pin, mp_INT);
              if (irq_entry == -1)
                  continue;
              irq = pin_2_irq(irq_entry, ioapic, pin);
              desc = irq_to_desc(irq);
+
+            spin_lock_irqsave(&desc->lock, flags);
              BUG_ON(!cpumask_intersects(desc->arch.cpu_mask, &cpu_online_map));
              set_ioapic_affinity_irq(desc, desc->arch.cpu_mask);
+            spin_unlock_irqrestore(&desc->lock, flags);
          }
-
      }
  }
  
--- a/xen/arch/x86/irq.c
+++ b/xen/arch/x86/irq.c
@@ -27,6 +27,7 @@
  #include <public/physdev.h>
  
  static int parse_irq_vector_map_param(const char *s);
+static void _clear_irq_vector(struct irq_desc *desc);
  
  /* opt_noirqbalance: If true, software IRQ balancing/affinity is disabled. */
  bool __read_mostly opt_noirqbalance;
@@ -143,13 +144,12 @@ static void trace_irq_mask(uint32_t even
          _trace_irq_mask(event, irq, vector, mask);
  }
  
-static int __init __bind_irq_vector(int irq, int vector, const cpumask_t *cpu_mask)
+static int __init _bind_irq_vector(struct irq_desc *desc, int vector,
+                                   const cpumask_t *cpu_mask)
  {
      cpumask_t online_mask;
      int cpu;
-    struct irq_desc *desc = irq_to_desc(irq);
  
-    BUG_ON((unsigned)irq >= nr_irqs);
      BUG_ON((unsigned)vector >= NR_VECTORS);
  
      cpumask_and(&online_mask, cpu_mask, &cpu_online_map);
@@ -160,9 +160,9 @@ static int __init __bind_irq_vector(int
          return 0;
      if ( desc->arch.vector != IRQ_VECTOR_UNASSIGNED )
          return -EBUSY;
-    trace_irq_mask(TRC_HW_IRQ_BIND_VECTOR, irq, vector, &online_mask);
+    trace_irq_mask(TRC_HW_IRQ_BIND_VECTOR, desc->irq, vector, &online_mask);
      for_each_cpu(cpu, &online_mask)
-        per_cpu(vector_irq, cpu)[vector] = irq;
+        per_cpu(vector_irq, cpu)[vector] = desc->irq;
      desc->arch.vector = vector;
      cpumask_copy(desc->arch.cpu_mask, &online_mask);
      if ( desc->arch.used_vectors )
@@ -176,12 +176,18 @@ static int __init __bind_irq_vector(int
  
  int __init bind_irq_vector(int irq, int vector, const cpumask_t *cpu_mask)
  {
+    struct irq_desc *desc = irq_to_desc(irq);
      unsigned long flags;
      int ret;
  
-    spin_lock_irqsave(&vector_lock, flags);
-    ret = __bind_irq_vector(irq, vector, cpu_mask);
-    spin_unlock_irqrestore(&vector_lock, flags);
+    BUG_ON((unsigned)irq >= nr_irqs);
+
+    spin_lock_irqsave(&desc->lock, flags);
+    spin_lock(&vector_lock);
+    ret = _bind_irq_vector(desc, vector, cpu_mask);
+    spin_unlock(&vector_lock);
+    spin_unlock_irqrestore(&desc->lock, flags);
+
      return ret;
  }
  
@@ -266,18 +272,20 @@ void destroy_irq(unsigned int irq)
  
      spin_lock_irqsave(&desc->lock, flags);
      desc->handler = &no_irq_type;
-    clear_irq_vector(irq);
+    spin_lock(&vector_lock);
+    _clear_irq_vector(desc);
+    spin_unlock(&vector_lock);
      desc->arch.used_vectors = NULL;
      spin_unlock_irqrestore(&desc->lock, flags);
  
      xfree(action);
  }
  
-static void __clear_irq_vector(int irq)
+static void _clear_irq_vector(struct irq_desc *desc)
  {
-    int cpu, vector, old_vector;
+    unsigned int cpu;
+    int vector, old_vector, irq = desc->irq;
      cpumask_t tmp_mask;
-    struct irq_desc *desc = irq_to_desc(irq);
  
      BUG_ON(!desc->arch.vector);
  
@@ -323,11 +331,14 @@ static void __clear_irq_vector(int irq)
  
  void clear_irq_vector(int irq)
  {
+    struct irq_desc *desc = irq_to_desc(irq);
      unsigned long flags;
  
-    spin_lock_irqsave(&vector_lock, flags);
-    __clear_irq_vector(irq);
-    spin_unlock_irqrestore(&vector_lock, flags);
+    spin_lock_irqsave(&desc->lock, flags);
+    spin_lock(&vector_lock);
+    _clear_irq_vector(desc);
+    spin_unlock(&vector_lock);
+    spin_unlock_irqrestore(&desc->lock, flags);
  }
  
  int irq_to_vector(int irq)
@@ -462,8 +473,7 @@ static vmask_t *irq_get_used_vector_mask
      return ret;
  }
  
-static int __assign_irq_vector(
-    int irq, struct irq_desc *desc, const cpumask_t *mask)
+static int _assign_irq_vector(struct irq_desc *desc, const cpumask_t *mask)
  {
      /*
       * NOTE! The local APIC isn't very good at handling
@@ -477,7 +487,8 @@ static int __assign_irq_vector(
       * 0x80, because int 0x80 is hm, kind of importantish. ;)
       */
      static int current_vector = FIRST_DYNAMIC_VECTOR, current_offset = 0;
-    int cpu, err, old_vector;
+    unsigned int cpu;
+    int err, old_vector, irq = desc->irq;
      vmask_t *irq_used_vectors = NULL;
  
      old_vector = irq_to_vector(irq);
@@ -590,8 +601,12 @@ int assign_irq_vector(int irq, const cpu
      
      BUG_ON(irq >= nr_irqs || irq <0);
  
-    spin_lock_irqsave(&vector_lock, flags);
-    ret = __assign_irq_vector(irq, desc, mask ?: TARGET_CPUS);
+    spin_lock_irqsave(&desc->lock, flags);
+
+    spin_lock(&vector_lock);
+    ret = _assign_irq_vector(desc, mask ?: TARGET_CPUS);
+    spin_unlock(&vector_lock);
+
      if ( !ret )
      {
          ret = desc->arch.vector;
@@ -600,14 +615,16 @@ int assign_irq_vector(int irq, const cpu
          else
              cpumask_setall(desc->affinity);
      }
-    spin_unlock_irqrestore(&vector_lock, flags);
+
+    spin_unlock_irqrestore(&desc->lock, flags);
  
      return ret;
  }
  
  /*
   * Initialize vector_irq on a new cpu. This function must be called
- * with vector_lock held.
+ * with vector_lock held.  For this reason it may not itself acquire
+ * the IRQ descriptor locks, as lock nesting is the other way around.
   */
  void setup_vector_irq(unsigned int cpu)
  {
@@ -775,7 +792,6 @@ void irq_complete_move(struct irq_desc *
  
  unsigned int set_desc_affinity(struct irq_desc *desc, const cpumask_t *mask)
  {
-    unsigned int irq;
      int ret;
      unsigned long flags;
      cpumask_t dest_mask;
@@ -783,10 +799,8 @@ unsigned int set_desc_affinity(struct ir
      if (!cpumask_intersects(mask, &cpu_online_map))
          return BAD_APICID;
  
-    irq = desc->irq;
-
      spin_lock_irqsave(&vector_lock, flags);
-    ret = __assign_irq_vector(irq, desc, mask);
+    ret = _assign_irq_vector(desc, mask);
      spin_unlock_irqrestore(&vector_lock, flags);
  
      if (ret < 0)
@@ -2453,7 +2467,7 @@ void fixup_irqs(const cpumask_t *mask, b
  
          /*
           * In order for the affinity adjustment below to be successful, we
-         * need __assign_irq_vector() to succeed. This in particular means
+         * need _assign_irq_vector() to succeed. This in particular means
           * clearing desc->arch.move_in_progress if this would otherwise
           * prevent the function from succeeding. Since there's no way for the
           * flag to get cleared anymore when there's no possible destination
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -2134,11 +2134,16 @@ static void adjust_irq_affinity(struct a
      unsigned int node = rhsa ? pxm_to_node(rhsa->proximity_domain)
                               : NUMA_NO_NODE;
      const cpumask_t *cpumask = &cpu_online_map;
+    struct irq_desc *desc;
  
      if ( node < MAX_NUMNODES && node_online(node) &&
           cpumask_intersects(&node_to_cpumask(node), cpumask) )
          cpumask = &node_to_cpumask(node);
-    dma_msi_set_affinity(irq_to_desc(drhd->iommu->msi.irq), cpumask);
+
+    desc = irq_to_desc(drhd->iommu->msi.irq);
+    spin_lock_irq(&desc->lock);
+    dma_msi_set_affinity(desc, cpumask);
+    spin_unlock_irq(&desc->lock);
  }
  
  static int adjust_vtd_irq_affinities(void)

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

  parent reply	other threads:[~2019-07-16  7:48 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-07-16  7:24 [Xen-devel] [PATCH v4 00/13] x86: IRQ management adjustments Jan Beulich
2019-07-16  7:37 ` [Xen-devel] [PATCH v4 01/13] x86/IRQ: deal with move-in-progress state in fixup_irqs() Jan Beulich
2019-07-19 13:20   ` Andrew Cooper
2019-07-16  7:37 ` [Xen-devel] [PATCH v4 02/13] x86/IRQ: deal with move cleanup count " Jan Beulich
2019-07-16  7:38 ` [Xen-devel] [PATCH v4 03/13] x86/IRQ: desc->affinity should strictly represent the requested value Jan Beulich
2019-07-19 13:25   ` Andrew Cooper
2019-07-16  7:38 ` [Xen-devel] [PATCH v4 04/13] x86/IRQ: consolidate use of ->arch.cpu_mask Jan Beulich
2019-07-16  7:39 ` Jan Beulich [this message]
2019-07-16  7:40 ` [Xen-devel] [PATCH v4 06/13] x86/IOMMU: don't restrict IRQ affinities to online CPUs Jan Beulich
2019-07-16  9:12   ` Roger Pau Monné
2019-07-16 10:20     ` Jan Beulich
2019-07-16 11:17       ` Roger Pau Monné
2019-07-19 13:27   ` Andrew Cooper
2019-07-24  0:53   ` Tian, Kevin
2019-07-24 19:53   ` Woods, Brian
2019-07-16  7:41 ` [Xen-devel] [PATCH v4 07/13] x86/IRQ: target online CPUs when binding guest IRQ Jan Beulich
2019-07-16  7:42 ` [Xen-devel] [PATCH v4 08/13] x86/IRQs: correct/tighten vector check in _clear_irq_vector() Jan Beulich
2019-07-16  7:42 ` [Xen-devel] [PATCH v4 09/13] x86/IRQ: make fixup_irqs() skip unconnected internally used interrupts Jan Beulich
2019-07-16  7:43 ` [Xen-devel] [PATCH v4 10/13] x86/IRQ: drop redundant cpumask_empty() from move_masked_irq() Jan Beulich
2019-07-16  7:43 ` [Xen-devel] [PATCH v4 11/13] x86/IRQ: tighten vector checks Jan Beulich
2019-07-16  7:44 ` [Xen-devel] [PATCH v4 12/13] x86/IRQ: eliminate some on-stack cpumask_t instances Jan Beulich
2019-07-16  7:45 ` [Xen-devel] [PATCH v4 13/13] x86/IRQ: move {,_}clear_irq_vector() Jan Beulich
2019-07-19 12:36 ` [Xen-devel] [PATCH v4 00/13] x86: IRQ management adjustments Andrew Cooper
2019-07-19 13:04   ` Jan Beulich
2019-07-19 13:13     ` Andrew Cooper

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=590e074c-f3b2-91ea-acaa-5a0b0409d749@suse.com \
    --to=jbeulich@suse.com \
    --cc=andrew.cooper3@citrix.com \
    --cc=roger.pau@citrix.com \
    --cc=wl@xen.org \
    --cc=xen-devel@lists.xenproject.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.