All of lore.kernel.org
 help / color / mirror / Atom feed
From: Roger Pau Monne <roger.pau@citrix.com>
To: <xen-devel@lists.xenproject.org>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>, Wei Liu <wl@xen.org>,
	Jan Beulich <jbeulich@suse.com>,
	Roger Pau Monne <roger.pau@citrix.com>
Subject: [Xen-devel] [PATCH] x86/flush: use APIC ALLBUT destination shorthand when possible
Date: Tue, 24 Dec 2019 13:44:53 +0100	[thread overview]
Message-ID: <20191224124453.47183-1-roger.pau@citrix.com> (raw)

If the flush mask matches the mask of online CPUs, use the APIC ALLBUT
destination shorthand in order to send an IPI to all CPUs on the
system except the current one. This can only be safely used when no
CPU hotplug or unplug operations are taking place, when there are no
offline CPUs (or all of them have been onlined and parked), and finally
when all CPUs in the system have been accounted for (ie: the number of
CPUs doesn't exceed NR_CPUS and APIC IDs are below MAX_APICS).

This is especially beneficial when using the PV shim, since using the
shorthand avoids performing an APIC register write (or multiple ones
if using xAPIC mode) for each destination in the flush mask.

The lock time on a 32 vCPU guest using the shim without the shorthand
is:

Global lock flush_lock: addr=ffff82d0804b21c0, lockval=f602f602, not locked
  lock:228455938(79406065573135), block:205908580(556416605761539)

Average lock time: 347577ns

While the same guest using the shorthand:

Global lock flush_lock: addr=ffff82d0804b41c0, lockval=d9c4d9bc, cpu=12
  lock:1890775(416719148054), block:1663958(2500161282949)

Average lock time: 220395ns

Approximately a 1/3 improvement in the lock time.

Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
---
 xen/arch/x86/acpi/boot.c  |  1 +
 xen/arch/x86/mpparse.c    |  5 +++++
 xen/arch/x86/smp.c        | 41 ++++++++++++++++++++++++++++++++++++++-
 xen/include/asm-x86/smp.h |  2 ++
 4 files changed, 48 insertions(+), 1 deletion(-)

diff --git a/xen/arch/x86/acpi/boot.c b/xen/arch/x86/acpi/boot.c
index 15542a9bdf..88e1a89ff0 100644
--- a/xen/arch/x86/acpi/boot.c
+++ b/xen/arch/x86/acpi/boot.c
@@ -103,6 +103,7 @@ acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end)
 			       processor->lapic_flags & ACPI_MADT_ENABLED
 			       ? KERN_WARNING "WARNING: " : KERN_INFO,
 			       processor->local_apic_id, processor->uid);
+		cpu_overflow = true;
 		/*
 		 * Must not return an error here, to prevent
 		 * acpi_table_parse_entries() from terminating early.
diff --git a/xen/arch/x86/mpparse.c b/xen/arch/x86/mpparse.c
index f057d9162f..8d7739fbf4 100644
--- a/xen/arch/x86/mpparse.c
+++ b/xen/arch/x86/mpparse.c
@@ -66,6 +66,9 @@ static unsigned int __initdata disabled_cpus;
 /* Bitmask of physically existing CPUs */
 physid_mask_t phys_cpu_present_map;
 
+/* Record whether CPUs haven't been added due to overflows. */
+bool __read_mostly cpu_overflow;
+
 void __init set_nr_cpu_ids(unsigned int max_cpus)
 {
 	unsigned int tot_cpus = num_processors + disabled_cpus;
@@ -160,6 +163,7 @@ static int MP_processor_info_x(struct mpc_config_processor *m,
 		printk_once(XENLOG_WARNING
 			    "WARNING: NR_CPUS limit of %u reached - ignoring further processors\n",
 			    nr_cpu_ids);
+		cpu_overflow = true;
 		return -ENOSPC;
 	}
 
@@ -167,6 +171,7 @@ static int MP_processor_info_x(struct mpc_config_processor *m,
 	    && genapic.name == apic_default.name) {
 		printk_once(XENLOG_WARNING
 			    "WARNING: CPUs limit of 8 reached - ignoring futher processors\n");
+		cpu_overflow = true;
 		return -ENOSPC;
 	}
 
diff --git a/xen/arch/x86/smp.c b/xen/arch/x86/smp.c
index 6fb39a0a24..427c33db9d 100644
--- a/xen/arch/x86/smp.c
+++ b/xen/arch/x86/smp.c
@@ -8,6 +8,7 @@
  *	later.
  */
 
+#include <xen/cpu.h>
 #include <xen/irq.h>
 #include <xen/sched.h>
 #include <xen/delay.h>
@@ -123,6 +124,11 @@ void send_IPI_self_legacy(uint8_t vector)
     __default_send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
 }
 
+static void send_IPI_allbutself(unsigned int vector)
+{
+    __default_send_IPI_shortcut(APIC_DEST_ALLBUT, vector, APIC_DEST_PHYSICAL);
+}
+
 void send_IPI_mask_flat(const cpumask_t *cpumask, int vector)
 {
     unsigned long mask = cpumask_bits(cpumask)[0];
@@ -227,14 +233,47 @@ void flush_area_mask(const cpumask_t *mask, const void *va, unsigned int flags)
     if ( (flags & ~FLUSH_ORDER_MASK) &&
          !cpumask_subset(mask, cpumask_of(cpu)) )
     {
+        bool cpus_locked = false;
+
         spin_lock(&flush_lock);
         cpumask_and(&flush_cpumask, mask, &cpu_online_map);
         cpumask_clear_cpu(cpu, &flush_cpumask);
         flush_va      = va;
         flush_flags   = flags;
-        send_IPI_mask(&flush_cpumask, INVALIDATE_TLB_VECTOR);
+
+        /*
+         * Prevent any CPU hot{un}plug while sending the IPIs if we are to use
+         * a shorthand, also refuse to use a shorthand if not all CPUs are
+         * online or have been parked.
+         */
+        if ( system_state > SYS_STATE_smp_boot && !cpu_overflow &&
+             (cpus_locked = get_cpu_maps()) &&
+             (park_offline_cpus ||
+              cpumask_equal(&cpu_online_map, &cpu_present_map)) )
+        {
+            cpumask_copy(this_cpu(scratch_cpumask), &cpu_online_map);
+            cpumask_clear_cpu(cpu, this_cpu(scratch_cpumask));
+        }
+        else
+        {
+            if ( cpus_locked )
+            {
+                put_cpu_maps();
+                cpus_locked = false;
+            }
+            cpumask_clear(this_cpu(scratch_cpumask));
+        }
+
+        if ( cpumask_equal(&flush_cpumask, this_cpu(scratch_cpumask)) )
+            send_IPI_allbutself(INVALIDATE_TLB_VECTOR);
+        else
+            send_IPI_mask(&flush_cpumask, INVALIDATE_TLB_VECTOR);
+
         while ( !cpumask_empty(&flush_cpumask) )
             cpu_relax();
+
+        if ( cpus_locked )
+            put_cpu_maps();
         spin_unlock(&flush_lock);
     }
 }
diff --git a/xen/include/asm-x86/smp.h b/xen/include/asm-x86/smp.h
index dbeed2fd41..3df4185744 100644
--- a/xen/include/asm-x86/smp.h
+++ b/xen/include/asm-x86/smp.h
@@ -84,6 +84,8 @@ extern cpumask_t **socket_cpumask;
 #define get_cpu_current(cpu) \
     (get_cpu_info_from_stack((unsigned long)stack_base[cpu])->current_vcpu)
 
+extern bool cpu_overflow;
+
 #endif /* !__ASSEMBLY__ */
 
 #endif
-- 
2.24.1


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

             reply	other threads:[~2019-12-24 12:45 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-12-24 12:44 Roger Pau Monne [this message]
2019-12-27 15:03 ` [Xen-devel] [PATCH] x86/flush: use APIC ALLBUT destination shorthand when possible Andrew Cooper
2019-12-31 12:13   ` Roger Pau Monné
2020-01-03 12:08 ` Jan Beulich
2020-01-03 12:24   ` Andrew Cooper
2020-01-03 12:34   ` Roger Pau Monné
2020-01-03 12:55     ` Jan Beulich
2020-01-08 13:30       ` Roger Pau Monné
2020-01-08 13:54         ` Jan Beulich
2020-01-08 18:14           ` Roger Pau Monné
2020-01-09  9:43             ` Jan Beulich
2020-01-09 11:24               ` Roger Pau Monné
2020-01-09 12:20                 ` Roger Pau Monné

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20191224124453.47183-1-roger.pau@citrix.com \
    --to=roger.pau@citrix.com \
    --cc=andrew.cooper3@citrix.com \
    --cc=jbeulich@suse.com \
    --cc=wl@xen.org \
    --cc=xen-devel@lists.xenproject.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.