[patch 22/52] x86/apic/x2apic: Simplify cluster management

From: Thomas Gleixner <tglx@linutronix.de>
To: LKML <linux-kernel@vger.kernel.org>
Cc: Ingo Molnar <mingo@kernel.org>, Peter Anvin <hpa@zytor.com>,
	Marc Zyngier <marc.zyngier@arm.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Borislav Petkov <bp@alien8.de>, Chen Yu <yu.c.chen@intel.com>,
	Rui Zhang <rui.zhang@intel.com>,
	"Rafael J. Wysocki" <rjw@rjwysocki.net>,
	Len Brown <lenb@kernel.org>,
	Dan Williams <dan.j.williams@intel.com>,
	Christoph Hellwig <hch@lst.de>,
	Paolo Bonzini <pbonzini@redhat.com>,
	Joerg Roedel <joro@8bytes.org>,
	Boris Ostrovsky <boris.ostrovsky@oracle.com>,
	Juergen Gross <jgross@suse.com>, Tony Luck <tony.luck@intel.com>,
	"K. Y. Srinivasan" <kys@microsoft.com>,
	Alok Kataria <akataria@vmware.com>,
	Steven Rostedt <rostedt@goodmis.org>,
	Arjan van de Ven <arjan@linux.intel.com>
Subject: [patch 22/52] x86/apic/x2apic: Simplify cluster management
Date: Wed, 13 Sep 2017 23:29:24 +0200	[thread overview]
Message-ID: <20170913213153.992629420@linutronix.de> (raw)
In-Reply-To: 20170913212902.530704676@linutronix.de

[-- Attachment #1: x86-apic-x2apic--Simplify-cluster-management.patch --]
[-- Type: text/plain, Size: 8306 bytes --]

The cluster management code creates a cluster mask per CPU, which requires
that all cluster masks are iterated and updated whenever a CPU goes online
or offline. Other information about the cluster is kept in separate per-CPU
variables.

Create a data structure which holds all information about a cluster and
fill it in when the first CPU of a cluster comes online. If another CPU of
the same cluster comes online, it just finds the pointer to the existing
cluster structure and reuses it.

That simplifies all usage sites and gets rid of quite a few pointless
iterations over the online CPUs to find the CPUs which belong to the
cluster.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/apic/x2apic_cluster.c |  154 ++++++++++++++++------------------
 1 file changed, 76 insertions(+), 78 deletions(-)

--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -10,20 +10,22 @@
 #include <asm/smp.h>
 #include "x2apic.h"
 
+struct cluster_mask {
+	unsigned int	clusterid;
+	int		node;
+	struct cpumask	mask;
+};
+
 static DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid);
-static DEFINE_PER_CPU(cpumask_var_t, cpus_in_cluster);
 static DEFINE_PER_CPU(cpumask_var_t, ipi_mask);
+static DEFINE_PER_CPU(struct cluster_mask *, cluster_masks);
+static struct cluster_mask *cluster_hotplug_mask;
 
 static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 {
 	return x2apic_enabled();
 }
 
-static inline u32 x2apic_cluster(int cpu)
-{
-	return per_cpu(x86_cpu_to_logical_apicid, cpu) >> 16;
-}
-
 static void x2apic_send_IPI(int cpu, int vector)
 {
 	u32 dest = per_cpu(x86_cpu_to_logical_apicid, cpu);
@@ -35,49 +37,34 @@ static void x2apic_send_IPI(int cpu, int
 static void
 __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest)
 {
-	struct cpumask *cpus_in_cluster_ptr;
-	struct cpumask *ipi_mask_ptr;
-	unsigned int cpu, this_cpu;
+	unsigned int cpu, clustercpu;
+	struct cpumask *tmpmsk;
 	unsigned long flags;
 	u32 dest;
 
 	x2apic_wrmsr_fence();
-
 	local_irq_save(flags);
 
-	this_cpu = smp_processor_id();
+	tmpmsk = this_cpu_cpumask_var_ptr(ipi_mask);
+	cpumask_copy(tmpmsk, mask);
+	/* If IPI should not be sent to self, clear current CPU */
+	if (apic_dest != APIC_DEST_ALLINC)
+		cpumask_clear_cpu(smp_processor_id(), tmpmsk);
+
+	/* Collapse cpus in a cluster so a single IPI per cluster is sent */
+	for_each_cpu(cpu, tmpmsk) {
+		struct cluster_mask *cmsk = per_cpu(cluster_masks, cpu);
 
-	/*
-	 * We are to modify mask, so we need an own copy
-	 * and be sure it's manipulated with irq off.
-	 */
-	ipi_mask_ptr = this_cpu_cpumask_var_ptr(ipi_mask);
-	cpumask_copy(ipi_mask_ptr, mask);
-
-	/*
-	 * The idea is to send one IPI per cluster.
-	 */
-	for_each_cpu(cpu, ipi_mask_ptr) {
-		unsigned long i;
-
-		cpus_in_cluster_ptr = per_cpu(cpus_in_cluster, cpu);
 		dest = 0;
-
-		/* Collect cpus in cluster. */
-		for_each_cpu_and(i, ipi_mask_ptr, cpus_in_cluster_ptr) {
-			if (apic_dest == APIC_DEST_ALLINC || i != this_cpu)
-				dest |= per_cpu(x86_cpu_to_logical_apicid, i);
-		}
+		for_each_cpu_and(clustercpu, tmpmsk, &cmsk->mask)
+			dest |= per_cpu(x86_cpu_to_logical_apicid, clustercpu);
 
 		if (!dest)
 			continue;
 
 		__x2apic_send_IPI_dest(dest, vector, apic->dest_logical);
-		/*
-		 * Cluster sibling cpus should be discared now so
-		 * we would not send IPI them second time.
-		 */
-		cpumask_andnot(ipi_mask_ptr, ipi_mask_ptr, cpus_in_cluster_ptr);
+		/* Remove cluster CPUs from tmpmask */
+		cpumask_andnot(tmpmsk, tmpmsk, &cmsk->mask);
 	}
 
 	local_irq_restore(flags);
@@ -109,91 +96,100 @@ x2apic_cpu_mask_to_apicid(const struct c
 			  unsigned int *apicid)
 {
 	struct cpumask *effmsk = irq_data_get_effective_affinity_mask(irqdata);
+	struct cluster_mask *cmsk;
 	unsigned int cpu;
 	u32 dest = 0;
-	u16 cluster;
 
 	cpu = cpumask_first(mask);
 	if (cpu >= nr_cpu_ids)
 		return -EINVAL;
 
-	dest = per_cpu(x86_cpu_to_logical_apicid, cpu);
-	cluster = x2apic_cluster(cpu);
-
+	cmsk = per_cpu(cluster_masks, cpu);
 	cpumask_clear(effmsk);
-	for_each_cpu(cpu, mask) {
-		if (cluster != x2apic_cluster(cpu))
-			continue;
+	for_each_cpu_and(cpu, &cmsk->mask, mask) {
 		dest |= per_cpu(x86_cpu_to_logical_apicid, cpu);
 		cpumask_set_cpu(cpu, effmsk);
 	}
-
 	*apicid = dest;
 	return 0;
 }
 
 static void init_x2apic_ldr(void)
 {
-	unsigned int this_cpu = smp_processor_id();
+	struct cluster_mask *cmsk = this_cpu_read(cluster_masks);
+	u32 cluster, apicid = apic_read(APIC_LDR);
 	unsigned int cpu;
 
-	per_cpu(x86_cpu_to_logical_apicid, this_cpu) = apic_read(APIC_LDR);
+	this_cpu_write(x86_cpu_to_logical_apicid, apicid);
 
-	cpumask_set_cpu(this_cpu, per_cpu(cpus_in_cluster, this_cpu));
+	if (cmsk)
+		goto update;
+
+	cluster = apicid >> 16;
 	for_each_online_cpu(cpu) {
-		if (x2apic_cluster(this_cpu) != x2apic_cluster(cpu))
-			continue;
-		cpumask_set_cpu(this_cpu, per_cpu(cpus_in_cluster, cpu));
-		cpumask_set_cpu(cpu, per_cpu(cpus_in_cluster, this_cpu));
+		cmsk = per_cpu(cluster_masks, cpu);
+		/* Matching cluster found. Link and update it. */
+		if (cmsk && cmsk->clusterid == cluster)
+			goto update;
 	}
+	cmsk = cluster_hotplug_mask;
+	cluster_hotplug_mask = NULL;
+update:
+	this_cpu_write(cluster_masks, cmsk);
+	cpumask_set_cpu(smp_processor_id(), &cmsk->mask);
 }
 
-/*
- * At CPU state changes, update the x2apic cluster sibling info.
- */
-static int x2apic_prepare_cpu(unsigned int cpu)
+static int alloc_clustermask(unsigned int cpu, int node)
 {
-	if (!zalloc_cpumask_var(&per_cpu(cpus_in_cluster, cpu), GFP_KERNEL))
-		return -ENOMEM;
+	if (per_cpu(cluster_masks, cpu))
+		return 0;
+	/*
+	 * If a hotplug spare mask exists, check whether it's on the right
+	 * node. If not, free it and allocate a new one.
+	 */
+	if (cluster_hotplug_mask) {
+		if (cluster_hotplug_mask->node == node)
+			return 0;
+		kfree(cluster_hotplug_mask);
+	}
 
-	if (!zalloc_cpumask_var(&per_cpu(ipi_mask, cpu), GFP_KERNEL)) {
-		free_cpumask_var(per_cpu(cpus_in_cluster, cpu));
+	cluster_hotplug_mask = kzalloc_node(sizeof(*cluster_hotplug_mask),
+					    GFP_KERNEL, node);
+	if (!cluster_hotplug_mask)
 		return -ENOMEM;
-	}
+	cluster_hotplug_mask->node = node;
+	return 0;
+}
 
+static int x2apic_prepare_cpu(unsigned int cpu)
+{
+	if (alloc_clustermask(cpu, cpu_to_node(cpu)) < 0)
+		return -ENOMEM;
+	if (!zalloc_cpumask_var(&per_cpu(ipi_mask, cpu), GFP_KERNEL))
+		return -ENOMEM;
 	return 0;
 }
 
-static int x2apic_dead_cpu(unsigned int this_cpu)
+static int x2apic_dead_cpu(unsigned int dead_cpu)
 {
-	int cpu;
+	struct cluster_mask *cmsk = per_cpu(cluster_masks, dead_cpu);
 
-	for_each_online_cpu(cpu) {
-		if (x2apic_cluster(this_cpu) != x2apic_cluster(cpu))
-			continue;
-		cpumask_clear_cpu(this_cpu, per_cpu(cpus_in_cluster, cpu));
-		cpumask_clear_cpu(cpu, per_cpu(cpus_in_cluster, this_cpu));
-	}
-	free_cpumask_var(per_cpu(cpus_in_cluster, this_cpu));
-	free_cpumask_var(per_cpu(ipi_mask, this_cpu));
+	cpumask_clear_cpu(smp_processor_id(), &cmsk->mask);
+	free_cpumask_var(per_cpu(ipi_mask, dead_cpu));
 	return 0;
 }
 
 static int x2apic_cluster_probe(void)
 {
-	int cpu = smp_processor_id();
-	int ret;
-
 	if (!x2apic_mode)
 		return 0;
 
-	ret = cpuhp_setup_state(CPUHP_X2APIC_PREPARE, "x86/x2apic:prepare",
-				x2apic_prepare_cpu, x2apic_dead_cpu);
-	if (ret < 0) {
+	if (cpuhp_setup_state(CPUHP_X2APIC_PREPARE, "x86/x2apic:prepare",
+			      x2apic_prepare_cpu, x2apic_dead_cpu) < 0) {
 		pr_err("Failed to register X2APIC_PREPARE\n");
 		return 0;
 	}
-	cpumask_set_cpu(cpu, per_cpu(cpus_in_cluster, cpu));
+	init_x2apic_ldr();
 	return 1;
 }
 
@@ -208,6 +204,8 @@ static const struct cpumask *x2apic_clus
 static void cluster_vector_allocation_domain(int cpu, struct cpumask *retmask,
 					     const struct cpumask *mask)
 {
+	struct cluster_mask *cmsk = per_cpu(cluster_masks, cpu);
+
 	/*
 	 * To minimize vector pressure, default case of boot, device bringup
 	 * etc will use a single cpu for the interrupt destination.
@@ -220,7 +218,7 @@ static void cluster_vector_allocation_do
 	if (mask == x2apic_cluster_target_cpus())
 		cpumask_copy(retmask, cpumask_of(cpu));
 	else
-		cpumask_and(retmask, mask, per_cpu(cpus_in_cluster, cpu));
+		cpumask_and(retmask, mask, &cmsk->mask);
 }
 
 static struct apic apic_x2apic_cluster __ro_after_init = {