On Thu, 2021-01-21 at 15:42 +0000, David Woodhouse wrote:
> [ 2.289283] BUG: kernel NULL pointer dereference, address: 0000000000000000
> [ 2.289283] #PF: supervisor write access in kernel mode
> [ 2.289283] #PF: error_code(0x0002) - not-present page
> [ 2.289283] PGD 0 P4D 0
> [ 2.289283] Oops: 0002 [#1] SMP PTI
> [ 2.289283] CPU: 32 PID: 0 Comm: swapper/32 Not tainted 5.10.0+ #745
> [ 2.289283] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.14.0-1.fc33 04/01/2014
> [ 2.289283] RIP: 0010:init_x2apic_ldr+0xa0/0xb0

OK... in alloc_clustermask() for each CPU we were preallocating a
cluster_mask and storing it in the global cluster_hotplug_mask.

Then later for each CPU we were taking the preallocated cluster_mask
and setting cluster_hotplug_mask to NULL.

That doesn't parallelise well :)

So... ditch the global variable, let alloc_clustermask() install the
appropriate cluster_mask *directly* into the target CPU's per_cpu data
before it's running. And since we have to calculate the logical APIC ID
for the cluster ID, we might as well set x86_cpu_to_logical_apicid at
the same time.

Now all that init_x2apic_ldr() actually *does* on the target CPU is set
that CPU's bit in the pre-existing cluster_mask.

To reduce the number of loops over all (present or online) CPUs, I've
made it set the per_cpu cluster_mask for *all* CPUs in the cluster in
one pass at boot time. I think the case for later hotplug is also sane;
will have to test that.

But it passes that qemu boot test it was failing earlier, at least...

diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index b0889c48a2ac..74bb4cae8b5b 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -18,7 +18,6 @@ struct cluster_mask { static DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid); static DEFINE_PER_CPU(cpumask_var_t, ipi_mask); static DEFINE_PER_CPU(struct cluster_mask *, cluster_masks); -static struct cluster_mask *cluster_hotplug_mask; static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { @@ -98,54 +97,61 @@ static u32 x2apic_calc_apicid(unsigned int cpu) static void init_x2apic_ldr(void) { struct cluster_mask *cmsk = this_cpu_read(cluster_masks); - u32 cluster, apicid = apic_read(APIC_LDR); - unsigned int cpu; - this_cpu_write(x86_cpu_to_logical_apicid, apicid); + BUG_ON(!cmsk); - if (cmsk) - goto update; - - cluster = apicid >> 16; - for_each_online_cpu(cpu) { - cmsk = per_cpu(cluster_masks, cpu); - /* Matching cluster found. Link and update it. */ - if (cmsk && cmsk->clusterid == cluster) - goto update; - } - cmsk = cluster_hotplug_mask; - cmsk->clusterid = cluster; - cluster_hotplug_mask = NULL; -update: - this_cpu_write(cluster_masks, cmsk); cpumask_set_cpu(smp_processor_id(), &cmsk->mask); } -static int alloc_clustermask(unsigned int cpu, int node) +static int alloc_clustermask(unsigned int cpu, u32 cluster, int node) { + struct cluster_mask *cmsk = NULL; + u32 apicid; + if (per_cpu(cluster_masks, cpu)) return 0; - /* - * If a hotplug spare mask exists, check whether it's on the right - * node. If not, free it and allocate a new one. 
+ + /* For the hotplug case, don't always allocate a new one */ + for_each_online_cpu(cpu) { + apicid = apic->cpu_present_to_apicid(cpu); + if (apicid != BAD_APICID && apicid >> 4 == cluster) { + cmsk = per_cpu(cluster_masks, cpu); + if (cmsk) + break; + } + } + if (!cmsk) + cmsk = kzalloc_node(sizeof(*cmsk), GFP_KERNEL, node); + if (!cmsk) + return -ENOMEM; + + cmsk->node = node; + cmsk->clusterid = cluster; + + /* + * As an optimisation during boot, set the cluster_mask for *all* + * present CPUs at once, which will include 'cpu'. */ - if (cluster_hotplug_mask) { - if (cluster_hotplug_mask->node == node) - return 0; - kfree(cluster_hotplug_mask); + if (system_state < SYSTEM_RUNNING) { + for_each_present_cpu(cpu) { + u32 apicid = apic->cpu_present_to_apicid(cpu); + if (apicid != BAD_APICID && apicid >> 4 == cluster) + per_cpu(cluster_masks, cpu) = cmsk; + } } - cluster_hotplug_mask = kzalloc_node(sizeof(*cluster_hotplug_mask), - GFP_KERNEL, node); - if (!cluster_hotplug_mask) - return -ENOMEM; - cluster_hotplug_mask->node = node; return 0; } static int x2apic_prepare_cpu(unsigned int cpu) { - if (alloc_clustermask(cpu, cpu_to_node(cpu)) < 0) + u32 phys_apicid = apic->cpu_present_to_apicid(cpu); + u32 cluster = phys_apicid >> 4; + u32 logical_apicid = (cluster << 16) | (1 << (phys_apicid & 0xf)); + + per_cpu(x86_cpu_to_logical_apicid, cpu) = logical_apicid; + + if (alloc_clustermask(cpu, cluster, cpu_to_node(cpu)) < 0) return -ENOMEM; if (!zalloc_cpumask_var(&per_cpu(ipi_mask, cpu), GFP_KERNEL)) return -ENOMEM;