linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Zwane Mwaikambo <zwane@arm.linux.org.uk>
To: Nick Piggin <piggin@cyberone.com.au>
Cc: Ingo Molnar <mingo@redhat.com>, Anton Blanchard <anton@samba.org>,
	Linus Torvalds <torvalds@osdl.org>, Andrew Morton <akpm@osdl.org>,
	Rusty Russell <rusty@rustcorp.com.au>,
	linux-kernel <linux-kernel@vger.kernel.org>
Subject: Re: [PATCH][RFC] make cpu_sibling_map a cpumask_t
Date: Mon, 8 Dec 2003 15:51:12 -0500 (EST)	[thread overview]
Message-ID: <Pine.LNX.4.58.0312080109010.1758@montezuma.fsmlabs.com> (raw)
In-Reply-To: <3FD3FD52.7020001@cyberone.com.au>

[-- Attachment #1: Type: TEXT/PLAIN, Size: 1078 bytes --]

On Mon, 8 Dec 2003, Nick Piggin wrote:

> It turns the cpu_sibling_map array from an array of cpus to an array
> of cpumasks. This allows handling of more than 2 siblings, not that
> I know of any immediate need.
>
> I think it generalises cpu_sibling_map sufficiently that it can become
> generic code. This would allow architecture specific code to build the
> sibling map, and then Ingo's or my HT implementations to build their
> scheduling descriptions in generic code.
>
> I'm not aware of any reason why the kernel should not become generally
> SMT aware. It is sufficiently different to SMP that it is worth
> specialising it, although I am only aware of P4 and POWER5 implementations.

Generally i agree, it's fairly unintrusive and appears to clean up some
things. Perhaps Andrew will take it a bit later after release.

> P.S.
> I have an alternative to Ingo's HT scheduler which basically does
> the same thing. It is showing a 20% elapsed time improvement with a
> make -j3 on a 2xP4 Xeon (4 logical CPUs).

-j3 is an odd number, what does -j4, -j8, -j16 look like?

[-- Attachment #2: Type: TEXT/PLAIN, Size: 7229 bytes --]

 linux-2.6-npiggin/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c |    9 --
 linux-2.6-npiggin/arch/i386/kernel/io_apic.c                 |   19 ++--
 linux-2.6-npiggin/arch/i386/kernel/smpboot.c                 |   46 +++++------
 linux-2.6-npiggin/arch/i386/oprofile/op_model_p4.c           |    7 -
 linux-2.6-npiggin/include/asm-i386/smp.h                     |    2 
 5 files changed, 37 insertions(+), 46 deletions(-)

diff -puN include/asm-i386/smp.h~sibling-map-to-cpumask include/asm-i386/smp.h
--- linux-2.6/include/asm-i386/smp.h~sibling-map-to-cpumask	2003-12-08 14:42:28.000000000 +1100
+++ linux-2.6-npiggin/include/asm-i386/smp.h	2003-12-08 14:42:28.000000000 +1100
@@ -34,7 +34,7 @@
 extern void smp_alloc_memory(void);
 extern int pic_mode;
 extern int smp_num_siblings;
-extern int cpu_sibling_map[];
+extern cpumask_t cpu_sibling_map[];
 
 extern void smp_flush_tlb(void);
 extern void smp_message_irq(int cpl, void *dev_id, struct pt_regs *regs);
diff -puN arch/i386/kernel/smpboot.c~sibling-map-to-cpumask arch/i386/kernel/smpboot.c
--- linux-2.6/arch/i386/kernel/smpboot.c~sibling-map-to-cpumask	2003-12-08 14:42:28.000000000 +1100
+++ linux-2.6-npiggin/arch/i386/kernel/smpboot.c	2003-12-08 15:07:48.000000000 +1100
@@ -933,7 +933,7 @@ static int boot_cpu_logical_apicid;
 /* Where the IO area was mapped on multiquad, always 0 otherwise */
 void *xquad_portio;
 
-int cpu_sibling_map[NR_CPUS] __cacheline_aligned;
+cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
 
 static void __init smp_boot_cpus(unsigned int max_cpus)
 {
@@ -1079,32 +1079,28 @@ static void __init smp_boot_cpus(unsigne
 	Dprintk("Boot done.\n");
 
 	/*
-	 * If Hyper-Threading is avaialble, construct cpu_sibling_map[], so
-	 * that we can tell the sibling CPU efficiently.
+	 * construct cpu_sibling_map[], so that we can tell sibling CPUs
+	 * efficiently.
 	 */
-	if (cpu_has_ht && smp_num_siblings > 1) {
-		for (cpu = 0; cpu < NR_CPUS; cpu++)
-			cpu_sibling_map[cpu] = NO_PROC_ID;
-
-		for (cpu = 0; cpu < NR_CPUS; cpu++) {
-			int 	i;
-			if (!cpu_isset(cpu, cpu_callout_map))
-				continue;
+	for (cpu = 0; cpu < NR_CPUS; cpu++)
+		cpu_sibling_map[cpu] = CPU_MASK_NONE;
 
-			for (i = 0; i < NR_CPUS; i++) {
-				if (i == cpu || !cpu_isset(i, cpu_callout_map))
-					continue;
-				if (phys_proc_id[cpu] == phys_proc_id[i]) {
-					cpu_sibling_map[cpu] = i;
-					printk("cpu_sibling_map[%d] = %d\n", cpu, cpu_sibling_map[cpu]);
-					break;
-				}
-			}
-			if (cpu_sibling_map[cpu] == NO_PROC_ID) {
-				smp_num_siblings = 1;
-				printk(KERN_WARNING "WARNING: No sibling found for CPU %d.\n", cpu);
+	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+		int siblings = 0;
+		int 	i;
+		if (!cpu_isset(cpu, cpu_callout_map))
+			continue;
+
+		for (i = 0; i < NR_CPUS; i++) {
+			if (!cpu_isset(i, cpu_callout_map))
+				continue;
+			if (phys_proc_id[cpu] == phys_proc_id[i]) {
+				siblings++;
+				cpu_set(i, cpu_sibling_map[cpu]);
 			}
 		}
+		if (siblings != smp_num_siblings)
+			printk(KERN_WARNING "WARNING: %d siblings found for CPU%d, should be %d\n", siblings, cpu, smp_num_siblings);
 	}
 
 	smpboot_setup_io_apic();
@@ -1143,8 +1139,8 @@ __init void arch_init_sched_domains(void
 
 		*cpu_domain = SD_CPU_INIT;
 		cpu_domain->span = CPU_MASK_NONE;
-		cpu_set(i, cpu_domain->span);
-		cpu_set(cpu_sibling_map[i], cpu_domain->span);
+		cpu_domain->span = cpu_sibling_map[i];
+		WARN_ON(!cpu_isset(i, cpu_sibling_map[i]));
 		cpu_domain->flags |= SD_FLAG_WAKE | SD_FLAG_IDLE;
 		cpu_domain->cache_hot_time = 0;
 		cpu_domain->cache_nice_tries = 0;
diff -puN arch/i386/kernel/io_apic.c~sibling-map-to-cpumask arch/i386/kernel/io_apic.c
--- linux-2.6/arch/i386/kernel/io_apic.c~sibling-map-to-cpumask	2003-12-08 14:42:28.000000000 +1100
+++ linux-2.6-npiggin/arch/i386/kernel/io_apic.c	2003-12-08 14:42:28.000000000 +1100
@@ -309,8 +309,7 @@ struct irq_cpu_info {
 
 #define IRQ_ALLOWED(cpu, allowed_mask)	cpu_isset(cpu, allowed_mask)
 
-#define CPU_TO_PACKAGEINDEX(i) \
-		((physical_balance && i > cpu_sibling_map[i]) ? cpu_sibling_map[i] : i)
+#define CPU_TO_PACKAGEINDEX(i) (first_cpu(cpu_sibling_map[i]))
 
 #define MAX_BALANCED_IRQ_INTERVAL	(5*HZ)
 #define MIN_BALANCED_IRQ_INTERVAL	(HZ/2)
@@ -393,6 +392,7 @@ static void do_irq_balance(void)
 	unsigned long max_cpu_irq = 0, min_cpu_irq = (~0);
 	unsigned long move_this_load = 0;
 	int max_loaded = 0, min_loaded = 0;
+	int load;
 	unsigned long useful_load_threshold = balanced_irq_interval + 10;
 	int selected_irq;
 	int tmp_loaded, first_attempt = 1;
@@ -444,7 +444,7 @@ static void do_irq_balance(void)
 	for (i = 0; i < NR_CPUS; i++) {
 		if (!cpu_online(i))
 			continue;
-		if (physical_balance && i > cpu_sibling_map[i])
+		if (i != CPU_TO_PACKAGEINDEX(i))
 			continue;
 		if (min_cpu_irq > CPU_IRQ(i)) {
 			min_cpu_irq = CPU_IRQ(i);
@@ -463,7 +463,7 @@ tryanothercpu:
 	for (i = 0; i < NR_CPUS; i++) {
 		if (!cpu_online(i))
 			continue;
-		if (physical_balance && i > cpu_sibling_map[i])
+		if (i != CPU_TO_PACKAGEINDEX(i))
 			continue;
 		if (max_cpu_irq <= CPU_IRQ(i)) 
 			continue;
@@ -543,9 +543,14 @@ tryanotherirq:
 	 * We seek the least loaded sibling by making the comparison
 	 * (A+B)/2 vs B
 	 */
-	if (physical_balance && (CPU_IRQ(min_loaded) >> 1) >
-					CPU_IRQ(cpu_sibling_map[min_loaded]))
-		min_loaded = cpu_sibling_map[min_loaded];
+	load = CPU_IRQ(min_loaded) >> 1;
+	for_each_cpu(j, cpu_sibling_map[min_loaded]) {
+		if (load > CPU_IRQ(j)) {
+			/* This won't change cpu_sibling_map[min_loaded] */
+			load = CPU_IRQ(j);
+			min_loaded = j;
+		}
+	}
 
 	cpus_and(allowed_mask, cpu_online_map, irq_affinity[selected_irq]);
 	target_cpu_mask = cpumask_of_cpu(min_loaded);
diff -puN arch/i386/kernel/cpu/cpufreq/p4-clockmod.c~sibling-map-to-cpumask arch/i386/kernel/cpu/cpufreq/p4-clockmod.c
--- linux-2.6/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c~sibling-map-to-cpumask	2003-12-08 14:42:28.000000000 +1100
+++ linux-2.6-npiggin/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c	2003-12-08 14:42:28.000000000 +1100
@@ -67,14 +67,7 @@ static int cpufreq_p4_setdc(unsigned int
 	cpus_allowed = current->cpus_allowed;
 
 	/* only run on CPU to be set, or on its sibling */
-       affected_cpu_map = cpumask_of_cpu(cpu);
-#ifdef CONFIG_X86_HT
-	hyperthreading = ((cpu_has_ht) && (smp_num_siblings == 2));
-	if (hyperthreading) {
-		sibling = cpu_sibling_map[cpu];
-                cpu_set(sibling, affected_cpu_map);
-	}
-#endif
+	affected_cpu_map = cpu_sibling_map[cpu];
 	set_cpus_allowed(current, affected_cpu_map);
         BUG_ON(!cpu_isset(smp_processor_id(), affected_cpu_map));
 
diff -puN arch/i386/oprofile/op_model_p4.c~sibling-map-to-cpumask arch/i386/oprofile/op_model_p4.c
--- linux-2.6/arch/i386/oprofile/op_model_p4.c~sibling-map-to-cpumask	2003-12-08 14:42:28.000000000 +1100
+++ linux-2.6-npiggin/arch/i386/oprofile/op_model_p4.c	2003-12-08 14:42:28.000000000 +1100
@@ -382,11 +382,8 @@ static struct p4_event_binding p4_events
 static unsigned int get_stagger(void)
 {
 #ifdef CONFIG_SMP
-	int cpu;
-	if (smp_num_siblings > 1) {
-		cpu = smp_processor_id();
-		return (cpu_sibling_map[cpu] > cpu) ? 0 : 1;
-	}
+	int cpu = smp_processor_id();
+	return (cpu != first_cpu(cpu_sibling_map[cpu]));
 #endif	
 	return 0;
 }

_

  parent reply	other threads:[~2003-12-08 20:52 UTC|newest]

Thread overview: 76+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2003-12-08  4:25 [PATCH][RFC] make cpu_sibling_map a cpumask_t Nick Piggin
2003-12-08 15:59 ` Anton Blanchard
2003-12-08 23:08   ` Nick Piggin
2003-12-09  0:14     ` Anton Blanchard
2003-12-11  4:25       ` [CFT][RFC] HT scheduler Nick Piggin
2003-12-11  7:24         ` Nick Piggin
2003-12-11  8:57           ` Nick Piggin
2003-12-11 11:52             ` William Lee Irwin III
2003-12-11 13:09               ` Nick Piggin
2003-12-11 13:23                 ` William Lee Irwin III
2003-12-11 13:30                   ` Nick Piggin
2003-12-11 13:32                     ` William Lee Irwin III
2003-12-11 15:30                       ` Nick Piggin
2003-12-11 15:38                         ` William Lee Irwin III
2003-12-11 15:51                           ` Nick Piggin
2003-12-11 15:56                             ` William Lee Irwin III
2003-12-11 16:37                               ` Nick Piggin
2003-12-11 16:40                                 ` William Lee Irwin III
2003-12-12  1:52                         ` [PATCH] improve rwsem scalability (was Re: [CFT][RFC] HT scheduler) Nick Piggin
2003-12-12  2:02                           ` Nick Piggin
2003-12-12  9:41                           ` Ingo Molnar
2003-12-13  0:07                             ` Nick Piggin
2003-12-14  0:44                               ` Nick Piggin
2003-12-17  5:27                                 ` Nick Piggin
2003-12-19 11:52                                   ` Nick Piggin
2003-12-19 15:06                                     ` Martin J. Bligh
2003-12-20  0:08                                       ` Nick Piggin
2003-12-12  0:58             ` [CFT][RFC] HT scheduler Rusty Russell
2003-12-11 10:01           ` Rhino
2003-12-11  8:14             ` Nick Piggin
2003-12-11 16:49               ` Rhino
2003-12-11 15:16                 ` Nick Piggin
2003-12-11 11:40             ` William Lee Irwin III
2003-12-11 17:05               ` Rhino
2003-12-11 15:17                 ` William Lee Irwin III
2003-12-11 16:28         ` Kevin P. Fleming
2003-12-11 16:41           ` Nick Piggin
2003-12-12  2:24         ` Rusty Russell
2003-12-12  7:00           ` Nick Piggin
2003-12-12  7:23             ` Rusty Russell
2003-12-13  6:43               ` Nick Piggin
2003-12-14  1:35                 ` bill davidsen
2003-12-14  2:18                   ` Nick Piggin
2003-12-14  4:32                     ` Jamie Lokier
2003-12-14  9:40                       ` Nick Piggin
2003-12-14 10:46                         ` Arjan van de Ven
2003-12-16 17:46                         ` Bill Davidsen
2003-12-16 18:22                       ` Linus Torvalds
2003-12-17  0:24                         ` Davide Libenzi
2003-12-17  0:41                           ` Linus Torvalds
2003-12-17  0:54                             ` Davide Libenzi
2003-12-16 17:34                     ` Bill Davidsen
2003-12-15  5:53                 ` Rusty Russell
2003-12-15 23:08                   ` Nick Piggin
2003-12-19  4:57                     ` Nick Piggin
2003-12-19  5:13                       ` Nick Piggin
2003-12-20  2:43                       ` Rusty Russell
2003-12-21  2:56                         ` Nick Piggin
2004-01-03 18:57                   ` Bill Davidsen
2003-12-15 20:21                 ` Zwane Mwaikambo
2003-12-15 23:20                   ` Nick Piggin
2003-12-16  0:11                     ` Zwane Mwaikambo
2003-12-12  8:59             ` Nick Piggin
2003-12-12 15:14               ` Martin J. Bligh
2003-12-08 19:44 ` [PATCH][RFC] make cpu_sibling_map a cpumask_t James Cleverdon
2003-12-08 20:38 ` Ingo Molnar
2003-12-08 20:51 ` Zwane Mwaikambo [this message]
2003-12-08 20:55   ` Ingo Molnar
2003-12-08 23:17     ` Nick Piggin
2003-12-08 23:36       ` Ingo Molnar
2003-12-08 23:58         ` Nick Piggin
2003-12-08 23:46 ` Rusty Russell
2003-12-09 13:36   ` Nick Piggin
2003-12-11 21:41     ` bill davidsen
2003-12-09  4:54 Nakajima, Jun
2003-12-09 19:42 ` William Lee Irwin III

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=Pine.LNX.4.58.0312080109010.1758@montezuma.fsmlabs.com \
    --to=zwane@arm.linux.org.uk \
    --cc=akpm@osdl.org \
    --cc=anton@samba.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=piggin@cyberone.com.au \
    --cc=rusty@rustcorp.com.au \
    --cc=torvalds@osdl.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).