linuxppc-dev.lists.ozlabs.org archive mirror
* [PATCH 0/3] sched: asymmetrical packing for POWER7 SMT4
@ 2010-06-08  4:57 Michael Neuling
  2010-06-08  4:57 ` [PATCH 2/3] sched: add asymmetric group packing option for sibling domain Michael Neuling
                   ` (2 more replies)
  0 siblings, 3 replies; 5+ messages in thread
From: Michael Neuling @ 2010-06-08  4:57 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Peter Zijlstra; +Cc: linuxppc-dev, linux-kernel

This patch series implements asymmetric SMT packing, which ensures
consistently good performance on POWER7.  Without this series, tasks
will vary in performance by around +/-30% on POWER7.

This new version is based on help from Vatsa and Vaidy in an attempt
to answer concerns that Peter Zijlstra had about the original series.

The 1st patch is a fix for SMT4 in general.
The 2nd adds the actual asymmetric packing infrastructure.
The 3rd adds the powerpc-specific hooks for POWER7.


* [PATCH 1/3] sched: fix capacity calculations for SMT4
  2010-06-08  4:57 [PATCH 0/3] sched: asymmetrical packing for POWER7 SMT4 Michael Neuling
  2010-06-08  4:57 ` [PATCH 2/3] sched: add asymmetric group packing option for sibling domain Michael Neuling
@ 2010-06-08  4:57 ` Michael Neuling
  2010-06-08  4:57 ` [PATCH 3/3] powerpc: enabled asymmetric SMT scheduling on POWER7 Michael Neuling
  2 siblings, 0 replies; 5+ messages in thread
From: Michael Neuling @ 2010-06-08  4:57 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Peter Zijlstra; +Cc: linuxppc-dev, linux-kernel

From: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>

Handle cpu capacity being reported as 0 on cores with a larger number
of hardware threads.  For example, on a POWER7 core with 4 hardware
threads, the core power is 1177 and thus the power of each hardware
thread is 1177/4 = 294.  This low power can lead to the capacity of
each hardware thread being calculated as 0, which leads to tasks
bouncing madly within the core!

Fix this by reporting the capacity of such hardware threads as 1,
provided their power has not been scaled down significantly by
frequency scaling or real-time tasks' cpu usage.
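
For illustration, here is a minimal userspace sketch of the arithmetic
described above (not part of the patch; it assumes SCHED_LOAD_SCALE is
1024 and reuses the 1177 core-power figure quoted here):

#include <stdio.h>

/* Same rounding the scheduler uses; values are illustrative only. */
#define SCHED_LOAD_SCALE        1024
#define DIV_ROUND_CLOSEST(x, d) (((x) + ((d) / 2)) / (d))

int main(void)
{
        unsigned int core_power = 1177;             /* POWER7 core, as quoted above */
        unsigned int thread_power = core_power / 4; /* 294 per SMT thread */
        unsigned int capacity;

        /* Before the fix: capacity rounds down to 0, so the balancer thinks
         * the thread can hold no tasks and keeps migrating them around. */
        capacity = DIV_ROUND_CLOSEST(thread_power, SCHED_LOAD_SCALE);
        printf("capacity       = %u\n", capacity);  /* prints 0 */

        /* After the fix: a 0 capacity is bumped to 1 as long as cpu_power
         * was not cut by more than 10% by frequency scaling or real-time
         * tasks (the rt_freq_influence() test in the patch below). */
        unsigned int cpu_power = thread_power, cpu_power_orig = thread_power;
        if (!capacity && !(cpu_power * 100 < cpu_power_orig * 90))
                capacity = 1;
        printf("fixed capacity = %u\n", capacity);  /* prints 1 */
        return 0;
}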

Signed-off-by: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
Signed-off-by: Michael Neuling <mikey@neuling.org>

---

 include/linux/sched.h |    2 -
 kernel/sched_fair.c   |   56 +++++++++++++++++++++++++++++++++++++++++---------
 2 files changed, 48 insertions(+), 10 deletions(-)

Index: linux-2.6/include/linux/sched.h
===================================================================
--- linux-2.6.orig/include/linux/sched.h
+++ linux-2.6/include/linux/sched.h
@@ -860,7 +860,7 @@ struct sched_group {
 	 * CPU power of this group, SCHED_LOAD_SCALE being max power for a
 	 * single CPU.
 	 */
-	unsigned int cpu_power;
+	unsigned int cpu_power, cpu_power_orig;
 
 	/*
 	 * The CPUs this group covers.
Index: linux-2.6/kernel/sched_fair.c
===================================================================
--- linux-2.6.orig/kernel/sched_fair.c
+++ linux-2.6/kernel/sched_fair.c
@@ -2285,13 +2285,6 @@ static void update_cpu_power(struct sche
 	unsigned long power = SCHED_LOAD_SCALE;
 	struct sched_group *sdg = sd->groups;
 
-	if (sched_feat(ARCH_POWER))
-		power *= arch_scale_freq_power(sd, cpu);
-	else
-		power *= default_scale_freq_power(sd, cpu);
-
-	power >>= SCHED_LOAD_SHIFT;
-
 	if ((sd->flags & SD_SHARE_CPUPOWER) && weight > 1) {
 		if (sched_feat(ARCH_POWER))
 			power *= arch_scale_smt_power(sd, cpu);
@@ -2301,6 +2294,15 @@ static void update_cpu_power(struct sche
 		power >>= SCHED_LOAD_SHIFT;
 	}
 
+	sdg->cpu_power_orig = power;
+
+	if (sched_feat(ARCH_POWER))
+		power *= arch_scale_freq_power(sd, cpu);
+	else
+		power *= default_scale_freq_power(sd, cpu);
+
+	power >>= SCHED_LOAD_SHIFT;
+
 	power *= scale_rt_power(cpu);
 	power >>= SCHED_LOAD_SHIFT;
 
@@ -2333,6 +2335,36 @@ static void update_group_power(struct sc
 	sdg->cpu_power = power;
 }
 
+/*
+ * What's the influence of freq. scaling or real-time tasks on a cpu's power?
+ *
+ * Return 1 (influence exists) if power was scaled by more than 10% due to
+ * either factor, else return 0 (i.e no influence exists).
+ *
+ * This is specifically needed to handle capacity being reported as 0 for
+ * hardware threads within a Power7-like core (that have 4 or more hardware
+ * threads/core).
+ */
+static inline int
+rt_freq_influence(struct sched_group *group, struct sched_domain *sd)
+{
+	/* We need this test only at SMT domain level */
+	if (sd->child)
+		return 1;
+
+	/*
+	 * Check to see if the final cpu power was reduced by more than 10% due
+	 * to frequency scaling or real-time task's cpu usage. Note that
+	 * cpu_power could be different from cpu_power_orig even in the absence
+	 * of either factors - this is due to rounding issues in
+	 * update_cpu_power()
+	 */
+	if (group->cpu_power * 100 < group->cpu_power_orig * 90)
+		return 1;
+
+	return 0;
+}
+
 /**
  * update_sg_lb_stats - Update sched_group's statistics for load balancing.
  * @sd: The sched_domain whose statistics are to be updated.
@@ -2426,6 +2458,8 @@ static inline void update_sg_lb_stats(st
 
 	sgs->group_capacity =
 		DIV_ROUND_CLOSEST(group->cpu_power, SCHED_LOAD_SCALE);
+	if (!sgs->group_capacity && !rt_freq_influence(group, sd))
+		sgs->group_capacity = 1;
 }
 
 /**
@@ -2725,7 +2759,8 @@ ret:
  */
 static struct rq *
 find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle,
-		   unsigned long imbalance, const struct cpumask *cpus)
+		   unsigned long imbalance, const struct cpumask *cpus,
+		   struct sched_domain *sd)
 {
 	struct rq *busiest = NULL, *rq;
 	unsigned long max_load = 0;
@@ -2736,6 +2771,9 @@ find_busiest_queue(struct sched_group *g
 		unsigned long capacity = DIV_ROUND_CLOSEST(power, SCHED_LOAD_SCALE);
 		unsigned long wl;
 
+		if (!capacity && !rt_freq_influence(group, sd))
+			capacity = 1;
+
 		if (!cpumask_test_cpu(i, cpus))
 			continue;
 
@@ -2852,7 +2890,7 @@ redo:
 		goto out_balanced;
 	}
 
-	busiest = find_busiest_queue(group, idle, imbalance, cpus);
+	busiest = find_busiest_queue(group, idle, imbalance, cpus, sd);
 	if (!busiest) {
 		schedstat_inc(sd, lb_nobusyq[idle]);
 		goto out_balanced;


* [PATCH 2/3] sched: add asymmetric group packing option for sibling domain
  2010-06-08  4:57 [PATCH 0/3] sched: asymmetrical packing for POWER7 SMT4 Michael Neuling
@ 2010-06-08  4:57 ` Michael Neuling
  2010-06-08  4:57 ` [PATCH 1/3] sched: fix capacity calculations for SMT4 Michael Neuling
  2010-06-08  4:57 ` [PATCH 3/3] powerpc: enabled asymmetric SMT scheduling on POWER7 Michael Neuling
  2 siblings, 0 replies; 5+ messages in thread
From: Michael Neuling @ 2010-06-08  4:57 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Peter Zijlstra; +Cc: linuxppc-dev, linux-kernel

Check to see if the group is packed in a sched domain.

This is primarily intended to be used at the sibling level.  Some cores
like POWER7 prefer to use lower numbered SMT threads.  In the case of
POWER7, it can move to lower SMT modes only when higher threads are
idle.  When in lower SMT modes, the threads will perform better since
they share fewer core resources.  Hence when we have idle threads, we
want them to be the higher ones.

This adds a hook into f_b_g() called check_asym_packing() to check for
this packing.  The packing function is run on idle threads.  It checks
to see if the busiest CPU in this domain (a core in the POWER7 case)
has a higher CPU number than the one the packing function is being run
on.  If it does, the imbalance is calculated and the busier thread's
group is returned to f_b_g() as the busiest group.  Here we are
assuming a lower CPU number is equivalent to a lower SMT thread number.
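
As a rough sketch of that decision (names and the signature are
simplified for illustration; the real logic is in check_asym_packing()
and update_sd_pick_busiest() in the patch below):

/* Sketch only: simplified from the patch below; not the actual code. */
#define SD_ASYM_PACKING 0x0800  /* same value the patch adds to sched.h */

static int want_to_pull_for_packing(int this_cpu, int busiest_first_cpu,
                                    int sd_flags)
{
        if (!(sd_flags & SD_ASYM_PACKING))
                return 0;       /* packing not enabled in this domain */

        /* Pull only when we are a lower-numbered (lower SMT) thread than
         * the busiest group, so the low threads stay busy and the high
         * ones can go idle, letting the core drop to a lower SMT mode. */
        return this_cpu < busiest_first_cpu;
}

In the patch itself this test only runs for idle CPUs and only when the
architecture hook has set SD_ASYM_PACKING on the domain.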

It also creates a new SD_ASYM_PACKING flag to enable this feature at
any scheduler domain level.

It also creates an arch hook to enable this feature at the sibling
level.  The default function doesn't enable this feature.

Based heavily on a patch from Peter Zijlstra.
Fixes from Srivatsa Vaddagiri.

Signed-off-by: Michael Neuling <mikey@neuling.org>
Signed-off-by: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>

---

 include/linux/sched.h    |    4 +
 include/linux/topology.h |    1 
 kernel/sched_fair.c      |  104 +++++++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 104 insertions(+), 5 deletions(-)

Index: linux-2.6-ozlabs/include/linux/sched.h
===================================================================
--- linux-2.6-ozlabs.orig/include/linux/sched.h
+++ linux-2.6-ozlabs/include/linux/sched.h
@@ -804,7 +804,7 @@ enum cpu_idle_type {
 #define SD_POWERSAVINGS_BALANCE	0x0100	/* Balance for power savings */
 #define SD_SHARE_PKG_RESOURCES	0x0200	/* Domain members share cpu pkg resources */
 #define SD_SERIALIZE		0x0400	/* Only a single load balancing instance */
-
+#define SD_ASYM_PACKING		0x0800  /* Place busy groups earlier in the domain */
 #define SD_PREFER_SIBLING	0x1000	/* Prefer to place tasks in a sibling domain */
 
 enum powersavings_balance_level {
@@ -839,6 +839,8 @@ static inline int sd_balance_for_package
 	return SD_PREFER_SIBLING;
 }
 
+extern int __weak arch_sd_sibiling_asym_packing(void);
+
 /*
  * Optimise SD flags for power savings:
  * SD_BALANCE_NEWIDLE helps agressive task consolidation and power savings.
Index: linux-2.6-ozlabs/include/linux/topology.h
===================================================================
--- linux-2.6-ozlabs.orig/include/linux/topology.h
+++ linux-2.6-ozlabs/include/linux/topology.h
@@ -103,6 +103,7 @@ int arch_update_cpu_topology(void);
 				| 1*SD_SHARE_PKG_RESOURCES		\
 				| 0*SD_SERIALIZE			\
 				| 0*SD_PREFER_SIBLING			\
+				| arch_sd_sibiling_asym_packing()	\
 				,					\
 	.last_balance		= jiffies,				\
 	.balance_interval	= 1,					\
Index: linux-2.6-ozlabs/kernel/sched_fair.c
===================================================================
--- linux-2.6-ozlabs.orig/kernel/sched_fair.c
+++ linux-2.6-ozlabs/kernel/sched_fair.c
@@ -2463,6 +2463,41 @@ static inline void update_sg_lb_stats(st
 }
 
 /**
+ * update_sd_pick_busiest - return 1 on busiest group
+ * @sd: sched_domain whose statistics are to be checked
+ * @sds: sched_domain statistics
+ * @sg: sched_group candidate to be checked for being the busiest
+ * @sds: sched_group statistics
+ *
+ * This returns 1 for the busiest group. If asymmetric packing is
+ * enabled and we already have a busiest, but this candidate group has
+ * a higher cpu number than the current busiest, pick this sg.
+ */
+static int update_sd_pick_busiest(struct sched_domain *sd,
+				  struct sd_lb_stats *sds,
+				  struct sched_group *sg,
+				  struct sg_lb_stats *sgs,
+				  int this_cpu)
+{
+	if (sgs->sum_nr_running > sgs->group_capacity)
+		return 1;
+
+	if (sgs->group_imb)
+		return 1;
+
+	if ((sd->flags & SD_ASYM_PACKING) && sgs->sum_nr_running &&
+	    this_cpu < group_first_cpu(sg)) {
+		if (!sds->busiest)
+			return 1;
+
+		if (group_first_cpu(sds->busiest) > group_first_cpu(sg))
+			return 1;
+	}
+
+	return 0;
+}
+
+/**
  * update_sd_lb_stats - Update sched_group's statistics for load balancing.
  * @sd: sched_domain whose statistics are to be updated.
  * @this_cpu: Cpu for which load balance is currently performed.
@@ -2517,8 +2552,8 @@ static inline void update_sd_lb_stats(st
 			sds->this_nr_running = sgs.sum_nr_running;
 			sds->this_load_per_task = sgs.sum_weighted_load;
 		} else if (sgs.avg_load > sds->max_load &&
-			   (sgs.sum_nr_running > sgs.group_capacity ||
-				sgs.group_imb)) {
+			   update_sd_pick_busiest(sd, sds, group, &sgs,
+						  this_cpu)) {
 			sds->max_load = sgs.avg_load;
 			sds->busiest = group;
 			sds->busiest_nr_running = sgs.sum_nr_running;
@@ -2532,6 +2567,57 @@ static inline void update_sd_lb_stats(st
 	} while (group != sd->groups);
 }
 
+int __weak arch_sd_sibiling_asym_packing(void)
+{
+       return 0*SD_ASYM_PACKING;
+}
+
+/**
+ * check_asym_packing - Check to see if the group is packed into the
+ *			sched doman.
+ *
+ * This is primarily intended to used at the sibling level.  Some
+ * cores like POWER7 prefer to use lower numbered SMT threads.  In the
+ * case of POWER7, it can move to lower SMT modes only when higher
+ * threads are idle.  When in lower SMT modes, the threads will
+ * perform better since they share less core resources.  Hence when we
+ * have idle threads, we want them to be the higher ones.
+ *
+ * This packing function is run on idle threads.  It checks to see if
+ * the busiest CPU in this domain (core in the P7 case) has a higher
+ * CPU number than the packing function is being run on.  Here we are
+ * assuming lower CPU number will be equivalent to lower a SMT thread
+ * number.
+ *
+ * @sd: The sched_domain whose packing is to be checked.
+ * @sds: Statistics of the sched_domain which is to be packed
+ * @this_cpu: The cpu at whose sched_domain we're performing load-balance.
+ * @imbalance: returns amount of imbalanced due to packing.
+ *
+ * Returns 1 when packing is required and a task should be moved to
+ * this CPU.  The amount of the imbalance is returned in *imbalance.
+ */
+static int check_asym_packing(struct sched_domain *sd,
+			      struct sd_lb_stats *sds,
+			      int this_cpu, unsigned long *imbalance)
+{
+	int busiest_cpu;
+
+	if (!(sd->flags & SD_ASYM_PACKING))
+		return 0;
+
+	if (!sds->busiest)
+		return 0;
+
+	busiest_cpu = group_first_cpu(sds->busiest);
+	if (this_cpu > busiest_cpu)
+		return 0;
+
+	*imbalance = DIV_ROUND_CLOSEST(sds->max_load * sds->busiest->cpu_power,
+				       SCHED_LOAD_SCALE);
+	return 1;
+}
+
 /**
  * fix_small_imbalance - Calculate the minor imbalance that exists
  *			amongst the groups of a sched_domain, during
@@ -2724,6 +2810,10 @@ find_busiest_group(struct sched_domain *
 	if (!(*balance))
 		goto ret;
 
+	if ((idle == CPU_IDLE || idle == CPU_NEWLY_IDLE) &&
+	    check_asym_packing(sd, &sds, this_cpu, imbalance))
+		return sds.busiest;
+
 	if (!sds.busiest || sds.busiest_nr_running == 0)
 		goto out_balanced;
 
@@ -2813,9 +2903,14 @@ find_busiest_queue(struct sched_group *g
 /* Working cpumask for load_balance and load_balance_newidle. */
 static DEFINE_PER_CPU(cpumask_var_t, load_balance_tmpmask);
 
-static int need_active_balance(struct sched_domain *sd, int sd_idle, int idle)
+static int need_active_balance(struct sched_domain *sd, int sd_idle, int idle,
+			       int busiest_cpu, int this_cpu)
 {
 	if (idle == CPU_NEWLY_IDLE) {
+
+		if (sd->flags & SD_ASYM_PACKING && busiest_cpu > this_cpu)
+			return 1;
+
 		/*
 		 * The only task running in a non-idle cpu can be moved to this
 		 * cpu in an attempt to completely freeup the other CPU
@@ -2934,7 +3029,8 @@ redo:
 		schedstat_inc(sd, lb_failed[idle]);
 		sd->nr_balance_failed++;
 
-		if (need_active_balance(sd, sd_idle, idle)) {
+		if (need_active_balance(sd, sd_idle, idle, cpu_of(busiest),
+					this_cpu)) {
 			raw_spin_lock_irqsave(&busiest->lock, flags);
 
 			/* don't kick the active_load_balance_cpu_stop,


* [PATCH 3/3] powerpc: enabled asymmetric SMT scheduling on POWER7
  2010-06-08  4:57 [PATCH 0/3] sched: asymmetrical packing for POWER7 SMT4 Michael Neuling
  2010-06-08  4:57 ` [PATCH 2/3] sched: add asymmetric group packing option for sibling domain Michael Neuling
  2010-06-08  4:57 ` [PATCH 1/3] sched: fix capacity calculations for SMT4 Michael Neuling
@ 2010-06-08  4:57 ` Michael Neuling
  2010-06-09  8:54   ` Benjamin Herrenschmidt
  2 siblings, 1 reply; 5+ messages in thread
From: Michael Neuling @ 2010-06-08  4:57 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Peter Zijlstra; +Cc: linuxppc-dev, linux-kernel

The POWER7 core has dynamic SMT mode switching which is controlled by
the hypervisor.  There are 3 SMT modes:
	SMT1 uses thread  0
	SMT2 uses threads 0 & 1
	SMT4 uses threads 0, 1, 2 & 3
When in any particular SMT mode, all threads have the same performance
as each other (i.e. at any moment in time, all threads perform the same).

The SMT mode switching works such that when Linux has threads 2 & 3 idle
and 0 & 1 active, it will cede (H_CEDE hypercall) threads 2 and 3 in the
idle loop and the hypervisor will automatically switch to SMT2 for that
core (independent of other cores).  The opposite is not true, so if
threads 0 & 1 are idle and 2 & 3 are active, the core will stay in SMT4 mode.

Similarly if thread 0 is active and threads 1, 2 & 3 are idle, we'll go
into SMT1 mode.  

If we can get the core into a lower SMT mode (SMT1 is best), the threads
will perform better (since they share fewer core resources).  Hence when
we have idle threads, we want them to be the higher ones.
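
To make the preference concrete, here is a small standalone
illustration (not code from this patch) of which SMT mode a core ends
up in depending on where two runnable tasks are placed:

#include <stdio.h>

/* Illustration only: given which threads of a 4-thread POWER7 core are
 * busy (bit i set => thread i busy), which SMT mode does the core end
 * up in?  The hypervisor only narrows the mode when the *highest*
 * threads cede, as described above. */
static int resulting_smt_mode(int busy_mask)
{
        if (busy_mask & 0xc)    /* thread 2 or 3 busy -> stuck in SMT4 */
                return 4;
        if (busy_mask & 0x2)    /* thread 1 busy, 2 & 3 idle -> SMT2 */
                return 2;
        return 1;               /* only thread 0 (or nothing) busy -> SMT1 */
}

int main(void)
{
        printf("tasks on threads 0 & 1 -> SMT%d\n", resulting_smt_mode(0x3));
        printf("tasks on threads 2 & 3 -> SMT%d\n", resulting_smt_mode(0xc));
        return 0;
}

Two tasks packed onto threads 0 & 1 let the core drop to SMT2, while the
same two tasks on threads 2 & 3 leave it stuck in SMT4, which is exactly
the asymmetry the SD_ASYM_PACKING flag exploits.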

This adds a feature bit for asymmetric packing to powerpc and then
enables it on POWER7. 

Signed-off-by: Michael Neuling <mikey@neuling.org>

---

 arch/powerpc/include/asm/cputable.h |    3 ++-
 arch/powerpc/kernel/process.c       |    9 +++++++++
 2 files changed, 11 insertions(+), 1 deletion(-)

Index: linux-2.6-ozlabs/arch/powerpc/include/asm/cputable.h
===================================================================
--- linux-2.6-ozlabs.orig/arch/powerpc/include/asm/cputable.h
+++ linux-2.6-ozlabs/arch/powerpc/include/asm/cputable.h
@@ -195,6 +195,7 @@ extern const char *powerpc_base_platform
 #define CPU_FTR_SAO			LONG_ASM_CONST(0x0020000000000000)
 #define CPU_FTR_CP_USE_DCBTZ		LONG_ASM_CONST(0x0040000000000000)
 #define CPU_FTR_UNALIGNED_LD_STD	LONG_ASM_CONST(0x0080000000000000)
+#define CPU_FTR_ASYM_SMT		LONG_ASM_CONST(0x0100000000000000)
 
 #ifndef __ASSEMBLY__
 
@@ -409,7 +410,7 @@ extern const char *powerpc_base_platform
 	    CPU_FTR_MMCRA | CPU_FTR_SMT | \
 	    CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \
 	    CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
-	    CPU_FTR_DSCR | CPU_FTR_SAO)
+	    CPU_FTR_DSCR | CPU_FTR_SAO  | CPU_FTR_ASYM_SMT)
 #define CPU_FTRS_CELL	(CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
 	    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
 	    CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \
Index: linux-2.6-ozlabs/arch/powerpc/kernel/process.c
===================================================================
--- linux-2.6-ozlabs.orig/arch/powerpc/kernel/process.c
+++ linux-2.6-ozlabs/arch/powerpc/kernel/process.c
@@ -1265,3 +1265,12 @@ unsigned long randomize_et_dyn(unsigned 
 
 	return ret;
 }
+
+int arch_sd_sibiling_asym_packing(void)
+{
+	if (cpu_has_feature(CPU_FTR_ASYM_SMT)){
+		printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
+		return SD_ASYM_PACKING;
+	}
+	return 0;
+}


* Re: [PATCH 3/3] powerpc: enabled asymmetric SMT scheduling on POWER7
  2010-06-08  4:57 ` [PATCH 3/3] powerpc: enabled asymmetric SMT scheduling on POWER7 Michael Neuling
@ 2010-06-09  8:54   ` Benjamin Herrenschmidt
  0 siblings, 0 replies; 5+ messages in thread
From: Benjamin Herrenschmidt @ 2010-06-09  8:54 UTC (permalink / raw)
  To: Michael Neuling; +Cc: Peter Zijlstra, linuxppc-dev, linux-kernel

On Tue, 2010-06-08 at 14:57 +1000, Michael Neuling wrote:
> The POWER7 core has dynamic SMT mode switching which is controlled by
> the hypervisor.  There are 3 SMT modes:
> 	SMT1 uses thread  0
> 	SMT2 uses threads 0 & 1
> 	SMT4 uses threads 0, 1, 2 & 3
> When in any particular SMT mode, all threads have the same performance
> as each other (ie. at any moment in time, all threads perform the same).  
> 
> The SMT mode switching works such that when linux has threads 2 & 3 idle
> and 0 & 1 active, it will cede (H_CEDE hypercall) threads 2 and 3 in the
> idle loop and the hypervisor will automatically switch to SMT2 for that
> core (independent of other cores).  The opposite is not true, so if
> threads 0 & 1 are idle and 2 & 3 are active, we will stay in SMT4 mode.
> 
> Similarly if thread 0 is active and threads 1, 2 & 3 are idle, we'll go
> into SMT1 mode.  
> 
> If we can get the core into a lower SMT mode (SMT1 is best), the threads
> will perform better (since they share less core resources).  Hence when
> we have idle threads, we want them to be the higher ones.
> 
> This adds a feature bit for asymmetric packing to powerpc and then
> enables it on POWER7. 
> 
> Signed-off-by: Michael Neuling <mikey@neuling.org>

Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>

> 
> ---
> 
>  arch/powerpc/include/asm/cputable.h |    3 ++-
>  arch/powerpc/kernel/process.c       |    9 +++++++++
>  2 files changed, 11 insertions(+), 1 deletion(-)
> 
> Index: linux-2.6-ozlabs/arch/powerpc/include/asm/cputable.h
> ===================================================================
> --- linux-2.6-ozlabs.orig/arch/powerpc/include/asm/cputable.h
> +++ linux-2.6-ozlabs/arch/powerpc/include/asm/cputable.h
> @@ -195,6 +195,7 @@ extern const char *powerpc_base_platform
>  #define CPU_FTR_SAO			LONG_ASM_CONST(0x0020000000000000)
>  #define CPU_FTR_CP_USE_DCBTZ		LONG_ASM_CONST(0x0040000000000000)
>  #define CPU_FTR_UNALIGNED_LD_STD	LONG_ASM_CONST(0x0080000000000000)
> +#define CPU_FTR_ASYM_SMT		LONG_ASM_CONST(0x0100000000000000)
>  
>  #ifndef __ASSEMBLY__
>  
> @@ -409,7 +410,7 @@ extern const char *powerpc_base_platform
>  	    CPU_FTR_MMCRA | CPU_FTR_SMT | \
>  	    CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \
>  	    CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
> -	    CPU_FTR_DSCR | CPU_FTR_SAO)
> +	    CPU_FTR_DSCR | CPU_FTR_SAO  | CPU_FTR_ASYM_SMT)
>  #define CPU_FTRS_CELL	(CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
>  	    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
>  	    CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \
> Index: linux-2.6-ozlabs/arch/powerpc/kernel/process.c
> ===================================================================
> --- linux-2.6-ozlabs.orig/arch/powerpc/kernel/process.c
> +++ linux-2.6-ozlabs/arch/powerpc/kernel/process.c
> @@ -1265,3 +1265,12 @@ unsigned long randomize_et_dyn(unsigned 
>  
>  	return ret;
>  }
> +
> +int arch_sd_sibiling_asym_packing(void)
> +{
> +	if (cpu_has_feature(CPU_FTR_ASYM_SMT)){
> +		printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
> +		return SD_ASYM_PACKING;
> +	}
> +	return 0;
> +}
