linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/1] SGI X86 UV: Provide a System Activity Indicator driver
@ 2008-10-24 11:19 Mike Travis
  2008-10-24 12:01 ` Pavel Machek
  0 siblings, 1 reply; 26+ messages in thread
From: Mike Travis @ 2008-10-24 11:19 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Rusty Russell, Andrew Morton, Thomas Gleixner, Jack Steiner,
	Pavel Machek, H. Peter Anvin, Richard Purdie, LKML

[Ingo - could you let me know what's holding up this driver?  Thanks!, Mike]

The SGI UV system has no LEDs but uses one of the system controller
regs to indicate the online internal state of the cpu.  There is a
heartbeat bit indicating that the cpu is responding to interrupts,
and an idle bit indicating whether the cpu has been more or less than
50% idle each heartbeat period.  The current period is one second.

When a cpu panics, an error code is written by BIOS to this same reg.

So the reg has been renamed the "System Controller Interface Reg".

This patchset provides the following:

  * x86_64: Add base functionality for writing to the specific SCIR's
    for each cpu.

  * idle: Add an idle callback to measure the idle "on" and "off" times.

  * heartbeat: Invert "heartbeat" bit to indicate the cpu is "active".

  * if hotplug enabled, all bits are set (0xff) when the cpu is disabled.

Based on linux-2.6.tip/master.

Signed-off-by: Mike Travis <travis@sgi.com>
---
 arch/x86/kernel/genx2apic_uv_x.c |  138 +++++++++++++++++++++++++++++++++++++++
 include/asm-x86/uv/uv_hub.h      |   62 +++++++++++++++++
 2 files changed, 200 insertions(+)

--- linux-2.6.tip.orig/arch/x86/kernel/genx2apic_uv_x.c
+++ linux-2.6.tip/arch/x86/kernel/genx2apic_uv_x.c
@@ -10,6 +10,7 @@
 
 #include <linux/kernel.h>
 #include <linux/threads.h>
+#include <linux/cpu.h>
 #include <linux/cpumask.h>
 #include <linux/string.h>
 #include <linux/ctype.h>
@@ -18,6 +19,8 @@
 #include <linux/bootmem.h>
 #include <linux/module.h>
 #include <linux/hardirq.h>
+#include <linux/timer.h>
+#include <asm/idle.h>
 #include <asm/smp.h>
 #include <asm/ipi.h>
 #include <asm/genapic.h>
@@ -357,6 +360,139 @@ static __init void uv_rtc_init(void)
 		sn_rtc_cycles_per_second = ticks_per_sec;
 }
 
+/*
+ * percpu heartbeat timer
+ */
+static void uv_heartbeat(unsigned long ignored)
+{
+	struct timer_list *timer = &uv_hub_info->scir.timer;
+	unsigned char bits = uv_hub_info->scir.state;
+
+	/* flip heartbeat bit */
+	bits ^= SCIR_CPU_HEARTBEAT;
+
+	/* determine if we were mostly idle or not */
+	if (uv_hub_info->scir.idle_off && uv_hub_info->scir.idle_on) {
+		if (uv_hub_info->scir.idle_off > uv_hub_info->scir.idle_on)
+			bits |= SCIR_CPU_ACTIVITY;
+		else
+			bits &= ~SCIR_CPU_ACTIVITY;
+	}
+
+	/* reset idle counters */
+	uv_hub_info->scir.idle_on = 0;
+	uv_hub_info->scir.idle_off = 0;
+
+	/* update system controller interface reg */
+	uv_set_scir_bits(bits);
+
+	/* enable next timer period */
+	mod_timer(timer, jiffies + SCIR_CPU_HB_INTERVAL);
+}
+
+static int uv_idle(struct notifier_block *nfb, unsigned long action, void *junk)
+{
+	unsigned long elapsed = jiffies - uv_hub_info->scir.last;
+
+	/*
+	 * update activity to indicate current state,
+	 * measure time since last change
+	 */
+	if (action == IDLE_START) {
+
+		uv_hub_info->scir.state &= ~SCIR_CPU_ACTIVITY;
+		uv_hub_info->scir.idle_on += elapsed;
+		uv_hub_info->scir.last = jiffies;
+
+	} else if (action == IDLE_END) {
+
+		uv_hub_info->scir.state |= SCIR_CPU_ACTIVITY;
+		uv_hub_info->scir.idle_off += elapsed;
+		uv_hub_info->scir.last = jiffies;
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block uv_idle_notifier = {
+	.notifier_call = uv_idle,
+};
+
+static void __cpuinit uv_heartbeat_enable(int cpu)
+{
+	if (!uv_cpu_hub_info(cpu)->scir.enabled) {
+		struct timer_list *timer = &uv_cpu_hub_info(cpu)->scir.timer;
+
+		uv_set_cpu_scir_bits(cpu, SCIR_CPU_HEARTBEAT|SCIR_CPU_ACTIVITY);
+		setup_timer(timer, uv_heartbeat, cpu);
+		timer->expires = jiffies + SCIR_CPU_HB_INTERVAL;
+		add_timer_on(timer, cpu);
+		uv_cpu_hub_info(cpu)->scir.enabled = 1;
+	}
+
+	/* check boot cpu */
+	if (!uv_cpu_hub_info(0)->scir.enabled)
+		uv_heartbeat_enable(0);
+}
+
+static void __cpuinit uv_heartbeat_disable(int cpu)
+{
+	if (uv_cpu_hub_info(cpu)->scir.enabled) {
+		uv_cpu_hub_info(cpu)->scir.enabled = 0;
+		del_timer(&uv_cpu_hub_info(cpu)->scir.timer);
+	}
+	uv_set_cpu_scir_bits(cpu, 0xff);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+/*
+ * cpu hotplug notifier
+ */
+static __cpuinit int uv_scir_cpu_notify(struct notifier_block *self,
+				       unsigned long action, void *hcpu)
+{
+	long cpu = (long)hcpu;
+
+	switch (action) {
+	case CPU_ONLINE:
+		uv_heartbeat_enable(cpu);
+		break;
+	case CPU_DOWN_PREPARE:
+		uv_heartbeat_disable(cpu);
+		break;
+	default:
+		break;
+	}
+	return NOTIFY_OK;
+}
+
+static __init void uv_scir_register_cpu_notifier(void)
+{
+	hotcpu_notifier(uv_scir_cpu_notify, 0);
+	idle_notifier_register(&uv_idle_notifier);
+}
+
+#else /* !CONFIG_HOTPLUG_CPU */
+
+static __init void uv_scir_register_cpu_notifier(void)
+{
+	idle_notifier_register(&uv_idle_notifier);
+}
+
+static __init int uv_init_heartbeat(void)
+{
+	int cpu;
+
+	if (is_uv_system())
+		for_each_online_cpu(cpu)
+			uv_heartbeat_enable(cpu);
+	return 0;
+}
+
+late_initcall(uv_init_heartbeat);
+
+#endif /* !CONFIG_HOTPLUG_CPU */
+
 static bool uv_system_inited;
 
 void __init uv_system_init(void)
@@ -435,6 +571,7 @@ void __init uv_system_init(void)
 		uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper;
 		uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base;
 		uv_cpu_hub_info(cpu)->coherency_domain_number = 0;/* ZZZ */
+		uv_cpu_hub_info(cpu)->scir.offset = SCIR_LOCAL_MMR_BASE + lcpu;
 		uv_node_to_blade[nid] = blade;
 		uv_cpu_to_blade[cpu] = blade;
 		max_pnode = max(pnode, max_pnode);
@@ -449,6 +586,7 @@ void __init uv_system_init(void)
 	map_mmr_high(max_pnode);
 	map_config_high(max_pnode);
 	map_mmioh_high(max_pnode);
+	uv_scir_register_cpu_notifier();
 	uv_system_inited = true;
 }
 
--- linux-2.6.tip.orig/include/asm-x86/uv/uv_hub.h
+++ linux-2.6.tip/include/asm-x86/uv/uv_hub.h
@@ -112,6 +112,16 @@
  */
 #define UV_MAX_NASID_VALUE	(UV_MAX_NUMALINK_NODES * 2)
 
+struct uv_scir_s {
+	struct timer_list timer;
+	unsigned long	offset;
+	unsigned long	last;
+	unsigned long	idle_on;
+	unsigned long	idle_off;
+	unsigned char	state;
+	unsigned char	enabled;
+};
+
 /*
  * The following defines attributes of the HUB chip. These attributes are
  * frequently referenced and are kept in the per-cpu data areas of each cpu.
@@ -130,7 +140,9 @@ struct uv_hub_info_s {
 	unsigned char	blade_processor_id;
 	unsigned char	m_val;
 	unsigned char	n_val;
+	struct uv_scir_s scir;
 };
+
 DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
 #define uv_hub_info 		(&__get_cpu_var(__uv_hub_info))
 #define uv_cpu_hub_info(cpu)	(&per_cpu(__uv_hub_info, cpu))
@@ -162,6 +174,30 @@ DECLARE_PER_CPU(struct uv_hub_info_s, __
 
 #define UV_APIC_PNODE_SHIFT	6
 
+/* Local Bus from cpu's perspective */
+#define LOCAL_BUS_BASE		0x1c00000
+#define LOCAL_BUS_SIZE		(4 * 1024 * 1024)
+
+/*
+ * System Controller Interface Reg
+ *
+ * Note there are NO leds on a UV system.  This register is only
+ * used by the system controller to monitor system-wide operation.
+ * There are 64 regs per node.  With Nehalem cpus (2 cores per node,
+ * 8 cpus per core, 2 threads per cpu) there are 32 cpu threads on
+ * a node.
+ *
+ * The window is located at top of ACPI MMR space
+ */
+#define SCIR_WINDOW_COUNT	64
+#define SCIR_LOCAL_MMR_BASE	(LOCAL_BUS_BASE + \
+				 LOCAL_BUS_SIZE - \
+				 SCIR_WINDOW_COUNT)
+
+#define SCIR_CPU_HEARTBEAT	0x01	/* timer interrupt */
+#define SCIR_CPU_ACTIVITY	0x02	/* not idle */
+#define SCIR_CPU_HB_INTERVAL	(HZ)	/* once per second */
+
 /*
  * Macros for converting between kernel virtual addresses, socket local physical
  * addresses, and UV global physical addresses.
@@ -276,6 +312,16 @@ static inline void uv_write_local_mmr(un
 	*uv_local_mmr_address(offset) = val;
 }
 
+static inline unsigned char uv_read_local_mmr8(unsigned long offset)
+{
+	return *((unsigned char *)uv_local_mmr_address(offset));
+}
+
+static inline void uv_write_local_mmr8(unsigned long offset, unsigned char val)
+{
+	*((unsigned char *)uv_local_mmr_address(offset)) = val;
+}
+
 /*
  * Structures and definitions for converting between cpu, node, pnode, and blade
  * numbers.
@@ -350,5 +396,21 @@ static inline int uv_num_possible_blades
 	return uv_possible_blades;
 }
 
+/* Update SCIR state */
+static inline void uv_set_scir_bits(unsigned char value)
+{
+	if (uv_hub_info->scir.state != value) {
+		uv_hub_info->scir.state = value;
+		uv_write_local_mmr8(uv_hub_info->scir.offset, value);
+	}
+}
+static inline void uv_set_cpu_scir_bits(int cpu, unsigned char value)
+{
+	if (uv_cpu_hub_info(cpu)->scir.state != value) {
+		uv_cpu_hub_info(cpu)->scir.state = value;
+		uv_write_local_mmr8(uv_cpu_hub_info(cpu)->scir.offset, value);
+	}
+}
+
 #endif /* ASM_X86__UV__UV_HUB_H */
 

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/1] SGI X86 UV: Provide a System Activity Indicator driver
  2008-10-24 11:19 [PATCH 1/1] SGI X86 UV: Provide a System Activity Indicator driver Mike Travis
@ 2008-10-24 12:01 ` Pavel Machek
  2008-10-24 12:05   ` Ingo Molnar
  2008-10-24 12:14   ` Mike Travis
  0 siblings, 2 replies; 26+ messages in thread
From: Pavel Machek @ 2008-10-24 12:01 UTC (permalink / raw)
  To: Mike Travis
  Cc: Ingo Molnar, Rusty Russell, Andrew Morton, Thomas Gleixner,
	Jack Steiner, H. Peter Anvin, Richard Purdie, LKML

On Fri 2008-10-24 04:19:04, Mike Travis wrote:
> [Ingo - could you let me know what's holding up this driver?
> Thanks!, Mike]

I thought that...

 > +static __init void uv_scir_register_cpu_notifier(void)
> +{
> +	hotcpu_notifier(uv_scir_cpu_notify, 0);
> +	idle_notifier_register(&uv_idle_notifier);
> +}

...Ingo said that idle_notifier_register is deprecated and going away?

									Pavel
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/1] SGI X86 UV: Provide a System Activity Indicator driver
  2008-10-24 12:01 ` Pavel Machek
@ 2008-10-24 12:05   ` Ingo Molnar
  2008-10-24 12:27     ` Mike Travis
  2008-10-24 12:14   ` Mike Travis
  1 sibling, 1 reply; 26+ messages in thread
From: Ingo Molnar @ 2008-10-24 12:05 UTC (permalink / raw)
  To: Pavel Machek
  Cc: Mike Travis, Rusty Russell, Andrew Morton, Thomas Gleixner,
	Jack Steiner, H. Peter Anvin, Richard Purdie, LKML


* Pavel Machek <pavel@suse.cz> wrote:

> On Fri 2008-10-24 04:19:04, Mike Travis wrote:
> > [Ingo - could you let me know what's holding up this driver?
> > Thanks!, Mike]
> 
> I thought that...
> 
>  > +static __init void uv_scir_register_cpu_notifier(void)
> > +{
> > +	hotcpu_notifier(uv_scir_cpu_notify, 0);
> > +	idle_notifier_register(&uv_idle_notifier);
> > +}
> 
> ...Ingo said that idle_notifier_register is deprecated and going away?

yes, but i suspect Mike noticed that i acked Len's use of idle notifiers 
in the ACPI tree, and now he wants to refresh this discussion?

i cannot really believe why anyone would want to slow down the from-idle 
hotpath in such a lame way:

+       } else if (action == IDLE_END) {
+
+               uv_hub_info->scir.state |= SCIR_CPU_ACTIVITY;
+               uv_hub_info->scir.idle_off += elapsed;
+               uv_hub_info->scir.last = jiffies;
+       }

with such a lame and low-res timestamp:

+       unsigned long elapsed = jiffies - uv_hub_info->scir.last;

it's an absolutely pointless act of adding overhead. Just use a regular 
timer mechanism to update uv_hub_info->scir.idle_off.

	Ingo

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/1] SGI X86 UV: Provide a System Activity Indicator driver
  2008-10-24 12:01 ` Pavel Machek
  2008-10-24 12:05   ` Ingo Molnar
@ 2008-10-24 12:14   ` Mike Travis
  1 sibling, 0 replies; 26+ messages in thread
From: Mike Travis @ 2008-10-24 12:14 UTC (permalink / raw)
  To: Pavel Machek
  Cc: Ingo Molnar, Rusty Russell, Andrew Morton, Thomas Gleixner,
	Jack Steiner, H. Peter Anvin, Richard Purdie, LKML

Pavel Machek wrote:
> On Fri 2008-10-24 04:19:04, Mike Travis wrote:
>> [Ingo - could you let me know what's holding up this driver?
>> Thanks!, Mike]
> 
> I thought that...
> 
>  > +static __init void uv_scir_register_cpu_notifier(void)
>> +{
>> +	hotcpu_notifier(uv_scir_cpu_notify, 0);
>> +	idle_notifier_register(&uv_idle_notifier);
>> +}
> 
> ...Ingo said that idle_notifier_register is deprecated and going away?
> 
> 									Pavel

Is there any other way to get an indication then?  This is also in ia64 code
though not with the formality of a callback -- it's a static function pointer:

arch/ia64/kernel/process.c:
	void (*ia64_mark_idle)(int);

arch/ia64/sn/kernel/setup.c:
	ia64_mark_idle = &snidle;

arch/ia64/sn/kernel/idle.c
	void snidle(int state)
	{
	        if (state) {
	                if (pda->idle_flag == 0) {
	                        /*
	                         * Turn the activity LED off.
        	                 */
        	                set_led_bits(0, LED_CPU_ACTIVITY);
        	        }

	       	        pda->idle_flag = 1;
	        } else {
	                /*
	                 * Turn the activity LED on.
	                 */
	                set_led_bits(LED_CPU_ACTIVITY, LED_CPU_ACTIVITY);

	                pda->idle_flag = 0;
	        }
	}

As I mentioned, I would be happy to use an alternative but I haven't yet found one.
Should I put the above into x86_64?

Thanks,
Mike

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/1] SGI X86 UV: Provide a System Activity Indicator driver
  2008-10-24 12:05   ` Ingo Molnar
@ 2008-10-24 12:27     ` Mike Travis
  2008-10-24 18:12       ` Andi Kleen
  2008-10-27 11:43       ` Ingo Molnar
  0 siblings, 2 replies; 26+ messages in thread
From: Mike Travis @ 2008-10-24 12:27 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Pavel Machek, Rusty Russell, Andrew Morton, Thomas Gleixner,
	Jack Steiner, H. Peter Anvin, Richard Purdie, LKML

Ingo Molnar wrote:
> * Pavel Machek <pavel@suse.cz> wrote:
> 
>> On Fri 2008-10-24 04:19:04, Mike Travis wrote:
>>> [Ingo - could you let me know what's holding up this driver?
>>> Thanks!, Mike]
>> I thought that...
>>
>>  > +static __init void uv_scir_register_cpu_notifier(void)
>>> +{
>>> +	hotcpu_notifier(uv_scir_cpu_notify, 0);
>>> +	idle_notifier_register(&uv_idle_notifier);
>>> +}
>> ...Ingo said that idle_notifier_register is deprecated and going away?
> 
> yes, but i suspect Mike noticed that i acked Len's use of idle notifiers 
> in the ACPI tree, and now he wants to refresh this discussion?
> 
> i cannot really believe why anyone would want to slow down the from-idle 
> hotpath in such a lame way:
> 
> +       } else if (action == IDLE_END) {
> +
> +               uv_hub_info->scir.state |= SCIR_CPU_ACTIVITY;
> +               uv_hub_info->scir.idle_off += elapsed;
> +               uv_hub_info->scir.last = jiffies;
> +       }
> 
> with such a lame and low-res timestamp:
> 
> +       unsigned long elapsed = jiffies - uv_hub_info->scir.last;
> 
> it's an absolutely pointless act of adding overhead. Just use a regular 
> timer mechanism to update uv_hub_info->scir.idle_off.

I do use the timer mechanism (once per second) to update the scir state.
Unfortunately, the state is *always* not idle during the timer callback
(since we are actively executing), so I needed some way of knowing the
idle state prior to the timer callback.  The idle callback is only used
to record the amount of time this cpu's been idle during the last second
so on the once per second interrupt, I can determine whether this cpu was
more or less than 50% idle during that same time period.

The hardware saves the last 64 seconds for diagnostic purposes so this is
not meant to be a high-res indication but a longer term cpu history (along
with the other error records recorded, like memory/cpu/bus faults, etc.)
The System Controller copies these records into the system logs.

Also, this is only on a UV system, I'm not introducing any overhead for
any other x86_64 systems.

Thanks,
Mike

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/1] SGI X86 UV: Provide a System Activity Indicator driver
  2008-10-24 12:27     ` Mike Travis
@ 2008-10-24 18:12       ` Andi Kleen
  2008-10-24 22:18         ` Mike Travis
  2008-10-27 11:43       ` Ingo Molnar
  1 sibling, 1 reply; 26+ messages in thread
From: Andi Kleen @ 2008-10-24 18:12 UTC (permalink / raw)
  To: Mike Travis
  Cc: Ingo Molnar, Pavel Machek, Rusty Russell, Andrew Morton,
	Thomas Gleixner, Jack Steiner, H. Peter Anvin, Richard Purdie,
	LKML

Mike Travis <travis@sgi.com> writes:
>
> I do use the timer mechanism (once per second) to update the scir state.
> Unfortunately, the state is *always* not idle during the timer callback
> (since we are actively executing), so I needed some way of knowing the
> idle state prior to the timer callback.

The idle thread always has pid 0. So if the timer sees current->pid == 0
it interrupted idle.

But sampling only once a second would be presumably not very accurate.
You could just check the per cpu statistics the kernel keeps
anyways in the timer.

I agree with Ingo that this doesn't belong in an idle notifier.

-Andi
-- 
ak@linux.intel.com

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/1] SGI X86 UV: Provide a System Activity Indicator driver
  2008-10-24 18:12       ` Andi Kleen
@ 2008-10-24 22:18         ` Mike Travis
  2008-10-24 22:24           ` Mike Travis
                             ` (2 more replies)
  0 siblings, 3 replies; 26+ messages in thread
From: Mike Travis @ 2008-10-24 22:18 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Ingo Molnar, Pavel Machek, Rusty Russell, Andrew Morton,
	Thomas Gleixner, Jack Steiner, H. Peter Anvin, Richard Purdie,
	LKML

Andi Kleen wrote:
> Mike Travis <travis@sgi.com> writes:
>> I do use the timer mechanism (once per second) to update the scir state.
>> Unfortunately, the state is *always* not idle during the timer callback
>> (since we are actively executing), so I needed some way of knowing the
>> idle state prior to the timer callback.
> 
> The idle thread always has pid 0. So if the timer sees current->pid == 0
> it interrupted idle.
> 
> But sampling only once a second would be presumably not very accurate.
> You could just check the per cpu statistics the kernel keeps
> anyways in the timer.
> 
> I agree with Ingo that this doesn't belong in a idle notifier.
> 
> -Andi

Cool, thanks!  I rewrote the patch to use current->pid, removing the
idle callback.

On ia64 the activity indicator was kept up to date by modifying the
register state every time the cpu went into and out of idle.  Ingo
thought this was excessive I/O overhead and since then, I've learned
that the UV system has a FIFO for these registers.  So flipping the
bit every time we changed idle state would quickly fill the FIFO and
wouldn't be keeping in sync with the companion bit which indicates
that the cpu is interruptible.

So the hw group decided that having a once per second indicator, even
if it's only a very narrow (instant) sampling period, would meet their 
requirements.

Thanks!
Mike

^ permalink raw reply	[flat|nested] 26+ messages in thread

* [PATCH 1/1] SGI X86 UV: Provide a System Activity Indicator driver
  2008-10-24 22:18         ` Mike Travis
@ 2008-10-24 22:24           ` Mike Travis
  2008-10-27 11:42             ` Ingo Molnar
  2008-10-25  6:56           ` [PATCH 1/1] SGI X86 UV: Provide a System Activity Indicator driver Andi Kleen
  2008-10-27 11:36           ` Ingo Molnar
  2 siblings, 1 reply; 26+ messages in thread
From: Mike Travis @ 2008-10-24 22:24 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Andi Kleen, Pavel Machek, Rusty Russell, Andrew Morton,
	Thomas Gleixner, Jack Steiner, H. Peter Anvin, Richard Purdie,
	LKML

Subject: SGI X86 UV: Provide a System Activity Indicator driver

The SGI UV system has no LEDs but uses one of the system controller
regs to indicate the online internal state of the cpu.  There is a
heartbeat bit indicating that the cpu is responding to interrupts,
and an idle bit indicating whether the cpu is idle when the heartbeat
interrupt occurs.  The current period is one second.

When a cpu panics, an error code is written by BIOS to this same reg.

This patchset provides the following:

  * x86_64: Add base functionality for writing to the specific SCIR's
    for each cpu.

  * heartbeat: Invert "heartbeat" bit to indicate the cpu is
    "interruptible".  If the current thread is the idle thread,
    then indicate system is "idle".

  * if hotplug enabled, all bits are set (0xff) when the cpu is disabled.

Based on linux-2.6.tip/master.

Signed-off-by: Mike Travis <travis@sgi.com>
---
 arch/x86/include/asm/uv/uv_hub.h |   63 +++++++++++++++++++++++-
 arch/x86/kernel/genx2apic_uv_x.c |  102 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 164 insertions(+), 1 deletion(-)

--- test-tip-latest.orig/arch/x86/include/asm/uv/uv_hub.h
+++ test-tip-latest/arch/x86/include/asm/uv/uv_hub.h
@@ -112,6 +112,16 @@
  */
 #define UV_MAX_NASID_VALUE	(UV_MAX_NUMALINK_NODES * 2)
 
+struct uv_scir_s {
+	struct timer_list timer;
+	unsigned long	offset;
+	unsigned long	last;
+	unsigned long	idle_on;
+	unsigned long	idle_off;
+	unsigned char	state;
+	unsigned char	enabled;
+};
+
 /*
  * The following defines attributes of the HUB chip. These attributes are
  * frequently referenced and are kept in the per-cpu data areas of each cpu.
@@ -130,7 +140,9 @@ struct uv_hub_info_s {
 	unsigned char	blade_processor_id;
 	unsigned char	m_val;
 	unsigned char	n_val;
+	struct uv_scir_s scir;
 };
+
 DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
 #define uv_hub_info 		(&__get_cpu_var(__uv_hub_info))
 #define uv_cpu_hub_info(cpu)	(&per_cpu(__uv_hub_info, cpu))
@@ -162,6 +174,30 @@ DECLARE_PER_CPU(struct uv_hub_info_s, __
 
 #define UV_APIC_PNODE_SHIFT	6
 
+/* Local Bus from cpu's perspective */
+#define LOCAL_BUS_BASE		0x1c00000
+#define LOCAL_BUS_SIZE		(4 * 1024 * 1024)
+
+/*
+ * System Controller Interface Reg
+ *
+ * Note there are NO leds on a UV system.  This register is only
+ * used by the system controller to monitor system-wide operation.
+ * There are 64 regs per node.  With Nehalem cpus (2 cores per node,
+ * 8 cpus per core, 2 threads per cpu) there are 32 cpu threads on
+ * a node.
+ *
+ * The window is located at top of ACPI MMR space
+ */
+#define SCIR_WINDOW_COUNT	64
+#define SCIR_LOCAL_MMR_BASE	(LOCAL_BUS_BASE + \
+				 LOCAL_BUS_SIZE - \
+				 SCIR_WINDOW_COUNT)
+
+#define SCIR_CPU_HEARTBEAT	0x01	/* timer interrupt */
+#define SCIR_CPU_ACTIVITY	0x02	/* not idle */
+#define SCIR_CPU_HB_INTERVAL	(HZ)	/* once per second */
+
 /*
  * Macros for converting between kernel virtual addresses, socket local physical
  * addresses, and UV global physical addresses.
@@ -276,6 +312,16 @@ static inline void uv_write_local_mmr(un
 	*uv_local_mmr_address(offset) = val;
 }
 
+static inline unsigned char uv_read_local_mmr8(unsigned long offset)
+{
+	return *((unsigned char *)uv_local_mmr_address(offset));
+}
+
+static inline void uv_write_local_mmr8(unsigned long offset, unsigned char val)
+{
+	*((unsigned char *)uv_local_mmr_address(offset)) = val;
+}
+
 /*
  * Structures and definitions for converting between cpu, node, pnode, and blade
  * numbers.
@@ -350,5 +396,20 @@ static inline int uv_num_possible_blades
 	return uv_possible_blades;
 }
 
-#endif /* _ASM_X86_UV_UV_HUB_H */
+/* Update SCIR state */
+static inline void uv_set_scir_bits(unsigned char value)
+{
+	if (uv_hub_info->scir.state != value) {
+		uv_hub_info->scir.state = value;
+		uv_write_local_mmr8(uv_hub_info->scir.offset, value);
+	}
+}
+static inline void uv_set_cpu_scir_bits(int cpu, unsigned char value)
+{
+	if (uv_cpu_hub_info(cpu)->scir.state != value) {
+		uv_cpu_hub_info(cpu)->scir.state = value;
+		uv_write_local_mmr8(uv_cpu_hub_info(cpu)->scir.offset, value);
+	}
+}
 
+#endif /* _ASM_X86_UV_UV_HUB_H */
--- test-tip-latest.orig/arch/x86/kernel/genx2apic_uv_x.c
+++ test-tip-latest/arch/x86/kernel/genx2apic_uv_x.c
@@ -10,6 +10,7 @@
 
 #include <linux/kernel.h>
 #include <linux/threads.h>
+#include <linux/cpu.h>
 #include <linux/cpumask.h>
 #include <linux/string.h>
 #include <linux/ctype.h>
@@ -18,6 +19,8 @@
 #include <linux/bootmem.h>
 #include <linux/module.h>
 #include <linux/hardirq.h>
+#include <linux/timer.h>
+#include <asm/current.h>
 #include <asm/smp.h>
 #include <asm/ipi.h>
 #include <asm/genapic.h>
@@ -358,6 +361,103 @@ static __init void uv_rtc_init(void)
 }
 
 /*
+ * percpu heartbeat timer
+ */
+static void uv_heartbeat(unsigned long ignored)
+{
+	struct timer_list *timer = &uv_hub_info->scir.timer;
+	unsigned char bits = uv_hub_info->scir.state;
+
+	/* flip heartbeat bit */
+	bits ^= SCIR_CPU_HEARTBEAT;
+
+	/* are we the idle thread? */
+	if (current->pid == 0)
+		bits &= ~SCIR_CPU_ACTIVITY;
+	else
+		bits |= SCIR_CPU_ACTIVITY;
+
+	/* update system controller interface reg */
+	uv_set_scir_bits(bits);
+
+	/* enable next timer period */
+	mod_timer(timer, jiffies + SCIR_CPU_HB_INTERVAL);
+}
+
+static void __cpuinit uv_heartbeat_enable(int cpu)
+{
+	if (!uv_cpu_hub_info(cpu)->scir.enabled) {
+		struct timer_list *timer = &uv_cpu_hub_info(cpu)->scir.timer;
+
+		uv_set_cpu_scir_bits(cpu, SCIR_CPU_HEARTBEAT|SCIR_CPU_ACTIVITY);
+		setup_timer(timer, uv_heartbeat, cpu);
+		timer->expires = jiffies + SCIR_CPU_HB_INTERVAL;
+		add_timer_on(timer, cpu);
+		uv_cpu_hub_info(cpu)->scir.enabled = 1;
+	}
+
+	/* check boot cpu */
+	if (!uv_cpu_hub_info(0)->scir.enabled)
+		uv_heartbeat_enable(0);
+}
+
+static void __cpuinit uv_heartbeat_disable(int cpu)
+{
+	if (uv_cpu_hub_info(cpu)->scir.enabled) {
+		uv_cpu_hub_info(cpu)->scir.enabled = 0;
+		del_timer(&uv_cpu_hub_info(cpu)->scir.timer);
+	}
+	uv_set_cpu_scir_bits(cpu, 0xff);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+/*
+ * cpu hotplug notifier
+ */
+static __cpuinit int uv_scir_cpu_notify(struct notifier_block *self,
+				       unsigned long action, void *hcpu)
+{
+	long cpu = (long)hcpu;
+
+	switch (action) {
+	case CPU_ONLINE:
+		uv_heartbeat_enable(cpu);
+		break;
+	case CPU_DOWN_PREPARE:
+		uv_heartbeat_disable(cpu);
+		break;
+	default:
+		break;
+	}
+	return NOTIFY_OK;
+}
+
+static __init void uv_scir_register_cpu_notifier(void)
+{
+	hotcpu_notifier(uv_scir_cpu_notify, 0);
+}
+
+#else /* !CONFIG_HOTPLUG_CPU */
+
+static __init void uv_scir_register_cpu_notifier(void)
+{
+}
+
+static __init int uv_init_heartbeat(void)
+{
+	int cpu;
+
+	if (is_uv_system())
+		for_each_online_cpu(cpu)
+			uv_heartbeat_enable(cpu);
+	return 0;
+}
+
+late_initcall(uv_init_heartbeat);
+
+#endif /* !CONFIG_HOTPLUG_CPU */
+
+/*
  * Called on each cpu to initialize the per_cpu UV data area.
  * 	ZZZ hotplug not supported yet
  */
@@ -453,6 +553,7 @@ void __init uv_system_init(void)
 		uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper;
 		uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base;
 		uv_cpu_hub_info(cpu)->coherency_domain_number = sn_coherency_id;
+		uv_cpu_hub_info(cpu)->scir.offset = SCIR_LOCAL_MMR_BASE + lcpu;
 		uv_node_to_blade[nid] = blade;
 		uv_cpu_to_blade[cpu] = blade;
 		max_pnode = max(pnode, max_pnode);
@@ -469,4 +570,5 @@ void __init uv_system_init(void)
 	map_mmioh_high(max_pnode);
 
 	uv_cpu_init();
+	uv_scir_register_cpu_notifier();
 }


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/1] SGI X86 UV: Provide a System Activity Indicator driver
  2008-10-24 22:18         ` Mike Travis
  2008-10-24 22:24           ` Mike Travis
@ 2008-10-25  6:56           ` Andi Kleen
  2008-10-27 15:12             ` Mike Travis
  2008-10-27 11:36           ` Ingo Molnar
  2 siblings, 1 reply; 26+ messages in thread
From: Andi Kleen @ 2008-10-25  6:56 UTC (permalink / raw)
  To: Mike Travis
  Cc: Andi Kleen, Ingo Molnar, Pavel Machek, Rusty Russell,
	Andrew Morton, Thomas Gleixner, Jack Steiner, H. Peter Anvin,
	Richard Purdie, LKML

> Cool, thanks!  I rewrote the patch to use current->pid, removing the
> idle callback.

As an additional measure you could make the timer deferred when in
idle state.  Then you would only actually do something when the CPU
changes from idle to non-idle or keeps being busy, but nothing
when a CPU stays idle for a longer time.

The frequency wouldn't necessarily be 1 second then though.

-Andi

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/1] SGI X86 UV: Provide a System Activity Indicator driver
  2008-10-24 22:18         ` Mike Travis
  2008-10-24 22:24           ` Mike Travis
  2008-10-25  6:56           ` [PATCH 1/1] SGI X86 UV: Provide a System Activity Indicator driver Andi Kleen
@ 2008-10-27 11:36           ` Ingo Molnar
  2 siblings, 0 replies; 26+ messages in thread
From: Ingo Molnar @ 2008-10-27 11:36 UTC (permalink / raw)
  To: Mike Travis
  Cc: Andi Kleen, Pavel Machek, Rusty Russell, Andrew Morton,
	Thomas Gleixner, Jack Steiner, H. Peter Anvin, Richard Purdie,
	LKML


* Mike Travis <travis@sgi.com> wrote:

> > The idle thread always has pid 0. So if the timer sees 
> > current->pid == 0 it interrupted idle.

that's still hacky as it hardcodes the idle == pid-0 assumption. 
Please use the idle_cpu() function of the scheduler instead.

	Ingo

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/1] SGI X86 UV: Provide a System Activity Indicator driver
  2008-10-24 22:24           ` Mike Travis
@ 2008-10-27 11:42             ` Ingo Molnar
  2008-10-27 14:38               ` Mike Travis
  2008-10-27 14:51               ` [PATCH 1/1] SGI x86 UV: Update SCIR driver to use idle_cpu function Mike Travis
  0 siblings, 2 replies; 26+ messages in thread
From: Ingo Molnar @ 2008-10-27 11:42 UTC (permalink / raw)
  To: Mike Travis
  Cc: Andi Kleen, Pavel Machek, Rusty Russell, Andrew Morton,
	Thomas Gleixner, Jack Steiner, H. Peter Anvin, Richard Purdie,
	LKML


* Mike Travis <travis@sgi.com> wrote:

> Subject: SGI X86 UV: Provide a System Activity Indicator driver
> 
> The SGI UV system has no LEDS but uses one of the system controller
> regs to indicate the online internal state of the cpu.  There is a
> heartbeat bit indicating that the cpu is responding to interrupts,
> and an idle bit indicating whether the cpu is idle when the heartbeat
> interrupt occurs.  The current period is one second.
> 
> When a cpu panics, an error code is written by BIOS to this same reg.
> 
> This patchset provides the following:
> 
>   * x86_64: Add base functionality for writing to the specific SCIR's
>     for each cpu.
> 
>   * heartbeat: Invert "heartbeat" bit to indicate the cpu is
>     "interruptible".  If the current thread is the idle thread,
>     then indicate system is "idle".
> 
>   * if hotplug enabled, all bits are set (0xff) when the cpu is disabled.
> 
> Based on linux-2.6.tip/master.
> 
> Signed-off-by: Mike Travis <travis@sgi.com>
> ---
>  arch/x86/include/asm/uv/uv_hub.h |   63 +++++++++++++++++++++++-
>  arch/x86/kernel/genx2apic_uv_x.c |  102 +++++++++++++++++++++++++++++++++++++++
>  2 files changed, 164 insertions(+), 1 deletion(-)

applied to tip/x86/uv, thanks Mike!

Please send the idle_cpu() cleanup patch separately.

Another minor thing i noticed:

@@ -130,7 +140,9 @@ struct uv_hub_info_s {
 	unsigned char	blade_processor_id;
 	unsigned char	m_val;
 	unsigned char	n_val;
+	struct uv_scir_s scir;

please align the new field vertically, like they were aligned before - 
by adding another tab to the whole lineup. (This will also make it 
appear nicer when viewed together with followup definitions below this 
section)

	Ingo

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/1] SGI X86 UV: Provide a System Activity Indicator driver
  2008-10-24 12:27     ` Mike Travis
  2008-10-24 18:12       ` Andi Kleen
@ 2008-10-27 11:43       ` Ingo Molnar
  1 sibling, 0 replies; 26+ messages in thread
From: Ingo Molnar @ 2008-10-27 11:43 UTC (permalink / raw)
  To: Mike Travis
  Cc: Pavel Machek, Rusty Russell, Andrew Morton, Thomas Gleixner,
	Jack Steiner, H. Peter Anvin, Richard Purdie, LKML


* Mike Travis <travis@sgi.com> wrote:

> Also, this is only on a UV system, I'm not introducing any overhead 
> for any other x86_64 systems.

true, but it sets a bad example that others might follow - and it's 
not hard to fix anyway.

	Ingo

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/1] SGI X86 UV: Provide a System Activity Indicator driver
  2008-10-27 11:42             ` Ingo Molnar
@ 2008-10-27 14:38               ` Mike Travis
  2008-10-27 14:51               ` [PATCH 1/1] SGI x86 UV: Update SCIR driver to use idle_cpu function Mike Travis
  1 sibling, 0 replies; 26+ messages in thread
From: Mike Travis @ 2008-10-27 14:38 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Andi Kleen, Pavel Machek, Rusty Russell, Andrew Morton,
	Thomas Gleixner, Jack Steiner, H. Peter Anvin, Richard Purdie,
	LKML

Ingo Molnar wrote:
...
> 
> Please send the cpu_idle() cleanup patch separately.
> 
> Another minor thing i noticed:
> 
> @@ -130,7 +140,9 @@ struct uv_hub_info_s {
>  	unsigned char	blade_processor_id;
>  	unsigned char	m_val;
>  	unsigned char	n_val;
> +	struct uv_scir_s scir;
> 
> please align the new field vertically, like they were aligned before - 
> by adding another tab to the whole lineup. (This will also make it 
> appear nicer when viewed together with followup definitions below this 
> section)

Ok, will do, thanks!

Mike

^ permalink raw reply	[flat|nested] 26+ messages in thread

* [PATCH 1/1] SGI x86 UV: Update SCIR driver to use idle_cpu function
  2008-10-27 11:42             ` Ingo Molnar
  2008-10-27 14:38               ` Mike Travis
@ 2008-10-27 14:51               ` Mike Travis
  2008-10-27 18:06                 ` Ingo Molnar
  1 sibling, 1 reply; 26+ messages in thread
From: Mike Travis @ 2008-10-27 14:51 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Andi Kleen, Pavel Machek, Rusty Russell, Andrew Morton,
	Thomas Gleixner, Jack Steiner, H. Peter Anvin, Richard Purdie,
	LKML

Subject: SGI x86 UV: Update SCIR driver to use idle_cpu function

> Please send the cpu_idle() cleanup patch separately.

Change UV heartbeat function to use idle_cpu to determine cpu's
"idleness".  Realign uv_hub definitions.

Signed-off-by: Mike Travis <travis@sgi.com>
---
 arch/x86/include/asm/uv/uv_hub.h |   26 +++++++++++++-------------
 arch/x86/kernel/genx2apic_uv_x.c |    4 ++--
 2 files changed, 15 insertions(+), 15 deletions(-)

--- test-tip-latest.orig/arch/x86/include/asm/uv/uv_hub.h
+++ test-tip-latest/arch/x86/include/asm/uv/uv_hub.h
@@ -128,19 +128,19 @@ struct uv_scir_s {
  * They are kept together in a struct to minimize cache misses.
  */
 struct uv_hub_info_s {
-	unsigned long	global_mmr_base;
-	unsigned long	gpa_mask;
-	unsigned long	gnode_upper;
-	unsigned long	lowmem_remap_top;
-	unsigned long	lowmem_remap_base;
-	unsigned short	pnode;
-	unsigned short	pnode_mask;
-	unsigned short	coherency_domain_number;
-	unsigned short	numa_blade_id;
-	unsigned char	blade_processor_id;
-	unsigned char	m_val;
-	unsigned char	n_val;
-	struct uv_scir_s scir;
+	unsigned long		global_mmr_base;
+	unsigned long		gpa_mask;
+	unsigned long		gnode_upper;
+	unsigned long		lowmem_remap_top;
+	unsigned long		lowmem_remap_base;
+	unsigned short		pnode;
+	unsigned short		pnode_mask;
+	unsigned short		coherency_domain_number;
+	unsigned short		numa_blade_id;
+	unsigned char		blade_processor_id;
+	unsigned char		m_val;
+	unsigned char		n_val;
+	struct uv_scir_s	scir;
 };
 
 DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
--- test-tip-latest.orig/arch/x86/kernel/genx2apic_uv_x.c
+++ test-tip-latest/arch/x86/kernel/genx2apic_uv_x.c
@@ -371,8 +371,8 @@ static void uv_heartbeat(unsigned long i
 	/* flip heartbeat bit */
 	bits ^= SCIR_CPU_HEARTBEAT;
 
-	/* are we the idle thread? */
-	if (current->pid == 0)
+	/* is this cpu idle? */
+	if (idle_cpu(smp_processor_id()))
 		bits &= ~SCIR_CPU_ACTIVITY;
 	else
 		bits |= SCIR_CPU_ACTIVITY;


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/1] SGI X86 UV: Provide a System Activity Indicator driver
  2008-10-25  6:56           ` [PATCH 1/1] SGI X86 UV: Provide a System Activity Indicator driver Andi Kleen
@ 2008-10-27 15:12             ` Mike Travis
  0 siblings, 0 replies; 26+ messages in thread
From: Mike Travis @ 2008-10-27 15:12 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Ingo Molnar, Pavel Machek, Rusty Russell, Andrew Morton,
	Thomas Gleixner, Jack Steiner, H. Peter Anvin, Richard Purdie,
	LKML

Andi Kleen wrote:
>> Cool, thanks!  I rewrote the patch to use current->pid, removing the
>> idle callback.
> 
> As an additional measure you could make the timer deferred when in 
> idle state.  Then you would only actually do something when 
> changing from idle to non idle or keeps being busy, but nothing 
> when a CPU stays idle for a longer time.
> 
> The frequency wouldn't necessarily be 1 second then though.
> 
> -Andi

Hi Andi,

Sorry, I didn't see this message until now.  I will look into doing this
and ask the hw group if it's acceptable to drop BMC records when the cpu
is idle.  (I think they may still want them since this might also be
confused with "the cpu is no longer interruptible".)

Thanks,
Mike

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/1] SGI x86 UV: Update SCIR driver to use idle_cpu function
  2008-10-27 14:51               ` [PATCH 1/1] SGI x86 UV: Update SCIR driver to use idle_cpu function Mike Travis
@ 2008-10-27 18:06                 ` Ingo Molnar
  2008-10-27 18:46                   ` [PATCH 1/1] SGI x86 UV: Use raw_smp_processor_id Mike Travis
  0 siblings, 1 reply; 26+ messages in thread
From: Ingo Molnar @ 2008-10-27 18:06 UTC (permalink / raw)
  To: Mike Travis
  Cc: Andi Kleen, Pavel Machek, Rusty Russell, Andrew Morton,
	Thomas Gleixner, Jack Steiner, H. Peter Anvin, Richard Purdie,
	LKML


* Mike Travis <travis@sgi.com> wrote:

> Subject: SGI x86 UV: Update SCIR driver to use idle_cpu function
> 
> > Please send the cpu_idle() cleanup patch separately.
> 
> Change UV heartbeat function to use idle_cpu to determine cpu's
> "idleness".  Realign uv_hub definitions.
> 
> Signed-off-by: Mike Travis <travis@sgi.com>
> ---
>  arch/x86/include/asm/uv/uv_hub.h |   26 +++++++++++++-------------
>  arch/x86/kernel/genx2apic_uv_x.c |    4 ++--
>  2 files changed, 15 insertions(+), 15 deletions(-)

applied to tip/x86/uv, thanks Mike!

one small comment:

> +	/* is this cpu idle? */
> +	if (idle_cpu(smp_processor_id()))
>  		bits &= ~SCIR_CPU_ACTIVITY;

were you ever to run an -rt kernel on that hardware, this would 
produce a warning. raw_smp_processor_id() would be more appropriate i 
guess.

	Ingo

^ permalink raw reply	[flat|nested] 26+ messages in thread

* [PATCH 1/1] SGI x86 UV: Use raw_smp_processor_id
  2008-10-27 18:06                 ` Ingo Molnar
@ 2008-10-27 18:46                   ` Mike Travis
  0 siblings, 0 replies; 26+ messages in thread
From: Mike Travis @ 2008-10-27 18:46 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Jack Steiner, LKML

Subject: SGI x86 UV: Use raw_smp_processor_id

> one small comment:
> 
>> +	/* is this cpu idle? */
>> +	if (idle_cpu(smp_processor_id()))
>>  		bits &= ~SCIR_CPU_ACTIVITY;
> 
> were you ever to run an -rt kernel on that hardware, this would 
> produce a warning. raw_smp_processor_id() would be more appropriate i 
> guess.

Thanks for the heads up!  Yes, -rt is supported.  Might as well avoid that
problem now.

Signed-off-by: Mike Travis <travis@sgi.com>
---
 arch/x86/kernel/genx2apic_uv_x.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- test-tip-latest.orig/arch/x86/kernel/genx2apic_uv_x.c
+++ test-tip-latest/arch/x86/kernel/genx2apic_uv_x.c
@@ -372,7 +372,7 @@ static void uv_heartbeat(unsigned long i
 	bits ^= SCIR_CPU_HEARTBEAT;
 
 	/* is this cpu idle? */
-	if (idle_cpu(smp_processor_id()))
+	if (idle_cpu(raw_smp_processor_id()))
 		bits &= ~SCIR_CPU_ACTIVITY;
 	else
 		bits |= SCIR_CPU_ACTIVITY;

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/1] SGI X86 UV: Provide a System Activity Indicator driver
  2008-10-02  8:37           ` Pavel Machek
@ 2008-10-02 14:33             ` Mike Travis
  0 siblings, 0 replies; 26+ messages in thread
From: Mike Travis @ 2008-10-02 14:33 UTC (permalink / raw)
  To: Pavel Machek
  Cc: Ingo Molnar, Andrew Morton, rpurdie, Jack Steiner, linux-kernel,
	Thomas Gleixner, H. Peter Anvin

Pavel Machek wrote:
>> Pavel Machek wrote:
>>>> Another relevant point is that I will be adding a bit more functionality
>>>> to disable the timer interrupt on truly "idle" cpus (like have been idle
>>>> for some amount of seconds).  We would then use the "exit from idle"
>>>> callback to reestablish the timer interrupt.  [This would allow them to
>>>> enter power down states if appropriate.]
>>> Should you look at nohz instead of reinventing it? 
>> Thanks, I did look at it.  Quite complex.  Maybe I'm missing something
>> but I don't see how it fits in?  Are you saying I should be using
> 
> I saw your remark above about disabling timer interrupt on idle
> cpus. That's exactly nohz functionality, right? Maybe I misunderstood
> you and you meant "my monitoring interrupt" and not "generic system
> timer interrupt"?
> 
> 									Pavel

Ahh, yes, now I see... ;-)

And thanks for the pointer, I hadn't looked at this before.

In regards to "powering down" a UV system, it's not clear yet that it will
help much unless we can power down a whole lot more of the system than just
the cpus... ;-)  [but in reality, reducing power use any time you can is a
real necessity.]

Btw, are you ok with the remainder of the patch?

Thanks,
Mike

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/1] SGI X86 UV: Provide a System Activity Indicator driver
  2008-10-01 18:15         ` Mike Travis
  2008-10-01 19:41           ` Mike Travis
@ 2008-10-02  8:37           ` Pavel Machek
  2008-10-02 14:33             ` Mike Travis
  1 sibling, 1 reply; 26+ messages in thread
From: Pavel Machek @ 2008-10-02  8:37 UTC (permalink / raw)
  To: Mike Travis
  Cc: Ingo Molnar, Andrew Morton, rpurdie, Jack Steiner, linux-kernel,
	Thomas Gleixner, H. Peter Anvin

> Pavel Machek wrote:
> >> Another relevant point is that I will be adding a bit more functionality
> >> to disable the timer interrupt on truly "idle" cpus (like have been idle
> >> for some amount of seconds).  We would then use the "exit from idle"
> >> callback to reestablish the timer interrupt.  [This would allow them to
> >> enter power down states if appropriate.]
> > 
> > Should you look at nohz instead of reinventing it? 
> 
> Thanks, I did look at it.  Quite complex.  Maybe I'm missing something
> but I don't see how it fits in?  Are you saying I should be using

I saw your remark above about disabling timer interrupt on idle
cpus. That's exactly nohz functionality, right? Maybe I misunderstood
you and you meant "my monitoring interrupt" and not "generic system
timer interrupt"?

									Pavel
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/1] SGI X86 UV: Provide a System Activity Indicator driver
  2008-10-01 18:15         ` Mike Travis
@ 2008-10-01 19:41           ` Mike Travis
  2008-10-02  8:37           ` Pavel Machek
  1 sibling, 0 replies; 26+ messages in thread
From: Mike Travis @ 2008-10-01 19:41 UTC (permalink / raw)
  To: Pavel Machek
  Cc: Ingo Molnar, Andrew Morton, rpurdie, Jack Steiner, linux-kernel,
	Thomas Gleixner, H. Peter Anvin

Mike Travis wrote:
> Pavel Machek wrote:
>>> Another relevant point is that I will be adding a bit more functionality
>>> to disable the timer interrupt on truly "idle" cpus (like have been idle
>>> for some amount of seconds).  We would then use the "exit from idle"
>>> callback to reestablish the timer interrupt.  [This would allow them to
>>> enter power down states if appropriate.]
>> Should you look at nohz instead of reinventing it? 
> 
> Thanks, I did look at it.  Quite complex.  Maybe I'm missing something
> but I don't see how it fits in?  Are you saying I should be using data
> in the percpu tick_sched to gather the idle information for the once
> per second per cpu status update interrupt?  I see the @idle_active
> entry but wouldn't this always be false during the timer interrupt?
> Using any other entries would appear to be more complex than a simple
> store byte and subtracting two longs.
> 
> Or perhaps I should somehow hook into the sched_timer interrupt instead
> of having a separate once per second per cpu interrupt?  (Does this
> sched_timer interrupt each cpu once per second?)
> 
>>>> As i suggested in my previous mail about this topic, a low-frequency 
>>>> sampling method should be used instead, to indicate system status. I 
>>>> thought the leds drivers have all that in place already.
>>> It is low frequency (once per second), this is just setting what's to
>>> be sampled.
>>>
>>> As I mentioned, this is not for LED displays (human readable), it's for the
>>> system controller to monitor how all parts of the system are running, and
>>> this one is just the cpu parts.  The LED driver approach would have me
>>> registering 4096 led devices, with all their callbacks, 4096 strings saying
>>> "LED0001", etc., and I still cannot associate a specific register bit
>>> (AKA LED if that's what it was), with a specific cpu using the LED driver.
>>>
>>> The LED driver is fine for a couple of blinking lights indicating overall
>>> system activity, disk activity, etc.  (Btw, I did not see a network trigger,
>>> or a paging trigger, or an out of memory trigger, or some other things that
>>> might be useful for real time monitoring of the system.)
>> ...so add them...
>>
>>> But the LED driver has way more overhead than needed for this simple application.
>>>
>> So overhead from led driver is not okay, while overhead from messing
>> with idle loop is okay? Interesting...
>> 								Pavel
> 
> The overhead is mainly the registration of descriptor blocks for the
> 4096 registers representing the 4096 cpus all at separate addresses.
> The overhead in this patch for maintaining the "idle" state (prior to the
> timer interrupt causing "exit_idle") is storing a byte and subtracting the
> current jiffies from the jiffies at the last one second timer interrupt.
> (Even this subtraction can be removed, the only *important* item is
> whether the cpu is currently idle or not.)

Actually, this comparing idle time vs. not idle time during the last
second is what gets around the problem that the system goes to not
idle servicing the timer interrupt, which hides the real idle state.
If anyone has a suggestion on how to get a once per second per cpu
timer callback which does not call exit_idle, (or any other means of
indicating whether the cpu is idle), I'd be more than happy to remove
the idle callback function.

In discussions with SGI's RAS engineering it's felt that this status
is very important for their current RAS analysis programs, making the
system overhead for UV more than worthwhile.

Thanks,
Mike

> 
> This data is written to node local memory that's highly likely to be in
> the cache, as the same memory block is used for all UV hub operations.
> 
> Unfortunately, I am experiencing a simulator problem at the moment or
> I'd be able to quantify the exact amount of time added to the exit_idle()
> function, but it's basically noise in the overall resumption of a thread.
> 
> One other factor, this overhead is *only* for UV systems, no other x86_64
> systems or architectures are affected, so again I'm not understanding the
> objection.  This request was made from our hardware and RAS engineers,
> and is identical to what's been in the ia64 kernel for a few years now.
> 
> Perhaps the confusion is its near relationship to real "LED" lights?
> The original name "LED" is historical.  The bits are read by a system
> controller that has the job of monitoring the entire system, including
> both soft and hard errors and determining faulty [or near faulty]
> system components.  For example, if a node suddenly hangs, this is
> one of the diagnostic aids used in determining the state of that node.
> (Btw, the SCIR register that is written to once per second is a FIFO so
> it contains the last 64 updates of this register giving a temporal view
> of each cpu as well.)
> 
> Thanks,
> Mike


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/1] SGI X86 UV: Provide a System Activity Indicator driver
  2008-09-28 19:42       ` Pavel Machek
@ 2008-10-01 18:15         ` Mike Travis
  2008-10-01 19:41           ` Mike Travis
  2008-10-02  8:37           ` Pavel Machek
  0 siblings, 2 replies; 26+ messages in thread
From: Mike Travis @ 2008-10-01 18:15 UTC (permalink / raw)
  To: Pavel Machek
  Cc: Ingo Molnar, Andrew Morton, rpurdie, Jack Steiner, linux-kernel,
	Thomas Gleixner, H. Peter Anvin

Pavel Machek wrote:
>> Another relevant point is that I will be adding a bit more functionality
>> to disable the timer interrupt on truly "idle" cpus (like have been idle
>> for some amount of seconds).  We would then use the "exit from idle"
>> callback to reestablish the timer interrupt.  [This would allow them to
>> enter power down states if appropriate.]
> 
> Should you look at nohz instead of reinventing it? 

Thanks, I did look at it.  Quite complex.  Maybe I'm missing something
but I don't see how it fits in?  Are you saying I should be using data
in the percpu tick_sched to gather the idle information for the once
per second per cpu status update interrupt?  I see the @idle_active
entry but wouldn't this always be false during the timer interrupt?
Using any other entries would appear to be more complex than a simple
store byte and subtracting two longs.

Or perhaps I should somehow hook into the sched_timer interrupt instead
of having a separate once per second per cpu interrupt?  (Does this
sched_timer interrupt each cpu once per second?)

> 
>>> As i suggested in my previous mail about this topic, a low-frequency 
>>> sampling method should be used instead, to indicate system status. I 
>>> thought the leds drivers have all that in place already.
>> It is low frequency (once per second), this is just setting what's to
>> be sampled.
>>
>> As I mentioned, this is not for LED displays (human readable), it's for the
>> system controller to monitor how all parts of the system are running, and
>> this one is just the cpu parts.  The LED driver approach would have me
>> registering 4096 led devices, with all their callbacks, 4096 strings saying
>> "LED0001", etc., and I still cannot associate a specific register bit
>> (AKA LED if that's what it was), with a specific cpu using the LED driver.
>>
>> The LED driver is fine for a couple of blinking lights indicating overall
>> system activity, disk activity, etc.  (Btw, I did not see a network trigger,
>> or a paging trigger, or an out of memory trigger, or some other things that
>> might be useful for real time monitoring of the system.)
> 
> ...so add them...
> 
>> But the LED driver has way more overhead than needed for this simple application.
>>
> 
> So overhead from led driver is not okay, while overhead from messing
> with idle loop is okay? Interesting...
> 								Pavel

The overhead is mainly the registration of descriptor blocks for the
4096 registers representing the 4096 cpus all at separate addresses.
The overhead in this patch for maintaining the "idle" state (prior to the
timer interrupt causing "exit_idle") is storing a byte and subtracting the
current jiffies from the jiffies at the last one second timer interrupt.
(Even this subtraction can be removed, the only *important* item is
whether the cpu is currently idle or not.)

This data is written to node local memory that's highly likely to be in
the cache, as the same memory block is used for all UV hub operations.

Unfortunately, I am experiencing a simulator problem at the moment or
I'd be able to quantify the exact amount of time added to the exit_idle()
function, but it's basically noise in the overall resumption of a thread.

One other factor, this overhead is *only* for UV systems, no other x86_64
systems or architectures are affected, so again I'm not understanding the
objection.  This request was made from our hardware and RAS engineers,
and is identical to what's been in the ia64 kernel for a few years now.

Perhaps the confusion is its near relationship to real "LED" lights?
The original name "LED" is historical.  The bits are read by a system
controller that has the job of monitoring the entire system, including
both soft and hard errors and determining faulty [or near faulty]
system components.  For example, if a node suddenly hangs, this is
one of the diagnostic aids used in determining the state of that node.
(Btw, the SCIR register that is written to once per second is a FIFO so
it contains the last 64 updates of this register giving a temporal view
of each cpu as well.)

Thanks,
Mike

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/1] SGI X86 UV: Provide a System Activity Indicator driver
  2008-09-22 14:42     ` Mike Travis
@ 2008-09-28 19:42       ` Pavel Machek
  2008-10-01 18:15         ` Mike Travis
  0 siblings, 1 reply; 26+ messages in thread
From: Pavel Machek @ 2008-09-28 19:42 UTC (permalink / raw)
  To: Mike Travis
  Cc: Ingo Molnar, Andrew Morton, rpurdie, Jack Steiner, linux-kernel,
	Thomas Gleixner, H. Peter Anvin


> Another relevant point is that I will be adding a bit more functionality
> to disable the timer interrupt on truly "idle" cpus (like have been idle
> for some amount of seconds).  We would then use the "exit from idle"
> callback to reestablish the timer interrupt.  [This would allow them to
> enter power down states if appropriate.]

Should you look at nohz instead of reinventing it? 

> > As i suggested in my previous mail about this topic, a low-frequency 
> > sampling method should be used instead, to indicate system status. I 
> > thought the leds drivers have all that in place already.
> 
> It is low frequency (once per second), this is just setting what's to
> be sampled.
> 
> As I mentioned, this is not for LED displays (human readable), it's for the
> system controller to monitor how all parts of the system are running, and
> this one is just the cpu parts.  The LED driver approach would have me
> registering 4096 led devices, with all their callbacks, 4096 strings saying
> "LED0001", etc., and I still cannot associate a specific register bit
> (AKA LED if that's what it was), with a specific cpu using the LED driver.
> 
> The LED driver is fine for a couple of blinking lights indicating overall
> system activity, disk activity, etc.  (Btw, I did not see a network trigger,
> or a paging trigger, or an out of memory trigger, or some other things that
> might be useful for real time monitoring of the system.)

...so add them...

> But the LED driver has way more overhead than needed for this simple application.
> 

So overhead from led driver is not okay, while overhead from messing
with idle loop is okay? Interesting...
								Pavel
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/1] SGI X86 UV: Provide a System Activity Indicator driver
  2008-09-22 11:48   ` Ingo Molnar
  2008-09-22 14:42     ` Mike Travis
@ 2008-09-22 14:47     ` Mike Travis
  1 sibling, 0 replies; 26+ messages in thread
From: Mike Travis @ 2008-09-22 14:47 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Andrew Morton, rpurdie, Jack Steiner, linux-kernel,
	Thomas Gleixner, H. Peter Anvin

Ingo Molnar wrote:
> * Mike Travis <travis@sgi.com> wrote:
> 
>>  arch/x86/kernel/genx2apic_uv_x.c |  138 +++++++++++++++++++++++++++++++++++++++
>>  include/asm-x86/uv/uv_hub.h      |   62 +++++++++++++++++
> 
> hm, why is this in genx2apic_uv_x.c, not a separate file?

This keeps it in the uv_hub percpu area, a chunk of memory that is accessed
quite often for many of the basic UV operations.  This should increase the
probability that the data is already located in the cpu's cache.

Thanks,
Mike


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/1] SGI X86 UV: Provide a System Activity Indicator driver
  2008-09-22 11:48   ` Ingo Molnar
@ 2008-09-22 14:42     ` Mike Travis
  2008-09-28 19:42       ` Pavel Machek
  2008-09-22 14:47     ` Mike Travis
  1 sibling, 1 reply; 26+ messages in thread
From: Mike Travis @ 2008-09-22 14:42 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Andrew Morton, rpurdie, Jack Steiner, linux-kernel,
	Thomas Gleixner, H. Peter Anvin

Ingo Molnar wrote:
> * Mike Travis <travis@sgi.com> wrote:
> 
>>  arch/x86/kernel/genx2apic_uv_x.c |  138 +++++++++++++++++++++++++++++++++++++++
>>  include/asm-x86/uv/uv_hub.h      |   62 +++++++++++++++++
> 
> hm, why is this in genx2apic_uv_x.c, not a separate file?
> 
>> +	idle_notifier_register(&uv_idle_notifier);
> 
> no, we really dont want such overhead in an idle notifier. (those 
> interfaces will go away)

Hi Ingo,

The overhead is very low and I've not been able to figure out how to 
"estimate" per cpu usage without tons more code (duplicating "top" in
the kernel.)  The actual I/O (well O ;-) is once per second.  The other
important point is this is only on an SGI UV system, so no other systems
are affected.  Because of this, I'm a bit confused by your objection.

I tried a couple of other approaches, but because the timer wakeup
causes the idle state to be exited, it's difficult to determine if
the cpu was idle before the timer interrupt.  (I even tried putting a
"wasidle" in the pda to check the idle state prior to the last exit
from idle, but this did not appear to be reliable.)  The idle callback
solves this rather nicely.

Here's the same code that's been in ia64 for quite a while:

arch/ia64/sn/kernel/setup.c:
	void __init sn_setup(char **cmdline_p)
	{
		...
		ia64_mark_idle = &snidle;

arch/ia64/kernel/process.c:
	cpu_idle (void)
	{
        	void (*mark_idle)(int) = ia64_mark_idle;
		...

                if (mark_idle)
                        (*mark_idle)(1);

(The x86 callback approach is much cleaner.)

Another relevant point is that I will be adding a bit more functionality
to disable the timer interrupt on truly "idle" cpus (like have been idle
for some amount of seconds).  We would then use the "exit from idle"
callback to reestablish the timer interrupt.  [This would allow them to
enter power down states if appropriate.]

> 
> As i suggested in my previous mail about this topic, a low-frequency 
> sampling method should be used instead, to indicate system status. I 
> thought the leds drivers have all that in place already.

It is low frequency (once per second), this is just setting what's to
be sampled.

As I mentioned, this is not for LED displays (human readable), it's for the
system controller to monitor how all parts of the system are running, and
this one is just the cpu parts.  The LED driver approach would have me
registering 4096 led devices, with all their callbacks, 4096 strings saying
"LED0001", etc., and I still cannot associate a specific register bit
(AKA LED if that's what it was), with a specific cpu using the LED driver.

The LED driver is fine for a couple of blinking lights indicating overall
system activity, disk activity, etc.  (Btw, I did not see a network trigger,
or a paging trigger, or an out of memory trigger, or some other things that
might be useful for real time monitoring of the system.)

But the LED driver has way more overhead than needed for this simple application.

Thanks,
Mike

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/1] SGI X86 UV: Provide a System Activity Indicator driver
  2008-09-19 14:37 ` [PATCH 1/1] " Mike Travis
@ 2008-09-22 11:48   ` Ingo Molnar
  2008-09-22 14:42     ` Mike Travis
  2008-09-22 14:47     ` Mike Travis
  0 siblings, 2 replies; 26+ messages in thread
From: Ingo Molnar @ 2008-09-22 11:48 UTC (permalink / raw)
  To: Mike Travis
  Cc: Andrew Morton, rpurdie, Jack Steiner, linux-kernel,
	Thomas Gleixner, H. Peter Anvin


* Mike Travis <travis@sgi.com> wrote:

>  arch/x86/kernel/genx2apic_uv_x.c |  138 +++++++++++++++++++++++++++++++++++++++
>  include/asm-x86/uv/uv_hub.h      |   62 +++++++++++++++++

hm, why is this in genx2apic_uv_x.c, not a separate file?

> +	idle_notifier_register(&uv_idle_notifier);

no, we really dont want such overhead in an idle notifier. (those 
interfaces will go away)

As i suggested in my previous mail about this topic, a low-frequency 
sampling method should be used instead, to indicate system status. I 
thought the leds drivers have all that in place already.

	Ingo

^ permalink raw reply	[flat|nested] 26+ messages in thread

* [PATCH 1/1] SGI X86 UV: Provide a System Activity Indicator driver
  2008-09-19 14:37 [PATCH 0/1] " Mike Travis
@ 2008-09-19 14:37 ` Mike Travis
  2008-09-22 11:48   ` Ingo Molnar
  0 siblings, 1 reply; 26+ messages in thread
From: Mike Travis @ 2008-09-19 14:37 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Andrew Morton, rpurdie, Jack Steiner, linux-kernel

[-- Attachment #1: uv_led_support --]
[-- Type: text/plain, Size: 8473 bytes --]

The SGI UV system has no LEDS but uses one of the system controller
regs to indicate the online internal state of the cpu.  There is a
heartbeat bit indicating that the cpu is responding to interrupts,
and an idle bit indicating whether the cpu has been more or less than
50% idle each heartbeat period.  The current period is one second.

When a cpu panics, an error code is written by BIOS to this same reg.

So the reg has been renamed the "System Controller Interface Reg".

This patchset provides the following:

  * x86_64: Add base functionality for writing to the specific SCIR's
    for each cpu.

  * idle: Add an idle callback to measure the idle "on" and "off" times.

  * heartbeat: Invert "heartbeat" bit to indicate the cpu is "active".

  * if hotplug enabled, all bits are set (0xff) when the cpu is disabled.

Based on linux-2.6.tip/master.

Signed-off-by: Mike Travis <travis@sgi.com>
---
 arch/x86/kernel/genx2apic_uv_x.c |  138 +++++++++++++++++++++++++++++++++++++++
 include/asm-x86/uv/uv_hub.h      |   62 +++++++++++++++++
 2 files changed, 200 insertions(+)

--- linux-2.6.tip.orig/arch/x86/kernel/genx2apic_uv_x.c
+++ linux-2.6.tip/arch/x86/kernel/genx2apic_uv_x.c
@@ -10,6 +10,7 @@
 
 #include <linux/kernel.h>
 #include <linux/threads.h>
+#include <linux/cpu.h>
 #include <linux/cpumask.h>
 #include <linux/string.h>
 #include <linux/ctype.h>
@@ -18,6 +19,8 @@
 #include <linux/bootmem.h>
 #include <linux/module.h>
 #include <linux/hardirq.h>
+#include <linux/timer.h>
+#include <asm/idle.h>
 #include <asm/smp.h>
 #include <asm/ipi.h>
 #include <asm/genapic.h>
@@ -358,6 +361,139 @@ static __init void uv_rtc_init(void)
 		sn_rtc_cycles_per_second = ticks_per_sec;
 }
 
+/*
+ * percpu heartbeat timer
+ */
+static void uv_heartbeat(unsigned long ignored)
+{
+	struct timer_list *timer = &uv_hub_info->scir.timer;
+	unsigned char bits = uv_hub_info->scir.state;
+
+	/* flip heartbeat bit */
+	bits ^= SCIR_CPU_HEARTBEAT;
+
+	/* determine if we were mostly idle or not */
+	if (uv_hub_info->scir.idle_off && uv_hub_info->scir.idle_on) {
+		if (uv_hub_info->scir.idle_off > uv_hub_info->scir.idle_on)
+			bits |= SCIR_CPU_ACTIVITY;
+		else
+			bits &= ~SCIR_CPU_ACTIVITY;
+	}
+
+	/* reset idle counters */
+	uv_hub_info->scir.idle_on = 0;
+	uv_hub_info->scir.idle_off = 0;
+
+	/* update system controller interface reg */
+	uv_set_scir_bits(bits);
+
+	/* enable next timer period */
+	mod_timer(timer, jiffies + SCIR_CPU_HB_INTERVAL);
+}
+
+static int uv_idle(struct notifier_block *nfb, unsigned long action, void *junk)
+{
+	unsigned long elapsed = jiffies - uv_hub_info->scir.last;
+
+	/*
+	 * update activity to indicate current state,
+	 * measure time since last change
+	 */
+	if (action == IDLE_START) {
+
+		uv_hub_info->scir.state &= ~SCIR_CPU_ACTIVITY;
+		uv_hub_info->scir.idle_on += elapsed;
+		uv_hub_info->scir.last = jiffies;
+
+	} else if (action == IDLE_END) {
+
+		uv_hub_info->scir.state |= SCIR_CPU_ACTIVITY;
+		uv_hub_info->scir.idle_off += elapsed;
+		uv_hub_info->scir.last = jiffies;
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block uv_idle_notifier = {
+	.notifier_call = uv_idle,
+};
+
+static void __cpuinit uv_heartbeat_enable(int cpu)
+{
+	if (!uv_cpu_hub_info(cpu)->scir.enabled) {
+		struct timer_list *timer = &uv_cpu_hub_info(cpu)->scir.timer;
+
+		uv_set_cpu_scir_bits(cpu, SCIR_CPU_HEARTBEAT|SCIR_CPU_ACTIVITY);
+		setup_timer(timer, uv_heartbeat, cpu);
+		timer->expires = jiffies + SCIR_CPU_HB_INTERVAL;
+		add_timer_on(timer, cpu);
+		uv_cpu_hub_info(cpu)->scir.enabled = 1;
+	}
+
+	/* check boot cpu */
+	if (!uv_cpu_hub_info(0)->scir.enabled)
+		uv_heartbeat_enable(0);
+}
+
+static void __cpuinit uv_heartbeat_disable(int cpu)
+{
+	if (uv_cpu_hub_info(cpu)->scir.enabled) {
+		uv_cpu_hub_info(cpu)->scir.enabled = 0;
+		del_timer(&uv_cpu_hub_info(cpu)->scir.timer);
+	}
+	uv_set_cpu_scir_bits(cpu, 0xff);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+/*
+ * cpu hotplug notifier
+ */
+static __cpuinit int uv_scir_cpu_notify(struct notifier_block *self,
+				       unsigned long action, void *hcpu)
+{
+	long cpu = (long)hcpu;
+
+	switch (action) {
+	case CPU_ONLINE:
+		uv_heartbeat_enable(cpu);
+		break;
+	case CPU_DOWN_PREPARE:
+		uv_heartbeat_disable(cpu);
+		break;
+	default:
+		break;
+	}
+	return NOTIFY_OK;
+}
+
+static __init void uv_scir_register_cpu_notifier(void)
+{
+	hotcpu_notifier(uv_scir_cpu_notify, 0);
+	idle_notifier_register(&uv_idle_notifier);
+}
+
+#else /* !CONFIG_HOTPLUG_CPU */
+
+static __init void uv_scir_register_cpu_notifier(void)
+{
+	idle_notifier_register(&uv_idle_notifier);
+}
+
+static __init int uv_init_heartbeat(void)
+{
+	int cpu;
+
+	if (is_uv_system())
+		for_each_online_cpu(cpu)
+			uv_heartbeat_enable(cpu);
+	return 0;
+}
+
+late_initcall(uv_init_heartbeat);
+
+#endif /* !CONFIG_HOTPLUG_CPU */
+
 static bool uv_system_inited;
 
 void __init uv_system_init(void)
@@ -436,6 +572,7 @@ void __init uv_system_init(void)
 		uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper;
 		uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base;
 		uv_cpu_hub_info(cpu)->coherency_domain_number = 0;/* ZZZ */
+		uv_cpu_hub_info(cpu)->scir.offset = SCIR_LOCAL_MMR_BASE + lcpu;
 		uv_node_to_blade[nid] = blade;
 		uv_cpu_to_blade[cpu] = blade;
 		max_pnode = max(pnode, max_pnode);
@@ -450,6 +587,7 @@ void __init uv_system_init(void)
 	map_mmr_high(max_pnode);
 	map_config_high(max_pnode);
 	map_mmioh_high(max_pnode);
+	uv_scir_register_cpu_notifier();
 	uv_system_inited = true;
 }
 
--- linux-2.6.tip.orig/include/asm-x86/uv/uv_hub.h
+++ linux-2.6.tip/include/asm-x86/uv/uv_hub.h
@@ -112,6 +112,16 @@
  */
 #define UV_MAX_NASID_VALUE	(UV_MAX_NUMALINK_NODES * 2)
 
+struct uv_scir_s {
+	struct timer_list timer;
+	unsigned long	offset;
+	unsigned long	last;
+	unsigned long	idle_on;
+	unsigned long	idle_off;
+	unsigned char	state;
+	unsigned char	enabled;
+};
+
 /*
  * The following defines attributes of the HUB chip. These attributes are
  * frequently referenced and are kept in the per-cpu data areas of each cpu.
@@ -130,7 +140,9 @@ struct uv_hub_info_s {
 	unsigned char	blade_processor_id;
 	unsigned char	m_val;
 	unsigned char	n_val;
+	struct uv_scir_s scir;
 };
+
 DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
 #define uv_hub_info 		(&__get_cpu_var(__uv_hub_info))
 #define uv_cpu_hub_info(cpu)	(&per_cpu(__uv_hub_info, cpu))
@@ -162,6 +174,30 @@ DECLARE_PER_CPU(struct uv_hub_info_s, __
 
 #define UV_APIC_PNODE_SHIFT	6
 
+/* Local Bus from cpu's perspective */
+#define LOCAL_BUS_BASE		0x1c00000
+#define LOCAL_BUS_SIZE		(4 * 1024 * 1024)
+
+/*
+ * System Controller Interface Reg
+ *
+ * Note there are NO LEDs on a UV system.  This register is only
+ * used by the system controller to monitor system-wide operation.
+ * There are 64 regs per node.  With Nehalem cpus (8 cores per cpu,
+ * 2 threads per core, 2 cpus per node) there are 32 cpu threads on
+ * a node.
+ *
+ * The window is located at the top of ACPI MMR space
+ */
+#define SCIR_WINDOW_COUNT	64
+#define SCIR_LOCAL_MMR_BASE	(LOCAL_BUS_BASE + \
+				 LOCAL_BUS_SIZE - \
+				 SCIR_WINDOW_COUNT)
+
+#define SCIR_CPU_HEARTBEAT	0x01	/* timer interrupt */
+#define SCIR_CPU_ACTIVITY	0x02	/* not idle */
+#define SCIR_CPU_HB_INTERVAL	(HZ)	/* once per second */
+
 /*
  * Macros for converting between kernel virtual addresses, socket local physical
  * addresses, and UV global physical addresses.
@@ -276,6 +312,16 @@ static inline void uv_write_local_mmr(un
 	*uv_local_mmr_address(offset) = val;
 }
 
+static inline unsigned char uv_read_local_mmr8(unsigned long offset)
+{
+	return *((unsigned char *)uv_local_mmr_address(offset));
+}
+
+static inline void uv_write_local_mmr8(unsigned long offset, unsigned char val)
+{
+	*((unsigned char *)uv_local_mmr_address(offset)) = val;
+}
+
 /*
  * Structures and definitions for converting between cpu, node, pnode, and blade
  * numbers.
@@ -350,5 +396,21 @@ static inline int uv_num_possible_blades
 	return uv_possible_blades;
 }
 
+/* Update SCIR state */
+static inline void uv_set_scir_bits(unsigned char value)
+{
+	if (uv_hub_info->scir.state != value) {
+		uv_hub_info->scir.state = value;
+		uv_write_local_mmr8(uv_hub_info->scir.offset, value);
+	}
+}
+static inline void uv_set_cpu_scir_bits(int cpu, unsigned char value)
+{
+	if (uv_cpu_hub_info(cpu)->scir.state != value) {
+		uv_cpu_hub_info(cpu)->scir.state = value;
+		uv_write_local_mmr8(uv_cpu_hub_info(cpu)->scir.offset, value);
+	}
+}
+
 #endif /* ASM_X86__UV__UV_HUB_H */
 


^ permalink raw reply	[flat|nested] 26+ messages in thread

end of thread, other threads:[~2008-10-27 18:45 UTC | newest]

Thread overview: 26+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-10-24 11:19 [PATCH 1/1] SGI X86 UV: Provide a System Activity Indicator driver Mike Travis
2008-10-24 12:01 ` Pavel Machek
2008-10-24 12:05   ` Ingo Molnar
2008-10-24 12:27     ` Mike Travis
2008-10-24 18:12       ` Andi Kleen
2008-10-24 22:18         ` Mike Travis
2008-10-24 22:24           ` Mike Travis
2008-10-27 11:42             ` Ingo Molnar
2008-10-27 14:38               ` Mike Travis
2008-10-27 14:51               ` [PATCH 1/1] SGI x86 UV: Update SCIR driver to use idle_cpu function Mike Travis
2008-10-27 18:06                 ` Ingo Molnar
2008-10-27 18:46                   ` [PATCH 1/1] SGI x86 UV: Use raw_smp_processor_id Mike Travis
2008-10-25  6:56           ` [PATCH 1/1] SGI X86 UV: Provide a System Activity Indicator driver Andi Kleen
2008-10-27 15:12             ` Mike Travis
2008-10-27 11:36           ` Ingo Molnar
2008-10-27 11:43       ` Ingo Molnar
2008-10-24 12:14   ` Mike Travis
  -- strict thread matches above, loose matches on Subject: below --
2008-09-19 14:37 [PATCH 0/1] " Mike Travis
2008-09-19 14:37 ` [PATCH 1/1] " Mike Travis
2008-09-22 11:48   ` Ingo Molnar
2008-09-22 14:42     ` Mike Travis
2008-09-28 19:42       ` Pavel Machek
2008-10-01 18:15         ` Mike Travis
2008-10-01 19:41           ` Mike Travis
2008-10-02  8:37           ` Pavel Machek
2008-10-02 14:33             ` Mike Travis
2008-09-22 14:47     ` Mike Travis

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).