* [Linux-ia64] SAL error record logging/decoding
@ 2003-05-07 23:41 Bjorn Helgaas
  2003-05-08  0:05 ` David Mosberger
                   ` (21 more replies)
  0 siblings, 22 replies; 23+ messages in thread
From: Bjorn Helgaas @ 2003-05-07 23:41 UTC (permalink / raw)
  To: linux-ia64

[-- Attachment #1: Type: text/plain, Size: 2001 bytes --]

The MCA/INIT/CMC/CPE log decoding currently in arch/ia64/kernel/mca.c
has some problems:

	- It doesn't know much about OEM-specific sections.
	- At boot-time, it sometimes takes so long to print
	  the log to the console that the BSP erroneously
	  assumes an AP is stuck.  This sometimes causes
	  *another* MCA.
	- The log goes ONLY to the console, where the output
	  may be lost.

So here's some fodder for discussion.  I don't claim that this is ready
for prime time; I just want to get some feedback on whether this
is a reasonable approach.

The attached patch (against 2.4.21-rc1) makes the raw, binary
error records straight from SAL available via files in /proc:

	/proc/sal/cpu<n>/{mca,init,cmc,cpe}

If you read the file, you get the raw data.  If you write "clear" to
it, you invalidate the current error record (which as I read the spec,
may potentially make another, pending record available to be read).

The idea is that

	- An rc script run at boot-time can save all the logs in
	  files, clearing each afterwards.
	- A user-level analysis tool can decode them as needed
	  (perhaps also run from the same rc script above).
	- The user-level analyzer need not be open-source, if
	  people are worried about IP in the OEM-specific sections.
	- A baseline open-source analyzer can provide at least the
	  functionality available today in the kernel decoder.
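
To make that concrete, a boot-time saver along those lines might look
roughly like the sketch below (illustrative only -- the destination
path, the record size limit, and the CPU bound are assumptions, not
part of the patch):

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>

static const char *types[] = { "mca", "init", "cmc", "cpe" };

static void
save_and_clear(int cpu, const char *type)
{
	char src[64], dst[128], buf[16384];	/* assumed max record size */
	int in, out;
	ssize_t n;

	snprintf(src, sizeof(src), "/proc/sal/cpu%d/%s", cpu, type);
	in = open(src, O_RDWR);
	if (in < 0)
		return;				/* CPU not present */

	n = read(in, buf, sizeof(buf));
	if (n > 0) {
		snprintf(dst, sizeof(dst), "/var/log/sal/cpu%d-%s.rec", cpu, type);
		out = open(dst, O_WRONLY | O_CREAT | O_TRUNC, 0600);
		if (out >= 0) {
			if (write(out, buf, n) == n)
				write(in, "clear", 5);	/* clear only after the copy is safe */
			close(out);
		}
	}
	close(in);
}

int
main(void)
{
	int cpu, t;

	for (cpu = 0; cpu < 256; cpu++)		/* assumed CPU bound */
		for (t = 0; t < 4; t++)
			save_and_clear(cpu, types[t]);
	return 0;
}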

So, attached are the kernel patch against 2.4.21-rc1 and a simple
user program ("salinfo") to decode the logs.  Note that the kernel
patch removes the SAL clear_state_info calls from mca.c, so the error
records will be preserved until the user program can read them.
This feels like the right thing to me (only a user program
can know that the logs have been saved somewhere safe), but
no doubt there are issues here.

The user-space analyzer is derived from the current kernel code
in mca.c and should produce identical output.  For now, I left
all the code in the kernel as well, but ultimately it could be
removed.

Bjorn

[-- Attachment #2: diffs --]
[-- Type: text/x-diff, Size: 6379 bytes --]

===== arch/ia64/kernel/mca.c 1.23 vs edited =====
--- 1.23/arch/ia64/kernel/mca.c	Fri Apr 18 04:07:09 2003
+++ edited/arch/ia64/kernel/mca.c	Fri May  2 11:24:15 2003
@@ -156,10 +156,6 @@
 	 */
 
 	platform_err = ia64_log_print(sal_info_type, (prfunc_t)printk);
-	/* temporary: only clear SAL logs on hardware-corrected errors
-		or if we're logging an error after an MCA-initiated reboot */
-	if ((sal_info_type > 1) || (called_from_init))
-		ia64_sal_clear_state_info(sal_info_type);
 
 	return platform_err;
 }
@@ -1235,9 +1231,6 @@
 	proc_ptr = &plog_ptr->proc_err;
 
 	ia64_process_min_state_save(&SAL_LPI_PSI_INFO(proc_ptr)->min_state_area);
-
-	/* Clear the INIT SAL logs now that they have been saved in the OS buffer */
-	ia64_sal_clear_state_info(SAL_INFO_TYPE_INIT);
 
 	init_handler_platform(proc_ptr, pt, sw);	/* call platform specific routines */
 }
===== arch/ia64/kernel/salinfo.c 1.1 vs edited =====
--- 1.1/arch/ia64/kernel/salinfo.c	Thu Sep 12 10:43:47 2002
+++ edited/arch/ia64/kernel/salinfo.c	Tue May  6 14:53:28 2003
@@ -4,6 +4,8 @@
  * Creates entries in /proc/sal for various system features.
  *
  * Copyright (c) 2001 Silicon Graphics, Inc.  All rights reserved.
+ * Copyright (c) 2003 Hewlett-Packard Co
+ *	Bjorn Helgaas <bjorn_helgaas@hp.com>
  *
  * 10/30/2001	jbarnes@sgi.com		copied much of Stephane's palinfo
  *					code to create this file
@@ -12,8 +14,10 @@
 #include <linux/types.h>
 #include <linux/proc_fs.h>
 #include <linux/module.h>
+#include <linux/smp.h>
 
 #include <asm/sal.h>
+#include <asm/uaccess.h>
 
 MODULE_AUTHOR("Jesse Barnes <jbarnes@sgi.com>");
 MODULE_DESCRIPTION("/proc interface to IA-64 SAL features");
@@ -40,25 +44,191 @@
 
 #define NR_SALINFO_ENTRIES (sizeof(salinfo_entries)/sizeof(salinfo_entry_t))
 
-/*
- * One for each feature and one more for the directory entry...
- */
-static struct proc_dir_entry *salinfo_proc_entries[NR_SALINFO_ENTRIES + 1];
+static char *salinfo_log_name[] = {
+	"mca",
+	"init",
+	"cmc",
+	"cpe",
+};
+
+static struct proc_dir_entry *salinfo_proc_entries[
+	ARRAY_SIZE(salinfo_entries) +			/* /proc/sal/bus_lock */
+	(NR_CPUS * ARRAY_SIZE(salinfo_log_name)) +	/* /proc/sal/cpu0/mca */
+	NR_CPUS +					/* /proc/sal/cpu0 */
+	1];						/* /proc/sal */
+
+struct salinfo_log_data {
+	int	type;
+	u8	*log_buffer;
+	u64	log_size;
+};
+
+static void
+salinfo_log_read_cpu(void *context)
+{
+	struct salinfo_log_data *info = context;
+	u64 size;
+
+	size = ia64_sal_get_state_info_size(info->type);
+	info->log_buffer = kmalloc(size, GFP_ATOMIC);
+	if (!info->log_buffer)
+		return;
+
+	info->log_size = ia64_sal_get_state_info(info->type, (u64 *) info->log_buffer);
+}
+
+static ssize_t
+salinfo_log_read(struct file *file, char *buffer, size_t count, loff_t *ppos)
+{
+	struct inode *inode = file->f_dentry->d_inode;
+	struct proc_dir_entry *entry = (struct proc_dir_entry *) inode->u.generic_ip;
+	struct salinfo_log_data info;
+	int cpu, ret;
+	void *data;
+	size_t size;
+
+	if (!suser())
+		return -EPERM;
+
+	MOD_INC_USE_COUNT;
+
+	cpu = (u64) entry->data >> 16;
+	info.type = (u64) entry->data & 0xffff;
+
+	if (cpu == smp_processor_id())
+		salinfo_log_read_cpu(&info);
+	else {
+#ifdef CONFIG_SMP
+		smp_call_function_single(cpu, salinfo_log_read_cpu, &info, 0, 1);
+#else
+		printk(KERN_ERR "%s: trying to read CPU %d data from %d\n",
+			__FUNCTION__, cpu, smp_processor_id());
+		info.log_buffer = 0;
+#endif
+	}
+
+	if (!info.log_buffer || *ppos >= info.log_size) {
+		ret = 0;
+		goto out;
+	}
+
+	data = info.log_buffer + file->f_pos;
+	size = info.log_size - file->f_pos;
+	if (size > count)
+		size = count;
+
+	if (copy_to_user(buffer, data, size)) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	*ppos += size;
+	ret = size;
+
+out:
+	kfree(info.log_buffer);
+
+	MOD_DEC_USE_COUNT;
+
+	return ret;
+}
+
+static void
+salinfo_log_write_cpu(void *context)
+{
+	u64 type = (u64) context;
+
+	ia64_sal_clear_state_info(type);
+}
+
+static ssize_t
+salinfo_log_write(struct file *file, const char *buffer, size_t count, loff_t *ppos)
+{
+	struct inode *inode = file->f_dentry->d_inode;
+	struct proc_dir_entry *entry = (struct proc_dir_entry *) inode->u.generic_ip;
+	char cmd[16];
+	int cpu;
+	u64 type;
+
+	if (!suser())
+		return -EPERM;
+
+	if (ppos != &file->f_pos)
+		return -ESPIPE;
+
+	memset(cmd, 0, sizeof(cmd));
+	if (copy_from_user(cmd, buffer, sizeof(cmd)))
+		return -EFAULT;
+
+	if (strncmp(cmd, "clear", 5))
+		return count;
+
+	MOD_INC_USE_COUNT;
+
+	cpu = (u64) entry->data >> 16;
+	type = (u64) entry->data & 0xffff;
+
+	if (cpu == smp_processor_id())
+		salinfo_log_write_cpu((void *) type);
+	else {
+#ifdef CONFIG_SMP
+		smp_call_function_single(cpu, salinfo_log_write_cpu, (void *) type, 0, 1);
+#else
+		printk(KERN_ERR "%s: trying to clear CPU %d data from %d\n",
+			__FUNCTION__, cpu, smp_processor_id());
+#endif
+	}
+
+	MOD_DEC_USE_COUNT;
+
+	return count;
+}
+
+static struct file_operations salinfo_log_fops = {
+	.read  = salinfo_log_read,
+	.write = salinfo_log_write,
+};
 
 static int __init
 salinfo_init(void)
 {
 	struct proc_dir_entry *salinfo_dir; /* /proc/sal dir entry */
 	struct proc_dir_entry **sdir = salinfo_proc_entries; /* keeps track of every entry */
-	int i;
+	struct proc_dir_entry *cpu_dir, *entry;
+#define CPUSTR "cpu%d"
+	char name[sizeof(CPUSTR)];
+	int i, j;
 
 	salinfo_dir = proc_mkdir("sal", NULL);
+	if (!salinfo_dir)
+		return 0;
 
 	for (i=0; i < NR_SALINFO_ENTRIES; i++) {
 		/* pass the feature bit in question as misc data */
 		*sdir++ = create_proc_read_entry (salinfo_entries[i].name, 0, salinfo_dir,
 						  salinfo_read, (void *)salinfo_entries[i].feature);
 	}
+
+	for (i = 0; i < NR_CPUS; i++) {
+		if (!cpu_online(i))
+			continue;
+
+		sprintf(name, CPUSTR, i);
+		cpu_dir = proc_mkdir(name, salinfo_dir);
+		if (!cpu_dir)
+			continue;
+
+		for (j = 0; j < ARRAY_SIZE(salinfo_log_name); j++) {
+			entry = create_proc_entry(salinfo_log_name[j], 0, cpu_dir);
+			if (entry) {
+				entry->proc_fops = &salinfo_log_fops;
+				entry->data = (void *) ((u64) i << 16 | j);
+				*sdir++ = entry;
+			}
+		}
+		*sdir++ = cpu_dir;
+	}
+
 	*sdir++ = salinfo_dir;
 
 	return 0;
@@ -69,7 +239,7 @@
 {
 	int i = 0;
 
-	for (i = 0; i < NR_SALINFO_ENTRIES ; i++) {
+	for (i = 0; i < ARRAY_SIZE(salinfo_proc_entries); i++) {
 		if (salinfo_proc_entries[i])
 			remove_proc_entry (salinfo_proc_entries[i]->name, NULL);
 	}

[-- Attachment #3: salinfo.tar.gz --]
[-- Type: application/x-tgz, Size: 32027 bytes --]


* Re: [Linux-ia64] SAL error record logging/decoding
  2003-05-07 23:41 [Linux-ia64] SAL error record logging/decoding Bjorn Helgaas
@ 2003-05-08  0:05 ` David Mosberger
  2003-05-08  0:13 ` Luck, Tony
                   ` (20 subsequent siblings)
  21 siblings, 0 replies; 23+ messages in thread
From: David Mosberger @ 2003-05-08  0:05 UTC (permalink / raw)
  To: linux-ia64

>>>>> On Wed, 7 May 2003 17:41:08 -0600, Bjorn Helgaas <bjorn_helgaas@hp.com> said:

  Bjorn> The attached patch (against 2.4.21-rc1) makes the raw, binary
  Bjorn> error records straight from SAL available via files in /proc:

  Bjorn> 	/proc/sal/cpu<n>/{mca,init,cmc,cpe}

  Bjorn> If you read the file, you get the raw data.  If you write
  Bjorn> "clear" to it, you invalidate the current error record (which
  Bjorn> as I read the spec, may potentially make another, pending
  Bjorn> record available to be read).

Looks excellent to me, except: wouldn't you want to make this a
filesystem instead?  (Do I sound like Al Viro? ;-)

So instead of /proc/sal you'd do:

	mount -t salfs dummy /wherever

and then you'd get cpu<n>/{mca,init,cmc,cpe} in /wherever.  As of
2.5.67, we have:

	$ cat /proc/filesystems |grep nodev
	nodev   sysfs
	nodev   rootfs
	nodev   bdev
	nodev   proc
	nodev   sockfs
	nodev   usbfs
	nodev   usbdevfs
	nodev   futexfs
	nodev   tmpfs
	nodev   pipefs
	nodev   eventpollfs
	nodev   binfmt_misc
	nodev   devpts
	nodev   ramfs
	nodev   nfs
	nodev   nfs4
	nodev   nfsd
	nodev   autofs
	nodev   rpc_pipefs

So clearly there is a precedent.  Plus it would avoid all the
inefficiencies of /proc.

	--david



* RE: [Linux-ia64] SAL error record logging/decoding
  2003-05-07 23:41 [Linux-ia64] SAL error record logging/decoding Bjorn Helgaas
  2003-05-08  0:05 ` David Mosberger
@ 2003-05-08  0:13 ` Luck, Tony
  2003-05-08 19:32 ` Bjorn Helgaas
                   ` (19 subsequent siblings)
  21 siblings, 0 replies; 23+ messages in thread
From: Luck, Tony @ 2003-05-08  0:13 UTC (permalink / raw)
  To: linux-ia64

> From: Bjorn Helgaas [mailto:bjorn_helgaas@hp.com] 
> 
> The MCA/INIT/CMC/CPE log decoding currently in arch/ia64/kernel/mca.c
> has some problems:
> 
> 	- It doesn't know much about OEM-specific sections.
> 	- At boot-time, it sometimes takes so long to print
> 	  the log to the console that the BSP erroneously
> 	  assumes an AP is stuck.  This sometimes causes
> 	  *another* MCA.
> 	- The log goes ONLY to the console, where the output
> 	  may be lost.
> 
> So here's some fodder for discussion.  I don't claim that 
> this is ready
> for prime time; I just want to get some feedback on whether this
> is a reasonable approach.
> 
> The attached patch (against 2.4.21-rc1) makes the raw, binary
> error records straight from SAL available via files in /proc:
> 
> 	/proc/sal/cpu<n>/{mca,init,cmc,cpe}
> 
> If you read the file, you get the raw data.  If you write "clear" to
> it, you invalidate the current error record (which as I read the spec,
> may potentially make another, pending record available to be read).
> 
> The idea is that
> 
> 	- An rc script run at boot-time can save all the logs in
> 	  files, clearing each afterwards.
> 	- A user-level analysis tool can decode them as needed
> 	  (perhaps also run from the same rc script above).
> 	- The user-level analyzer need not be open-source, if
> 	  people are worried about IP in the OEM-specific sections.
> 	- A baseline open-source analyzer can provide at least the
> 	  functionality available today in the kernel decoder.
> 
> So, attached are the kernel patch against 2.4.21-rc1 and a simple
> user program ("salinfo") to decode the logs.  Note that the kernel
> patch removes the SAL clear_state_info calls from mca.c, so the error
> records will be preserved until the user program can read them.
> This feels like the right thing to me (only a user program
> can know that the logs have been saved somewhere safe), but
> no doubt there are issues here.
> 
> The user-space analyzer is derived from the current kernel code
> in mca.c and should produce identical output.  For now, I left
> all the code in the kernel as well, but ultimately it could be
> removed.

Definitely a step in the right direction.  SAL error records are
much too big, ugly and verbose to have them run through "printk"
to the console. Parsing in userland is great too.

I've also hit some issues with MCA recovery where printing the
error information from within the MCA handler tripped into other
problems (perhaps because of the time taken as you suggest).  So
I've been pondering some such mechanism too.

When to clear record from the SAL error log is a thorny question.
There are two conflicting goals:
1) Making sure that we minimize the chance that we lose error
information ... i.e. we would like to be sure that the error
record was saved to some permanent storage before we clear it

2) We need to clear records from the SAL log as soon as we can to
make space for subsequent records to be logged (and to reveal other
records that are already in the log).

I think the fact that we need to clear a record to see the next one
might force us into taking a few risks of losing a message ... which
makes me believe that we need a mechanism to read and delete an error
record from the log and buffer it someplace until it can be picked up
from /proc (rather than using the "clear" command to the /proc
interface that you suggest).

-Tony




* Re: [Linux-ia64] SAL error record logging/decoding
  2003-05-07 23:41 [Linux-ia64] SAL error record logging/decoding Bjorn Helgaas
  2003-05-08  0:05 ` David Mosberger
  2003-05-08  0:13 ` Luck, Tony
@ 2003-05-08 19:32 ` Bjorn Helgaas
  2003-05-20 22:58 ` Bjorn Helgaas
                   ` (18 subsequent siblings)
  21 siblings, 0 replies; 23+ messages in thread
From: Bjorn Helgaas @ 2003-05-08 19:32 UTC (permalink / raw)
  To: linux-ia64

On Wednesday 07 May 2003 6:13 pm, Luck, Tony wrote:
> When to clear record from the SAL error log is a thorny question.
> There are two conflicting goals:
> 1) Making sure that we minimize the chance that we lose error
> information ... i.e. we would like to be sure that the error
> record was saved to some permanent storage before we clear it
> 
> 2) We need to clear records from the SAL log as soon as we can to
> make space for subsequent records to be logged (and to reveal other
> records that are already in the log).
> 
> I think the fact that we need to clear a record to see the next one
> might force us into taking a few risks of losing a message ... which
> makes me believe that we need a mechanism to read and delete an error
> record from the log and buffer it someplace until it can be picked up
> from /proc (rather than using the "clear" command to the /proc
> interface that you suggest).

I actually implemented such a read/buffer/clear mechanism, but
the buffer management makes it much more complicated and I couldn't
see any benefit, based on the following reasoning:

There's always a window between SAL_CHECK (where the error records
are created, consuming buffer space) and SAL_CLEAR_STATE_INFO (where
the buffer space is freed).  Information about events that occur in
that window may be lost, regardless of whether the error records are
cleared by the kernel or by a user application.

I'm unconvinced by the argument that the kernel should call
SAL_CLEAR_STATE_INFO in order to reduce (but not eliminate)
the window.

Here's a likely scenario that shows why I think we have to make
sure the log gets to stable storage before we clear it:

	- MCA occurs
	- Linux reboots
	- Kernel calls SAL_GET_STATE_INFO, copies records to buffer
	- Kernel calls SAL_CLEAR_STATE_INFO
	- Kernel panics because MCA corrupted root filesystem

Now the MCA error records are lost, and it's not even because SAL
ran out of buffer space!  We might argue that for this reason, the
kernel ought to decode the records to the console, but even then
the console output might not be logged, and vital OEM data might
not be decoded at all.

With my proposal, we at least have the possibility of dumping the
error records from the EFI user interface, even if we can no longer
boot the kernel.

Bjorn




* Re: [Linux-ia64] SAL error record logging/decoding
  2003-05-07 23:41 [Linux-ia64] SAL error record logging/decoding Bjorn Helgaas
                   ` (2 preceding siblings ...)
  2003-05-08 19:32 ` Bjorn Helgaas
@ 2003-05-20 22:58 ` Bjorn Helgaas
  2003-05-21 18:06 ` Luck, Tony
                   ` (17 subsequent siblings)
  21 siblings, 0 replies; 23+ messages in thread
From: Bjorn Helgaas @ 2003-05-20 22:58 UTC (permalink / raw)
  To: linux-ia64

I'd like to make some sort of forward progress on this issue
before releasing a 2.4.21 ia64 patch.  By default this will
be to apply the original patch I posted on May 7.

I'm not trying to railroad anything here, but 2.4.21 is
getting close, and I'd rather not wait until 2.4.22.  So
if there are any major issues with the proposal, please
raise them!  The only real issue so far has been when
the error records should be cleared from the firmware log,
and I'm certainly open to more discussion on that.

As for 2.5, I'm willing to turn it into a filesystem if
that's the Right Thing (though it feels like all the
existing /proc/pal, /proc/sal, and /proc/efi stuff should
be converted at the same time, which is more than I really
had in mind).  In any case, it will take me some time to
learn about how that infrastructure works.

Bjorn




* RE: [Linux-ia64] SAL error record logging/decoding
  2003-05-07 23:41 [Linux-ia64] SAL error record logging/decoding Bjorn Helgaas
                   ` (3 preceding siblings ...)
  2003-05-20 22:58 ` Bjorn Helgaas
@ 2003-05-21 18:06 ` Luck, Tony
  2003-05-21 20:48 ` Luck, Tony
                   ` (16 subsequent siblings)
  21 siblings, 0 replies; 23+ messages in thread
From: Luck, Tony @ 2003-05-21 18:06 UTC (permalink / raw)
  To: linux-ia64

> From: Bjorn Helgaas [mailto:bjorn_helgaas@hp.com]
> 
> I'd like to make some sort of forward progress on this issue
> before releasing a 2.4.21 ia64 patch.  By default this will
> be to apply the original patch I posted on May 7.
> 
> I'm not trying to railroad anything here, but 2.4.21 is
> getting close, and I'd rather not wait until 2.4.22.  So
> if there are any major issues with the proposal, please
> raise them!  The only real issue so far has been when
> the error records should be cleared from the firmware log,
> and I'm certainly open to more discussion on that.
> 
> As for 2.5, I'm willing to turn it into a filesystem if
> that's the Right Thing (though it feels like all the
> existing /proc/pal, /proc/sal, and /proc/efi stuff should
> be converted at the same time, which is more than I really
> had in mind).  In any case, it will take me some time to
> learn about how that infrastructure works.

My only reservation with this is making sure that we deal
with error records in a rapid manner, so that a sequence
of errors occurring in quick succession has less chance
of overflowing the SAL log.

Is there any easy way to get a notification that there is
a record waiting in one of the /proc/sal/cpuX/* files ...
so that a daemon process can wake promptly to pick up the
error, log it, and issue the "clear" to free space for the
next error?  Does /proc support a poll/select mechanism that
could be used here?  Or perhaps there could be a top level
file /proc/sal/error which blocks on read from the daemon
and doesn't return until there's a record to be read? Maybe
the returned data from the read would be the cpu and error
class to tell the daemon which file to read?

Without some such method, the daemon needs to keep reading
all the /proc/sal/cpuX/* files at regular intervals.  This
doesn't look like it scales well. A hypothetical 256 cpu
machine has 1024 such files that need to be checked, so even
with a poll interval of 10 seconds for each file, we'd need
to issue 102 reads per second from the daemon.

-Tony



* RE: [Linux-ia64] SAL error record logging/decoding
  2003-05-07 23:41 [Linux-ia64] SAL error record logging/decoding Bjorn Helgaas
                   ` (4 preceding siblings ...)
  2003-05-21 18:06 ` Luck, Tony
@ 2003-05-21 20:48 ` Luck, Tony
  2003-05-21 21:51 ` Luck, Tony
                   ` (15 subsequent siblings)
  21 siblings, 0 replies; 23+ messages in thread
From: Luck, Tony @ 2003-05-21 20:48 UTC (permalink / raw)
  To: linux-ia64

> I'd like to make some sort of forward progress on this issue
> before releasing a 2.4.21 ia64 patch.  By default this will
> be to apply the original patch I posted on May 7.

A coding nit ...

The #ifdef CONFIG_SMP in the read/write functions seems a little
convoluted, resulting in the weird can't-ever-happen error message
for the non-SMP case that tries to read/write from the wrong
cpu.  Why not write each in the form:

#ifdef CONFIG_SMP
        if (cpu == smp_processor_id())
                salinfo_log_read_cpu(&info);
        else
                smp_call_function_single(cpu, salinfo_log_read_cpu, &info, 0, 1);
#else
        salinfo_log_read_cpu(&info);
#endif

-Tony



* RE: [Linux-ia64] SAL error record logging/decoding
  2003-05-07 23:41 [Linux-ia64] SAL error record logging/decoding Bjorn Helgaas
                   ` (5 preceding siblings ...)
  2003-05-21 20:48 ` Luck, Tony
@ 2003-05-21 21:51 ` Luck, Tony
  2003-05-22 21:29 ` Bjorn Helgaas
                   ` (14 subsequent siblings)
  21 siblings, 0 replies; 23+ messages in thread
From: Luck, Tony @ 2003-05-21 21:51 UTC (permalink / raw)
  To: linux-ia64

Some minor issues with the "salinfo" tool.

1) It doesn't compile :-(

 mca.c: In function `ia64_log_processor_info_print':
 mca.c:961: `printf' undeclared (first use in this function)
 mca.c:961: (Each undeclared identifier is reported only once
 mca.c:961: for each function it appears in.)
 make: *** [mca.o] Error 1

I added an "extern int printf(char *, ...);" declaration rather
than risking including <stdio.h>

2) I crashed my machine with an injected machine check, and
then rebooted.  All four of the /proc/sal/cpuX/mca files had
a copy of the same error record.  Echoing "clear" to one of
them made them all go away.

I think this is normal ... but it may require some interesting
documentation to say why things work like this.

3) The salinfo tool uses exponential increases in the size of the
read that it tries from the /proc/sal/cpuX/mca file.  My particular
error record was 5560 bytes long and strace reports:

  read(3, ""..., 1024) = 1024
  read(3, ""..., 1024) = 1024
  read(3, ""..., 2048) = 2048
  read(3, ""..., 4096) = 1464
  read(3, "", 2632)    = 0

A hypothetically large enough record would result in salinfo reading
more than a page in one piece through /proc, which I think breaks the
way arch/ia64/kernel/salinfo.c is interfacing with /proc.  Perhaps
the salinfo utility should just grow the buffer in 1k increments with
	alloc += 1024;
rather than using
	alloc *= 2;

4) Reading this way is also kind of weird in that every partial read
results in the kernel going back to re-fetch the data from the SAL
with another call to ia64_sal_get_state_info().  One kludgy fix would
be to have the salinfo tool use "getpagesize()" as the initial size
and increment for the buffer it uses (at least for kernels with a 16k
page size ... error records should generally be small enough for a
single slurp). Though we'd still do one extra call to get the nbytes==0
return to signify the EOF (unless we assume the partial read got us
all the data?)
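
To make 3) and 4) concrete, the reading loop on the salinfo side might
look roughly like this sketch -- getpagesize() as the starting size and
the 1k growth step are just the suggestions above, nothing tested:

#include <stdlib.h>
#include <unistd.h>

/* Sketch only: read one /proc/sal/cpuX/* record into a growing buffer,
 * starting at one page and growing in 1k steps instead of doubling. */
static char *
read_record(int fd, size_t *len)
{
	size_t alloc = getpagesize();		/* usually enough for one slurp */
	size_t used = 0;
	char *buf = malloc(alloc), *tmp;
	ssize_t n;

	if (!buf)
		return NULL;

	while ((n = read(fd, buf + used, alloc - used)) > 0) {
		used += n;
		if (used == alloc) {		/* record bigger than expected */
			alloc += 1024;
			tmp = realloc(buf, alloc);
			if (!tmp) {
				free(buf);
				return NULL;
			}
			buf = tmp;
		}
	}
	*len = used;				/* n == 0 signals EOF */
	return buf;
}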



* Re: [Linux-ia64] SAL error record logging/decoding
  2003-05-07 23:41 [Linux-ia64] SAL error record logging/decoding Bjorn Helgaas
                   ` (6 preceding siblings ...)
  2003-05-21 21:51 ` Luck, Tony
@ 2003-05-22 21:29 ` Bjorn Helgaas
  2003-05-23  0:24 ` Bjorn Helgaas
                   ` (13 subsequent siblings)
  21 siblings, 0 replies; 23+ messages in thread
From: Bjorn Helgaas @ 2003-05-22 21:29 UTC (permalink / raw)
  To: linux-ia64

On Wednesday 21 May 2003 2:48 pm, Luck, Tony wrote:
> The #ifdef CONFIG_SMP in the read/write functions seems a little
> convoluted, resulting in the weird can't ever happen error message
> for the non-SMP case that is trying to read/write from the wrong
> cpu.  Why not write each in the form:
> 
> #ifdef CONFIG_SMP
>         if (cpu == smp_processor_id())
>                 salinfo_log_read_cpu(&info);
>         else
>                 smp_call_function_single(cpu, salinfo_log_read_cpu, &info, 0, 1);
> #else
>         salinfo_log_read_cpu(&info);
> #endif

Nice!  I copied the original from somewhere (can't
remember where ATM), but yours is much better.  Thanks!

Bjorn



* Re: [Linux-ia64] SAL error record logging/decoding
  2003-05-07 23:41 [Linux-ia64] SAL error record logging/decoding Bjorn Helgaas
                   ` (7 preceding siblings ...)
  2003-05-22 21:29 ` Bjorn Helgaas
@ 2003-05-23  0:24 ` Bjorn Helgaas
  2003-05-23 15:42 ` Luck, Tony
                   ` (12 subsequent siblings)
  21 siblings, 0 replies; 23+ messages in thread
From: Bjorn Helgaas @ 2003-05-23  0:24 UTC (permalink / raw)
  To: linux-ia64

On Wednesday 21 May 2003 3:51 pm, Luck, Tony wrote:
> Some minor issues with the "salinfo" tool.
> 
> 1) It doesn't compile :-(

I compiled for me (on debian), but I'll add the prototype.

> 2) I crashed my machine with an injected machine check, and
> then rebooted.  All four of the /proc/sal/cpuX/mca files had
> a copy of the same error record.  Echoing "clear" to one of
> them made them all go away.

Hmm...  this sounds like a reflection of the underlying firmware
behavior.  I tried this on a 2-way HP box, and the cpu0/mca
file was different than cpu1/mca, and clearing one did not
clear the other.

> I think this is normal ... but it may require some interesting
> documentation to say why things work like this.

Why do you think that's normal?  It sounds pretty strange
to me.

> 3) The salinfo tool uses exponential increases in the size of the
> read that it tries from the /proc/sal/cpuX/mca file.  
> ...
> A hypothetically large enough record would result in salinfo reading
> more than a page in one piece through /proc, which I think breaks the
> way arch/ia64/kernel/salinfo.c is interfacing with /proc.

I actually expected that to be a problem, but I copied the
code from the /proc/acpi/dsdt stuff, and it seems to be
able to export over 40K of data on my x86 laptop just fine.
So maybe both ACPI and my salinfo stuff are broken, but
I haven't seen any complaints about the ACPI version.
(A weak argument, I know; I just don't know very much
about doing things in /proc :-)

> 4) Reading this way is also kind of weird in that every partial read
> results in the kernel going back to re-fetch the data from the SAL
> with another call to ia64_sal_get_state_info().  One kludgy fix would
> be to have the salinfo tool use "getpagesize()" as the initial size
> and increment for the buffer it uses (at least for kernels with a 16k
> page size ... error records should generally be small enough for a
> single slurp). Though we'd still do one extra call to get the nbytes==0
> return to signify the EOF (unless we assume the partial read got us
> all the data?)

I think making the initial size 8K or 16K seems reasonable.  I
wanted to minimize the management of the kernel buffer, but
I suppose we could do the allocate/get_state_info at open-time,
and deallocate in close.  I'll look at that tomorrow.
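
A rough sketch of the open/close idea (illustrative only -- the struct
and the release hook are made-up names, and the suser() check, the
cross-CPU dispatch, and locking are omitted here):

/* Cache the record at open(), free it at release();
 * read() would then copy out of file->private_data. */
struct salinfo_cached_log {
	u8	*buf;
	u64	size;
};

static int
salinfo_log_open(struct inode *inode, struct file *file)
{
	struct proc_dir_entry *entry = (struct proc_dir_entry *) inode->u.generic_ip;
	u64 type = (u64) entry->data & 0xffff;
	struct salinfo_cached_log *log;

	log = kmalloc(sizeof(*log), GFP_KERNEL);
	if (!log)
		return -ENOMEM;

	log->buf = kmalloc(ia64_sal_get_state_info_size(type), GFP_KERNEL);
	if (!log->buf) {
		kfree(log);
		return -ENOMEM;
	}
	log->size = ia64_sal_get_state_info(type, (u64 *) log->buf);

	file->private_data = log;
	return 0;
}

static int
salinfo_log_release(struct inode *inode, struct file *file)
{
	struct salinfo_cached_log *log = file->private_data;

	kfree(log->buf);
	kfree(log);
	return 0;
}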

Bjorn



* RE: [Linux-ia64] SAL error record logging/decoding
  2003-05-07 23:41 [Linux-ia64] SAL error record logging/decoding Bjorn Helgaas
                   ` (8 preceding siblings ...)
  2003-05-23  0:24 ` Bjorn Helgaas
@ 2003-05-23 15:42 ` Luck, Tony
  2003-05-28 23:26 ` Bjorn Helgaas
                   ` (11 subsequent siblings)
  21 siblings, 0 replies; 23+ messages in thread
From: Luck, Tony @ 2003-05-23 15:42 UTC (permalink / raw)
  To: linux-ia64

> > 2) I crashed my machine with an injected machine check, and
> > then rebooted.  All four of the /proc/sal/cpuX/mca files had
> > a copy of the same error record.  Echoing "clear" to one of
> > them made them all go away.
> 
> Hmm...  this sounds like a reflection of the underlying firmware
> behavior.  I tried this on a 2-way HP box, and the cpu0/mca
> file was different than cpu1/mca, and clearing one did not
> clear the other.
> 
> > I think this is normal ... but it may require some interesting
> > documentation to say why things work like this.
> 
> Why do you think that's normal?  It sounds pretty strange
> to me.

I think that a fatal error record that is retrieved after the
reboot isn't really attached to any particular CPU ... so I can
see the same thing whichever cpu calls into SAL to look at the
log.  Since there is only one record there, clearing it from any
cpu makes it go away globally.  But I'll have to re-read a lot of
SAL spec to see if that is:
	1) intended behaviour
	2) a quirky, but legal SAL implementation
	3) a bug

> > 3) The salinfo tool uses exponential increases in the size of the
> > read that it tries from the /proc/sal/cpuX/mca file.  
> > ...
> > A hypothetically large enough record would result in salinfo reading
> > more than a page in one piece through /proc, which I think 
> breaks the
> > way arch/ia64/kernel/salinfo.c is interfacing with /proc.
> 
> I actually expected that to be a problem, but I copied the
> code from the /proc/acpi/dsdt stuff, and it seems to be
> able to export over 40K of data on my x86 laptop just fine.
> So maybe both ACPI and my salinfo stuff are broken, but
> I haven't seen any complaints about the ACPI version.
> (A weak argument, I know; I just don't know very much
> about doing things in /proc :-)

I'm not fully up to speed on /proc either ... I think your code
is right after all; I was just mixed up with an alternate "read"
interface to /proc that was intended for simpler use and had a
one-page limit.
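
(For reference, the simpler interface I had in mind is the read_proc
style callback -- the same kind of hook the existing /proc/sal feature
entries use via create_proc_read_entry() -- which hands the handler a
single page to fill; a sketch with made-up names is below.  Installing
full file_operations through entry->proc_fops, as your log files do,
avoids that limit.)

/* Sketch of the page-limited /proc read interface: the callback
 * fills at most one page supplied by procfs. */
static int
salinfo_feature_read_proc(char *page, char **start, off_t off, int count,
			  int *eof, void *data)
{
	int len = sprintf(page, "feature 0x%lx\n", (unsigned long) data);

	*eof = 1;
	return len;
}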

> > 4) Reading this way is also kind of weird in that every partial read
> > results in the kernel going back to re-fetch the data from the SAL
> > with another call to ia64_sal_get_state_info().  One kludgy 
> fix would
> > be to have the salinfo tool use "getpagesize()" as the initial size
> > and increment for the buffer it uses (at least for kernels 
> with a 16k
> > page size ... error records should generally be small enough for a
> > single slurp). Though we'd still do one extra call to get 
> the nbytes==0
> > return to signify the EOF (unless we assume the partial read got us
> > all the data?)
> 
> I think making the initial size 8K or 16K seems reasonable.  I
> wanted to minimize the management of the kernel buffer, but
> I suppose we could do the allocate/get_state_info at open-time,
> and deallocate in close.  I'll look at that tomorrow.

If this comes together cleanly, then great ... otherwise don't sweat
this too much ... if reading SAL error records is in your performance
path, then your machine is in deep trouble!

-Tony



* Re: [Linux-ia64] SAL error record logging/decoding
  2003-05-07 23:41 [Linux-ia64] SAL error record logging/decoding Bjorn Helgaas
                   ` (9 preceding siblings ...)
  2003-05-23 15:42 ` Luck, Tony
@ 2003-05-28 23:26 ` Bjorn Helgaas
  2003-05-29  0:07 ` Keith Owens
                   ` (10 subsequent siblings)
  21 siblings, 0 replies; 23+ messages in thread
From: Bjorn Helgaas @ 2003-05-28 23:26 UTC (permalink / raw)
  To: linux-ia64

[-- Attachment #1: Type: text/plain, Size: 11561 bytes --]

On Wednesday 21 May 2003 12:06 pm, Luck, Tony wrote:
> Is there any easy way to get a notification that there is
> a record waiting in one of the /proc/sal/cpuX/* files ...

That's a good question that hadn't even occurred to me.
Here's another pass; see what you think:

	- Added support for poll(2) on the cpuX/* files
	- App keeps all the (4*NR_CPUS) files open
	- poll(2) tells app which files have data to be read
	- Added wakeup at the place we currently call
	  ia64_log_print

It got a little more involved than I hoped, because I didn't
want to call get_state_info() for all (4*NR_CPUS) files every
time the app called poll(2).
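
For the application side, the daemon loop this enables might look
roughly like the sketch below (the CPU bound, buffer size, and the
save-to-disk step are placeholders, not part of the patch):

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <poll.h>

#define MAX_FDS	1024			/* 4 log types * 256 CPUs, say */

static const char *types[] = { "mca", "init", "cmc", "cpe" };

int
main(void)
{
	struct pollfd pfd[MAX_FDS];
	char path[64], buf[16384];
	ssize_t n;
	int nfds = 0, fd, cpu, t, i;

	for (cpu = 0; cpu < 256 && nfds < MAX_FDS; cpu++) {
		for (t = 0; t < 4; t++) {
			snprintf(path, sizeof(path), "/proc/sal/cpu%d/%s",
				 cpu, types[t]);
			fd = open(path, O_RDWR);
			if (fd < 0)
				continue;	/* CPU not online */
			pfd[nfds].fd = fd;
			pfd[nfds].events = POLLIN;
			nfds++;
		}
	}

	while (poll(pfd, nfds, -1) >= 0) {
		for (i = 0; i < nfds; i++) {
			if (!(pfd[i].revents & POLLIN))
				continue;
			n = read(pfd[i].fd, buf, sizeof(buf));
			if (n > 0) {
				/* ... save buf[0..n) to stable storage ... */
				write(pfd[i].fd, "clear", 5);
			}
			lseek(pfd[i].fd, 0, SEEK_SET);	/* rewind for the next event */
		}
	}
	return 0;
}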

Oh, and I removed the MCA printks from the kernel.  I'll
go back and pull all the other printks out too, but I'll
propose that as a separate patch to avoid cluttering this
one.

Later, Tony wrote:
> 4) Reading this way is also kind of weird in that every partial read
> results in the kernel going back to re-fetch the data from the SAL
> with another call to ia64_sal_get_state_info().

I thought about trying to avoid this by caching the records in
open(), but then the app has to close and reopen the file in
order to see new data.  I really want to avoid managing buffers
of this stuff in the kernel, and I couldn't figure out a good
way to avoid the extra get_state_info() calls without maintaining
a lot more state.  With the current patch, we do about 4 or 5
get_state_info() calls per event:

	- ia64_os_mca_tlb_error_check() in mca_asm.S
	- ia64_log_get() in mca.c ISR
	- salinfo_log_read_cpu() when app reads record
	- salinfo_log_read_cpu() (to get EOF indication
	  after a short read)
	- salinfo_log_clear_cpu() when app clears record
	  (to see whether this makes another record available)

and I think we can get rid of the one in mca.c by just doing
the appropriate wakeup.  If the kernel doesn't decode anything,
there's probably no need to even get the record.

That still seems like a lot, but at least it scales with the
number of events that occur, not the number of CPUs and event
types.

Bjorn


===== arch/ia64/kernel/mca.c 1.26 vs edited =====
--- 1.26/arch/ia64/kernel/mca.c	Fri May 16 04:33:42 2003
+++ edited/arch/ia64/kernel/mca.c	Wed May 28 09:48:33 2003
@@ -130,12 +130,14 @@
  */
 static int cmc_polling_enabled = 1;
 
+extern void salinfo_log_wakeup(int);
+
 /*
  *  ia64_mca_log_sal_error_record
  *
- *  This function retrieves a specified error record type from SAL, sends it to
- *  the system log, and notifies SALs to clear the record from its non-volatile
- *  memory.
+ *  This function retrieves a specified error record type from SAL,
+ *  wakes up any processes waiting for error records, and sends it to
+ *  the system log.
  *
  *  Inputs  :   sal_info_type   (Type of error record MCA/CMC/CPE/INIT)
  *  Outputs :   platform error status
@@ -155,11 +157,8 @@
 	 * 3. set ia64_os_mca_recovery_successful flag, if applicable
 	 */
 
+	salinfo_log_wakeup(sal_info_type);
 	platform_err = ia64_log_print(sal_info_type, (prfunc_t)printk);
-	/* temporary: only clear SAL logs on hardware-corrected errors
-		or if we're logging an error after an MCA-initiated reboot */
-	if ((sal_info_type > 1) || (called_from_init))
-		ia64_sal_clear_state_info(sal_info_type);
 
 	return platform_err;
 }
@@ -369,9 +368,7 @@
  *  ia64_mca_check_errors
  *
  *  External entry to check for error records which may have been posted by SAL
- *  for a prior failure which resulted in a machine shutdown before an the
- *  error could be logged.  This function must be called after the filesystem
- *  is initialized.
+ *  for a prior failure.
  *
  *  Inputs  :   None
  *
@@ -383,6 +380,7 @@
 	/*
 	 *  If there is an MCA error record pending, get it and log it.
 	 */
+	printk("CPU %d: checking for saved MCA error records\n", smp_processor_id());
 	ia64_mca_log_sal_error_record(SAL_INFO_TYPE_MCA, 1);
 
 	return 0;
@@ -1250,9 +1248,6 @@
 
 	ia64_process_min_state_save(&SAL_LPI_PSI_INFO(proc_ptr)->min_state_area);
 
-	/* Clear the INIT SAL logs now that they have been saved in the OS buffer */
-	ia64_sal_clear_state_info(SAL_INFO_TYPE_INIT);
-
 	init_handler_platform(proc_ptr, pt, sw);	/* call platform specific routines */
 }
 
@@ -2302,10 +2297,8 @@
 
 	switch(sal_info_type) {
 	      case SAL_INFO_TYPE_MCA:
-		prfunc("+BEGIN HARDWARE ERROR STATE AT MCA\n");
-		platform_err = ia64_log_platform_info_print(IA64_LOG_CURR_BUFFER(sal_info_type),
-							    prfunc);
-		prfunc("+END HARDWARE ERROR STATE AT MCA\n");
+		prfunc("+CPU %d: SAL log contains MCA error record\n", smp_processor_id());
+		ia64_log_rec_header_print(IA64_LOG_CURR_BUFFER(sal_info_type), prfunc);
 		break;
 	      case SAL_INFO_TYPE_INIT:
 		prfunc("+MCA INIT ERROR LOG (UNIMPLEMENTED)\n");
===== arch/ia64/kernel/salinfo.c 1.1 vs edited =====
--- 1.1/arch/ia64/kernel/salinfo.c	Thu Sep 12 10:43:47 2002
+++ edited/arch/ia64/kernel/salinfo.c	Wed May 28 15:46:59 2003
@@ -4,16 +4,21 @@
  * Creates entries in /proc/sal for various system features.
  *
  * Copyright (c) 2001 Silicon Graphics, Inc.  All rights reserved.
+ * Copyright (c) 2003 Hewlett-Packard Co
+ *	Bjorn Helgaas <bjorn.helgaas@hp.com>
  *
  * 10/30/2001	jbarnes@sgi.com		copied much of Stephane's palinfo
  *					code to create this file
  */
 
 #include <linux/types.h>
+#include <linux/poll.h>
 #include <linux/proc_fs.h>
 #include <linux/module.h>
+#include <linux/smp.h>
 
 #include <asm/sal.h>
+#include <asm/uaccess.h>
 
 MODULE_AUTHOR("Jesse Barnes <jbarnes@sgi.com>");
 MODULE_DESCRIPTION("/proc interface to IA-64 SAL features");
@@ -40,25 +45,236 @@
 
 #define NR_SALINFO_ENTRIES (sizeof(salinfo_entries)/sizeof(salinfo_entry_t))
 
-/*
- * One for each feature and one more for the directory entry...
- */
-static struct proc_dir_entry *salinfo_proc_entries[NR_SALINFO_ENTRIES + 1];
+static char *salinfo_log_name[] = {
+	"mca",
+	"init",
+	"cmc",
+	"cpe",
+};
+
+static struct proc_dir_entry *salinfo_proc_entries[
+	ARRAY_SIZE(salinfo_entries) +			/* /proc/sal/bus_lock */
+	(NR_CPUS * ARRAY_SIZE(salinfo_log_name)) +	/* /proc/sal/cpu0/mca */
+	NR_CPUS +					/* /proc/sal/cpu0 */
+	1];						/* /proc/sal */
+
+struct salinfo_log_data {
+	int	type;
+	u8	*log_buffer;
+	u64	log_size;
+};
+
+struct salinfo_wait_queue {
+	int			cpu;
+	int			type;
+	int			event_ready;
+	wait_queue_head_t	queue;
+};
+
+static struct salinfo_wait_queue *salinfo_wait[NR_CPUS][ARRAY_SIZE(salinfo_log_name)];
+
+void
+salinfo_log_wakeup(int type)
+{
+	int cpu = smp_processor_id();
+
+	if (type < ARRAY_SIZE(salinfo_log_name)) {
+		struct salinfo_wait_queue *wait = salinfo_wait[cpu][type];
+
+		if (wait) {
+			wait->event_ready = 1;
+			wake_up_interruptible(&wait->queue);
+		}
+	}
+}
+
+static int
+salinfo_log_open(struct inode *inode, struct file *file)
+{
+	if (!suser())
+		return -EPERM;
+	return 0;
+}
+
+static void
+call_on_cpu(int cpu, void (*fn)(void *), void *arg)
+{
+#ifdef CONFIG_SMP
+	if (cpu == smp_processor_id())
+		(*fn)(arg);
+	else
+		smp_call_function_single(cpu, fn, arg, 0, 1);
+#else
+	(*fn)(arg);
+#endif
+}
+
+static void
+salinfo_log_read_cpu(void *context)
+{
+	struct salinfo_log_data *info = context;
+	struct salinfo_wait_queue *wait = salinfo_wait[smp_processor_id()][info->type];
+	u64 size;
+
+	size = ia64_sal_get_state_info_size(info->type);
+	info->log_buffer = kmalloc(size, GFP_ATOMIC);
+	if (!info->log_buffer)
+		return;
+
+	wait->event_ready = 0;
+	info->log_size = ia64_sal_get_state_info(info->type, (u64 *) info->log_buffer);
+	if (info->log_size)
+		salinfo_log_wakeup(info->type);
+}
+
+static ssize_t
+salinfo_log_read(struct file *file, char *buffer, size_t count, loff_t *ppos)
+{
+	struct inode *inode = file->f_dentry->d_inode;
+	struct proc_dir_entry *entry = (struct proc_dir_entry *) inode->u.generic_ip;
+	struct salinfo_wait_queue *wait = entry->data;
+	struct salinfo_log_data info;
+	int ret;
+	void *data;
+	size_t size;
+
+	info.type = wait->type;
+	call_on_cpu(wait->cpu, salinfo_log_read_cpu, &info);
+
+	if (!info.log_buffer || *ppos >= info.log_size) {
+		ret = 0;
+		goto out;
+	}
+
+	data = info.log_buffer + file->f_pos;
+	size = info.log_size - file->f_pos;
+	if (size > count)
+		size = count;
+
+	if (copy_to_user(buffer, data, size)) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	*ppos += size;
+	ret = size;
+
+out:
+	kfree(info.log_buffer);
+	return ret;
+}
+
+static void
+salinfo_log_clear_cpu(void *context)
+{
+	struct salinfo_wait_queue *wait = context;
+	struct salinfo_log_data info;
+
+	wait->event_ready = 0;		/* avoid race with another wakeup */
+	ia64_sal_clear_state_info(wait->type);
+
+	info.type = wait->type;
+	salinfo_log_read_cpu(&info);
+	if (info.log_buffer && info.log_size)
+		salinfo_log_wakeup(wait->type);
+
+	kfree(info.log_buffer);
+}
+
+static ssize_t
+salinfo_log_write(struct file *file, const char *buffer, size_t count, loff_t *ppos)
+{
+	struct inode *inode = file->f_dentry->d_inode;
+	struct proc_dir_entry *entry = (struct proc_dir_entry *) inode->u.generic_ip;
+	struct salinfo_wait_queue *wait = entry->data;
+	char cmd[16];
+
+	if (ppos != &file->f_pos)
+		return -ESPIPE;
+
+	memset(cmd, 0, sizeof(cmd));
+	if (copy_from_user(cmd, buffer, sizeof(cmd)))
+		return -EFAULT;
+
+	if (strncmp(cmd, "clear", 5))
+		return count;
+
+	call_on_cpu(wait->cpu, salinfo_log_clear_cpu, wait);
+	return count;
+}
+
+static unsigned int
+salinfo_log_poll(struct file *file, poll_table *polltab)
+{
+	struct inode *inode = file->f_dentry->d_inode;
+	struct proc_dir_entry *entry = (struct proc_dir_entry *) inode->u.generic_ip;
+	struct salinfo_wait_queue *wait = entry->data;
+	unsigned int mask;
+
+	poll_wait(file, &wait->queue, polltab);
+	mask = POLLOUT | POLLWRNORM;
+	if (wait->event_ready)
+		mask |= POLLIN | POLLRDNORM;
+	return mask;
+}
+
+static struct file_operations salinfo_log_fops = {
+	.open  = salinfo_log_open,
+	.read  = salinfo_log_read,
+	.write = salinfo_log_write,
+	.poll  = salinfo_log_poll,
+};
 
 static int __init
 salinfo_init(void)
 {
 	struct proc_dir_entry *salinfo_dir; /* /proc/sal dir entry */
 	struct proc_dir_entry **sdir = salinfo_proc_entries; /* keeps track of every entry */
-	int i;
+	struct proc_dir_entry *cpu_dir, *entry;
+	struct salinfo_wait_queue *wait;
+#define CPUSTR "cpu%d"
+	char name[sizeof(CPUSTR)];
+	int i, j;
 
 	salinfo_dir = proc_mkdir("sal", NULL);
+	if (!salinfo_dir)
+		return 0;
 
 	for (i=0; i < NR_SALINFO_ENTRIES; i++) {
 		/* pass the feature bit in question as misc data */
 		*sdir++ = create_proc_read_entry (salinfo_entries[i].name, 0, salinfo_dir,
 						  salinfo_read, (void *)salinfo_entries[i].feature);
 	}
+
+	for (i = 0; i < NR_CPUS; i++) {
+		if (!cpu_online(i))
+			continue;
+
+		sprintf(name, CPUSTR, i);
+		cpu_dir = proc_mkdir(name, salinfo_dir);
+		if (!cpu_dir)
+			continue;
+
+		for (j = 0; j < ARRAY_SIZE(salinfo_log_name); j++) {
+			wait = kmalloc(sizeof(*wait), GFP_KERNEL);
+			if (!wait)
+				continue;
+
+			entry = create_proc_entry(salinfo_log_name[j], 0, cpu_dir);
+			if (entry) {
+				wait->cpu = i;
+				wait->type = j;
+				wait->event_ready = 1;	/* assume we missed one */
+				init_waitqueue_head(&wait->queue);
+				salinfo_wait[i][j] = wait;
+				entry->data = wait;
+				entry->proc_fops = &salinfo_log_fops;
+				*sdir++ = entry;
+			}
+		}
+		*sdir++ = cpu_dir;
+	}
+
 	*sdir++ = salinfo_dir;
 
 	return 0;
@@ -69,7 +285,7 @@
 {
 	int i = 0;
 
-	for (i = 0; i < NR_SALINFO_ENTRIES ; i++) {
+	for (i = 0; i < ARRAY_SIZE(salinfo_proc_entries); i++) {
 		if (salinfo_proc_entries[i])
 			remove_proc_entry (salinfo_proc_entries[i]->name, NULL);
 	}


[-- Attachment #2: salinfo-0.1.tar.gz --]
[-- Type: application/x-tgz, Size: 33333 bytes --]


* Re: [Linux-ia64] SAL error record logging/decoding
  2003-05-07 23:41 [Linux-ia64] SAL error record logging/decoding Bjorn Helgaas
                   ` (10 preceding siblings ...)
  2003-05-28 23:26 ` Bjorn Helgaas
@ 2003-05-29  0:07 ` Keith Owens
  2003-05-29  1:34 ` Bjorn Helgaas
                   ` (9 subsequent siblings)
  21 siblings, 0 replies; 23+ messages in thread
From: Keith Owens @ 2003-05-29  0:07 UTC (permalink / raw)
  To: linux-ia64

On Wed, 28 May 2003 17:26:02 -0600, 
Bjorn Helgaas <bjorn_helgaas@hp.com> wrote:
>+#define CPUSTR "cpu%d"
>+	char name[sizeof(CPUSTR)];

Too small.  name[] will overflow at cpu 100.




* Re: [Linux-ia64] SAL error record logging/decoding
  2003-05-07 23:41 [Linux-ia64] SAL error record logging/decoding Bjorn Helgaas
                   ` (11 preceding siblings ...)
  2003-05-29  0:07 ` Keith Owens
@ 2003-05-29  1:34 ` Bjorn Helgaas
  2003-05-29  1:37 ` Keith Owens
                   ` (8 subsequent siblings)
  21 siblings, 0 replies; 23+ messages in thread
From: Bjorn Helgaas @ 2003-05-29  1:34 UTC (permalink / raw)
  To: linux-ia64

On Wednesday 28 May 2003 6:07 pm, Keith Owens wrote:
> >+#define CPUSTR "cpu%d"
> >+	char name[sizeof(CPUSTR)];
> 
> Too small.  name[] will overflow at cpu 100.

Thanks.  I remember thinking about that when I copied the code,
but forgot to fix it.  How about something like this:

--- 1.12/arch/ia64/kernel/palinfo.c	Mon Apr 21 04:52:56 2003
+++ edited/arch/ia64/kernel/palinfo.c	Wed May 28 19:02:32 2003
@@ -914,7 +914,7 @@
 	struct proc_dir_entry **pdir = palinfo_proc_entries;
 	struct proc_dir_entry *palinfo_dir, *cpu_dir;
 	int i, j;
-	char cpustr[sizeof(CPUSTR)];
+	char cpustr[sizeof(CPUSTR) + 2];
 
 	printk(KERN_INFO "PAL Information Facility v%s\n", PALINFO_VERSION);
 
@@ -928,7 +928,7 @@
 
 		if (!cpu_online(i)) continue;
 
-		sprintf(cpustr,CPUSTR, i);
+		snprintf(cpustr, sizeof(cpustr), CPUSTR, i);
 
 		cpu_dir = proc_mkdir(cpustr, palinfo_dir);
 
--- arch/ia64/kernel/salinfo.c.orig	2003-05-28 19:03:04.000000000 -0600
+++ arch/ia64/kernel/salinfo.c	2003-05-28 19:03:21.000000000 -0600
@@ -233,7 +233,7 @@
 	struct proc_dir_entry *cpu_dir, *entry;
 	struct salinfo_wait_queue *wait;
 #define CPUSTR "cpu%d"
-	char name[sizeof(CPUSTR)];
+	char name[sizeof(CPUSTR) + 2];
 	int i, j;
 
 	salinfo_dir = proc_mkdir("sal", NULL);
@@ -250,7 +250,7 @@
 		if (!cpu_online(i))
 			continue;
 
-		sprintf(name, CPUSTR, i);
+		snprintf(name, sizeof(name), CPUSTR, i);
 		cpu_dir = proc_mkdir(name, salinfo_dir);
 		if (!cpu_dir)
 			continue;




* Re: [Linux-ia64] SAL error record logging/decoding
  2003-05-07 23:41 [Linux-ia64] SAL error record logging/decoding Bjorn Helgaas
                   ` (12 preceding siblings ...)
  2003-05-29  1:34 ` Bjorn Helgaas
@ 2003-05-29  1:37 ` Keith Owens
  2003-05-29 20:49 ` Luck, Tony
                   ` (7 subsequent siblings)
  21 siblings, 0 replies; 23+ messages in thread
From: Keith Owens @ 2003-05-29  1:37 UTC (permalink / raw)
  To: linux-ia64

On Wed, 28 May 2003 19:34:01 -0600, 
Bjorn Helgaas <bjorn_helgaas@hp.com> wrote:
>On Wednesday 28 May 2003 6:07 pm, Keith Owens wrote:
>> >+#define CPUSTR "cpu%d"
>> >+	char name[sizeof(CPUSTR)];
>> 
>> Too small.  name[] will overflow at cpu 100.
>
>Thanks.  I remember thinking about that when I copied the code,
>but forgot to fix it.  How about something like this:
>+	char cpustr[sizeof(CPUSTR) + 2];

That will do until we go past cpu 9,999.  "10,000 cpus should be enough
for anybody" :)




* RE: [Linux-ia64] SAL error record logging/decoding
  2003-05-07 23:41 [Linux-ia64] SAL error record logging/decoding Bjorn Helgaas
                   ` (13 preceding siblings ...)
  2003-05-29  1:37 ` Keith Owens
@ 2003-05-29 20:49 ` Luck, Tony
  2003-05-29 21:31 ` Bjorn Helgaas
                   ` (6 subsequent siblings)
  21 siblings, 0 replies; 23+ messages in thread
From: Luck, Tony @ 2003-05-29 20:49 UTC (permalink / raw)
  To: linux-ia64

Digging back in this thread to last Thursday ...

> > 2) I crashed my machine with an injected machine check, and
> > then rebooted.  All four of the /proc/sal/cpuX/mca files had
> > a copy of the same error record.  Echoing "clear" to one of
> > them made them all go away.
> 
> Hmm...  this sounds like a reflection of the underlying firmware
> behavior.  I tried this on a 2-way HP box, and the cpu0/mca
> file was different than cpu1/mca, and clearing one did not
> clear the other.
> 
> > I think this is normal ... but it may require some interesting
> > documentation to say why things work like this.
> 
> Why do you think that's normal?  It sounds pretty strange
> to me.

I asked a SAL expert here who said:

 "The SAL spec does not require that the SAL_GET_STATE_INFO API
  be called on the processor where the error was detected (for
  recoverable and fatal errors).  So in this case, the SAL has
  logged it to flash before handing off to the OS.  When the OS
  calls SAL_GET_STATE_INFO, it just retrieves the last error in
  the queue from the flash image.  The processor section of the
  error record has a field for the processor LID --- so you can
  check if the right processor observed the error."


What error did you inject in the case that you describe above
where you saw different independent records in cpu0/mca and
cpu1/mca?

-Tony



* Re: [Linux-ia64] SAL error record logging/decoding
  2003-05-07 23:41 [Linux-ia64] SAL error record logging/decoding Bjorn Helgaas
                   ` (14 preceding siblings ...)
  2003-05-29 20:49 ` Luck, Tony
@ 2003-05-29 21:31 ` Bjorn Helgaas
  2003-05-29 21:47 ` Luck, Tony
                   ` (5 subsequent siblings)
  21 siblings, 0 replies; 23+ messages in thread
From: Bjorn Helgaas @ 2003-05-29 21:31 UTC (permalink / raw)
  To: linux-ia64

On Thursday 29 May 2003 2:49 pm, Luck, Tony wrote:
> Digging back in this thread to last Thursday ...
> 
> > > 2) I crashed my machine with an injected machine check, and
> > > then rebooted.  All four of the /proc/sal/cpuX/mca files had
> > > a copy of the same error record.  Echoing "clear" to one of
> > > them made them all go away.
> > 
> > > I think this is normal ... but it may require some interesting
> > > documentation to say why things work like this.
> > 
> > Why do you think that's normal?  It sounds pretty strange
> > to me.
> 
> I asked a SAL expert here who said:
> 
>  "The SAL spec does not require that the SAL_GET_STATE_INFO API
>   be called on the processor where the error was detected (for
>   recoverable and fatal errors).  So in this case, the SAL has
>   logged it to flash before handing off to the OS.  When the OS
>   calls SAL_GET_STATE_INFO, it just retrieves the last error in
>   the queue from the flash image.  The processor section of the
>   error record has a field for the processor LID --- so you can
>   check if the right processor observed the error."

The SAL spec says

  In an MP environment, processor record information pertains to the
  processor on which this call is executed and the platform information
  pertains to the platform.

I interpret this to mean that a GET_STATE_INFO call can return
platform information no matter which CPU makes the call, but that
processor information can only be returned on the processor that
took the error.

So if you injected a platform MCA that created no processor
error sections, it makes sense to me that you'd see the same
thing in each file, and that clearing one would clear them all.

The salinfo code only sets the "event_ready" flag for the CPU
that calls salinfo_log_wakeup(), so assuming that only one CPU
calls ia64_mca_ucmc_handler(), the user's poll(2) will indicate
only one file ready to read.  The daemon would read that file
and clear it.  So it would see only one error record, which is
probably what everybody expects.

> What error did you inject in the case that you describe above
> where you saw different independent records in cpu0/mca and
> cpu1/mca?

I just did my usual "dd if=/dev/mem of=/dev/null".  This MCAs
when we walk into a memory hole, but the MCA is detected by
the processor, not the platform.  So I'm guessing what I see is
that one CPU returns both platform and processor sections,
and the other returns only platform sections.  It's not clear
to me why the other CPU has a platform error section, or
how it should work to clear these.

Bjorn



* RE: [Linux-ia64] SAL error record logging/decoding
  2003-05-07 23:41 [Linux-ia64] SAL error record logging/decoding Bjorn Helgaas
                   ` (15 preceding siblings ...)
  2003-05-29 21:31 ` Bjorn Helgaas
@ 2003-05-29 21:47 ` Luck, Tony
  2003-05-29 22:38 ` Bjorn Helgaas
                   ` (4 subsequent siblings)
  21 siblings, 0 replies; 23+ messages in thread
From: Luck, Tony @ 2003-05-29 21:47 UTC (permalink / raw)
  To: linux-ia64

So some SAL records are platform related, and some
are tied to a specific CPU.  And different SAL
implementations might fuzz the issue further by
making cpu dependent records visible at the platform
level.

Stepping back from this (a few miles) for a broader
view.  What benefit do we gain at the application
level by making all the mca/init/cmci/cpei files
visible on a per-cpu basis?

For platform level errors, this just causes confusion
as the same record is definitely available on all cpus.
But if your application is "poll"ing all the files, only
one needs to read&clear.

For cpu-level errors (assuming that a SAL implementation
keeps them in separate queues) ... I'm not certain why
the application needs to be able to read from a cpu-specific
file.  If all the error records were funneled into a
single file, would we lose anything?

-Tony



* Re: [Linux-ia64] SAL error record logging/decoding
  2003-05-07 23:41 [Linux-ia64] SAL error record logging/decoding Bjorn Helgaas
                   ` (16 preceding siblings ...)
  2003-05-29 21:47 ` Luck, Tony
@ 2003-05-29 22:38 ` Bjorn Helgaas
  2003-05-29 23:33 ` Luck, Tony
                   ` (3 subsequent siblings)
  21 siblings, 0 replies; 23+ messages in thread
From: Bjorn Helgaas @ 2003-05-29 22:38 UTC (permalink / raw)
  To: linux-ia64

On Thursday 29 May 2003 3:47 pm, Luck, Tony wrote:
> ... What benefit do we gain at the application
> level by making all the mca/init/cmci/cpei files
> visible on a per-cpu basis?

I really like the idea of having a file be an exact binary
image of the buffer from SAL, i.e., no extra headers, etc.

> For platform level errors, this just causes confusion
> as the same record is definitely available on all cpus.
> But if your application is "poll"ing all the files, only
> one needs to read&clear.

If the application is using poll(2), it will only see the
record available on one of the files.  If the application
does its own periodic polling *and* it reads all the
files before clearing any of them, it will see several
copies.

> ...  If all the error records were funneled into a
> single file, would we lose anything?

There is a certain appeal to using a single file, at least from
the application perspective.  Let's run this up the flagpole
and see whether anybody salutes:

	- we export two files: "control" and "data"
	- app uses poll(2) on "control"
	- SAL log events set a bit for CPU and event type
	  and do a wakeup
	- app returns from poll()
	- app reads "control"
	- kernel supplies "cpu 5 cpe" as read(2) data
	- app writes same data ("cpu 5 cpe") to "control"
	- app reads "data"
	- kernel calls GET_STATE_INFO and supplies
	  raw data to app
	- app writes "clear cpu 5 cpe" to "control"
	- kernel clears CPU/event bit, calls CLEAR_STATE_INFO,
	  and calls GET_STATE_INFO, does wakeup if more data

Is that too ugly for words?  It keeps the unadorned SAL data,
requires only two files, and could probably even be driven from
a shell script (if we make read(2) on "control" blocking).  It
feels sort of Plan 9-ish, which is always appealing.  Plus, it
avoids the problem of having hundreds of "cpuXXXX" directories
on all those monster SGI boxes :-)
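
A rough user-space sketch of that flow, just to make it concrete
(the /proc/sal paths, the message strings, and the buffer size are
placeholders, and error handling is omitted):

#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <unistd.h>
#include <fcntl.h>
#include <poll.h>

static char record[64 * 1024];		/* placeholder size */

int main(void)
{
	char msg[64], clear[80];
	int cfd, dfd;
	ssize_t n;

	cfd = open("/proc/sal/control", O_RDWR);
	if (cfd < 0)
		return 1;

	for (;;) {
		struct pollfd pfd = { .fd = cfd, .events = POLLIN };

		poll(&pfd, 1, -1);			/* wait for a SAL log event */
		n = read(cfd, msg, sizeof(msg) - 1);	/* e.g. "cpu 5 cpe" */
		if (n <= 0)
			continue;
		msg[n] = '\0';

		write(cfd, msg, n);			/* select that record */

		dfd = open("/proc/sal/data", O_RDONLY);
		n = read(dfd, record, sizeof(record));	/* raw GET_STATE_INFO data */
		close(dfd);
		/* ... save the n bytes somewhere safe before clearing ... */

		snprintf(clear, sizeof(clear), "clear %s", msg);
		write(cfd, clear, strlen(clear));	/* kernel does CLEAR_STATE_INFO */
	}
}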

There might be fairness issues if events occur faster than the
app reads them -- might have to round-robin through the
CPUs when supplying "control" data.  Or we could use a pair
of files for each type of event, i.e., /proc/sal/mca/{control,data}.

Bjorn



^ permalink raw reply	[flat|nested] 23+ messages in thread

* RE: [Linux-ia64] SAL error record logging/decoding
  2003-05-07 23:41 [Linux-ia64] SAL error record logging/decoding Bjorn Helgaas
                   ` (17 preceding siblings ...)
  2003-05-29 22:38 ` Bjorn Helgaas
@ 2003-05-29 23:33 ` Luck, Tony
  2003-05-30 11:56 ` Matthew Wilcox
                   ` (2 subsequent siblings)
  21 siblings, 0 replies; 23+ messages in thread
From: Luck, Tony @ 2003-05-29 23:33 UTC (permalink / raw)
  To: linux-ia64

> There is a certain appeal to using a single file, at least from
> the application perspective.  Let's run this up the flagpole
> and see whether anybody salutes:
> 
> 	- we export two files: "control" and "data"
> 	- app uses poll(2) on "control"
> 	- SAL log events set a bit for CPU and event type
> 	  and do a wakeup
> 	- app returns from poll()
> 	- app reads "control"
> 	- kernel supplies "cpu 5 cpe" as read(2) data
> 	- app writes same data ("cpu 5 cpe") to "control"
> 	- app reads "data"
> 	- kernel calls GET_STATE_INFO and supplies
> 	  raw data to app
> 	- app writes "clear cpu 5 cpe" to "control"
> 	- kernel clears CPU/event bit, calls CLEAR_STATE_INFO,
> 	  and calls GET_STATE_INFO, does wakeup if more data
> 
> Is that too ugly for words?  It keeps the unadorned SAL data,
> requires only two files, and could probably even be driven from
> a shell script (if we make read(2) on "control" blocking).  It
> feels sort of Plan 9-ish, which is always appealing.  Plus, it
> avoids the problem of having hundreds of "cpuXXXX" directories
> on all those monster SGI boxes :-)
> 
> There might be fairness issues if events occur faster than the
> app reads them -- might have to round-robin through the
> CPUs when supplying "control" data.  Or we could use a pair
> of files for each type of event, i.e., /proc/sal/mca/{control,data}.

Seems odd that we read "cpu 5 cpe" from the "control" file, and then
turn around to write the same thing back to the same file.

Perhaps you are overloading the function of the control file?  With
three files, {event,control,data}, you get all the above functionality,
but with an interface that is simpler to understand.

Applications block reading the "event" file (or use poll(2) on it) to
find out when there is something worth reading.  When they read, they
see the "cpu 5 cpe" type message.  They write that to "control", and
then read from "data".

Or, back to two files: {event,data} ... we read from "event" to see
what happened.  Writing to "data" controls what will be read from "data".
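
For concreteness, one pass through the three-file version might look
something like this from user space (the file names and the "cpu 5 cpe"
string are only illustrative):

#include <sys/types.h>
#include <unistd.h>
#include <fcntl.h>

static char record[64 * 1024];		/* placeholder size */

int main(void)
{
	char msg[64];
	ssize_t n;
	int efd = open("/proc/sal/event", O_RDONLY);
	int cfd = open("/proc/sal/control", O_WRONLY);
	int dfd = open("/proc/sal/data", O_RDONLY);

	if (efd < 0 || cfd < 0 || dfd < 0)
		return 1;

	n = read(efd, msg, sizeof(msg));	/* blocks until e.g. "cpu 5 cpe" */
	if (n > 0) {
		write(cfd, msg, n);		/* tell the kernel which record we want */
		read(dfd, record, sizeof(record));	/* raw SAL data for that record */
	}
	return 0;
}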

-Tony



^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [Linux-ia64] SAL error record logging/decoding
  2003-05-07 23:41 [Linux-ia64] SAL error record logging/decoding Bjorn Helgaas
                   ` (18 preceding siblings ...)
  2003-05-29 23:33 ` Luck, Tony
@ 2003-05-30 11:56 ` Matthew Wilcox
  2003-05-30 20:27 ` Bjorn Helgaas
  2003-05-30 20:31 ` Bjorn Helgaas
  21 siblings, 0 replies; 23+ messages in thread
From: Matthew Wilcox @ 2003-05-30 11:56 UTC (permalink / raw)
  To: linux-ia64

On Thu, May 29, 2003 at 04:33:49PM -0700, Luck, Tony wrote:
> Seems odd that we read "cpu 5 cpe" from the "control" file, and then
> turn around to write the same thing back to the same file.

Agreed.

> Applications block reading the "event" file (or use poll(2) on it) to
> find out that it is worth reading.  When they read, they see the "cpu
> 5 cpe" type message.  They write that to "control", and then read from
> "data".
> 
> Or, back to two files: {event,data} ... we read from "event" to see
> what happened.  Writing to "data" controls what will be read from "data".

I like this alternative better.  But there's a bit of a race condition if
two programs are attempting to use these files at the same time.

-- 
"It's not Hollywood.  War is real, war is primarily not about defeat or
victory, it is about death.  I've seen thousands and thousands of dead bodies.
Do you think I want to have an academic debate on this subject?" -- Robert Fisk


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [Linux-ia64] SAL error record logging/decoding
  2003-05-07 23:41 [Linux-ia64] SAL error record logging/decoding Bjorn Helgaas
                   ` (19 preceding siblings ...)
  2003-05-30 11:56 ` Matthew Wilcox
@ 2003-05-30 20:27 ` Bjorn Helgaas
  2003-05-30 20:31 ` Bjorn Helgaas
  21 siblings, 0 replies; 23+ messages in thread
From: Bjorn Helgaas @ 2003-05-30 20:27 UTC (permalink / raw)
  To: linux-ia64

On Friday 30 May 2003 5:56 am, Matthew Wilcox wrote:
> > Or, back to two files: {event,data} ... we read from "event" to see
> > what happened.  Writing to "data" controls what will be read from "data".
> 
> I like this alternative better.  But there's a bit of a race condition if
> two programs are attempting to use these files at the same time.

OK, I coded this up.  Actually, I made two files per event type,
so we have

	/proc/sal/mca/event
	/proc/sal/mca/data
	/proc/sal/init/event
	... etc

The "event" files are read-only and reads block if no error records
are available.  When records become available, the data returned
looks like "read 0".  If several CPUs have records available, reads
round-robin through them.

The "data" files are read/write and single-open, so a smart app can
keep it open to prevent races.  If you write the data you got from
an "event" file (i.e., "read 0"), subsequent reads from "data" return
error records from the indicated CPU.  You can also write "clear 0"
to clear the error record.

So a simple-minded loop looks like this:

	EVENT=/proc/sal/cpe/event
	DATA=/proc/sal/cpe/data
	LOG=/var/log/sal/cpe/log

	while read ACTION CPU < $EVENT; do
	    echo $ACTION $CPU > $DATA
	    cat  $DATA > $LOG
	    echo clear $CPU > $DATA
	    decode $LOG
	done

Of course a better logger would keep the data file open to
prevent races, generate new log files for each event, etc.
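
A rough sketch of what such a logger might look like (the log-file
naming, buffer size, and most error handling are hand-waved):

#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <unistd.h>
#include <fcntl.h>

static char record[128 * 1024];		/* placeholder size */

int main(void)
{
	char msg[64], path[128];
	int efd, dfd, lfd, cpu, seq = 0;
	ssize_t n, len;

	efd = open("/proc/sal/cpe/event", O_RDONLY);
	dfd = open("/proc/sal/cpe/data", O_RDWR);	/* held open: single-open locks out others */
	if (efd < 0 || dfd < 0)
		return 1;

	while ((n = read(efd, msg, sizeof(msg) - 1)) > 0) {
		msg[n] = '\0';				/* e.g. "read 5" */
		write(dfd, msg, n);			/* select that CPU's record */

		len = read(dfd, record, sizeof(record));
		if (len <= 0)
			continue;

		snprintf(path, sizeof(path), "/var/log/sal/cpe/%d.raw", seq++);
		lfd = open(path, O_WRONLY | O_CREAT | O_TRUNC, 0600);
		if (lfd >= 0) {
			write(lfd, record, len);	/* record is on disk ... */
			close(lfd);
			if (sscanf(msg, "read %d", &cpu) == 1) {
				snprintf(msg, sizeof(msg), "clear %d", cpu);
				write(dfd, msg, strlen(msg));	/* ... so it's safe to clear */
			}
		}
		lseek(dfd, 0, SEEK_SET);		/* rewind for the next record */
	}
	return 0;
}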

I didn't implement poll(2), but we could if it seems necessary.

The basic logging/decoding package is here:

    ftp://kernel.org/pub/linux/kernel/people/helgaas/salinfo-0.1.tar.gz

Bjorn


===== arch/ia64/kernel/mca.c 1.26 vs edited =====
--- 1.26/arch/ia64/kernel/mca.c	Fri May 16 04:33:42 2003
+++ edited/arch/ia64/kernel/mca.c	Thu May 29 19:24:29 2003
@@ -130,12 +130,14 @@
  */
 static int cmc_polling_enabled = 1;
 
+extern void salinfo_log_wakeup(int);
+
 /*
  *  ia64_mca_log_sal_error_record
  *
- *  This function retrieves a specified error record type from SAL, sends it to
- *  the system log, and notifies SALs to clear the record from its non-volatile
- *  memory.
+ *  This function retrieves a specified error record type from SAL,
+ *  wakes up any processes waiting for error records, and sends it to
+ *  the system log.
  *
  *  Inputs  :   sal_info_type   (Type of error record MCA/CMC/CPE/INIT)
  *  Outputs :   platform error status
@@ -155,11 +157,8 @@
 	 * 3. set ia64_os_mca_recovery_successful flag, if applicable
 	 */
 
+	salinfo_log_wakeup(sal_info_type);
 	platform_err = ia64_log_print(sal_info_type, (prfunc_t)printk);
-	/* temporary: only clear SAL logs on hardware-corrected errors
-		or if we're logging an error after an MCA-initiated reboot */
-	if ((sal_info_type > 1) || (called_from_init))
-		ia64_sal_clear_state_info(sal_info_type);
 
 	return platform_err;
 }
@@ -369,9 +368,7 @@
  *  ia64_mca_check_errors
  *
  *  External entry to check for error records which may have been posted by SAL
- *  for a prior failure which resulted in a machine shutdown before an the
- *  error could be logged.  This function must be called after the filesystem
- *  is initialized.
+ *  for a prior failure.
  *
  *  Inputs  :   None
  *
@@ -383,6 +380,7 @@
 	/*
 	 *  If there is an MCA error record pending, get it and log it.
 	 */
+	printk("CPU %d: checking for saved MCA error records\n", smp_processor_id());
 	ia64_mca_log_sal_error_record(SAL_INFO_TYPE_MCA, 1);
 
 	return 0;
@@ -1250,9 +1248,6 @@
 
 	ia64_process_min_state_save(&SAL_LPI_PSI_INFO(proc_ptr)->min_state_area);
 
-	/* Clear the INIT SAL logs now that they have been saved in the OS buffer */
-	ia64_sal_clear_state_info(SAL_INFO_TYPE_INIT);
-
 	init_handler_platform(proc_ptr, pt, sw);	/* call platform specific routines */
 }
 
@@ -2302,10 +2297,8 @@
 
 	switch(sal_info_type) {
 	      case SAL_INFO_TYPE_MCA:
-		prfunc("+BEGIN HARDWARE ERROR STATE AT MCA\n");
-		platform_err = ia64_log_platform_info_print(IA64_LOG_CURR_BUFFER(sal_info_type),
-							    prfunc);
-		prfunc("+END HARDWARE ERROR STATE AT MCA\n");
+		prfunc("+CPU %d: SAL log contains MCA error record\n", smp_processor_id());
+		ia64_log_rec_header_print(IA64_LOG_CURR_BUFFER(sal_info_type), prfunc);
 		break;
 	      case SAL_INFO_TYPE_INIT:
 		prfunc("+MCA INIT ERROR LOG (UNIMPLEMENTED)\n");
===== arch/ia64/kernel/salinfo.c 1.1 vs edited =====
--- 1.1/arch/ia64/kernel/salinfo.c	Thu Sep 12 10:43:47 2002
+++ edited/arch/ia64/kernel/salinfo.c	Fri May 30 12:46:28 2003
@@ -4,6 +4,8 @@
  * Creates entries in /proc/sal for various system features.
  *
  * Copyright (c) 2001 Silicon Graphics, Inc.  All rights reserved.
+ * Copyright (c) 2003 Hewlett-Packard Co
+ *	Bjorn Helgaas <bjorn.helgaas@hp.com>
  *
  * 10/30/2001	jbarnes@sgi.com		copied much of Stephane's palinfo
  *					code to create this file
@@ -12,8 +14,10 @@
 #include <linux/types.h>
 #include <linux/proc_fs.h>
 #include <linux/module.h>
+#include <linux/smp.h>
 
 #include <asm/sal.h>
+#include <asm/uaccess.h>
 
 MODULE_AUTHOR("Jesse Barnes <jbarnes@sgi.com>");
 MODULE_DESCRIPTION("/proc interface to IA-64 SAL features");
@@ -40,25 +44,326 @@
 
 #define NR_SALINFO_ENTRIES (sizeof(salinfo_entries)/sizeof(salinfo_entry_t))
 
-/*
- * One for each feature and one more for the directory entry...
- */
-static struct proc_dir_entry *salinfo_proc_entries[NR_SALINFO_ENTRIES + 1];
+static char *salinfo_log_name[] = {
+	"mca",
+	"init",
+	"cmc",
+	"cpe",
+};
+
+static struct proc_dir_entry *salinfo_proc_entries[
+	ARRAY_SIZE(salinfo_entries) +			/* /proc/sal/bus_lock */
+	ARRAY_SIZE(salinfo_log_name) +			/* /proc/sal/{mca,...} */
+	(2 * ARRAY_SIZE(salinfo_log_name)) +		/* /proc/sal/mca/{event,data} */
+	1];						/* /proc/sal */
+
+struct salinfo_log_data {
+	int	type;
+	u8	*log_buffer;
+	u64	log_size;
+};
+
+struct salinfo_event {
+	int			type;
+	int			cpu;		/* next CPU to check */
+	volatile unsigned long	cpu_mask;
+	wait_queue_head_t	queue;
+};
+
+static struct salinfo_event *salinfo_event[ARRAY_SIZE(salinfo_log_name)];
+
+struct salinfo_data {
+	int	open;		/* single-open to prevent races */
+	int	type;
+	int	cpu;		/* "current" cpu for reads */
+};
+
+static struct salinfo_data salinfo_data[ARRAY_SIZE(salinfo_log_name)];
+
+static spinlock_t data_lock;
+
+void
+salinfo_log_wakeup(int type)
+{
+	if (type < ARRAY_SIZE(salinfo_log_name)) {
+		struct salinfo_event *event = salinfo_event[type];
+
+		if (event) {
+			set_bit(smp_processor_id(), &event->cpu_mask);
+			wake_up_interruptible(&event->queue);
+		}
+	}
+}
+
+static int
+salinfo_event_open(struct inode *inode, struct file *file)
+{
+	struct proc_dir_entry *entry = (struct proc_dir_entry *) inode->u.generic_ip;
+	struct salinfo_event *event = entry->data;
+
+	if (!suser())
+		return -EPERM;
+	return 0;
+}
+
+static ssize_t
+salinfo_event_read(struct file *file, char *buffer, size_t count, loff_t *ppos)
+{
+	struct inode *inode = file->f_dentry->d_inode;
+	struct proc_dir_entry *entry = (struct proc_dir_entry *) inode->u.generic_ip;
+	struct salinfo_event *event = entry->data;
+	char cmd[32];
+	size_t size;
+	int i, n, cpu = -1;
+
+retry:
+	if (!event->cpu_mask) {
+		if (file->f_flags & O_NONBLOCK)
+			return -EAGAIN;
+		interruptible_sleep_on(&event->queue);
+		if (signal_pending(current))
+			return -EINTR;
+	}
+
+	n = event->cpu;
+	for (i = 0; i < NR_CPUS; i++) {
+		if (event->cpu_mask & 1UL << n) {
+			cpu = n;
+			break;
+		}
+		if (++n == NR_CPUS)
+			n = 0;
+	}
+
+	if (cpu == -1)
+		goto retry;
+
+	/* for next read, start checking at next CPU */
+	event->cpu = cpu;
+	if (++event->cpu == NR_CPUS)
+		event->cpu = 0;
+
+	snprintf(cmd, sizeof(cmd), "read %d\n", cpu);
+
+	size = strlen(cmd);
+	if (size > count)
+		size = count;
+	if (copy_to_user(buffer, cmd, size))
+		return -EFAULT;
+
+	return size;
+}
+
+static struct file_operations salinfo_event_fops = {
+	.open  = salinfo_event_open,
+	.read  = salinfo_event_read,
+};
+
+static int
+salinfo_log_open(struct inode *inode, struct file *file)
+{
+	struct proc_dir_entry *entry = (struct proc_dir_entry *) inode->u.generic_ip;
+	struct salinfo_data *data = entry->data;
+
+	if (!suser())
+		return -EPERM;
+
+	spin_lock(&data_lock);
+	if (data->open) {
+		spin_unlock(&data_lock);
+		return -EBUSY;
+	}
+	data->open = 1;
+	spin_unlock(&data_lock);
+
+	return 0;
+}
+
+static int
+salinfo_log_release(struct inode *inode, struct file *file)
+{
+	struct proc_dir_entry *entry = (struct proc_dir_entry *) inode->u.generic_ip;
+	struct salinfo_data *data = entry->data;
+
+	spin_lock(&data_lock);
+	data->open = 0;
+	spin_unlock(&data_lock);
+	return 0;
+}
+
+static void
+call_on_cpu(int cpu, void (*fn)(void *), void *arg)
+{
+	if (cpu == smp_processor_id())
+		(*fn)(arg);
+#ifdef CONFIG_SMP
+	else if (cpu_online(cpu))	/* cpu may be unvalidated */
+		smp_call_function_single(cpu, fn, arg, 0, 1);
+#endif
+}
+
+static void
+salinfo_log_read_cpu(void *context)
+{
+	struct salinfo_log_data *info = context;
+	struct salinfo_event *event = salinfo_event[info->type];
+	u64 size;
+
+	size = ia64_sal_get_state_info_size(info->type);
+	info->log_buffer = kmalloc(size, GFP_ATOMIC);
+	if (!info->log_buffer)
+		return;
+
+	clear_bit(smp_processor_id(), &event->cpu_mask);
+	info->log_size = ia64_sal_get_state_info(info->type, (u64 *) info->log_buffer);
+	if (info->log_size)
+		salinfo_log_wakeup(info->type);
+}
+
+static ssize_t
+salinfo_log_read(struct file *file, char *buffer, size_t count, loff_t *ppos)
+{
+	struct inode *inode = file->f_dentry->d_inode;
+	struct proc_dir_entry *entry = (struct proc_dir_entry *) inode->u.generic_ip;
+	struct salinfo_data *data = entry->data;
+	struct salinfo_log_data info;
+	int ret;
+	void *saldata;
+	size_t size;
+
+	info.type = data->type;
+	call_on_cpu(data->cpu, salinfo_log_read_cpu, &info);
+	if (!info.log_buffer || *ppos >= info.log_size) {
+		ret = 0;
+		goto out;
+	}
+
+	saldata = info.log_buffer + file->f_pos;
+	size = info.log_size - file->f_pos;
+	if (size > count)
+		size = count;
+	if (copy_to_user(buffer, saldata, size)) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	*ppos += size;
+	ret = size;
+
+out:
+	kfree(info.log_buffer);
+	return ret;
+}
+
+static void
+salinfo_log_clear_cpu(void *context)
+{
+	struct salinfo_data *data = context;
+	struct salinfo_event *event = salinfo_event[data->type];
+	struct salinfo_log_data info;
+
+	clear_bit(smp_processor_id(), &event->cpu_mask);
+	ia64_sal_clear_state_info(data->type);
+
+	/* clearing one record may make another visible */
+	info.type = data->type;
+	salinfo_log_read_cpu(&info);
+	if (info.log_buffer && info.log_size)
+		salinfo_log_wakeup(data->type);
+
+	kfree(info.log_buffer);
+}
+
+static ssize_t
+salinfo_log_write(struct file *file, const char *buffer, size_t count, loff_t *ppos)
+{
+	struct inode *inode = file->f_dentry->d_inode;
+	struct proc_dir_entry *entry = (struct proc_dir_entry *) inode->u.generic_ip;
+	struct salinfo_data *data = entry->data;
+	char cmd[32];
+	size_t size;
+	int cpu;
+
+	size = sizeof(cmd);
+	if (count < size)
+		size = count;
+	if (copy_from_user(cmd, buffer, size))
+		return -EFAULT;
+
+	if (sscanf(cmd, "read %d", &cpu) == 1)
+		data->cpu = cpu;
+	else if (sscanf(cmd, "clear %d", &cpu) == 1)
+		call_on_cpu(cpu, salinfo_log_clear_cpu, data);
+	else if (sscanf(cmd, "wake %d", &cpu) == 1)	// FIXME for debug
+		call_on_cpu(cpu, salinfo_log_wakeup, data->type);
+
+	return count;
+}
+
+static struct file_operations salinfo_data_fops = {
+	.open    = salinfo_log_open,
+	.release = salinfo_log_release,
+	.read    = salinfo_log_read,
+	.write   = salinfo_log_write,
+};
 
 static int __init
 salinfo_init(void)
 {
 	struct proc_dir_entry *salinfo_dir; /* /proc/sal dir entry */
 	struct proc_dir_entry **sdir = salinfo_proc_entries; /* keeps track of every entry */
-	int i;
+	struct proc_dir_entry *dir, *entry;
+	struct salinfo_event *event;
+	struct salinfo_data *data;
+	int i, j;
 
 	salinfo_dir = proc_mkdir("sal", NULL);
+	if (!salinfo_dir)
+		return 0;
 
 	for (i=0; i < NR_SALINFO_ENTRIES; i++) {
 		/* pass the feature bit in question as misc data */
 		*sdir++ = create_proc_read_entry (salinfo_entries[i].name, 0, salinfo_dir,
 						  salinfo_read, (void *)salinfo_entries[i].feature);
 	}
+
+	for (i = 0; i < ARRAY_SIZE(salinfo_log_name); i++) {
+		dir = proc_mkdir(salinfo_log_name[i], salinfo_dir);
+		if (!dir)
+			continue;
+
+		entry = create_proc_entry("event", S_IRUSR, dir);
+		if (!entry)
+			continue;
+
+		event = kmalloc(sizeof(*event), GFP_KERNEL);
+		if (!event)
+			continue;
+		memset(event, 0, sizeof(*event));
+		event->type = i;
+		init_waitqueue_head(&event->queue);
+		salinfo_event[i] = event;
+		/* we missed any events before now */
+		for (j = 0; j < NR_CPUS; j++)
+			if (cpu_online(j))
+				set_bit(j, &event->cpu_mask);
+		entry->data = event;
+		entry->proc_fops = &salinfo_event_fops;
+		*sdir++ = entry;
+
+		entry = create_proc_entry("data", S_IRUSR | S_IWUSR, dir);
+		if (!entry)
+			continue;
+
+		data = &salinfo_data[i];
+		data->type = i;
+		entry->data = data;
+		entry->proc_fops = &salinfo_data_fops;
+		*sdir++ = entry;
+
+		*sdir++ = dir;
+	}
+
 	*sdir++ = salinfo_dir;
 
 	return 0;
@@ -69,7 +374,7 @@
 {
 	int i = 0;
 
-	for (i = 0; i < NR_SALINFO_ENTRIES ; i++) {
+	for (i = 0; i < ARRAY_SIZE(salinfo_proc_entries); i++) {
 		if (salinfo_proc_entries[i])
 			remove_proc_entry (salinfo_proc_entries[i]->name, NULL);
 	}



^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [Linux-ia64] SAL error record logging/decoding
  2003-05-07 23:41 [Linux-ia64] SAL error record logging/decoding Bjorn Helgaas
                   ` (20 preceding siblings ...)
  2003-05-30 20:27 ` Bjorn Helgaas
@ 2003-05-30 20:31 ` Bjorn Helgaas
  21 siblings, 0 replies; 23+ messages in thread
From: Bjorn Helgaas @ 2003-05-30 20:31 UTC (permalink / raw)
  To: linux-ia64

> The basic logging/decoding package is here:
> 
>     ftp://kernel.org/pub/linux/kernel/people/helgaas/salinfo-0.1.tar.gz

Oops, I mean here:

    ftp://kernel.org/pub/linux/kernel/people/helgaas/salinfo-0.2.tar.gz

0.1 was for the previous round (/proc/sal/cpu0/mca, etc).


^ permalink raw reply	[flat|nested] 23+ messages in thread

end of thread, other threads:[~2003-05-30 20:31 UTC | newest]

Thread overview: 23+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2003-05-07 23:41 [Linux-ia64] SAL error record logging/decoding Bjorn Helgaas
2003-05-08  0:05 ` David Mosberger
2003-05-08  0:13 ` Luck, Tony
2003-05-08 19:32 ` Bjorn Helgaas
2003-05-20 22:58 ` Bjorn Helgaas
2003-05-21 18:06 ` Luck, Tony
2003-05-21 20:48 ` Luck, Tony
2003-05-21 21:51 ` Luck, Tony
2003-05-22 21:29 ` Bjorn Helgaas
2003-05-23  0:24 ` Bjorn Helgaas
2003-05-23 15:42 ` Luck, Tony
2003-05-28 23:26 ` Bjorn Helgaas
2003-05-29  0:07 ` Keith Owens
2003-05-29  1:34 ` Bjorn Helgaas
2003-05-29  1:37 ` Keith Owens
2003-05-29 20:49 ` Luck, Tony
2003-05-29 21:31 ` Bjorn Helgaas
2003-05-29 21:47 ` Luck, Tony
2003-05-29 22:38 ` Bjorn Helgaas
2003-05-29 23:33 ` Luck, Tony
2003-05-30 11:56 ` Matthew Wilcox
2003-05-30 20:27 ` Bjorn Helgaas
2003-05-30 20:31 ` Bjorn Helgaas
