All of lore.kernel.org
 help / color / mirror / Atom feed
* Rework arch/ia64/kernel/salinfo.c for 2.4
@ 2003-10-20 10:47 Keith Owens
  2003-10-20 14:38 ` Zoltan Menyhart
                   ` (6 more replies)
  0 siblings, 7 replies; 8+ messages in thread
From: Keith Owens @ 2003-10-20 10:47 UTC (permalink / raw)
  To: linux-ia64

I have reworked salinfo.c to get a clean separation between the
interrupt handler that is called from mca.c and the rest of the salinfo
code that runs in user context.

It is critical that the interrupt handler part of salinfo must never
fail or deadlock, mca.c must be allowed to continue to get decent
debugging information.  With this rework, the handler only saves the
address of the buffer, sets the event bit and calls up() on the salinfo
data semaphore then returns to mca.c.

The information that was split between salinfo_event (4), salinfo_data
(4) and salinfo_buffer (NR_CPUS*4) has been consolidated into a single
salinfo_data (4) structure.  The consolidation simplifies the code and
the locking.

Use set_cpus_allowed() instead of using IPI to read and clear SAL
records.  This does not disable interrupts and keeps the clean
separation between interrupt and user context.  As a bonus, this code
is ready for machines with > 64 cpus in a single system image.

The rework removes the races and deadlocks that were mentioned on this
list last week.  It also avoids multiple reads of the SAL record when
user space has to read the record in multiple chunks.  I am still
stress testing the code, this release is a request for comments.

The rework patch is larger than the source, it is easier to show the
complete code rather than a patch.


/*
 * salinfo.c
 *
 * Creates entries in /proc/sal for various system features.
 *
 * Copyright (c) 2003 Silicon Graphics, Inc.  All rights reserved.
 * Copyright (c) 2003 Hewlett-Packard Co
 *	Bjorn Helgaas <bjorn.helgaas@hp.com>
 *
 * 10/30/2001	jbarnes@sgi.com		copied much of Stephane's palinfo
 *					code to create this file
 * Oct 20 2003	kaos@sgi.com
 *   Replace IPI with set_cpus_allowed() to read a record from the required cpu.
 *   Redesign salinfo log processing to separate interrupt and user space
 *   contexts.
 *   Cache the record across multi-block reads from user space.
 *   Support > 64 cpus.
 */

#include <linux/types.h>
#include <linux/proc_fs.h>
#include <linux/module.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/vmalloc.h>

#include <asm/semaphore.h>
#include <asm/sal.h>
#include <asm/uaccess.h>

MODULE_AUTHOR("Jesse Barnes <jbarnes@sgi.com>");
MODULE_DESCRIPTION("/proc interface to IA-64 SAL features");
MODULE_LICENSE("GPL");

static int salinfo_read(char *page, char **start, off_t off, int count, int *eof, void *data);

typedef struct {
	const char		*name;		/* name of the proc entry */
	unsigned long           feature;        /* feature bit */
	struct proc_dir_entry	*entry;		/* registered entry (removal) */
} salinfo_entry_t;

/*
 * List {name,feature} pairs for every entry in /proc/sal/<feature>
 * that this module exports
 */
static salinfo_entry_t salinfo_entries[]={
	{ "bus_lock",           IA64_SAL_PLATFORM_FEATURE_BUS_LOCK, },
	{ "irq_redirection",	IA64_SAL_PLATFORM_FEATURE_IRQ_REDIR_HINT, },
	{ "ipi_redirection",	IA64_SAL_PLATFORM_FEATURE_IPI_REDIR_HINT, },
	{ "itc_drift",		IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT, },
};

#define NR_SALINFO_ENTRIES ARRAY_SIZE(salinfo_entries)

static char *salinfo_log_name[] = {
	"mca",
	"init",
	"cmc",
	"cpe",
};

static struct proc_dir_entry *salinfo_proc_entries[
	ARRAY_SIZE(salinfo_entries) +			/* /proc/sal/bus_lock */
	ARRAY_SIZE(salinfo_log_name) +			/* /proc/sal/{mca,...} */
	(2 * ARRAY_SIZE(salinfo_log_name)) +		/* /proc/sal/mca/{event,data} */
	1];						/* /proc/sal */

/* Allow build with or without large SSI support */
#ifdef CPU_MASK_NONE
#define SCA(x, y) set_cpus_allowed((x), &(y))
#else
#define cpumask_t unsigned long
#define SCA(x, y) set_cpus_allowed((x), (y))
#endif

/* Some records we get ourselves, some are accessed as saved data in buffers
 * that are owned by mca.c.
 */
struct salinfo_data_saved {
	u8*			buffer;
	u64			size;
	u64			id;
	int			cpu;
};

struct salinfo_data {
	volatile cpumask_t	cpu_event;	/* which cpus have outstanding events */
	struct semaphore	sem;		/* count of cpus with outstanding events (bits set in cpu_event) */
	u8			*log_buffer;
	u64			log_size;
	int			open;		/* single-open to prevent races */
	u8			type;
	u8			saved_num;	/* using a saved record? */
	u8			new_read;	/* start of a new read? */
	u8			cleared;	/* saved records have already been cleared? */
	int			cpu_read;	/* "current" cpu for reads */
	int			cpu_check;	/* next CPU to check */
	struct salinfo_data_saved data_saved[5];/* save last 5 records from mca.c, must be < 255 */
};

static struct salinfo_data salinfo_data[ARRAY_SIZE(salinfo_log_name)];

static spinlock_t data_lock, data_saved_lock;

static void
shift1_data_saved (struct salinfo_data *data, int shift)
{
	memcpy(data->data_saved+shift, data->data_saved+shift+1,
	       (ARRAY_SIZE(data->data_saved) - (shift+1)) * sizeof(data->data_saved[0]));
	memset(data->data_saved + ARRAY_SIZE(data->data_saved) - 1, 0,
	       sizeof(data->data_saved[0]));
}

/* This routine is invoked in interrupt context.  Note: mca.c enables
 * interrupts before calling this code for CMC/CPE.  MCA and INIT events are
 * not irq safe, do not call any routines that use spinlocks, they may deadlock.
 *
 * The buffer passed from mca.c points to the output from ia64_log_get. This is
 * a persistent buffer but its contents can change between the interrupt and
 * when user space processes the record.  Save the record id to identify
 * changes.
 */
void
salinfo_log_wakeup(int type, u8 *buffer, u64 size)
{
	struct salinfo_data *data = salinfo_data + type;
	struct salinfo_data_saved *data_saved;
	unsigned long flags = 0;
	int i, irqsafe = type != SAL_INFO_TYPE_MCA && type != SAL_INFO_TYPE_INIT;
	int saved_size = ARRAY_SIZE(data->data_saved);

	BUG_ON(type >= ARRAY_SIZE(salinfo_log_name));

	if (irqsafe)
		spin_lock_irqsave(&data_saved_lock, flags);
	for (i = 0, data_saved = data->data_saved; i < saved_size; ++i, ++data_saved) {
		if (!data_saved->buffer)
			break;
	}
	if (i = saved_size) {
		shift1_data_saved(data, 0);
		data_saved = data->data_saved + saved_size - 1;
	}
	data_saved->cpu = smp_processor_id();
	data_saved->id = ((sal_log_record_header_t *)buffer)->id;
	data_saved->size = size;
	data_saved->buffer = buffer;
	/* mca.c clears CMC/CPE from SAL immediately */
	data->cleared = type = SAL_INFO_TYPE_CMC || type = SAL_INFO_TYPE_CPE;
	if (irqsafe)
		spin_unlock_irqrestore(&data_saved_lock, flags);

	if (!test_and_set_bit(smp_processor_id(), &data->cpu_event)) {
		if (irqsafe)
			up(&data->sem);
	}
}

static int
salinfo_event_open(struct inode *inode, struct file *file)
{
	if (!suser())
		return -EPERM;
	return 0;
}

static ssize_t
salinfo_event_read(struct file *file, char *buffer, size_t count, loff_t *ppos)
{
	struct inode *inode = file->f_dentry->d_inode;
	struct proc_dir_entry *entry = (struct proc_dir_entry *) inode->u.generic_ip;
	struct salinfo_data *data = entry->data;
	char cmd[32];
	size_t size;
	int i, n, cpu = -1;

retry:
	if (down_trylock(&data->sem)) {
		if (file->f_flags & O_NONBLOCK)
			return -EAGAIN;
		if (down_interruptible(&data->sem))
			return -ERESTARTSYS;
	}

	n = data->cpu_check;
	for (i = 0; i < NR_CPUS; i++) {
		if (test_bit(n, &data->cpu_event)) {
			cpu = n;
			break;
		}
		if (++n = NR_CPUS)
			n = 0;
	}

	if (cpu = -1)
		goto retry;

	/* events are sticky until the user says "clear" */
	up(&data->sem);

	/* for next read, start checking at next CPU */
	data->cpu_check = cpu;
	if (++data->cpu_check = NR_CPUS)
		data->cpu_check = 0;

	snprintf(cmd, sizeof(cmd), "read %d\n", cpu);

	size = strlen(cmd);
	if (size > count)
		size = count;
	if (copy_to_user(buffer, cmd, size))
		return -EFAULT;

	return size;
}

static struct file_operations salinfo_event_fops = {
	.open  = salinfo_event_open,
	.read  = salinfo_event_read,
};

static int
salinfo_log_open(struct inode *inode, struct file *file)
{
	struct proc_dir_entry *entry = (struct proc_dir_entry *) inode->u.generic_ip;
	struct salinfo_data *data = entry->data;

	if (!suser())
		return -EPERM;

	spin_lock(&data_lock);
	if (data->open) {
		spin_unlock(&data_lock);
		return -EBUSY;
	}
	data->open = 1;
	spin_unlock(&data_lock);

	return 0;
}

static int
salinfo_log_release(struct inode *inode, struct file *file)
{
	struct proc_dir_entry *entry = (struct proc_dir_entry *) inode->u.generic_ip;
	struct salinfo_data *data = entry->data;

	vfree(data->log_buffer);
	data->log_buffer = NULL;
	spin_lock(&data_lock);
	data->open = 0;
	spin_unlock(&data_lock);
	return 0;
}

static void
call_on_cpu(int cpu, void (*fn)(void *), void *arg)
{
	cpumask_t save_cpus_allowed, new_cpus_allowed;
	memcpy(&save_cpus_allowed, &current->cpus_allowed, sizeof(save_cpus_allowed));
	memset(&new_cpus_allowed, 0, sizeof(new_cpus_allowed));
	set_bit(cpu, &new_cpus_allowed);
	SCA(current, new_cpus_allowed);
	(*fn)(arg);
	SCA(current, save_cpus_allowed);
}

static void
salinfo_log_read_cpu(void *context)
{
	struct salinfo_data *data = context;
	data->log_size = ia64_sal_get_state_info(data->type, (u64 *) data->log_buffer);
}

static void
salinfo_log_new_read(struct salinfo_data *data)
{
	struct salinfo_data_saved *data_saved;
	unsigned long flags;
	int i;
	int saved_size = ARRAY_SIZE(data->data_saved);

	data->new_read = 0;
	data->saved_num = 0;
	spin_lock_irqsave(&data_saved_lock, flags);
retry:
	for (i = 0, data_saved = data->data_saved; i < saved_size; ++i, ++data_saved) {
		if (data_saved->buffer && data_saved->cpu = data->cpu_read) {
			sal_log_record_header_t *rh = (sal_log_record_header_t *)(data_saved->buffer);
			data->log_size = data_saved->size;
			memcpy(data->log_buffer, rh, data->log_size);
			barrier();	/* id check must not be moved */
			if (rh->id = data_saved->id) {
				data->saved_num = i+1;
				break;
			}
			/* saved record changed by mca.c since interrupt, discard it */
			shift1_data_saved(data, i);
			goto retry;
		}
	}
	spin_unlock_irqrestore(&data_saved_lock, flags);

	if (!data->saved_num)
		call_on_cpu(data->cpu_read, salinfo_log_read_cpu, data);
}

static ssize_t
salinfo_log_read(struct file *file, char *buffer, size_t count, loff_t *ppos)
{
	struct inode *inode = file->f_dentry->d_inode;
	struct proc_dir_entry *entry = (struct proc_dir_entry *) inode->u.generic_ip;
	struct salinfo_data *data = entry->data;
	void *saldata;
	size_t size;

	if (!data->log_buffer) {
		data->log_buffer = vmalloc(ia64_sal_get_state_info_size(data->type));
		if (!data->log_buffer)
			return -ENOMEM;
	}

	if (data->new_read)
		salinfo_log_new_read(data);
	if (*ppos >= data->log_size)
		return 0;

	saldata = data->log_buffer + file->f_pos;
	size = data->log_size - file->f_pos;
	if (size > count)
		size = count;
	if (copy_to_user(buffer, saldata, size))
		return -EFAULT;

	*ppos += size;
	return size;
}

static void
salinfo_log_clear_cpu(void *context)
{
	struct salinfo_data *data = context;
	ia64_sal_clear_state_info(data->type);
}

static int
salinfo_log_clear(struct salinfo_data *data, int cpu)
{
	if (!data->log_buffer) {
		data->log_buffer = vmalloc(ia64_sal_get_state_info_size(data->type));
		if (!data->log_buffer)
			return -ENOMEM;
	}
	if (!test_bit(cpu, &data->cpu_event))
		return 0;
	down(&data->sem);
	clear_bit(cpu, &data->cpu_event);
	data->log_size = 0;
	if (data->saved_num) {
		unsigned long flags;
		spin_lock_irqsave(&data_saved_lock, flags);
		shift1_data_saved(data, data->saved_num - 1 );
		data->saved_num = 0;
		spin_unlock_irqrestore(&data_saved_lock, flags);
	}
	if (!data->cleared)
		call_on_cpu(cpu, salinfo_log_clear_cpu, data);

	/* clearing a record may make a new record visible */
	data->cpu_read = cpu;
	salinfo_log_new_read(data);
	if (data->log_size &&
	    !test_and_set_bit(data->cpu_read,  &data->cpu_event))
		up(&data->sem);
	return 0;
}

static ssize_t
salinfo_log_write(struct file *file, const char *buffer, size_t count, loff_t *ppos)
{
	struct inode *inode = file->f_dentry->d_inode;
	struct proc_dir_entry *entry = (struct proc_dir_entry *) inode->u.generic_ip;
	struct salinfo_data *data = entry->data;
	char cmd[32];
	size_t size;
	int cpu;

	size = sizeof(cmd);
	if (count < size)
		size = count;
	if (copy_from_user(cmd, buffer, size))
		return -EFAULT;

	if (sscanf(cmd, "read %d", &cpu) = 1) {
		data->cpu_read = cpu;
		data->new_read = 1;
	} else if (sscanf(cmd, "clear %d", &cpu) = 1) {
		int ret;
		if ((ret = salinfo_log_clear(data, cpu)))
			count = ret;
	}

	return count;
}

static struct file_operations salinfo_data_fops = {
	.open    = salinfo_log_open,
	.release = salinfo_log_release,
	.read    = salinfo_log_read,
	.write   = salinfo_log_write,
};

static int __init
salinfo_init(void)
{
	struct proc_dir_entry *salinfo_dir; /* /proc/sal dir entry */
	struct proc_dir_entry **sdir = salinfo_proc_entries; /* keeps track of every entry */
	struct proc_dir_entry *dir, *entry;
	struct salinfo_data *data;
	int i, j, online;

	salinfo_dir = proc_mkdir("sal", NULL);
	if (!salinfo_dir)
		return 0;

	for (i=0; i < NR_SALINFO_ENTRIES; i++) {
		/* pass the feature bit in question as misc data */
		*sdir++ = create_proc_read_entry (salinfo_entries[i].name, 0, salinfo_dir,
						  salinfo_read, (void *)salinfo_entries[i].feature);
	}

	for (i = 0; i < ARRAY_SIZE(salinfo_log_name); i++) {
		data = salinfo_data + i;
		data->type = i;
		sema_init(&data->sem, 0);
		dir = proc_mkdir(salinfo_log_name[i], salinfo_dir);
		if (!dir)
			continue;

		entry = create_proc_entry("event", S_IRUSR, dir);
		if (!entry)
			continue;
		entry->data = data;
		entry->proc_fops = &salinfo_event_fops;
		*sdir++ = entry;

		entry = create_proc_entry("data", S_IRUSR | S_IWUSR, dir);
		if (!entry)
			continue;
		entry->data = data;
		entry->proc_fops = &salinfo_data_fops;
		*sdir++ = entry;

		/* we missed any events before now */
		online = 0;
		for (j = 0; j < NR_CPUS; j++)
			if (cpu_online(j)) {
				set_bit(j, &data->cpu_event);
				++online;
			}
		sema_init(&data->sem, online);

		*sdir++ = dir;
	}

	*sdir++ = salinfo_dir;

	return 0;
}

/* FIXME: Although this source has a module_exit function, the code cannot be
 * built as a module.  mca.c has an unconditional call to salinfo_log_wakeup()
 * which will be unresolved if salinfo.c is a module.
 */
static void __exit
salinfo_exit(void)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(salinfo_proc_entries); i++) {
		if (salinfo_proc_entries[i])
			remove_proc_entry (salinfo_proc_entries[i]->name, NULL);
	}

	for (i = 0; i < ARRAY_SIZE(salinfo_log_name); i++) {
		vfree(salinfo_data[i].log_buffer);
	}
}

/*
 * 'data' contains an integer that corresponds to the feature we're
 * testing
 */
static int
salinfo_read(char *page, char **start, off_t off, int count, int *eof, void *data)
{
	int len = 0;

	MOD_INC_USE_COUNT;

	len = sprintf(page, (sal_platform_features & (unsigned long)data) ? "1\n" : "0\n");

	if (len <= off+count) *eof = 1;

	*start = page + off;
	len   -= off;

	if (len>count) len = count;
	if (len<0) len = 0;

	MOD_DEC_USE_COUNT;

	return len;
}

module_init(salinfo_init);
module_exit(salinfo_exit);


^ permalink raw reply	[flat|nested] 8+ messages in thread

* re: Rework arch/ia64/kernel/salinfo.c for 2.4
  2003-10-20 10:47 Rework arch/ia64/kernel/salinfo.c for 2.4 Keith Owens
@ 2003-10-20 14:38 ` Zoltan Menyhart
  2003-10-20 14:53 ` Keith Owens
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Zoltan Menyhart @ 2003-10-20 14:38 UTC (permalink / raw)
  To: linux-ia64

Keith,

I did see an uncorrectable cache error (MCA) and a corrected
memory error (CMC) in a single SAL error log record.
Can you sort out such a case ?

Is there any use to show the log of INIT ?

/* save last 5 records from mca.c, must be < 255 */
struct salinfo_data: struct salinfo_data_saved data_saved[5]; :

It would be much more safe for the MCA stuff to reserve a data
buffer for each CPU. As there is no mutual exclusion with the
MCA handler:
- do not "clear" nor "shift" MCA logs
- the MCA handler can overwrite the buffer of the CPU on which
  it executes
- for the "read <n>" command, you may:
  + calculate a CRC32 of the buffer[n]
  + copy_to_user(buffer[n],...)
  + calculate again the CRC32 of the buffer[n] and restart
    if it is not the same as before

Assuming I've got a CPE, can I read its SAL log on any CPU ?
Can I clear this SAL log on a different CPU ?

If a CMC's SAL log includes some Platform ... Error Info
structures and another CPU can pinch the platform related
error information (and it can clear it too), how can the CPU
causing the error know what has happened ?

Assuming I've got a CMC / CPE, I read its log but I do not clear it.
Assuming I've got another CMC / CPE and I read the log: are the
new / old errors merged ?

Thanks,

Zoltan Menyhart

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: Rework arch/ia64/kernel/salinfo.c for 2.4
  2003-10-20 10:47 Rework arch/ia64/kernel/salinfo.c for 2.4 Keith Owens
  2003-10-20 14:38 ` Zoltan Menyhart
@ 2003-10-20 14:53 ` Keith Owens
  2003-10-20 23:38 ` Bjorn Helgaas
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Keith Owens @ 2003-10-20 14:53 UTC (permalink / raw)
  To: linux-ia64

On Mon, 20 Oct 2003 16:38:54 +0200, 
Zoltan Menyhart <Zoltan.Menyhart_AT_bull.net@nospam.org> wrote:
>Keith,
>
>I did see an uncorrectable cache error (MCA) and a corrected
>memory error (CMC) in a single SAL error log record.
>Can you sort out such a case ?

That depends on your SAL implementation.  Does it pass one or two
records to the OS and how does it pass them?  The OS just does what SAL
says.

>Is there any use to show the log of INIT ?

When the kernel is spinning on a disabled spinlock, the only way to get
its attention is to send INIT.  The registers at the time of INIT tell
you where it was spinning and on which lock.

>/* save last 5 records from mca.c, must be < 255 */
>struct salinfo_data: struct salinfo_data_saved data_saved[5]; :
>
>It would be much more safe for the MCA stuff to reserve a data
>buffer for each CPU. As there is no mutual exclusion with the
>MCA handler:

Unless I misread the SAL spec, you can only have one MCA event in the
OS at a time.  MCA rendezvous is a normal interrupt that does not
generate a record.  At the moment the first MCA is catastrophic and
requires a reboot, which means that the MCA record is not picked up
until after the reboot.  If we ever do recovery from MCA then the
interrupt handler will need to be reviewed but without knowing what the
recovery model is, it is premature to code for it.

>- do not "clear" nor "shift" MCA logs
>- the MCA handler can overwrite the buffer of the CPU on which
>  it executes
>- for the "read <n>" command, you may:
>  + calculate a CRC32 of the buffer[n]
>  + copy_to_user(buffer[n],...)
>  + calculate again the CRC32 of the buffer[n] and restart
>    if it is not the same as before

Doing a CRC at "read <n>" time is too late, the CRC would have to be
taken in the interrupt handler.  In any case, the record ID is supposed
to be unique and is the first field in the record.  Checking that the
ID is unchanged after taking a copy is sufficient and is much cheaper
than a CRC check.

>Assuming I've got a CPE, can I read its SAL log on any CPU ?

Reading SAL records has to be done from the same cpu,
SAL_GET_STATE_INFO does not take a cpu parameter.  The code takes care
of that, see salinfo_log_read_cpu().  Once the record has been copied
into user space, you can decode it from anywhere.

>Can I clear this SAL log on a different CPU ?

Same as read, SAL_CLEAR_STATE_INFO does not take a cpu parameter.  See
salinfo_log_clear_cpu().

>If a CMC's SAL log includes some Platform ... Error Info
>structures and another CPU can pinch the platform related
>error information (and it can clear it too), how can the CPU
>causing the error know what has happened ?

All information must be in the record.  Anything not in the record can
be lost.  Remember that some of these records are not extracted from
prom until after a reboot, so any volatile data is lost.

>Assuming I've got a CMC / CPE, I read its log but I do not clear it.
>Assuming I've got another CMC / CPE and I read the log: are the
>new / old errors merged ?

SAL requires you to clear the current log before you can see the next
one.  SAL_GET_STATE_INFO reads the top record of the defined type on
the current cpu.

My rework has not changed any of the SAL requirements or processing,
just the OS code that tracks the records and extracts them to user
space.


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: Rework arch/ia64/kernel/salinfo.c for 2.4
  2003-10-20 10:47 Rework arch/ia64/kernel/salinfo.c for 2.4 Keith Owens
  2003-10-20 14:38 ` Zoltan Menyhart
  2003-10-20 14:53 ` Keith Owens
@ 2003-10-20 23:38 ` Bjorn Helgaas
  2003-10-21  0:12 ` Keith Owens
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Bjorn Helgaas @ 2003-10-20 23:38 UTC (permalink / raw)
  To: linux-ia64

On Monday 20 October 2003 4:47 am, Keith Owens wrote:
> I have reworked salinfo.c to get a clean separation between the
> interrupt handler that is called from mca.c and the rest of the salinfo
> code that runs in user context.

Thanks for doing all this work.  You've obviously done a lot of work
and I'm still going through it :-)

The only comment/question I have so far is about the log_buffer
management.  You're relying on ia64_sal_get_state_info_size(X)
returning the same thing every time it's called for "X".  I seem
to recall some weaseling on the part of our FW guys, because
they might want to change the size based on hot-plug events.
But I don't think the spec really leaves them that option.

In any case, since we do rely on the size being constant, what
about doing the allocation in sal_log_open()?  Then it could be
removed from salinfo_log_read() and salinfo_log_clear(), and the
open/release paths would be more parallel.

I'm not too sure about the vfree() in salinfo_exit().  As you say,
it can't be built as a module, even if it could be, there should
be some mechanism that prevents the module from being unloaded
while any of the files are open.

Bjorn


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: Rework arch/ia64/kernel/salinfo.c for 2.4
  2003-10-20 10:47 Rework arch/ia64/kernel/salinfo.c for 2.4 Keith Owens
                   ` (2 preceding siblings ...)
  2003-10-20 23:38 ` Bjorn Helgaas
@ 2003-10-21  0:12 ` Keith Owens
  2003-10-21 11:49 ` Zoltan Menyhart
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Keith Owens @ 2003-10-21  0:12 UTC (permalink / raw)
  To: linux-ia64

On Mon, 20 Oct 2003 17:38:42 -0600, 
Bjorn Helgaas <bjorn.helgaas@hp.com> wrote:
>On Monday 20 October 2003 4:47 am, Keith Owens wrote:
>> I have reworked salinfo.c to get a clean separation between the
>> interrupt handler that is called from mca.c and the rest of the salinfo
>> code that runs in user context.
>
>Thanks for doing all this work.  You've obviously done a lot of work
>and I'm still going through it :-)
>
>The only comment/question I have so far is about the log_buffer
>management.  You're relying on ia64_sal_get_state_info_size(X)
>returning the same thing every time it's called for "X".  I seem
>to recall some weaseling on the part of our FW guys, because
>they might want to change the size based on hot-plug events.
>But I don't think the spec really leaves them that option.

I was worried about that myself but, as you say, the spec has no room
for changing record sizes.  SAL has to return one maximum size for all
records of that type for this boot.

>In any case, since we do rely on the size being constant, what
>about doing the allocation in sal_log_open()?  Then it could be
>removed from salinfo_log_read() and salinfo_log_clear(), and the
>open/release paths would be more parallel.

Sounds good.

>I'm not too sure about the vfree() in salinfo_exit().  As you say,
>it can't be built as a module, even if it could be, there should
>be some mechanism that prevents the module from being unloaded
>while any of the files are open.

Agreed.  salinfo.c cannot be a module and salinfo_exit() should
disappear.  I left it in until I got a second opinion.


One minor bug, salinfo_log_wakeup() must not call shift1_data_saved()
if data->saved_num is non-zero.  While the user context code is reading
a saved record, the interrupt handler can use a free saved slot but it
cannot change the slots that are in use.

        if (i = saved_size) {  
                if (!data->saved_num) {
                        shift1_data_saved(data, 0);
                        data_saved = data->data_saved + saved_size - 1;
                } else 
                        data_saved = NULL;
        }
        if (data_saved) {
                data_saved->cpu = smp_processor_id();
                data_saved->id = ((sal_log_record_header_t *)buffer)->id;
                data_saved->size = size;
                data_saved->buffer = buffer;
        }



^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: Rework arch/ia64/kernel/salinfo.c for 2.4
  2003-10-20 10:47 Rework arch/ia64/kernel/salinfo.c for 2.4 Keith Owens
                   ` (3 preceding siblings ...)
  2003-10-21  0:12 ` Keith Owens
@ 2003-10-21 11:49 ` Zoltan Menyhart
  2003-10-21 11:55 ` Zoltan Menyhart
  2003-10-21 12:31 ` Keith Owens
  6 siblings, 0 replies; 8+ messages in thread
From: Zoltan Menyhart @ 2003-10-21 11:49 UTC (permalink / raw)
  To: linux-ia64

> >I did see an uncorrectable cache error (MCA) and a corrected
> >memory error (CMC) in a single SAL error log record.
> >Can you sort out such a case ?
> 
> That depends on your SAL implementation.  Does it pass one or two
> records to the OS and how does it pass them?  The OS just does what SAL
> says.

It was on an Intel's Tiger box. I asked for an MCA SAL log record and
I got a single record including a corrected memory error.
I just wanted to warn you that things happen...

> Unless I misread the SAL spec, you can only have one MCA event in the
> OS at a time. MCA rendezvous is a normal interrupt that does not
> generate a record.  At the moment the first MCA is catastrophic and
> requires a reboot, which means that the MCA record is not picked up
> until after the reboot.  If we ever do recovery from MCA then the
> interrupt handler will need to be reviewed but without knowing what the
> recovery model is, it is premature to code for it.

We are thinking of :-) implementing some MCA recovery.
Two cases have been identified:
- translation register errors
- "consuming" poisoned memory data / uncorrectable memory error
They are local, they can happen physically parallel on more than one CPUs.
We cannot clear the SAL log inside of the OS_MCA handler, because we cannot
save the error log in an MCA context. If we did and if the recovery failed,
we would lose this information.
Whatever synchronization is used (e.g. rendezvous) another CPU can start its
MCA processing in the mean time.
We have to re-fetch the SAL log in a process context later, save it and
clear the SAL log. If there are more than non cleared SAL logs there,
their platform related information can be mixed up - see App. note 11763
page 3-3. 

> >- do not "clear" nor "shift" MCA logs
> >- the MCA handler can overwrite the buffer of the CPU on which
> >  it executes
> >- for the "read <n>" command, you may:
> >  + calculate a CRC32 of the buffer[n]
> >  + copy_to_user(buffer[n],...)
> >  + calculate again the CRC32 of the buffer[n] and restart
> >    if it is not the same as before
> 
> Doing a CRC at "read <n>" time is too late, the CRC would have to be
> taken in the interrupt handler.  In any case, the record ID is supposed
> to be unique and is the first field in the record.  Checking that the
> ID is unchanged after taking a copy is sufficient and is much cheaper
> than a CRC check.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: Rework arch/ia64/kernel/salinfo.c for 2.4
  2003-10-20 10:47 Rework arch/ia64/kernel/salinfo.c for 2.4 Keith Owens
                   ` (4 preceding siblings ...)
  2003-10-21 11:49 ` Zoltan Menyhart
@ 2003-10-21 11:55 ` Zoltan Menyhart
  2003-10-21 12:31 ` Keith Owens
  6 siblings, 0 replies; 8+ messages in thread
From: Zoltan Menyhart @ 2003-10-21 11:55 UTC (permalink / raw)
  To: linux-ia64

(SOORRY, THE TAIL OF MY MAIL GOT LOST, HERE IT IS:)

> >- do not "clear" nor "shift" MCA logs
> >- the MCA handler can overwrite the buffer of the CPU on which
> >  it executes
> >- for the "read <n>" command, you may:
> >  + calculate a CRC32 of the buffer[n]
> >  + copy_to_user(buffer[n],...)
> >  + calculate again the CRC32 of the buffer[n] and restart
> >    if it is not the same as before
> 
> Doing a CRC at "read <n>" time is too late, the CRC would have to be
> taken in the interrupt handler.  In any case, the record ID is supposed
> to be unique and is the first field in the record.  Checking that the
> ID is unchanged after taking a copy is sufficient and is much cheaper
> than a CRC check.

If you start copy_to_user(buffer...) and in the mean time there is a
new MCA (recovered successfully), the the buffer gets overwritten.
We cannot lock out the MCA handler. We need to make sure that the copy
does not include e.g. the beginning of a previous MCA and the end of
the current one.

Regards,

Zoltan Menyhart

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: Rework arch/ia64/kernel/salinfo.c for 2.4
  2003-10-20 10:47 Rework arch/ia64/kernel/salinfo.c for 2.4 Keith Owens
                   ` (5 preceding siblings ...)
  2003-10-21 11:55 ` Zoltan Menyhart
@ 2003-10-21 12:31 ` Keith Owens
  6 siblings, 0 replies; 8+ messages in thread
From: Keith Owens @ 2003-10-21 12:31 UTC (permalink / raw)
  To: linux-ia64

On Tue, 21 Oct 2003 13:55:07 +0200, 
Zoltan Menyhart <Zoltan.Menyhart_AT_bull.net@nospam.org> wrote:
>(SOORRY, THE TAIL OF MY MAIL GOT LOST, HERE IT IS:)
>
>> >- do not "clear" nor "shift" MCA logs
>> >- the MCA handler can overwrite the buffer of the CPU on which
>> >  it executes
>> >- for the "read <n>" command, you may:
>> >  + calculate a CRC32 of the buffer[n]
>> >  + copy_to_user(buffer[n],...)
>> >  + calculate again the CRC32 of the buffer[n] and restart
>> >    if it is not the same as before
>> 
>> Doing a CRC at "read <n>" time is too late, the CRC would have to be
>> taken in the interrupt handler.  In any case, the record ID is supposed
>> to be unique and is the first field in the record.  Checking that the
>> ID is unchanged after taking a copy is sufficient and is much cheaper
>> than a CRC check.
>
>If you start copy_to_user(buffer...) and in the mean time there is a
>new MCA (recovered successfully), the the buffer gets overwritten.
>We cannot lock out the MCA handler. We need to make sure that the copy
>does not include e.g. the beginning of a previous MCA and the end of
>the current one.

Check the code, I cater for that.  The MCA record is copied from
storage owned by mca.c to a vmalloc() buffer owned by salinfo.c.  Then
salinfo checks that the record ID has not changed.  If it has changed,
the record is discarded.  This happens before the record is copied to
user space.


^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2003-10-21 12:31 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2003-10-20 10:47 Rework arch/ia64/kernel/salinfo.c for 2.4 Keith Owens
2003-10-20 14:38 ` Zoltan Menyhart
2003-10-20 14:53 ` Keith Owens
2003-10-20 23:38 ` Bjorn Helgaas
2003-10-21  0:12 ` Keith Owens
2003-10-21 11:49 ` Zoltan Menyhart
2003-10-21 11:55 ` Zoltan Menyhart
2003-10-21 12:31 ` Keith Owens

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.