linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] more mutlithreaded core dumps for 2.5.18.
@ 2002-05-28 17:46 mgross
  0 siblings, 0 replies; only message in thread
From: mgross @ 2002-05-28 17:46 UTC (permalink / raw)
  To: Linux Kernel Mailing List; +Cc: Linus Torvalds, Daniel Jacobowitz

This is another patch update for the multithreaded core dump implementation.

This patch is a re-basing of the last version for the 2.5.18 kernel, along with the following changes:
1) updated comment block as recommended by Daniel explaining the removal of down_write/up_write from elf_core_dump.
2) default "core_dumps_threads" now is 1.
3) removed preempt_disable / preempt_enable_no_resched from the tcore_suspend_threads /tcore_resume_threads.  They where not needed.


--mgross

diff -urN -X dontdiff linux-2.5.18/arch/i386/kernel/i387.c linux2518.tcore/arch/i386/kernel/i387.c
--- linux-2.5.18/arch/i386/kernel/i387.c	Fri May 24 21:55:31 2002
+++ linux2518.tcore/arch/i386/kernel/i387.c	Tue May 28 11:37:49 2002
@@ -528,3 +528,36 @@
 
 	return fpvalid;
 }
+
+int dump_task_fpu( struct task_struct *tsk, struct user_i387_struct *fpu )
+{
+	int fpvalid;
+
+	fpvalid = tsk->used_math;
+	if ( fpvalid ) {
+		if (tsk == current) unlazy_fpu( tsk );
+		if ( cpu_has_fxsr ) {
+			copy_fpu_fxsave( tsk, fpu );
+		} else {
+			copy_fpu_fsave( tsk, fpu );
+		}
+	}
+
+	return fpvalid;
+}
+
+int dump_task_extended_fpu( struct task_struct *tsk, struct user_fxsr_struct *fpu )
+{
+	int fpvalid;
+	
+	fpvalid = tsk->used_math && cpu_has_fxsr;
+	if ( fpvalid ) {
+		if (tsk == current) unlazy_fpu( tsk );
+		memcpy( fpu, &tsk->thread.i387.fxsave,
+		sizeof(struct user_fxsr_struct) );
+	}
+	
+	return fpvalid;
+}
+
+
diff -urN -X dontdiff linux-2.5.18/arch/i386/kernel/process.c linux2518.tcore/arch/i386/kernel/process.c
--- linux-2.5.18/arch/i386/kernel/process.c	Fri May 24 21:55:16 2002
+++ linux2518.tcore/arch/i386/kernel/process.c	Tue May 28 11:37:49 2002
@@ -610,6 +610,18 @@
 
 	dump->u_fpvalid = dump_fpu (regs, &dump->i387);
 }
+/* 
+ * Capture the user space registers if the task is not running (in user space)
+ */
+int dump_task_regs(struct task_struct *tsk, struct pt_regs *regs)
+{
+	*regs = *(struct pt_regs *)((unsigned long)tsk->thread_info + THREAD_SIZE - sizeof(struct pt_regs));
+	regs->xcs &= 0xffff;
+	regs->xds &= 0xffff;
+	regs->xes &= 0xffff;
+	regs->xss &= 0xffff;
+	return 1;
+}
 
 /*
  * This special macro can be used to load a debugging register
diff -urN -X dontdiff linux-2.5.18/fs/binfmt_elf.c linux2518.tcore/fs/binfmt_elf.c
--- linux-2.5.18/fs/binfmt_elf.c	Fri May 24 21:55:26 2002
+++ linux2518.tcore/fs/binfmt_elf.c	Tue May 28 11:48:18 2002
@@ -13,6 +13,7 @@
 
 #include <linux/fs.h>
 #include <linux/stat.h>
+#include <linux/sched.h>
 #include <linux/time.h>
 #include <linux/mm.h>
 #include <linux/mman.h>
@@ -30,6 +31,7 @@
 #include <linux/elfcore.h>
 #include <linux/init.h>
 #include <linux/highuid.h>
+#include <linux/smp.h>
 #include <linux/smp_lock.h>
 #include <linux/compiler.h>
 #include <linux/highmem.h>
@@ -958,7 +960,7 @@
 /* #define DEBUG */
 
 #ifdef DEBUG
-static void dump_regs(const char *str, elf_greg_t *r)
+static void dump_regs(const char *str, elf_gregset_t *r)
 {
 	int i;
 	static const char *regs[] = { "ebx", "ecx", "edx", "esi", "edi", "ebp",
@@ -1006,6 +1008,163 @@
 #define DUMP_SEEK(off)	\
 	if (!dump_seek(file, (off))) \
 		goto end_coredump;
+
+static inline void fill_elf_header(struct elfhdr *elf, int segs)
+{
+	memcpy(elf->e_ident, ELFMAG, SELFMAG);
+	elf->e_ident[EI_CLASS] = ELF_CLASS;
+	elf->e_ident[EI_DATA] = ELF_DATA;
+	elf->e_ident[EI_VERSION] = EV_CURRENT;
+	memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
+
+	elf->e_type = ET_CORE;
+	elf->e_machine = ELF_ARCH;
+	elf->e_version = EV_CURRENT;
+	elf->e_entry = 0;
+	elf->e_phoff = sizeof(struct elfhdr);
+	elf->e_shoff = 0;
+	elf->e_flags = 0;
+	elf->e_ehsize = sizeof(struct elfhdr);
+	elf->e_phentsize = sizeof(struct elf_phdr);
+	elf->e_phnum = segs;
+	elf->e_shentsize = 0;
+	elf->e_shnum = 0;
+	elf->e_shstrndx = 0;
+	return;
+}
+
+static inline void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, off_t offset)
+{
+	phdr->p_type = PT_NOTE;
+	phdr->p_offset = offset;
+	phdr->p_vaddr = 0;
+	phdr->p_paddr = 0;
+	phdr->p_filesz = sz;
+	phdr->p_memsz = 0;
+	phdr->p_flags = 0;
+	phdr->p_align = 0;
+	return;
+}
+
+static inline void fill_note(struct memelfnote *note, const char *name, int type, 
+		unsigned int sz, void *data)
+{
+	note->name = name;
+	note->type = type;
+	note->datasz = sz;
+	note->data = data;
+	return;
+}
+
+/*
+ * fill up all the fields in prstatus from the given task struct, except registers
+ * which need to be filled up seperately.
+ */
+static inline void fill_prstatus(struct elf_prstatus *prstatus, struct task_struct *p, long signr) 
+{
+	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
+	prstatus->pr_sigpend = p->pending.signal.sig[0];
+	prstatus->pr_sighold = p->blocked.sig[0];
+	prstatus->pr_pid = p->pid;
+	prstatus->pr_ppid = p->parent->pid;
+	prstatus->pr_pgrp = p->pgrp;
+	prstatus->pr_sid = p->session;
+	prstatus->pr_utime.tv_sec = CT_TO_SECS(p->times.tms_utime);
+	prstatus->pr_utime.tv_usec = CT_TO_USECS(p->times.tms_utime);
+	prstatus->pr_stime.tv_sec = CT_TO_SECS(p->times.tms_stime);
+	prstatus->pr_stime.tv_usec = CT_TO_USECS(p->times.tms_stime);
+	prstatus->pr_cutime.tv_sec = CT_TO_SECS(p->times.tms_cutime);
+	prstatus->pr_cutime.tv_usec = CT_TO_USECS(p->times.tms_cutime);
+	prstatus->pr_cstime.tv_sec = CT_TO_SECS(p->times.tms_cstime);
+	prstatus->pr_cstime.tv_usec = CT_TO_USECS(p->times.tms_cstime);
+	return;
+}
+
+static inline void fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p)
+{
+	int i;
+	
+	psinfo->pr_pid = p->pid;
+	psinfo->pr_ppid = p->parent->pid;
+	psinfo->pr_pgrp = p->pgrp;
+	psinfo->pr_sid = p->session;
+
+	i = p->state ? ffz(~p->state) + 1 : 0;
+	psinfo->pr_state = i;
+	psinfo->pr_sname = (i < 0 || i > 5) ? '.' : "RSDZTD"[i];
+	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
+	psinfo->pr_nice =  task_nice(p);
+	psinfo->pr_flag = p->flags;
+	psinfo->pr_uid = NEW_TO_OLD_UID(p->uid);
+	psinfo->pr_gid = NEW_TO_OLD_GID(p->gid);
+	strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
+	return;
+}
+
+/*
+ * This is the variable that can be set in proc to determine if we want to
+ * dump a multithreaded core or not. A value of 1 means yes while any
+ * other value means no.
+ *
+ * It is located at /proc/sys/kernel/core_dumps_threads
+ */
+extern int core_dumps_threads;
+
+/* Here is the structure in which status of each thread is captured. */
+struct elf_thread_status
+{
+	struct list_head list;
+	struct elf_prstatus prstatus;	/* NT_PRSTATUS */
+	elf_fpregset_t fpu;		/* NT_PRFPREG */
+	elf_fpxregset_t xfpu;		/* NT_PRXFPREG */
+	struct memelfnote notes[3];
+	int num_notes;
+};
+
+/*
+ * In order to add the specific thread information for the elf file format,
+ * we need to keep a linked list of every threads pr_status and then
+ * create a single section for them in the final core file.
+ */
+static int elf_dump_thread_status(long signr, struct task_struct * p, struct list_head * thread_list)
+{
+
+	struct elf_thread_status *t;
+	int sz = 0;
+
+	t = kmalloc(sizeof(*t), GFP_KERNEL);
+	if (!t) {
+		printk(KERN_WARNING "Cannot allocate memory for thread status.\n");
+		return 0;
+	}
+
+	INIT_LIST_HEAD(&t->list);
+	t->num_notes = 0;
+
+	fill_prstatus(&t->prstatus, p, signr);
+	elf_core_copy_task_regs(p, &t->prstatus.pr_reg);	
+	fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus), &(t->prstatus));
+	t->num_notes++;
+	sz += notesize(&t->notes[0]);
+
+	if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, &t->fpu))) {
+		fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu), &(t->fpu));
+		t->num_notes++;
+		sz += notesize(&t->notes[1]);
+	}
+
+	if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
+		fill_note(&t->notes[2], "LINUX", NT_PRXFPREG, sizeof(t->xfpu), &(t->xfpu));
+		t->num_notes++;
+		sz += notesize(&t->notes[2]);
+	}
+	
+	list_add(&t->list, thread_list);
+	return sz;
+}
+
+
+
 /*
  * Actual dumper
  *
@@ -1024,12 +1183,25 @@
 	struct elfhdr elf;
 	off_t offset = 0, dataoff;
 	unsigned long limit = current->rlim[RLIMIT_CORE].rlim_cur;
-	int numnote = 4;
-	struct memelfnote notes[4];
+	int numnote = 5;
+	struct memelfnote notes[5];
 	struct elf_prstatus prstatus;	/* NT_PRSTATUS */
-	elf_fpregset_t fpu;		/* NT_PRFPREG */
 	struct elf_prpsinfo psinfo;	/* NT_PRPSINFO */
+ 	struct task_struct *p;
+ 	LIST_HEAD(thread_list);
+ 	struct list_head *t;
+	elf_fpregset_t fpu;
+	elf_fpxregset_t xfpu;
+	int dump_threads = core_dumps_threads; /* this value should not change once the */
+					/* dumping starts */
+	int thread_status_size = 0;
+	
 
+	/* First pause all related threaded processes */
+	if (dump_threads)	{
+		tcore_suspend_threads();
+	}
+	
 	/* first copy the parameters from user space */
 	memset(&psinfo, 0, sizeof(psinfo));
 	{
@@ -1047,7 +1219,6 @@
 
 	}
 
-	memset(&prstatus, 0, sizeof(prstatus));
 	/*
 	 * This transfers the registers from regs into the standard
 	 * coredump arrangement, whatever that is.
@@ -1063,9 +1234,44 @@
 	else
 		*(struct pt_regs *)&prstatus.pr_reg = *regs;
 #endif
+ 
+	if (dump_threads) {
+		/* capture the status of all other threads */
+		if (signr) {
+			read_lock(&tasklist_lock);
+			for_each_task(p)
+				if (current->mm == p->mm && current != p) {
+					int sz = elf_dump_thread_status(signr, p, &thread_list);
+					if (!sz) {
+						read_unlock(&tasklist_lock);
+						goto cleanup;
+					}
+					else
+						thread_status_size += sz;
+				}
+			read_unlock(&tasklist_lock);
+		}
+	} /* End if(dump_threads) */
+	
+	memset(&prstatus, 0, sizeof(prstatus));
+	fill_prstatus(&prstatus, current, signr);
+	elf_core_copy_regs(&prstatus.pr_reg, regs);
+	
+	/* We no longer stop all vm operations */
 
-	/* now stop all vm operations */
-	down_write(&current->mm->mmap_sem);
+	/* This because those proceses that could possibly 
+	 * change map_count or the mmap / vma pages are now suspended.
+	 *
+	 * Only ptrace can touch these memory address, but it cannot change
+	 * the map_count or the pages.  So no possibility of crashing exists while dumping
+	 * the mm->vm_next areas to the core file.
+	 *
+	 * Grabbing mmap_sem in this function is risky WRT the use of suspend_threads.
+	 * Although no locks ups have been induced, if one of the suspended threads was 
+	 * in line for the current->mmap_sem and if gets it while on the Phantom runque,
+	 * then we would dead lock in this function if we continue to attempt to down_write
+	 * in this function.
+	 */
 	segs = current->mm->map_count;
 
 #ifdef DEBUG
@@ -1073,25 +1279,7 @@
 #endif
 
 	/* Set up header */
-	memcpy(elf.e_ident, ELFMAG, SELFMAG);
-	elf.e_ident[EI_CLASS] = ELF_CLASS;
-	elf.e_ident[EI_DATA] = ELF_DATA;
-	elf.e_ident[EI_VERSION] = EV_CURRENT;
-	memset(elf.e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
-
-	elf.e_type = ET_CORE;
-	elf.e_machine = ELF_ARCH;
-	elf.e_version = EV_CURRENT;
-	elf.e_entry = 0;
-	elf.e_phoff = sizeof(elf);
-	elf.e_shoff = 0;
-	elf.e_flags = 0;
-	elf.e_ehsize = sizeof(elf);
-	elf.e_phentsize = sizeof(struct elf_phdr);
-	elf.e_phnum = segs+1;		/* Include notes */
-	elf.e_shentsize = 0;
-	elf.e_shnum = 0;
-	elf.e_shstrndx = 0;
+	fill_elf_header(&elf, segs+1); /* including notes section*/
 
 	fs = get_fs();
 	set_fs(KERNEL_DS);
@@ -1108,64 +1296,31 @@
 	 * with info from their /proc.
 	 */
 
-	notes[0].name = "CORE";
-	notes[0].type = NT_PRSTATUS;
-	notes[0].datasz = sizeof(prstatus);
-	notes[0].data = &prstatus;
-	prstatus.pr_info.si_signo = prstatus.pr_cursig = signr;
-	prstatus.pr_sigpend = current->pending.signal.sig[0];
-	prstatus.pr_sighold = current->blocked.sig[0];
-	psinfo.pr_pid = prstatus.pr_pid = current->pid;
-	psinfo.pr_ppid = prstatus.pr_ppid = current->parent->pid;
-	psinfo.pr_pgrp = prstatus.pr_pgrp = current->pgrp;
-	psinfo.pr_sid = prstatus.pr_sid = current->session;
-	prstatus.pr_utime.tv_sec = CT_TO_SECS(current->times.tms_utime);
-	prstatus.pr_utime.tv_usec = CT_TO_USECS(current->times.tms_utime);
-	prstatus.pr_stime.tv_sec = CT_TO_SECS(current->times.tms_stime);
-	prstatus.pr_stime.tv_usec = CT_TO_USECS(current->times.tms_stime);
-	prstatus.pr_cutime.tv_sec = CT_TO_SECS(current->times.tms_cutime);
-	prstatus.pr_cutime.tv_usec = CT_TO_USECS(current->times.tms_cutime);
-	prstatus.pr_cstime.tv_sec = CT_TO_SECS(current->times.tms_cstime);
-	prstatus.pr_cstime.tv_usec = CT_TO_USECS(current->times.tms_cstime);
+	fill_note(&notes[0], "CORE", NT_PRSTATUS, sizeof(prstatus), &prstatus);
+ 	
+	fill_psinfo(&psinfo, current);
+	fill_note(&notes[1], "CORE", NT_PRPSINFO, sizeof(psinfo), &psinfo);
+	
+	fill_note(&notes[2], "CORE", NT_TASKSTRUCT, sizeof(*current), current);
+  
+  	/* Try to dump the FPU. */
+	if ((prstatus.pr_fpvalid = elf_core_copy_task_fpregs(current, &fpu))) {
+		fill_note(&notes[3], "CORE", NT_PRFPREG, sizeof(fpu), &fpu);
+	} else {
+		--numnote;
+ 	}
+	if (elf_core_copy_task_xfpregs(current, &xfpu)) {
+		fill_note(&notes[4], "LINUX", NT_PRXFPREG, sizeof(xfpu), &xfpu);
+	} else {
+		--numnote;
+	}
+  	
 
 #ifdef DEBUG
 	dump_regs("Passed in regs", (elf_greg_t *)regs);
 	dump_regs("prstatus regs", (elf_greg_t *)&prstatus.pr_reg);
 #endif
 
-	notes[1].name = "CORE";
-	notes[1].type = NT_PRPSINFO;
-	notes[1].datasz = sizeof(psinfo);
-	notes[1].data = &psinfo;
-	i = current->state ? ffz(~current->state) + 1 : 0;
-	psinfo.pr_state = i;
-	psinfo.pr_sname = (i < 0 || i > 5) ? '.' : "RSDZTD"[i];
-	psinfo.pr_zomb = psinfo.pr_sname == 'Z';
-	psinfo.pr_nice = task_nice(current);
-	psinfo.pr_flag = current->flags;
-	psinfo.pr_uid = NEW_TO_OLD_UID(current->uid);
-	psinfo.pr_gid = NEW_TO_OLD_GID(current->gid);
-	strncpy(psinfo.pr_fname, current->comm, sizeof(psinfo.pr_fname));
-
-	notes[2].name = "CORE";
-	notes[2].type = NT_TASKSTRUCT;
-	notes[2].datasz = sizeof(*current);
-	notes[2].data = current;
-
-	/* Try to dump the FPU. */
-	prstatus.pr_fpvalid = dump_fpu (regs, &fpu);
-	if (!prstatus.pr_fpvalid)
-	{
-		numnote--;
-	}
-	else
-	{
-		notes[3].name = "CORE";
-		notes[3].type = NT_PRFPREG;
-		notes[3].datasz = sizeof(fpu);
-		notes[3].data = &fpu;
-	}
-	
 	/* Write notes phdr entry */
 	{
 		struct elf_phdr phdr;
@@ -1173,17 +1328,12 @@
 
 		for(i = 0; i < numnote; i++)
 			sz += notesize(&notes[i]);
+		
+		if (dump_threads)
+			sz += thread_status_size;
 
-		phdr.p_type = PT_NOTE;
-		phdr.p_offset = offset;
-		phdr.p_vaddr = 0;
-		phdr.p_paddr = 0;
-		phdr.p_filesz = sz;
-		phdr.p_memsz = 0;
-		phdr.p_flags = 0;
-		phdr.p_align = 0;
-
-		offset += phdr.p_filesz;
+		fill_elf_note_phdr(&phdr, sz, offset);
+		offset += sz;
 		DUMP_WRITE(&phdr, sizeof(phdr));
 	}
 
@@ -1212,10 +1362,21 @@
 		DUMP_WRITE(&phdr, sizeof(phdr));
 	}
 
+ 	/* write out the notes section */
 	for(i = 0; i < numnote; i++)
 		if (!writenote(&notes[i], file))
 			goto end_coredump;
 
+	/* write out the thread status notes section */
+ 	if (dump_threads)  {
+		list_for_each(t, &thread_list) {
+			struct elf_thread_status *tmp = list_entry(t, struct elf_thread_status, list);
+			for (i = 0; i < tmp->num_notes; i++)
+				if (!writenote(&tmp->notes[i], file))
+					goto end_coredump;
+		}
+ 	}
+ 
 	DUMP_SEEK(dataoff);
 
 	for(vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
@@ -1259,11 +1420,23 @@
 		       (off_t) file->f_pos, offset);
 	}
 
- end_coredump:
+end_coredump:
 	set_fs(fs);
-	up_write(&current->mm->mmap_sem);
+
+cleanup:
+	if (dump_threads)  {
+		while(!list_empty(&thread_list)) {
+			struct list_head *tmp = thread_list.next;
+			list_del(tmp);
+			kfree(list_entry(tmp, struct elf_thread_status, list));
+		}
+
+		tcore_resume_threads();
+	}
+
 	return has_dumped;
 }
+
 #endif		/* USE_ELF_CORE_DUMP */
 
 static int __init init_elf_binfmt(void)
diff -urN -X dontdiff linux-2.5.18/include/asm-i386/elf.h linux2518.tcore/include/asm-i386/elf.h
--- linux-2.5.18/include/asm-i386/elf.h	Fri May 24 21:55:29 2002
+++ linux2518.tcore/include/asm-i386/elf.h	Tue May 28 11:54:05 2002
@@ -99,6 +99,16 @@
 
 #ifdef __KERNEL__
 #define SET_PERSONALITY(ex, ibcs2) set_personality((ibcs2)?PER_SVR4:PER_LINUX)
+
+
+extern int dump_task_regs (struct task_struct *, struct pt_regs *);
+extern int dump_task_fpu (struct task_struct *, struct user_i387_struct *);
+extern int dump_task_extended_fpu (struct task_struct *, struct user_fxsr_struct *);
+
+#define ELF_CORE_COPY_TASK_REGS(tsk, pt_regs) dump_task_regs(tsk, pt_regs)
+#define ELF_CORE_COPY_FPREGS(tsk, elf_fpregs) dump_task_fpu(tsk, elf_fpregs)
+#define ELF_CORE_COPY_XFPREGS(tsk, elf_xfpregs) dump_task_extended_fpu(tsk, elf_xfpregs)
+
 #endif
 
 #endif
diff -urN -X dontdiff linux-2.5.18/include/linux/elf.h linux2518.tcore/include/linux/elf.h
--- linux-2.5.18/include/linux/elf.h	Fri May 24 21:55:20 2002
+++ linux2518.tcore/include/linux/elf.h	Tue May 28 11:54:53 2002
@@ -576,6 +576,9 @@
 #define NT_PRPSINFO	3
 #define NT_TASKSTRUCT	4
 #define NT_PRFPXREG	20
+#define NT_PRXFPREG     0x46e62b7f	/* note name must be "LINUX" as per GDB */
+					/* from gdb5.1/include/elf/common.h */
+
 
 /* Note header in a PT_NOTE section */
 typedef struct elf32_note {
diff -urN -X dontdiff linux-2.5.18/include/linux/elfcore.h linux2518.tcore/include/linux/elfcore.h
--- linux-2.5.18/include/linux/elfcore.h	Fri May 24 21:55:26 2002
+++ linux2518.tcore/include/linux/elfcore.h	Tue May 28 11:54:53 2002
@@ -86,4 +86,55 @@
 #define PRARGSZ ELF_PRARGSZ 
 #endif
 
+#ifdef __KERNEL__
+static inline void elf_core_copy_regs(elf_gregset_t *elfregs, struct pt_regs *regs)
+{
+#ifdef ELF_CORE_COPY_REGS
+	ELF_CORE_COPY_REGS((*elfregs), regs)
+#else
+	if (sizeof(elf_gregset_t) != sizeof(struct pt_regs)) {
+		printk("sizeof(elf_gregset_t) (%ld) != sizeof(struct pt_regs) (%ld)\n",
+			(long)sizeof(elf_gregset_t), (long)sizeof(struct pt_regs));
+	} else
+		*(struct pt_regs *)elfregs = *regs;
+#endif
+}
+
+static inline int elf_core_copy_task_regs(struct task_struct *t, elf_gregset_t *elfregs)
+{
+#ifdef ELF_CORE_COPY_TASK_REGS
+	struct pt_regs regs;
+	
+	if (ELF_CORE_COPY_TASK_REGS(t, &regs)) {
+		elf_core_copy_regs(elfregs, &regs);
+		return 1;
+	}
+#endif
+	return 0;
+}
+
+extern int dump_fpu (struct pt_regs *, elf_fpregset_t *);
+
+static inline int elf_core_copy_task_fpregs(struct task_struct *t, elf_fpregset_t *fpu)
+{
+#ifdef ELF_CORE_COPY_FPREGS
+	return ELF_CORE_COPY_FPREGS(t, fpu);
+#else
+	return dump_fpu(NULL, fpu);
+#endif
+}
+
+static inline int elf_core_copy_task_xfpregs(struct task_struct *t, elf_fpxregset_t *xfpu)
+{
+#ifdef ELF_CORE_COPY_XFPREGS
+	return ELF_CORE_COPY_XFPREGS(t, xfpu);
+#else
+	return 0;
+#endif
+}
+
+
+#endif /* __KERNEL__ */
+
+
 #endif /* _LINUX_ELFCORE_H */
diff -urN -X dontdiff linux-2.5.18/include/linux/sched.h linux2518.tcore/include/linux/sched.h
--- linux-2.5.18/include/linux/sched.h	Fri May 24 21:55:19 2002
+++ linux2518.tcore/include/linux/sched.h	Tue May 28 11:52:48 2002
@@ -131,6 +131,14 @@
 
 #include <linux/spinlock.h>
 
+
+/* functions for pausing and resumming functions 
+ * common mm's without using signals.  These are used
+ * by the multithreaded elf core dump code in binfmt_elf.c*/
+extern void tcore_suspend_threads( void );
+extern void tcore_resume_threads( void );
+
+
 /*
  * This serializes "schedule()" and also protects
  * the run-queue from deletions/modifications (but
diff -urN -X dontdiff linux-2.5.18/include/linux/sysctl.h linux2518.tcore/include/linux/sysctl.h
--- linux-2.5.18/include/linux/sysctl.h	Fri May 24 21:55:17 2002
+++ linux2518.tcore/include/linux/sysctl.h	Tue May 28 11:52:48 2002
@@ -87,6 +87,7 @@
 	KERN_CAP_BSET=14,	/* int: capability bounding set */
 	KERN_PANIC=15,		/* int: panic timeout */
 	KERN_REALROOTDEV=16,	/* real root device to mount after initrd */
+	KERN_CORE_DUMPS_THREADS=17, /* int: include status of others threads in dump */
 
 	KERN_SPARC_REBOOT=21,	/* reboot command on Sparc */
 	KERN_CTLALTDEL=22,	/* int: allow ctl-alt-del to reboot */
diff -urN -X dontdiff linux-2.5.18/kernel/sched.c linux2518.tcore/kernel/sched.c
--- linux-2.5.18/kernel/sched.c	Fri May 24 21:55:22 2002
+++ linux2518.tcore/kernel/sched.c	Tue May 28 11:48:17 2002
@@ -140,7 +140,8 @@
 	list_t migration_queue;
 } ____cacheline_aligned;
 
-static struct runqueue runqueues[NR_CPUS] __cacheline_aligned;
+static struct runqueue runqueues[NR_CPUS + 1] __cacheline_aligned;
+#define PHANTOM_CPU NR_CPUS
 
 #define cpu_rq(cpu)		(runqueues + (cpu))
 #define this_rq()		cpu_rq(smp_processor_id())
@@ -249,6 +250,9 @@
 #ifdef CONFIG_SMP
 	int need_resched, nrpolling;
 
+	if( unlikely(!p->cpus_allowed) )
+			return;
+			
 	preempt_disable();
 	/* minimise the chance of sending an interrupt to poll_idle() */
 	nrpolling = test_tsk_thread_flag(p,TIF_POLLING_NRFLAG);
@@ -325,7 +329,7 @@
 	p->state = TASK_RUNNING;
 	if (!p->array) {
 		activate_task(p, rq);
-		if (p->prio < rq->curr->prio)
+		if (p->cpus_allowed && (p->prio < rq->curr->prio) )
 			resched_task(rq->curr);
 		success = 1;
 	}
@@ -982,6 +986,133 @@
 
 void scheduling_functions_end_here(void) { }
 
+/*
+ * needed for accurate core dumps of multi-threaded applications.
+ * see binfmt_elf.c for more information.
+ */
+static void reschedule_other_cpus(void)
+{
+#ifdef CONFIG_SMP
+	int i, cpu;
+	struct task_struct *p;
+
+	for(i=0; i< smp_num_cpus; i++) {
+		cpu = cpu_logical_map(i);
+		p = cpu_curr(cpu);
+		if (p->thread_info->cpu!= smp_processor_id()) {
+			set_tsk_need_resched(p);
+			smp_send_reschedule(p->thread_info->cpu);
+		}
+	}
+#endif	
+	return;
+}
+
+
+/* functions for pausing and resumming functions with out using signals */
+void tcore_suspend_threads(void)
+{
+	unsigned long flags;
+	runqueue_t *phantomQ;
+	task_t *threads[NR_CPUS], *p;
+	int i, OnCPUCount = 0;
+
+//
+// grab all the rq_locks.
+// current is the process dumping core
+//  
+
+	down_write(&current->mm->mmap_sem);
+	// avoid potential race on >2 way SMP if 3 or more thread procs
+	// dump core at the same time.
+	
+	local_irq_save(flags);
+
+	for(i=0; i< smp_num_cpus; i++) {
+		spin_lock(&cpu_rq(i)->lock);
+	}
+
+	current->cpus_allowed = 1UL << current->thread_info->cpu;
+	// prevent migraion of dumping process making life complicated.
+
+	phantomQ = cpu_rq(PHANTOM_CPU); 
+	spin_lock(&phantomQ->lock);
+	
+	reschedule_other_cpus();
+	// this is an optional IPI, but it makes for the most accurate core files possible.
+	
+	read_lock(&tasklist_lock);
+
+	for_each_task(p) {
+		if (current->mm == p->mm && current != p) {
+			if( p == task_rq(p)->curr ) {
+				//then remember it for later us of set_cpus_allowed
+				threads[OnCPUCount] = p;
+				p->cpus_allowed = 0;//prevent load balance from moving these guys.
+				OnCPUCount ++;
+			} else {
+				// we manualy move the process to the phantom run queue.
+
+				if (p->array) {
+					deactivate_task(p, task_rq(p));
+					activate_task(p, phantomQ);
+				}
+				p->thread_info->cpu = PHANTOM_CPU;
+				p->cpus_allowed = 0;//prevent load balance from moving these guys.
+			}
+		}
+	}
+	read_unlock(&tasklist_lock);
+
+	spin_unlock(&phantomQ->lock);
+	for(i=smp_num_cpus-1; 0<= i; i--) {
+		spin_unlock(&cpu_rq(i)->lock);
+	}
+
+	local_irq_restore(flags);
+
+	for( i = 0; i<OnCPUCount; i++) {
+		set_cpus_allowed(threads[i], 0);
+	}
+	
+	up_write(&current->mm->mmap_sem);
+}
+
+void tcore_resume_threads(void)
+{
+	unsigned long flags;
+	runqueue_t *phantomQ;
+	task_t *p;
+	int i;
+
+	local_irq_save(flags);
+	phantomQ = cpu_rq(PHANTOM_CPU);
+
+	spin_lock(&task_rq(current)->lock);
+	spin_lock(&phantomQ->lock);
+	
+	read_lock(&tasklist_lock);
+	for_each_task(p) {
+		if (current->mm == p->mm && current != p) {
+			p->cpus_allowed = 1UL << current->thread_info->cpu;
+			if (p->array) {
+				deactivate_task(p,phantomQ);
+				activate_task(p, task_rq(current));
+			}
+			p->thread_info->cpu = current->thread_info->cpu;
+		}
+	}
+
+	read_unlock(&tasklist_lock);
+
+	spin_unlock(&phantomQ->lock);
+	spin_unlock(&task_rq(current)->lock);
+
+	local_irq_restore(flags);
+}
+
+
+
 void set_user_nice(task_t *p, long nice)
 {
 	unsigned long flags;
@@ -1568,11 +1699,11 @@
 {
 	runqueue_t *rq;
 	int i, j, k;
+	prio_array_t *array;
 
-	for (i = 0; i < NR_CPUS; i++) {
-		runqueue_t *rq = cpu_rq(i);
-		prio_array_t *array;
 
+	for (i = 0; i < NR_CPUS; i++) {
+		rq = cpu_rq(i);
 		rq->active = rq->arrays;
 		rq->expired = rq->arrays + 1;
 		spin_lock_init(&rq->lock);
@@ -1589,6 +1720,28 @@
 			__set_bit(MAX_PRIO, array->bitmap);
 		}
 	}
+
+ 
+	i = PHANTOM_CPU;
+	rq = cpu_rq(i);
+	rq->active = rq->arrays;
+	rq->expired = rq->arrays + 1;
+	rq->curr = NULL;
+	spin_lock_init(&rq->lock);
+	spin_lock_init(&rq->frozen);
+	INIT_LIST_HEAD(&rq->migration_queue);
+
+	for (j = 0; j < 2; j++) {
+		array = rq->arrays + j;
+		for (k = 0; k < MAX_PRIO; k++) {
+			INIT_LIST_HEAD(array->queue + k);
+			__clear_bit(k, array->bitmap);
+		}
+		// delimiter for bitsearch
+		__set_bit(MAX_PRIO, array->bitmap);
+	}
+
+
 	/*
 	 * We have to do a little magic to get the first
 	 * process right in SMP mode.
@@ -1648,9 +1801,11 @@
 	migration_req_t req;
 	runqueue_t *rq;
 
-	new_mask &= cpu_online_map;
-	if (!new_mask)
-		BUG();
+	if(new_mask){ // can be O for TCore process suspends
+		new_mask &= cpu_online_map;
+		if (!new_mask)
+			BUG();
+	}
 
 	preempt_disable();
 	rq = task_rq_lock(p, &flags);
@@ -1723,7 +1878,12 @@
 		spin_unlock_irqrestore(&rq->lock, flags);
 
 		p = req->task;
-		cpu_dest = __ffs(p->cpus_allowed);
+
+		if( p->cpus_allowed)
+			cpu_dest = __ffs(p->cpus_allowed);
+		else
+			cpu_dest = PHANTOM_CPU;
+
 		rq_dest = cpu_rq(cpu_dest);
 repeat:
 		cpu_src = p->thread_info->cpu;
diff -urN -X dontdiff linux-2.5.18/kernel/sysctl.c linux2518.tcore/kernel/sysctl.c
--- linux-2.5.18/kernel/sysctl.c	Fri May 24 21:55:19 2002
+++ linux2518.tcore/kernel/sysctl.c	Tue May 28 13:17:00 2002
@@ -38,6 +38,8 @@
 #include <linux/nfs_fs.h>
 #endif
 
+int core_dumps_threads = 1;
+
 #if defined(CONFIG_SYSCTL)
 
 /* External variables not in a header file. */
@@ -171,7 +173,9 @@
 	 0644, NULL, &proc_dointvec},
 	{KERN_TAINTED, "tainted", &tainted, sizeof(int),
 	 0644, NULL, &proc_dointvec},
-	{KERN_CAP_BSET, "cap-bound", &cap_bset, sizeof(kernel_cap_t),
+	{KERN_CORE_DUMPS_THREADS, "core_dumps_threads", &core_dumps_threads, sizeof(int),
+	 0644, NULL, &proc_dointvec},
+	 {KERN_CAP_BSET, "cap-bound", &cap_bset, sizeof(kernel_cap_t),
 	 0600, NULL, &proc_dointvec_bset},
 #ifdef CONFIG_BLK_DEV_INITRD
 	{KERN_REALROOTDEV, "real-root-dev", &real_root_dev, sizeof(int),


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2002-05-28 20:41 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2002-05-28 17:46 [PATCH] more mutlithreaded core dumps for 2.5.18 mgross

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).