All of lore.kernel.org
 help / color / mirror / Atom feed
* + binfmt-introduce-coredump-parameter-structure.patch added to -mm tree
@ 2009-11-20 22:12 akpm
  2009-11-26  8:53 ` KOSAKI Motohiro
  0 siblings, 1 reply; 28+ messages in thread
From: akpm @ 2009-11-20 22:12 UTC (permalink / raw)
  To: mm-commits
  Cc: mhiramat, dhowells, hidehiro.kawai.ez, lethal, mingo, oleg,
	roland, vapier


The patch titled
     binfmt: introduce coredump parameter structure
has been added to the -mm tree.  Its filename is
     binfmt-introduce-coredump-parameter-structure.patch

Before you just go and hit "reply", please:
   a) Consider who else should be cc'ed
   b) Prefer to cc a suitable mailing list as well
   c) Ideally: find the original patch on the mailing list and do a
      reply-to-all to that, adding suitable additional cc's

*** Remember to use Documentation/SubmitChecklist when testing your code ***

See http://userweb.kernel.org/~akpm/stuff/added-to-mm.txt to find
out what to do about this

The current -mm tree may be found at http://userweb.kernel.org/~akpm/mmotm/

------------------------------------------------------
Subject: binfmt: introduce coredump parameter structure
From: Masami Hiramatsu <mhiramat@redhat.com>

These patches are for fixing coredump mm->flags consistency issue.

---
1787         if (mm->core_state || !get_dumpable(mm)) {  <- (1)
1788                 up_write(&mm->mmap_sem);
1789                 put_cred(cred);
1790                 goto fail;
1791         }
1792
[...]
1798         if (get_dumpable(mm) == 2) {    /* Setuid core dump mode */ <-(2)
1799                 flag = O_EXCL;          /* Stop rewrite attacks */
1800                 cred->fsuid = 0;        /* Dump root private */
1801         }
---

Since dumpable bits are not protected by lock, there is a chance to change
these bits between (1) and (2).

To solve this issue, this patch copies mm->flags to
coredump_params.mm_flags at the beginning of do_coredump() and uses it
instead of get_dumpable() while dumping core.  This series also introduce
coredump parameter structure for simplify bimfmt->core_dump interface.



This patch:

Introduce coredump parameter data structure (struct coredump_params) for
simplifying binfmt->core_dump() arguments.  This also cleanup DUMP_WRITE()
in elf_core_dump() by style issue.

Signed-off-by: Masami Hiramatsu <mhiramat@redhat.com>
Suggested-by: Ingo Molnar <mingo@elte.hu>
Cc: Hidehiro Kawai <hidehiro.kawai.ez@hitachi.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Roland McGrath <roland@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Mike Frysinger <vapier@gentoo.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---

 fs/binfmt_aout.c        |   13 +++++++------
 fs/binfmt_elf.c         |   38 ++++++++++++++++++++++----------------
 fs/binfmt_elf_fdpic.c   |   28 ++++++++++++++--------------
 fs/binfmt_flat.c        |    6 +++---
 fs/binfmt_som.c         |    2 +-
 fs/exec.c               |   38 +++++++++++++++++++++-----------------
 include/linux/binfmts.h |   10 +++++++++-
 7 files changed, 77 insertions(+), 58 deletions(-)

diff -puN fs/binfmt_aout.c~binfmt-introduce-coredump-parameter-structure fs/binfmt_aout.c
--- a/fs/binfmt_aout.c~binfmt-introduce-coredump-parameter-structure
+++ a/fs/binfmt_aout.c
@@ -32,7 +32,7 @@
 
 static int load_aout_binary(struct linux_binprm *, struct pt_regs * regs);
 static int load_aout_library(struct file*);
-static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit);
+static int aout_core_dump(struct coredump_params *cprm);
 
 static struct linux_binfmt aout_format = {
 	.module		= THIS_MODULE,
@@ -89,8 +89,9 @@ if (file->f_op->llseek) { \
  * dumping of the process results in another error..
  */
 
-static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit)
+static int aout_core_dump(struct coredump_params *cprm)
 {
+	struct file *file = cprm->file;
 	mm_segment_t fs;
 	int has_dumped = 0;
 	unsigned long dump_start, dump_size;
@@ -108,16 +109,16 @@ static int aout_core_dump(long signr, st
 	current->flags |= PF_DUMPCORE;
        	strncpy(dump.u_comm, current->comm, sizeof(dump.u_comm));
 	dump.u_ar0 = offsetof(struct user, regs);
-	dump.signal = signr;
-	aout_dump_thread(regs, &dump);
+	dump.signal = cprm->signr;
+	aout_dump_thread(cprm->regs, &dump);
 
 /* If the size of the dump file exceeds the rlimit, then see what would happen
    if we wrote the stack, but not the data area.  */
-	if ((dump.u_dsize + dump.u_ssize+1) * PAGE_SIZE > limit)
+	if ((dump.u_dsize + dump.u_ssize+1) * PAGE_SIZE > cprm->limit)
 		dump.u_dsize = 0;
 
 /* Make sure we have enough room to write the stack and data areas. */
-	if ((dump.u_ssize + 1) * PAGE_SIZE > limit)
+	if ((dump.u_ssize + 1) * PAGE_SIZE > cprm->limit)
 		dump.u_ssize = 0;
 
 /* make sure we actually have a data and stack area to dump */
diff -puN fs/binfmt_elf.c~binfmt-introduce-coredump-parameter-structure fs/binfmt_elf.c
--- a/fs/binfmt_elf.c~binfmt-introduce-coredump-parameter-structure
+++ a/fs/binfmt_elf.c
@@ -45,7 +45,7 @@ static unsigned long elf_map(struct file
  * don't even try.
  */
 #if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
-static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit);
+static int elf_core_dump(struct coredump_params *cprm);
 #else
 #define elf_core_dump	NULL
 #endif
@@ -1277,10 +1277,6 @@ static int writenote(struct memelfnote *
 }
 #undef DUMP_WRITE
 
-#define DUMP_WRITE(addr, nr)	\
-	if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \
-		goto end_coredump;
-
 static void fill_elf_header(struct elfhdr *elf, int segs,
 			    u16 machine, u32 flags, u8 osabi)
 {
@@ -1906,7 +1902,7 @@ static struct vm_area_struct *next_vma(s
  * and then they are actually written out.  If we run out of core limit
  * we just truncate.
  */
-static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit)
+static int elf_core_dump(struct coredump_params *cprm)
 {
 	int has_dumped = 0;
 	mm_segment_t fs;
@@ -1952,7 +1948,7 @@ static int elf_core_dump(long signr, str
 	 * notes.  This also sets up the file header.
 	 */
 	if (!fill_note_info(elf, segs + 1, /* including notes section */
-			    &info, signr, regs))
+			    &info, cprm->signr, cprm->regs))
 		goto cleanup;
 
 	has_dumped = 1;
@@ -1961,7 +1957,10 @@ static int elf_core_dump(long signr, str
 	fs = get_fs();
 	set_fs(KERNEL_DS);
 
-	DUMP_WRITE(elf, sizeof(*elf));
+	size += sizeof(*elf);
+	if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf)))
+		goto end_coredump;
+
 	offset += sizeof(*elf);				/* Elf header */
 	offset += (segs + 1) * sizeof(struct elf_phdr); /* Program headers */
 	foffset = offset;
@@ -1975,7 +1974,10 @@ static int elf_core_dump(long signr, str
 
 		fill_elf_note_phdr(&phdr, sz, offset);
 		offset += sz;
-		DUMP_WRITE(&phdr, sizeof(phdr));
+		size += sizeof(phdr);
+		if (size > cprm->limit ||
+		    !dump_write(cprm->file, &phdr, sizeof(phdr)))
+			goto end_coredump;
 	}
 
 	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
@@ -2006,7 +2008,10 @@ static int elf_core_dump(long signr, str
 			phdr.p_flags |= PF_X;
 		phdr.p_align = ELF_EXEC_PAGESIZE;
 
-		DUMP_WRITE(&phdr, sizeof(phdr));
+		size += sizeof(phdr);
+		if (size > cprm->limit ||
+		    !dump_write(cprm->file, &phdr, sizeof(phdr)))
+			goto end_coredump;
 	}
 
 #ifdef ELF_CORE_WRITE_EXTRA_PHDRS
@@ -2014,14 +2019,14 @@ static int elf_core_dump(long signr, str
 #endif
 
  	/* write out the notes section */
-	if (!write_note_info(&info, file, &foffset))
+	if (!write_note_info(&info, cprm->file, &foffset))
 		goto end_coredump;
 
-	if (elf_coredump_extra_notes_write(file, &foffset))
+	if (elf_coredump_extra_notes_write(cprm->file, &foffset))
 		goto end_coredump;
 
 	/* Align to page */
-	if (!dump_seek(file, dataoff - foffset))
+	if (!dump_seek(cprm->file, dataoff - foffset))
 		goto end_coredump;
 
 	for (vma = first_vma(current, gate_vma); vma != NULL;
@@ -2038,12 +2043,13 @@ static int elf_core_dump(long signr, str
 			page = get_dump_page(addr);
 			if (page) {
 				void *kaddr = kmap(page);
-				stop = ((size += PAGE_SIZE) > limit) ||
-					!dump_write(file, kaddr, PAGE_SIZE);
+				stop = ((size += PAGE_SIZE) > cprm->limit) ||
+					!dump_write(cprm->file, kaddr,
+						    PAGE_SIZE);
 				kunmap(page);
 				page_cache_release(page);
 			} else
-				stop = !dump_seek(file, PAGE_SIZE);
+				stop = !dump_seek(cprm->file, PAGE_SIZE);
 			if (stop)
 				goto end_coredump;
 		}
diff -puN fs/binfmt_elf_fdpic.c~binfmt-introduce-coredump-parameter-structure fs/binfmt_elf_fdpic.c
--- a/fs/binfmt_elf_fdpic.c~binfmt-introduce-coredump-parameter-structure
+++ a/fs/binfmt_elf_fdpic.c
@@ -76,7 +76,7 @@ static int elf_fdpic_map_file_by_direct_
 					     struct file *, struct mm_struct *);
 
 #if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
-static int elf_fdpic_core_dump(long, struct pt_regs *, struct file *, unsigned long limit);
+static int elf_fdpic_core_dump(struct coredump_params *cprm);
 #endif
 
 static struct linux_binfmt elf_fdpic_format = {
@@ -1582,8 +1582,7 @@ static int elf_fdpic_dump_segments(struc
  * and then they are actually written out.  If we run out of core limit
  * we just truncate.
  */
-static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
-			       struct file *file, unsigned long limit)
+static int elf_fdpic_core_dump(struct coredump_params *cprm)
 {
 #define	NUM_NOTES	6
 	int has_dumped = 0;
@@ -1642,7 +1641,7 @@ static int elf_fdpic_core_dump(long sign
 		goto cleanup;
 #endif
 
-	if (signr) {
+	if (cprm->signr) {
 		struct core_thread *ct;
 		struct elf_thread_status *tmp;
 
@@ -1661,14 +1660,14 @@ static int elf_fdpic_core_dump(long sign
 			int sz;
 
 			tmp = list_entry(t, struct elf_thread_status, list);
-			sz = elf_dump_thread_status(signr, tmp);
+			sz = elf_dump_thread_status(cprm->signr, tmp);
 			thread_status_size += sz;
 		}
 	}
 
 	/* now collect the dump for the current */
-	fill_prstatus(prstatus, current, signr);
-	elf_core_copy_regs(&prstatus->pr_reg, regs);
+	fill_prstatus(prstatus, current, cprm->signr);
+	elf_core_copy_regs(&prstatus->pr_reg, cprm->regs);
 
 	segs = current->mm->map_count;
 #ifdef ELF_CORE_EXTRA_PHDRS
@@ -1703,7 +1702,7 @@ static int elf_fdpic_core_dump(long sign
 
   	/* Try to dump the FPU. */
 	if ((prstatus->pr_fpvalid =
-	     elf_core_copy_task_fpregs(current, regs, fpu)))
+	     elf_core_copy_task_fpregs(current, cprm->regs, fpu)))
 		fill_note(notes + numnote++,
 			  "CORE", NT_PRFPREG, sizeof(*fpu), fpu);
 #ifdef ELF_CORE_COPY_XFPREGS
@@ -1774,7 +1773,7 @@ static int elf_fdpic_core_dump(long sign
 
  	/* write out the notes section */
 	for (i = 0; i < numnote; i++)
-		if (!writenote(notes + i, file))
+		if (!writenote(notes + i, cprm->file))
 			goto end_coredump;
 
 	/* write out the thread status notes section */
@@ -1783,25 +1782,26 @@ static int elf_fdpic_core_dump(long sign
 				list_entry(t, struct elf_thread_status, list);
 
 		for (i = 0; i < tmp->num_notes; i++)
-			if (!writenote(&tmp->notes[i], file))
+			if (!writenote(&tmp->notes[i], cprm->file))
 				goto end_coredump;
 	}
 
-	if (!dump_seek(file, dataoff))
+	if (!dump_seek(cprm->file, dataoff))
 		goto end_coredump;
 
-	if (elf_fdpic_dump_segments(file, &size, &limit, mm_flags) < 0)
+	if (elf_fdpic_dump_segments(cprm->file, &size, &cprm->limit,
+				    mm_flags) < 0)
 		goto end_coredump;
 
 #ifdef ELF_CORE_WRITE_EXTRA_DATA
 	ELF_CORE_WRITE_EXTRA_DATA;
 #endif
 
-	if (file->f_pos != offset) {
+	if (cprm->file->f_pos != offset) {
 		/* Sanity check */
 		printk(KERN_WARNING
 		       "elf_core_dump: file->f_pos (%lld) != offset (%lld)\n",
-		       file->f_pos, offset);
+		       cprm->file->f_pos, offset);
 	}
 
 end_coredump:
diff -puN fs/binfmt_flat.c~binfmt-introduce-coredump-parameter-structure fs/binfmt_flat.c
--- a/fs/binfmt_flat.c~binfmt-introduce-coredump-parameter-structure
+++ a/fs/binfmt_flat.c
@@ -87,7 +87,7 @@ static int load_flat_shared_library(int 
 #endif
 
 static int load_flat_binary(struct linux_binprm *, struct pt_regs * regs);
-static int flat_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit);
+static int flat_core_dump(struct coredump_params *cprm);
 
 static struct linux_binfmt flat_format = {
 	.module		= THIS_MODULE,
@@ -102,10 +102,10 @@ static struct linux_binfmt flat_format =
  * Currently only a stub-function.
  */
 
-static int flat_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit)
+static int flat_core_dump(struct coredump_params *cprm)
 {
 	printk("Process %s:%d received signr %d and should have core dumped\n",
-			current->comm, current->pid, (int) signr);
+			current->comm, current->pid, (int) cprm->signr);
 	return(1);
 }
 
diff -puN fs/binfmt_som.c~binfmt-introduce-coredump-parameter-structure fs/binfmt_som.c
--- a/fs/binfmt_som.c~binfmt-introduce-coredump-parameter-structure
+++ a/fs/binfmt_som.c
@@ -43,7 +43,7 @@ static int load_som_library(struct file 
  * don't even try.
  */
 #if 0
-static int som_core_dump(long signr, struct pt_regs *regs, unsigned long limit);
+static int som_core_dump(struct coredump_params *cprm);
 #else
 #define som_core_dump	NULL
 #endif
diff -puN fs/exec.c~binfmt-introduce-coredump-parameter-structure fs/exec.c
--- a/fs/exec.c~binfmt-introduce-coredump-parameter-structure
+++ a/fs/exec.c
@@ -1761,17 +1761,20 @@ void do_coredump(long signr, int exit_co
 	struct mm_struct *mm = current->mm;
 	struct linux_binfmt * binfmt;
 	struct inode * inode;
-	struct file * file;
 	const struct cred *old_cred;
 	struct cred *cred;
 	int retval = 0;
 	int flag = 0;
 	int ispipe = 0;
-	unsigned long core_limit = current->signal->rlim[RLIMIT_CORE].rlim_cur;
 	char **helper_argv = NULL;
 	int helper_argc = 0;
 	int dump_count = 0;
 	static atomic_t core_dump_count = ATOMIC_INIT(0);
+	struct coredump_params cprm = {
+		.signr = signr,
+		.regs = regs,
+		.limit = current->signal->rlim[RLIMIT_CORE].rlim_cur,
+	};
 
 	audit_core_dumps(signr);
 
@@ -1827,15 +1830,15 @@ void do_coredump(long signr, int exit_co
 	ispipe = format_corename(corename, signr);
 	unlock_kernel();
 
-	if ((!ispipe) && (core_limit < binfmt->min_coredump))
+	if ((!ispipe) && (cprm.limit < binfmt->min_coredump))
 		goto fail_unlock;
 
  	if (ispipe) {
-		if (core_limit == 0) {
+		if (cprm.limit == 0) {
 			/*
 			 * Normally core limits are irrelevant to pipes, since
 			 * we're not writing to the file system, but we use
-			 * core_limit of 0 here as a speacial value. Any
+			 * cprm.limit of 0 here as a speacial value. Any
 			 * non-zero limit gets set to RLIM_INFINITY below, but
 			 * a limit of 0 skips the dump.  This is a consistent
 			 * way to catch recursive crashes.  We can still crash
@@ -1868,25 +1871,25 @@ void do_coredump(long signr, int exit_co
 			goto fail_dropcount;
 		}
 
-		core_limit = RLIM_INFINITY;
+		cprm.limit = RLIM_INFINITY;
 
 		/* SIGPIPE can happen, but it's just never processed */
 		if (call_usermodehelper_pipe(helper_argv[0], helper_argv, NULL,
-				&file)) {
+				&cprm.file)) {
  			printk(KERN_INFO "Core dump to %s pipe failed\n",
 			       corename);
 			goto fail_dropcount;
  		}
  	} else
- 		file = filp_open(corename,
+		cprm.file = filp_open(corename,
 				 O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
 				 0600);
-	if (IS_ERR(file))
+	if (IS_ERR(cprm.file))
 		goto fail_dropcount;
-	inode = file->f_path.dentry->d_inode;
+	inode = cprm.file->f_path.dentry->d_inode;
 	if (inode->i_nlink > 1)
 		goto close_fail;	/* multiple links - don't dump */
-	if (!ispipe && d_unhashed(file->f_path.dentry))
+	if (!ispipe && d_unhashed(cprm.file->f_path.dentry))
 		goto close_fail;
 
 	/* AK: actually i see no reason to not allow this for named pipes etc.,
@@ -1899,21 +1902,22 @@ void do_coredump(long signr, int exit_co
 	 */
 	if (inode->i_uid != current_fsuid())
 		goto close_fail;
-	if (!file->f_op)
+	if (!cprm.file->f_op)
 		goto close_fail;
-	if (!file->f_op->write)
+	if (!cprm.file->f_op->write)
 		goto close_fail;
-	if (!ispipe && do_truncate(file->f_path.dentry, 0, 0, file) != 0)
+	if (!ispipe &&
+	    do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file) != 0)
 		goto close_fail;
 
-	retval = binfmt->core_dump(signr, regs, file, core_limit);
+	retval = binfmt->core_dump(&cprm);
 
 	if (retval)
 		current->signal->group_exit_code |= 0x80;
 close_fail:
 	if (ispipe && core_pipe_limit)
-		wait_for_dump_helpers(file);
-	filp_close(file, NULL);
+		wait_for_dump_helpers(cprm.file);
+	filp_close(cprm.file, NULL);
 fail_dropcount:
 	if (dump_count)
 		atomic_dec(&core_dump_count);
diff -puN include/linux/binfmts.h~binfmt-introduce-coredump-parameter-structure include/linux/binfmts.h
--- a/include/linux/binfmts.h~binfmt-introduce-coredump-parameter-structure
+++ a/include/linux/binfmts.h
@@ -68,6 +68,14 @@ struct linux_binprm{
 
 #define BINPRM_MAX_RECURSION 4
 
+/* Function parameter for binfmt->coredump */
+struct coredump_params {
+	long signr;
+	struct pt_regs *regs;
+	struct file *file;
+	unsigned long limit;
+};
+
 /*
  * This structure defines the functions that are used to load the binary formats that
  * linux accepts.
@@ -77,7 +85,7 @@ struct linux_binfmt {
 	struct module *module;
 	int (*load_binary)(struct linux_binprm *, struct  pt_regs * regs);
 	int (*load_shlib)(struct file *);
-	int (*core_dump)(long signr, struct pt_regs *regs, struct file *file, unsigned long limit);
+	int (*core_dump)(struct coredump_params *cprm);
 	unsigned long min_coredump;	/* minimal dump size */
 	int hasvdso;
 };
_

Patches currently in -mm which might be from mhiramat@redhat.com are

linux-next.patch
binfmt-introduce-coredump-parameter-structure.patch
binfmt-pass-mm-flags-as-a-coredump-parameter-for-consistency.patch


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: + binfmt-introduce-coredump-parameter-structure.patch added to -mm tree
  2009-11-20 22:12 + binfmt-introduce-coredump-parameter-structure.patch added to -mm tree akpm
@ 2009-11-26  8:53 ` KOSAKI Motohiro
  2009-11-26 15:50   ` Masami Hiramatsu
  0 siblings, 1 reply; 28+ messages in thread
From: KOSAKI Motohiro @ 2009-11-26  8:53 UTC (permalink / raw)
  To: linux-kernel, mhiramat
  Cc: kosaki.motohiro, dhowells, hidehiro.kawai.ez, lethal, mingo,
	oleg, roland, vapier

Hi Hiramatsu-san,

> 
> The patch titled
>      binfmt: introduce coredump parameter structure
> has been added to the -mm tree.  Its filename is
>      binfmt-introduce-coredump-parameter-structure.patch
> 
> Before you just go and hit "reply", please:
>    a) Consider who else should be cc'ed
>    b) Prefer to cc a suitable mailing list as well
>    c) Ideally: find the original patch on the mailing list and do a
>       reply-to-all to that, adding suitable additional cc's
> 
> *** Remember to use Documentation/SubmitChecklist when testing your code ***
> 
> See http://userweb.kernel.org/~akpm/stuff/added-to-mm.txt to find
> out what to do about this
> 
> The current -mm tree may be found at http://userweb.kernel.org/~akpm/mmotm/
> 
> ------------------------------------------------------
> Subject: binfmt: introduce coredump parameter structure
> From: Masami Hiramatsu <mhiramat@redhat.com>
> 
> These patches are for fixing coredump mm->flags consistency issue.
> 
> ---
> 1787         if (mm->core_state || !get_dumpable(mm)) {  <- (1)
> 1788                 up_write(&mm->mmap_sem);
> 1789                 put_cred(cred);
> 1790                 goto fail;
> 1791         }
> 1792
> [...]
> 1798         if (get_dumpable(mm) == 2) {    /* Setuid core dump mode */ <-(2)
> 1799                 flag = O_EXCL;          /* Stop rewrite attacks */
> 1800                 cred->fsuid = 0;        /* Dump root private */
> 1801         }
> ---
> 
> Since dumpable bits are not protected by lock, there is a chance to change
> these bits between (1) and (2).
> 
> To solve this issue, this patch copies mm->flags to
> coredump_params.mm_flags at the beginning of do_coredump() and uses it
> instead of get_dumpable() while dumping core.  This series also introduce
> coredump parameter structure for simplify bimfmt->core_dump interface.
> 
> 
> 
> This patch:
> 
> Introduce coredump parameter data structure (struct coredump_params) for
> simplifying binfmt->core_dump() arguments.  This also cleanup DUMP_WRITE()
> in elf_core_dump() by style issue.


This patch break ia64 because arch specific ELF_CORE_EXTRA_PHDRS and 
ELF_CORE_WRITE_EXTRA_DATA still use DUMP_WRITE. I expect this patch 
break uml too.




^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: + binfmt-introduce-coredump-parameter-structure.patch added to -mm tree
  2009-11-26  8:53 ` KOSAKI Motohiro
@ 2009-11-26 15:50   ` Masami Hiramatsu
  2009-11-26 16:51     ` Oleg Nesterov
  0 siblings, 1 reply; 28+ messages in thread
From: Masami Hiramatsu @ 2009-11-26 15:50 UTC (permalink / raw)
  To: KOSAKI Motohiro
  Cc: linux-kernel, dhowells, hidehiro.kawai.ez, lethal, mingo, oleg,
	roland, vapier, Takahiro Yasui

Hi Kosaki-san,

KOSAKI Motohiro wrote:
> Hi Hiramatsu-san,
> 
>>
>> The patch titled
>>      binfmt: introduce coredump parameter structure
>> has been added to the -mm tree.  Its filename is
>>      binfmt-introduce-coredump-parameter-structure.patch
>>
>> Before you just go and hit "reply", please:
>>    a) Consider who else should be cc'ed
>>    b) Prefer to cc a suitable mailing list as well
>>    c) Ideally: find the original patch on the mailing list and do a
>>       reply-to-all to that, adding suitable additional cc's
>>
>> *** Remember to use Documentation/SubmitChecklist when testing your code ***
>>
>> See http://userweb.kernel.org/~akpm/stuff/added-to-mm.txt to find
>> out what to do about this
>>
>> The current -mm tree may be found at http://userweb.kernel.org/~akpm/mmotm/
>>
>> ------------------------------------------------------
>> Subject: binfmt: introduce coredump parameter structure
>> From: Masami Hiramatsu <mhiramat@redhat.com>
>>
>> These patches are for fixing coredump mm->flags consistency issue.
>>
>> ---
>> 1787         if (mm->core_state || !get_dumpable(mm)) {  <- (1)
>> 1788                 up_write(&mm->mmap_sem);
>> 1789                 put_cred(cred);
>> 1790                 goto fail;
>> 1791         }
>> 1792
>> [...]
>> 1798         if (get_dumpable(mm) == 2) {    /* Setuid core dump mode */ <-(2)
>> 1799                 flag = O_EXCL;          /* Stop rewrite attacks */
>> 1800                 cred->fsuid = 0;        /* Dump root private */
>> 1801         }
>> ---
>>
>> Since dumpable bits are not protected by lock, there is a chance to change
>> these bits between (1) and (2).
>>
>> To solve this issue, this patch copies mm->flags to
>> coredump_params.mm_flags at the beginning of do_coredump() and uses it
>> instead of get_dumpable() while dumping core.  This series also introduce
>> coredump parameter structure for simplify bimfmt->core_dump interface.
>>
>>
>>
>> This patch:
>>
>> Introduce coredump parameter data structure (struct coredump_params) for
>> simplifying binfmt->core_dump() arguments.  This also cleanup DUMP_WRITE()
>> in elf_core_dump() by style issue.
> 
> 
> This patch break ia64 because arch specific ELF_CORE_EXTRA_PHDRS and 
> ELF_CORE_WRITE_EXTRA_DATA still use DUMP_WRITE. I expect this patch 
> break uml too.

$ grep -r DUMP_WRITE arch/*/include
arch/ia64/include/asm/elf.h:            DUMP_WRITE(&phdr, sizeof(phdr));       \
arch/ia64/include/asm/elf.h:                    DUMP_WRITE((void *) gate_phdrs[\

Oops, certainly, that's a problem.
IMHO, we should not do like that, all parameter required by a macro should be
specified explicitly, since it reduces readability so much...
I think we'd better make those macros inline function, check it's return value
for error handling.

What would you think about it?

Thank you,

-- 
Masami Hiramatsu

Software Engineer
Hitachi Computer Products (America), Inc.
Software Solutions Division

e-mail: mhiramat@redhat.com


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: + binfmt-introduce-coredump-parameter-structure.patch added to -mm tree
  2009-11-26 15:50   ` Masami Hiramatsu
@ 2009-11-26 16:51     ` Oleg Nesterov
  2009-11-29  3:59       ` Masami Hiramatsu
                         ` (2 more replies)
  0 siblings, 3 replies; 28+ messages in thread
From: Oleg Nesterov @ 2009-11-26 16:51 UTC (permalink / raw)
  To: Masami Hiramatsu
  Cc: KOSAKI Motohiro, linux-kernel, dhowells, hidehiro.kawai.ez,
	lethal, mingo, roland, vapier, Takahiro Yasui

On 11/26, Masami Hiramatsu wrote:
>
> $ grep -r DUMP_WRITE arch/*/include
> arch/ia64/include/asm/elf.h:            DUMP_WRITE(&phdr, sizeof(phdr));       \
> arch/ia64/include/asm/elf.h:                    DUMP_WRITE((void *) gate_phdrs[\

arch/um/sys-i386/asm/elf.h uses DUMP_WRITE() too.

> Oops, certainly, that's a problem.
> IMHO, we should not do like that, all parameter required by a macro should be
> specified explicitly, since it reduces readability so much...
> I think we'd better make those macros inline function, check it's return value
> for error handling.

Agreed, DUMP_WRITE() in its current form should die. Not only
it has implicit parameter, it does "goto" from the macro body
and it has multiple definitions withing the same file.

But perhaps this needs a separate patch? It is not trivial to kill
DUMP_WRITE(), you can fix this patch if you change DUMP_WRITE()
to use cprm->file instead of file.

Oleg.


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: + binfmt-introduce-coredump-parameter-structure.patch added to -mm tree
  2009-11-26 16:51     ` Oleg Nesterov
@ 2009-11-29  3:59       ` Masami Hiramatsu
  2009-11-29  4:39       ` [PATCH v2] mm: Introduce coredump parameter structure Masami Hiramatsu
  2009-11-29  4:41       ` Masami Hiramatsu
  2 siblings, 0 replies; 28+ messages in thread
From: Masami Hiramatsu @ 2009-11-29  3:59 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: KOSAKI Motohiro, linux-kernel, dhowells, hidehiro.kawai.ez,
	lethal, mingo, roland, vapier, Takahiro Yasui

Oleg Nesterov wrote:
> On 11/26, Masami Hiramatsu wrote:
>>
>> $ grep -r DUMP_WRITE arch/*/include
>> arch/ia64/include/asm/elf.h:            DUMP_WRITE(&phdr, sizeof(phdr));       \
>> arch/ia64/include/asm/elf.h:                    DUMP_WRITE((void *) gate_phdrs[\
>
> arch/um/sys-i386/asm/elf.h uses DUMP_WRITE() too.
>
>> Oops, certainly, that's a problem.
>> IMHO, we should not do like that, all parameter required by a macro should be
>> specified explicitly, since it reduces readability so much...
>> I think we'd better make those macros inline function, check it's return value
>> for error handling.
>
> Agreed, DUMP_WRITE() in its current form should die. Not only
> it has implicit parameter, it does "goto" from the macro body
> and it has multiple definitions withing the same file.
>
> But perhaps this needs a separate patch? It is not trivial to kill
> DUMP_WRITE(), you can fix this patch if you change DUMP_WRITE()
> to use cprm->file instead of file.

Hmm, certainly, that should be separated. So I just change DUMP_WRITE()
to use cprm->limit/file.

Thank you,

-- 
Masami Hiramatsu

Software Engineer
Hitachi Computer Products (America), Inc.
Software Solutions Division

e-mail: mhiramat@redhat.com


^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH v2] mm: Introduce coredump parameter structure
  2009-11-26 16:51     ` Oleg Nesterov
  2009-11-29  3:59       ` Masami Hiramatsu
@ 2009-11-29  4:39       ` Masami Hiramatsu
  2009-11-29  4:41       ` Masami Hiramatsu
  2 siblings, 0 replies; 28+ messages in thread
From: Masami Hiramatsu @ 2009-11-29  4:39 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: KOSAKI Motohiro, linux-kernel, dhowells, hidehiro.kawai.ez,
	lethal, mingo, roland, vapier, Takahiro Yasui

Introduce coredump parameter data structure (struct coredump_params)
for simplifying binfmt->core_dump() arguments.

Changes in v2:
  - Don't remove DUMP_WRITE() macro.

Signed-off-by: Masami Hiramatsu <mhiramat@redhat.com>
Suggested-by: Ingo Molnar <mingo@elte.hu>
Cc: Hidehiro Kawai <hidehiro.kawai.ez@hitachi.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Roland McGrath <roland@redhat.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
---

Andrew, please replace the previous buggy patch with this version.

Thank you,

   fs/binfmt_aout.c        |   13 +++++++------
   fs/binfmt_elf.c         |   24 +++++++++++++-----------
   fs/binfmt_elf_fdpic.c   |   29 +++++++++++++++--------------
   fs/binfmt_flat.c        |    6 +++---
   fs/binfmt_som.c         |    2 +-
   fs/exec.c               |   38 +++++++++++++++++++++-----------------
   include/linux/binfmts.h |   10 +++++++++-
   7 files changed, 69 insertions(+), 53 deletions(-)


diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index b639dcf..346b694 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -32,7 +32,7 @@

   static int load_aout_binary(struct linux_binprm *, struct pt_regs * regs);
   static int load_aout_library(struct file*);
-static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit);
+static int aout_core_dump(struct coredump_params *cprm);

   static struct linux_binfmt aout_format = {
   	.module		= THIS_MODULE,
@@ -89,8 +89,9 @@ if (file->f_op->llseek) { \
    * dumping of the process results in another error..
    */

-static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit)
+static int aout_core_dump(struct coredump_params *cprm)
   {
+	struct file *file = cprm->file;
   	mm_segment_t fs;
   	int has_dumped = 0;
   	unsigned long dump_start, dump_size;
@@ -108,16 +109,16 @@ static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, u
   	current->flags |= PF_DUMPCORE;
          	strncpy(dump.u_comm, current->comm, sizeof(dump.u_comm));
   	dump.u_ar0 = offsetof(struct user, regs);
-	dump.signal = signr;
-	aout_dump_thread(regs, &dump);
+	dump.signal = cprm->signr;
+	aout_dump_thread(cprm->regs, &dump);

   /* If the size of the dump file exceeds the rlimit, then see what would happen
      if we wrote the stack, but not the data area.  */
-	if ((dump.u_dsize + dump.u_ssize+1) * PAGE_SIZE > limit)
+	if ((dump.u_dsize + dump.u_ssize+1) * PAGE_SIZE > cprm->limit)
   		dump.u_dsize = 0;

   /* Make sure we have enough room to write the stack and data areas. */
-	if ((dump.u_ssize + 1) * PAGE_SIZE > limit)
+	if ((dump.u_ssize + 1) * PAGE_SIZE > cprm->limit)
   		dump.u_ssize = 0;

   /* make sure we actually have a data and stack area to dump */
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index b9b3bb5..4ee5bb2 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -45,7 +45,7 @@ static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
    * don't even try.
    */
   #if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
-static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit);
+static int elf_core_dump(struct coredump_params *cprm);
   #else
   #define elf_core_dump	NULL
   #endif
@@ -1277,8 +1277,9 @@ static int writenote(struct memelfnote *men, struct file *file,
   }
   #undef DUMP_WRITE

-#define DUMP_WRITE(addr, nr)	\
-	if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \
+#define DUMP_WRITE(addr, nr)				\
+	if ((size += (nr)) > cprm->limit ||		\
+	    !dump_write(cprm->file, (addr), (nr)))	\
   		goto end_coredump;

   static void fill_elf_header(struct elfhdr *elf, int segs,
@@ -1906,7 +1907,7 @@ static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
    * and then they are actually written out.  If we run out of core limit
    * we just truncate.
    */
-static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit)
+static int elf_core_dump(struct coredump_params *cprm)
   {
   	int has_dumped = 0;
   	mm_segment_t fs;
@@ -1952,7 +1953,7 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
   	 * notes.  This also sets up the file header.
   	 */
   	if (!fill_note_info(elf, segs + 1, /* including notes section */
-			    &info, signr, regs))
+			    &info, cprm->signr, cprm->regs))
   		goto cleanup;

   	has_dumped = 1;
@@ -2014,14 +2015,14 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
   #endif

    	/* write out the notes section */
-	if (!write_note_info(&info, file, &foffset))
+	if (!write_note_info(&info, cprm->file, &foffset))
   		goto end_coredump;

-	if (elf_coredump_extra_notes_write(file, &foffset))
+	if (elf_coredump_extra_notes_write(cprm->file, &foffset))
   		goto end_coredump;

   	/* Align to page */
-	if (!dump_seek(file, dataoff - foffset))
+	if (!dump_seek(cprm->file, dataoff - foffset))
   		goto end_coredump;

   	for (vma = first_vma(current, gate_vma); vma != NULL;
@@ -2038,12 +2039,13 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
   			page = get_dump_page(addr);
   			if (page) {
   				void *kaddr = kmap(page);
-				stop = ((size += PAGE_SIZE) > limit) ||
-					!dump_write(file, kaddr, PAGE_SIZE);
+				stop = ((size += PAGE_SIZE) > cprm->limit) ||
+					!dump_write(cprm->file, kaddr,
+						    PAGE_SIZE);
   				kunmap(page);
   				page_cache_release(page);
   			} else
-				stop = !dump_seek(file, PAGE_SIZE);
+				stop = !dump_seek(cprm->file, PAGE_SIZE);
   			if (stop)
   				goto end_coredump;
   		}
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 38502c6..917e1b4 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -76,7 +76,7 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *,
   					     struct file *, struct mm_struct *);

   #if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
-static int elf_fdpic_core_dump(long, struct pt_regs *, struct file *, unsigned long limit);
+static int elf_fdpic_core_dump(struct coredump_params *cprm);
   #endif

   static struct linux_binfmt elf_fdpic_format = {
@@ -1325,8 +1325,9 @@ static int writenote(struct memelfnote *men, struct file *file)
   #undef DUMP_WRITE
   #undef DUMP_SEEK

-#define DUMP_WRITE(addr, nr)	\
-	if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \
+#define DUMP_WRITE(addr, nr)				\
+	if ((size += (nr)) > cprm->limit ||		\
+	    !dump_write(cprm->file, (addr), (nr)))	\
   		goto end_coredump;

   static inline void fill_elf_fdpic_header(struct elfhdr *elf, int segs)
@@ -1581,8 +1582,7 @@ static int elf_fdpic_dump_segments(struct file *file, size_t *size,
    * and then they are actually written out.  If we run out of core limit
    * we just truncate.
    */
-static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
-			       struct file *file, unsigned long limit)
+static int elf_fdpic_core_dump(struct coredump_params *cprm)
   {
   #define	NUM_NOTES	6
   	int has_dumped = 0;
@@ -1641,7 +1641,7 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
   		goto cleanup;
   #endif

-	if (signr) {
+	if (cprm->signr) {
   		struct core_thread *ct;
   		struct elf_thread_status *tmp;

@@ -1660,14 +1660,14 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
   			int sz;

   			tmp = list_entry(t, struct elf_thread_status, list);
-			sz = elf_dump_thread_status(signr, tmp);
+			sz = elf_dump_thread_status(cprm->signr, tmp);
   			thread_status_size += sz;
   		}
   	}

   	/* now collect the dump for the current */
-	fill_prstatus(prstatus, current, signr);
-	elf_core_copy_regs(&prstatus->pr_reg, regs);
+	fill_prstatus(prstatus, current, cprm->signr);
+	elf_core_copy_regs(&prstatus->pr_reg, cprm->regs);

   	segs = current->mm->map_count;
   #ifdef ELF_CORE_EXTRA_PHDRS
@@ -1702,7 +1702,7 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,

     	/* Try to dump the FPU. */
   	if ((prstatus->pr_fpvalid =
-	     elf_core_copy_task_fpregs(current, regs, fpu)))
+	     elf_core_copy_task_fpregs(current, cprm->regs, fpu)))
   		fill_note(notes + numnote++,
   			  "CORE", NT_PRFPREG, sizeof(*fpu), fpu);
   #ifdef ELF_CORE_COPY_XFPREGS
@@ -1773,7 +1773,7 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,

    	/* write out the notes section */
   	for (i = 0; i < numnote; i++)
-		if (!writenote(notes + i, file))
+		if (!writenote(notes + i, cprm->file))
   			goto end_coredump;

   	/* write out the thread status notes section */
@@ -1782,14 +1782,15 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
   				list_entry(t, struct elf_thread_status, list);

   		for (i = 0; i < tmp->num_notes; i++)
-			if (!writenote(&tmp->notes[i], file))
+			if (!writenote(&tmp->notes[i], cprm->file))
   				goto end_coredump;
   	}

-	if (!dump_seek(file, dataoff))
+	if (!dump_seek(cprm->file, dataoff))
   		goto end_coredump;

-	if (elf_fdpic_dump_segments(file, &size, &limit, mm_flags) < 0)
+	if (elf_fdpic_dump_segments(cprm->file, &size, &cprm->limit,
+				    mm_flags) < 0)
   		goto end_coredump;

   #ifdef ELF_CORE_WRITE_EXTRA_DATA
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index a279665..d4a00ea 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -87,7 +87,7 @@ static int load_flat_shared_library(int id, struct lib_info *p);
   #endif

   static int load_flat_binary(struct linux_binprm *, struct pt_regs * regs);
-static int flat_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit);
+static int flat_core_dump(struct coredump_params *cprm);

   static struct linux_binfmt flat_format = {
   	.module		= THIS_MODULE,
@@ -102,10 +102,10 @@ static struct linux_binfmt flat_format = {
    * Currently only a stub-function.
    */

-static int flat_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit)
+static int flat_core_dump(struct coredump_params *cprm)
   {
   	printk("Process %s:%d received signr %d and should have core dumped\n",
-			current->comm, current->pid, (int) signr);
+			current->comm, current->pid, (int) cprm->signr);
   	return(1);
   }

diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c
index eff74b9..2a9b533 100644
--- a/fs/binfmt_som.c
+++ b/fs/binfmt_som.c
@@ -43,7 +43,7 @@ static int load_som_library(struct file *);
    * don't even try.
    */
   #if 0
-static int som_core_dump(long signr, struct pt_regs *regs, unsigned long limit);
+static int som_core_dump(struct coredump_params *cprm);
   #else
   #define som_core_dump	NULL
   #endif
diff --git a/fs/exec.c b/fs/exec.c
index ba112bd..5daf7d5 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1756,17 +1756,20 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
   	struct mm_struct *mm = current->mm;
   	struct linux_binfmt * binfmt;
   	struct inode * inode;
-	struct file * file;
   	const struct cred *old_cred;
   	struct cred *cred;
   	int retval = 0;
   	int flag = 0;
   	int ispipe = 0;
-	unsigned long core_limit = current->signal->rlim[RLIMIT_CORE].rlim_cur;
   	char **helper_argv = NULL;
   	int helper_argc = 0;
   	int dump_count = 0;
   	static atomic_t core_dump_count = ATOMIC_INIT(0);
+	struct coredump_params cprm = {
+		.signr = signr,
+		.regs = regs,
+		.limit = current->signal->rlim[RLIMIT_CORE].rlim_cur,
+	};

   	audit_core_dumps(signr);

@@ -1822,15 +1825,15 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
   	ispipe = format_corename(corename, signr);
   	unlock_kernel();

-	if ((!ispipe) && (core_limit < binfmt->min_coredump))
+	if ((!ispipe) && (cprm.limit < binfmt->min_coredump))
   		goto fail_unlock;

    	if (ispipe) {
-		if (core_limit == 0) {
+		if (cprm.limit == 0) {
   			/*
   			 * Normally core limits are irrelevant to pipes, since
   			 * we're not writing to the file system, but we use
-			 * core_limit of 0 here as a speacial value. Any
+			 * cprm.limit of 0 here as a speacial value. Any
   			 * non-zero limit gets set to RLIM_INFINITY below, but
   			 * a limit of 0 skips the dump.  This is a consistent
   			 * way to catch recursive crashes.  We can still crash
@@ -1863,25 +1866,25 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
   			goto fail_dropcount;
   		}

-		core_limit = RLIM_INFINITY;
+		cprm.limit = RLIM_INFINITY;

   		/* SIGPIPE can happen, but it's just never processed */
   		if (call_usermodehelper_pipe(helper_argv[0], helper_argv, NULL,
-				&file)) {
+				&cprm.file)) {
    			printk(KERN_INFO "Core dump to %s pipe failed\n",
   			       corename);
   			goto fail_dropcount;
    		}
    	} else
- 		file = filp_open(corename,
+		cprm.file = filp_open(corename,
   				 O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
   				 0600);
-	if (IS_ERR(file))
+	if (IS_ERR(cprm.file))
   		goto fail_dropcount;
-	inode = file->f_path.dentry->d_inode;
+	inode = cprm.file->f_path.dentry->d_inode;
   	if (inode->i_nlink > 1)
   		goto close_fail;	/* multiple links - don't dump */
-	if (!ispipe && d_unhashed(file->f_path.dentry))
+	if (!ispipe && d_unhashed(cprm.file->f_path.dentry))
   		goto close_fail;

   	/* AK: actually i see no reason to not allow this for named pipes etc.,
@@ -1894,21 +1897,22 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
   	 */
   	if (inode->i_uid != current_fsuid())
   		goto close_fail;
-	if (!file->f_op)
+	if (!cprm.file->f_op)
   		goto close_fail;
-	if (!file->f_op->write)
+	if (!cprm.file->f_op->write)
   		goto close_fail;
-	if (!ispipe && do_truncate(file->f_path.dentry, 0, 0, file) != 0)
+	if (!ispipe &&
+	    do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file) != 0)
   		goto close_fail;

-	retval = binfmt->core_dump(signr, regs, file, core_limit);
+	retval = binfmt->core_dump(&cprm);

   	if (retval)
   		current->signal->group_exit_code |= 0x80;
   close_fail:
   	if (ispipe && core_pipe_limit)
-		wait_for_dump_helpers(file);
-	filp_close(file, NULL);
+		wait_for_dump_helpers(cprm.file);
+	filp_close(cprm.file, NULL);
   fail_dropcount:
   	if (dump_count)
   		atomic_dec(&core_dump_count);
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index aece486..cd4349b 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -68,6 +68,14 @@ struct linux_binprm{

   #define BINPRM_MAX_RECURSION 4

+/* Function parameter for binfmt->coredump */
+struct coredump_params {
+	long signr;
+	struct pt_regs *regs;
+	struct file *file;
+	unsigned long limit;
+};
+
   /*
    * This structure defines the functions that are used to load the binary formats that
    * linux accepts.
@@ -77,7 +85,7 @@ struct linux_binfmt {
   	struct module *module;
   	int (*load_binary)(struct linux_binprm *, struct  pt_regs * regs);
   	int (*load_shlib)(struct file *);
-	int (*core_dump)(long signr, struct pt_regs *regs, struct file *file, unsigned long limit);
+	int (*core_dump)(struct coredump_params *cprm);
   	unsigned long min_coredump;	/* minimal dump size */
   	int hasvdso;
   };
-- 
Masami Hiramatsu

Software Engineer
Hitachi Computer Products (America), Inc.
Software Solutions Division

e-mail: mhiramat@redhat.com


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH v2] mm: Introduce coredump parameter structure
  2009-11-26 16:51     ` Oleg Nesterov
  2009-11-29  3:59       ` Masami Hiramatsu
  2009-11-29  4:39       ` [PATCH v2] mm: Introduce coredump parameter structure Masami Hiramatsu
@ 2009-11-29  4:41       ` Masami Hiramatsu
  2009-11-29 15:10         ` [PATCH][RFC] tracepoint: signal coredump (Re: [PATCH v2] mm: Introduce coredump parameter structure) Masami Hiramatsu
  2009-12-02  0:18         ` [PATCH v2] mm: Introduce coredump parameter structure Andrew Morton
  2 siblings, 2 replies; 28+ messages in thread
From: Masami Hiramatsu @ 2009-11-29  4:41 UTC (permalink / raw)
  To: Oleg Nesterov, Andrew Morton
  Cc: KOSAKI Motohiro, linux-kernel, dhowells, hidehiro.kawai.ez,
	lethal, mingo, roland, vapier, Takahiro Yasui

Introduce coredump parameter data structure (struct coredump_params)
for simplifying binfmt->core_dump() arguments.

Changes in v2:
   - Don't remove DUMP_WRITE() macro.

Signed-off-by: Masami Hiramatsu <mhiramat@redhat.com>
Suggested-by: Ingo Molnar <mingo@elte.hu>
Cc: Hidehiro Kawai <hidehiro.kawai.ez@hitachi.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Roland McGrath <roland@redhat.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
---

Andrew, please replace the previous buggy patch with this version.

Thank you,

    fs/binfmt_aout.c        |   13 +++++++------
    fs/binfmt_elf.c         |   24 +++++++++++++-----------
    fs/binfmt_elf_fdpic.c   |   29 +++++++++++++++--------------
    fs/binfmt_flat.c        |    6 +++---
    fs/binfmt_som.c         |    2 +-
    fs/exec.c               |   38 +++++++++++++++++++++-----------------
    include/linux/binfmts.h |   10 +++++++++-
    7 files changed, 69 insertions(+), 53 deletions(-)


diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index b639dcf..346b694 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -32,7 +32,7 @@

    static int load_aout_binary(struct linux_binprm *, struct pt_regs * regs);
    static int load_aout_library(struct file*);
-static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit);
+static int aout_core_dump(struct coredump_params *cprm);

    static struct linux_binfmt aout_format = {
    	.module		= THIS_MODULE,
@@ -89,8 +89,9 @@ if (file->f_op->llseek) { \
     * dumping of the process results in another error..
     */

-static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit)
+static int aout_core_dump(struct coredump_params *cprm)
    {
+	struct file *file = cprm->file;
    	mm_segment_t fs;
    	int has_dumped = 0;
    	unsigned long dump_start, dump_size;
@@ -108,16 +109,16 @@ static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, u
    	current->flags |= PF_DUMPCORE;
           	strncpy(dump.u_comm, current->comm, sizeof(dump.u_comm));
    	dump.u_ar0 = offsetof(struct user, regs);
-	dump.signal = signr;
-	aout_dump_thread(regs, &dump);
+	dump.signal = cprm->signr;
+	aout_dump_thread(cprm->regs, &dump);

    /* If the size of the dump file exceeds the rlimit, then see what would happen
       if we wrote the stack, but not the data area.  */
-	if ((dump.u_dsize + dump.u_ssize+1) * PAGE_SIZE > limit)
+	if ((dump.u_dsize + dump.u_ssize+1) * PAGE_SIZE > cprm->limit)
    		dump.u_dsize = 0;

    /* Make sure we have enough room to write the stack and data areas. */
-	if ((dump.u_ssize + 1) * PAGE_SIZE > limit)
+	if ((dump.u_ssize + 1) * PAGE_SIZE > cprm->limit)
    		dump.u_ssize = 0;

    /* make sure we actually have a data and stack area to dump */
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index b9b3bb5..4ee5bb2 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -45,7 +45,7 @@ static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
     * don't even try.
     */
    #if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
-static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit);
+static int elf_core_dump(struct coredump_params *cprm);
    #else
    #define elf_core_dump	NULL
    #endif
@@ -1277,8 +1277,9 @@ static int writenote(struct memelfnote *men, struct file *file,
    }
    #undef DUMP_WRITE

-#define DUMP_WRITE(addr, nr)	\
-	if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \
+#define DUMP_WRITE(addr, nr)				\
+	if ((size += (nr)) > cprm->limit ||		\
+	    !dump_write(cprm->file, (addr), (nr)))	\
    		goto end_coredump;

    static void fill_elf_header(struct elfhdr *elf, int segs,
@@ -1906,7 +1907,7 @@ static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
     * and then they are actually written out.  If we run out of core limit
     * we just truncate.
     */
-static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit)
+static int elf_core_dump(struct coredump_params *cprm)
    {
    	int has_dumped = 0;
    	mm_segment_t fs;
@@ -1952,7 +1953,7 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
    	 * notes.  This also sets up the file header.
    	 */
    	if (!fill_note_info(elf, segs + 1, /* including notes section */
-			    &info, signr, regs))
+			    &info, cprm->signr, cprm->regs))
    		goto cleanup;

    	has_dumped = 1;
@@ -2014,14 +2015,14 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
    #endif

     	/* write out the notes section */
-	if (!write_note_info(&info, file, &foffset))
+	if (!write_note_info(&info, cprm->file, &foffset))
    		goto end_coredump;

-	if (elf_coredump_extra_notes_write(file, &foffset))
+	if (elf_coredump_extra_notes_write(cprm->file, &foffset))
    		goto end_coredump;

    	/* Align to page */
-	if (!dump_seek(file, dataoff - foffset))
+	if (!dump_seek(cprm->file, dataoff - foffset))
    		goto end_coredump;

    	for (vma = first_vma(current, gate_vma); vma != NULL;
@@ -2038,12 +2039,13 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
    			page = get_dump_page(addr);
    			if (page) {
    				void *kaddr = kmap(page);
-				stop = ((size += PAGE_SIZE) > limit) ||
-					!dump_write(file, kaddr, PAGE_SIZE);
+				stop = ((size += PAGE_SIZE) > cprm->limit) ||
+					!dump_write(cprm->file, kaddr,
+						    PAGE_SIZE);
    				kunmap(page);
    				page_cache_release(page);
    			} else
-				stop = !dump_seek(file, PAGE_SIZE);
+				stop = !dump_seek(cprm->file, PAGE_SIZE);
    			if (stop)
    				goto end_coredump;
    		}
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 38502c6..917e1b4 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -76,7 +76,7 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *,
    					     struct file *, struct mm_struct *);

    #if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
-static int elf_fdpic_core_dump(long, struct pt_regs *, struct file *, unsigned long limit);
+static int elf_fdpic_core_dump(struct coredump_params *cprm);
    #endif

    static struct linux_binfmt elf_fdpic_format = {
@@ -1325,8 +1325,9 @@ static int writenote(struct memelfnote *men, struct file *file)
    #undef DUMP_WRITE
    #undef DUMP_SEEK

-#define DUMP_WRITE(addr, nr)	\
-	if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \
+#define DUMP_WRITE(addr, nr)				\
+	if ((size += (nr)) > cprm->limit ||		\
+	    !dump_write(cprm->file, (addr), (nr)))	\
    		goto end_coredump;

    static inline void fill_elf_fdpic_header(struct elfhdr *elf, int segs)
@@ -1581,8 +1582,7 @@ static int elf_fdpic_dump_segments(struct file *file, size_t *size,
     * and then they are actually written out.  If we run out of core limit
     * we just truncate.
     */
-static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
-			       struct file *file, unsigned long limit)
+static int elf_fdpic_core_dump(struct coredump_params *cprm)
    {
    #define	NUM_NOTES	6
    	int has_dumped = 0;
@@ -1641,7 +1641,7 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
    		goto cleanup;
    #endif

-	if (signr) {
+	if (cprm->signr) {
    		struct core_thread *ct;
    		struct elf_thread_status *tmp;

@@ -1660,14 +1660,14 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
    			int sz;

    			tmp = list_entry(t, struct elf_thread_status, list);
-			sz = elf_dump_thread_status(signr, tmp);
+			sz = elf_dump_thread_status(cprm->signr, tmp);
    			thread_status_size += sz;
    		}
    	}

    	/* now collect the dump for the current */
-	fill_prstatus(prstatus, current, signr);
-	elf_core_copy_regs(&prstatus->pr_reg, regs);
+	fill_prstatus(prstatus, current, cprm->signr);
+	elf_core_copy_regs(&prstatus->pr_reg, cprm->regs);

    	segs = current->mm->map_count;
    #ifdef ELF_CORE_EXTRA_PHDRS
@@ -1702,7 +1702,7 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,

      	/* Try to dump the FPU. */
    	if ((prstatus->pr_fpvalid =
-	     elf_core_copy_task_fpregs(current, regs, fpu)))
+	     elf_core_copy_task_fpregs(current, cprm->regs, fpu)))
    		fill_note(notes + numnote++,
    			  "CORE", NT_PRFPREG, sizeof(*fpu), fpu);
    #ifdef ELF_CORE_COPY_XFPREGS
@@ -1773,7 +1773,7 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,

     	/* write out the notes section */
    	for (i = 0; i < numnote; i++)
-		if (!writenote(notes + i, file))
+		if (!writenote(notes + i, cprm->file))
    			goto end_coredump;

    	/* write out the thread status notes section */
@@ -1782,14 +1782,15 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
    				list_entry(t, struct elf_thread_status, list);

    		for (i = 0; i < tmp->num_notes; i++)
-			if (!writenote(&tmp->notes[i], file))
+			if (!writenote(&tmp->notes[i], cprm->file))
    				goto end_coredump;
    	}

-	if (!dump_seek(file, dataoff))
+	if (!dump_seek(cprm->file, dataoff))
    		goto end_coredump;

-	if (elf_fdpic_dump_segments(file, &size, &limit, mm_flags) < 0)
+	if (elf_fdpic_dump_segments(cprm->file, &size, &cprm->limit,
+				    mm_flags) < 0)
    		goto end_coredump;

    #ifdef ELF_CORE_WRITE_EXTRA_DATA
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index a279665..d4a00ea 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -87,7 +87,7 @@ static int load_flat_shared_library(int id, struct lib_info *p);
    #endif

    static int load_flat_binary(struct linux_binprm *, struct pt_regs * regs);
-static int flat_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit);
+static int flat_core_dump(struct coredump_params *cprm);

    static struct linux_binfmt flat_format = {
    	.module		= THIS_MODULE,
@@ -102,10 +102,10 @@ static struct linux_binfmt flat_format = {
     * Currently only a stub-function.
     */

-static int flat_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit)
+static int flat_core_dump(struct coredump_params *cprm)
    {
    	printk("Process %s:%d received signr %d and should have core dumped\n",
-			current->comm, current->pid, (int) signr);
+			current->comm, current->pid, (int) cprm->signr);
    	return(1);
    }

diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c
index eff74b9..2a9b533 100644
--- a/fs/binfmt_som.c
+++ b/fs/binfmt_som.c
@@ -43,7 +43,7 @@ static int load_som_library(struct file *);
     * don't even try.
     */
    #if 0
-static int som_core_dump(long signr, struct pt_regs *regs, unsigned long limit);
+static int som_core_dump(struct coredump_params *cprm);
    #else
    #define som_core_dump	NULL
    #endif
diff --git a/fs/exec.c b/fs/exec.c
index ba112bd..5daf7d5 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1756,17 +1756,20 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
    	struct mm_struct *mm = current->mm;
    	struct linux_binfmt * binfmt;
    	struct inode * inode;
-	struct file * file;
    	const struct cred *old_cred;
    	struct cred *cred;
    	int retval = 0;
    	int flag = 0;
    	int ispipe = 0;
-	unsigned long core_limit = current->signal->rlim[RLIMIT_CORE].rlim_cur;
    	char **helper_argv = NULL;
    	int helper_argc = 0;
    	int dump_count = 0;
    	static atomic_t core_dump_count = ATOMIC_INIT(0);
+	struct coredump_params cprm = {
+		.signr = signr,
+		.regs = regs,
+		.limit = current->signal->rlim[RLIMIT_CORE].rlim_cur,
+	};

    	audit_core_dumps(signr);

@@ -1822,15 +1825,15 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
    	ispipe = format_corename(corename, signr);
    	unlock_kernel();

-	if ((!ispipe) && (core_limit < binfmt->min_coredump))
+	if ((!ispipe) && (cprm.limit < binfmt->min_coredump))
    		goto fail_unlock;

     	if (ispipe) {
-		if (core_limit == 0) {
+		if (cprm.limit == 0) {
    			/*
    			 * Normally core limits are irrelevant to pipes, since
    			 * we're not writing to the file system, but we use
-			 * core_limit of 0 here as a speacial value. Any
+			 * cprm.limit of 0 here as a speacial value. Any
    			 * non-zero limit gets set to RLIM_INFINITY below, but
    			 * a limit of 0 skips the dump.  This is a consistent
    			 * way to catch recursive crashes.  We can still crash
@@ -1863,25 +1866,25 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
    			goto fail_dropcount;
    		}

-		core_limit = RLIM_INFINITY;
+		cprm.limit = RLIM_INFINITY;

    		/* SIGPIPE can happen, but it's just never processed */
    		if (call_usermodehelper_pipe(helper_argv[0], helper_argv, NULL,
-				&file)) {
+				&cprm.file)) {
     			printk(KERN_INFO "Core dump to %s pipe failed\n",
    			       corename);
    			goto fail_dropcount;
     		}
     	} else
- 		file = filp_open(corename,
+		cprm.file = filp_open(corename,
    				 O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
    				 0600);
-	if (IS_ERR(file))
+	if (IS_ERR(cprm.file))
    		goto fail_dropcount;
-	inode = file->f_path.dentry->d_inode;
+	inode = cprm.file->f_path.dentry->d_inode;
    	if (inode->i_nlink > 1)
    		goto close_fail;	/* multiple links - don't dump */
-	if (!ispipe && d_unhashed(file->f_path.dentry))
+	if (!ispipe && d_unhashed(cprm.file->f_path.dentry))
    		goto close_fail;

    	/* AK: actually i see no reason to not allow this for named pipes etc.,
@@ -1894,21 +1897,22 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
    	 */
    	if (inode->i_uid != current_fsuid())
    		goto close_fail;
-	if (!file->f_op)
+	if (!cprm.file->f_op)
    		goto close_fail;
-	if (!file->f_op->write)
+	if (!cprm.file->f_op->write)
    		goto close_fail;
-	if (!ispipe && do_truncate(file->f_path.dentry, 0, 0, file) != 0)
+	if (!ispipe &&
+	    do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file) != 0)
    		goto close_fail;

-	retval = binfmt->core_dump(signr, regs, file, core_limit);
+	retval = binfmt->core_dump(&cprm);

    	if (retval)
    		current->signal->group_exit_code |= 0x80;
    close_fail:
    	if (ispipe && core_pipe_limit)
-		wait_for_dump_helpers(file);
-	filp_close(file, NULL);
+		wait_for_dump_helpers(cprm.file);
+	filp_close(cprm.file, NULL);
    fail_dropcount:
    	if (dump_count)
    		atomic_dec(&core_dump_count);
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index aece486..cd4349b 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -68,6 +68,14 @@ struct linux_binprm{

    #define BINPRM_MAX_RECURSION 4

+/* Function parameter for binfmt->coredump */
+struct coredump_params {
+	long signr;
+	struct pt_regs *regs;
+	struct file *file;
+	unsigned long limit;
+};
+
    /*
     * This structure defines the functions that are used to load the binary formats that
     * linux accepts.
@@ -77,7 +85,7 @@ struct linux_binfmt {
    	struct module *module;
    	int (*load_binary)(struct linux_binprm *, struct  pt_regs * regs);
    	int (*load_shlib)(struct file *);
-	int (*core_dump)(long signr, struct pt_regs *regs, struct file *file, unsigned long limit);
+	int (*core_dump)(struct coredump_params *cprm);
    	unsigned long min_coredump;	/* minimal dump size */
    	int hasvdso;
    };
-- 
Masami Hiramatsu

Software Engineer
Hitachi Computer Products (America), Inc.
Software Solutions Division

e-mail: mhiramat@redhat.com


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [PATCH][RFC] tracepoint: signal coredump (Re: [PATCH v2] mm: Introduce coredump parameter structure)
  2009-11-29  4:41       ` Masami Hiramatsu
@ 2009-11-29 15:10         ` Masami Hiramatsu
  2009-12-02 20:46           ` [PATCH v2] [RFC] tracepoint: Add signal coredump tracepoint Masami Hiramatsu
  2009-12-02  0:18         ` [PATCH v2] mm: Introduce coredump parameter structure Andrew Morton
  1 sibling, 1 reply; 28+ messages in thread
From: Masami Hiramatsu @ 2009-11-29 15:10 UTC (permalink / raw)
  To: Andrew Morton, mingo
  Cc: Oleg Nesterov, KOSAKI Motohiro, LKML, dhowells,
	hidehiro.kawai.ez, lethal, roland, vapier, Takahiro Yasui

[-- Attachment #1: Type: text/plain, Size: 16847 bytes --]

Hi Ingo and Andrew,

By the way, I'd like to push signal_coredump tracepoint (which I attached)
on these patches, and it also depends on signal-tracepoint patch series
which already picked up to -tip tree.

So, in this case, is it possible to pick these patches (coredump parameter
and mm->flags consistency fix (http://patchwork.kernel.org/patch/60917/))
to -tip tree too?

Thank you,

Masami Hiramatsu wrote:
> Introduce coredump parameter data structure (struct coredump_params)
> for simplifying binfmt->core_dump() arguments.
>
> Changes in v2:
>     - Don't remove DUMP_WRITE() macro.
>
> Signed-off-by: Masami Hiramatsu<mhiramat@redhat.com>
> Suggested-by: Ingo Molnar<mingo@elte.hu>
> Cc: Hidehiro Kawai<hidehiro.kawai.ez@hitachi.com>
> Cc: Andrew Morton<akpm@linux-foundation.org>
> Cc: Oleg Nesterov<oleg@redhat.com>
> Cc: Roland McGrath<roland@redhat.com>
> Cc: KOSAKI Motohiro<kosaki.motohiro@jp.fujitsu.com>
> ---
>
> Andrew, please replace the previous buggy patch with this version.
>
> Thank you,
>
>      fs/binfmt_aout.c        |   13 +++++++------
>      fs/binfmt_elf.c         |   24 +++++++++++++-----------
>      fs/binfmt_elf_fdpic.c   |   29 +++++++++++++++--------------
>      fs/binfmt_flat.c        |    6 +++---
>      fs/binfmt_som.c         |    2 +-
>      fs/exec.c               |   38 +++++++++++++++++++++-----------------
>      include/linux/binfmts.h |   10 +++++++++-
>      7 files changed, 69 insertions(+), 53 deletions(-)
>
>
> diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
> index b639dcf..346b694 100644
> --- a/fs/binfmt_aout.c
> +++ b/fs/binfmt_aout.c
> @@ -32,7 +32,7 @@
>
>      static int load_aout_binary(struct linux_binprm *, struct pt_regs * regs);
>      static int load_aout_library(struct file*);
> -static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit);
> +static int aout_core_dump(struct coredump_params *cprm);
>
>      static struct linux_binfmt aout_format = {
>      	.module		= THIS_MODULE,
> @@ -89,8 +89,9 @@ if (file->f_op->llseek) { \
>       * dumping of the process results in another error..
>       */
>
> -static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit)
> +static int aout_core_dump(struct coredump_params *cprm)
>      {
> +	struct file *file = cprm->file;
>      	mm_segment_t fs;
>      	int has_dumped = 0;
>      	unsigned long dump_start, dump_size;
> @@ -108,16 +109,16 @@ static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, u
>      	current->flags |= PF_DUMPCORE;
>             	strncpy(dump.u_comm, current->comm, sizeof(dump.u_comm));
>      	dump.u_ar0 = offsetof(struct user, regs);
> -	dump.signal = signr;
> -	aout_dump_thread(regs,&dump);
> +	dump.signal = cprm->signr;
> +	aout_dump_thread(cprm->regs,&dump);
>
>      /* If the size of the dump file exceeds the rlimit, then see what would happen
>         if we wrote the stack, but not the data area.  */
> -	if ((dump.u_dsize + dump.u_ssize+1) * PAGE_SIZE>  limit)
> +	if ((dump.u_dsize + dump.u_ssize+1) * PAGE_SIZE>  cprm->limit)
>      		dump.u_dsize = 0;
>
>      /* Make sure we have enough room to write the stack and data areas. */
> -	if ((dump.u_ssize + 1) * PAGE_SIZE>  limit)
> +	if ((dump.u_ssize + 1) * PAGE_SIZE>  cprm->limit)
>      		dump.u_ssize = 0;
>
>      /* make sure we actually have a data and stack area to dump */
> diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
> index b9b3bb5..4ee5bb2 100644
> --- a/fs/binfmt_elf.c
> +++ b/fs/binfmt_elf.c
> @@ -45,7 +45,7 @@ static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
>       * don't even try.
>       */
>      #if defined(USE_ELF_CORE_DUMP)&&  defined(CONFIG_ELF_CORE)
> -static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit);
> +static int elf_core_dump(struct coredump_params *cprm);
>      #else
>      #define elf_core_dump	NULL
>      #endif
> @@ -1277,8 +1277,9 @@ static int writenote(struct memelfnote *men, struct file *file,
>      }
>      #undef DUMP_WRITE
>
> -#define DUMP_WRITE(addr, nr)	\
> -	if ((size += (nr))>  limit || !dump_write(file, (addr), (nr))) \
> +#define DUMP_WRITE(addr, nr)				\
> +	if ((size += (nr))>  cprm->limit ||		\
> +	    !dump_write(cprm->file, (addr), (nr)))	\
>      		goto end_coredump;
>
>      static void fill_elf_header(struct elfhdr *elf, int segs,
> @@ -1906,7 +1907,7 @@ static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
>       * and then they are actually written out.  If we run out of core limit
>       * we just truncate.
>       */
> -static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit)
> +static int elf_core_dump(struct coredump_params *cprm)
>      {
>      	int has_dumped = 0;
>      	mm_segment_t fs;
> @@ -1952,7 +1953,7 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
>      	 * notes.  This also sets up the file header.
>      	 */
>      	if (!fill_note_info(elf, segs + 1, /* including notes section */
> -			&info, signr, regs))
> +			&info, cprm->signr, cprm->regs))
>      		goto cleanup;
>
>      	has_dumped = 1;
> @@ -2014,14 +2015,14 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
>      #endif
>
>       	/* write out the notes section */
> -	if (!write_note_info(&info, file,&foffset))
> +	if (!write_note_info(&info, cprm->file,&foffset))
>      		goto end_coredump;
>
> -	if (elf_coredump_extra_notes_write(file,&foffset))
> +	if (elf_coredump_extra_notes_write(cprm->file,&foffset))
>      		goto end_coredump;
>
>      	/* Align to page */
> -	if (!dump_seek(file, dataoff - foffset))
> +	if (!dump_seek(cprm->file, dataoff - foffset))
>      		goto end_coredump;
>
>      	for (vma = first_vma(current, gate_vma); vma != NULL;
> @@ -2038,12 +2039,13 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
>      			page = get_dump_page(addr);
>      			if (page) {
>      				void *kaddr = kmap(page);
> -				stop = ((size += PAGE_SIZE)>  limit) ||
> -					!dump_write(file, kaddr, PAGE_SIZE);
> +				stop = ((size += PAGE_SIZE)>  cprm->limit) ||
> +					!dump_write(cprm->file, kaddr,
> +						    PAGE_SIZE);
>      				kunmap(page);
>      				page_cache_release(page);
>      			} else
> -				stop = !dump_seek(file, PAGE_SIZE);
> +				stop = !dump_seek(cprm->file, PAGE_SIZE);
>      			if (stop)
>      				goto end_coredump;
>      		}
> diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
> index 38502c6..917e1b4 100644
> --- a/fs/binfmt_elf_fdpic.c
> +++ b/fs/binfmt_elf_fdpic.c
> @@ -76,7 +76,7 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *,
>      					     struct file *, struct mm_struct *);
>
>      #if defined(USE_ELF_CORE_DUMP)&&  defined(CONFIG_ELF_CORE)
> -static int elf_fdpic_core_dump(long, struct pt_regs *, struct file *, unsigned long limit);
> +static int elf_fdpic_core_dump(struct coredump_params *cprm);
>      #endif
>
>      static struct linux_binfmt elf_fdpic_format = {
> @@ -1325,8 +1325,9 @@ static int writenote(struct memelfnote *men, struct file *file)
>      #undef DUMP_WRITE
>      #undef DUMP_SEEK
>
> -#define DUMP_WRITE(addr, nr)	\
> -	if ((size += (nr))>  limit || !dump_write(file, (addr), (nr))) \
> +#define DUMP_WRITE(addr, nr)				\
> +	if ((size += (nr))>  cprm->limit ||		\
> +	    !dump_write(cprm->file, (addr), (nr)))	\
>      		goto end_coredump;
>
>      static inline void fill_elf_fdpic_header(struct elfhdr *elf, int segs)
> @@ -1581,8 +1582,7 @@ static int elf_fdpic_dump_segments(struct file *file, size_t *size,
>       * and then they are actually written out.  If we run out of core limit
>       * we just truncate.
>       */
> -static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
> -			       struct file *file, unsigned long limit)
> +static int elf_fdpic_core_dump(struct coredump_params *cprm)
>      {
>      #define	NUM_NOTES	6
>      	int has_dumped = 0;
> @@ -1641,7 +1641,7 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
>      		goto cleanup;
>      #endif
>
> -	if (signr) {
> +	if (cprm->signr) {
>      		struct core_thread *ct;
>      		struct elf_thread_status *tmp;
>
> @@ -1660,14 +1660,14 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
>      			int sz;
>
>      			tmp = list_entry(t, struct elf_thread_status, list);
> -			sz = elf_dump_thread_status(signr, tmp);
> +			sz = elf_dump_thread_status(cprm->signr, tmp);
>      			thread_status_size += sz;
>      		}
>      	}
>
>      	/* now collect the dump for the current */
> -	fill_prstatus(prstatus, current, signr);
> -	elf_core_copy_regs(&prstatus->pr_reg, regs);
> +	fill_prstatus(prstatus, current, cprm->signr);
> +	elf_core_copy_regs(&prstatus->pr_reg, cprm->regs);
>
>      	segs = current->mm->map_count;
>      #ifdef ELF_CORE_EXTRA_PHDRS
> @@ -1702,7 +1702,7 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
>
>        	/* Try to dump the FPU. */
>      	if ((prstatus->pr_fpvalid =
> -	     elf_core_copy_task_fpregs(current, regs, fpu)))
> +	     elf_core_copy_task_fpregs(current, cprm->regs, fpu)))
>      		fill_note(notes + numnote++,
>      			  "CORE", NT_PRFPREG, sizeof(*fpu), fpu);
>      #ifdef ELF_CORE_COPY_XFPREGS
> @@ -1773,7 +1773,7 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
>
>       	/* write out the notes section */
>      	for (i = 0; i<  numnote; i++)
> -		if (!writenote(notes + i, file))
> +		if (!writenote(notes + i, cprm->file))
>      			goto end_coredump;
>
>      	/* write out the thread status notes section */
> @@ -1782,14 +1782,15 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
>      				list_entry(t, struct elf_thread_status, list);
>
>      		for (i = 0; i<  tmp->num_notes; i++)
> -			if (!writenote(&tmp->notes[i], file))
> +			if (!writenote(&tmp->notes[i], cprm->file))
>      				goto end_coredump;
>      	}
>
> -	if (!dump_seek(file, dataoff))
> +	if (!dump_seek(cprm->file, dataoff))
>      		goto end_coredump;
>
> -	if (elf_fdpic_dump_segments(file,&size,&limit, mm_flags)<  0)
> +	if (elf_fdpic_dump_segments(cprm->file,&size,&cprm->limit,
> +				    mm_flags)<  0)
>      		goto end_coredump;
>
>      #ifdef ELF_CORE_WRITE_EXTRA_DATA
> diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
> index a279665..d4a00ea 100644
> --- a/fs/binfmt_flat.c
> +++ b/fs/binfmt_flat.c
> @@ -87,7 +87,7 @@ static int load_flat_shared_library(int id, struct lib_info *p);
>      #endif
>
>      static int load_flat_binary(struct linux_binprm *, struct pt_regs * regs);
> -static int flat_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit);
> +static int flat_core_dump(struct coredump_params *cprm);
>
>      static struct linux_binfmt flat_format = {
>      	.module		= THIS_MODULE,
> @@ -102,10 +102,10 @@ static struct linux_binfmt flat_format = {
>       * Currently only a stub-function.
>       */
>
> -static int flat_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit)
> +static int flat_core_dump(struct coredump_params *cprm)
>      {
>      	printk("Process %s:%d received signr %d and should have core dumped\n",
> -			current->comm, current->pid, (int) signr);
> +			current->comm, current->pid, (int) cprm->signr);
>      	return(1);
>      }
>
> diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c
> index eff74b9..2a9b533 100644
> --- a/fs/binfmt_som.c
> +++ b/fs/binfmt_som.c
> @@ -43,7 +43,7 @@ static int load_som_library(struct file *);
>       * don't even try.
>       */
>      #if 0
> -static int som_core_dump(long signr, struct pt_regs *regs, unsigned long limit);
> +static int som_core_dump(struct coredump_params *cprm);
>      #else
>      #define som_core_dump	NULL
>      #endif
> diff --git a/fs/exec.c b/fs/exec.c
> index ba112bd..5daf7d5 100644
> --- a/fs/exec.c
> +++ b/fs/exec.c
> @@ -1756,17 +1756,20 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
>      	struct mm_struct *mm = current->mm;
>      	struct linux_binfmt * binfmt;
>      	struct inode * inode;
> -	struct file * file;
>      	const struct cred *old_cred;
>      	struct cred *cred;
>      	int retval = 0;
>      	int flag = 0;
>      	int ispipe = 0;
> -	unsigned long core_limit = current->signal->rlim[RLIMIT_CORE].rlim_cur;
>      	char **helper_argv = NULL;
>      	int helper_argc = 0;
>      	int dump_count = 0;
>      	static atomic_t core_dump_count = ATOMIC_INIT(0);
> +	struct coredump_params cprm = {
> +		.signr = signr,
> +		.regs = regs,
> +		.limit = current->signal->rlim[RLIMIT_CORE].rlim_cur,
> +	};
>
>      	audit_core_dumps(signr);
>
> @@ -1822,15 +1825,15 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
>      	ispipe = format_corename(corename, signr);
>      	unlock_kernel();
>
> -	if ((!ispipe)&&  (core_limit<  binfmt->min_coredump))
> +	if ((!ispipe)&&  (cprm.limit<  binfmt->min_coredump))
>      		goto fail_unlock;
>
>       	if (ispipe) {
> -		if (core_limit == 0) {
> +		if (cprm.limit == 0) {
>      			/*
>      			 * Normally core limits are irrelevant to pipes, since
>      			 * we're not writing to the file system, but we use
> -			 * core_limit of 0 here as a speacial value. Any
> +			 * cprm.limit of 0 here as a speacial value. Any
>      			 * non-zero limit gets set to RLIM_INFINITY below, but
>      			 * a limit of 0 skips the dump.  This is a consistent
>      			 * way to catch recursive crashes.  We can still crash
> @@ -1863,25 +1866,25 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
>      			goto fail_dropcount;
>      		}
>
> -		core_limit = RLIM_INFINITY;
> +		cprm.limit = RLIM_INFINITY;
>
>      		/* SIGPIPE can happen, but it's just never processed */
>      		if (call_usermodehelper_pipe(helper_argv[0], helper_argv, NULL,
> -				&file)) {
> +				&cprm.file)) {
>       			printk(KERN_INFO "Core dump to %s pipe failed\n",
>      			       corename);
>      			goto fail_dropcount;
>       		}
>       	} else
> - 		file = filp_open(corename,
> +		cprm.file = filp_open(corename,
>      				 O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
>      				 0600);
> -	if (IS_ERR(file))
> +	if (IS_ERR(cprm.file))
>      		goto fail_dropcount;
> -	inode = file->f_path.dentry->d_inode;
> +	inode = cprm.file->f_path.dentry->d_inode;
>      	if (inode->i_nlink>  1)
>      		goto close_fail;	/* multiple links - don't dump */
> -	if (!ispipe&&  d_unhashed(file->f_path.dentry))
> +	if (!ispipe&&  d_unhashed(cprm.file->f_path.dentry))
>      		goto close_fail;
>
>      	/* AK: actually i see no reason to not allow this for named pipes etc.,
> @@ -1894,21 +1897,22 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
>      	 */
>      	if (inode->i_uid != current_fsuid())
>      		goto close_fail;
> -	if (!file->f_op)
> +	if (!cprm.file->f_op)
>      		goto close_fail;
> -	if (!file->f_op->write)
> +	if (!cprm.file->f_op->write)
>      		goto close_fail;
> -	if (!ispipe&&  do_truncate(file->f_path.dentry, 0, 0, file) != 0)
> +	if (!ispipe&&
> +	    do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file) != 0)
>      		goto close_fail;
>
> -	retval = binfmt->core_dump(signr, regs, file, core_limit);
> +	retval = binfmt->core_dump(&cprm);
>
>      	if (retval)
>      		current->signal->group_exit_code |= 0x80;
>      close_fail:
>      	if (ispipe&&  core_pipe_limit)
> -		wait_for_dump_helpers(file);
> -	filp_close(file, NULL);
> +		wait_for_dump_helpers(cprm.file);
> +	filp_close(cprm.file, NULL);
>      fail_dropcount:
>      	if (dump_count)
>      		atomic_dec(&core_dump_count);
> diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
> index aece486..cd4349b 100644
> --- a/include/linux/binfmts.h
> +++ b/include/linux/binfmts.h
> @@ -68,6 +68,14 @@ struct linux_binprm{
>
>      #define BINPRM_MAX_RECURSION 4
>
> +/* Function parameter for binfmt->coredump */
> +struct coredump_params {
> +	long signr;
> +	struct pt_regs *regs;
> +	struct file *file;
> +	unsigned long limit;
> +};
> +
>      /*
>       * This structure defines the functions that are used to load the binary formats that
>       * linux accepts.
> @@ -77,7 +85,7 @@ struct linux_binfmt {
>      	struct module *module;
>      	int (*load_binary)(struct linux_binprm *, struct  pt_regs * regs);
>      	int (*load_shlib)(struct file *);
> -	int (*core_dump)(long signr, struct pt_regs *regs, struct file *file, unsigned long limit);
> +	int (*core_dump)(struct coredump_params *cprm);
>      	unsigned long min_coredump;	/* minimal dump size */
>      	int hasvdso;
>      };

-- 
Masami Hiramatsu

Software Engineer
Hitachi Computer Products (America), Inc.
Software Solutions Division

e-mail: mhiramat@redhat.com


[-- Attachment #2: tp-signal-coredump.patch --]
[-- Type: text/plain, Size: 4955 bytes --]

tracepoint: Add signal coredump tracepoint

From: Masami Hiramatsu <mhiramat@redhat.com>

Add signal coredump tracepoint which shows signal number,
mm->flags, limits, pointer to file structure and core
file name.

Signed-off-by: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Roland McGrath <roland@redhat.com>
Cc: Jason Baron <jbaron@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
---

 fs/exec.c                     |   22 +++++++++++----------
 include/trace/events/signal.h |   43 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 55 insertions(+), 10 deletions(-)


diff --git a/fs/exec.c b/fs/exec.c
index 2ec6973..56f512a 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -56,6 +56,7 @@
 #include <linux/fsnotify.h>
 #include <linux/fs_struct.h>
 #include <linux/pipe_fs_i.h>
+#include <trace/events/signal.h>
 
 #include <asm/uaccess.h>
 #include <asm/mmu_context.h>
@@ -1772,6 +1773,7 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 	static atomic_t core_dump_count = ATOMIC_INIT(0);
 	struct coredump_params cprm = {
 		.signr = signr,
+		.file = NULL,
 		.regs = regs,
 		.limit = current->signal->rlim[RLIMIT_CORE].rlim_cur,
 		/*
@@ -1837,9 +1839,6 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 	ispipe = format_corename(corename, signr);
 	unlock_kernel();
 
-	if ((!ispipe) && (cprm.limit < binfmt->min_coredump))
-		goto fail_unlock;
-
  	if (ispipe) {
 		if (cprm.limit == 0) {
 			/*
@@ -1860,7 +1859,7 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 				"Process %d(%s) has RLIMIT_CORE set to 0\n",
 				task_tgid_vnr(current), current->comm);
 			printk(KERN_WARNING "Aborting core\n");
-			goto fail_unlock;
+			goto end_open;
 		}
 
 		dump_count = atomic_inc_return(&core_dump_count);
@@ -1868,14 +1867,14 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 			printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n",
 			       task_tgid_vnr(current), current->comm);
 			printk(KERN_WARNING "Skipping core dump\n");
-			goto fail_dropcount;
+			goto end_open;
 		}
 
 		helper_argv = argv_split(GFP_KERNEL, corename+1, &helper_argc);
 		if (!helper_argv) {
 			printk(KERN_WARNING "%s failed to allocate memory\n",
 			       __func__);
-			goto fail_dropcount;
+			goto end_open;
 		}
 
 		cprm.limit = RLIM_INFINITY;
@@ -1885,13 +1884,17 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 				&cprm.file)) {
  			printk(KERN_INFO "Core dump to %s pipe failed\n",
 			       corename);
-			goto fail_dropcount;
+			goto end_open;
  		}
- 	} else
+	} else if (cprm.limit >= binfmt->min_coredump)
 		cprm.file = filp_open(corename,
 				 O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
 				 0600);
-	if (IS_ERR(cprm.file))
+
+end_open:
+	trace_signal_coredump(&cprm, corename);
+
+	if (!cprm.file || IS_ERR(cprm.file))
 		goto fail_dropcount;
 	inode = cprm.file->f_path.dentry->d_inode;
 	if (inode->i_nlink > 1)
@@ -1928,7 +1931,6 @@ close_fail:
 fail_dropcount:
 	if (dump_count)
 		atomic_dec(&core_dump_count);
-fail_unlock:
 	if (helper_argv)
 		argv_free(helper_argv);
 
diff --git a/include/trace/events/signal.h b/include/trace/events/signal.h
index a510b75..7feba6e 100644
--- a/include/trace/events/signal.h
+++ b/include/trace/events/signal.h
@@ -6,6 +6,7 @@
 
 #include <linux/signal.h>
 #include <linux/sched.h>
+#include <linux/binfmts.h>
 #include <linux/tracepoint.h>
 
 #define TP_STORE_SIGINFO(__entry, info)				\
@@ -167,6 +168,48 @@ TRACE_EVENT(signal_lose_info,
 	TP_printk("sig=%d group=%d errno=%d code=%d",
 		  __entry->sig, __entry->group, __entry->errno, __entry->code)
 );
+
+/**
+ * signal_coredump - called when dumping core by signal
+ * @cprm: pointer to struct coredump_params
+ * @core_name: core-name string
+ *
+ * Current process dumps core file to 'core_name' file, because 'cprm->signr'
+ * signal is delivered.
+ * 'cprm->file' is a pointer to file structure of core file, if it is NULL
+ * or an error number(IS_ERR(cprm->file)), coredump should be failed.
+ */
+TRACE_EVENT(signal_coredump,
+
+	TP_PROTO(struct coredump_params *cprm, const char *core_name),
+
+	TP_ARGS(cprm, core_name),
+
+	TP_STRUCT__entry(
+		__field(	int,		sig		)
+		__field(	unsigned long,	limit		)
+		__field(	unsigned long,	flags		)
+		__field(	unsigned long,	file		)
+		__string(	name,		core_name	)
+	),
+
+
+	TP_fast_assign(
+		__entry->sig	= (int)cprm->signr;
+		__entry->limit	= cprm->limit;
+		__entry->flags	= cprm->mm_flags;
+		__entry->file	= (unsigned long)cprm->file;
+		__assign_str(name,	core_name);
+	),
+
+	TP_printk("sig=%d limit=%lu dumpable=%lx dump_filter=%lx "
+		  "file=%lx corename=%s",
+		  __entry->sig, __entry->limit,
+		  __entry->flags & MMF_DUMPABLE_MASK,
+		  (__entry->flags & MMF_DUMP_FILTER_MASK) >>
+		  MMF_DUMP_FILTER_SHIFT,
+		  __entry->file, __get_str(name))
+);
 #endif /* _TRACE_SIGNAL_H */
 
 /* This part must be outside protection */

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* Re: [PATCH v2] mm: Introduce coredump parameter structure
  2009-11-29  4:41       ` Masami Hiramatsu
  2009-11-29 15:10         ` [PATCH][RFC] tracepoint: signal coredump (Re: [PATCH v2] mm: Introduce coredump parameter structure) Masami Hiramatsu
@ 2009-12-02  0:18         ` Andrew Morton
  2009-12-02  0:27           ` KOSAKI Motohiro
                             ` (2 more replies)
  1 sibling, 3 replies; 28+ messages in thread
From: Andrew Morton @ 2009-12-02  0:18 UTC (permalink / raw)
  To: Masami Hiramatsu
  Cc: Oleg Nesterov, KOSAKI Motohiro, linux-kernel, dhowells,
	hidehiro.kawai.ez, lethal, mingo, roland, vapier, Takahiro Yasui

On Sat, 28 Nov 2009 23:41:19 -0500
Masami Hiramatsu <mhiramat@redhat.com> wrote:

> Introduce coredump parameter data structure (struct coredump_params)
> for simplifying binfmt->core_dump() arguments.
> 
> Changes in v2:
>    - Don't remove DUMP_WRITE() macro.

What is the reason for this change?

Please always include both the "what" and the "why" in changelog text.

> diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
> index b639dcf..346b694 100644
> --- a/fs/binfmt_aout.c
> +++ b/fs/binfmt_aout.c
> @@ -32,7 +32,7 @@
> 
>     static int load_aout_binary(struct linux_binprm *, struct pt_regs * regs);
>     static int load_aout_library(struct file*);
> -static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit);
> +static int aout_core_dump(struct coredump_params *cprm);
> 
>     static struct linux_binfmt aout_format = {
>     	.module		= THIS_MODULE,
> @@ -89,8 +89,9 @@ if (file->f_op->llseek) { \
>      * dumping of the process results in another error..
>      */
> 
> -static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit)
> +static int aout_core_dump(struct coredump_params *cprm)
>     {

Something horrid has mangled your patch.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v2] mm: Introduce coredump parameter structure
  2009-12-02  0:18         ` [PATCH v2] mm: Introduce coredump parameter structure Andrew Morton
@ 2009-12-02  0:27           ` KOSAKI Motohiro
  2009-12-02  0:29           ` Masami Hiramatsu
  2009-12-02  0:41           ` [PATCH v2] [RESEND] " Masami Hiramatsu
  2 siblings, 0 replies; 28+ messages in thread
From: KOSAKI Motohiro @ 2009-12-02  0:27 UTC (permalink / raw)
  To: Andrew Morton
  Cc: kosaki.motohiro, Masami Hiramatsu, Oleg Nesterov, linux-kernel,
	dhowells, hidehiro.kawai.ez, lethal, mingo, roland, vapier,
	Takahiro Yasui

> On Sat, 28 Nov 2009 23:41:19 -0500
> Masami Hiramatsu <mhiramat@redhat.com> wrote:
> 
> > Introduce coredump parameter data structure (struct coredump_params)
> > for simplifying binfmt->core_dump() arguments.
> > 
> > Changes in v2:
> >    - Don't remove DUMP_WRITE() macro.
> 
> What is the reason for this change?
> 
> Please always include both the "what" and the "why" in changelog text.

I reported v1 break ia64 and uml. its arch specific code still use DUMP_WRITE().

Thanks.



^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v2] mm: Introduce coredump parameter structure
  2009-12-02  0:18         ` [PATCH v2] mm: Introduce coredump parameter structure Andrew Morton
  2009-12-02  0:27           ` KOSAKI Motohiro
@ 2009-12-02  0:29           ` Masami Hiramatsu
  2009-12-02  9:50             ` Ingo Molnar
  2009-12-02  0:41           ` [PATCH v2] [RESEND] " Masami Hiramatsu
  2 siblings, 1 reply; 28+ messages in thread
From: Masami Hiramatsu @ 2009-12-02  0:29 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Oleg Nesterov, KOSAKI Motohiro, linux-kernel, dhowells,
	hidehiro.kawai.ez, lethal, mingo, roland, vapier, Takahiro Yasui

Andrew Morton wrote:
> On Sat, 28 Nov 2009 23:41:19 -0500
> Masami Hiramatsu<mhiramat@redhat.com>  wrote:
>
>> Introduce coredump parameter data structure (struct coredump_params)
>> for simplifying binfmt->core_dump() arguments.
>>
>> Changes in v2:
>>     - Don't remove DUMP_WRITE() macro.
>
> What is the reason for this change?
>
> Please always include both the "what" and the "why" in changelog text.

I see.

>
>> diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
>> index b639dcf..346b694 100644
>> --- a/fs/binfmt_aout.c
>> +++ b/fs/binfmt_aout.c
>> @@ -32,7 +32,7 @@
>>
>>      static int load_aout_binary(struct linux_binprm *, struct pt_regs * regs);
>>      static int load_aout_library(struct file*);
>> -static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit);
>> +static int aout_core_dump(struct coredump_params *cprm);
>>
>>      static struct linux_binfmt aout_format = {
>>      	.module		= THIS_MODULE,
>> @@ -89,8 +89,9 @@ if (file->f_op->llseek) { \
>>       * dumping of the process results in another error..
>>       */
>>
>> -static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit)
>> +static int aout_core_dump(struct coredump_params *cprm)
>>      {
>
> Something horrid has mangled your patch.

Oops, sorry. maybe copy-and-paste buffer demon did it :-(

Thank you,

-- 
Masami Hiramatsu

Software Engineer
Hitachi Computer Products (America), Inc.
Software Solutions Division

e-mail: mhiramat@redhat.com


^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH v2] [RESEND] mm: Introduce coredump parameter structure
  2009-12-02  0:18         ` [PATCH v2] mm: Introduce coredump parameter structure Andrew Morton
  2009-12-02  0:27           ` KOSAKI Motohiro
  2009-12-02  0:29           ` Masami Hiramatsu
@ 2009-12-02  0:41           ` Masami Hiramatsu
  2 siblings, 0 replies; 28+ messages in thread
From: Masami Hiramatsu @ 2009-12-02  0:41 UTC (permalink / raw)
  To: Ingo Molnar, Andrew Morton, lkml
  Cc: systemtap, DLE, Masami Hiramatsu, Hidehiro Kawai, Andrew Morton,
	Oleg Nesterov, Roland McGrath, KOSAKI Motohiro

Introduce coredump parameter data structure (struct coredump_params)
for simplifying binfmt->core_dump() arguments.

Changes in v2:
 - Don't remove DUMP_WRITE() macro, because it is used in
   ELF_CORE_WRITE_EXTRA_* at asm/elf.h on ia64 and um.

Signed-off-by: Masami Hiramatsu <mhiramat@redhat.com>
Suggested-by: Ingo Molnar <mingo@elte.hu>
Cc: Hidehiro Kawai <hidehiro.kawai.ez@hitachi.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Roland McGrath <roland@redhat.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
---

 fs/binfmt_aout.c        |   13 +++++++------
 fs/binfmt_elf.c         |   24 +++++++++++++-----------
 fs/binfmt_elf_fdpic.c   |   29 +++++++++++++++--------------
 fs/binfmt_flat.c        |    6 +++---
 fs/binfmt_som.c         |    2 +-
 fs/exec.c               |   38 +++++++++++++++++++++-----------------
 include/linux/binfmts.h |   10 +++++++++-
 7 files changed, 69 insertions(+), 53 deletions(-)

diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index b639dcf..346b694 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -32,7 +32,7 @@
 
 static int load_aout_binary(struct linux_binprm *, struct pt_regs * regs);
 static int load_aout_library(struct file*);
-static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit);
+static int aout_core_dump(struct coredump_params *cprm);
 
 static struct linux_binfmt aout_format = {
 	.module		= THIS_MODULE,
@@ -89,8 +89,9 @@ if (file->f_op->llseek) { \
  * dumping of the process results in another error..
  */
 
-static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit)
+static int aout_core_dump(struct coredump_params *cprm)
 {
+	struct file *file = cprm->file;
 	mm_segment_t fs;
 	int has_dumped = 0;
 	unsigned long dump_start, dump_size;
@@ -108,16 +109,16 @@ static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, u
 	current->flags |= PF_DUMPCORE;
        	strncpy(dump.u_comm, current->comm, sizeof(dump.u_comm));
 	dump.u_ar0 = offsetof(struct user, regs);
-	dump.signal = signr;
-	aout_dump_thread(regs, &dump);
+	dump.signal = cprm->signr;
+	aout_dump_thread(cprm->regs, &dump);
 
 /* If the size of the dump file exceeds the rlimit, then see what would happen
    if we wrote the stack, but not the data area.  */
-	if ((dump.u_dsize + dump.u_ssize+1) * PAGE_SIZE > limit)
+	if ((dump.u_dsize + dump.u_ssize+1) * PAGE_SIZE > cprm->limit)
 		dump.u_dsize = 0;
 
 /* Make sure we have enough room to write the stack and data areas. */
-	if ((dump.u_ssize + 1) * PAGE_SIZE > limit)
+	if ((dump.u_ssize + 1) * PAGE_SIZE > cprm->limit)
 		dump.u_ssize = 0;
 
 /* make sure we actually have a data and stack area to dump */
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index b9b3bb5..4ee5bb2 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -45,7 +45,7 @@ static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
  * don't even try.
  */
 #if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
-static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit);
+static int elf_core_dump(struct coredump_params *cprm);
 #else
 #define elf_core_dump	NULL
 #endif
@@ -1277,8 +1277,9 @@ static int writenote(struct memelfnote *men, struct file *file,
 }
 #undef DUMP_WRITE
 
-#define DUMP_WRITE(addr, nr)	\
-	if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \
+#define DUMP_WRITE(addr, nr)				\
+	if ((size += (nr)) > cprm->limit ||		\
+	    !dump_write(cprm->file, (addr), (nr)))	\
 		goto end_coredump;
 
 static void fill_elf_header(struct elfhdr *elf, int segs,
@@ -1906,7 +1907,7 @@ static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
  * and then they are actually written out.  If we run out of core limit
  * we just truncate.
  */
-static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit)
+static int elf_core_dump(struct coredump_params *cprm)
 {
 	int has_dumped = 0;
 	mm_segment_t fs;
@@ -1952,7 +1953,7 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
 	 * notes.  This also sets up the file header.
 	 */
 	if (!fill_note_info(elf, segs + 1, /* including notes section */
-			    &info, signr, regs))
+			    &info, cprm->signr, cprm->regs))
 		goto cleanup;
 
 	has_dumped = 1;
@@ -2014,14 +2015,14 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
 #endif
 
  	/* write out the notes section */
-	if (!write_note_info(&info, file, &foffset))
+	if (!write_note_info(&info, cprm->file, &foffset))
 		goto end_coredump;
 
-	if (elf_coredump_extra_notes_write(file, &foffset))
+	if (elf_coredump_extra_notes_write(cprm->file, &foffset))
 		goto end_coredump;
 
 	/* Align to page */
-	if (!dump_seek(file, dataoff - foffset))
+	if (!dump_seek(cprm->file, dataoff - foffset))
 		goto end_coredump;
 
 	for (vma = first_vma(current, gate_vma); vma != NULL;
@@ -2038,12 +2039,13 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
 			page = get_dump_page(addr);
 			if (page) {
 				void *kaddr = kmap(page);
-				stop = ((size += PAGE_SIZE) > limit) ||
-					!dump_write(file, kaddr, PAGE_SIZE);
+				stop = ((size += PAGE_SIZE) > cprm->limit) ||
+					!dump_write(cprm->file, kaddr,
+						    PAGE_SIZE);
 				kunmap(page);
 				page_cache_release(page);
 			} else
-				stop = !dump_seek(file, PAGE_SIZE);
+				stop = !dump_seek(cprm->file, PAGE_SIZE);
 			if (stop)
 				goto end_coredump;
 		}
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 38502c6..917e1b4 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -76,7 +76,7 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *,
 					     struct file *, struct mm_struct *);
 
 #if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
-static int elf_fdpic_core_dump(long, struct pt_regs *, struct file *, unsigned long limit);
+static int elf_fdpic_core_dump(struct coredump_params *cprm);
 #endif
 
 static struct linux_binfmt elf_fdpic_format = {
@@ -1325,8 +1325,9 @@ static int writenote(struct memelfnote *men, struct file *file)
 #undef DUMP_WRITE
 #undef DUMP_SEEK
 
-#define DUMP_WRITE(addr, nr)	\
-	if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \
+#define DUMP_WRITE(addr, nr)				\
+	if ((size += (nr)) > cprm->limit ||		\
+	    !dump_write(cprm->file, (addr), (nr)))	\
 		goto end_coredump;
 
 static inline void fill_elf_fdpic_header(struct elfhdr *elf, int segs)
@@ -1581,8 +1582,7 @@ static int elf_fdpic_dump_segments(struct file *file, size_t *size,
  * and then they are actually written out.  If we run out of core limit
  * we just truncate.
  */
-static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
-			       struct file *file, unsigned long limit)
+static int elf_fdpic_core_dump(struct coredump_params *cprm)
 {
 #define	NUM_NOTES	6
 	int has_dumped = 0;
@@ -1641,7 +1641,7 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
 		goto cleanup;
 #endif
 
-	if (signr) {
+	if (cprm->signr) {
 		struct core_thread *ct;
 		struct elf_thread_status *tmp;
 
@@ -1660,14 +1660,14 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
 			int sz;
 
 			tmp = list_entry(t, struct elf_thread_status, list);
-			sz = elf_dump_thread_status(signr, tmp);
+			sz = elf_dump_thread_status(cprm->signr, tmp);
 			thread_status_size += sz;
 		}
 	}
 
 	/* now collect the dump for the current */
-	fill_prstatus(prstatus, current, signr);
-	elf_core_copy_regs(&prstatus->pr_reg, regs);
+	fill_prstatus(prstatus, current, cprm->signr);
+	elf_core_copy_regs(&prstatus->pr_reg, cprm->regs);
 
 	segs = current->mm->map_count;
 #ifdef ELF_CORE_EXTRA_PHDRS
@@ -1702,7 +1702,7 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
 
   	/* Try to dump the FPU. */
 	if ((prstatus->pr_fpvalid =
-	     elf_core_copy_task_fpregs(current, regs, fpu)))
+	     elf_core_copy_task_fpregs(current, cprm->regs, fpu)))
 		fill_note(notes + numnote++,
 			  "CORE", NT_PRFPREG, sizeof(*fpu), fpu);
 #ifdef ELF_CORE_COPY_XFPREGS
@@ -1773,7 +1773,7 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
 
  	/* write out the notes section */
 	for (i = 0; i < numnote; i++)
-		if (!writenote(notes + i, file))
+		if (!writenote(notes + i, cprm->file))
 			goto end_coredump;
 
 	/* write out the thread status notes section */
@@ -1782,14 +1782,15 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
 				list_entry(t, struct elf_thread_status, list);
 
 		for (i = 0; i < tmp->num_notes; i++)
-			if (!writenote(&tmp->notes[i], file))
+			if (!writenote(&tmp->notes[i], cprm->file))
 				goto end_coredump;
 	}
 
-	if (!dump_seek(file, dataoff))
+	if (!dump_seek(cprm->file, dataoff))
 		goto end_coredump;
 
-	if (elf_fdpic_dump_segments(file, &size, &limit, mm_flags) < 0)
+	if (elf_fdpic_dump_segments(cprm->file, &size, &cprm->limit,
+				    mm_flags) < 0)
 		goto end_coredump;
 
 #ifdef ELF_CORE_WRITE_EXTRA_DATA
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index a279665..d4a00ea 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -87,7 +87,7 @@ static int load_flat_shared_library(int id, struct lib_info *p);
 #endif
 
 static int load_flat_binary(struct linux_binprm *, struct pt_regs * regs);
-static int flat_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit);
+static int flat_core_dump(struct coredump_params *cprm);
 
 static struct linux_binfmt flat_format = {
 	.module		= THIS_MODULE,
@@ -102,10 +102,10 @@ static struct linux_binfmt flat_format = {
  * Currently only a stub-function.
  */
 
-static int flat_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit)
+static int flat_core_dump(struct coredump_params *cprm)
 {
 	printk("Process %s:%d received signr %d and should have core dumped\n",
-			current->comm, current->pid, (int) signr);
+			current->comm, current->pid, (int) cprm->signr);
 	return(1);
 }
 
diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c
index eff74b9..2a9b533 100644
--- a/fs/binfmt_som.c
+++ b/fs/binfmt_som.c
@@ -43,7 +43,7 @@ static int load_som_library(struct file *);
  * don't even try.
  */
 #if 0
-static int som_core_dump(long signr, struct pt_regs *regs, unsigned long limit);
+static int som_core_dump(struct coredump_params *cprm);
 #else
 #define som_core_dump	NULL
 #endif
diff --git a/fs/exec.c b/fs/exec.c
index ba112bd..5daf7d5 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1756,17 +1756,20 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 	struct mm_struct *mm = current->mm;
 	struct linux_binfmt * binfmt;
 	struct inode * inode;
-	struct file * file;
 	const struct cred *old_cred;
 	struct cred *cred;
 	int retval = 0;
 	int flag = 0;
 	int ispipe = 0;
-	unsigned long core_limit = current->signal->rlim[RLIMIT_CORE].rlim_cur;
 	char **helper_argv = NULL;
 	int helper_argc = 0;
 	int dump_count = 0;
 	static atomic_t core_dump_count = ATOMIC_INIT(0);
+	struct coredump_params cprm = {
+		.signr = signr,
+		.regs = regs,
+		.limit = current->signal->rlim[RLIMIT_CORE].rlim_cur,
+	};
 
 	audit_core_dumps(signr);
 
@@ -1822,15 +1825,15 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 	ispipe = format_corename(corename, signr);
 	unlock_kernel();
 
-	if ((!ispipe) && (core_limit < binfmt->min_coredump))
+	if ((!ispipe) && (cprm.limit < binfmt->min_coredump))
 		goto fail_unlock;
 
  	if (ispipe) {
-		if (core_limit == 0) {
+		if (cprm.limit == 0) {
 			/*
 			 * Normally core limits are irrelevant to pipes, since
 			 * we're not writing to the file system, but we use
-			 * core_limit of 0 here as a speacial value. Any
+			 * cprm.limit of 0 here as a speacial value. Any
 			 * non-zero limit gets set to RLIM_INFINITY below, but
 			 * a limit of 0 skips the dump.  This is a consistent
 			 * way to catch recursive crashes.  We can still crash
@@ -1863,25 +1866,25 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 			goto fail_dropcount;
 		}
 
-		core_limit = RLIM_INFINITY;
+		cprm.limit = RLIM_INFINITY;
 
 		/* SIGPIPE can happen, but it's just never processed */
 		if (call_usermodehelper_pipe(helper_argv[0], helper_argv, NULL,
-				&file)) {
+				&cprm.file)) {
  			printk(KERN_INFO "Core dump to %s pipe failed\n",
 			       corename);
 			goto fail_dropcount;
  		}
  	} else
- 		file = filp_open(corename,
+		cprm.file = filp_open(corename,
 				 O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
 				 0600);
-	if (IS_ERR(file))
+	if (IS_ERR(cprm.file))
 		goto fail_dropcount;
-	inode = file->f_path.dentry->d_inode;
+	inode = cprm.file->f_path.dentry->d_inode;
 	if (inode->i_nlink > 1)
 		goto close_fail;	/* multiple links - don't dump */
-	if (!ispipe && d_unhashed(file->f_path.dentry))
+	if (!ispipe && d_unhashed(cprm.file->f_path.dentry))
 		goto close_fail;
 
 	/* AK: actually i see no reason to not allow this for named pipes etc.,
@@ -1894,21 +1897,22 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 	 */
 	if (inode->i_uid != current_fsuid())
 		goto close_fail;
-	if (!file->f_op)
+	if (!cprm.file->f_op)
 		goto close_fail;
-	if (!file->f_op->write)
+	if (!cprm.file->f_op->write)
 		goto close_fail;
-	if (!ispipe && do_truncate(file->f_path.dentry, 0, 0, file) != 0)
+	if (!ispipe &&
+	    do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file) != 0)
 		goto close_fail;
 
-	retval = binfmt->core_dump(signr, regs, file, core_limit);
+	retval = binfmt->core_dump(&cprm);
 
 	if (retval)
 		current->signal->group_exit_code |= 0x80;
 close_fail:
 	if (ispipe && core_pipe_limit)
-		wait_for_dump_helpers(file);
-	filp_close(file, NULL);
+		wait_for_dump_helpers(cprm.file);
+	filp_close(cprm.file, NULL);
 fail_dropcount:
 	if (dump_count)
 		atomic_dec(&core_dump_count);
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index aece486..cd4349b 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -68,6 +68,14 @@ struct linux_binprm{
 
 #define BINPRM_MAX_RECURSION 4
 
+/* Function parameter for binfmt->coredump */
+struct coredump_params {
+	long signr;
+	struct pt_regs *regs;
+	struct file *file;
+	unsigned long limit;
+};
+
 /*
  * This structure defines the functions that are used to load the binary formats that
  * linux accepts.
@@ -77,7 +85,7 @@ struct linux_binfmt {
 	struct module *module;
 	int (*load_binary)(struct linux_binprm *, struct  pt_regs * regs);
 	int (*load_shlib)(struct file *);
-	int (*core_dump)(long signr, struct pt_regs *regs, struct file *file, unsigned long limit);
+	int (*core_dump)(struct coredump_params *cprm);
 	unsigned long min_coredump;	/* minimal dump size */
 	int hasvdso;
 };


-- 
Masami Hiramatsu

Software Engineer
Hitachi Computer Products (America), Inc.
Software Solutions Division

e-mail: mhiramat@redhat.com

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* Re: [PATCH v2] mm: Introduce coredump parameter structure
  2009-12-02  0:29           ` Masami Hiramatsu
@ 2009-12-02  9:50             ` Ingo Molnar
  2009-12-02 18:07               ` Masami Hiramatsu
  0 siblings, 1 reply; 28+ messages in thread
From: Ingo Molnar @ 2009-12-02  9:50 UTC (permalink / raw)
  To: Masami Hiramatsu
  Cc: Andrew Morton, Oleg Nesterov, KOSAKI Motohiro, linux-kernel,
	dhowells, hidehiro.kawai.ez, lethal, roland, vapier,
	Takahiro Yasui


* Masami Hiramatsu <mhiramat@redhat.com> wrote:

> Andrew Morton wrote:
> >On Sat, 28 Nov 2009 23:41:19 -0500
> >Masami Hiramatsu<mhiramat@redhat.com>  wrote:
> >
> >>Introduce coredump parameter data structure (struct coredump_params)
> >>for simplifying binfmt->core_dump() arguments.
> >>
> >>Changes in v2:
> >>    - Don't remove DUMP_WRITE() macro.
> >
> >What is the reason for this change?
> >
> >Please always include both the "what" and the "why" in changelog text.
> 
> I see.

I think Andrew wanted to see a longer explanation about precisely what 
we need for these tracepoints and what the various specific usecases are 
to utilize it.

I.e. a basic cost/benefit analysis is needed. By looking at the patch we 
can see the cost - but you have to counter-balance that with enough 
stuff in the 'benefits' column of the equation.

	Ingo

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v2] mm: Introduce coredump parameter structure
  2009-12-02  9:50             ` Ingo Molnar
@ 2009-12-02 18:07               ` Masami Hiramatsu
  0 siblings, 0 replies; 28+ messages in thread
From: Masami Hiramatsu @ 2009-12-02 18:07 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Andrew Morton, Oleg Nesterov, KOSAKI Motohiro, linux-kernel,
	dhowells, hidehiro.kawai.ez, lethal, roland, vapier,
	Takahiro Yasui

Ingo Molnar wrote:
> * Masami Hiramatsu<mhiramat@redhat.com>  wrote:
>
>> Andrew Morton wrote:
>>> On Sat, 28 Nov 2009 23:41:19 -0500
>>> Masami Hiramatsu<mhiramat@redhat.com>   wrote:
>>>
>>>> Introduce coredump parameter data structure (struct coredump_params)
>>>> for simplifying binfmt->core_dump() arguments.
>>>>
>>>> Changes in v2:
>>>>     - Don't remove DUMP_WRITE() macro.
>>>
>>> What is the reason for this change?
>>>
>>> Please always include both the "what" and the "why" in changelog text.
>>
>> I see.
>
> I think Andrew wanted to see a longer explanation about precisely what
> we need for these tracepoints and what the various specific usecases are
> to utilize it.

Ah, OK.

>
> I.e. a basic cost/benefit analysis is needed. By looking at the patch we
> can see the cost - but you have to counter-balance that with enough
> stuff in the 'benefits' column of the equation.

Hmm, actually, this tracepoint requirement comes from the viewpoint of
administrators (not developers). Since now we have introduced many
coredump configurations (e.g. dumpable, coredump_filter, core_pattern, etc)
and some of them can be modified by users, we assume it is hard to know
what was actually dumped (or not dumped) after some problem happened on the
system. For example, a process didn't generated core, coredump doesn't have
some sections, etc. In those cases, the coredump tracepoint can help us to
know why the core file is so big or small, or not generated, by recording
all configurations for all processes on the system. That will reduce
system-administration cost.

Thank you,

-- 
Masami Hiramatsu

Software Engineer
Hitachi Computer Products (America), Inc.
Software Solutions Division

e-mail: mhiramat@redhat.com


^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH v2] [RFC] tracepoint: Add signal coredump tracepoint
  2009-11-29 15:10         ` [PATCH][RFC] tracepoint: signal coredump (Re: [PATCH v2] mm: Introduce coredump parameter structure) Masami Hiramatsu
@ 2009-12-02 20:46           ` Masami Hiramatsu
  2009-12-03 10:39             ` Ingo Molnar
  2009-12-05  7:18             ` KOSAKI Motohiro
  0 siblings, 2 replies; 28+ messages in thread
From: Masami Hiramatsu @ 2009-12-02 20:46 UTC (permalink / raw)
  To: Ingo Molnar, Andrew Morton, lkml
  Cc: systemtap, DLE, Masami Hiramatsu, Oleg Nesterov, Roland McGrath,
	Jason Baron, Ingo Molnar, Andrew Morton, KOSAKI Motohiro

Add signal coredump tracepoint which shows signal number,
mm->flags, limits, pointer to file structure and core
file name.

This tracepoint requirement comes mainly from the viewpoint of
administrators. Since now we have introduced many coredump
configurations (e.g. dumpable, coredump_filter, core_pattern,
etc) and some of them can be modified by users, it will be hard
to know what was actually dumped (or not dumped) after some
problem happened on the system. For example, a process didn't
generated core, coredump doesn't have some sections, etc.
In those cases, the coredump tracepoint can help us to know
why the core file is so big or small, or not generated, by
recording all configurations for all processes on the system.
That will reduce system-administration cost.

Changes in v2:
 - Fix a bug to clear file local variable when
   call_usermodehelper_pipe() is failed.

Signed-off-by: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Roland McGrath <roland@redhat.com>
Cc: Jason Baron <jbaron@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
---

 fs/exec.c                     |   23 ++++++++++++----------
 include/trace/events/signal.h |   43 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 56 insertions(+), 10 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index 2ec6973..be3ec5c 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -56,6 +56,7 @@
 #include <linux/fsnotify.h>
 #include <linux/fs_struct.h>
 #include <linux/pipe_fs_i.h>
+#include <trace/events/signal.h>
 
 #include <asm/uaccess.h>
 #include <asm/mmu_context.h>
@@ -1772,6 +1773,7 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 	static atomic_t core_dump_count = ATOMIC_INIT(0);
 	struct coredump_params cprm = {
 		.signr = signr,
+		.file = NULL,
 		.regs = regs,
 		.limit = current->signal->rlim[RLIMIT_CORE].rlim_cur,
 		/*
@@ -1837,9 +1839,6 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 	ispipe = format_corename(corename, signr);
 	unlock_kernel();
 
-	if ((!ispipe) && (cprm.limit < binfmt->min_coredump))
-		goto fail_unlock;
-
  	if (ispipe) {
 		if (cprm.limit == 0) {
 			/*
@@ -1860,7 +1859,7 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 				"Process %d(%s) has RLIMIT_CORE set to 0\n",
 				task_tgid_vnr(current), current->comm);
 			printk(KERN_WARNING "Aborting core\n");
-			goto fail_unlock;
+			goto end_open;
 		}
 
 		dump_count = atomic_inc_return(&core_dump_count);
@@ -1868,14 +1867,14 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 			printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n",
 			       task_tgid_vnr(current), current->comm);
 			printk(KERN_WARNING "Skipping core dump\n");
-			goto fail_dropcount;
+			goto end_open;
 		}
 
 		helper_argv = argv_split(GFP_KERNEL, corename+1, &helper_argc);
 		if (!helper_argv) {
 			printk(KERN_WARNING "%s failed to allocate memory\n",
 			       __func__);
-			goto fail_dropcount;
+			goto end_open;
 		}
 
 		cprm.limit = RLIM_INFINITY;
@@ -1883,15 +1882,20 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 		/* SIGPIPE can happen, but it's just never processed */
 		if (call_usermodehelper_pipe(helper_argv[0], helper_argv, NULL,
 				&cprm.file)) {
+			cprm.file = NULL;
  			printk(KERN_INFO "Core dump to %s pipe failed\n",
 			       corename);
-			goto fail_dropcount;
+			goto end_open;
  		}
- 	} else
+	} else if (cprm.limit >= binfmt->min_coredump)
 		cprm.file = filp_open(corename,
 				 O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
 				 0600);
-	if (IS_ERR(cprm.file))
+
+end_open:
+	trace_signal_coredump(&cprm, corename);
+
+	if (!cprm.file || IS_ERR(cprm.file))
 		goto fail_dropcount;
 	inode = cprm.file->f_path.dentry->d_inode;
 	if (inode->i_nlink > 1)
@@ -1928,7 +1932,6 @@ close_fail:
 fail_dropcount:
 	if (dump_count)
 		atomic_dec(&core_dump_count);
-fail_unlock:
 	if (helper_argv)
 		argv_free(helper_argv);
 
diff --git a/include/trace/events/signal.h b/include/trace/events/signal.h
index a510b75..7feba6e 100644
--- a/include/trace/events/signal.h
+++ b/include/trace/events/signal.h
@@ -6,6 +6,7 @@
 
 #include <linux/signal.h>
 #include <linux/sched.h>
+#include <linux/binfmts.h>
 #include <linux/tracepoint.h>
 
 #define TP_STORE_SIGINFO(__entry, info)				\
@@ -167,6 +168,48 @@ TRACE_EVENT(signal_lose_info,
 	TP_printk("sig=%d group=%d errno=%d code=%d",
 		  __entry->sig, __entry->group, __entry->errno, __entry->code)
 );
+
+/**
+ * signal_coredump - called when dumping core by signal
+ * @cprm: pointer to struct coredump_params
+ * @core_name: core-name string
+ *
+ * Current process dumps core file to 'core_name' file, because 'cprm->signr'
+ * signal is delivered.
+ * 'cprm->file' is a pointer to file structure of core file, if it is NULL
+ * or an error number(IS_ERR(cprm->file)), coredump should be failed.
+ */
+TRACE_EVENT(signal_coredump,
+
+	TP_PROTO(struct coredump_params *cprm, const char *core_name),
+
+	TP_ARGS(cprm, core_name),
+
+	TP_STRUCT__entry(
+		__field(	int,		sig		)
+		__field(	unsigned long,	limit		)
+		__field(	unsigned long,	flags		)
+		__field(	unsigned long,	file		)
+		__string(	name,		core_name	)
+	),
+
+
+	TP_fast_assign(
+		__entry->sig	= (int)cprm->signr;
+		__entry->limit	= cprm->limit;
+		__entry->flags	= cprm->mm_flags;
+		__entry->file	= (unsigned long)cprm->file;
+		__assign_str(name,	core_name);
+	),
+
+	TP_printk("sig=%d limit=%lu dumpable=%lx dump_filter=%lx "
+		  "file=%lx corename=%s",
+		  __entry->sig, __entry->limit,
+		  __entry->flags & MMF_DUMPABLE_MASK,
+		  (__entry->flags & MMF_DUMP_FILTER_MASK) >>
+		  MMF_DUMP_FILTER_SHIFT,
+		  __entry->file, __get_str(name))
+);
 #endif /* _TRACE_SIGNAL_H */
 
 /* This part must be outside protection */


-- 
Masami Hiramatsu

Software Engineer
Hitachi Computer Products (America), Inc.
Software Solutions Division

e-mail: mhiramat@redhat.com

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* Re: [PATCH v2] [RFC] tracepoint: Add signal coredump tracepoint
  2009-12-02 20:46           ` [PATCH v2] [RFC] tracepoint: Add signal coredump tracepoint Masami Hiramatsu
@ 2009-12-03 10:39             ` Ingo Molnar
  2009-12-03 11:32               ` Masami Hiramatsu
  2009-12-05  7:18             ` KOSAKI Motohiro
  1 sibling, 1 reply; 28+ messages in thread
From: Ingo Molnar @ 2009-12-03 10:39 UTC (permalink / raw)
  To: Masami Hiramatsu
  Cc: Andrew Morton, lkml, systemtap, DLE, Oleg Nesterov,
	Roland McGrath, Jason Baron, KOSAKI Motohiro


* Masami Hiramatsu <mhiramat@redhat.com> wrote:

> Add signal coredump tracepoint which shows signal number, mm->flags, 
> limits, pointer to file structure and core file name.

Why is the kernel pointer to the file structure logged? User-space has 
no use for it and the analysis value is low.

	Ingo

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v2] [RFC] tracepoint: Add signal coredump tracepoint
  2009-12-03 10:39             ` Ingo Molnar
@ 2009-12-03 11:32               ` Masami Hiramatsu
  2009-12-05  7:16                 ` Ingo Molnar
  0 siblings, 1 reply; 28+ messages in thread
From: Masami Hiramatsu @ 2009-12-03 11:32 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Andrew Morton, lkml, systemtap, DLE, Oleg Nesterov,
	Roland McGrath, Jason Baron, KOSAKI Motohiro

Ingo Molnar wrote:
> 
> * Masami Hiramatsu <mhiramat@redhat.com> wrote:
> 
>> Add signal coredump tracepoint which shows signal number, mm->flags, 
>> limits, pointer to file structure and core file name.
> 
> Why is the kernel pointer to the file structure logged? User-space has 
> no use for it and the analysis value is low.

Ah, if open() or opening pipe fails, it becomes 0 or -ERRNO,
so we can check if there is an error.

Perhaps, we can do below in trace_printk for trace users.
"open %s", (!file || IS_ERR((void *)file)) ? "failed" : "succeeded"

Thank you,

-- 
Masami Hiramatsu

Software Engineer
Hitachi Computer Products (America), Inc.
Software Solutions Division

e-mail: mhiramat@redhat.com


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v2] [RFC] tracepoint: Add signal coredump tracepoint
  2009-12-03 11:32               ` Masami Hiramatsu
@ 2009-12-05  7:16                 ` Ingo Molnar
  2009-12-07 17:19                   ` Masami Hiramatsu
  0 siblings, 1 reply; 28+ messages in thread
From: Ingo Molnar @ 2009-12-05  7:16 UTC (permalink / raw)
  To: Masami Hiramatsu
  Cc: Andrew Morton, lkml, systemtap, DLE, Oleg Nesterov,
	Roland McGrath, Jason Baron, KOSAKI Motohiro


* Masami Hiramatsu <mhiramat@redhat.com> wrote:

> Ingo Molnar wrote:
> > 
> > * Masami Hiramatsu <mhiramat@redhat.com> wrote:
> > 
> >> Add signal coredump tracepoint which shows signal number, mm->flags, 
> >> limits, pointer to file structure and core file name.
> > 
> > Why is the kernel pointer to the file structure logged? User-space has 
> > no use for it and the analysis value is low.
> 
> Ah, if open() or opening pipe fails, it becomes 0 or -ERRNO, so we can 
> check if there is an error.

ok, that wasnt obvious from the patch - worth adding it to the 
changelog.

> Perhaps, we can do below in trace_printk for trace users.
> "open %s", (!file || IS_ERR((void *)file)) ? "failed" : "succeeded"

i'd rather suggest to pass an error code (and keep it 0 if none), 
instead of some ad-hoc string message.

But ... the whole issue of VFS event logging and new tracepoints should 
be approached from a more generic direction i think. Do we want to log 
inode_nr:dev pairs as well? Shouldnt there be a generic event-class 
definition via DECLARE_EVENT_CLASS for file related events, with 'core 
dumped' just being a sub-event-code?

I sense reluctance from the direction of Andrew and disinterest from the 
VFS folks - not a good backdrop in general.

	Ingo

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v2] [RFC] tracepoint: Add signal coredump tracepoint
  2009-12-02 20:46           ` [PATCH v2] [RFC] tracepoint: Add signal coredump tracepoint Masami Hiramatsu
  2009-12-03 10:39             ` Ingo Molnar
@ 2009-12-05  7:18             ` KOSAKI Motohiro
  2009-12-07 15:25               ` Masami Hiramatsu
  1 sibling, 1 reply; 28+ messages in thread
From: KOSAKI Motohiro @ 2009-12-05  7:18 UTC (permalink / raw)
  To: Masami Hiramatsu
  Cc: Ingo Molnar, Andrew Morton, lkml, systemtap, DLE, Oleg Nesterov,
	Roland McGrath, Jason Baron

2009/12/3 Masami Hiramatsu <mhiramat@redhat.com>:
> Add signal coredump tracepoint which shows signal number,
> mm->flags, limits, pointer to file structure and core
> file name.
>
> This tracepoint requirement comes mainly from the viewpoint of
> administrators. Since now we have introduced many coredump
> configurations (e.g. dumpable, coredump_filter, core_pattern,
> etc) and some of them can be modified by users, it will be hard
> to know what was actually dumped (or not dumped) after some
> problem happened on the system. For example, a process didn't
> generated core, coredump doesn't have some sections, etc.
> In those cases, the coredump tracepoint can help us to know
> why the core file is so big or small, or not generated, by
> recording all configurations for all processes on the system.
> That will reduce system-administration cost.

AFAIK, not-dumped case is important than dump successful case.
IOW, admin need to know why the crashed process was not dumped.

This tracepoint doesn't cover all failure case. especially
binfmt->core_dump() et.al.
IOW, this tracepoint seems too specialized piped-coredump feature.

What do you think this tracepoint's use case?

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v2] [RFC] tracepoint: Add signal coredump tracepoint
  2009-12-05  7:18             ` KOSAKI Motohiro
@ 2009-12-07 15:25               ` Masami Hiramatsu
  2009-12-08  1:51                 ` KOSAKI Motohiro
  0 siblings, 1 reply; 28+ messages in thread
From: Masami Hiramatsu @ 2009-12-07 15:25 UTC (permalink / raw)
  To: KOSAKI Motohiro
  Cc: Ingo Molnar, Andrew Morton, lkml, systemtap, DLE, Oleg Nesterov,
	Roland McGrath, Jason Baron

KOSAKI Motohiro wrote:
> 2009/12/3 Masami Hiramatsu<mhiramat@redhat.com>:
>> Add signal coredump tracepoint which shows signal number,
>> mm->flags, limits, pointer to file structure and core
>> file name.
>>
>> This tracepoint requirement comes mainly from the viewpoint of
>> administrators. Since now we have introduced many coredump
>> configurations (e.g. dumpable, coredump_filter, core_pattern,
>> etc) and some of them can be modified by users, it will be hard
>> to know what was actually dumped (or not dumped) after some
>> problem happened on the system. For example, a process didn't
>> generated core, coredump doesn't have some sections, etc.
>> In those cases, the coredump tracepoint can help us to know
>> why the core file is so big or small, or not generated, by
>> recording all configurations for all processes on the system.
>> That will reduce system-administration cost.
>
> AFAIK, not-dumped case is important than dump successful case.
> IOW, admin need to know why the crashed process was not dumped.

Certainly, failure cases are important, but also, the cases
that dumped-core doesn't or does include some sections
are also important.

> This tracepoint doesn't cover all failure case. especially
> binfmt->core_dump() et.al.
> IOW, this tracepoint seems too specialized piped-coredump feature.

Hmm, so would you mean that after calling binfmt->core_dump()
is better place?

> What do you think this tracepoint's use case?

Frankly to say, our first attempt was tracing mm->flags because
it can be changed by users without asking, and they sometimes
ask why core is not perfect or why core file is so big.

Perhaps, covering all of those failure cases and succeed cases,
gives better information for them. In that case, we might better
tweak execution(goto) path to leave some error code on retval.

e.g.
         if (IS_ERR(file))
                 goto fail_dropcount;
+	retval = -EBADF;
         inode = file->f_path.dentry->d_inode;
         if (inode->i_nlink > 1)
                 goto close_fail;        /* multiple links - don't dump */
         if (!ispipe && d_unhashed(file->f_path.dentry))
                 goto close_fail;

         /* AK: actually i see no reason to not allow this for named pipes etc.,
            but keep the previous behaviour for now. */
         if (!ispipe && !S_ISREG(inode->i_mode))
                 goto close_fail;
         /*
          * Dont allow local users get cute and trick others to coredump
          * into their pre-created files:
          */
+	retval = -EPERM;
         if (inode->i_uid != current_fsuid())
                 goto close_fail;
+	retval = -EINVAL;
         if (!file->f_op)
                 goto close_fail;
         if (!file->f_op->write)
                 goto close_fail;
+	retval = -EEXIST;
         if (!ispipe && do_truncate(file->f_path.dentry, 0, 0, file) != 0)
                 goto close_fail;


Thank you,

-- 
Masami Hiramatsu

Software Engineer
Hitachi Computer Products (America), Inc.
Software Solutions Division

e-mail: mhiramat@redhat.com


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v2] [RFC] tracepoint: Add signal coredump tracepoint
  2009-12-05  7:16                 ` Ingo Molnar
@ 2009-12-07 17:19                   ` Masami Hiramatsu
  0 siblings, 0 replies; 28+ messages in thread
From: Masami Hiramatsu @ 2009-12-07 17:19 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Andrew Morton, lkml, systemtap, DLE, Oleg Nesterov,
	Roland McGrath, Jason Baron, KOSAKI Motohiro

Ingo Molnar wrote:
>
> * Masami Hiramatsu<mhiramat@redhat.com>  wrote:
>
>> Ingo Molnar wrote:
>>>
>>> * Masami Hiramatsu<mhiramat@redhat.com>  wrote:
>>>
>>>> Add signal coredump tracepoint which shows signal number, mm->flags,
>>>> limits, pointer to file structure and core file name.
>>>
>>> Why is the kernel pointer to the file structure logged? User-space has
>>> no use for it and the analysis value is low.
>>
>> Ah, if open() or opening pipe fails, it becomes 0 or -ERRNO, so we can
>> check if there is an error.
>
> ok, that wasnt obvious from the patch - worth adding it to the
> changelog.

OK.

>> Perhaps, we can do below in trace_printk for trace users.
>> "open %s", (!file || IS_ERR((void *)file)) ? "failed" : "succeeded"
>
> i'd rather suggest to pass an error code (and keep it 0 if none),
> instead of some ad-hoc string message.

Sure. Or, perhaps, is it enough to show error code? (as block_rq_with_error did)

> But ... the whole issue of VFS event logging and new tracepoints should
> be approached from a more generic direction i think. Do we want to log
> inode_nr:dev pairs as well? Shouldnt there be a generic event-class
> definition via DECLARE_EVENT_CLASS for file related events, with 'core
> dumped' just being a sub-event-code?

Hmm, would you mean that coredump event should be a VFS event? or
handling file open errors should be a VFS event?
There are many other special reasons of failing coredump, as I discussed
with Kosaki-san. So, I think coredump event should be different from VFS
events.

Of course, Failure of file opening event should be handled in VFS events
if possible. In that case, we just need to trace coredump event and
VFS open event, and matching file descriptor or something like that.

> I sense reluctance from the direction of Andrew and disinterest from the
> VFS folks - not a good backdrop in general.
>
> 	Ingo

Thank you,

-- 
Masami Hiramatsu

Software Engineer
Hitachi Computer Products (America), Inc.
Software Solutions Division

e-mail: mhiramat@redhat.com


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v2] [RFC] tracepoint: Add signal coredump tracepoint
  2009-12-07 15:25               ` Masami Hiramatsu
@ 2009-12-08  1:51                 ` KOSAKI Motohiro
  2009-12-08 20:40                   ` [PATCH v3] " Masami Hiramatsu
  0 siblings, 1 reply; 28+ messages in thread
From: KOSAKI Motohiro @ 2009-12-08  1:51 UTC (permalink / raw)
  To: Masami Hiramatsu
  Cc: kosaki.motohiro, Ingo Molnar, Andrew Morton, lkml, systemtap,
	DLE, Oleg Nesterov, Roland McGrath, Jason Baron

> KOSAKI Motohiro wrote:
> > 2009/12/3 Masami Hiramatsu<mhiramat@redhat.com>:
> >> Add signal coredump tracepoint which shows signal number,
> >> mm->flags, limits, pointer to file structure and core
> >> file name.
> >>
> >> This tracepoint requirement comes mainly from the viewpoint of
> >> administrators. Since now we have introduced many coredump
> >> configurations (e.g. dumpable, coredump_filter, core_pattern,
> >> etc) and some of them can be modified by users, it will be hard
> >> to know what was actually dumped (or not dumped) after some
> >> problem happened on the system. For example, a process didn't
> >> generated core, coredump doesn't have some sections, etc.
> >> In those cases, the coredump tracepoint can help us to know
> >> why the core file is so big or small, or not generated, by
> >> recording all configurations for all processes on the system.
> >> That will reduce system-administration cost.
> >
> > AFAIK, not-dumped case is important than dump successful case.
> > IOW, admin need to know why the crashed process was not dumped.
> 
> Certainly, failure cases are important, but also, the cases
> that dumped-core doesn't or does include some sections
> are also important.

correct.

> > This tracepoint doesn't cover all failure case. especially
> > binfmt->core_dump() et.al.
> > IOW, this tracepoint seems too specialized piped-coredump feature.
> 
> Hmm, so would you mean that after calling binfmt->core_dump()
> is better place?

I think your following use-case desired so. if you have unwritten reason, please correct me.

	For example, a process didn't generated core, coredump doesn't have
	some sections, etc.


> > What do you think this tracepoint's use case?
> 
> Frankly to say, our first attempt was tracing mm->flags because
> it can be changed by users without asking, and they sometimes
> ask why core is not perfect or why core file is so big.
> 
> Perhaps, covering all of those failure cases and succeed cases,
> gives better information for them. In that case, we might better
> tweak execution(goto) path to leave some error code on retval.

This is enough acceptable to me.

> 
> e.g.
>          if (IS_ERR(file))
>                  goto fail_dropcount;
> +	retval = -EBADF;
>          inode = file->f_path.dentry->d_inode;
>          if (inode->i_nlink > 1)
>                  goto close_fail;        /* multiple links - don't dump */
>          if (!ispipe && d_unhashed(file->f_path.dentry))
>                  goto close_fail;
> 
>          /* AK: actually i see no reason to not allow this for named pipes etc.,
>             but keep the previous behaviour for now. */
>          if (!ispipe && !S_ISREG(inode->i_mode))
>                  goto close_fail;
>          /*
>           * Dont allow local users get cute and trick others to coredump
>           * into their pre-created files:
>           */
> +	retval = -EPERM;
>          if (inode->i_uid != current_fsuid())
>                  goto close_fail;
> +	retval = -EINVAL;
>          if (!file->f_op)
>                  goto close_fail;
>          if (!file->f_op->write)
>                  goto close_fail;
> +	retval = -EEXIST;
>          if (!ispipe && do_truncate(file->f_path.dentry, 0, 0, file) != 0)
>                  goto close_fail;
> 
> 
> Thank you,
> 
> -- 
> Masami Hiramatsu
> 
> Software Engineer
> Hitachi Computer Products (America), Inc.
> Software Solutions Division
> 
> e-mail: mhiramat@redhat.com
> 




^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH v3] [RFC] tracepoint: Add signal coredump tracepoint
  2009-12-08  1:51                 ` KOSAKI Motohiro
@ 2009-12-08 20:40                   ` Masami Hiramatsu
  2009-12-09  5:34                     ` KOSAKI Motohiro
  0 siblings, 1 reply; 28+ messages in thread
From: Masami Hiramatsu @ 2009-12-08 20:40 UTC (permalink / raw)
  To: Ingo Molnar, Andrew Morton, lkml, KOSAKI Motohiro
  Cc: systemtap, DLE, Masami Hiramatsu, Oleg Nesterov, Roland McGrath,
	Jason Baron, Ingo Molnar, Andrew Morton, KOSAKI Motohiro

Add signal coredump tracepoint which shows signal number,
mm->flags, limits, the result of coredump, and core
file name.

This tracepoint requirement comes mainly from the viewpoint of
administrators. Since now we have introduced many coredump
configurations (e.g. dumpable, coredump_filter, core_pattern,
etc) and some of them can be modified by users, it will be hard
to know what was actually dumped (or not dumped) after some
problem happened on the system. For example, a process didn't
generated core, coredump doesn't have some sections, etc.
In those cases, the coredump tracepoint can help us to know
why the core file is so big or small, or not generated, by
recording all configurations for all processes on the system.
That will reduce system-administration cost.

Changes in v3:
 - Move tracepoint at the end of do_coredump() for tracing
   the result of coredump.
 - Use retval to record error-code at every failure points
   for passing the result of coredump to tracepoint.
 - Trace retval instead of cprm->file for recording the
   result of coredump.

Changes in v2:
 - Fix a bug to clear file local variable when
   call_usermodehelper_pipe() is failed.

Signed-off-by: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Roland McGrath <roland@redhat.com>
Cc: Jason Baron <jbaron@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
---

 fs/exec.c                     |   58 ++++++++++++++++++++++++++++++-----------
 include/trace/events/signal.h |   48 ++++++++++++++++++++++++++++++++++
 2 files changed, 90 insertions(+), 16 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index 0a5d944..d67ed5a 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -55,6 +55,7 @@
 #include <linux/fsnotify.h>
 #include <linux/fs_struct.h>
 #include <linux/pipe_fs_i.h>
+#include <trace/events/signal.h>
 
 #include <asm/uaccess.h>
 #include <asm/mmu_context.h>
@@ -1753,7 +1754,7 @@ static void wait_for_dump_helpers(struct file *file)
 void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 {
 	struct core_state core_state;
-	char corename[CORENAME_MAX_SIZE + 1];
+	char corename[CORENAME_MAX_SIZE + 1] = "";
 	struct mm_struct *mm = current->mm;
 	struct linux_binfmt * binfmt;
 	struct inode * inode;
@@ -1768,6 +1769,7 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 	static atomic_t core_dump_count = ATOMIC_INIT(0);
 	struct coredump_params cprm = {
 		.signr = signr,
+		.file = NULL,
 		.regs = regs,
 		.limit = current->signal->rlim[RLIMIT_CORE].rlim_cur,
 		/*
@@ -1781,8 +1783,10 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 	audit_core_dumps(signr);
 
 	binfmt = mm->binfmt;
-	if (!binfmt || !binfmt->core_dump)
+	if (!binfmt || !binfmt->core_dump) {
+		retval = -ENOSYS;
 		goto fail;
+	}
 
 	cred = prepare_creds();
 	if (!cred) {
@@ -1795,6 +1799,7 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 	 * If another thread got here first, or we are not dumpable, bail out.
 	 */
 	if (mm->core_state || !__get_dumpable(cprm.mm_flags)) {
+		/* This is not an error. retval should be 0 */
 		up_write(&mm->mmap_sem);
 		put_cred(cred);
 		goto fail;
@@ -1833,11 +1838,14 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 	ispipe = format_corename(corename, signr);
 	unlock_kernel();
 
-	if ((!ispipe) && (cprm.limit < binfmt->min_coredump))
+	if ((!ispipe) && (cprm.limit < binfmt->min_coredump)) {
+		retval = -EFBIG;
 		goto fail_unlock;
+	}
 
  	if (ispipe) {
 		if (cprm.limit == 0) {
+			retval = -EINVAL;
 			/*
 			 * Normally core limits are irrelevant to pipes, since
 			 * we're not writing to the file system, but we use
@@ -1861,6 +1869,7 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 
 		dump_count = atomic_inc_return(&core_dump_count);
 		if (core_pipe_limit && (core_pipe_limit < dump_count)) {
+			retval = -EFBIG;
 			printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n",
 			       task_tgid_vnr(current), current->comm);
 			printk(KERN_WARNING "Skipping core dump\n");
@@ -1869,6 +1878,7 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 
 		helper_argv = argv_split(GFP_KERNEL, corename+1, &helper_argc);
 		if (!helper_argv) {
+			retval = -ENOMEM;
 			printk(KERN_WARNING "%s failed to allocate memory\n",
 			       __func__);
 			goto fail_dropcount;
@@ -1877,8 +1887,9 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 		cprm.limit = RLIM_INFINITY;
 
 		/* SIGPIPE can happen, but it's just never processed */
-		if (call_usermodehelper_pipe(helper_argv[0], helper_argv, NULL,
-				&cprm.file)) {
+		retval = call_usermodehelper_pipe(helper_argv[0], helper_argv,
+						  NULL, &cprm.file);
+		if (retval < 0) {
  			printk(KERN_INFO "Core dump to %s pipe failed\n",
 			       corename);
 			goto fail_dropcount;
@@ -1887,31 +1898,43 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 		cprm.file = filp_open(corename,
 				 O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
 				 0600);
-	if (IS_ERR(cprm.file))
+	if (IS_ERR(cprm.file)) {
+		retval = (int)PTR_ERR(cprm.file);
 		goto fail_dropcount;
+	}
 	inode = cprm.file->f_path.dentry->d_inode;
-	if (inode->i_nlink > 1)
+	if (inode->i_nlink > 1) {
+		retval = -EMLINK;
 		goto close_fail;	/* multiple links - don't dump */
-	if (!ispipe && d_unhashed(cprm.file->f_path.dentry))
+	}
+	if (!ispipe && d_unhashed(cprm.file->f_path.dentry)) {
+		retval = -EBADF;
 		goto close_fail;
+	}
 
 	/* AK: actually i see no reason to not allow this for named pipes etc.,
 	   but keep the previous behaviour for now. */
-	if (!ispipe && !S_ISREG(inode->i_mode))
+	if (!ispipe && !S_ISREG(inode->i_mode)) {
+		retval = -EBADF;
 		goto close_fail;
+	}
 	/*
 	 * Dont allow local users get cute and trick others to coredump
 	 * into their pre-created files:
 	 */
-	if (inode->i_uid != current_fsuid())
+	if (inode->i_uid != current_fsuid()) {
+		retval = -EPERM;
 		goto close_fail;
-	if (!cprm.file->f_op)
-		goto close_fail;
-	if (!cprm.file->f_op->write)
-		goto close_fail;
-	if (!ispipe &&
-	    do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file) != 0)
+	}
+	if (!cprm.file->f_op || !cprm.file->f_op->write) {
+		retval = -EINVAL;
 		goto close_fail;
+	}
+	if (!ispipe) {
+		retval = do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file);
+		if (retval != 0)
+			goto close_fail;
+	}
 
 	retval = binfmt->core_dump(&cprm);
 
@@ -1932,5 +1955,8 @@ fail_unlock:
 	put_cred(cred);
 	coredump_finish(mm);
 fail:
+	/* Trace coredump parameters and return value */
+	trace_signal_coredump(&cprm, corename, retval);
+
 	return;
 }
diff --git a/include/trace/events/signal.h b/include/trace/events/signal.h
index a510b75..42608f0 100644
--- a/include/trace/events/signal.h
+++ b/include/trace/events/signal.h
@@ -4,8 +4,10 @@
 #if !defined(_TRACE_SIGNAL_H) || defined(TRACE_HEADER_MULTI_READ)
 #define _TRACE_SIGNAL_H
 
+#include <linux/err.h>
 #include <linux/signal.h>
 #include <linux/sched.h>
+#include <linux/binfmts.h>
 #include <linux/tracepoint.h>
 
 #define TP_STORE_SIGINFO(__entry, info)				\
@@ -167,6 +169,52 @@ TRACE_EVENT(signal_lose_info,
 	TP_printk("sig=%d group=%d errno=%d code=%d",
 		  __entry->sig, __entry->group, __entry->errno, __entry->code)
 );
+
+/**
+ * signal_coredump - called when dumping core by signal
+ * @cprm: pointer to struct coredump_params
+ * @core_name: core-name string
+ * @retval: return value of binfmt->coredump or error-code
+ *
+ * Current process dumps core file to 'core_name' file, because 'cprm->signr'
+ * signal is delivered.
+ * 'retval' is an error code or 0/1. retval == 1 means the core file was
+ * dumped successfully and retval == 0 means binfmt->coredump failed to dump.
+ * If retval < 0, this means do_coredump() failed to dump core file before
+ * calling binfmt->coredump.
+ */
+TRACE_EVENT(signal_coredump,
+
+	TP_PROTO(struct coredump_params *cprm, const char *core_name,
+		 int retval),
+
+	TP_ARGS(cprm, core_name, retval),
+
+	TP_STRUCT__entry(
+		__field(	int,		sig		)
+		__field(	unsigned long,	limit		)
+		__field(	unsigned long,	flags		)
+		__field(	int,		retval		)
+		__string(	name,		core_name	)
+	),
+
+
+	TP_fast_assign(
+		__entry->sig	= (int)cprm->signr;
+		__entry->limit	= cprm->limit;
+		__entry->flags	= cprm->mm_flags;
+		__entry->retval	= retval;
+		__assign_str(name,	core_name);
+	),
+
+	TP_printk("sig=%d limit=%lu dumpable=0x%lx dump_filter=0x%lx "
+		  "corename=\"%s\" retval=%d",
+		  __entry->sig, __entry->limit,
+		  __entry->flags & MMF_DUMPABLE_MASK,
+		  (__entry->flags & MMF_DUMP_FILTER_MASK) >>
+		  MMF_DUMP_FILTER_SHIFT,
+		  __get_str(name), __entry->retval)
+);
 #endif /* _TRACE_SIGNAL_H */
 
 /* This part must be outside protection */


-- 
Masami Hiramatsu

Software Engineer
Hitachi Computer Products (America), Inc.
Software Solutions Division

e-mail: mhiramat@redhat.com

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* Re: [PATCH v3] [RFC] tracepoint: Add signal coredump tracepoint
  2009-12-08 20:40                   ` [PATCH v3] " Masami Hiramatsu
@ 2009-12-09  5:34                     ` KOSAKI Motohiro
  2009-12-09 16:07                       ` Masami Hiramatsu
  0 siblings, 1 reply; 28+ messages in thread
From: KOSAKI Motohiro @ 2009-12-09  5:34 UTC (permalink / raw)
  To: Masami Hiramatsu
  Cc: kosaki.motohiro, Ingo Molnar, Andrew Morton, lkml, systemtap,
	DLE, Oleg Nesterov, Roland McGrath, Jason Baron

> +	TP_fast_assign(
> +		__entry->sig	= (int)cprm->signr;
> +		__entry->limit	= cprm->limit;
> +		__entry->flags	= cprm->mm_flags;
> +		__entry->retval	= retval;
> +		__assign_str(name,	core_name);
> +	),
> +
> +	TP_printk("sig=%d limit=%lu dumpable=0x%lx dump_filter=0x%lx "
> +		  "corename=\"%s\" retval=%d",
> +		  __entry->sig, __entry->limit,
> +		  __entry->flags & MMF_DUMPABLE_MASK,
> +		  (__entry->flags & MMF_DUMP_FILTER_MASK) >>
> +		  MMF_DUMP_FILTER_SHIFT,
> +		  __get_str(name), __entry->retval)
> +);
>  #endif /* _TRACE_SIGNAL_H */

I don't think "limit" is userfriendly name, core_limit or core_size_limit is better?
plus, we have core_pipe_limit sysctl too. (it's similar but different concept limit).

other parts looks good to me.




^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v3] [RFC] tracepoint: Add signal coredump tracepoint
  2009-12-09  5:34                     ` KOSAKI Motohiro
@ 2009-12-09 16:07                       ` Masami Hiramatsu
  2009-12-09 16:16                         ` Masami Hiramatsu
  0 siblings, 1 reply; 28+ messages in thread
From: Masami Hiramatsu @ 2009-12-09 16:07 UTC (permalink / raw)
  To: KOSAKI Motohiro
  Cc: Ingo Molnar, Andrew Morton, lkml, systemtap, DLE, Oleg Nesterov,
	Roland McGrath, Jason Baron



KOSAKI Motohiro wrote:
>> +	TP_fast_assign(
>> +		__entry->sig	= (int)cprm->signr;
>> +		__entry->limit	= cprm->limit;
>> +		__entry->flags	= cprm->mm_flags;
>> +		__entry->retval	= retval;
>> +		__assign_str(name,	core_name);
>> +	),
>> +
>> +	TP_printk("sig=%d limit=%lu dumpable=0x%lx dump_filter=0x%lx "
>> +		  "corename=\"%s\" retval=%d",
>> +		  __entry->sig, __entry->limit,
>> +		  __entry->flags&  MMF_DUMPABLE_MASK,
>> +		  (__entry->flags&  MMF_DUMP_FILTER_MASK)>>
>> +		  MMF_DUMP_FILTER_SHIFT,
>> +		  __get_str(name), __entry->retval)
>> +);
>>   #endif /* _TRACE_SIGNAL_H */
>
> I don't think "limit" is userfriendly name, core_limit or core_size_limit is better?
> plus, we have core_pipe_limit sysctl too. (it's similar but different concept limit).

Ah, I missed it. OK, so I'll rename core_limit and add core_pipe_limit.
Thank you for pointed it out :-)

>
> other parts looks good to me.
>
>
>

Thanks!

-- 
Masami Hiramatsu

Software Engineer
Hitachi Computer Products (America), Inc.
Software Solutions Division

e-mail: mhiramat@redhat.com


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v3] [RFC] tracepoint: Add signal coredump tracepoint
  2009-12-09 16:07                       ` Masami Hiramatsu
@ 2009-12-09 16:16                         ` Masami Hiramatsu
  2009-12-09 20:38                           ` [PATCH v4] " Masami Hiramatsu
  0 siblings, 1 reply; 28+ messages in thread
From: Masami Hiramatsu @ 2009-12-09 16:16 UTC (permalink / raw)
  To: KOSAKI Motohiro
  Cc: Ingo Molnar, Andrew Morton, lkml, systemtap, DLE, Oleg Nesterov,
	Roland McGrath, Jason Baron

Masami Hiramatsu wrote:
>
>
> KOSAKI Motohiro wrote:
>>> +	TP_fast_assign(
>>> +		__entry->sig	= (int)cprm->signr;
>>> +		__entry->limit	= cprm->limit;
>>> +		__entry->flags	= cprm->mm_flags;
>>> +		__entry->retval	= retval;
>>> +		__assign_str(name,	core_name);
>>> +	),
>>> +
>>> +	TP_printk("sig=%d limit=%lu dumpable=0x%lx dump_filter=0x%lx "
>>> +		  "corename=\"%s\" retval=%d",
>>> +		  __entry->sig, __entry->limit,
>>> +		  __entry->flags&   MMF_DUMPABLE_MASK,
>>> +		  (__entry->flags&   MMF_DUMP_FILTER_MASK)>>
>>> +		  MMF_DUMP_FILTER_SHIFT,
>>> +		  __get_str(name), __entry->retval)
>>> +);
>>>    #endif /* _TRACE_SIGNAL_H */
>>
>> I don't think "limit" is userfriendly name, core_limit or core_size_limit is better?
>> plus, we have core_pipe_limit sysctl too. (it's similar but different concept limit).
>
> Ah, I missed it. OK, so I'll rename core_limit and add core_pipe_limit.

Hmm, perhaps, would we need a pair of core_pipe_limit and dump_count?
because it limits the number of concurrently dump-to-pipe and the number
is stored on dump_count.
Or, maybe it is enough to trace current parameters, because if we hit the
core_pipe_limit, we can see -EFBIG at retval parameter.


> Thank you for pointed it out :-)
>
>>
>> other parts looks good to me.
>>
>>
>>
>
> Thanks!
>

-- 
Masami Hiramatsu

Software Engineer
Hitachi Computer Products (America), Inc.
Software Solutions Division

e-mail: mhiramat@redhat.com


^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH v4] [RFC] tracepoint: Add signal coredump tracepoint
  2009-12-09 16:16                         ` Masami Hiramatsu
@ 2009-12-09 20:38                           ` Masami Hiramatsu
  2009-12-10  0:09                             ` KOSAKI Motohiro
  0 siblings, 1 reply; 28+ messages in thread
From: Masami Hiramatsu @ 2009-12-09 20:38 UTC (permalink / raw)
  To: Ingo Molnar, Andrew Morton, lkml, KOSAKI Motohiro
  Cc: systemtap, DLE, Masami Hiramatsu, Oleg Nesterov, Roland McGrath,
	Jason Baron, Ingo Molnar, Andrew Morton, KOSAKI Motohiro

Add signal coredump tracepoint which shows signal number,
mm->flags, core file size limitation, the result of
coredump, and core file name.

This tracepoint requirement comes mainly from the viewpoint of
administrators. Since now we have introduced many coredump
configurations (e.g. dumpable, coredump_filter, core_pattern,
etc) and some of them can be modified by users, it will be hard
to know what was actually dumped (or not dumped) after some
problem happened on the system. For example, a process didn't
generated core, coredump doesn't have some sections, etc.
In those cases, the coredump tracepoint can help us to know
why the core file is so big or small, or not generated, by
recording all configurations for all processes on the system.
That will reduce system-administration cost.

Changes in v4:
 - Rename limit trace-argument to core_size_limit, because
   of user friendly output.

Changes in v3:
 - Move tracepoint at the end of do_coredump() for tracing
   the result of coredump.
 - Use retval to record error-code at every failure points
   for passing the result of coredump to tracepoint.
 - Trace retval instead of cprm->file for recording the
   result of coredump.

Changes in v2:
 - Fix a bug to clear file local variable when
   call_usermodehelper_pipe() is failed.

Signed-off-by: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Roland McGrath <roland@redhat.com>
Cc: Jason Baron <jbaron@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
---

 fs/exec.c                     |   58 ++++++++++++++++++++++++++++++-----------
 include/trace/events/signal.h |   48 ++++++++++++++++++++++++++++++++++
 2 files changed, 90 insertions(+), 16 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index 0a5d944..d67ed5a 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -55,6 +55,7 @@
 #include <linux/fsnotify.h>
 #include <linux/fs_struct.h>
 #include <linux/pipe_fs_i.h>
+#include <trace/events/signal.h>
 
 #include <asm/uaccess.h>
 #include <asm/mmu_context.h>
@@ -1753,7 +1754,7 @@ static void wait_for_dump_helpers(struct file *file)
 void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 {
 	struct core_state core_state;
-	char corename[CORENAME_MAX_SIZE + 1];
+	char corename[CORENAME_MAX_SIZE + 1] = "";
 	struct mm_struct *mm = current->mm;
 	struct linux_binfmt * binfmt;
 	struct inode * inode;
@@ -1768,6 +1769,7 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 	static atomic_t core_dump_count = ATOMIC_INIT(0);
 	struct coredump_params cprm = {
 		.signr = signr,
+		.file = NULL,
 		.regs = regs,
 		.limit = current->signal->rlim[RLIMIT_CORE].rlim_cur,
 		/*
@@ -1781,8 +1783,10 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 	audit_core_dumps(signr);
 
 	binfmt = mm->binfmt;
-	if (!binfmt || !binfmt->core_dump)
+	if (!binfmt || !binfmt->core_dump) {
+		retval = -ENOSYS;
 		goto fail;
+	}
 
 	cred = prepare_creds();
 	if (!cred) {
@@ -1795,6 +1799,7 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 	 * If another thread got here first, or we are not dumpable, bail out.
 	 */
 	if (mm->core_state || !__get_dumpable(cprm.mm_flags)) {
+		/* This is not an error. retval should be 0 */
 		up_write(&mm->mmap_sem);
 		put_cred(cred);
 		goto fail;
@@ -1833,11 +1838,14 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 	ispipe = format_corename(corename, signr);
 	unlock_kernel();
 
-	if ((!ispipe) && (cprm.limit < binfmt->min_coredump))
+	if ((!ispipe) && (cprm.limit < binfmt->min_coredump)) {
+		retval = -EFBIG;
 		goto fail_unlock;
+	}
 
  	if (ispipe) {
 		if (cprm.limit == 0) {
+			retval = -EINVAL;
 			/*
 			 * Normally core limits are irrelevant to pipes, since
 			 * we're not writing to the file system, but we use
@@ -1861,6 +1869,7 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 
 		dump_count = atomic_inc_return(&core_dump_count);
 		if (core_pipe_limit && (core_pipe_limit < dump_count)) {
+			retval = -EFBIG;
 			printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n",
 			       task_tgid_vnr(current), current->comm);
 			printk(KERN_WARNING "Skipping core dump\n");
@@ -1869,6 +1878,7 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 
 		helper_argv = argv_split(GFP_KERNEL, corename+1, &helper_argc);
 		if (!helper_argv) {
+			retval = -ENOMEM;
 			printk(KERN_WARNING "%s failed to allocate memory\n",
 			       __func__);
 			goto fail_dropcount;
@@ -1877,8 +1887,9 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 		cprm.limit = RLIM_INFINITY;
 
 		/* SIGPIPE can happen, but it's just never processed */
-		if (call_usermodehelper_pipe(helper_argv[0], helper_argv, NULL,
-				&cprm.file)) {
+		retval = call_usermodehelper_pipe(helper_argv[0], helper_argv,
+						  NULL, &cprm.file);
+		if (retval < 0) {
  			printk(KERN_INFO "Core dump to %s pipe failed\n",
 			       corename);
 			goto fail_dropcount;
@@ -1887,31 +1898,43 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 		cprm.file = filp_open(corename,
 				 O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
 				 0600);
-	if (IS_ERR(cprm.file))
+	if (IS_ERR(cprm.file)) {
+		retval = (int)PTR_ERR(cprm.file);
 		goto fail_dropcount;
+	}
 	inode = cprm.file->f_path.dentry->d_inode;
-	if (inode->i_nlink > 1)
+	if (inode->i_nlink > 1) {
+		retval = -EMLINK;
 		goto close_fail;	/* multiple links - don't dump */
-	if (!ispipe && d_unhashed(cprm.file->f_path.dentry))
+	}
+	if (!ispipe && d_unhashed(cprm.file->f_path.dentry)) {
+		retval = -EBADF;
 		goto close_fail;
+	}
 
 	/* AK: actually i see no reason to not allow this for named pipes etc.,
 	   but keep the previous behaviour for now. */
-	if (!ispipe && !S_ISREG(inode->i_mode))
+	if (!ispipe && !S_ISREG(inode->i_mode)) {
+		retval = -EBADF;
 		goto close_fail;
+	}
 	/*
 	 * Dont allow local users get cute and trick others to coredump
 	 * into their pre-created files:
 	 */
-	if (inode->i_uid != current_fsuid())
+	if (inode->i_uid != current_fsuid()) {
+		retval = -EPERM;
 		goto close_fail;
-	if (!cprm.file->f_op)
-		goto close_fail;
-	if (!cprm.file->f_op->write)
-		goto close_fail;
-	if (!ispipe &&
-	    do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file) != 0)
+	}
+	if (!cprm.file->f_op || !cprm.file->f_op->write) {
+		retval = -EINVAL;
 		goto close_fail;
+	}
+	if (!ispipe) {
+		retval = do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file);
+		if (retval != 0)
+			goto close_fail;
+	}
 
 	retval = binfmt->core_dump(&cprm);
 
@@ -1932,5 +1955,8 @@ fail_unlock:
 	put_cred(cred);
 	coredump_finish(mm);
 fail:
+	/* Trace coredump parameters and return value */
+	trace_signal_coredump(&cprm, corename, retval);
+
 	return;
 }
diff --git a/include/trace/events/signal.h b/include/trace/events/signal.h
index a510b75..6dbc856 100644
--- a/include/trace/events/signal.h
+++ b/include/trace/events/signal.h
@@ -4,8 +4,10 @@
 #if !defined(_TRACE_SIGNAL_H) || defined(TRACE_HEADER_MULTI_READ)
 #define _TRACE_SIGNAL_H
 
+#include <linux/err.h>
 #include <linux/signal.h>
 #include <linux/sched.h>
+#include <linux/binfmts.h>
 #include <linux/tracepoint.h>
 
 #define TP_STORE_SIGINFO(__entry, info)				\
@@ -167,6 +169,52 @@ TRACE_EVENT(signal_lose_info,
 	TP_printk("sig=%d group=%d errno=%d code=%d",
 		  __entry->sig, __entry->group, __entry->errno, __entry->code)
 );
+
+/**
+ * signal_coredump - called when dumping core by signal
+ * @cprm: pointer to struct coredump_params
+ * @core_name: core-name string
+ * @retval: return value of binfmt->coredump or error-code
+ *
+ * Current process dumps core file to 'core_name' file, because 'cprm->signr'
+ * signal is delivered.
+ * 'retval' is an error code or 0/1. retval == 1 means the core file was
+ * dumped successfully and retval == 0 means binfmt->coredump failed to dump.
+ * If retval < 0, this means do_coredump() failed to dump core file before
+ * calling binfmt->coredump.
+ */
+TRACE_EVENT(signal_coredump,
+
+	TP_PROTO(struct coredump_params *cprm, const char *core_name,
+		 int retval),
+
+	TP_ARGS(cprm, core_name, retval),
+
+	TP_STRUCT__entry(
+		__field(	int,		sig		)
+		__field(	unsigned long,	core_size_limit	)
+		__field(	unsigned long,	flags		)
+		__field(	int,		retval		)
+		__string(	name,		core_name	)
+	),
+
+
+	TP_fast_assign(
+		__entry->sig			= (int)cprm->signr;
+		__entry->core_size_limit	= cprm->limit;
+		__entry->flags			= cprm->mm_flags;
+		__entry->retval			= retval;
+		__assign_str(name,		core_name);
+	),
+
+	TP_printk("sig=%d core_size_limit=%lu dumpable=0x%lx dump_filter=0x%lx"
+		  " corename=\"%s\" retval=%d",
+		  __entry->sig, __entry->core_size_limit,
+		  __entry->flags & MMF_DUMPABLE_MASK,
+		  (__entry->flags & MMF_DUMP_FILTER_MASK) >>
+		  MMF_DUMP_FILTER_SHIFT,
+		  __get_str(name), __entry->retval)
+);
 #endif /* _TRACE_SIGNAL_H */
 
 /* This part must be outside protection */


-- 
Masami Hiramatsu

Software Engineer
Hitachi Computer Products (America), Inc.
Software Solutions Division

e-mail: mhiramat@redhat.com

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* Re: [PATCH v4] [RFC] tracepoint: Add signal coredump tracepoint
  2009-12-09 20:38                           ` [PATCH v4] " Masami Hiramatsu
@ 2009-12-10  0:09                             ` KOSAKI Motohiro
  0 siblings, 0 replies; 28+ messages in thread
From: KOSAKI Motohiro @ 2009-12-10  0:09 UTC (permalink / raw)
  To: Masami Hiramatsu
  Cc: kosaki.motohiro, Ingo Molnar, Andrew Morton, lkml, systemtap,
	DLE, Oleg Nesterov, Roland McGrath, Jason Baron

> Add signal coredump tracepoint which shows signal number,
> mm->flags, core file size limitation, the result of
> coredump, and core file name.
> 
> This tracepoint requirement comes mainly from the viewpoint of
> administrators. Since now we have introduced many coredump
> configurations (e.g. dumpable, coredump_filter, core_pattern,
> etc) and some of them can be modified by users, it will be hard
> to know what was actually dumped (or not dumped) after some
> problem happened on the system. For example, a process didn't
> generated core, coredump doesn't have some sections, etc.
> In those cases, the coredump tracepoint can help us to know
> why the core file is so big or small, or not generated, by
> recording all configurations for all processes on the system.
> That will reduce system-administration cost.
> 
> Changes in v4:
>  - Rename limit trace-argument to core_size_limit, because
>    of user friendly output.

Looks good to me.



^ permalink raw reply	[flat|nested] 28+ messages in thread

end of thread, other threads:[~2009-12-10  0:09 UTC | newest]

Thread overview: 28+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-11-20 22:12 + binfmt-introduce-coredump-parameter-structure.patch added to -mm tree akpm
2009-11-26  8:53 ` KOSAKI Motohiro
2009-11-26 15:50   ` Masami Hiramatsu
2009-11-26 16:51     ` Oleg Nesterov
2009-11-29  3:59       ` Masami Hiramatsu
2009-11-29  4:39       ` [PATCH v2] mm: Introduce coredump parameter structure Masami Hiramatsu
2009-11-29  4:41       ` Masami Hiramatsu
2009-11-29 15:10         ` [PATCH][RFC] tracepoint: signal coredump (Re: [PATCH v2] mm: Introduce coredump parameter structure) Masami Hiramatsu
2009-12-02 20:46           ` [PATCH v2] [RFC] tracepoint: Add signal coredump tracepoint Masami Hiramatsu
2009-12-03 10:39             ` Ingo Molnar
2009-12-03 11:32               ` Masami Hiramatsu
2009-12-05  7:16                 ` Ingo Molnar
2009-12-07 17:19                   ` Masami Hiramatsu
2009-12-05  7:18             ` KOSAKI Motohiro
2009-12-07 15:25               ` Masami Hiramatsu
2009-12-08  1:51                 ` KOSAKI Motohiro
2009-12-08 20:40                   ` [PATCH v3] " Masami Hiramatsu
2009-12-09  5:34                     ` KOSAKI Motohiro
2009-12-09 16:07                       ` Masami Hiramatsu
2009-12-09 16:16                         ` Masami Hiramatsu
2009-12-09 20:38                           ` [PATCH v4] " Masami Hiramatsu
2009-12-10  0:09                             ` KOSAKI Motohiro
2009-12-02  0:18         ` [PATCH v2] mm: Introduce coredump parameter structure Andrew Morton
2009-12-02  0:27           ` KOSAKI Motohiro
2009-12-02  0:29           ` Masami Hiramatsu
2009-12-02  9:50             ` Ingo Molnar
2009-12-02 18:07               ` Masami Hiramatsu
2009-12-02  0:41           ` [PATCH v2] [RESEND] " Masami Hiramatsu

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.