linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH try #4] proc: fix PAGE_SIZE limit of /proc/$PID/cmdline
@ 2015-05-08 12:28 Alexey Dobriyan
  2015-05-08 21:32 ` Andrew Morton
  2015-05-26 20:42 ` Jarod Wilson
  0 siblings, 2 replies; 10+ messages in thread
From: Alexey Dobriyan @ 2015-05-08 12:28 UTC (permalink / raw)
  To: akpm; +Cc: linux-kernel, jarod

/proc/$PID/cmdline truncates output at PAGE_SIZE. It is easy to see with

	$ cat /proc/self/cmdline $(seq 1037) 2>/dev/null

However, command line size was never limited to PAGE_SIZE but to 128 KB and
relatively recently limitation was removed altogether.

People noticed and ask questions:
http://stackoverflow.com/questions/199130/how-do-i-increase-the-proc-pid-cmdline-4096-byte-limit

seq file interface is not OK, because it kmalloc's for whole output and
open + read(, 1) + sleep will pin arbitrary amounts of kernel memory.
To not do that, limit must be imposed which is incompatible with
arbitrary sized command lines.

I apologize for hairy code, but this it direct consequence of command line
layout in memory and hacks to support things like "init [3]".

The loops are "unrolled" otherwise it is either macros which hide
control flow or functions with 7-8 arguments with equal line count.

There should be real setproctitle(2) or something.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Tested-by: Jarod Wilson <jarod@redhat.com>
Acked-by: Jarod Wilson <jarod@redhat.com>
---

 fs/proc/base.c |  203 ++++++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 194 insertions(+), 9 deletions(-)

--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -196,18 +196,203 @@ static int proc_root_link(struct dentry *dentry, struct path *path)
 	return result;
 }
 
-static int proc_pid_cmdline(struct seq_file *m, struct pid_namespace *ns,
-			    struct pid *pid, struct task_struct *task)
+static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf,
+				     size_t count, loff_t *pos)
 {
+	struct task_struct *tsk;
+	struct mm_struct *mm;
+	char *page;
+	unsigned long arg_start, arg_end, env_start, env_end;
+	unsigned long len1, len2, len;
+	unsigned long p;
+	char c;
+	ssize_t rv;
+
+	BUG_ON(*pos < 0);
+
+	tsk = get_proc_task(file_inode(file));
+	if (!tsk)
+		return -ESRCH;
+	mm = get_task_mm(tsk);
+	put_task_struct(tsk);
+	if (!mm)
+		return 0;
+	if (!mm->arg_end) {
+		rv = 0;
+		goto out_mmput;
+	}
+
+	page = (char *)__get_free_page(GFP_TEMPORARY);
+	if (!page) {
+		rv = -ENOMEM;
+		goto out_mmput;
+	}
+
+	down_read(&mm->mmap_sem);
+	arg_start = mm->arg_start;
+	arg_end = mm->arg_end;
+	env_start = mm->env_start;
+	env_end = mm->env_end;
+	up_read(&mm->mmap_sem);
+
+	BUG_ON(arg_start > arg_end);
+	BUG_ON(env_start > env_end);
+
+	len1 = arg_end - arg_start;
+	len2 = env_end - env_start;
+
 	/*
-	 * Rely on struct seq_operations::show() being called once
-	 * per internal buffer allocation. See single_open(), traverse().
+	 * Inherently racy -- command line shares address space
+	 * with code and data.
 	 */
-	BUG_ON(m->size < PAGE_SIZE);
-	m->count += get_cmdline(task, m->buf, PAGE_SIZE);
-	return 0;
+	rv = access_remote_vm(mm, arg_end - 1, &c, 1, 0);
+	if (rv <= 0)
+		goto out_free_page;
+
+	rv = 0;
+
+	if (c == '\0') {
+		/* Command line (set of strings) occupies whole ARGV. */
+		if (len1 <= *pos)
+			goto out_free_page;
+
+		p = arg_start + *pos;
+		len = len1 - *pos;
+		while (count > 0 && len > 0) {
+			unsigned int _count;
+			int nr_read;
+
+			_count = min3(count, len, PAGE_SIZE);
+			nr_read = access_remote_vm(mm, p, page, _count, 0);
+			if (nr_read < 0)
+				rv = nr_read;
+			if (nr_read <= 0)
+				goto out_free_page;
+
+			if (copy_to_user(buf, page, nr_read)) {
+				rv = -EFAULT;
+				goto out_free_page;
+			}
+
+			p	+= nr_read;
+			len	-= nr_read;
+			buf	+= nr_read;
+			count	-= nr_read;
+			rv	+= nr_read;
+		}
+	} else {
+		/*
+		 * Command line (1 string) occupies ARGV and maybe
+		 * extends into ENVP.
+		 */
+		if (len1 + len2 <= *pos)
+			goto skip_argv_envp;
+		if (len1 <= *pos)
+			goto skip_argv;
+
+		p = arg_start + *pos;
+		len = len1 - *pos;
+		while (count > 0 && len > 0) {
+			unsigned int _count, l;
+			int nr_read;
+			bool final;
+
+			_count = min3(count, len, PAGE_SIZE);
+			nr_read = access_remote_vm(mm, p, page, _count, 0);
+			if (nr_read < 0)
+				rv = nr_read;
+			if (nr_read <= 0)
+				goto out_free_page;
+
+			/*
+			 * Command line can be shorter than whole ARGV
+			 * even if last "marker" byte says it is not.
+			 */
+			final = false;
+			l = strnlen(page, nr_read);
+			if (l < nr_read) {
+				nr_read = l;
+				final = true;
+			}
+
+			if (copy_to_user(buf, page, nr_read)) {
+				rv = -EFAULT;
+				goto out_free_page;
+			}
+
+			p	+= nr_read;
+			len	-= nr_read;
+			buf	+= nr_read;
+			count	-= nr_read;
+			rv	+= nr_read;
+
+			if (final)
+				goto out_free_page;
+		}
+skip_argv:
+		/*
+		 * Command line (1 string) occupies ARGV and
+		 * extends into ENVP.
+		 */
+		if (len1 <= *pos) {
+			p = env_start + *pos - len1;
+			len = len1 + len2 - *pos;
+		} else {
+			p = env_start;
+			len = len2;
+		}
+		while (count > 0 && len > 0) {
+			unsigned int _count, l;
+			int nr_read;
+			bool final;
+
+			_count = min3(count, len, PAGE_SIZE);
+			nr_read = access_remote_vm(mm, p, page, _count, 0);
+			if (nr_read < 0)
+				rv = nr_read;
+			if (nr_read <= 0)
+				goto out_free_page;
+
+			/* Find EOS. */
+			final = false;
+			l = strnlen(page, nr_read);
+			if (l < nr_read) {
+				nr_read = l;
+				final = true;
+			}
+
+			if (copy_to_user(buf, page, nr_read)) {
+				rv = -EFAULT;
+				goto out_free_page;
+			}
+
+			p	+= nr_read;
+			len	-= nr_read;
+			buf	+= nr_read;
+			count	-= nr_read;
+			rv	+= nr_read;
+
+			if (final)
+				goto out_free_page;
+		}
+skip_argv_envp:
+		;
+	}
+
+out_free_page:
+	free_page((unsigned long)page);
+out_mmput:
+	mmput(mm);
+	if (rv > 0)
+		*pos += rv;
+	return rv;
 }
 
+static const struct file_operations proc_pid_cmdline_ops = {
+	.read	= proc_pid_cmdline_read,
+	.llseek	= generic_file_llseek,
+};
+
 static int proc_pid_auxv(struct seq_file *m, struct pid_namespace *ns,
 			 struct pid *pid, struct task_struct *task)
 {
@@ -2560,7 +2745,7 @@ static const struct pid_entry tgid_base_stuff[] = {
 #ifdef CONFIG_HAVE_ARCH_TRACEHOOK
 	ONE("syscall",    S_IRUSR, proc_pid_syscall),
 #endif
-	ONE("cmdline",    S_IRUGO, proc_pid_cmdline),
+	REG("cmdline",    S_IRUGO, proc_pid_cmdline_ops),
 	ONE("stat",       S_IRUGO, proc_tgid_stat),
 	ONE("statm",      S_IRUGO, proc_pid_statm),
 	REG("maps",       S_IRUGO, proc_pid_maps_operations),
@@ -2906,7 +3091,7 @@ static const struct pid_entry tid_base_stuff[] = {
 #ifdef CONFIG_HAVE_ARCH_TRACEHOOK
 	ONE("syscall",   S_IRUSR, proc_pid_syscall),
 #endif
-	ONE("cmdline",   S_IRUGO, proc_pid_cmdline),
+	REG("cmdline",   S_IRUGO, proc_pid_cmdline_ops),
 	ONE("stat",      S_IRUGO, proc_tid_stat),
 	ONE("statm",     S_IRUGO, proc_pid_statm),
 	REG("maps",      S_IRUGO, proc_tid_maps_operations),
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH try #4] proc: fix PAGE_SIZE limit of /proc/$PID/cmdline
  2015-05-08 12:28 [PATCH try #4] proc: fix PAGE_SIZE limit of /proc/$PID/cmdline Alexey Dobriyan
@ 2015-05-08 21:32 ` Andrew Morton
  2015-05-10 13:36   ` Alexey Dobriyan
  2015-05-26 20:42 ` Jarod Wilson
  1 sibling, 1 reply; 10+ messages in thread
From: Andrew Morton @ 2015-05-08 21:32 UTC (permalink / raw)
  To: Alexey Dobriyan; +Cc: linux-kernel, jarod

On Fri, 8 May 2015 15:28:05 +0300 Alexey Dobriyan <adobriyan@gmail.com> wrote:

> /proc/$PID/cmdline truncates output at PAGE_SIZE. It is easy to see with
> 
> 	$ cat /proc/self/cmdline $(seq 1037) 2>/dev/null
> 
> However, command line size was never limited to PAGE_SIZE but to 128 KB and
> relatively recently limitation was removed altogether.
> 
> People noticed and ask questions:
> http://stackoverflow.com/questions/199130/how-do-i-increase-the-proc-pid-cmdline-4096-byte-limit
> 
> seq file interface is not OK, because it kmalloc's for whole output and
> open + read(, 1) + sleep will pin arbitrary amounts of kernel memory.
> To not do that, limit must be imposed which is incompatible with
> arbitrary sized command lines.
> 
> I apologize for hairy code, but this it direct consequence of command line
> layout in memory and hacks to support things like "init [3]".
> 
> The loops are "unrolled" otherwise it is either macros which hide
> control flow or functions with 7-8 arguments with equal line count.
> 
> There should be real setproctitle(2) or something.
> 
> ...
>
>  fs/proc/base.c |  203 ++++++++++++++++++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 194 insertions(+), 9 deletions(-)

I still hate your patch!


Also, dude.  i386:

In file included from include/asm-generic/bug.h:13:0,
                 from ./arch/x86/include/asm/bug.h:35,
                 from include/linux/bug.h:4,
                 from include/linux/thread_info.h:11,
                 from ./arch/x86/include/asm/uaccess.h:8,
                 from fs/proc/base.c:50:
fs/proc/base.c: In function 'proc_pid_cmdline_read':
include/linux/kernel.h:602:17: warning: comparison of distinct pointer types lacks a cast [enabled by default]
  (void) (&_min1 == &_min2);  \
                 ^
include/linux/kernel.h:600:9: note: in definition of macro 'min'
  typeof(x) _min1 = (x);   \
         ^
include/linux/kernel.h:611:38: note: in expansion of macro 'min'
 #define min3(x, y, z) min((typeof(x))min(x, y), z)
                                      ^
fs/proc/base.c:265:13: note: in expansion of macro 'min3'
    _count = min3(count, len, PAGE_SIZE);
             ^
include/linux/kernel.h:602:17: warning: comparison of distinct pointer types lacks a cast [enabled by default]
  (void) (&_min1 == &_min2);  \
                 ^
include/linux/kernel.h:600:21: note: in definition of macro 'min'
  typeof(x) _min1 = (x);   \
                     ^
include/linux/kernel.h:611:38: note: in expansion of macro 'min'
 #define min3(x, y, z) min((typeof(x))min(x, y), z)
                                      ^
fs/proc/base.c:265:13: note: in expansion of macro 'min3'
    _count = min3(count, len, PAGE_SIZE);
             ^
include/linux/kernel.h:602:17: warning: comparison of distinct pointer types lacks a cast [enabled by default]
  (void) (&_min1 == &_min2);  \
                 ^
include/linux/kernel.h:611:23: note: in expansion of macro 'min'
 #define min3(x, y, z) min((typeof(x))min(x, y), z)
                       ^
fs/proc/base.c:265:13: note: in expansion of macro 'min3'
    _count = min3(count, len, PAGE_SIZE);
             ^
include/linux/kernel.h:602:17: warning: comparison of distinct pointer types lacks a cast [enabled by default]
  (void) (&_min1 == &_min2);  \
                 ^
include/linux/kernel.h:600:9: note: in definition of macro 'min'
  typeof(x) _min1 = (x);   \
         ^
include/linux/kernel.h:611:38: note: in expansion of macro 'min'
 #define min3(x, y, z) min((typeof(x))min(x, y), z)
                                      ^
fs/proc/base.c:300:13: note: in expansion of macro 'min3'
    _count = min3(count, len, PAGE_SIZE);
             ^
include/linux/kernel.h:602:17: warning: comparison of distinct pointer types lacks a cast [enabled by default]
  (void) (&_min1 == &_min2);  \
                 ^
include/linux/kernel.h:600:21: note: in definition of macro 'min'
  typeof(x) _min1 = (x);   \
                     ^
include/linux/kernel.h:611:38: note: in expansion of macro 'min'
 #define min3(x, y, z) min((typeof(x))min(x, y), z)
                                      ^
fs/proc/base.c:300:13: note: in expansion of macro 'min3'
    _count = min3(count, len, PAGE_SIZE);
             ^
include/linux/kernel.h:602:17: warning: comparison of distinct pointer types lacks a cast [enabled by default]
  (void) (&_min1 == &_min2);  \
                 ^
include/linux/kernel.h:611:23: note: in expansion of macro 'min'
 #define min3(x, y, z) min((typeof(x))min(x, y), z)
                       ^
fs/proc/base.c:300:13: note: in expansion of macro 'min3'
    _count = min3(count, len, PAGE_SIZE);
             ^
include/linux/kernel.h:602:17: warning: comparison of distinct pointer types lacks a cast [enabled by default]
  (void) (&_min1 == &_min2);  \
                 ^
include/linux/kernel.h:600:9: note: in definition of macro 'min'
  typeof(x) _min1 = (x);   \
         ^
include/linux/kernel.h:611:38: note: in expansion of macro 'min'
 #define min3(x, y, z) min((typeof(x))min(x, y), z)
                                      ^
fs/proc/base.c:349:13: note: in expansion of macro 'min3'
    _count = min3(count, len, PAGE_SIZE);
             ^
include/linux/kernel.h:602:17: warning: comparison of distinct pointer types lacks a cast [enabled by default]
  (void) (&_min1 == &_min2);  \
                 ^
include/linux/kernel.h:600:21: note: in definition of macro 'min'
  typeof(x) _min1 = (x);   \
                     ^
include/linux/kernel.h:611:38: note: in expansion of macro 'min'
 #define min3(x, y, z) min((typeof(x))min(x, y), z)
                                      ^
fs/proc/base.c:349:13: note: in expansion of macro 'min3'
    _count = min3(count, len, PAGE_SIZE);
             ^
include/linux/kernel.h:602:17: warning: comparison of distinct pointer types lacks a cast [enabled by default]
  (void) (&_min1 == &_min2);  \
                 ^
include/linux/kernel.h:611:23: note: in expansion of macro 'min'
 #define min3(x, y, z) min((typeof(x))min(x, y), z)
                       ^
fs/proc/base.c:349:13: note: in expansion of macro 'min3'
    _count = min3(count, len, PAGE_SIZE);
             ^

--- a/fs/proc/base.c~proc-fix-page_size-limit-of-proc-pid-cmdline-fix
+++ a/fs/proc/base.c
@@ -197,11 +197,12 @@ static int proc_root_link(struct dentry
 }
 
 static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf,
-				     size_t count, loff_t *pos)
+				     size_t _count, loff_t *pos)
 {
 	struct task_struct *tsk;
 	struct mm_struct *mm;
 	char *page;
+	unsigned long count = _count;
 	unsigned long arg_start, arg_end, env_start, env_end;
 	unsigned long len1, len2, len;
 	unsigned long p;
_


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH try #4] proc: fix PAGE_SIZE limit of /proc/$PID/cmdline
  2015-05-08 21:32 ` Andrew Morton
@ 2015-05-10 13:36   ` Alexey Dobriyan
  0 siblings, 0 replies; 10+ messages in thread
From: Alexey Dobriyan @ 2015-05-10 13:36 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel, jarod

On Fri, May 08, 2015 at 02:32:38PM -0700, Andrew Morton wrote:
> On Fri, 8 May 2015 15:28:05 +0300 Alexey Dobriyan <adobriyan@gmail.com> wrote:
> 
> > /proc/$PID/cmdline truncates output at PAGE_SIZE. It is easy to see with
> 
> I still hate your patch!
> 
> 
> Also, dude.  i386:

Someone sent i386 removal patch a month ago, why it wasn't applied???

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH try #4] proc: fix PAGE_SIZE limit of /proc/$PID/cmdline
  2015-05-08 12:28 [PATCH try #4] proc: fix PAGE_SIZE limit of /proc/$PID/cmdline Alexey Dobriyan
  2015-05-08 21:32 ` Andrew Morton
@ 2015-05-26 20:42 ` Jarod Wilson
  2015-05-26 21:24   ` Alexey Dobriyan
  1 sibling, 1 reply; 10+ messages in thread
From: Jarod Wilson @ 2015-05-26 20:42 UTC (permalink / raw)
  To: Alexey Dobriyan, akpm; +Cc: linux-kernel

On 5/8/2015 8:28 AM, Alexey Dobriyan wrote:
> /proc/$PID/cmdline truncates output at PAGE_SIZE. It is easy to see with
>
> 	$ cat /proc/self/cmdline $(seq 1037) 2>/dev/null
>
> However, command line size was never limited to PAGE_SIZE but to 128 KB and
> relatively recently limitation was removed altogether.
>
> People noticed and ask questions:
> http://stackoverflow.com/questions/199130/how-do-i-increase-the-proc-pid-cmdline-4096-byte-limit
>
> seq file interface is not OK, because it kmalloc's for whole output and
> open + read(, 1) + sleep will pin arbitrary amounts of kernel memory.
> To not do that, limit must be imposed which is incompatible with
> arbitrary sized command lines.
>
> I apologize for hairy code, but this it direct consequence of command line
> layout in memory and hacks to support things like "init [3]".
>
> The loops are "unrolled" otherwise it is either macros which hide
> control flow or functions with 7-8 arguments with equal line count.
>
> There should be real setproctitle(2) or something.
>
> Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
> Tested-by: Jarod Wilson <jarod@redhat.com>
> Acked-by: Jarod Wilson <jarod@redhat.com>

Should have tested on more than just x86, it appears. We've started 
hammering on this internally across all arches, and its exploded 
multiple times on ppc64 now:

[ 2717.074699] ------------[ cut here ]------------
[ 2717.074787] kernel BUG at fs/proc/base.c:244!
[ 2717.074822] Oops: Exception in kernel mode, sig: 5 [#1]
[ 2717.074854] SMP NR_CPUS=2048 NUMA pSeries
[ 2717.074891] Modules linked in: rpcsec_gss_krb5 auth_rpcgss nfsv4 
nfsv3 nfs_acl nfs lockd sunrpc grace fscache nfnetlink_queue 
nfnetlink_log nfnetlink bluetooth rfkill arc4 md4 nls_utf8 cifs 
dns_resolver ib_isert iscsi_target_mod ib_iser libiscsi 
scsi_transport_iscsi ib_srpt target_core_mod ib_srp scsi_transport_srp 
scsi_tgt ib_ipoib rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm iw_cm ib_cm 
ib_sa ib_mad ib_core ib_addr nls_koi8_u nls_cp932 ts_kmp 
nf_conntrack_ipv4 nf_defrag_ipv4 nf_conntrack crct10dif_generic 
crct10dif_common pseries_rng virtio_console virtio_balloon xfs libcrc32c 
virtio_blk virtio_net virtio_pci virtio_ring virtio dm_mirror 
dm_region_hash dm_log dm_mod [last unloaded: 
stap_2c7aea09c8404123be8fa7c7e18bc50_31774]
[ 2717.075687] CPU: 4 PID: 21943 Comm: ps Tainted: G 
OE--------------   3.10.0-255.el7.ppc64.debug #1
[ 2717.075750] task: c000000229c08f10 ti: c000000224af0000 task.ti: 
c000000224af0000
[ 2717.075798] NIP: c0000000003f2e34 LR: c0000000003f2e14 CTR: 
c0000000003f2cb0
[ 2717.075849] REGS: c000000224af3a00 TRAP: 0700   Tainted: G 
OE--------------    (3.10.0-255.el7.ppc64.debug)
[ 2717.075911] MSR: 8000000000029032 <SF,EE,ME,IR,DR,RI>  CR: 22004224 
XER: 00000000
[ 2717.076016] CFAR: c00000000011d758 SOFTE: 1
GPR00: c0000000003f2e14 c000000224af3c80 c0000000019b6bc0 0000000000000001
GPR04: 0000000000000001 c0000000003f2e14 0d40600000000000 00000003ffc0ac02
GPR08: 00000000036b80f2 0000000000000001 0000000000000000 ef7bdef7bdef7bdf
GPR12: 0000000022004224 c000000007b82400 0000000000000000 0000000000000000
GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
GPR20: 0000000000000000 0000000000000000 00003fffdc61f61a 0000000000000000
GPR24: 00003fffdc61f60e c0000001b75f0000 00000100315aa850 00003fffdc61f61a
GPR28: c000000224af3df0 0000000000020000 c0000000183241c0 c000000018324100
[ 2717.076615] NIP [c0000000003f2e34] .proc_pid_cmdline_read+0x184/0x5a0
[ 2717.076656] LR [c0000000003f2e14] .proc_pid_cmdline_read+0x164/0x5a0
[ 2717.076697] Call Trace:
[ 2717.076714] [c000000224af3c80] [c0000000003f2e14] 
.proc_pid_cmdline_read+0x164/0x5a0 (unreliable)
[ 2717.076781] [c000000224af3d80] [c00000000033ba2c] .SyS_read+0x12c/0x320
[ 2717.076831] [c000000224af3e30] [c00000000000a188] syscall_exit+0x0/0x7c
[ 2717.076877] Instruction dump:
[ 2717.076901] 7fc3f378 eadf01f0 eabf01f8 4bd2a8f1 60000000 7d38d810 
7d294910 7d2900d0
[ 2717.076982] 0b090000 7d36a810 7d294910 7d2900d0 <0b090000> 38c00001 
7fe3fb78 389bffff
[ 2717.077075] ---[ end trace 65ad2b0a70ae8547 ]---

[ 2717.077774] BUG: sleeping function called from invalid context at 
kernel/rwsem.c:20
[ 2717.077819] in_atomic(): 0, irqs_disabled(): 1, pid: 21943, name: ps
[ 2717.077856] INFO: lockdep is turned off.
[ 2717.077884] irq event stamp: 25482
[ 2717.077911] hardirqs last  enabled at (25481): [<c00000000027d664>] 
.get_page_from_freelist+0x984/0x2790
[ 2717.077974] hardirqs last disabled at (25482): [<c000000000006310>] 
program_check_common+0x110/0x180
[ 2717.078034] softirqs last  enabled at (24584): [<c0000000000d7370>] 
.__do_softirq+0x220/0x5b0
[ 2717.078092] softirqs last disabled at (24567): [<c00000000002695c>] 
.call_do_softirq+0x14/0x24
[ 2717.078164] CPU: 4 PID: 21943 Comm: ps Tainted: G      D 
OE--------------   3.10.0-255.el7.ppc64.debug #1
[ 2717.078225] Call Trace:
[ 2717.078242] [c000000224af32b0] [c000000000019510] 
.show_stack+0x80/0x380 (unreliable)
[ 2717.078301] [c000000224af3380] [c000000000a05ec4] .dump_stack+0x28/0x3c
[ 2717.078350] [c000000224af33f0] [c00000000012ab54] 
.__might_sleep+0x1b4/0x2c0
[ 2717.078396] [c000000224af3480] [c0000000009e5158] .down_read+0x38/0x110
[ 2717.078446] [c000000224af3510] [c0000000000f1e14] 
.exit_signals+0x24/0x160
[ 2717.078496] [c000000224af35a0] [c0000000000d2c30] .do_exit+0xe0/0xe40
[ 2717.078543] [c000000224af36b0] [c000000000023090] .die+0x300/0x450
[ 2717.078593] [c000000224af3760] [c000000000023404] ._exception+0x1b4/0x1e0
[ 2717.078640] [c000000224af3900] [c0000000009eb6b8] 
.program_check_exception+0x288/0x3e0
[ 2717.078693] [c000000224af3990] [c000000000006318] 
program_check_common+0x118/0x180
[ 2717.078750] --- Exception: 700 at .proc_pid_cmdline_read+0x184/0x5a0
     LR = .proc_pid_cmdline_read+0x164/0x5a0
[ 2717.078819] [c000000224af3d80] [c00000000033ba2c] .SyS_read+0x12c/0x320
[ 2717.078864] [c000000224af3e30] [c00000000000a188] syscall_exit+0x0/0x7c

-- 
Jarod Wilson
jarod@redhat.com

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH try #4] proc: fix PAGE_SIZE limit of /proc/$PID/cmdline
  2015-05-26 20:42 ` Jarod Wilson
@ 2015-05-26 21:24   ` Alexey Dobriyan
  2015-05-27  5:27     ` Jarod Wilson
  0 siblings, 1 reply; 10+ messages in thread
From: Alexey Dobriyan @ 2015-05-26 21:24 UTC (permalink / raw)
  To: Jarod Wilson; +Cc: akpm, linux-kernel

On Tue, May 26, 2015 at 04:42:36PM -0400, Jarod Wilson wrote:
> On 5/8/2015 8:28 AM, Alexey Dobriyan wrote:
> > /proc/$PID/cmdline truncates output at PAGE_SIZE. It is easy to see with
> >
> > 	$ cat /proc/self/cmdline $(seq 1037) 2>/dev/null
> >
> > However, command line size was never limited to PAGE_SIZE but to 128 KB and
> > relatively recently limitation was removed altogether.
> >
> > People noticed and ask questions:
> > http://stackoverflow.com/questions/199130/how-do-i-increase-the-proc-pid-cmdline-4096-byte-limit
> >
> > seq file interface is not OK, because it kmalloc's for whole output and
> > open + read(, 1) + sleep will pin arbitrary amounts of kernel memory.
> > To not do that, limit must be imposed which is incompatible with
> > arbitrary sized command lines.
> >
> > I apologize for hairy code, but this it direct consequence of command line
> > layout in memory and hacks to support things like "init [3]".
> >
> > The loops are "unrolled" otherwise it is either macros which hide
> > control flow or functions with 7-8 arguments with equal line count.
> >
> > There should be real setproctitle(2) or something.
> >
> > Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
> > Tested-by: Jarod Wilson <jarod@redhat.com>
> > Acked-by: Jarod Wilson <jarod@redhat.com>
> 
> Should have tested on more than just x86, it appears. We've started 
> hammering on this internally across all arches, and its exploded 
> multiple times on ppc64 now:
> 
> [ 2717.074699] ------------[ cut here ]------------
> [ 2717.074787] kernel BUG at fs/proc/base.c:244!

> OE--------------   3.10.0-255.el7.ppc64.debug #1

Which BUG_ON is this?

	BUG_ON(*pos < 0);
	BUG_ON(arg_start > arg_end);
	BUG_ON(env_start > env_end);

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH try #4] proc: fix PAGE_SIZE limit of /proc/$PID/cmdline
  2015-05-26 21:24   ` Alexey Dobriyan
@ 2015-05-27  5:27     ` Jarod Wilson
  2015-05-27 10:17       ` Alexey Dobriyan
  2015-05-27 10:56       ` Jan Stancek
  0 siblings, 2 replies; 10+ messages in thread
From: Jarod Wilson @ 2015-05-27  5:27 UTC (permalink / raw)
  To: Alexey Dobriyan; +Cc: Jarod Wilson, akpm, linux-kernel

On May 26, 2015, at 5:24 PM, Alexey Dobriyan <adobriyan@gmail.com> wrote:
> 
>> On Tue, May 26, 2015 at 04:42:36PM -0400, Jarod Wilson wrote:
>>> On 5/8/2015 8:28 AM, Alexey Dobriyan wrote:
>>> /proc/$PID/cmdline truncates output at PAGE_SIZE. It is easy to see with
>>> 
>>>    $ cat /proc/self/cmdline $(seq 1037) 2>/dev/null
>>> 
>>> However, command line size was never limited to PAGE_SIZE but to 128 KB and
>>> relatively recently limitation was removed altogether.
>>> 
>>> People noticed and ask questions:
>>> http://stackoverflow.com/questions/199130/how-do-i-increase-the-proc-pid-cmdline-4096-byte-limit
>>> 
>>> seq file interface is not OK, because it kmalloc's for whole output and
>>> open + read(, 1) + sleep will pin arbitrary amounts of kernel memory.
>>> To not do that, limit must be imposed which is incompatible with
>>> arbitrary sized command lines.
>>> 
>>> I apologize for hairy code, but this it direct consequence of command line
>>> layout in memory and hacks to support things like "init [3]".
>>> 
>>> The loops are "unrolled" otherwise it is either macros which hide
>>> control flow or functions with 7-8 arguments with equal line count.
>>> 
>>> There should be real setproctitle(2) or something.
>>> 
>>> Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
>>> Tested-by: Jarod Wilson <jarod@redhat.com>
>>> Acked-by: Jarod Wilson <jarod@redhat.com>
>> 
>> Should have tested on more than just x86, it appears. We've started 
>> hammering on this internally across all arches, and its exploded 
>> multiple times on ppc64 now:
>> 
>> [ 2717.074699] ------------[ cut here ]------------
>> [ 2717.074787] kernel BUG at fs/proc/base.c:244!
> 
>> OE--------------   3.10.0-255.el7.ppc64.debug #1
> 
> Which BUG_ON is this?
> 
>    BUG_ON(*pos < 0);
>    BUG_ON(arg_start > arg_end);
>    BUG_ON(env_start > env_end);

Ah, sorry, right, might not be exactly the same with the back-up ported version... It was the env_start > env_end one.

-- 
Jarod Wilson

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH try #4] proc: fix PAGE_SIZE limit of /proc/$PID/cmdline
  2015-05-27  5:27     ` Jarod Wilson
@ 2015-05-27 10:17       ` Alexey Dobriyan
  2015-05-27 10:56       ` Jan Stancek
  1 sibling, 0 replies; 10+ messages in thread
From: Alexey Dobriyan @ 2015-05-27 10:17 UTC (permalink / raw)
  To: Jarod Wilson; +Cc: Jarod Wilson, akpm, linux-kernel

On Wed, May 27, 2015 at 8:27 AM, Jarod Wilson <jwilson@redhat.com> wrote:
> On May 26, 2015, at 5:24 PM, Alexey Dobriyan <adobriyan@gmail.com> wrote:
>>
>>> On Tue, May 26, 2015 at 04:42:36PM -0400, Jarod Wilson wrote:
>>>> On 5/8/2015 8:28 AM, Alexey Dobriyan wrote:
>>>> /proc/$PID/cmdline truncates output at PAGE_SIZE. It is easy to see with
>>>>
>>>>    $ cat /proc/self/cmdline $(seq 1037) 2>/dev/null
>>>>
>>>> However, command line size was never limited to PAGE_SIZE but to 128 KB and
>>>> relatively recently limitation was removed altogether.
>>>>
>>>> People noticed and ask questions:
>>>> http://stackoverflow.com/questions/199130/how-do-i-increase-the-proc-pid-cmdline-4096-byte-limit
>>>>
>>>> seq file interface is not OK, because it kmalloc's for whole output and
>>>> open + read(, 1) + sleep will pin arbitrary amounts of kernel memory.
>>>> To not do that, limit must be imposed which is incompatible with
>>>> arbitrary sized command lines.
>>>>
>>>> I apologize for hairy code, but this it direct consequence of command line
>>>> layout in memory and hacks to support things like "init [3]".
>>>>
>>>> The loops are "unrolled" otherwise it is either macros which hide
>>>> control flow or functions with 7-8 arguments with equal line count.
>>>>
>>>> There should be real setproctitle(2) or something.
>>>>
>>>> Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
>>>> Tested-by: Jarod Wilson <jarod@redhat.com>
>>>> Acked-by: Jarod Wilson <jarod@redhat.com>
>>>
>>> Should have tested on more than just x86, it appears. We've started
>>> hammering on this internally across all arches, and its exploded
>>> multiple times on ppc64 now:
>>>
>>> [ 2717.074699] ------------[ cut here ]------------
>>> [ 2717.074787] kernel BUG at fs/proc/base.c:244!
>>
>>> OE--------------   3.10.0-255.el7.ppc64.debug #1
>>
>> Which BUG_ON is this?
>>
>>    BUG_ON(*pos < 0);
>>    BUG_ON(arg_start > arg_end);
>>    BUG_ON(env_start > env_end);
>
> Ah, sorry, right, might not be exactly the same with the back-up ported version... It was the env_start > env_end one.

Patch is OK. Checkpoint/restart people aren't checking syscall input properly:

        case PR_SET_MM_ENV_END:
                if (!vma) {
                        error = -EFAULT;
                        goto out;
                }
                if (opt == PR_SET_MM_START_STACK)
                        mm->start_stack = addr;
                else if (opt == PR_SET_MM_ARG_START)
                        mm->arg_start = addr;
                else if (opt == PR_SET_MM_ARG_END)
                        mm->arg_end = addr;
                else if (opt == PR_SET_MM_ENV_START)
                        mm->env_start = addr;
                else if (opt == PR_SET_MM_ENV_END)
                        mm->env_end = addr;
                break;

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH try #4] proc: fix PAGE_SIZE limit of /proc/$PID/cmdline
  2015-05-27  5:27     ` Jarod Wilson
  2015-05-27 10:17       ` Alexey Dobriyan
@ 2015-05-27 10:56       ` Jan Stancek
  2015-05-27 11:08         ` Alexey Dobriyan
  2015-05-27 13:46         ` Alexey Dobriyan
  1 sibling, 2 replies; 10+ messages in thread
From: Jan Stancek @ 2015-05-27 10:56 UTC (permalink / raw)
  To: Jarod Wilson; +Cc: Alexey Dobriyan, Jarod Wilson, akpm, linux-kernel

On Wed, May 27, 2015 at 01:27:13AM -0400, Jarod Wilson wrote:
> On May 26, 2015, at 5:24 PM, Alexey Dobriyan <adobriyan@gmail.com> wrote:
> >> Should have tested on more than just x86, it appears. We've started 
> >> hammering on this internally across all arches, and its exploded 
> >> multiple times on ppc64 now:
> >> 
> >> [ 2717.074699] ------------[ cut here ]------------
> >> [ 2717.074787] kernel BUG at fs/proc/base.c:244!
> > 
> >> OE--------------   3.10.0-255.el7.ppc64.debug #1
> > 
> > Which BUG_ON is this?
> > 
> >    BUG_ON(*pos < 0);
> >    BUG_ON(arg_start > arg_end);
> >    BUG_ON(env_start > env_end);
> 

Is create_elf_tables() taking mm->mmap_sem when it's initialising env_start/end?

I could reproduce it on x86_64 as well, just by doing exec in loop
and "ps afx" in other window:

------------------------------------------
# cat a.sh b.sh go.sh 
#!/bin/sh
exec ./b.sh skajsdlakj leakj slekj alkse j

#!/bin/sh
exec ./a.sh ere rwer 23 r23 23r2 rwrd 23r 

#!/bin/sh
for i in `seq 1 50`; do
        sh ./a.sh &
done
------------------------------------------
In window 1: ./go.sh
In window 2: while [ True ]; do ps afx > /dev/null; done


>From vmcore:

struct mm_struct {
...
  [0x1e0] unsigned long arg_start;
  [0x1e8] unsigned long arg_end;
  [0x1f0] unsigned long env_start;
  [0x1f8] unsigned long env_end;


    RIP: ffffffff812996b1  RSP: ffff8800b362fe78  RFLAGS: 00010206
    RAX: ffff8801b0461040  RBX: ffff8801b0460f80  RCX: 0000000000003040
    RDX: 0000000000000001  RSI: ffff8801b6c00000  RDI: 0000000000000246
    RBP: ffff8800b362ff00   R8: 0000000000000000   R9: 0000000000000001
    R10: 0000000000000000  R11: 0000000000000000  R12: 00007ffd3fb0d396
    R13: 00007ffd3fb0d396  R14: 0000000000020000  R15: ffff8800b362ff48
    ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018

crash> disassemble proc_pid_cmdline_read
Dump of assembler code for function proc_pid_cmdline_read:
...
   0xffffffff812991f2 <+178>:   callq  0xffffffff816e4870 <down_read>
   0xffffffff812991f7 <+183>:   mov    0x1e0(%rbx),%rax  # arg_start
   0xffffffff812991fe <+190>:   mov    0x1f8(%rbx),%rdx  # env_end
   0xffffffff81299205 <+197>:   mov    0x1e8(%rbx),%r12  # arg_end
   0xffffffff8129920c <+204>:   mov    0x1f0(%rbx),%r13  # env_start
   0xffffffff81299213 <+211>:   mov    %rax,-0x50(%rbp)
   0xffffffff81299217 <+215>:   mov    -0x60(%rbp),%rax
   0xffffffff8129921b <+219>:   mov    %rdx,-0x58(%rbp)
   0xffffffff8129921f <+223>:   mov    %rax,%rdi
   0xffffffff81299222 <+226>:   callq  0xffffffff810bbb50 <up_read>
   0xffffffff81299227 <+231>:   cmp    %r12,-0x50(%rbp)
   0xffffffff8129922b <+235>:   ja     0xffffffff812996b3 <proc_pid_cmdline_read+1395>
   0xffffffff81299231 <+241>:   cmp    -0x58(%rbp),%r13
   0xffffffff81299235 <+245>:   ja     0xffffffff812996b1 <proc_pid_cmdline_read+1393>

crash> p/x ((struct mm_struct *)0xffff8801b0460f80)->arg_start
$5 = 0x7ffd3fb0d365
crash> p/x ((struct mm_struct *)0xffff8801b0460f80)->arg_end
$6 = 0x7ffd3fb0d396
crash> p/x ((struct mm_struct *)0xffff8801b0460f80)->env_start
$7 = 0x7ffd3fb0d396
crash> p/x ((struct mm_struct *)0xffff8801b0460f80)->env_end
$8 = 0x7ffd3fb0dfed

crash> x/1g (0xffff8800b362ff00-0x50)
0xffff8800b362feb0:     0x00007ffd3fb0d365

crash> x/1g (0xffff8800b362ff00-0x58)
0xffff8800b362fea8:     0x0000000000000000

Regards,
Jan


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH try #4] proc: fix PAGE_SIZE limit of /proc/$PID/cmdline
  2015-05-27 10:56       ` Jan Stancek
@ 2015-05-27 11:08         ` Alexey Dobriyan
  2015-05-27 13:46         ` Alexey Dobriyan
  1 sibling, 0 replies; 10+ messages in thread
From: Alexey Dobriyan @ 2015-05-27 11:08 UTC (permalink / raw)
  To: Jan Stancek; +Cc: Jarod Wilson, Jarod Wilson, akpm, linux-kernel

On Wed, May 27, 2015 at 1:56 PM, Jan Stancek <jstancek@redhat.com> wrote:
> On Wed, May 27, 2015 at 01:27:13AM -0400, Jarod Wilson wrote:
>> On May 26, 2015, at 5:24 PM, Alexey Dobriyan <adobriyan@gmail.com> wrote:
>> >> Should have tested on more than just x86, it appears. We've started
>> >> hammering on this internally across all arches, and its exploded
>> >> multiple times on ppc64 now:
>> >>
>> >> [ 2717.074699] ------------[ cut here ]------------
>> >> [ 2717.074787] kernel BUG at fs/proc/base.c:244!
>> >
>> >> OE--------------   3.10.0-255.el7.ppc64.debug #1
>> >
>> > Which BUG_ON is this?
>> >
>> >    BUG_ON(*pos < 0);
>> >    BUG_ON(arg_start > arg_end);
>> >    BUG_ON(env_start > env_end);
>>
>
> Is create_elf_tables() taking mm->mmap_sem when it's initialising env_start/end?
>
> I could reproduce it on x86_64 as well, just by doing exec in loop
> and "ps afx" in other window:

OK.

I've reproduced BUG_ON by running this program which sets arg_start>arg_end:

#include <sys/mman.h>
#include <sys/prctl.h>
#include <unistd.h>

enum {PAGE_SIZE=4096};

int main(void)
{
        unsigned long env_start, env_end;
        void *p;

        p = mmap(NULL, PAGE_SIZE, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);

#define PR_SET_MM               35
#define PR_SET_MM_ARG_START     8
#define PR_SET_MM_ARG_END       9
#define PR_SET_MM_ENV_START     10
#define PR_SET_MM_ENV_END       11
        prctl(PR_SET_MM, PR_SET_MM_ARG_START, (unsigned long)p +
PAGE_SIZE - 1, 0, 0);
        prctl(PR_SET_MM, PR_SET_MM_ARG_END,   (unsigned long)p, 0, 0);

        pause();
        return 0;
}

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH try #4] proc: fix PAGE_SIZE limit of /proc/$PID/cmdline
  2015-05-27 10:56       ` Jan Stancek
  2015-05-27 11:08         ` Alexey Dobriyan
@ 2015-05-27 13:46         ` Alexey Dobriyan
  1 sibling, 0 replies; 10+ messages in thread
From: Alexey Dobriyan @ 2015-05-27 13:46 UTC (permalink / raw)
  To: Jan Stancek; +Cc: Jarod Wilson, Jarod Wilson, akpm, linux-kernel

On Wed, May 27, 2015 at 1:56 PM, Jan Stancek <jstancek@redhat.com> wrote:
> On Wed, May 27, 2015 at 01:27:13AM -0400, Jarod Wilson wrote:
>> On May 26, 2015, at 5:24 PM, Alexey Dobriyan <adobriyan@gmail.com> wrote:
>> >> Should have tested on more than just x86, it appears. We've started
>> >> hammering on this internally across all arches, and its exploded
>> >> multiple times on ppc64 now:
>> >>
>> >> [ 2717.074699] ------------[ cut here ]------------
>> >> [ 2717.074787] kernel BUG at fs/proc/base.c:244!
>> >
>> >> OE--------------   3.10.0-255.el7.ppc64.debug #1
>> >
>> > Which BUG_ON is this?
>> >
>> >    BUG_ON(*pos < 0);
>> >    BUG_ON(arg_start > arg_end);
>> >    BUG_ON(env_start > env_end);
>>
>
> Is create_elf_tables() taking mm->mmap_sem when it's initialising env_start/end?

No, and it wasn't needed before checkpoint/restart (prctl(PR_SET_MM ...)).
Fields were write-once, now they aren't. C/R is doing down_read() in
prctl_set_mm()
which is an obvious bug.

Check "if (!mm->arg_end)" is safeguard exactly against new process not having
ARGV/ENVP fields completely initialized. I didn't understand what it
does exactly,
now it's clear.

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2015-05-27 13:46 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-05-08 12:28 [PATCH try #4] proc: fix PAGE_SIZE limit of /proc/$PID/cmdline Alexey Dobriyan
2015-05-08 21:32 ` Andrew Morton
2015-05-10 13:36   ` Alexey Dobriyan
2015-05-26 20:42 ` Jarod Wilson
2015-05-26 21:24   ` Alexey Dobriyan
2015-05-27  5:27     ` Jarod Wilson
2015-05-27 10:17       ` Alexey Dobriyan
2015-05-27 10:56       ` Jan Stancek
2015-05-27 11:08         ` Alexey Dobriyan
2015-05-27 13:46         ` Alexey Dobriyan

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).