* [PATCH try #4] proc: fix PAGE_SIZE limit of /proc/$PID/cmdline
@ 2015-05-08 12:28 Alexey Dobriyan
2015-05-08 21:32 ` Andrew Morton
2015-05-26 20:42 ` Jarod Wilson
0 siblings, 2 replies; 10+ messages in thread
From: Alexey Dobriyan @ 2015-05-08 12:28 UTC (permalink / raw)
To: akpm; +Cc: linux-kernel, jarod
/proc/$PID/cmdline truncates output at PAGE_SIZE. It is easy to see with
$ cat /proc/self/cmdline $(seq 1037) 2>/dev/null
However, command line size was never limited to PAGE_SIZE but to 128 KB and
relatively recently limitation was removed altogether.
People noticed and ask questions:
http://stackoverflow.com/questions/199130/how-do-i-increase-the-proc-pid-cmdline-4096-byte-limit
seq file interface is not OK, because it kmalloc's for whole output and
open + read(, 1) + sleep will pin arbitrary amounts of kernel memory.
To not do that, limit must be imposed which is incompatible with
arbitrary sized command lines.
I apologize for hairy code, but this it direct consequence of command line
layout in memory and hacks to support things like "init [3]".
The loops are "unrolled" otherwise it is either macros which hide
control flow or functions with 7-8 arguments with equal line count.
There should be real setproctitle(2) or something.
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Tested-by: Jarod Wilson <jarod@redhat.com>
Acked-by: Jarod Wilson <jarod@redhat.com>
---
fs/proc/base.c | 203 ++++++++++++++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 194 insertions(+), 9 deletions(-)
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -196,18 +196,203 @@ static int proc_root_link(struct dentry *dentry, struct path *path)
return result;
}
-static int proc_pid_cmdline(struct seq_file *m, struct pid_namespace *ns,
- struct pid *pid, struct task_struct *task)
+static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf,
+ size_t count, loff_t *pos)
{
+ struct task_struct *tsk;
+ struct mm_struct *mm;
+ char *page;
+ unsigned long arg_start, arg_end, env_start, env_end;
+ unsigned long len1, len2, len;
+ unsigned long p;
+ char c;
+ ssize_t rv;
+
+ BUG_ON(*pos < 0);
+
+ tsk = get_proc_task(file_inode(file));
+ if (!tsk)
+ return -ESRCH;
+ mm = get_task_mm(tsk);
+ put_task_struct(tsk);
+ if (!mm)
+ return 0;
+ if (!mm->arg_end) {
+ rv = 0;
+ goto out_mmput;
+ }
+
+ page = (char *)__get_free_page(GFP_TEMPORARY);
+ if (!page) {
+ rv = -ENOMEM;
+ goto out_mmput;
+ }
+
+ down_read(&mm->mmap_sem);
+ arg_start = mm->arg_start;
+ arg_end = mm->arg_end;
+ env_start = mm->env_start;
+ env_end = mm->env_end;
+ up_read(&mm->mmap_sem);
+
+ BUG_ON(arg_start > arg_end);
+ BUG_ON(env_start > env_end);
+
+ len1 = arg_end - arg_start;
+ len2 = env_end - env_start;
+
/*
- * Rely on struct seq_operations::show() being called once
- * per internal buffer allocation. See single_open(), traverse().
+ * Inherently racy -- command line shares address space
+ * with code and data.
*/
- BUG_ON(m->size < PAGE_SIZE);
- m->count += get_cmdline(task, m->buf, PAGE_SIZE);
- return 0;
+ rv = access_remote_vm(mm, arg_end - 1, &c, 1, 0);
+ if (rv <= 0)
+ goto out_free_page;
+
+ rv = 0;
+
+ if (c == '\0') {
+ /* Command line (set of strings) occupies whole ARGV. */
+ if (len1 <= *pos)
+ goto out_free_page;
+
+ p = arg_start + *pos;
+ len = len1 - *pos;
+ while (count > 0 && len > 0) {
+ unsigned int _count;
+ int nr_read;
+
+ _count = min3(count, len, PAGE_SIZE);
+ nr_read = access_remote_vm(mm, p, page, _count, 0);
+ if (nr_read < 0)
+ rv = nr_read;
+ if (nr_read <= 0)
+ goto out_free_page;
+
+ if (copy_to_user(buf, page, nr_read)) {
+ rv = -EFAULT;
+ goto out_free_page;
+ }
+
+ p += nr_read;
+ len -= nr_read;
+ buf += nr_read;
+ count -= nr_read;
+ rv += nr_read;
+ }
+ } else {
+ /*
+ * Command line (1 string) occupies ARGV and maybe
+ * extends into ENVP.
+ */
+ if (len1 + len2 <= *pos)
+ goto skip_argv_envp;
+ if (len1 <= *pos)
+ goto skip_argv;
+
+ p = arg_start + *pos;
+ len = len1 - *pos;
+ while (count > 0 && len > 0) {
+ unsigned int _count, l;
+ int nr_read;
+ bool final;
+
+ _count = min3(count, len, PAGE_SIZE);
+ nr_read = access_remote_vm(mm, p, page, _count, 0);
+ if (nr_read < 0)
+ rv = nr_read;
+ if (nr_read <= 0)
+ goto out_free_page;
+
+ /*
+ * Command line can be shorter than whole ARGV
+ * even if last "marker" byte says it is not.
+ */
+ final = false;
+ l = strnlen(page, nr_read);
+ if (l < nr_read) {
+ nr_read = l;
+ final = true;
+ }
+
+ if (copy_to_user(buf, page, nr_read)) {
+ rv = -EFAULT;
+ goto out_free_page;
+ }
+
+ p += nr_read;
+ len -= nr_read;
+ buf += nr_read;
+ count -= nr_read;
+ rv += nr_read;
+
+ if (final)
+ goto out_free_page;
+ }
+skip_argv:
+ /*
+ * Command line (1 string) occupies ARGV and
+ * extends into ENVP.
+ */
+ if (len1 <= *pos) {
+ p = env_start + *pos - len1;
+ len = len1 + len2 - *pos;
+ } else {
+ p = env_start;
+ len = len2;
+ }
+ while (count > 0 && len > 0) {
+ unsigned int _count, l;
+ int nr_read;
+ bool final;
+
+ _count = min3(count, len, PAGE_SIZE);
+ nr_read = access_remote_vm(mm, p, page, _count, 0);
+ if (nr_read < 0)
+ rv = nr_read;
+ if (nr_read <= 0)
+ goto out_free_page;
+
+ /* Find EOS. */
+ final = false;
+ l = strnlen(page, nr_read);
+ if (l < nr_read) {
+ nr_read = l;
+ final = true;
+ }
+
+ if (copy_to_user(buf, page, nr_read)) {
+ rv = -EFAULT;
+ goto out_free_page;
+ }
+
+ p += nr_read;
+ len -= nr_read;
+ buf += nr_read;
+ count -= nr_read;
+ rv += nr_read;
+
+ if (final)
+ goto out_free_page;
+ }
+skip_argv_envp:
+ ;
+ }
+
+out_free_page:
+ free_page((unsigned long)page);
+out_mmput:
+ mmput(mm);
+ if (rv > 0)
+ *pos += rv;
+ return rv;
}
+static const struct file_operations proc_pid_cmdline_ops = {
+ .read = proc_pid_cmdline_read,
+ .llseek = generic_file_llseek,
+};
+
static int proc_pid_auxv(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *task)
{
@@ -2560,7 +2745,7 @@ static const struct pid_entry tgid_base_stuff[] = {
#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
ONE("syscall", S_IRUSR, proc_pid_syscall),
#endif
- ONE("cmdline", S_IRUGO, proc_pid_cmdline),
+ REG("cmdline", S_IRUGO, proc_pid_cmdline_ops),
ONE("stat", S_IRUGO, proc_tgid_stat),
ONE("statm", S_IRUGO, proc_pid_statm),
REG("maps", S_IRUGO, proc_pid_maps_operations),
@@ -2906,7 +3091,7 @@ static const struct pid_entry tid_base_stuff[] = {
#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
ONE("syscall", S_IRUSR, proc_pid_syscall),
#endif
- ONE("cmdline", S_IRUGO, proc_pid_cmdline),
+ REG("cmdline", S_IRUGO, proc_pid_cmdline_ops),
ONE("stat", S_IRUGO, proc_tid_stat),
ONE("statm", S_IRUGO, proc_pid_statm),
REG("maps", S_IRUGO, proc_tid_maps_operations),
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH try #4] proc: fix PAGE_SIZE limit of /proc/$PID/cmdline
2015-05-08 12:28 [PATCH try #4] proc: fix PAGE_SIZE limit of /proc/$PID/cmdline Alexey Dobriyan
@ 2015-05-08 21:32 ` Andrew Morton
2015-05-10 13:36 ` Alexey Dobriyan
2015-05-26 20:42 ` Jarod Wilson
1 sibling, 1 reply; 10+ messages in thread
From: Andrew Morton @ 2015-05-08 21:32 UTC (permalink / raw)
To: Alexey Dobriyan; +Cc: linux-kernel, jarod
On Fri, 8 May 2015 15:28:05 +0300 Alexey Dobriyan <adobriyan@gmail.com> wrote:
> /proc/$PID/cmdline truncates output at PAGE_SIZE. It is easy to see with
>
> $ cat /proc/self/cmdline $(seq 1037) 2>/dev/null
>
> However, command line size was never limited to PAGE_SIZE but to 128 KB and
> relatively recently limitation was removed altogether.
>
> People noticed and ask questions:
> http://stackoverflow.com/questions/199130/how-do-i-increase-the-proc-pid-cmdline-4096-byte-limit
>
> seq file interface is not OK, because it kmalloc's for whole output and
> open + read(, 1) + sleep will pin arbitrary amounts of kernel memory.
> To not do that, limit must be imposed which is incompatible with
> arbitrary sized command lines.
>
> I apologize for hairy code, but this it direct consequence of command line
> layout in memory and hacks to support things like "init [3]".
>
> The loops are "unrolled" otherwise it is either macros which hide
> control flow or functions with 7-8 arguments with equal line count.
>
> There should be real setproctitle(2) or something.
>
> ...
>
> fs/proc/base.c | 203 ++++++++++++++++++++++++++++++++++++++++++++++++++++++---
> 1 file changed, 194 insertions(+), 9 deletions(-)
I still hate your patch!
Also, dude. i386:
In file included from include/asm-generic/bug.h:13:0,
from ./arch/x86/include/asm/bug.h:35,
from include/linux/bug.h:4,
from include/linux/thread_info.h:11,
from ./arch/x86/include/asm/uaccess.h:8,
from fs/proc/base.c:50:
fs/proc/base.c: In function 'proc_pid_cmdline_read':
include/linux/kernel.h:602:17: warning: comparison of distinct pointer types lacks a cast [enabled by default]
(void) (&_min1 == &_min2); \
^
include/linux/kernel.h:600:9: note: in definition of macro 'min'
typeof(x) _min1 = (x); \
^
include/linux/kernel.h:611:38: note: in expansion of macro 'min'
#define min3(x, y, z) min((typeof(x))min(x, y), z)
^
fs/proc/base.c:265:13: note: in expansion of macro 'min3'
_count = min3(count, len, PAGE_SIZE);
^
include/linux/kernel.h:602:17: warning: comparison of distinct pointer types lacks a cast [enabled by default]
(void) (&_min1 == &_min2); \
^
include/linux/kernel.h:600:21: note: in definition of macro 'min'
typeof(x) _min1 = (x); \
^
include/linux/kernel.h:611:38: note: in expansion of macro 'min'
#define min3(x, y, z) min((typeof(x))min(x, y), z)
^
fs/proc/base.c:265:13: note: in expansion of macro 'min3'
_count = min3(count, len, PAGE_SIZE);
^
include/linux/kernel.h:602:17: warning: comparison of distinct pointer types lacks a cast [enabled by default]
(void) (&_min1 == &_min2); \
^
include/linux/kernel.h:611:23: note: in expansion of macro 'min'
#define min3(x, y, z) min((typeof(x))min(x, y), z)
^
fs/proc/base.c:265:13: note: in expansion of macro 'min3'
_count = min3(count, len, PAGE_SIZE);
^
include/linux/kernel.h:602:17: warning: comparison of distinct pointer types lacks a cast [enabled by default]
(void) (&_min1 == &_min2); \
^
include/linux/kernel.h:600:9: note: in definition of macro 'min'
typeof(x) _min1 = (x); \
^
include/linux/kernel.h:611:38: note: in expansion of macro 'min'
#define min3(x, y, z) min((typeof(x))min(x, y), z)
^
fs/proc/base.c:300:13: note: in expansion of macro 'min3'
_count = min3(count, len, PAGE_SIZE);
^
include/linux/kernel.h:602:17: warning: comparison of distinct pointer types lacks a cast [enabled by default]
(void) (&_min1 == &_min2); \
^
include/linux/kernel.h:600:21: note: in definition of macro 'min'
typeof(x) _min1 = (x); \
^
include/linux/kernel.h:611:38: note: in expansion of macro 'min'
#define min3(x, y, z) min((typeof(x))min(x, y), z)
^
fs/proc/base.c:300:13: note: in expansion of macro 'min3'
_count = min3(count, len, PAGE_SIZE);
^
include/linux/kernel.h:602:17: warning: comparison of distinct pointer types lacks a cast [enabled by default]
(void) (&_min1 == &_min2); \
^
include/linux/kernel.h:611:23: note: in expansion of macro 'min'
#define min3(x, y, z) min((typeof(x))min(x, y), z)
^
fs/proc/base.c:300:13: note: in expansion of macro 'min3'
_count = min3(count, len, PAGE_SIZE);
^
include/linux/kernel.h:602:17: warning: comparison of distinct pointer types lacks a cast [enabled by default]
(void) (&_min1 == &_min2); \
^
include/linux/kernel.h:600:9: note: in definition of macro 'min'
typeof(x) _min1 = (x); \
^
include/linux/kernel.h:611:38: note: in expansion of macro 'min'
#define min3(x, y, z) min((typeof(x))min(x, y), z)
^
fs/proc/base.c:349:13: note: in expansion of macro 'min3'
_count = min3(count, len, PAGE_SIZE);
^
include/linux/kernel.h:602:17: warning: comparison of distinct pointer types lacks a cast [enabled by default]
(void) (&_min1 == &_min2); \
^
include/linux/kernel.h:600:21: note: in definition of macro 'min'
typeof(x) _min1 = (x); \
^
include/linux/kernel.h:611:38: note: in expansion of macro 'min'
#define min3(x, y, z) min((typeof(x))min(x, y), z)
^
fs/proc/base.c:349:13: note: in expansion of macro 'min3'
_count = min3(count, len, PAGE_SIZE);
^
include/linux/kernel.h:602:17: warning: comparison of distinct pointer types lacks a cast [enabled by default]
(void) (&_min1 == &_min2); \
^
include/linux/kernel.h:611:23: note: in expansion of macro 'min'
#define min3(x, y, z) min((typeof(x))min(x, y), z)
^
fs/proc/base.c:349:13: note: in expansion of macro 'min3'
_count = min3(count, len, PAGE_SIZE);
^
--- a/fs/proc/base.c~proc-fix-page_size-limit-of-proc-pid-cmdline-fix
+++ a/fs/proc/base.c
@@ -197,11 +197,12 @@ static int proc_root_link(struct dentry
}
static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf,
- size_t count, loff_t *pos)
+ size_t _count, loff_t *pos)
{
struct task_struct *tsk;
struct mm_struct *mm;
char *page;
+ unsigned long count = _count;
unsigned long arg_start, arg_end, env_start, env_end;
unsigned long len1, len2, len;
unsigned long p;
_
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH try #4] proc: fix PAGE_SIZE limit of /proc/$PID/cmdline
2015-05-08 21:32 ` Andrew Morton
@ 2015-05-10 13:36 ` Alexey Dobriyan
0 siblings, 0 replies; 10+ messages in thread
From: Alexey Dobriyan @ 2015-05-10 13:36 UTC (permalink / raw)
To: Andrew Morton; +Cc: linux-kernel, jarod
On Fri, May 08, 2015 at 02:32:38PM -0700, Andrew Morton wrote:
> On Fri, 8 May 2015 15:28:05 +0300 Alexey Dobriyan <adobriyan@gmail.com> wrote:
>
> > /proc/$PID/cmdline truncates output at PAGE_SIZE. It is easy to see with
>
> I still hate your patch!
>
>
> Also, dude. i386:
Someone sent i386 removal patch a month ago, why it wasn't applied???
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH try #4] proc: fix PAGE_SIZE limit of /proc/$PID/cmdline
2015-05-08 12:28 [PATCH try #4] proc: fix PAGE_SIZE limit of /proc/$PID/cmdline Alexey Dobriyan
2015-05-08 21:32 ` Andrew Morton
@ 2015-05-26 20:42 ` Jarod Wilson
2015-05-26 21:24 ` Alexey Dobriyan
1 sibling, 1 reply; 10+ messages in thread
From: Jarod Wilson @ 2015-05-26 20:42 UTC (permalink / raw)
To: Alexey Dobriyan, akpm; +Cc: linux-kernel
On 5/8/2015 8:28 AM, Alexey Dobriyan wrote:
> /proc/$PID/cmdline truncates output at PAGE_SIZE. It is easy to see with
>
> $ cat /proc/self/cmdline $(seq 1037) 2>/dev/null
>
> However, command line size was never limited to PAGE_SIZE but to 128 KB and
> relatively recently limitation was removed altogether.
>
> People noticed and ask questions:
> http://stackoverflow.com/questions/199130/how-do-i-increase-the-proc-pid-cmdline-4096-byte-limit
>
> seq file interface is not OK, because it kmalloc's for whole output and
> open + read(, 1) + sleep will pin arbitrary amounts of kernel memory.
> To not do that, limit must be imposed which is incompatible with
> arbitrary sized command lines.
>
> I apologize for hairy code, but this it direct consequence of command line
> layout in memory and hacks to support things like "init [3]".
>
> The loops are "unrolled" otherwise it is either macros which hide
> control flow or functions with 7-8 arguments with equal line count.
>
> There should be real setproctitle(2) or something.
>
> Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
> Tested-by: Jarod Wilson <jarod@redhat.com>
> Acked-by: Jarod Wilson <jarod@redhat.com>
Should have tested on more than just x86, it appears. We've started
hammering on this internally across all arches, and its exploded
multiple times on ppc64 now:
[ 2717.074699] ------------[ cut here ]------------
[ 2717.074787] kernel BUG at fs/proc/base.c:244!
[ 2717.074822] Oops: Exception in kernel mode, sig: 5 [#1]
[ 2717.074854] SMP NR_CPUS=2048 NUMA pSeries
[ 2717.074891] Modules linked in: rpcsec_gss_krb5 auth_rpcgss nfsv4
nfsv3 nfs_acl nfs lockd sunrpc grace fscache nfnetlink_queue
nfnetlink_log nfnetlink bluetooth rfkill arc4 md4 nls_utf8 cifs
dns_resolver ib_isert iscsi_target_mod ib_iser libiscsi
scsi_transport_iscsi ib_srpt target_core_mod ib_srp scsi_transport_srp
scsi_tgt ib_ipoib rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm iw_cm ib_cm
ib_sa ib_mad ib_core ib_addr nls_koi8_u nls_cp932 ts_kmp
nf_conntrack_ipv4 nf_defrag_ipv4 nf_conntrack crct10dif_generic
crct10dif_common pseries_rng virtio_console virtio_balloon xfs libcrc32c
virtio_blk virtio_net virtio_pci virtio_ring virtio dm_mirror
dm_region_hash dm_log dm_mod [last unloaded:
stap_2c7aea09c8404123be8fa7c7e18bc50_31774]
[ 2717.075687] CPU: 4 PID: 21943 Comm: ps Tainted: G
OE-------------- 3.10.0-255.el7.ppc64.debug #1
[ 2717.075750] task: c000000229c08f10 ti: c000000224af0000 task.ti:
c000000224af0000
[ 2717.075798] NIP: c0000000003f2e34 LR: c0000000003f2e14 CTR:
c0000000003f2cb0
[ 2717.075849] REGS: c000000224af3a00 TRAP: 0700 Tainted: G
OE-------------- (3.10.0-255.el7.ppc64.debug)
[ 2717.075911] MSR: 8000000000029032 <SF,EE,ME,IR,DR,RI> CR: 22004224
XER: 00000000
[ 2717.076016] CFAR: c00000000011d758 SOFTE: 1
GPR00: c0000000003f2e14 c000000224af3c80 c0000000019b6bc0 0000000000000001
GPR04: 0000000000000001 c0000000003f2e14 0d40600000000000 00000003ffc0ac02
GPR08: 00000000036b80f2 0000000000000001 0000000000000000 ef7bdef7bdef7bdf
GPR12: 0000000022004224 c000000007b82400 0000000000000000 0000000000000000
GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
GPR20: 0000000000000000 0000000000000000 00003fffdc61f61a 0000000000000000
GPR24: 00003fffdc61f60e c0000001b75f0000 00000100315aa850 00003fffdc61f61a
GPR28: c000000224af3df0 0000000000020000 c0000000183241c0 c000000018324100
[ 2717.076615] NIP [c0000000003f2e34] .proc_pid_cmdline_read+0x184/0x5a0
[ 2717.076656] LR [c0000000003f2e14] .proc_pid_cmdline_read+0x164/0x5a0
[ 2717.076697] Call Trace:
[ 2717.076714] [c000000224af3c80] [c0000000003f2e14]
.proc_pid_cmdline_read+0x164/0x5a0 (unreliable)
[ 2717.076781] [c000000224af3d80] [c00000000033ba2c] .SyS_read+0x12c/0x320
[ 2717.076831] [c000000224af3e30] [c00000000000a188] syscall_exit+0x0/0x7c
[ 2717.076877] Instruction dump:
[ 2717.076901] 7fc3f378 eadf01f0 eabf01f8 4bd2a8f1 60000000 7d38d810
7d294910 7d2900d0
[ 2717.076982] 0b090000 7d36a810 7d294910 7d2900d0 <0b090000> 38c00001
7fe3fb78 389bffff
[ 2717.077075] ---[ end trace 65ad2b0a70ae8547 ]---
[ 2717.077774] BUG: sleeping function called from invalid context at
kernel/rwsem.c:20
[ 2717.077819] in_atomic(): 0, irqs_disabled(): 1, pid: 21943, name: ps
[ 2717.077856] INFO: lockdep is turned off.
[ 2717.077884] irq event stamp: 25482
[ 2717.077911] hardirqs last enabled at (25481): [<c00000000027d664>]
.get_page_from_freelist+0x984/0x2790
[ 2717.077974] hardirqs last disabled at (25482): [<c000000000006310>]
program_check_common+0x110/0x180
[ 2717.078034] softirqs last enabled at (24584): [<c0000000000d7370>]
.__do_softirq+0x220/0x5b0
[ 2717.078092] softirqs last disabled at (24567): [<c00000000002695c>]
.call_do_softirq+0x14/0x24
[ 2717.078164] CPU: 4 PID: 21943 Comm: ps Tainted: G D
OE-------------- 3.10.0-255.el7.ppc64.debug #1
[ 2717.078225] Call Trace:
[ 2717.078242] [c000000224af32b0] [c000000000019510]
.show_stack+0x80/0x380 (unreliable)
[ 2717.078301] [c000000224af3380] [c000000000a05ec4] .dump_stack+0x28/0x3c
[ 2717.078350] [c000000224af33f0] [c00000000012ab54]
.__might_sleep+0x1b4/0x2c0
[ 2717.078396] [c000000224af3480] [c0000000009e5158] .down_read+0x38/0x110
[ 2717.078446] [c000000224af3510] [c0000000000f1e14]
.exit_signals+0x24/0x160
[ 2717.078496] [c000000224af35a0] [c0000000000d2c30] .do_exit+0xe0/0xe40
[ 2717.078543] [c000000224af36b0] [c000000000023090] .die+0x300/0x450
[ 2717.078593] [c000000224af3760] [c000000000023404] ._exception+0x1b4/0x1e0
[ 2717.078640] [c000000224af3900] [c0000000009eb6b8]
.program_check_exception+0x288/0x3e0
[ 2717.078693] [c000000224af3990] [c000000000006318]
program_check_common+0x118/0x180
[ 2717.078750] --- Exception: 700 at .proc_pid_cmdline_read+0x184/0x5a0
LR = .proc_pid_cmdline_read+0x164/0x5a0
[ 2717.078819] [c000000224af3d80] [c00000000033ba2c] .SyS_read+0x12c/0x320
[ 2717.078864] [c000000224af3e30] [c00000000000a188] syscall_exit+0x0/0x7c
--
Jarod Wilson
jarod@redhat.com
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH try #4] proc: fix PAGE_SIZE limit of /proc/$PID/cmdline
2015-05-26 20:42 ` Jarod Wilson
@ 2015-05-26 21:24 ` Alexey Dobriyan
2015-05-27 5:27 ` Jarod Wilson
0 siblings, 1 reply; 10+ messages in thread
From: Alexey Dobriyan @ 2015-05-26 21:24 UTC (permalink / raw)
To: Jarod Wilson; +Cc: akpm, linux-kernel
On Tue, May 26, 2015 at 04:42:36PM -0400, Jarod Wilson wrote:
> On 5/8/2015 8:28 AM, Alexey Dobriyan wrote:
> > /proc/$PID/cmdline truncates output at PAGE_SIZE. It is easy to see with
> >
> > $ cat /proc/self/cmdline $(seq 1037) 2>/dev/null
> >
> > However, command line size was never limited to PAGE_SIZE but to 128 KB and
> > relatively recently limitation was removed altogether.
> >
> > People noticed and ask questions:
> > http://stackoverflow.com/questions/199130/how-do-i-increase-the-proc-pid-cmdline-4096-byte-limit
> >
> > seq file interface is not OK, because it kmalloc's for whole output and
> > open + read(, 1) + sleep will pin arbitrary amounts of kernel memory.
> > To not do that, limit must be imposed which is incompatible with
> > arbitrary sized command lines.
> >
> > I apologize for hairy code, but this it direct consequence of command line
> > layout in memory and hacks to support things like "init [3]".
> >
> > The loops are "unrolled" otherwise it is either macros which hide
> > control flow or functions with 7-8 arguments with equal line count.
> >
> > There should be real setproctitle(2) or something.
> >
> > Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
> > Tested-by: Jarod Wilson <jarod@redhat.com>
> > Acked-by: Jarod Wilson <jarod@redhat.com>
>
> Should have tested on more than just x86, it appears. We've started
> hammering on this internally across all arches, and its exploded
> multiple times on ppc64 now:
>
> [ 2717.074699] ------------[ cut here ]------------
> [ 2717.074787] kernel BUG at fs/proc/base.c:244!
> OE-------------- 3.10.0-255.el7.ppc64.debug #1
Which BUG_ON is this?
BUG_ON(*pos < 0);
BUG_ON(arg_start > arg_end);
BUG_ON(env_start > env_end);
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH try #4] proc: fix PAGE_SIZE limit of /proc/$PID/cmdline
2015-05-26 21:24 ` Alexey Dobriyan
@ 2015-05-27 5:27 ` Jarod Wilson
2015-05-27 10:17 ` Alexey Dobriyan
2015-05-27 10:56 ` Jan Stancek
0 siblings, 2 replies; 10+ messages in thread
From: Jarod Wilson @ 2015-05-27 5:27 UTC (permalink / raw)
To: Alexey Dobriyan; +Cc: Jarod Wilson, akpm, linux-kernel
On May 26, 2015, at 5:24 PM, Alexey Dobriyan <adobriyan@gmail.com> wrote:
>
>> On Tue, May 26, 2015 at 04:42:36PM -0400, Jarod Wilson wrote:
>>> On 5/8/2015 8:28 AM, Alexey Dobriyan wrote:
>>> /proc/$PID/cmdline truncates output at PAGE_SIZE. It is easy to see with
>>>
>>> $ cat /proc/self/cmdline $(seq 1037) 2>/dev/null
>>>
>>> However, command line size was never limited to PAGE_SIZE but to 128 KB and
>>> relatively recently limitation was removed altogether.
>>>
>>> People noticed and ask questions:
>>> http://stackoverflow.com/questions/199130/how-do-i-increase-the-proc-pid-cmdline-4096-byte-limit
>>>
>>> seq file interface is not OK, because it kmalloc's for whole output and
>>> open + read(, 1) + sleep will pin arbitrary amounts of kernel memory.
>>> To not do that, limit must be imposed which is incompatible with
>>> arbitrary sized command lines.
>>>
>>> I apologize for hairy code, but this it direct consequence of command line
>>> layout in memory and hacks to support things like "init [3]".
>>>
>>> The loops are "unrolled" otherwise it is either macros which hide
>>> control flow or functions with 7-8 arguments with equal line count.
>>>
>>> There should be real setproctitle(2) or something.
>>>
>>> Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
>>> Tested-by: Jarod Wilson <jarod@redhat.com>
>>> Acked-by: Jarod Wilson <jarod@redhat.com>
>>
>> Should have tested on more than just x86, it appears. We've started
>> hammering on this internally across all arches, and its exploded
>> multiple times on ppc64 now:
>>
>> [ 2717.074699] ------------[ cut here ]------------
>> [ 2717.074787] kernel BUG at fs/proc/base.c:244!
>
>> OE-------------- 3.10.0-255.el7.ppc64.debug #1
>
> Which BUG_ON is this?
>
> BUG_ON(*pos < 0);
> BUG_ON(arg_start > arg_end);
> BUG_ON(env_start > env_end);
Ah, sorry, right, might not be exactly the same with the back-up ported version... It was the env_start > env_end one.
--
Jarod Wilson
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH try #4] proc: fix PAGE_SIZE limit of /proc/$PID/cmdline
2015-05-27 5:27 ` Jarod Wilson
@ 2015-05-27 10:17 ` Alexey Dobriyan
2015-05-27 10:56 ` Jan Stancek
1 sibling, 0 replies; 10+ messages in thread
From: Alexey Dobriyan @ 2015-05-27 10:17 UTC (permalink / raw)
To: Jarod Wilson; +Cc: Jarod Wilson, akpm, linux-kernel
On Wed, May 27, 2015 at 8:27 AM, Jarod Wilson <jwilson@redhat.com> wrote:
> On May 26, 2015, at 5:24 PM, Alexey Dobriyan <adobriyan@gmail.com> wrote:
>>
>>> On Tue, May 26, 2015 at 04:42:36PM -0400, Jarod Wilson wrote:
>>>> On 5/8/2015 8:28 AM, Alexey Dobriyan wrote:
>>>> /proc/$PID/cmdline truncates output at PAGE_SIZE. It is easy to see with
>>>>
>>>> $ cat /proc/self/cmdline $(seq 1037) 2>/dev/null
>>>>
>>>> However, command line size was never limited to PAGE_SIZE but to 128 KB and
>>>> relatively recently limitation was removed altogether.
>>>>
>>>> People noticed and ask questions:
>>>> http://stackoverflow.com/questions/199130/how-do-i-increase-the-proc-pid-cmdline-4096-byte-limit
>>>>
>>>> seq file interface is not OK, because it kmalloc's for whole output and
>>>> open + read(, 1) + sleep will pin arbitrary amounts of kernel memory.
>>>> To not do that, limit must be imposed which is incompatible with
>>>> arbitrary sized command lines.
>>>>
>>>> I apologize for hairy code, but this it direct consequence of command line
>>>> layout in memory and hacks to support things like "init [3]".
>>>>
>>>> The loops are "unrolled" otherwise it is either macros which hide
>>>> control flow or functions with 7-8 arguments with equal line count.
>>>>
>>>> There should be real setproctitle(2) or something.
>>>>
>>>> Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
>>>> Tested-by: Jarod Wilson <jarod@redhat.com>
>>>> Acked-by: Jarod Wilson <jarod@redhat.com>
>>>
>>> Should have tested on more than just x86, it appears. We've started
>>> hammering on this internally across all arches, and its exploded
>>> multiple times on ppc64 now:
>>>
>>> [ 2717.074699] ------------[ cut here ]------------
>>> [ 2717.074787] kernel BUG at fs/proc/base.c:244!
>>
>>> OE-------------- 3.10.0-255.el7.ppc64.debug #1
>>
>> Which BUG_ON is this?
>>
>> BUG_ON(*pos < 0);
>> BUG_ON(arg_start > arg_end);
>> BUG_ON(env_start > env_end);
>
> Ah, sorry, right, might not be exactly the same with the back-up ported version... It was the env_start > env_end one.
Patch is OK. Checkpoint/restart people aren't checking syscall input properly:
case PR_SET_MM_ENV_END:
if (!vma) {
error = -EFAULT;
goto out;
}
if (opt == PR_SET_MM_START_STACK)
mm->start_stack = addr;
else if (opt == PR_SET_MM_ARG_START)
mm->arg_start = addr;
else if (opt == PR_SET_MM_ARG_END)
mm->arg_end = addr;
else if (opt == PR_SET_MM_ENV_START)
mm->env_start = addr;
else if (opt == PR_SET_MM_ENV_END)
mm->env_end = addr;
break;
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH try #4] proc: fix PAGE_SIZE limit of /proc/$PID/cmdline
2015-05-27 5:27 ` Jarod Wilson
2015-05-27 10:17 ` Alexey Dobriyan
@ 2015-05-27 10:56 ` Jan Stancek
2015-05-27 11:08 ` Alexey Dobriyan
2015-05-27 13:46 ` Alexey Dobriyan
1 sibling, 2 replies; 10+ messages in thread
From: Jan Stancek @ 2015-05-27 10:56 UTC (permalink / raw)
To: Jarod Wilson; +Cc: Alexey Dobriyan, Jarod Wilson, akpm, linux-kernel
On Wed, May 27, 2015 at 01:27:13AM -0400, Jarod Wilson wrote:
> On May 26, 2015, at 5:24 PM, Alexey Dobriyan <adobriyan@gmail.com> wrote:
> >> Should have tested on more than just x86, it appears. We've started
> >> hammering on this internally across all arches, and its exploded
> >> multiple times on ppc64 now:
> >>
> >> [ 2717.074699] ------------[ cut here ]------------
> >> [ 2717.074787] kernel BUG at fs/proc/base.c:244!
> >
> >> OE-------------- 3.10.0-255.el7.ppc64.debug #1
> >
> > Which BUG_ON is this?
> >
> > BUG_ON(*pos < 0);
> > BUG_ON(arg_start > arg_end);
> > BUG_ON(env_start > env_end);
>
Is create_elf_tables() taking mm->mmap_sem when it's initialising env_start/end?
I could reproduce it on x86_64 as well, just by doing exec in loop
and "ps afx" in other window:
------------------------------------------
# cat a.sh b.sh go.sh
#!/bin/sh
exec ./b.sh skajsdlakj leakj slekj alkse j
#!/bin/sh
exec ./a.sh ere rwer 23 r23 23r2 rwrd 23r
#!/bin/sh
for i in `seq 1 50`; do
sh ./a.sh &
done
------------------------------------------
In window 1: ./go.sh
In window 2: while [ True ]; do ps afx > /dev/null; done
>From vmcore:
struct mm_struct {
...
[0x1e0] unsigned long arg_start;
[0x1e8] unsigned long arg_end;
[0x1f0] unsigned long env_start;
[0x1f8] unsigned long env_end;
RIP: ffffffff812996b1 RSP: ffff8800b362fe78 RFLAGS: 00010206
RAX: ffff8801b0461040 RBX: ffff8801b0460f80 RCX: 0000000000003040
RDX: 0000000000000001 RSI: ffff8801b6c00000 RDI: 0000000000000246
RBP: ffff8800b362ff00 R8: 0000000000000000 R9: 0000000000000001
R10: 0000000000000000 R11: 0000000000000000 R12: 00007ffd3fb0d396
R13: 00007ffd3fb0d396 R14: 0000000000020000 R15: ffff8800b362ff48
ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018
crash> disassemble proc_pid_cmdline_read
Dump of assembler code for function proc_pid_cmdline_read:
...
0xffffffff812991f2 <+178>: callq 0xffffffff816e4870 <down_read>
0xffffffff812991f7 <+183>: mov 0x1e0(%rbx),%rax # arg_start
0xffffffff812991fe <+190>: mov 0x1f8(%rbx),%rdx # env_end
0xffffffff81299205 <+197>: mov 0x1e8(%rbx),%r12 # arg_end
0xffffffff8129920c <+204>: mov 0x1f0(%rbx),%r13 # env_start
0xffffffff81299213 <+211>: mov %rax,-0x50(%rbp)
0xffffffff81299217 <+215>: mov -0x60(%rbp),%rax
0xffffffff8129921b <+219>: mov %rdx,-0x58(%rbp)
0xffffffff8129921f <+223>: mov %rax,%rdi
0xffffffff81299222 <+226>: callq 0xffffffff810bbb50 <up_read>
0xffffffff81299227 <+231>: cmp %r12,-0x50(%rbp)
0xffffffff8129922b <+235>: ja 0xffffffff812996b3 <proc_pid_cmdline_read+1395>
0xffffffff81299231 <+241>: cmp -0x58(%rbp),%r13
0xffffffff81299235 <+245>: ja 0xffffffff812996b1 <proc_pid_cmdline_read+1393>
crash> p/x ((struct mm_struct *)0xffff8801b0460f80)->arg_start
$5 = 0x7ffd3fb0d365
crash> p/x ((struct mm_struct *)0xffff8801b0460f80)->arg_end
$6 = 0x7ffd3fb0d396
crash> p/x ((struct mm_struct *)0xffff8801b0460f80)->env_start
$7 = 0x7ffd3fb0d396
crash> p/x ((struct mm_struct *)0xffff8801b0460f80)->env_end
$8 = 0x7ffd3fb0dfed
crash> x/1g (0xffff8800b362ff00-0x50)
0xffff8800b362feb0: 0x00007ffd3fb0d365
crash> x/1g (0xffff8800b362ff00-0x58)
0xffff8800b362fea8: 0x0000000000000000
Regards,
Jan
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH try #4] proc: fix PAGE_SIZE limit of /proc/$PID/cmdline
2015-05-27 10:56 ` Jan Stancek
@ 2015-05-27 11:08 ` Alexey Dobriyan
2015-05-27 13:46 ` Alexey Dobriyan
1 sibling, 0 replies; 10+ messages in thread
From: Alexey Dobriyan @ 2015-05-27 11:08 UTC (permalink / raw)
To: Jan Stancek; +Cc: Jarod Wilson, Jarod Wilson, akpm, linux-kernel
On Wed, May 27, 2015 at 1:56 PM, Jan Stancek <jstancek@redhat.com> wrote:
> On Wed, May 27, 2015 at 01:27:13AM -0400, Jarod Wilson wrote:
>> On May 26, 2015, at 5:24 PM, Alexey Dobriyan <adobriyan@gmail.com> wrote:
>> >> Should have tested on more than just x86, it appears. We've started
>> >> hammering on this internally across all arches, and its exploded
>> >> multiple times on ppc64 now:
>> >>
>> >> [ 2717.074699] ------------[ cut here ]------------
>> >> [ 2717.074787] kernel BUG at fs/proc/base.c:244!
>> >
>> >> OE-------------- 3.10.0-255.el7.ppc64.debug #1
>> >
>> > Which BUG_ON is this?
>> >
>> > BUG_ON(*pos < 0);
>> > BUG_ON(arg_start > arg_end);
>> > BUG_ON(env_start > env_end);
>>
>
> Is create_elf_tables() taking mm->mmap_sem when it's initialising env_start/end?
>
> I could reproduce it on x86_64 as well, just by doing exec in loop
> and "ps afx" in other window:
OK.
I've reproduced BUG_ON by running this program which sets arg_start>arg_end:
#include <sys/mman.h>
#include <sys/prctl.h>
#include <unistd.h>
enum {PAGE_SIZE=4096};
int main(void)
{
unsigned long env_start, env_end;
void *p;
p = mmap(NULL, PAGE_SIZE, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
#define PR_SET_MM 35
#define PR_SET_MM_ARG_START 8
#define PR_SET_MM_ARG_END 9
#define PR_SET_MM_ENV_START 10
#define PR_SET_MM_ENV_END 11
prctl(PR_SET_MM, PR_SET_MM_ARG_START, (unsigned long)p +
PAGE_SIZE - 1, 0, 0);
prctl(PR_SET_MM, PR_SET_MM_ARG_END, (unsigned long)p, 0, 0);
pause();
return 0;
}
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH try #4] proc: fix PAGE_SIZE limit of /proc/$PID/cmdline
2015-05-27 10:56 ` Jan Stancek
2015-05-27 11:08 ` Alexey Dobriyan
@ 2015-05-27 13:46 ` Alexey Dobriyan
1 sibling, 0 replies; 10+ messages in thread
From: Alexey Dobriyan @ 2015-05-27 13:46 UTC (permalink / raw)
To: Jan Stancek; +Cc: Jarod Wilson, Jarod Wilson, akpm, linux-kernel
On Wed, May 27, 2015 at 1:56 PM, Jan Stancek <jstancek@redhat.com> wrote:
> On Wed, May 27, 2015 at 01:27:13AM -0400, Jarod Wilson wrote:
>> On May 26, 2015, at 5:24 PM, Alexey Dobriyan <adobriyan@gmail.com> wrote:
>> >> Should have tested on more than just x86, it appears. We've started
>> >> hammering on this internally across all arches, and its exploded
>> >> multiple times on ppc64 now:
>> >>
>> >> [ 2717.074699] ------------[ cut here ]------------
>> >> [ 2717.074787] kernel BUG at fs/proc/base.c:244!
>> >
>> >> OE-------------- 3.10.0-255.el7.ppc64.debug #1
>> >
>> > Which BUG_ON is this?
>> >
>> > BUG_ON(*pos < 0);
>> > BUG_ON(arg_start > arg_end);
>> > BUG_ON(env_start > env_end);
>>
>
> Is create_elf_tables() taking mm->mmap_sem when it's initialising env_start/end?
No, and it wasn't needed before checkpoint/restart (prctl(PR_SET_MM ...)).
Fields were write-once, now they aren't. C/R is doing down_read() in
prctl_set_mm()
which is an obvious bug.
Check "if (!mm->arg_end)" is safeguard exactly against new process not having
ARGV/ENVP fields completely initialized. I didn't understand what it
does exactly,
now it's clear.
^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2015-05-27 13:46 UTC | newest]
Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-05-08 12:28 [PATCH try #4] proc: fix PAGE_SIZE limit of /proc/$PID/cmdline Alexey Dobriyan
2015-05-08 21:32 ` Andrew Morton
2015-05-10 13:36 ` Alexey Dobriyan
2015-05-26 20:42 ` Jarod Wilson
2015-05-26 21:24 ` Alexey Dobriyan
2015-05-27 5:27 ` Jarod Wilson
2015-05-27 10:17 ` Alexey Dobriyan
2015-05-27 10:56 ` Jan Stancek
2015-05-27 11:08 ` Alexey Dobriyan
2015-05-27 13:46 ` Alexey Dobriyan
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).