All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] proc: augment /proc/pid/limits to allow setting of process limits.
@ 2009-09-28 20:06 Neil Horman
  2009-09-28 22:44 ` Andrew Morton
  2009-10-01 17:15 ` [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v3) Neil Horman
  0 siblings, 2 replies; 107+ messages in thread
From: Neil Horman @ 2009-09-28 20:06 UTC (permalink / raw)
  To: linux-kernel; +Cc: akpm, nhorman

Augment /proc/<pid>/limits file to support limit setting

It was suggested to me recently that we support a mechanism by which we can set
various process limits from points external to the process.  The reasoning being
that some processes are very long lived, and it would be beneficial to these
long lived processes if we could modify their various limits without needing to
kill them, adjust the limits for the user and restart them.  While individual
applications can certainly export this control on their own, it would be nice if
such functionality were available to a sysadmin, without needing to have each
application re-invent the wheel.

As such, I've implemented the below patch, which makes /proc/pid/limits writable
for each process.  By writing the following format:
<limit> <current value> <max value>
to the limits file, an administrator can now dynamically change the limits for
the respective process.  Tested by myself with good results.

Neil

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>


 base.c |  167 +++++++++++++++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 138 insertions(+), 29 deletions(-)


diff --git a/fs/proc/base.c b/fs/proc/base.c
index 6f742f6..cdf6748 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -49,6 +49,8 @@
 
 #include <asm/uaccess.h>
 
+#include <linux/string.h>
+#include <linux/ctype.h>
 #include <linux/errno.h>
 #include <linux/time.h>
 #include <linux/proc_fs.h>
@@ -455,72 +457,179 @@ static int proc_oom_score(struct task_struct *task, char *buffer)
 struct limit_names {
 	char *name;
 	char *unit;
+	char *match;
 };
 
 static const struct limit_names lnames[RLIM_NLIMITS] = {
-	[RLIMIT_CPU] = {"Max cpu time", "ms"},
-	[RLIMIT_FSIZE] = {"Max file size", "bytes"},
-	[RLIMIT_DATA] = {"Max data size", "bytes"},
-	[RLIMIT_STACK] = {"Max stack size", "bytes"},
-	[RLIMIT_CORE] = {"Max core file size", "bytes"},
-	[RLIMIT_RSS] = {"Max resident set", "bytes"},
-	[RLIMIT_NPROC] = {"Max processes", "processes"},
-	[RLIMIT_NOFILE] = {"Max open files", "files"},
-	[RLIMIT_MEMLOCK] = {"Max locked memory", "bytes"},
-	[RLIMIT_AS] = {"Max address space", "bytes"},
-	[RLIMIT_LOCKS] = {"Max file locks", "locks"},
-	[RLIMIT_SIGPENDING] = {"Max pending signals", "signals"},
-	[RLIMIT_MSGQUEUE] = {"Max msgqueue size", "bytes"},
-	[RLIMIT_NICE] = {"Max nice priority", NULL},
-	[RLIMIT_RTPRIO] = {"Max realtime priority", NULL},
-	[RLIMIT_RTTIME] = {"Max realtime timeout", "us"},
+	[RLIMIT_CPU] = {"Max cpu time", "ms", "cpu"},
+	[RLIMIT_FSIZE] = {"Max file size", "bytes", "fsize"},
+	[RLIMIT_DATA] = {"Max data size", "bytes", "data"},
+	[RLIMIT_STACK] = {"Max stack size", "bytes", "stack"},
+	[RLIMIT_CORE] = {"Max core file size", "bytes", "core"},
+	[RLIMIT_RSS] = {"Max resident set", "bytes", "rss"},
+	[RLIMIT_NPROC] = {"Max processes", "processes", "nproc"},
+	[RLIMIT_NOFILE] = {"Max open files", "files", "nofile"},
+	[RLIMIT_MEMLOCK] = {"Max locked memory", "bytes", "memlock"},
+	[RLIMIT_AS] = {"Max address space", "bytes", "as"},
+	[RLIMIT_LOCKS] = {"Max file locks", "locks", "locks"},
+	[RLIMIT_SIGPENDING] = {"Max pending signals", "signals", "sigpending"},
+	[RLIMIT_MSGQUEUE] = {"Max msgqueue size", "bytes", "msgqueue"},
+	[RLIMIT_NICE] = {"Max nice priority", NULL, "nice"},
+	[RLIMIT_RTPRIO] = {"Max realtime priority", NULL, "rtprio"},
+	[RLIMIT_RTTIME] = {"Max realtime timeout", "us", "rttime"},
 };
 
 /* Display limits for a process */
-static int proc_pid_limits(struct task_struct *task, char *buffer)
+static ssize_t proc_pid_limit_read(struct file *file, char __user *buf,
+			size_t count, loff_t *ppos)
 {
 	unsigned int i;
-	int count = 0;
 	unsigned long flags;
-	char *bufptr = buffer;
+	char *bufptr;
+	size_t bcount = 0;
+	size_t ccount = 0;
+	struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
 
 	struct rlimit rlim[RLIM_NLIMITS];
 
+	bufptr = kzalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!bufptr)
+		goto out;
+
 	if (!lock_task_sighand(task, &flags))
-		return 0;
+		goto out;
 	memcpy(rlim, task->signal->rlim, sizeof(struct rlimit) * RLIM_NLIMITS);
 	unlock_task_sighand(task, &flags);
 
 	/*
 	 * print the file header
 	 */
-	count += sprintf(&bufptr[count], "%-25s %-20s %-20s %-10s\n",
+	bcount += sprintf(&bufptr[bcount], "%-25s %-20s %-20s %-10s\n",
 			"Limit", "Soft Limit", "Hard Limit", "Units");
 
 	for (i = 0; i < RLIM_NLIMITS; i++) {
 		if (rlim[i].rlim_cur == RLIM_INFINITY)
-			count += sprintf(&bufptr[count], "%-25s %-20s ",
+			bcount += sprintf(&bufptr[bcount], "%-25s %-20s ",
 					 lnames[i].name, "unlimited");
 		else
-			count += sprintf(&bufptr[count], "%-25s %-20lu ",
+			bcount += sprintf(&bufptr[bcount], "%-25s %-20lu ",
 					 lnames[i].name, rlim[i].rlim_cur);
 
 		if (rlim[i].rlim_max == RLIM_INFINITY)
-			count += sprintf(&bufptr[count], "%-20s ", "unlimited");
+			bcount += sprintf(&bufptr[bcount], "%-20s ",
+					 "unlimited");
 		else
-			count += sprintf(&bufptr[count], "%-20lu ",
+			bcount += sprintf(&bufptr[bcount], "%-20lu ",
 					 rlim[i].rlim_max);
 
 		if (lnames[i].unit)
-			count += sprintf(&bufptr[count], "%-10s\n",
+			bcount += sprintf(&bufptr[bcount], "%-10s\n",
 					 lnames[i].unit);
 		else
-			count += sprintf(&bufptr[count], "\n");
+			bcount += sprintf(&bufptr[bcount], "\n");
 	}
 
+	if (*ppos >= bcount)
+		goto out_task;
+
+	ccount = min(count, (size_t)(bcount-(*ppos)));
+	ccount = ccount - copy_to_user(buf, &bufptr[*ppos], ccount);
+	*ppos += ccount;
+	kfree(bufptr);
+out_task:
+	put_task_struct(task);
+out:
+	return ccount;
+}
+
+static ssize_t proc_pid_limit_write(struct file *file, const char __user *buf,
+		size_t count, loff_t *ppos)
+{
+	char *buffer;
+	char *element, *vmc, *vmm;
+	unsigned long long valuec, valuem;
+	unsigned long flags;
+	int i;
+	int index = -1;
+	size_t wcount = 0;
+	struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
+
+
+	if (*ppos != 0)
+		goto out;
+
+	if (count > 128)
+		goto out;
+	buffer = kzalloc(128, GFP_KERNEL);
+
+	if (!buffer)
+		goto out;
+
+	element = kzalloc(sizeof(buffer), GFP_KERNEL);
+	vmc = kzalloc(sizeof(buffer), GFP_KERNEL);
+	vmm = kzalloc(sizeof(buffer), GFP_KERNEL);
+
+	if (!element || !vmm || !vmc)
+		goto out_free;
+
+	wcount = count - copy_from_user(buffer, buf, count);
+	if (wcount < count)
+		goto out_free;
+
+	i = sscanf(buffer, "%s %s %s", element, vmc, vmm);
+
+	if (i < 3)
+		goto out_free;
+
+	for (i = 0; i <= strlen(element); i++)
+		element[i] = tolower(element[i]);
+
+	if (!strncmp(vmc, "unlimited", 9))
+		valuec = RLIM_INFINITY;
+	else
+		valuec = simple_strtoull(vmc, NULL, 10);
+
+	if (!strncmp(vmm, "unlimited", 9))
+		valuem = RLIM_INFINITY;
+	else
+		valuem = simple_strtoull(vmm, NULL, 10);
+
+	for (i = 0; i < RLIM_NLIMITS; i++) {
+		if ((lnames[i].match) &&
+		    !strncmp(element, lnames[i].match, 
+		     strlen(lnames[i].match))) {
+			index = i;
+			break;
+		}
+	}
+
+	if (!lock_task_sighand(task, &flags))
+		goto out_free;
+
+	if (index >= 0) {
+		task->signal->rlim[index].rlim_cur = valuec;
+		task->signal->rlim[index].rlim_max = valuem;
+	}
+
+	unlock_task_sighand(task, &flags);
+
+out_free:
+	kfree(element);
+	kfree(vmc);
+	kfree(vmm);
+	kfree(buffer);
+out:
+	*ppos += count;
+	put_task_struct(task);
 	return count;
 }
 
+
+static const struct file_operations proc_limit_operations = {
+        .read           = proc_pid_limit_read,
+	.write		= proc_pid_limit_write,
+};
+
 #ifdef CONFIG_HAVE_ARCH_TRACEHOOK
 static int proc_pid_syscall(struct task_struct *task, char *buffer)
 {
@@ -2483,7 +2592,7 @@ static const struct pid_entry tgid_base_stuff[] = {
 	INF("auxv",       S_IRUSR, proc_pid_auxv),
 	ONE("status",     S_IRUGO, proc_pid_status),
 	ONE("personality", S_IRUSR, proc_pid_personality),
-	INF("limits",	  S_IRUSR, proc_pid_limits),
+	REG("limits",	  S_IRUSR|S_IWUSR, proc_limit_operations),
 #ifdef CONFIG_SCHED_DEBUG
 	REG("sched",      S_IRUGO|S_IWUSR, proc_pid_sched_operations),
 #endif
@@ -2822,7 +2931,7 @@ static const struct pid_entry tid_base_stuff[] = {
 	INF("auxv",      S_IRUSR, proc_pid_auxv),
 	ONE("status",    S_IRUGO, proc_pid_status),
 	ONE("personality", S_IRUSR, proc_pid_personality),
-	INF("limits",	 S_IRUSR, proc_pid_limits),
+	REG("limits",	 S_IRUSR|S_IWUSR, proc_limit_operations),
 #ifdef CONFIG_SCHED_DEBUG
 	REG("sched",     S_IRUGO|S_IWUSR, proc_pid_sched_operations),
 #endif

^ permalink raw reply related	[flat|nested] 107+ messages in thread

* Re: [PATCH] proc: augment /proc/pid/limits to allow setting of process limits.
  2009-09-28 20:06 [PATCH] proc: augment /proc/pid/limits to allow setting of process limits Neil Horman
@ 2009-09-28 22:44 ` Andrew Morton
  2009-09-29  1:14   ` Neil Horman
  2009-09-29 20:25   ` [PATCH] proc: augment /proc/pid/limits to allow setting of process limits (v2) Neil Horman
  2009-10-01 17:15 ` [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v3) Neil Horman
  1 sibling, 2 replies; 107+ messages in thread
From: Andrew Morton @ 2009-09-28 22:44 UTC (permalink / raw)
  To: Neil Horman; +Cc: linux-kernel, nhorman

On Mon, 28 Sep 2009 16:06:00 -0400
Neil Horman <nhorman@tuxdriver.com> wrote:

> Augment /proc/<pid>/limits file to support limit setting
> 
> It was suggested to me recently that we support a mechanism by which we can set
> various process limits from points external to the process.  The reasoning being
> that some processes are very long lived, and it would be beneficial to these
> long lived processes if we could modify their various limits without needing to
> kill them, adjust the limits for the user and restarting them.  While individual
> application can certainly export this control on their own, it would be nice if
> such functionality were available to a sysadmin, without needing to have each
> application re-invent the wheel.
> 
> As such, I've implemented the below patch, which makes /proc/pid/limits writable
> for each process.  By writing the following format:
> <limit> <current value> <max value>
> to the limits file, an administrator can now dynamically change the limits for
> the respective process.  Tested by myself with good results.
> 

Confused.  This appears to allow processes to cheerily exceed their
inherited limits, without bound.  See sys_setrlimit()'s

	if (new_rlim.rlim_cur > new_rlim.rlim_max)
		return -EINVAL;

It might allow user A to diddle user B's limit too, I didn't check?

And it cheerily avoids security_task_setrlimit() too.

Apart from those somewhat fatal problems, it's all a bit unpleasing that
we now have two ways of setting rlimits, one of which is a superset of
the other.  Perhaps a better way would be a new sys_setrlimit2() which
takes a pid (in the current pid namespace, one assumes).  Then deprecate
sys_setrlimit().

>
> ...
>
> +static ssize_t proc_pid_limit_write(struct file *file, const char __user *buf,
> +		size_t count, loff_t *ppos)
> +{
> +	char *buffer;
> +	char *element, *vmc, *vmm;
> +	unsigned long long valuec, valuem;
> +	unsigned long flags;
> +	int i;
> +	int index = -1;
> +	size_t wcount = 0;
> +	struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
> +
> +
> +	if (*ppos != 0)
> +		goto out;
> +
> +	if (count > 128)
> +		goto out;
> +	buffer = kzalloc(128, GFP_KERNEL);
> +
> +	if (!buffer)
> +		goto out;
> +
> +	element = kzalloc(sizeof(buffer), GFP_KERNEL);
> +	vmc = kzalloc(sizeof(buffer), GFP_KERNEL);
> +	vmm = kzalloc(sizeof(buffer), GFP_KERNEL);
> +
> +	if (!element || !vmm || !vmc)
> +		goto out_free;
> +
> +	wcount = count - copy_from_user(buffer, buf, count);
> +	if (wcount < count)
> +		goto out_free;
> +
> +	i = sscanf(buffer, "%s %s %s", element, vmc, vmm);
> +
> +	if (i < 3)
> +		goto out_free;
> +
> +	for (i = 0; i <= strlen(element); i++)
> +		element[i] = tolower(element[i]);
> +
> +	if (!strncmp(vmc, "unlimited", 9))
> +		valuec = RLIM_INFINITY;
> +	else
> +		valuec = simple_strtoull(vmc, NULL, 10);
> +
> +	if (!strncmp(vmm, "unlimited", 9))
> +		valuem = RLIM_INFINITY;
> +	else
> +		valuem = simple_strtoull(vmm, NULL, 10);
> +
> +	for (i = 0; i < RLIM_NLIMITS; i++) {
> +		if ((lnames[i].match) &&
> +		    !strncmp(element, lnames[i].match, 
> +		     strlen(lnames[i].match))) {
> +			index = i;
> +			break;
> +		}
> +	}
> +
> +	if (!lock_task_sighand(task, &flags))
> +		goto out_free;

The function silently does nothing if lock_task_sighand() fails.

> +	if (index >= 0) {
> +		task->signal->rlim[index].rlim_cur = valuec;
> +		task->signal->rlim[index].rlim_max = valuem;
> +	}
> +
> +	unlock_task_sighand(task, &flags);
> +
> +out_free:
> +	kfree(element);
> +	kfree(vmc);
> +	kfree(vmm);
> +	kfree(buffer);
> +out:
> +	*ppos += count;
> +	put_task_struct(task);
>  	return count;
>  }
>  
> +
> +static const struct file_operations proc_limit_operations = {
> +        .read           = proc_pid_limit_read,
> +	.write		= proc_pid_limit_write,

whitespace got munged.

> +};
> +


^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH] proc: augment /proc/pid/limits to allow setting of process limits.
  2009-09-28 22:44 ` Andrew Morton
@ 2009-09-29  1:14   ` Neil Horman
  2009-09-29 20:25   ` [PATCH] proc: augment /proc/pid/limits to allow setting of process limits (v2) Neil Horman
  1 sibling, 0 replies; 107+ messages in thread
From: Neil Horman @ 2009-09-29  1:14 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel

On Mon, Sep 28, 2009 at 03:44:03PM -0700, Andrew Morton wrote:
> On Mon, 28 Sep 2009 16:06:00 -0400
> Neil Horman <nhorman@tuxdriver.com> wrote:
> 
> > Augment /proc/<pid>/limits file to support limit setting
> > 
> > It was suggested to me recently that we support a mechanism by which we can set
> > various process limits from points external to the process.  The reasoning being
> > that some processes are very long lived, and it would be beneficial to these
> > long lived processes if we could modify their various limits without needing to
> > kill them, adjust the limits for the user and restarting them.  While individual
> > application can certainly export this control on their own, it would be nice if
> > such functionality were available to a sysadmin, without needing to have each
> > application re-invent the wheel.
> > 
> > As such, I've implemented the below patch, which makes /proc/pid/limits writable
> > for each process.  By writing the following format:
> > <limit> <current value> <max value>
> > to the limits file, an administrator can now dynamically change the limits for
> > the respective process.  Tested by myself with good results.
> > 
> 
> Confused.  This appears to allow processes to cheerily exceed their
> inherited limits, without bound.  See sys_setrliit()'s
> 
> 	if (new_rlim.rlim_cur > new_rlim.rlim_max)
> 		return -EINVAL;
> 
Gaahh!  You're right, in my worry to get all the string parsing right, I didn't
even consider the semantics of setrlimit.  

> It might allow user A to diddle user B's limit too, I didn't check?
> 
No, it can't do that.  file permissions only allow the process owner and root to
modify the limits.

> And it cheerily avoids security_task_setrlimit() too.
> 
Yeah, it completely breaks that.  Sorry.

> Apart from those somewhat fatal problems, it's all a bit unpleasing that
> we now have two ways of setting rlimits, one of which is a superset of
> the other.  Perhaps a better way would be a new sys_setrlimit2() which
> takes a pid (in the current pid namespace, one assumes).  Then deprecate
> sys_setrlimit().
> 
Do you think it's worth adding a syscall just for this?  I think there's merit in
this feature (I wrote it :)), but I'm not sure if a syscall is really warranted.
Your above notes are obviously a problem, but I think they can be fixed.  It's
easy to check whether the writing user is the process owner and, if so, restrict
raising the max value, and the selinux check can be added.

clearly I rescind this patch (sorry for the noise).  I'll see if I can add the
checks needed above and repost.
Neil

> >
> > ...
> >
> > +static ssize_t proc_pid_limit_write(struct file *file, const char __user *buf,
> > +		size_t count, loff_t *ppos)
> > +{
> > +	char *buffer;
> > +	char *element, *vmc, *vmm;
> > +	unsigned long long valuec, valuem;
> > +	unsigned long flags;
> > +	int i;
> > +	int index = -1;
> > +	size_t wcount = 0;
> > +	struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
> > +
> > +
> > +	if (*ppos != 0)
> > +		goto out;
> > +
> > +	if (count > 128)
> > +		goto out;
> > +	buffer = kzalloc(128, GFP_KERNEL);
> > +
> > +	if (!buffer)
> > +		goto out;
> > +
> > +	element = kzalloc(sizeof(buffer), GFP_KERNEL);
> > +	vmc = kzalloc(sizeof(buffer), GFP_KERNEL);
> > +	vmm = kzalloc(sizeof(buffer), GFP_KERNEL);
> > +
> > +	if (!element || !vmm || !vmc)
> > +		goto out_free;
> > +
> > +	wcount = count - copy_from_user(buffer, buf, count);
> > +	if (wcount < count)
> > +		goto out_free;
> > +
> > +	i = sscanf(buffer, "%s %s %s", element, vmc, vmm);
> > +
> > +	if (i < 3)
> > +		goto out_free;
> > +
> > +	for (i = 0; i <= strlen(element); i++)
> > +		element[i] = tolower(element[i]);
> > +
> > +	if (!strncmp(vmc, "unlimited", 9))
> > +		valuec = RLIM_INFINITY;
> > +	else
> > +		valuec = simple_strtoull(vmc, NULL, 10);
> > +
> > +	if (!strncmp(vmm, "unlimited", 9))
> > +		valuem = RLIM_INFINITY;
> > +	else
> > +		valuem = simple_strtoull(vmm, NULL, 10);
> > +
> > +	for (i = 0; i < RLIM_NLIMITS; i++) {
> > +		if ((lnames[i].match) &&
> > +		    !strncmp(element, lnames[i].match, 
> > +		     strlen(lnames[i].match))) {
> > +			index = i;
> > +			break;
> > +		}
> > +	}
> > +
> > +	if (!lock_task_sighand(task, &flags))
> > +		goto out_free;
> 
> The function silently does nothing if lock_task_sighand() fails.
> 
> > +	if (index >= 0) {
> > +		task->signal->rlim[index].rlim_cur = valuec;
> > +		task->signal->rlim[index].rlim_max = valuem;
> > +	}
> > +
> > +	unlock_task_sighand(task, &flags);
> > +
> > +out_free:
> > +	kfree(element);
> > +	kfree(vmc);
> > +	kfree(vmm);
> > +	kfree(buffer);
> > +out:
> > +	*ppos += count;
> > +	put_task_struct(task);
> >  	return count;
> >  }
> >  
> > +
> > +static const struct file_operations proc_limit_operations = {
> > +        .read           = proc_pid_limit_read,
> > +	.write		= proc_pid_limit_write,
> 
> whitespace got munged.
> 
> > +};
> > +
> 
> 

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH] proc: augment /proc/pid/limits to allow setting of process limits (v2).
  2009-09-28 22:44 ` Andrew Morton
  2009-09-29  1:14   ` Neil Horman
@ 2009-09-29 20:25   ` Neil Horman
  2009-09-29 20:46     ` Andrew Morton
  1 sibling, 1 reply; 107+ messages in thread
From: Neil Horman @ 2009-09-29 20:25 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel

Version 2 of this patch, taking your notes into account Andrew.

Change notes:

1) Split sys_setrlimit into a wrapper (sys_setrlimit), and an interior common
function (do_setrlimit).  The latter takes a resource, struct rlimit and
task_struct argument to do the actual work of setting the specified limit to the
requested rlim_cur/max values.

2) make sys_setrlimit call do_setrlimit

3) modify the proc/pid/limits write routine so that it uses do_setrlimit,
thereby giving us the previously missing security checks.


Augment /proc/<pid>/limits file to support limit setting

It was suggested to me recently that we support a mechanism by which we can set
various process limits from points external to the process.  The reasoning being
that some processes are very long lived, and it would be beneficial to these
long lived processes if we could modify their various limits without needing to
kill them, adjust the limits for the user and restart them.  While individual
applications can certainly export this control on their own, it would be nice if
such functionality were available to a sysadmin, without needing to have each
application re-invent the wheel.

As such, I've implemented the below patch, which makes /proc/pid/limits writable
for each process.  By writing the following format:
<limit> <current value> <max value>
to the limits file, an administrator can now dynamically change the limits for
the respective process.  Tested by myself with good results.

Neil

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>


 fs/proc/base.c        |  165 +++++++++++++++++++++++++++++++++++++++++---------
 include/linux/sched.h |    3 
 kernel/sys.c          |   48 +++++++++-----
 3 files changed, 169 insertions(+), 47 deletions(-)


diff --git a/fs/proc/base.c b/fs/proc/base.c
index 6f742f6..2f05799 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -49,6 +49,8 @@
 
 #include <asm/uaccess.h>
 
+#include <linux/string.h>
+#include <linux/ctype.h>
 #include <linux/errno.h>
 #include <linux/time.h>
 #include <linux/proc_fs.h>
@@ -455,72 +457,177 @@ static int proc_oom_score(struct task_struct *task, char *buffer)
 struct limit_names {
 	char *name;
 	char *unit;
+	char *match;
 };
 
 static const struct limit_names lnames[RLIM_NLIMITS] = {
-	[RLIMIT_CPU] = {"Max cpu time", "ms"},
-	[RLIMIT_FSIZE] = {"Max file size", "bytes"},
-	[RLIMIT_DATA] = {"Max data size", "bytes"},
-	[RLIMIT_STACK] = {"Max stack size", "bytes"},
-	[RLIMIT_CORE] = {"Max core file size", "bytes"},
-	[RLIMIT_RSS] = {"Max resident set", "bytes"},
-	[RLIMIT_NPROC] = {"Max processes", "processes"},
-	[RLIMIT_NOFILE] = {"Max open files", "files"},
-	[RLIMIT_MEMLOCK] = {"Max locked memory", "bytes"},
-	[RLIMIT_AS] = {"Max address space", "bytes"},
-	[RLIMIT_LOCKS] = {"Max file locks", "locks"},
-	[RLIMIT_SIGPENDING] = {"Max pending signals", "signals"},
-	[RLIMIT_MSGQUEUE] = {"Max msgqueue size", "bytes"},
-	[RLIMIT_NICE] = {"Max nice priority", NULL},
-	[RLIMIT_RTPRIO] = {"Max realtime priority", NULL},
-	[RLIMIT_RTTIME] = {"Max realtime timeout", "us"},
+	[RLIMIT_CPU] = {"Max cpu time", "ms", "cpu"},
+	[RLIMIT_FSIZE] = {"Max file size", "bytes", "fsize"},
+	[RLIMIT_DATA] = {"Max data size", "bytes", "data"},
+	[RLIMIT_STACK] = {"Max stack size", "bytes", "stack"},
+	[RLIMIT_CORE] = {"Max core file size", "bytes", "core"},
+	[RLIMIT_RSS] = {"Max resident set", "bytes", "rss"},
+	[RLIMIT_NPROC] = {"Max processes", "processes", "nproc"},
+	[RLIMIT_NOFILE] = {"Max open files", "files", "nofile"},
+	[RLIMIT_MEMLOCK] = {"Max locked memory", "bytes", "memlock"},
+	[RLIMIT_AS] = {"Max address space", "bytes", "as"},
+	[RLIMIT_LOCKS] = {"Max file locks", "locks", "locks"},
+	[RLIMIT_SIGPENDING] = {"Max pending signals", "signals", "sigpending"},
+	[RLIMIT_MSGQUEUE] = {"Max msgqueue size", "bytes", "msgqueue"},
+	[RLIMIT_NICE] = {"Max nice priority", NULL, "nice"},
+	[RLIMIT_RTPRIO] = {"Max realtime priority", NULL, "rtprio"},
+	[RLIMIT_RTTIME] = {"Max realtime timeout", "us", "rttime"},
 };
 
 /* Display limits for a process */
-static int proc_pid_limits(struct task_struct *task, char *buffer)
+static ssize_t proc_pid_limit_read(struct file *file, char __user *buf,
+			size_t count, loff_t *ppos)
 {
 	unsigned int i;
-	int count = 0;
 	unsigned long flags;
-	char *bufptr = buffer;
+	char *bufptr;
+	size_t bcount = 0;
+	size_t ccount = 0;
+	struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
 
 	struct rlimit rlim[RLIM_NLIMITS];
 
+	bufptr = kzalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!bufptr)
+		goto out;
+
 	if (!lock_task_sighand(task, &flags))
-		return 0;
+		goto out;
 	memcpy(rlim, task->signal->rlim, sizeof(struct rlimit) * RLIM_NLIMITS);
 	unlock_task_sighand(task, &flags);
 
 	/*
 	 * print the file header
 	 */
-	count += sprintf(&bufptr[count], "%-25s %-20s %-20s %-10s\n",
+	bcount += sprintf(&bufptr[bcount], "%-25s %-20s %-20s %-10s\n",
 			"Limit", "Soft Limit", "Hard Limit", "Units");
 
 	for (i = 0; i < RLIM_NLIMITS; i++) {
 		if (rlim[i].rlim_cur == RLIM_INFINITY)
-			count += sprintf(&bufptr[count], "%-25s %-20s ",
+			bcount += sprintf(&bufptr[bcount], "%-25s %-20s ",
 					 lnames[i].name, "unlimited");
 		else
-			count += sprintf(&bufptr[count], "%-25s %-20lu ",
+			bcount += sprintf(&bufptr[bcount], "%-25s %-20lu ",
 					 lnames[i].name, rlim[i].rlim_cur);
 
 		if (rlim[i].rlim_max == RLIM_INFINITY)
-			count += sprintf(&bufptr[count], "%-20s ", "unlimited");
+			bcount += sprintf(&bufptr[bcount], "%-20s ",
+					 "unlimited");
 		else
-			count += sprintf(&bufptr[count], "%-20lu ",
+			bcount += sprintf(&bufptr[bcount], "%-20lu ",
 					 rlim[i].rlim_max);
 
 		if (lnames[i].unit)
-			count += sprintf(&bufptr[count], "%-10s\n",
+			bcount += sprintf(&bufptr[bcount], "%-10s\n",
 					 lnames[i].unit);
 		else
-			count += sprintf(&bufptr[count], "\n");
+			bcount += sprintf(&bufptr[bcount], "\n");
+	}
+
+	if (*ppos >= bcount)
+		goto out_task;
+
+	ccount = min(count, (size_t)(bcount-(*ppos)));
+	ccount = ccount - copy_to_user(buf, &bufptr[*ppos], ccount);
+	*ppos += ccount;
+	kfree(bufptr);
+out_task:
+	put_task_struct(task);
+out:
+	return ccount;
+}
+
+static ssize_t proc_pid_limit_write(struct file *file, const char __user *buf,
+		size_t count, loff_t *ppos)
+{
+	char *buffer;
+	char *element, *vmc, *vmm;
+	struct rlimit new_rlim;
+	unsigned long flags;
+	int i;
+	int index = -1;
+	size_t wcount = 0;
+	struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
+
+
+	if (*ppos != 0)
+		goto out;
+
+	if (count > 128)
+		goto out;
+	buffer = kzalloc(128, GFP_KERNEL);
+
+	if (!buffer)
+		goto out;
+
+	element = kzalloc(sizeof(buffer), GFP_KERNEL);
+	vmc = kzalloc(sizeof(buffer), GFP_KERNEL);
+	vmm = kzalloc(sizeof(buffer), GFP_KERNEL);
+
+	if (!element || !vmm || !vmc)
+		goto out_free;
+
+	wcount = count - copy_from_user(buffer, buf, count);
+	if (wcount < count)
+		goto out_free;
+
+	i = sscanf(buffer, "%s %s %s", element, vmc, vmm);
+
+	if (i < 3)
+		goto out_free;
+
+	for (i = 0; i <= strlen(element); i++)
+		element[i] = tolower(element[i]);
+
+	if (!strncmp(vmc, "unlimited", 9))
+		new_rlim.rlim_cur = RLIM_INFINITY;
+	else
+		new_rlim.rlim_cur = simple_strtoull(vmc, NULL, 10);
+
+	if (!strncmp(vmm, "unlimited", 9))
+		new_rlim.rlim_max = RLIM_INFINITY;
+	else
+		new_rlim.rlim_max = simple_strtoull(vmm, NULL, 10);
+
+	for (i = 0; i < RLIM_NLIMITS; i++) {
+		if ((lnames[i].match) &&
+		    !strncmp(element, lnames[i].match,
+		     strlen(lnames[i].match))) {
+			index = i;
+			break;
+		}
 	}
 
+	if (!lock_task_sighand(task, &flags))
+		goto out_free;
+
+	if ((index >= 0) && (index < RLIM_NLIMITS))
+		do_setrlimit(index, &new_rlim, task);
+
+	unlock_task_sighand(task, &flags);
+
+out_free:
+	kfree(element);
+	kfree(vmc);
+	kfree(vmm);
+	kfree(buffer);
+out:
+	*ppos += count;
+	put_task_struct(task);
 	return count;
 }
 
+
+static const struct file_operations proc_limit_operations = {
+	.read           = proc_pid_limit_read,
+	.write		= proc_pid_limit_write,
+};
+
 #ifdef CONFIG_HAVE_ARCH_TRACEHOOK
 static int proc_pid_syscall(struct task_struct *task, char *buffer)
 {
@@ -2483,7 +2590,7 @@ static const struct pid_entry tgid_base_stuff[] = {
 	INF("auxv",       S_IRUSR, proc_pid_auxv),
 	ONE("status",     S_IRUGO, proc_pid_status),
 	ONE("personality", S_IRUSR, proc_pid_personality),
-	INF("limits",	  S_IRUSR, proc_pid_limits),
+	REG("limits",	  S_IRUSR|S_IWUSR, proc_limit_operations),
 #ifdef CONFIG_SCHED_DEBUG
 	REG("sched",      S_IRUGO|S_IWUSR, proc_pid_sched_operations),
 #endif
@@ -2822,7 +2929,7 @@ static const struct pid_entry tid_base_stuff[] = {
 	INF("auxv",      S_IRUSR, proc_pid_auxv),
 	ONE("status",    S_IRUGO, proc_pid_status),
 	ONE("personality", S_IRUSR, proc_pid_personality),
-	INF("limits",	 S_IRUSR, proc_pid_limits),
+	REG("limits",	 S_IRUSR|S_IWUSR, proc_limit_operations),
 #ifdef CONFIG_SCHED_DEBUG
 	REG("sched",     S_IRUGO|S_IWUSR, proc_pid_sched_operations),
 #endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index f3d74bd..6bcc125 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -633,6 +633,9 @@ struct signal_struct {
 #endif
 };
 
+extern int do_setrlimit(unsigned int resource, struct rlimit *new_rlim,
+			struct task_struct *tsk);
+
 /* Context switch must be unlocked if interrupts are to be enabled */
 #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
 # define __ARCH_WANT_UNLOCKED_CTXSW
diff --git a/kernel/sys.c b/kernel/sys.c
index b3f1097..05bd22a 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1236,41 +1236,41 @@ SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource,
 
 #endif
 
-SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
+int do_setrlimit(unsigned int resource, struct rlimit *new_rlim,
+		 struct task_struct *tsk)
 {
-	struct rlimit new_rlim, *old_rlim;
 	int retval;
+	struct rlimit *old_rlim;
 
-	if (resource >= RLIM_NLIMITS)
-		return -EINVAL;
-	if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
-		return -EFAULT;
-	if (new_rlim.rlim_cur > new_rlim.rlim_max)
+
+	if (new_rlim->rlim_cur > new_rlim->rlim_max)
 		return -EINVAL;
-	old_rlim = current->signal->rlim + resource;
-	if ((new_rlim.rlim_max > old_rlim->rlim_max) &&
+	old_rlim = tsk->signal->rlim + resource;
+
+	if ((new_rlim->rlim_max > old_rlim->rlim_max) &&
 	    !capable(CAP_SYS_RESOURCE))
 		return -EPERM;
-	if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > sysctl_nr_open)
+
+	if (resource == RLIMIT_NOFILE && new_rlim->rlim_max > sysctl_nr_open)
 		return -EPERM;
 
-	retval = security_task_setrlimit(resource, &new_rlim);
+	retval = security_task_setrlimit(resource, new_rlim);
 	if (retval)
 		return retval;
 
-	if (resource == RLIMIT_CPU && new_rlim.rlim_cur == 0) {
+	if (resource == RLIMIT_CPU && new_rlim->rlim_cur == 0) {
 		/*
 		 * The caller is asking for an immediate RLIMIT_CPU
 		 * expiry.  But we use the zero value to mean "it was
 		 * never set".  So let's cheat and make it one second
 		 * instead
 		 */
-		new_rlim.rlim_cur = 1;
+		new_rlim->rlim_cur = 1;
 	}
 
-	task_lock(current->group_leader);
-	*old_rlim = new_rlim;
-	task_unlock(current->group_leader);
+	task_lock(tsk->group_leader);
+	*old_rlim = *new_rlim;
+	task_unlock(tsk->group_leader);
 
 	if (resource != RLIMIT_CPU)
 		goto out;
@@ -1281,14 +1281,26 @@ SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
 	 * very long-standing error, and fixing it now risks breakage of
 	 * applications, so we live with it
 	 */
-	if (new_rlim.rlim_cur == RLIM_INFINITY)
+	if (new_rlim->rlim_cur == RLIM_INFINITY)
 		goto out;
 
-	update_rlimit_cpu(new_rlim.rlim_cur);
+	update_rlimit_cpu(new_rlim->rlim_cur);
 out:
 	return 0;
 }
 
+SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
+{
+	struct rlimit new_rlim;
+
+	if (resource >= RLIM_NLIMITS)
+		return -EINVAL;
+	if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
+		return -EFAULT;
+
+	return do_setrlimit(resource, &new_rlim, current);
+}
+
 /*
  * It would make sense to put struct rusage in the task_struct,
  * except that would make the task_struct be *really big*.  After

^ permalink raw reply related	[flat|nested] 107+ messages in thread

* Re: [PATCH] proc: augment /proc/pid/limits to allow setting of process limits (v2).
  2009-09-29 20:25   ` [PATCH] proc: augment /proc/pid/limits to allow setting of process limits (v2) Neil Horman
@ 2009-09-29 20:46     ` Andrew Morton
  2009-09-30  0:59       ` Neil Horman
  0 siblings, 1 reply; 107+ messages in thread
From: Andrew Morton @ 2009-09-29 20:46 UTC (permalink / raw)
  To: Neil Horman; +Cc: linux-kernel

On Tue, 29 Sep 2009 16:25:04 -0400
Neil Horman <nhorman@tuxdriver.com> wrote:

> 3) modify the proc/pid/limits write routine so that it uses do_setrlimit,
> thereby giving us the previously missing security checks.

I dunno, the interface just seems goofy to me.

Yes, it's always been strange that rlimits cannot be externally
altered.  And desirable to extend that.  But doing what is really a
syscall via a procfs poke when there already exists a syscall which does
the same thing seems Just Wrong.

What reason is there to do it via procfs?  Where's the benefit?

Maybe it's a plot to stop people from setting CONFIG_PROC_FS=n.

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH] proc: augment /proc/pid/limits to allow setting of process limits (v2).
  2009-09-29 20:46     ` Andrew Morton
@ 2009-09-30  0:59       ` Neil Horman
  0 siblings, 0 replies; 107+ messages in thread
From: Neil Horman @ 2009-09-30  0:59 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel

On Tue, Sep 29, 2009 at 01:46:03PM -0700, Andrew Morton wrote:
> On Tue, 29 Sep 2009 16:25:04 -0400
> Neil Horman <nhorman@tuxdriver.com> wrote:
> 
> > 3) modify the proc/pid/limits write routine so that it uses do_setrlimit,
> > thereby giving us the previously missing security checks.
> 
> I dunno, the interface just seems goofy to me.
> 
Well, I hear what you're saying, but conversely, it seems a bit goofy to
me to allocate another syscall number just to do what sys_setrlimit does, but
with the addition of a pid specification.  I know there's precedent to do it, but
it seems no less goofy to me to do it that way than via a proc write.

> Yes, it's always been strange that rlimits cannot be externally
> altered.  And desirable to extend that.  But doing what is really a
> syscall via a procfs poke when there already exists a syscall which does
> the same thing seems Just Wrong.
> 
Again, I understand what you're saying, but to draw a parallel, the reason
/proc/pid/limits exists in the first place is because there was a desire to know
what the rlimit values were for a process from external contexts.  We could have
done this by creating a new syscall, and modifying the ulimit utility to accept
the data needed to support that new syscall.  But we didn't do that, we made a
proc file.  This just seems like the natural extension to that file, in my view.
With it, we don't need to create a new utility, or extend ulimit to make it all
work.

> What reason is there to do it via procfs?  Where's the benefit?
> 
Ease of use really.  With this interface, we can use cat/echo/etc to do
administrative control of process limits.  No need to extend ulimit, or create a
new utility.

> Maybe it's a plot to stop people from setting CONFIG_PROC_FS=n.
> 
hmm, so I'm looking at the patch.  90% of the work is done for a new syscall.
If you're adamant that you'd rather see it that way, how would you feel about
doing both?  I can write a follow-on patch that creates a do_getrlimit, and
creates 2 new syscalls (to get/set rlimits for a specified pid).  Then we take
this patch as is, as well as the new patch, and we can access limits either way,
programmatically via the syscalls, or from a high-level sysadmin interface via
the proc file.  Thoughts?

Neil


^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v3)
  2009-09-28 20:06 [PATCH] proc: augment /proc/pid/limits to allow setting of process limits Neil Horman
  2009-09-28 22:44 ` Andrew Morton
@ 2009-10-01 17:15 ` Neil Horman
  2009-10-01 17:16   ` [PATCH 1/3] " Neil Horman
                     ` (4 more replies)
  1 sibling, 5 replies; 107+ messages in thread
From: Neil Horman @ 2009-10-01 17:15 UTC (permalink / raw)
  To: linux-kernel; +Cc: akpm, nhorman

Ok, here's attempt number 3, in response to your thoughts and concerns, Andrew.

Change Notes:
1) Kept ability to set/get rlimits via /proc/pid/limits as in the previous
patches

2) Augmented the patchset with 2 new syscall defines, getprlimit and setprlimit,
which take a pid_t as an additional argument, allowing a user to specify a
process id for the rlimits in question.  The core code for setting rlimits is
shared with the previous patch, and is in do_setrlimit, so that all the security
checks and such remain unaltered.

3) Add syscall numbers to asm-generic/unistd.h so that any arch using the
generic syscall code picks up these new syscalls.  I figure I don't have access
to systems that don't use that code, so I should probably leave those changes up
to the arch maintainers.


Summary

It's been requested often that we have the ability to read and modify process
rlimit values from contexts external to the owning process.  Ideally this allows
sysadmins to adjust rlimits on long running processes without the need to stop
and restart those processes, which incurs undesirable downtime.  This patch
enables that functionality.  It does so in two places.  First, it enables process
limit setting by writing to the /proc/pid/limits file a string in the format:
<limit> <current limit> <max limit> > /proc/<pid>/limits
where limit is one of
[as,core,cpu,data,fsize,locks,memlock,msgqueue,nice,nofile,nproc,rss,rtprio,rttime,sigpending,stack]

Secondly, it allows for programmatic reading and setting of these limits via 2
new syscalls, getprlimit and setprlimit, which act in an identical fashion to
getrlimit and setrlimit respectively, except that they accept a process id as an
extra argument, to specify the process whose rlimit values you wish to
read/write.

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>


^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 1/3] extend get/setrlimit to support setting rlimits external to a process (v3)
  2009-10-01 17:15 ` [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v3) Neil Horman
@ 2009-10-01 17:16   ` Neil Horman
  2009-10-04 12:14     ` Marcin Slusarz
  2009-10-04 20:30     ` Marcin Slusarz
  2009-10-01 17:21   ` [PATCH 2/3] " Neil Horman
                     ` (3 subsequent siblings)
  4 siblings, 2 replies; 107+ messages in thread
From: Neil Horman @ 2009-10-01 17:16 UTC (permalink / raw)
  To: linux-kernel; +Cc: akpm, nhorman

Augment /proc/<pid>/limits file to support limit setting

It was suggested to me recently that we support a mechanism by which we can set
various process limits from points external to the process.  The reasoning being
that some processes are very long lived, and it would be beneficial to these
long lived processes if we could modify their various limits without needing to
kill them, adjust the limits for the user and restart them.  While individual
applications can certainly export this control on their own, it would be nice if
such functionality were available to a sysadmin, without needing to have each
application re-invent the wheel.

As such, I've implemented the below patch, which makes /proc/pid/limits writable
for each process.  By writing the following format:
<limit> <current value> <max value>
to the limits file, an administrator can now dynamically change the limits for
the respective process.  Tested by myself with good results.

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>


 fs/proc/base.c        |  165 +++++++++++++++++++++++++++++++++++++++++---------
 include/linux/sched.h |    3 
 kernel/sys.c          |   48 +++++++++-----
 3 files changed, 169 insertions(+), 47 deletions(-)

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 6f742f6..2f05799 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -49,6 +49,8 @@
 
 #include <asm/uaccess.h>
 
+#include <linux/string.h>
+#include <linux/ctype.h>
 #include <linux/errno.h>
 #include <linux/time.h>
 #include <linux/proc_fs.h>
@@ -455,72 +457,177 @@ static int proc_oom_score(struct task_struct *task, char *buffer)
 struct limit_names {
 	char *name;
 	char *unit;
+	char *match;
 };
 
 static const struct limit_names lnames[RLIM_NLIMITS] = {
-	[RLIMIT_CPU] = {"Max cpu time", "ms"},
-	[RLIMIT_FSIZE] = {"Max file size", "bytes"},
-	[RLIMIT_DATA] = {"Max data size", "bytes"},
-	[RLIMIT_STACK] = {"Max stack size", "bytes"},
-	[RLIMIT_CORE] = {"Max core file size", "bytes"},
-	[RLIMIT_RSS] = {"Max resident set", "bytes"},
-	[RLIMIT_NPROC] = {"Max processes", "processes"},
-	[RLIMIT_NOFILE] = {"Max open files", "files"},
-	[RLIMIT_MEMLOCK] = {"Max locked memory", "bytes"},
-	[RLIMIT_AS] = {"Max address space", "bytes"},
-	[RLIMIT_LOCKS] = {"Max file locks", "locks"},
-	[RLIMIT_SIGPENDING] = {"Max pending signals", "signals"},
-	[RLIMIT_MSGQUEUE] = {"Max msgqueue size", "bytes"},
-	[RLIMIT_NICE] = {"Max nice priority", NULL},
-	[RLIMIT_RTPRIO] = {"Max realtime priority", NULL},
-	[RLIMIT_RTTIME] = {"Max realtime timeout", "us"},
+	[RLIMIT_CPU] = {"Max cpu time", "ms", "cpu"},
+	[RLIMIT_FSIZE] = {"Max file size", "bytes", "fsize"},
+	[RLIMIT_DATA] = {"Max data size", "bytes", "data"},
+	[RLIMIT_STACK] = {"Max stack size", "bytes", "stack"},
+	[RLIMIT_CORE] = {"Max core file size", "bytes", "core"},
+	[RLIMIT_RSS] = {"Max resident set", "bytes", "rss"},
+	[RLIMIT_NPROC] = {"Max processes", "processes", "nproc"},
+	[RLIMIT_NOFILE] = {"Max open files", "files", "nofile"},
+	[RLIMIT_MEMLOCK] = {"Max locked memory", "bytes", "memlock"},
+	[RLIMIT_AS] = {"Max address space", "bytes", "as"},
+	[RLIMIT_LOCKS] = {"Max file locks", "locks", "locks"},
+	[RLIMIT_SIGPENDING] = {"Max pending signals", "signals", "sigpending"},
+	[RLIMIT_MSGQUEUE] = {"Max msgqueue size", "bytes", "msgqueue"},
+	[RLIMIT_NICE] = {"Max nice priority", NULL, "nice"},
+	[RLIMIT_RTPRIO] = {"Max realtime priority", NULL, "rtprio"},
+	[RLIMIT_RTTIME] = {"Max realtime timeout", "us", "rttime"},
 };
 
 /* Display limits for a process */
-static int proc_pid_limits(struct task_struct *task, char *buffer)
+static ssize_t proc_pid_limit_read(struct file *file, char __user *buf,
+			size_t count, loff_t *ppos)
 {
 	unsigned int i;
-	int count = 0;
 	unsigned long flags;
-	char *bufptr = buffer;
+	char *bufptr;
+	size_t bcount = 0;
+	size_t ccount = 0;
+	struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
 
 	struct rlimit rlim[RLIM_NLIMITS];
 
+	bufptr = kzalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!bufptr)
+		goto out;
+
 	if (!lock_task_sighand(task, &flags))
-		return 0;
+		goto out;
 	memcpy(rlim, task->signal->rlim, sizeof(struct rlimit) * RLIM_NLIMITS);
 	unlock_task_sighand(task, &flags);
 
 	/*
 	 * print the file header
 	 */
-	count += sprintf(&bufptr[count], "%-25s %-20s %-20s %-10s\n",
+	bcount += sprintf(&bufptr[bcount], "%-25s %-20s %-20s %-10s\n",
 			"Limit", "Soft Limit", "Hard Limit", "Units");
 
 	for (i = 0; i < RLIM_NLIMITS; i++) {
 		if (rlim[i].rlim_cur == RLIM_INFINITY)
-			count += sprintf(&bufptr[count], "%-25s %-20s ",
+			bcount += sprintf(&bufptr[bcount], "%-25s %-20s ",
 					 lnames[i].name, "unlimited");
 		else
-			count += sprintf(&bufptr[count], "%-25s %-20lu ",
+			bcount += sprintf(&bufptr[bcount], "%-25s %-20lu ",
 					 lnames[i].name, rlim[i].rlim_cur);
 
 		if (rlim[i].rlim_max == RLIM_INFINITY)
-			count += sprintf(&bufptr[count], "%-20s ", "unlimited");
+			bcount += sprintf(&bufptr[bcount], "%-20s ",
+					 "unlimited");
 		else
-			count += sprintf(&bufptr[count], "%-20lu ",
+			bcount += sprintf(&bufptr[bcount], "%-20lu ",
 					 rlim[i].rlim_max);
 
 		if (lnames[i].unit)
-			count += sprintf(&bufptr[count], "%-10s\n",
+			bcount += sprintf(&bufptr[bcount], "%-10s\n",
 					 lnames[i].unit);
 		else
-			count += sprintf(&bufptr[count], "\n");
+			bcount += sprintf(&bufptr[bcount], "\n");
+	}
+
+	if (*ppos >= bcount)
+		goto out_task;
+
+	ccount = min(count, (size_t)(bcount-(*ppos)));
+	ccount = ccount - copy_to_user(buf, &bufptr[*ppos], ccount);
+	*ppos += ccount;
+	kfree(bufptr);
+out_task:
+	put_task_struct(task);
+out:
+	return ccount;
+}
+
+static ssize_t proc_pid_limit_write(struct file *file, const char __user *buf,
+		size_t count, loff_t *ppos)
+{
+	char *buffer;
+	char *element, *vmc, *vmm;
+	struct rlimit new_rlim;
+	unsigned long flags;
+	int i;
+	int index = -1;
+	size_t wcount = 0;
+	struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
+
+
+	if (*ppos != 0)
+		goto out;
+
+	if (count > 128)
+		goto out;
+	buffer = kzalloc(128, GFP_KERNEL);
+
+	if (!buffer)
+		goto out;
+
+	element = kzalloc(sizeof(buffer), GFP_KERNEL);
+	vmc = kzalloc(sizeof(buffer), GFP_KERNEL);
+	vmm = kzalloc(sizeof(buffer), GFP_KERNEL);
+
+	if (!element || !vmm || !vmc)
+		goto out_free;
+
+	wcount = count - copy_from_user(buffer, buf, count);
+	if (wcount < count)
+		goto out_free;
+
+	i = sscanf(buffer, "%s %s %s", element, vmc, vmm);
+
+	if (i < 3)
+		goto out_free;
+
+	for (i = 0; i <= strlen(element); i++)
+		element[i] = tolower(element[i]);
+
+	if (!strncmp(vmc, "unlimited", 9))
+		new_rlim.rlim_cur = RLIM_INFINITY;
+	else
+		new_rlim.rlim_cur = simple_strtoull(vmc, NULL, 10);
+
+	if (!strncmp(vmm, "unlimited", 9))
+		new_rlim.rlim_max = RLIM_INFINITY;
+	else
+		new_rlim.rlim_max = simple_strtoull(vmm, NULL, 10);
+
+	for (i = 0; i < RLIM_NLIMITS; i++) {
+		if ((lnames[i].match) &&
+		    !strncmp(element, lnames[i].match,
+		     strlen(lnames[i].match))) {
+			index = i;
+			break;
+		}
 	}
 
+	if (!lock_task_sighand(task, &flags))
+		goto out_free;
+
+	if ((index >= 0) && (index < RLIM_NLIMITS))
+		do_setrlimit(index, &new_rlim, task);
+
+	unlock_task_sighand(task, &flags);
+
+out_free:
+	kfree(element);
+	kfree(vmc);
+	kfree(vmm);
+	kfree(buffer);
+out:
+	*ppos += count;
+	put_task_struct(task);
 	return count;
 }
 
+
+static const struct file_operations proc_limit_operations = {
+	.read           = proc_pid_limit_read,
+	.write		= proc_pid_limit_write,
+};
+
 #ifdef CONFIG_HAVE_ARCH_TRACEHOOK
 static int proc_pid_syscall(struct task_struct *task, char *buffer)
 {
@@ -2483,7 +2590,7 @@ static const struct pid_entry tgid_base_stuff[] = {
 	INF("auxv",       S_IRUSR, proc_pid_auxv),
 	ONE("status",     S_IRUGO, proc_pid_status),
 	ONE("personality", S_IRUSR, proc_pid_personality),
-	INF("limits",	  S_IRUSR, proc_pid_limits),
+	REG("limits",	  S_IRUSR|S_IWUSR, proc_limit_operations),
 #ifdef CONFIG_SCHED_DEBUG
 	REG("sched",      S_IRUGO|S_IWUSR, proc_pid_sched_operations),
 #endif
@@ -2822,7 +2929,7 @@ static const struct pid_entry tid_base_stuff[] = {
 	INF("auxv",      S_IRUSR, proc_pid_auxv),
 	ONE("status",    S_IRUGO, proc_pid_status),
 	ONE("personality", S_IRUSR, proc_pid_personality),
-	INF("limits",	 S_IRUSR, proc_pid_limits),
+	REG("limits",	 S_IRUSR|S_IWUSR, proc_limit_operations),
 #ifdef CONFIG_SCHED_DEBUG
 	REG("sched",     S_IRUGO|S_IWUSR, proc_pid_sched_operations),
 #endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 0f1ea4a..cada5d2 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -631,6 +631,9 @@ struct signal_struct {
 #endif
 };
 
+extern int do_setrlimit(unsigned int resource, struct rlimit *new_rlim,
+			struct task_struct *tsk);
+
 /* Context switch must be unlocked if interrupts are to be enabled */
 #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
 # define __ARCH_WANT_UNLOCKED_CTXSW
diff --git a/kernel/sys.c b/kernel/sys.c
index b3f1097..05bd22a 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1236,41 +1236,41 @@ SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource,
 
 #endif
 
-SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
+int do_setrlimit(unsigned int resource, struct rlimit *new_rlim,
+		 struct task_struct *tsk)
 {
-	struct rlimit new_rlim, *old_rlim;
 	int retval;
+	struct rlimit *old_rlim;
 
-	if (resource >= RLIM_NLIMITS)
-		return -EINVAL;
-	if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
-		return -EFAULT;
-	if (new_rlim.rlim_cur > new_rlim.rlim_max)
+
+	if (new_rlim->rlim_cur > new_rlim->rlim_max)
 		return -EINVAL;
-	old_rlim = current->signal->rlim + resource;
-	if ((new_rlim.rlim_max > old_rlim->rlim_max) &&
+	old_rlim = tsk->signal->rlim + resource;
+
+	if ((new_rlim->rlim_max > old_rlim->rlim_max) &&
 	    !capable(CAP_SYS_RESOURCE))
 		return -EPERM;
-	if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > sysctl_nr_open)
+
+	if (resource == RLIMIT_NOFILE && new_rlim->rlim_max > sysctl_nr_open)
 		return -EPERM;
 
-	retval = security_task_setrlimit(resource, &new_rlim);
+	retval = security_task_setrlimit(resource, new_rlim);
 	if (retval)
 		return retval;
 
-	if (resource == RLIMIT_CPU && new_rlim.rlim_cur == 0) {
+	if (resource == RLIMIT_CPU && new_rlim->rlim_cur == 0) {
 		/*
 		 * The caller is asking for an immediate RLIMIT_CPU
 		 * expiry.  But we use the zero value to mean "it was
 		 * never set".  So let's cheat and make it one second
 		 * instead
 		 */
-		new_rlim.rlim_cur = 1;
+		new_rlim->rlim_cur = 1;
 	}
 
-	task_lock(current->group_leader);
-	*old_rlim = new_rlim;
-	task_unlock(current->group_leader);
+	task_lock(tsk->group_leader);
+	*old_rlim = *new_rlim;
+	task_unlock(tsk->group_leader);
 
 	if (resource != RLIMIT_CPU)
 		goto out;
@@ -1281,14 +1281,26 @@ SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
 	 * very long-standing error, and fixing it now risks breakage of
 	 * applications, so we live with it
 	 */
-	if (new_rlim.rlim_cur == RLIM_INFINITY)
+	if (new_rlim->rlim_cur == RLIM_INFINITY)
 		goto out;
 
-	update_rlimit_cpu(new_rlim.rlim_cur);
+	update_rlimit_cpu(new_rlim->rlim_cur);
 out:
 	return 0;
 }
 
+SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
+{
+	struct rlimit new_rlim;
+
+	if (resource >= RLIM_NLIMITS)
+		return -EINVAL;
+	if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
+		return -EFAULT;
+
+	return do_setrlimit(resource, &new_rlim, current);
+}
+
 /*
  * It would make sense to put struct rusage in the task_struct,
  * except that would make the task_struct be *really big*.  After

^ permalink raw reply related	[flat|nested] 107+ messages in thread

* Re: [PATCH 2/3] extend get/setrlimit to support setting rlimits external to a process (v3)
  2009-10-01 17:15 ` [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v3) Neil Horman
  2009-10-01 17:16   ` [PATCH 1/3] " Neil Horman
@ 2009-10-01 17:21   ` Neil Horman
  2009-10-01 17:22   ` [PATCH 3/3] " Neil Horman
                     ` (2 subsequent siblings)
  4 siblings, 0 replies; 107+ messages in thread
From: Neil Horman @ 2009-10-01 17:21 UTC (permalink / raw)
  To: linux-kernel; +Cc: akpm, nhorman

Add syscall infrastructure for getprlimit/setprlimit

This patch adds the definitions for the get/setprlimit syscalls.  They are
identical to the get/setrlimit calls, except that they allow the caller to
manipulate limits for processes other than themselves.

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>


 include/linux/syscalls.h |    4 ++
 kernel/sys.c             |   84 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 88 insertions(+)

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 80de700..535210a 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -584,11 +584,15 @@ asmlinkage long sys_newuname(struct new_utsname __user *name);
 
 asmlinkage long sys_getrlimit(unsigned int resource,
 				struct rlimit __user *rlim);
+asmlinkage long sys_getprlimit(pid_t pid, unsigned int resource,
+				struct rlimit __user *rlim);
 #if defined(COMPAT_RLIM_OLD_INFINITY) || !(defined(CONFIG_IA64))
 asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit __user *rlim);
 #endif
 asmlinkage long sys_setrlimit(unsigned int resource,
 				struct rlimit __user *rlim);
+asmlinkage long	sys_setprlimit(pid_t pid, unsigned int resource,
+				struct rlimit __user *rlim);
 asmlinkage long sys_getrusage(int who, struct rusage __user *ru);
 asmlinkage long sys_umask(int mask);
 
diff --git a/kernel/sys.c b/kernel/sys.c
index 05bd22a..4fe1140 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1211,6 +1211,50 @@ SYSCALL_DEFINE2(getrlimit, unsigned int, resource, struct rlimit __user *, rlim)
 	}
 }
 
+SYSCALL_DEFINE3(getprlimit, pid_t, pid, unsigned int, resource,
+		struct rlimit __user *, rlim)
+{
+	unsigned long flags;
+	struct task_struct *tsk;
+	struct pid *ppid;
+	int retval = -EINVAL;
+
+	ppid = find_get_pid(pid);
+	if (!ppid)
+		goto out;
+
+	tsk = get_pid_task(ppid, PIDTYPE_PID);
+
+	if (!tsk)
+		goto out_put_pid;
+
+	if (resource >= RLIM_NLIMITS)
+		goto out_put_all;
+
+	retval = -EBUSY;
+	if (!lock_task_sighand(tsk, &flags))
+		goto out_put_all;
+
+	else {
+		struct rlimit val;
+
+		task_lock(tsk->group_leader);
+		val = current->signal->rlim[resource];
+		task_unlock(tsk->group_leader);
+		retval = copy_to_user(rlim, &val, sizeof(*rlim)) ? -EFAULT : 0;
+	}
+
+	unlock_task_sighand(tsk, &flags);
+
+out_put_all:
+	put_task_struct(tsk);
+out_put_pid:
+	put_pid(ppid);
+out:
+	return retval;
+}
+
+
 #ifdef __ARCH_WANT_SYS_OLD_GETRLIMIT
 
 /*
@@ -1301,6 +1345,46 @@ SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
 	return do_setrlimit(resource, &new_rlim, current);
 }
 
+SYSCALL_DEFINE3(setprlimit, pid_t, pid, unsigned int, resource,
+		struct rlimit __user *, rlim)
+{
+	struct task_struct *tsk;
+	struct pid *ppid;
+	unsigned long flags;
+	struct rlimit new_rlim;
+	int retval = -EINVAL;
+
+	ppid = find_get_pid(pid);
+	if (!ppid)
+		goto out;
+
+	tsk = get_pid_task(ppid, PIDTYPE_PID);
+
+	if (!tsk)
+		goto out_put_pid;
+
+	if (resource >= RLIM_NLIMITS)
+		goto out_put_all;
+
+	retval = -EFAULT;
+	if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
+		goto out_put_all;
+
+	if (!lock_task_sighand(tsk, &flags))
+		goto out_put_all;
+
+	retval = do_setrlimit(resource, &new_rlim, tsk);
+
+	unlock_task_sighand(tsk, &flags);
+
+out_put_all:
+	put_task_struct(tsk);
+out_put_pid:
+	put_pid(ppid);
+out:
+	return retval;
+}
+
 /*
  * It would make sense to put struct rusage in the task_struct,
  * except that would make the task_struct be *really big*.  After

^ permalink raw reply related	[flat|nested] 107+ messages in thread

* Re: [PATCH 3/3] extend get/setrlimit to support setting rlimits external to a process (v3)
  2009-10-01 17:15 ` [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v3) Neil Horman
  2009-10-01 17:16   ` [PATCH 1/3] " Neil Horman
  2009-10-01 17:21   ` [PATCH 2/3] " Neil Horman
@ 2009-10-01 17:22   ` Neil Horman
  2009-10-05  0:26   ` [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v4) Neil Horman
  2009-10-12 16:13   ` [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v5) Neil Horman
  4 siblings, 0 replies; 107+ messages in thread
From: Neil Horman @ 2009-10-01 17:22 UTC (permalink / raw)
  To: linux-kernel; +Cc: akpm, nhorman

Define __NR_getprlimit and __NR_setprlimit syscalls for asm-generic
    
Signed-off-by: Neil Horman <nhorman@tuxdriver.com>


 unistd.h |    7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)


diff --git a/include/asm-generic/unistd.h b/include/asm-generic/unistd.h
index 1125e5a..3321e4f 100644
--- a/include/asm-generic/unistd.h
+++ b/include/asm-generic/unistd.h
@@ -623,8 +623,13 @@ __SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo)
 #define __NR_perf_counter_open 241
 __SYSCALL(__NR_perf_counter_open, sys_perf_counter_open)
 
+#define __NR_getprlimit 242
+__SYSCALL(__NR_getprlimit, sys_getprlimit)
+#define __NR_setprlimit 243
+__SYSCALL(__NR_setprlimit, sys_setprlimit)
+
 #undef __NR_syscalls
-#define __NR_syscalls 242
+#define __NR_syscalls 244
 
 /*
  * All syscalls below here should go away really,

^ permalink raw reply related	[flat|nested] 107+ messages in thread

* Re: [PATCH 1/3] extend get/setrlimit to support setting rlimits external to a process (v3)
  2009-10-01 17:16   ` [PATCH 1/3] " Neil Horman
@ 2009-10-04 12:14     ` Marcin Slusarz
  2009-10-04 16:50       ` Neil Horman
  2009-10-04 20:30     ` Marcin Slusarz
  1 sibling, 1 reply; 107+ messages in thread
From: Marcin Slusarz @ 2009-10-04 12:14 UTC (permalink / raw)
  To: Neil Horman; +Cc: linux-kernel, akpm

Neil Horman wrote:
> Augment /proc/<pid>/limits file to support limit setting
>(...)
>  /* Display limits for a process */
> -static int proc_pid_limits(struct task_struct *task, char *buffer)
> +static ssize_t proc_pid_limit_read(struct file *file, char __user *buf,
> +			size_t count, loff_t *ppos)
>  {
>  	unsigned int i;
> -	int count = 0;
>  	unsigned long flags;
> -	char *bufptr = buffer;
> +	char *bufptr;
> +	size_t bcount = 0;
> +	size_t ccount = 0;
> +	struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
>  
>  	struct rlimit rlim[RLIM_NLIMITS];
>  
> +	bufptr = kzalloc(PAGE_SIZE, GFP_KERNEL);
> +	if (!bufptr)
> +		goto out;
> +
>  	if (!lock_task_sighand(task, &flags))
> -		return 0;
> +		goto out;

memory leak (bufptr)

>  	memcpy(rlim, task->signal->rlim, sizeof(struct rlimit) * RLIM_NLIMITS);
>  	unlock_task_sighand(task, &flags);
>  
>  	/*
>  	 * print the file header
>  	 */
> -	count += sprintf(&bufptr[count], "%-25s %-20s %-20s %-10s\n",
> +	bcount += sprintf(&bufptr[bcount], "%-25s %-20s %-20s %-10s\n",
>  			"Limit", "Soft Limit", "Hard Limit", "Units");
>  
>  	for (i = 0; i < RLIM_NLIMITS; i++) {
>  		if (rlim[i].rlim_cur == RLIM_INFINITY)
> -			count += sprintf(&bufptr[count], "%-25s %-20s ",
> +			bcount += sprintf(&bufptr[bcount], "%-25s %-20s ",
>  					 lnames[i].name, "unlimited");
>  		else
> -			count += sprintf(&bufptr[count], "%-25s %-20lu ",
> +			bcount += sprintf(&bufptr[bcount], "%-25s %-20lu ",
>  					 lnames[i].name, rlim[i].rlim_cur);
>  
>  		if (rlim[i].rlim_max == RLIM_INFINITY)
> -			count += sprintf(&bufptr[count], "%-20s ", "unlimited");
> +			bcount += sprintf(&bufptr[bcount], "%-20s ",
> +					 "unlimited");
>  		else
> -			count += sprintf(&bufptr[count], "%-20lu ",
> +			bcount += sprintf(&bufptr[bcount], "%-20lu ",
>  					 rlim[i].rlim_max);
>  
>  		if (lnames[i].unit)
> -			count += sprintf(&bufptr[count], "%-10s\n",
> +			bcount += sprintf(&bufptr[bcount], "%-10s\n",
>  					 lnames[i].unit);
>  		else
> -			count += sprintf(&bufptr[count], "\n");
> +			bcount += sprintf(&bufptr[bcount], "\n");
> +	}
> +
> +	if (*ppos >= bcount)
> +		goto out_task;

again

> +
> +	ccount = min(count, (size_t)(bcount-(*ppos)));
> +	ccount = ccount - copy_to_user(buf, &bufptr[*ppos], ccount);
> +	*ppos += ccount;
> +	kfree(bufptr);
> +out_task:
> +	put_task_struct(task);
> +out:
> +	return ccount;
> +}
> +
> +static ssize_t proc_pid_limit_write(struct file *file, const char __user *buf,
> +		size_t count, loff_t *ppos)
> +{
> +	char *buffer;
> +	char *element, *vmc, *vmm;
> +	struct rlimit new_rlim;
> +	unsigned long flags;
> +	int i;
> +	int index = -1;
> +	size_t wcount = 0;
> +	struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
> +
> +
> +	if (*ppos != 0)
> +		goto out;
> +
> +	if (count > 128)
> +		goto out;
> +	buffer = kzalloc(128, GFP_KERNEL);
> +
> +	if (!buffer)
> +		goto out;

element, vmc, vmm are not initialized and you kfree them in this case

> +
> +	element = kzalloc(sizeof(buffer), GFP_KERNEL);
> +	vmc = kzalloc(sizeof(buffer), GFP_KERNEL);
> +	vmm = kzalloc(sizeof(buffer), GFP_KERNEL);

sizeof(buffer) == 4 or 8 - pretty short buffer...

> +
> +	if (!element || !vmm || !vmc)
> +		goto out_free;
> +
> +	wcount = count - copy_from_user(buffer, buf, count);
> +	if (wcount < count)
> +		goto out_free;

copy_from_user usage usually looks like:
if (copy_from_user()) {
	ret = -EFAULT;
	goto err;
}
you don't seem to use copy_from_user return value for anything useful

> +
> +	i = sscanf(buffer, "%s %s %s", element, vmc, vmm);

what if the user passes strings longer than the size of the buffers?

> +
> +	if (i < 3)
> +		goto out_free;
> +
> +	for (i = 0; i <= strlen(element); i++)
> +		element[i] = tolower(element[i]);

it's harmless, but should be "i < strlen"

> +
> +	if (!strncmp(vmc, "unlimited", 9))
> +		new_rlim.rlim_cur = RLIM_INFINITY;
> +	else
> +		new_rlim.rlim_cur = simple_strtoull(vmc, NULL, 10);
> +
> +	if (!strncmp(vmm, "unlimited", 9))
> +		new_rlim.rlim_max = RLIM_INFINITY;
> +	else
> +		new_rlim.rlim_max = simple_strtoull(vmm, NULL, 10);
> +
> +	for (i = 0; i < RLIM_NLIMITS; i++) {
> +		if ((lnames[i].match) &&
> +		    !strncmp(element, lnames[i].match,
> +		     strlen(lnames[i].match))) {
> +			index = i;
> +			break;
> +		}
>  	}
>  
> +	if (!lock_task_sighand(task, &flags))
> +		goto out_free;
> +
> +	if ((index >= 0) && (index < RLIM_NLIMITS))
> +		do_setrlimit(index, &new_rlim, task);
> +
> +	unlock_task_sighand(task, &flags);
> +
> +out_free:
> +	kfree(element);
> +	kfree(vmc);
> +	kfree(vmm);
> +	kfree(buffer);
> +out:
> +	*ppos += count;
> +	put_task_struct(task);
>  	return count;
>  }
>  
> (...)

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 1/3] extend get/setrlimit to support setting rlimits external to a process (v3)
  2009-10-04 12:14     ` Marcin Slusarz
@ 2009-10-04 16:50       ` Neil Horman
  2009-10-04 20:04         ` Marcin Slusarz
  0 siblings, 1 reply; 107+ messages in thread
From: Neil Horman @ 2009-10-04 16:50 UTC (permalink / raw)
  To: Marcin Slusarz; +Cc: linux-kernel, akpm

On Sun, Oct 04, 2009 at 02:14:49PM +0200, Marcin Slusarz wrote:
> Neil Horman wrote:
> > Augment /proc/<pid>/limits file to support limit setting
> >(...)
> >  /* Display limits for a process */
> > -static int proc_pid_limits(struct task_struct *task, char *buffer)
> > +static ssize_t proc_pid_limit_read(struct file *file, char __user *buf,
> > +			size_t count, loff_t *ppos)
> >  {
> >  	unsigned int i;
> > -	int count = 0;
> >  	unsigned long flags;
> > -	char *bufptr = buffer;
> > +	char *bufptr;
> > +	size_t bcount = 0;
> > +	size_t ccount = 0;
> > +	struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
> >  
> >  	struct rlimit rlim[RLIM_NLIMITS];
> >  
> > +	bufptr = kzalloc(PAGE_SIZE, GFP_KERNEL);
> > +	if (!bufptr)
> > +		goto out;
> > +
> >  	if (!lock_task_sighand(task, &flags))
> > -		return 0;
> > +		goto out;
> 
> memory leak (bufptr)
> 
Gah, thanks!

> >  	memcpy(rlim, task->signal->rlim, sizeof(struct rlimit) * RLIM_NLIMITS);
> >  	unlock_task_sighand(task, &flags);
> >  
> >  	/*
> >  	 * print the file header
> >  	 */
> > -	count += sprintf(&bufptr[count], "%-25s %-20s %-20s %-10s\n",
> > +	bcount += sprintf(&bufptr[bcount], "%-25s %-20s %-20s %-10s\n",
> >  			"Limit", "Soft Limit", "Hard Limit", "Units");
> >  
> >  	for (i = 0; i < RLIM_NLIMITS; i++) {
> >  		if (rlim[i].rlim_cur == RLIM_INFINITY)
> > -			count += sprintf(&bufptr[count], "%-25s %-20s ",
> > +			bcount += sprintf(&bufptr[bcount], "%-25s %-20s ",
> >  					 lnames[i].name, "unlimited");
> >  		else
> > -			count += sprintf(&bufptr[count], "%-25s %-20lu ",
> > +			bcount += sprintf(&bufptr[bcount], "%-25s %-20lu ",
> >  					 lnames[i].name, rlim[i].rlim_cur);
> >  
> >  		if (rlim[i].rlim_max == RLIM_INFINITY)
> > -			count += sprintf(&bufptr[count], "%-20s ", "unlimited");
> > +			bcount += sprintf(&bufptr[bcount], "%-20s ",
> > +					 "unlimited");
> >  		else
> > -			count += sprintf(&bufptr[count], "%-20lu ",
> > +			bcount += sprintf(&bufptr[bcount], "%-20lu ",
> >  					 rlim[i].rlim_max);
> >  
> >  		if (lnames[i].unit)
> > -			count += sprintf(&bufptr[count], "%-10s\n",
> > +			bcount += sprintf(&bufptr[bcount], "%-10s\n",
> >  					 lnames[i].unit);
> >  		else
> > -			count += sprintf(&bufptr[count], "\n");
> > +			bcount += sprintf(&bufptr[bcount], "\n");
> > +	}
> > +
> > +	if (*ppos >= bcount)
> > +		goto out_task;
> 
> again
> 
ditto.

> > +
> > +	ccount = min(count, (size_t)(bcount-(*ppos)));
> > +	ccount = ccount - copy_to_user(buf, &bufptr[*ppos], ccount);
> > +	*ppos += ccount;
> > +	kfree(bufptr);
> > +out_task:
> > +	put_task_struct(task);
> > +out:
> > +	return ccount;
> > +}
> > +
> > +static ssize_t proc_pid_limit_write(struct file *file, const char __user *buf,
> > +		size_t count, loff_t *ppos)
> > +{
> > +	char *buffer;
> > +	char *element, *vmc, *vmm;
> > +	struct rlimit new_rlim;
> > +	unsigned long flags;
> > +	int i;
> > +	int index = -1;
> > +	size_t wcount = 0;
> > +	struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
> > +
> > +
> > +	if (*ppos != 0)
> > +		goto out;
> > +
> > +	if (count > 128)
> > +		goto out;
> > +	buffer = kzalloc(128, GFP_KERNEL);
> > +
> > +	if (!buffer)
> > +		goto out;
> 
> element, vmc, vmm are not initialized and you kfree them in this case
> 
Yep, I'll fix that

> > +
> > +	element = kzalloc(sizeof(buffer), GFP_KERNEL);
> > +	vmc = kzalloc(sizeof(buffer), GFP_KERNEL);
> > +	vmm = kzalloc(sizeof(buffer), GFP_KERNEL);
> 
> sizeof(buffer) == 4 or 8 - pretty short buffer...
> 
> > +
> > +	if (!element || !vmm || !vmc)
> > +		goto out_free;
> > +
> > +	wcount = count - copy_from_user(buffer, buf, count);
> > +	if (wcount < count)
> > +		goto out_free;
> 
> copy_from_user usage usually looks like:
> if (copy_from_user()) {
> 	ret = -EFAULT;
> 	goto err;
> }
> you don't seem to use copy_from_user return value for anything useful
> 
I did at one point, a few versions ago, that can likely be removed now.

> > +
> > +	i = sscanf(buffer, "%s %s %s", element, vmc, vmm);
> 
> what if user pass strings longer than size of buffers?
> 
You fail the write, there's a check at the top of the function for that.  By the
time you get here, buffer is guaranteed to be no more than 128 bytes.


> > +
> > +	if (i < 3)
> > +		goto out_free;
> > +
> > +	for (i = 0; i <= strlen(element); i++)
> > +		element[i] = tolower(element[i]);
> 
> it's harmless, but should be "i < strlen"
> 
Yeah, I guess we don't need a lower case \0 :)

> > +
> > +	if (!strncmp(vmc, "unlimited", 9))
> > +		new_rlim.rlim_cur = RLIM_INFINITY;
> > +	else
> > +		new_rlim.rlim_cur = simple_strtoull(vmc, NULL, 10);
> > +
> > +	if (!strncmp(vmm, "unlimited", 9))
> > +		new_rlim.rlim_max = RLIM_INFINITY;
> > +	else
> > +		new_rlim.rlim_max = simple_strtoull(vmm, NULL, 10);
> > +
> > +	for (i = 0; i < RLIM_NLIMITS; i++) {
> > +		if ((lnames[i].match) &&
> > +		    !strncmp(element, lnames[i].match,
> > +		     strlen(lnames[i].match))) {
> > +			index = i;
> > +			break;
> > +		}
> >  	}
> >  
> > +	if (!lock_task_sighand(task, &flags))
> > +		goto out_free;
> > +
> > +	if ((index >= 0) && (index < RLIM_NLIMITS))
> > +		do_setrlimit(index, &new_rlim, task);
> > +
> > +	unlock_task_sighand(task, &flags);
> > +
> > +out_free:
> > +	kfree(element);
> > +	kfree(vmc);
> > +	kfree(vmm);
> > +	kfree(buffer);
> > +out:
> > +	*ppos += count;
> > +	put_task_struct(task);
> >  	return count;
> >  }
> >  
> > (...)
> 


I'll make these corrections and repost.  Thanks!
Neil


^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 1/3] extend get/setrlimit to support setting rlimits external to a process (v3)
  2009-10-04 16:50       ` Neil Horman
@ 2009-10-04 20:04         ` Marcin Slusarz
  2009-10-04 23:10           ` Neil Horman
  0 siblings, 1 reply; 107+ messages in thread
From: Marcin Slusarz @ 2009-10-04 20:04 UTC (permalink / raw)
  To: Neil Horman; +Cc: linux-kernel, akpm

Neil Horman wrote:
>>> +static ssize_t proc_pid_limit_write(struct file *file, const char __user *buf,
>>> +		size_t count, loff_t *ppos)
>>> +{
>>> +	char *buffer;
>>> +	char *element, *vmc, *vmm;
>>> +	struct rlimit new_rlim;
>>> +	unsigned long flags;
>>> +	int i;
>>> +	int index = -1;
>>> +	size_t wcount = 0;
>>> +	struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
>>> +
>>> +
>>> +	if (*ppos != 0)
>>> +		goto out;
>>> +
>>> +	if (count > 128)
>>> +		goto out;
>>> +	buffer = kzalloc(128, GFP_KERNEL);
>>> +
>>> +	if (!buffer)
>>> +		goto out;
>> element, vmc, vmm are not initialized and you kfree them in this case
>>
> Yep, I'll fix that
> 
>>> +
>>> +	element = kzalloc(sizeof(buffer), GFP_KERNEL);
>>> +	vmc = kzalloc(sizeof(buffer), GFP_KERNEL);
>>> +	vmm = kzalloc(sizeof(buffer), GFP_KERNEL);
>> sizeof(buffer) == 4 or 8 - pretty short buffer...
>>
>>> +
>>> +	if (!element || !vmm || !vmc)
>>> +		goto out_free;
>>> +
>>> +	wcount = count - copy_from_user(buffer, buf, count);
>>> +	if (wcount < count)
>>> +		goto out_free;
>> copy_from_user usage usually looks like:
>> if (copy_from_user()) {
>> 	ret = -EFAULT;
>> 	goto err;
>> }
>> you don't seem to use copy_from_user return value for anything useful
>>
> I did at one point, a few versions ago, that can likely be removed now.
> 
>>> +
>>> +	i = sscanf(buffer, "%s %s %s", element, vmc, vmm);
>> what if user pass strings longer than size of buffers?
>>
> You fail the write, theres a check at the top of the function for that.  By the
> time you get here, buffer is guaranteed to be no more than 128 bytes.

But you allocated only 4/8 bytes (depending on size of void*) for element, vmc, vmm.
It will overflow for a string like "xxxxxxxxxxxxxxxxxxx y z".

Marcin

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 1/3] extend get/setrlimit to support setting rlimits external to a process (v3)
  2009-10-01 17:16   ` [PATCH 1/3] " Neil Horman
  2009-10-04 12:14     ` Marcin Slusarz
@ 2009-10-04 20:30     ` Marcin Slusarz
  1 sibling, 0 replies; 107+ messages in thread
From: Marcin Slusarz @ 2009-10-04 20:30 UTC (permalink / raw)
  To: Neil Horman; +Cc: linux-kernel, akpm

Neil Horman wrote:
> +static ssize_t proc_pid_limit_write(struct file *file, const char __user *buf,
> +		size_t count, loff_t *ppos)
> +{
> +	char *buffer;
> +	char *element, *vmc, *vmm;
> +	struct rlimit new_rlim;
> +	unsigned long flags;
> +	int i;
> +	int index = -1;
> +	size_t wcount = 0;
> +	struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
> +
> +
> +	if (*ppos != 0)
> +		goto out;
> +
> +	if (count > 128)
> +		goto out;

Shouldn't proc_pid_limit_write return 0 or some error in this case?
Right now it silently fails...

> +	buffer = kzalloc(128, GFP_KERNEL);
> +
> +	if (!buffer)
> +		goto out;
> +
> +	element = kzalloc(sizeof(buffer), GFP_KERNEL);
> +	vmc = kzalloc(sizeof(buffer), GFP_KERNEL);
> +	vmm = kzalloc(sizeof(buffer), GFP_KERNEL);
> +
> +	if (!element || !vmm || !vmc)
> +		goto out_free;
> +
> +	wcount = count - copy_from_user(buffer, buf, count);
> +	if (wcount < count)
> +		goto out_free;
> +
> +	i = sscanf(buffer, "%s %s %s", element, vmc, vmm);
> +
> +	if (i < 3)
> +		goto out_free;
> +
> +	for (i = 0; i <= strlen(element); i++)
> +		element[i] = tolower(element[i]);
> +
> +	if (!strncmp(vmc, "unlimited", 9))
> +		new_rlim.rlim_cur = RLIM_INFINITY;
> +	else
> +		new_rlim.rlim_cur = simple_strtoull(vmc, NULL, 10);
> +
> +	if (!strncmp(vmm, "unlimited", 9))
> +		new_rlim.rlim_max = RLIM_INFINITY;
> +	else
> +		new_rlim.rlim_max = simple_strtoull(vmm, NULL, 10);
> +
> +	for (i = 0; i < RLIM_NLIMITS; i++) {
> +		if ((lnames[i].match) &&
> +		    !strncmp(element, lnames[i].match,
> +		     strlen(lnames[i].match))) {
> +			index = i;
> +			break;
> +		}
>  	}
>  
> +	if (!lock_task_sighand(task, &flags))
> +		goto out_free;
> +
> +	if ((index >= 0) && (index < RLIM_NLIMITS))
> +		do_setrlimit(index, &new_rlim, task);

Another 2 silent failures:
- when user passed wrong name
- when do_setrlimit failed

> +
> +	unlock_task_sighand(task, &flags);
> +
> +out_free:
> +	kfree(element);
> +	kfree(vmc);
> +	kfree(vmm);
> +	kfree(buffer);
> +out:
> +	*ppos += count;
> +	put_task_struct(task);
>  	return count;
>  }
>  

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 1/3] extend get/setrlimit to support setting rlimits external to a process (v3)
  2009-10-04 20:04         ` Marcin Slusarz
@ 2009-10-04 23:10           ` Neil Horman
  0 siblings, 0 replies; 107+ messages in thread
From: Neil Horman @ 2009-10-04 23:10 UTC (permalink / raw)
  To: Marcin Slusarz; +Cc: linux-kernel, akpm

On Sun, Oct 04, 2009 at 10:04:20PM +0200, Marcin Slusarz wrote:
> Neil Horman wrote:
> >>> +static ssize_t proc_pid_limit_write(struct file *file, const char __user *buf,
> >>> +		size_t count, loff_t *ppos)
> >>> +{
> >>> +	char *buffer;
> >>> +	char *element, *vmc, *vmm;
> >>> +	struct rlimit new_rlim;
> >>> +	unsigned long flags;
> >>> +	int i;
> >>> +	int index = -1;
> >>> +	size_t wcount = 0;
> >>> +	struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
> >>> +
> >>> +
> >>> +	if (*ppos != 0)
> >>> +		goto out;
> >>> +
> >>> +	if (count > 128)
> >>> +		goto out;
> >>> +	buffer = kzalloc(128, GFP_KERNEL);
> >>> +
> >>> +	if (!buffer)
> >>> +		goto out;
> >> element, vmc, vmm are not initialized and you kfree them in this case
> >>
> > Yep, I'll fix that
> > 
> >>> +
> >>> +	element = kzalloc(sizeof(buffer), GFP_KERNEL);
> >>> +	vmc = kzalloc(sizeof(buffer), GFP_KERNEL);
> >>> +	vmm = kzalloc(sizeof(buffer), GFP_KERNEL);
> >> sizeof(buffer) == 4 or 8 - pretty short buffer...
> >>
> >>> +
> >>> +	if (!element || !vmm || !vmc)
> >>> +		goto out_free;
> >>> +
> >>> +	wcount = count - copy_from_user(buffer, buf, count);
> >>> +	if (wcount < count)
> >>> +		goto out_free;
> >> copy_from_user usage usually looks like:
> >> if (copy_from_user()) {
> >> 	ret = -EFAULT;
> >> 	goto err;
> >> }
> >> you don't seem to use copy_from_user return value for anything useful
> >>
> > I did at one point, a few versions ago, that can likely be removed now.
> > 
> >>> +
> >>> +	i = sscanf(buffer, "%s %s %s", element, vmc, vmm);
> >> what if user pass strings longer than size of buffers?
> >>
> > You fail the write, theres a check at the top of the function for that.  By the
> > time you get here, buffer is guaranteed to be no more than 128 bytes.
> 
Crud, you're right, that only works with static arrays, I'll fix that up.
Thanks!
> 

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v4)
  2009-10-01 17:15 ` [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v3) Neil Horman
                     ` (2 preceding siblings ...)
  2009-10-01 17:22   ` [PATCH 3/3] " Neil Horman
@ 2009-10-05  0:26   ` Neil Horman
  2009-10-05  0:53     ` [PATCH 1/3] " Neil Horman
                       ` (2 more replies)
  2009-10-12 16:13   ` [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v5) Neil Horman
  4 siblings, 3 replies; 107+ messages in thread
From: Neil Horman @ 2009-10-05  0:26 UTC (permalink / raw)
  To: linux-kernel; +Cc: akpm, marcin.slusarz

Ok, here's attempt number 4, in response to the new set of notes.
 
Change Notes:

1) Cleaned up the memory leaks

2) Added error returns

3) Fixed buffer allocation sizes

Summary
 
It's been requested often that we have the ability to read and modify process
rlimit values from contexts external to the owning process.  Ideally this allows
sysadmins to adjust rlimits on long running processes without the need to stop
and restart those processes, which incurs undesirable downtime.  This patch
enables that functionality.  It does so in two places.  First it enables process
limit setting by writing to the /proc/pid/limits file a string in the format:
<limit> <current limit> <max limit> > /proc/<pid>/limits
where limit is one of
[as,core,cpu,data,fsize,locks,memlock,msgqueue,nice,nofile,nproc,rss,rtprio,rttime,sigpending,stack]

Secondly it allows for programmatic setting of these limits via 2 new syscalls,
getprlimit, and setprlimit, which act in an identical fashion to getrlimit and
setrlimit respectively, except that they accept a process id as an extra
argument, to specify the process id of the rlimit values that you wish to
read/write.

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>


^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 1/3] extend get/setrlimit to support setting rlimits external to a process (v4)
  2009-10-05  0:26   ` [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v4) Neil Horman
@ 2009-10-05  0:53     ` Neil Horman
  2009-10-08 21:32       ` Marcin Slusarz
  2009-10-05  0:54     ` [PATCH 2/3] " Neil Horman
  2009-10-05  0:54     ` [PATCH 3/3] " Neil Horman
  2 siblings, 1 reply; 107+ messages in thread
From: Neil Horman @ 2009-10-05  0:53 UTC (permalink / raw)
  To: linux-kernel; +Cc: akpm, marcin.slusarz

Augment /proc/<pid>/limits file to support limit setting

It was suggested to me recently that we support a mechanism by which we can set
various process limits from points external to the process.  The reasoning being
that some processes are very long lived, and it would be beneficial to these
long lived processes if we could modify their various limits without needing to
kill them, adjust the limits for the user and restart them.  While individual
applications can certainly export this control on their own, it would be nice if
such functionality were available to a sysadmin, without needing to have each
application re-invent the wheel.

As such, I've implemented the below patch, which makes /proc/pid/limits writable
for each process.  By writing the following format:
<limit> <current value> <max value>
to the limits file, an administrator can now dynamically change the limits for
the respective process.  Tested by myself with good results.

Neil

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>


 fs/proc/base.c        |  183 +++++++++++++++++++++++++++++++++++++++++---------
 include/linux/sched.h |    3 
 kernel/sys.c          |   48 ++++++++-----
 3 files changed, 186 insertions(+), 48 deletions(-)

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 6f742f6..631f01b 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -49,6 +49,8 @@
 
 #include <asm/uaccess.h>
 
+#include <linux/string.h>
+#include <linux/ctype.h>
 #include <linux/errno.h>
 #include <linux/time.h>
 #include <linux/proc_fs.h>
@@ -455,72 +457,193 @@ static int proc_oom_score(struct task_struct *task, char *buffer)
 struct limit_names {
 	char *name;
 	char *unit;
+	char *match;
 };
 
 static const struct limit_names lnames[RLIM_NLIMITS] = {
-	[RLIMIT_CPU] = {"Max cpu time", "ms"},
-	[RLIMIT_FSIZE] = {"Max file size", "bytes"},
-	[RLIMIT_DATA] = {"Max data size", "bytes"},
-	[RLIMIT_STACK] = {"Max stack size", "bytes"},
-	[RLIMIT_CORE] = {"Max core file size", "bytes"},
-	[RLIMIT_RSS] = {"Max resident set", "bytes"},
-	[RLIMIT_NPROC] = {"Max processes", "processes"},
-	[RLIMIT_NOFILE] = {"Max open files", "files"},
-	[RLIMIT_MEMLOCK] = {"Max locked memory", "bytes"},
-	[RLIMIT_AS] = {"Max address space", "bytes"},
-	[RLIMIT_LOCKS] = {"Max file locks", "locks"},
-	[RLIMIT_SIGPENDING] = {"Max pending signals", "signals"},
-	[RLIMIT_MSGQUEUE] = {"Max msgqueue size", "bytes"},
-	[RLIMIT_NICE] = {"Max nice priority", NULL},
-	[RLIMIT_RTPRIO] = {"Max realtime priority", NULL},
-	[RLIMIT_RTTIME] = {"Max realtime timeout", "us"},
+	[RLIMIT_CPU] = {"Max cpu time", "ms", "cpu"},
+	[RLIMIT_FSIZE] = {"Max file size", "bytes", "fsize"},
+	[RLIMIT_DATA] = {"Max data size", "bytes", "data"},
+	[RLIMIT_STACK] = {"Max stack size", "bytes", "stack"},
+	[RLIMIT_CORE] = {"Max core file size", "bytes", "core"},
+	[RLIMIT_RSS] = {"Max resident set", "bytes", "rss"},
+	[RLIMIT_NPROC] = {"Max processes", "processes", "nproc"},
+	[RLIMIT_NOFILE] = {"Max open files", "files", "nofile"},
+	[RLIMIT_MEMLOCK] = {"Max locked memory", "bytes", "memlock"},
+	[RLIMIT_AS] = {"Max address space", "bytes", "as"},
+	[RLIMIT_LOCKS] = {"Max file locks", "locks", "locks"},
+	[RLIMIT_SIGPENDING] = {"Max pending signals", "signals", "sigpending"},
+	[RLIMIT_MSGQUEUE] = {"Max msgqueue size", "bytes", "msgqueue"},
+	[RLIMIT_NICE] = {"Max nice priority", NULL, "nice"},
+	[RLIMIT_RTPRIO] = {"Max realtime priority", NULL, "rtprio"},
+	[RLIMIT_RTTIME] = {"Max realtime timeout", "us", "rttime"},
 };
 
 /* Display limits for a process */
-static int proc_pid_limits(struct task_struct *task, char *buffer)
+static ssize_t proc_pid_limit_read(struct file *file, char __user *buf,
+			size_t count, loff_t *ppos)
 {
 	unsigned int i;
-	int count = 0;
 	unsigned long flags;
-	char *bufptr = buffer;
+	char *bufptr;
+	size_t bcount = 0;
+	size_t ccount = -ENOMEM;
+	struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
 
 	struct rlimit rlim[RLIM_NLIMITS];
 
+	bufptr = kzalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!bufptr)
+		goto out;
+
+	ccount = -EBUSY;
+
 	if (!lock_task_sighand(task, &flags))
-		return 0;
+		goto out_free;
 	memcpy(rlim, task->signal->rlim, sizeof(struct rlimit) * RLIM_NLIMITS);
 	unlock_task_sighand(task, &flags);
 
 	/*
 	 * print the file header
 	 */
-	count += sprintf(&bufptr[count], "%-25s %-20s %-20s %-10s\n",
+	bcount += sprintf(&bufptr[bcount], "%-25s %-20s %-20s %-10s\n",
 			"Limit", "Soft Limit", "Hard Limit", "Units");
 
 	for (i = 0; i < RLIM_NLIMITS; i++) {
 		if (rlim[i].rlim_cur == RLIM_INFINITY)
-			count += sprintf(&bufptr[count], "%-25s %-20s ",
+			bcount += sprintf(&bufptr[bcount], "%-25s %-20s ",
 					 lnames[i].name, "unlimited");
 		else
-			count += sprintf(&bufptr[count], "%-25s %-20lu ",
+			bcount += sprintf(&bufptr[bcount], "%-25s %-20lu ",
 					 lnames[i].name, rlim[i].rlim_cur);
 
 		if (rlim[i].rlim_max == RLIM_INFINITY)
-			count += sprintf(&bufptr[count], "%-20s ", "unlimited");
+			bcount += sprintf(&bufptr[bcount], "%-20s ",
+					 "unlimited");
 		else
-			count += sprintf(&bufptr[count], "%-20lu ",
+			bcount += sprintf(&bufptr[bcount], "%-20lu ",
 					 rlim[i].rlim_max);
 
 		if (lnames[i].unit)
-			count += sprintf(&bufptr[count], "%-10s\n",
+			bcount += sprintf(&bufptr[bcount], "%-10s\n",
 					 lnames[i].unit);
 		else
-			count += sprintf(&bufptr[count], "\n");
+			bcount += sprintf(&bufptr[bcount], "\n");
 	}
 
-	return count;
+	ccount = -EMSGSIZE;
+
+	if (*ppos >= bcount)
+		goto out_task;
+
+	ccount = min(count, (size_t)(bcount-(*ppos)));
+	ccount = ccount - copy_to_user(buf, &bufptr[*ppos], ccount);
+	*ppos += ccount;
+
+out_task:
+	put_task_struct(task);
+out_free:
+	kfree(bufptr);
+out:
+	return ccount;
+}
+
+#define PROC_PID_BUF_SZ 128
+static ssize_t proc_pid_limit_write(struct file *file, const char __user *buf,
+		size_t count, loff_t *ppos)
+{
+	char *buffer;
+	char *element, *vmc, *vmm;
+	struct rlimit new_rlim;
+	unsigned long flags;
+	int i;
+	int index = -1;
+	size_t wcount = -EMSGSIZE;
+	struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
+
+	if (*ppos != 0)
+		goto out;
+
+	if (count > PROC_PID_BUF_SZ)
+		goto out;
+
+	wcount = -ENOMEM;
+	buffer = kzalloc(PROC_PID_BUF_SZ, GFP_KERNEL);
+
+	if (!buffer)
+		goto out;
+
+	element = kzalloc(PROC_PID_BUF_SZ, GFP_KERNEL);
+	vmc = kzalloc(PROC_PID_BUF_SZ, GFP_KERNEL);
+	vmm = kzalloc(PROC_PID_BUF_SZ, GFP_KERNEL);
+
+	if (!element || !vmm || !vmc)
+		goto out_free;
+
+	wcount = -EFAULT;
+	if (copy_from_user(buffer, buf, count))
+		goto out_free;
+
+	i = sscanf(buffer, "%s %s %s", element, vmc, vmm);
+
+	if (i < 3)
+		goto out_free;
+
+	for (i = 0; i < strlen(element); i++)
+		element[i] = tolower(element[i]);
+
+	if (!strncmp(vmc, "unlimited", 9))
+		new_rlim.rlim_cur = RLIM_INFINITY;
+	else
+		new_rlim.rlim_cur = simple_strtoull(vmc, NULL, 10);
+
+	if (!strncmp(vmm, "unlimited", 9))
+		new_rlim.rlim_max = RLIM_INFINITY;
+	else
+		new_rlim.rlim_max = simple_strtoull(vmm, NULL, 10);
+
+
+	for (i = 0; i < RLIM_NLIMITS; i++) {
+		if ((lnames[i].match) &&
+		    !strncmp(element, lnames[i].match,
+		     strlen(lnames[i].match))) {
+			index = i;
+			break;
+		}
+	}
+
+	wcount = -EBUSY;
+
+	if (!lock_task_sighand(task, &flags))
+		goto out_free;
+
+	wcount = -ENOENT;
+
+	if ((index >= 0) && (index < RLIM_NLIMITS))
+		wcount = do_setrlimit(index, &new_rlim, task);
+
+	unlock_task_sighand(task, &flags);
+
+out_free:
+	kfree(element);
+	kfree(vmc);
+	kfree(vmm);
+	kfree(buffer);
+out:
+	if (!wcount) {
+		*ppos += count;
+		wcount = count;
+	}
+	put_task_struct(task);
+	return wcount;
 }
 
+
+static const struct file_operations proc_limit_operations = {
+	.read           = proc_pid_limit_read,
+	.write		= proc_pid_limit_write,
+};
+
 #ifdef CONFIG_HAVE_ARCH_TRACEHOOK
 static int proc_pid_syscall(struct task_struct *task, char *buffer)
 {
@@ -2483,7 +2606,7 @@ static const struct pid_entry tgid_base_stuff[] = {
 	INF("auxv",       S_IRUSR, proc_pid_auxv),
 	ONE("status",     S_IRUGO, proc_pid_status),
 	ONE("personality", S_IRUSR, proc_pid_personality),
-	INF("limits",	  S_IRUSR, proc_pid_limits),
+	REG("limits",	  S_IRUSR|S_IWUSR, proc_limit_operations),
 #ifdef CONFIG_SCHED_DEBUG
 	REG("sched",      S_IRUGO|S_IWUSR, proc_pid_sched_operations),
 #endif
@@ -2822,7 +2945,7 @@ static const struct pid_entry tid_base_stuff[] = {
 	INF("auxv",      S_IRUSR, proc_pid_auxv),
 	ONE("status",    S_IRUGO, proc_pid_status),
 	ONE("personality", S_IRUSR, proc_pid_personality),
-	INF("limits",	 S_IRUSR, proc_pid_limits),
+	REG("limits",	 S_IRUSR|S_IWUSR, proc_limit_operations),
 #ifdef CONFIG_SCHED_DEBUG
 	REG("sched",     S_IRUGO|S_IWUSR, proc_pid_sched_operations),
 #endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 0f1ea4a..cada5d2 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -631,6 +631,9 @@ struct signal_struct {
 #endif
 };
 
+extern int do_setrlimit(unsigned int resource, struct rlimit *new_rlim,
+			struct task_struct *tsk);
+
 /* Context switch must be unlocked if interrupts are to be enabled */
 #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
 # define __ARCH_WANT_UNLOCKED_CTXSW
diff --git a/kernel/sys.c b/kernel/sys.c
index b3f1097..05bd22a 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1236,41 +1236,41 @@ SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource,
 
 #endif
 
-SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
+int do_setrlimit(unsigned int resource, struct rlimit *new_rlim,
+		 struct task_struct *tsk)
 {
-	struct rlimit new_rlim, *old_rlim;
 	int retval;
+	struct rlimit *old_rlim;
 
-	if (resource >= RLIM_NLIMITS)
-		return -EINVAL;
-	if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
-		return -EFAULT;
-	if (new_rlim.rlim_cur > new_rlim.rlim_max)
+
+	if (new_rlim->rlim_cur > new_rlim->rlim_max)
 		return -EINVAL;
-	old_rlim = current->signal->rlim + resource;
-	if ((new_rlim.rlim_max > old_rlim->rlim_max) &&
+	old_rlim = tsk->signal->rlim + resource;
+
+	if ((new_rlim->rlim_max > old_rlim->rlim_max) &&
 	    !capable(CAP_SYS_RESOURCE))
 		return -EPERM;
-	if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > sysctl_nr_open)
+
+	if (resource == RLIMIT_NOFILE && new_rlim->rlim_max > sysctl_nr_open)
 		return -EPERM;
 
-	retval = security_task_setrlimit(resource, &new_rlim);
+	retval = security_task_setrlimit(resource, new_rlim);
 	if (retval)
 		return retval;
 
-	if (resource == RLIMIT_CPU && new_rlim.rlim_cur == 0) {
+	if (resource == RLIMIT_CPU && new_rlim->rlim_cur == 0) {
 		/*
 		 * The caller is asking for an immediate RLIMIT_CPU
 		 * expiry.  But we use the zero value to mean "it was
 		 * never set".  So let's cheat and make it one second
 		 * instead
 		 */
-		new_rlim.rlim_cur = 1;
+		new_rlim->rlim_cur = 1;
 	}
 
-	task_lock(current->group_leader);
-	*old_rlim = new_rlim;
-	task_unlock(current->group_leader);
+	task_lock(tsk->group_leader);
+	*old_rlim = *new_rlim;
+	task_unlock(tsk->group_leader);
 
 	if (resource != RLIMIT_CPU)
 		goto out;
@@ -1281,14 +1281,26 @@ SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
 	 * very long-standing error, and fixing it now risks breakage of
 	 * applications, so we live with it
 	 */
-	if (new_rlim.rlim_cur == RLIM_INFINITY)
+	if (new_rlim->rlim_cur == RLIM_INFINITY)
 		goto out;
 
-	update_rlimit_cpu(new_rlim.rlim_cur);
+	update_rlimit_cpu(new_rlim->rlim_cur);
 out:
 	return 0;
 }
 
+SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
+{
+	struct rlimit new_rlim;
+
+	if (resource >= RLIM_NLIMITS)
+		return -EINVAL;
+	if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
+		return -EFAULT;
+
+	return do_setrlimit(resource, &new_rlim, current);
+}
+
 /*
  * It would make sense to put struct rusage in the task_struct,
  * except that would make the task_struct be *really big*.  After

^ permalink raw reply related	[flat|nested] 107+ messages in thread

* Re: [PATCH 2/3] extend get/setrlimit to support setting rlimits external to a process (v4)
  2009-10-05  0:26   ` [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v4) Neil Horman
  2009-10-05  0:53     ` [PATCH 1/3] " Neil Horman
@ 2009-10-05  0:54     ` Neil Horman
  2009-10-05  1:57       ` Américo Wang
  2009-10-05  0:54     ` [PATCH 3/3] " Neil Horman
  2 siblings, 1 reply; 107+ messages in thread
From: Neil Horman @ 2009-10-05  0:54 UTC (permalink / raw)
  To: linux-kernel; +Cc: akpm, marcin.slusarz

Add syscall infrastructure for getprlimit/setprlimit

This patch adds the definitions for the get/setprlimit syscalls.  They are
identical to the get/setrlimit calls, except that they allow the caller to
manipulate limits for processes other than themselves.

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>


 include/linux/syscalls.h |    4 ++
 kernel/sys.c             |   84 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 88 insertions(+)

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 80de700..535210a 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -584,11 +584,15 @@ asmlinkage long sys_newuname(struct new_utsname __user *name);
 
 asmlinkage long sys_getrlimit(unsigned int resource,
 				struct rlimit __user *rlim);
+asmlinkage long sys_getprlimit(pid_t pid, unsigned int resource,
+				struct rlimit __user *rlim);
 #if defined(COMPAT_RLIM_OLD_INFINITY) || !(defined(CONFIG_IA64))
 asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit __user *rlim);
 #endif
 asmlinkage long sys_setrlimit(unsigned int resource,
 				struct rlimit __user *rlim);
+asmlinkage long	sys_setprlimit(pid_t pid, unsigned int resource,
+				struct rlimit __user *rlim);
 asmlinkage long sys_getrusage(int who, struct rusage __user *ru);
 asmlinkage long sys_umask(int mask);
 
diff --git a/kernel/sys.c b/kernel/sys.c
index 05bd22a..4fe1140 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1211,6 +1211,50 @@ SYSCALL_DEFINE2(getrlimit, unsigned int, resource, struct rlimit __user *, rlim)
 	}
 }
 
+SYSCALL_DEFINE3(getprlimit, pid_t, pid, unsigned int, resource,
+		struct rlimit __user *, rlim)
+{
+	unsigned long flags;
+	struct task_struct *tsk;
+	struct pid *ppid;
+	int retval = -EINVAL;
+
+	ppid = find_get_pid(pid);
+	if (!ppid)
+		goto out;
+
+	tsk = get_pid_task(ppid, PIDTYPE_PID);
+
+	if (!tsk)
+		goto out_put_pid;
+
+	if (resource >= RLIM_NLIMITS)
+		goto out_put_all;
+
+	retval = -EBUSY;
+	if (!lock_task_sighand(tsk, &flags))
+		goto out_put_all;
+
+	else {
+		struct rlimit val;
+
+		task_lock(tsk->group_leader);
+		val = current->signal->rlim[resource];
+		task_unlock(tsk->group_leader);
+		retval = copy_to_user(rlim, &val, sizeof(*rlim)) ? -EFAULT : 0;
+	}
+
+	unlock_task_sighand(tsk, &flags);
+
+out_put_all:
+	put_task_struct(tsk);
+out_put_pid:
+	put_pid(ppid);
+out:
+	return retval;
+}
+
+
 #ifdef __ARCH_WANT_SYS_OLD_GETRLIMIT
 
 /*
@@ -1301,6 +1345,46 @@ SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
 	return do_setrlimit(resource, &new_rlim, current);
 }
 
+SYSCALL_DEFINE3(setprlimit, pid_t, pid, unsigned int, resource,
+		struct rlimit __user *, rlim)
+{
+	struct task_struct *tsk;
+	struct pid *ppid;
+	unsigned long flags;
+	struct rlimit new_rlim;
+	int retval = -EINVAL;
+
+	ppid = find_get_pid(pid);
+	if (!ppid)
+		goto out;
+
+	tsk = get_pid_task(ppid, PIDTYPE_PID);
+
+	if (!tsk)
+		goto out_put_pid;
+
+	if (resource >= RLIM_NLIMITS)
+		goto out_put_all;
+
+	retval = -EFAULT;
+	if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
+		goto out_put_all;
+
+	if (!lock_task_sighand(tsk, &flags))
+		goto out_put_all;
+
+	retval = do_setrlimit(resource, &new_rlim, tsk);
+
+	unlock_task_sighand(tsk, &flags);
+
+out_put_all:
+	put_task_struct(tsk);
+out_put_pid:
+	put_pid(ppid);
+out:
+	return retval;
+}
+
 /*
  * It would make sense to put struct rusage in the task_struct,
  * except that would make the task_struct be *really big*.  After

^ permalink raw reply related	[flat|nested] 107+ messages in thread

* Re: [PATCH 3/3] extend get/setrlimit to support setting rlimits external to a process (v4)
  2009-10-05  0:26   ` [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v4) Neil Horman
  2009-10-05  0:53     ` [PATCH 1/3] " Neil Horman
  2009-10-05  0:54     ` [PATCH 2/3] " Neil Horman
@ 2009-10-05  0:54     ` Neil Horman
  2 siblings, 0 replies; 107+ messages in thread
From: Neil Horman @ 2009-10-05  0:54 UTC (permalink / raw)
  To: linux-kernel; +Cc: akpm, marcin.slusarz

Define __NR_getprlimit and __NR_setprlimit syscalls for asm-generic

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>


 unistd.h |    7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/include/asm-generic/unistd.h b/include/asm-generic/unistd.h
index 1125e5a..3321e4f 100644
--- a/include/asm-generic/unistd.h
+++ b/include/asm-generic/unistd.h
@@ -623,8 +623,13 @@ __SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo)
 #define __NR_perf_counter_open 241
 __SYSCALL(__NR_perf_counter_open, sys_perf_counter_open)
 
+#define __NR_getprlimit 242
+__SYSCALL(__NR_getprlimit, sys_getprlimit)
+#define __NR_setprlimit 243
+__SYSCALL(__NR_setprlimit, sys_setprlimit)
+
 #undef __NR_syscalls
-#define __NR_syscalls 242
+#define __NR_syscalls 244
 
 /*
  * All syscalls below here should go away really,

^ permalink raw reply related	[flat|nested] 107+ messages in thread

* Re: [PATCH 2/3] extend get/setrlimit to support setting rlimits external to a process (v4)
  2009-10-05  0:54     ` [PATCH 2/3] " Neil Horman
@ 2009-10-05  1:57       ` Américo Wang
  2009-10-05 12:32         ` Neil Horman
  0 siblings, 1 reply; 107+ messages in thread
From: Américo Wang @ 2009-10-05  1:57 UTC (permalink / raw)
  To: Neil Horman; +Cc: linux-kernel, akpm, marcin.slusarz

On Sun, Oct 04, 2009 at 08:54:05PM -0400, Neil Horman wrote:
>Add syscall infrastructure for getprlimit/setprlimit
>
>This patch adds the definitions for the get/setprlimit syscalls.  They are
>identical to the get/setlimit calls, except that they allow the caller to
>manipulate limits for processes other than themselves.
>
>Signed-off-by: Neil Horman <nhorman@tuxdriver.com>

Hello, Neil.

What is your point of adding two new syscalls?
Why not add them to prctl(2)? :-)


Thanks.

>
>
> include/linux/syscalls.h |    4 ++
> kernel/sys.c             |   84 +++++++++++++++++++++++++++++++++++++++++++++++
> 2 files changed, 88 insertions(+)
>
>diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
>index 80de700..535210a 100644
>--- a/include/linux/syscalls.h
>+++ b/include/linux/syscalls.h
>@@ -584,11 +584,15 @@ asmlinkage long sys_newuname(struct new_utsname __user *name);
> 
> asmlinkage long sys_getrlimit(unsigned int resource,
> 				struct rlimit __user *rlim);
>+asmlinkage long sys_getprlimit(pid_t pid, unsigned int resource,
>+				struct rlimit __user *rlim);
> #if defined(COMPAT_RLIM_OLD_INFINITY) || !(defined(CONFIG_IA64))
> asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit __user *rlim);
> #endif
> asmlinkage long sys_setrlimit(unsigned int resource,
> 				struct rlimit __user *rlim);
>+asmlinkage long	sys_setprlimit(pid_t pid, unsigned int resource,
>+				struct rlimit __user *rlim);
> asmlinkage long sys_getrusage(int who, struct rusage __user *ru);
> asmlinkage long sys_umask(int mask);
> 
>diff --git a/kernel/sys.c b/kernel/sys.c
>index 05bd22a..4fe1140 100644
>--- a/kernel/sys.c
>+++ b/kernel/sys.c
>@@ -1211,6 +1211,50 @@ SYSCALL_DEFINE2(getrlimit, unsigned int, resource, struct rlimit __user *, rlim)
> 	}
> }
> 
>+SYSCALL_DEFINE3(getprlimit, pid_t, pid, unsigned int, resource,
>+		struct rlimit __user *, rlim)
>+{
>+	unsigned long flags;
>+	struct task_struct *tsk;
>+	struct pid *ppid;
>+	int retval = -EINVAL;
>+
>+	ppid = find_get_pid(pid);
>+	if (!ppid)
>+		goto out;
>+
>+	tsk = get_pid_task(ppid, PIDTYPE_PID);
>+
>+	if (!tsk)
>+		goto out_put_pid;
>+
>+	if (resource >= RLIM_NLIMITS)
>+		goto out_put_all;
>+
>+	retval = -EBUSY;
>+	if (!lock_task_sighand(tsk, &flags))
>+		goto out_put_all;
>+
>+	else {
>+		struct rlimit val;
>+
>+		task_lock(tsk->group_leader);
>+		val = current->signal->rlim[resource];
>+		task_unlock(tsk->group_leader);
>+		retval = copy_to_user(rlim, &val, sizeof(*rlim)) ? -EFAULT : 0;
>+	}
>+
>+	unlock_task_sighand(tsk, &flags);
>+
>+out_put_all:
>+	put_task_struct(tsk);
>+out_put_pid:
>+	put_pid(ppid);
>+out:
>+	return retval;
>+}
>+
>+
> #ifdef __ARCH_WANT_SYS_OLD_GETRLIMIT
> 
> /*
>@@ -1301,6 +1345,46 @@ SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
> 	return do_setrlimit(resource, &new_rlim, current);
> }
> 
>+SYSCALL_DEFINE3(setprlimit, pid_t, pid, unsigned int, resource,
>+		struct rlimit __user *, rlim)
>+{
>+	struct task_struct *tsk;
>+	struct pid *ppid;
>+	unsigned long flags;
>+	struct rlimit new_rlim;
>+	int retval = -EINVAL;
>+
>+	ppid = find_get_pid(pid);
>+	if (!ppid)
>+		goto out;
>+
>+	tsk = get_pid_task(ppid, PIDTYPE_PID);
>+
>+	if (!tsk)
>+		goto out_put_pid;
>+
>+	if (resource >= RLIM_NLIMITS)
>+		goto out_put_all;
>+
>+	retval = -EFAULT;
>+	if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
>+		goto out_put_all;
>+
>+	if (!lock_task_sighand(tsk, &flags))
>+		goto out_put_all;
>+
>+	retval = do_setrlimit(resource, &new_rlim, tsk);
>+
>+	unlock_task_sighand(tsk, &flags);
>+
>+out_put_all:
>+	put_task_struct(tsk);
>+out_put_pid:
>+	put_pid(ppid);
>+out:
>+	return retval;
>+}
>+
> /*
>  * It would make sense to put struct rusage in the task_struct,
>  * except that would make the task_struct be *really big*.  After
>--
>To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
>the body of a message to majordomo@vger.kernel.org
>More majordomo info at  http://vger.kernel.org/majordomo-info.html
>Please read the FAQ at  http://www.tux.org/lkml/

-- 
Live like a child, think like the god.
 

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 2/3] extend get/setrlimit to support setting rlimits external to a process (v4)
  2009-10-05  1:57       ` Américo Wang
@ 2009-10-05 12:32         ` Neil Horman
  0 siblings, 0 replies; 107+ messages in thread
From: Neil Horman @ 2009-10-05 12:32 UTC (permalink / raw)
  To: Américo Wang; +Cc: linux-kernel, akpm, marcin.slusarz

On Mon, Oct 05, 2009 at 09:57:56AM +0800, Américo Wang wrote:
> On Sun, Oct 04, 2009 at 08:54:05PM -0400, Neil Horman wrote:
> >Add syscall infrastructure for getprlimit/setprlimit
> >
> >This patch adds the definitions for the get/setprlimit syscalls.  They are
> >identical to the get/setlimit calls, except that they allow the caller to
> >manipulate limits for processes other than themselves.
> >
> >Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
> 
> Hello, Neil.
> 
> What is your point of adding two new syscalls?
> Why not add them to prctl(2)? :-)
> 
See the conversations between Andrew and me over the first two versions of this
changeset :).  Initially I was opposed to adding syscalls at all, but Andrew
thought that this interface lent itself naturally to syscalls (given that
setrlimit and getrlimit are already syscalls).  Since the core work here is
applicable to both syscalls and the proc interface, I figured it didn't hurt to
do both.

Best
Neil

> 
> Thanks.
> 
> >
> >
> > include/linux/syscalls.h |    4 ++
> > kernel/sys.c             |   84 +++++++++++++++++++++++++++++++++++++++++++++++
> > 2 files changed, 88 insertions(+)
> >
> >diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
> >index 80de700..535210a 100644
> >--- a/include/linux/syscalls.h
> >+++ b/include/linux/syscalls.h
> >@@ -584,11 +584,15 @@ asmlinkage long sys_newuname(struct new_utsname __user *name);
> > 
> > asmlinkage long sys_getrlimit(unsigned int resource,
> > 				struct rlimit __user *rlim);
> >+asmlinkage long sys_getprlimit(pid_t pid, unsigned int resource,
> >+				struct rlimit __user *rlim);
> > #if defined(COMPAT_RLIM_OLD_INFINITY) || !(defined(CONFIG_IA64))
> > asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit __user *rlim);
> > #endif
> > asmlinkage long sys_setrlimit(unsigned int resource,
> > 				struct rlimit __user *rlim);
> >+asmlinkage long	sys_setprlimit(pid_t pid, unsigned int resource,
> >+				struct rlimit __user *rlim);
> > asmlinkage long sys_getrusage(int who, struct rusage __user *ru);
> > asmlinkage long sys_umask(int mask);
> > 
> >diff --git a/kernel/sys.c b/kernel/sys.c
> >index 05bd22a..4fe1140 100644
> >--- a/kernel/sys.c
> >+++ b/kernel/sys.c
> >@@ -1211,6 +1211,50 @@ SYSCALL_DEFINE2(getrlimit, unsigned int, resource, struct rlimit __user *, rlim)
> > 	}
> > }
> > 
> >+SYSCALL_DEFINE3(getprlimit, pid_t, pid, unsigned int, resource,
> >+		struct rlimit __user *, rlim)
> >+{
> >+	unsigned long flags;
> >+	struct task_struct *tsk;
> >+	struct pid *ppid;
> >+	int retval = -EINVAL;
> >+
> >+	ppid = find_get_pid(pid);
> >+	if (!ppid)
> >+		goto out;
> >+
> >+	tsk = get_pid_task(ppid, PIDTYPE_PID);
> >+
> >+	if (!tsk)
> >+		goto out_put_pid;
> >+
> >+	if (resource >= RLIM_NLIMITS)
> >+		goto out_put_all;
> >+
> >+	retval = -EBUSY;
> >+	if (!lock_task_sighand(tsk, &flags))
> >+		goto out_put_all;
> >+
> >+	else {
> >+		struct rlimit val;
> >+
> >+		task_lock(tsk->group_leader);
> >+		val = current->signal->rlim[resource];
> >+		task_unlock(tsk->group_leader);
> >+		retval = copy_to_user(rlim, &val, sizeof(*rlim)) ? -EFAULT : 0;
> >+	}
> >+
> >+	unlock_task_sighand(tsk, &flags);
> >+
> >+out_put_all:
> >+	put_task_struct(tsk);
> >+out_put_pid:
> >+	put_pid(ppid);
> >+out:
> >+	return retval;
> >+}
> >+
> >+
> > #ifdef __ARCH_WANT_SYS_OLD_GETRLIMIT
> > 
> > /*
> >@@ -1301,6 +1345,46 @@ SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
> > 	return do_setrlimit(resource, &new_rlim, current);
> > }
> > 
> >+SYSCALL_DEFINE3(setprlimit, pid_t, pid, unsigned int, resource,
> >+		struct rlimit __user *, rlim)
> >+{
> >+	struct task_struct *tsk;
> >+	struct pid *ppid;
> >+	unsigned long flags;
> >+	struct rlimit new_rlim;
> >+	int retval = -EINVAL;
> >+
> >+	ppid = find_get_pid(pid);
> >+	if (!ppid)
> >+		goto out;
> >+
> >+	tsk = get_pid_task(ppid, PIDTYPE_PID);
> >+
> >+	if (!tsk)
> >+		goto out_put_pid;
> >+
> >+	if (resource >= RLIM_NLIMITS)
> >+		goto out_put_all;
> >+
> >+	retval = -EFAULT;
> >+	if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
> >+		goto out_put_all;
> >+
> >+	if (!lock_task_sighand(tsk, &flags))
> >+		goto out_put_all;
> >+
> >+	retval = do_setrlimit(resource, &new_rlim, tsk);
> >+
> >+	unlock_task_sighand(tsk, &flags);
> >+
> >+out_put_all:
> >+	put_task_struct(tsk);
> >+out_put_pid:
> >+	put_pid(ppid);
> >+out:
> >+	return retval;
> >+}
> >+
> > /*
> >  * It would make sense to put struct rusage in the task_struct,
> >  * except that would make the task_struct be *really big*.  After
> >--
> >To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> >the body of a message to majordomo@vger.kernel.org
> >More majordomo info at  http://vger.kernel.org/majordomo-info.html
> >Please read the FAQ at  http://www.tux.org/lkml/
> 
> -- 
> Live like a child, think like the god.
>  
> 

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 1/3] extend get/setrlimit to support setting rlimits external to a process (v4)
  2009-10-05  0:53     ` [PATCH 1/3] " Neil Horman
@ 2009-10-08 21:32       ` Marcin Slusarz
  2009-10-09  2:00         ` Neil Horman
  0 siblings, 1 reply; 107+ messages in thread
From: Marcin Slusarz @ 2009-10-08 21:32 UTC (permalink / raw)
  To: Neil Horman; +Cc: linux-kernel, akpm

I found some new issues in this patch, sorry ;).

Neil Horman wrote:
> (...)
> diff --git a/fs/proc/base.c b/fs/proc/base.c
> index 6f742f6..631f01b 100644
> --- a/fs/proc/base.c
> +++ b/fs/proc/base.c
> @@ -49,6 +49,8 @@
>  
>  #include <asm/uaccess.h>
>  
> +#include <linux/string.h>
> +#include <linux/ctype.h>
>  #include <linux/errno.h>
>  #include <linux/time.h>
>  #include <linux/proc_fs.h>
> @@ -455,72 +457,193 @@ static int proc_oom_score(struct task_struct *task, char *buffer)
>  struct limit_names {
>  	char *name;
>  	char *unit;
> +	char *match;
>  };
>  
>  static const struct limit_names lnames[RLIM_NLIMITS] = {
> -	[RLIMIT_CPU] = {"Max cpu time", "ms"},
> -	[RLIMIT_FSIZE] = {"Max file size", "bytes"},
> -	[RLIMIT_DATA] = {"Max data size", "bytes"},
> -	[RLIMIT_STACK] = {"Max stack size", "bytes"},
> -	[RLIMIT_CORE] = {"Max core file size", "bytes"},
> -	[RLIMIT_RSS] = {"Max resident set", "bytes"},
> -	[RLIMIT_NPROC] = {"Max processes", "processes"},
> -	[RLIMIT_NOFILE] = {"Max open files", "files"},
> -	[RLIMIT_MEMLOCK] = {"Max locked memory", "bytes"},
> -	[RLIMIT_AS] = {"Max address space", "bytes"},
> -	[RLIMIT_LOCKS] = {"Max file locks", "locks"},
> -	[RLIMIT_SIGPENDING] = {"Max pending signals", "signals"},
> -	[RLIMIT_MSGQUEUE] = {"Max msgqueue size", "bytes"},
> -	[RLIMIT_NICE] = {"Max nice priority", NULL},
> -	[RLIMIT_RTPRIO] = {"Max realtime priority", NULL},
> -	[RLIMIT_RTTIME] = {"Max realtime timeout", "us"},
> +	[RLIMIT_CPU] = {"Max cpu time", "ms", "cpu"},
> +	[RLIMIT_FSIZE] = {"Max file size", "bytes", "fsize"},
> +	[RLIMIT_DATA] = {"Max data size", "bytes", "data"},
> +	[RLIMIT_STACK] = {"Max stack size", "bytes", "stack"},
> +	[RLIMIT_CORE] = {"Max core file size", "bytes", "core"},
> +	[RLIMIT_RSS] = {"Max resident set", "bytes", "rss"},
> +	[RLIMIT_NPROC] = {"Max processes", "processes", "nproc"},
> +	[RLIMIT_NOFILE] = {"Max open files", "files", "nofile"},
> +	[RLIMIT_MEMLOCK] = {"Max locked memory", "bytes", "memlock"},
> +	[RLIMIT_AS] = {"Max address space", "bytes", "as"},
> +	[RLIMIT_LOCKS] = {"Max file locks", "locks", "locks"},
> +	[RLIMIT_SIGPENDING] = {"Max pending signals", "signals", "sigpending"},
> +	[RLIMIT_MSGQUEUE] = {"Max msgqueue size", "bytes", "msgqueue"},
> +	[RLIMIT_NICE] = {"Max nice priority", NULL, "nice"},
> +	[RLIMIT_RTPRIO] = {"Max realtime priority", NULL, "rtprio"},
> +	[RLIMIT_RTTIME] = {"Max realtime timeout", "us", "rttime"},
>  };

There's no way user can figure out what's the "match" for every limit.
Maybe you could print it after "limit name"?

>  
>  /* Display limits for a process */
> -static int proc_pid_limits(struct task_struct *task, char *buffer)
> +static ssize_t proc_pid_limit_read(struct file *file, char __user *buf,
> +			size_t count, loff_t *ppos)
>  {
>  	unsigned int i;
> -	int count = 0;
>  	unsigned long flags;
> -	char *bufptr = buffer;
> +	char *bufptr;
> +	size_t bcount = 0;
> +	size_t ccount = -ENOMEM;
> +	struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
>  
>  	struct rlimit rlim[RLIM_NLIMITS];
>  
> +	bufptr = kzalloc(PAGE_SIZE, GFP_KERNEL);

I think you could derive size of allocation from RLIM_NLIMITS.
If I'm reading correctly it will be something like (RLIM_NLIMITS + 1) * 80.

> +	if (!bufptr)
> +		goto out;
> +
> +	ccount = -EBUSY;
> +
>  	if (!lock_task_sighand(task, &flags))
> -		return 0;
> +		goto out_free;
>  	memcpy(rlim, task->signal->rlim, sizeof(struct rlimit) * RLIM_NLIMITS);
>  	unlock_task_sighand(task, &flags);
>  
>  	/*
>  	 * print the file header
>  	 */
> -	count += sprintf(&bufptr[count], "%-25s %-20s %-20s %-10s\n",
> +	bcount += sprintf(&bufptr[bcount], "%-25s %-20s %-20s %-10s\n",
>  			"Limit", "Soft Limit", "Hard Limit", "Units");
>  
>  	for (i = 0; i < RLIM_NLIMITS; i++) {
>  		if (rlim[i].rlim_cur == RLIM_INFINITY)
> -			count += sprintf(&bufptr[count], "%-25s %-20s ",
> +			bcount += sprintf(&bufptr[bcount], "%-25s %-20s ",
>  					 lnames[i].name, "unlimited");
>  		else
> -			count += sprintf(&bufptr[count], "%-25s %-20lu ",
> +			bcount += sprintf(&bufptr[bcount], "%-25s %-20lu ",
>  					 lnames[i].name, rlim[i].rlim_cur);
>  
>  		if (rlim[i].rlim_max == RLIM_INFINITY)
> -			count += sprintf(&bufptr[count], "%-20s ", "unlimited");
> +			bcount += sprintf(&bufptr[bcount], "%-20s ",
> +					 "unlimited");
>  		else
> -			count += sprintf(&bufptr[count], "%-20lu ",
> +			bcount += sprintf(&bufptr[bcount], "%-20lu ",
>  					 rlim[i].rlim_max);
>  
>  		if (lnames[i].unit)
> -			count += sprintf(&bufptr[count], "%-10s\n",
> +			bcount += sprintf(&bufptr[bcount], "%-10s\n",
>  					 lnames[i].unit);
>  		else
> -			count += sprintf(&bufptr[count], "\n");
> +			bcount += sprintf(&bufptr[bcount], "\n");
>  	}
>  
> -	return count;
> +	ccount = -EMSGSIZE;
> +
> +	if (*ppos >= bcount)
> +		goto out_task;
> +
> +	ccount = min(count, (size_t)(bcount-(*ppos)));
> +	ccount = ccount - copy_to_user(buf, &bufptr[*ppos], ccount);
> +	*ppos += ccount;
> +
> +out_task:
> +	put_task_struct(task);
> +out_free:
> +	kfree(bufptr);
> +out:
> +	return ccount;
> +}
> +
> +#define PROC_PID_BUF_SZ 128
> +static ssize_t proc_pid_limit_write(struct file *file, const char __user *buf,
> +		size_t count, loff_t *ppos)
> +{
> +	char *buffer;
> +	char *element, *vmc, *vmm;
> +	struct rlimit new_rlim;
> +	unsigned long flags;
> +	int i;
> +	int index = -1;
> +	size_t wcount = -EMSGSIZE;
> +	struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
> +
> +	if (*ppos != 0)
> +		goto out;
> +
> +	if (count > PROC_PID_BUF_SZ)
> +		goto out;
> +
> +	wcount = -ENOMEM;
> +	buffer = kzalloc(PROC_PID_BUF_SZ, GFP_KERNEL);
> +
> +	if (!buffer)
> +		goto out;
> +
> +	element = kzalloc(PROC_PID_BUF_SZ, GFP_KERNEL);
> +	vmc = kzalloc(PROC_PID_BUF_SZ, GFP_KERNEL);
> +	vmm = kzalloc(PROC_PID_BUF_SZ, GFP_KERNEL);
> +
> +	if (!element || !vmm || !vmc)
> +		goto out_free;
> +
> +	wcount = -EFAULT;
> +	if (copy_from_user(buffer, buf, count))
> +		goto out_free;
> +
> +	i = sscanf(buffer, "%s %s %s", element, vmc, vmm);
> +
> +	if (i < 3)
> +		goto out_free;
> +
> +	for (i = 0; i < strlen(element); i++)
> +		element[i] = tolower(element[i]);

I don't think we should fix user mistakes like this...

> +
> +	if (!strncmp(vmc, "unlimited", 9))
> +		new_rlim.rlim_cur = RLIM_INFINITY;
> +	else
> +		new_rlim.rlim_cur = simple_strtoull(vmc, NULL, 10);

rlim_cur and rlim_max are unsigned long so you should use simple_strtoul

> +
> +	if (!strncmp(vmm, "unlimited", 9))
> +		new_rlim.rlim_max = RLIM_INFINITY;
> +	else
> +		new_rlim.rlim_max = simple_strtoull(vmm, NULL, 10);
> +
> +
> +	for (i = 0; i < RLIM_NLIMITS; i++) {
> +		if ((lnames[i].match) &&

match is always not null, you can drop this check

> +		    !strncmp(element, lnames[i].match,
> +		     strlen(lnames[i].match))) {
> +			index = i;
> +			break;
> +		}
> +	}
> +
> +	wcount = -EBUSY;
> +
> +	if (!lock_task_sighand(task, &flags))
> +		goto out_free;
> +
> +	wcount = -ENOENT;
> +
> +	if ((index >= 0) && (index < RLIM_NLIMITS))
> +		wcount = do_setrlimit(index, &new_rlim, task);
> +
> +	unlock_task_sighand(task, &flags);
> +
> +out_free:
> +	kfree(element);
> +	kfree(vmc);
> +	kfree(vmm);
> +	kfree(buffer);
> +out:
> +	if (!wcount) {
> +		*ppos += count;
> +		wcount = count;
> +	}
> +	put_task_struct(task);
> +	return wcount;
>  }
> (...)

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 1/3] extend get/setrlimit to support setting rlimits external to a process (v4)
  2009-10-08 21:32       ` Marcin Slusarz
@ 2009-10-09  2:00         ` Neil Horman
  0 siblings, 0 replies; 107+ messages in thread
From: Neil Horman @ 2009-10-09  2:00 UTC (permalink / raw)
  To: Marcin Slusarz; +Cc: linux-kernel, akpm

On Thu, Oct 08, 2009 at 11:32:03PM +0200, Marcin Slusarz wrote:
> I found some new issues in this patch, sorry ;).
> 
> Neil Horman wrote:
> > (...)
> > diff --git a/fs/proc/base.c b/fs/proc/base.c
> > index 6f742f6..631f01b 100644
> > --- a/fs/proc/base.c
> > +++ b/fs/proc/base.c
> > @@ -49,6 +49,8 @@
> >  
> >  #include <asm/uaccess.h>
> >  
> > +#include <linux/string.h>
> > +#include <linux/ctype.h>
> >  #include <linux/errno.h>
> >  #include <linux/time.h>
> >  #include <linux/proc_fs.h>
> > @@ -455,72 +457,193 @@ static int proc_oom_score(struct task_struct *task, char *buffer)
> >  struct limit_names {
> >  	char *name;
> >  	char *unit;
> > +	char *match;
> >  };
> >  
> >  static const struct limit_names lnames[RLIM_NLIMITS] = {
> > -	[RLIMIT_CPU] = {"Max cpu time", "ms"},
> > -	[RLIMIT_FSIZE] = {"Max file size", "bytes"},
> > -	[RLIMIT_DATA] = {"Max data size", "bytes"},
> > -	[RLIMIT_STACK] = {"Max stack size", "bytes"},
> > -	[RLIMIT_CORE] = {"Max core file size", "bytes"},
> > -	[RLIMIT_RSS] = {"Max resident set", "bytes"},
> > -	[RLIMIT_NPROC] = {"Max processes", "processes"},
> > -	[RLIMIT_NOFILE] = {"Max open files", "files"},
> > -	[RLIMIT_MEMLOCK] = {"Max locked memory", "bytes"},
> > -	[RLIMIT_AS] = {"Max address space", "bytes"},
> > -	[RLIMIT_LOCKS] = {"Max file locks", "locks"},
> > -	[RLIMIT_SIGPENDING] = {"Max pending signals", "signals"},
> > -	[RLIMIT_MSGQUEUE] = {"Max msgqueue size", "bytes"},
> > -	[RLIMIT_NICE] = {"Max nice priority", NULL},
> > -	[RLIMIT_RTPRIO] = {"Max realtime priority", NULL},
> > -	[RLIMIT_RTTIME] = {"Max realtime timeout", "us"},
> > +	[RLIMIT_CPU] = {"Max cpu time", "ms", "cpu"},
> > +	[RLIMIT_FSIZE] = {"Max file size", "bytes", "fsize"},
> > +	[RLIMIT_DATA] = {"Max data size", "bytes", "data"},
> > +	[RLIMIT_STACK] = {"Max stack size", "bytes", "stack"},
> > +	[RLIMIT_CORE] = {"Max core file size", "bytes", "core"},
> > +	[RLIMIT_RSS] = {"Max resident set", "bytes", "rss"},
> > +	[RLIMIT_NPROC] = {"Max processes", "processes", "nproc"},
> > +	[RLIMIT_NOFILE] = {"Max open files", "files", "nofile"},
> > +	[RLIMIT_MEMLOCK] = {"Max locked memory", "bytes", "memlock"},
> > +	[RLIMIT_AS] = {"Max address space", "bytes", "as"},
> > +	[RLIMIT_LOCKS] = {"Max file locks", "locks", "locks"},
> > +	[RLIMIT_SIGPENDING] = {"Max pending signals", "signals", "sigpending"},
> > +	[RLIMIT_MSGQUEUE] = {"Max msgqueue size", "bytes", "msgqueue"},
> > +	[RLIMIT_NICE] = {"Max nice priority", NULL, "nice"},
> > +	[RLIMIT_RTPRIO] = {"Max realtime priority", NULL, "rtprio"},
> > +	[RLIMIT_RTTIME] = {"Max realtime timeout", "us", "rttime"},
> >  };
> 
> There's no way user can figure out what's the "match" for every limit.
> Maybe you could print it after "limit name"?
> 
I was figuring we could just document the names, but sure, thats fine.  I'll
likely do a format in which I do "Limit Name(id)" to display the name and id.

> > +	bufptr = kzalloc(PAGE_SIZE, GFP_KERNEL);
> 
> I think you could derive size of allocation from RLIM_NLIMITS.
> If I'm reading correctly it will be something like (RLIM_NLIMITS + 1) * 80.
> 
meh, ok.  It should be more like *90 if I add the id, but we can do that.

> > +
> > +	for (i = 0; i < strlen(element); i++)
> > +		element[i] = tolower(element[i]);
> 
> I don't think we should fix user mistakes like this...
> 
I guess not, if we display the id's

> > +
> > +	if (!strncmp(vmc, "unlimited", 9))
> > +		new_rlim.rlim_cur = RLIM_INFINITY;
> > +	else
> > +		new_rlim.rlim_cur = simple_strtoull(vmc, NULL, 10);
> 
> rlim_cur and rlim_max are unsigned long so you should use simple_strtoul
> 
Ok

> > +
> > +	if (!strncmp(vmm, "unlimited", 9))
> > +		new_rlim.rlim_max = RLIM_INFINITY;
> > +	else
> > +		new_rlim.rlim_max = simple_strtoull(vmm, NULL, 10);
> > +
> > +
> > +	for (i = 0; i < RLIM_NLIMITS; i++) {
> > +		if ((lnames[i].match) &&
> 
> match is always not null, you can drop this check
> 
Ok.

fine, one more version.  It'll take me a few days to test, the system I
developed this on is otherwise occupied at the moment.

Neil


^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v5)
  2009-10-01 17:15 ` [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v3) Neil Horman
                     ` (3 preceding siblings ...)
  2009-10-05  0:26   ` [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v4) Neil Horman
@ 2009-10-12 16:13   ` Neil Horman
  2009-10-12 16:20     ` [PATCH 1/3] " Neil Horman
                       ` (4 more replies)
  4 siblings, 5 replies; 107+ messages in thread
From: Neil Horman @ 2009-10-12 16:13 UTC (permalink / raw)
  To: linux-kernel; +Cc: akpm, marcin.slusarz, nhorman

Ok, Sorry for the delay, I had several other items that I needed to finish.
Version 5 of this patch set, taking Marcin's notes into account.

Change Notes:

1) Fixed up various buffer leaks, sizings, and other misc. items that Marcin
pointed out in his last post to this thread

2) Added documentation in Documentation/filesystems/proc.txt so that users
would have a better idea about how to use the proc interface here (I figured
that the syscall interface would be rather self-explanatory and get augmented
into the man pages soon enough)

Neil


Summary

It's been requested often that we have the ability to read and modify process
rlimit values from contexts external to the owning process.  Ideally this allows
sysadmins to adjust rlimits on long running processes without the need to stop
and restart those processes, which incurs undesirable downtime.  This patch
enables that functionality.  It does so in two places.  First it enables process
limit setting by writing to the /proc/pid/limits file a string in the format:
<limit> <current limit> <max limit> > /proc/<pid>/limits
where limit is one of
[as,core,cpu,data,fsize,locks,memlock,msgqueue,nice,nofile,nproc,rss,rtprio,rttime,sigpending,stack]

Secondly it allows for programmatic setting of these limits via 2 new syscalls,
getprlimit, and setprlimit, which act in an identical fashion to getrlimit and
setrlimit respectively, except that they accept a process id as an extra
argument, to specify the process id of the rlimit values that you wish to
read/write.

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>



^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 1/3] extend get/setrlimit to support setting rlimits external to a process (v5)
  2009-10-12 16:13   ` [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v5) Neil Horman
@ 2009-10-12 16:20     ` Neil Horman
  2009-10-12 16:25     ` [PATCH 2/3] " Neil Horman
                       ` (3 subsequent siblings)
  4 siblings, 0 replies; 107+ messages in thread
From: Neil Horman @ 2009-10-12 16:20 UTC (permalink / raw)
  To: linux-kernel; +Cc: akpm, marcin.slusarz, nhorman

Augment /proc/<pid>/limits file to support limit setting

It was suggested to me recently that we support a mechanism by which we can set
various process limits from points external to the process.  The reasoning being
that some processes are very long lived, and it would be beneficial to these
long lived processes if we could modify their various limits without needing to
kill them, adjust the limits for the user and restarting them.  While individual
application can certainly export this control on their own, it would be nice if
such functionality were available to a sysadmin, without needing to have each
application re-invent the wheel.

As such, I've implemented the below patch, which makes /proc/pid/limits writable
for each process.  By writing the following format:
<limit> <current value> <max value>
to the limits file, an administrator can now dynamically change the limits for
the respective process.  Tested by myself with good results.

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>


 Documentation/filesystems/proc.txt |   26 +++++
 fs/proc/base.c                     |  170 +++++++++++++++++++++++++++++--------
 include/linux/sched.h              |    3 
 kernel/sys.c                       |   48 ++++++----
 4 files changed, 195 insertions(+), 52 deletions(-)

diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index 2c48f94..62fd7f5 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -38,6 +38,7 @@ Table of Contents
   3.3	/proc/<pid>/io - Display the IO accounting fields
   3.4	/proc/<pid>/coredump_filter - Core dump filtering settings
   3.5	/proc/<pid>/mountinfo - Information about mounts
+  3.6	/proc/<pid>/limits - Information about process rlimit value
 
 
 ------------------------------------------------------------------------------
@@ -1408,3 +1409,28 @@ For more information on mount propagation see:
 
   Documentation/filesystems/sharedsubtree.txt
 
+3.6	/proc/<pid>/limits - Information about rlimit values
+------------------------------------------------------------
+
+This file contains information regarding the process's rlimit settings.
+Normally this information is only available programmatically via the
+getrlimit/setrlimit syscalls.  This file exports it so that sysadmins may
+dynamically see their values.  This file contains lines of the form:
+
+Limit     Set String     Soft Limit     Hard Limit     Units 
+
+Limit - A description of the limit
+Set String - A concise string defining the limit meaning
+Soft Limit - The rlim_cur value returned by getrlimit for the corresponding limit
+Hard Limit - The rlim_max value returned by getrlimit for the corresponding limit
+Units	   - The units that the given limit is measured in
+
+Limits for a given process can also be set by writing to this file a string in
+the following format:
+<Set String> [value|"unlimited"] [value|"unlimited"] > /proc/<pid>/limits
+
+For example to set the maximum core files size for process 2000 to a soft limit
+of 1024 bytes and a max limit of unlimited, we would do the following from a
+shell prompt:
+echo core 1024 unlimited > /proc/2000/limits
+
diff --git a/fs/proc/base.c b/fs/proc/base.c
index dd5bed0..823745b 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -49,6 +49,8 @@
 
 #include <asm/uaccess.h>
 
+#include <linux/string.h>
+#include <linux/ctype.h>
 #include <linux/errno.h>
 #include <linux/time.h>
 #include <linux/proc_fs.h>
@@ -456,72 +458,172 @@ static int proc_oom_score(struct task_struct *task, char *buffer)
 struct limit_names {
 	char *name;
 	char *unit;
+	char *match;
 };
 
 static const struct limit_names lnames[RLIM_NLIMITS] = {
-	[RLIMIT_CPU] = {"Max cpu time", "seconds"},
-	[RLIMIT_FSIZE] = {"Max file size", "bytes"},
-	[RLIMIT_DATA] = {"Max data size", "bytes"},
-	[RLIMIT_STACK] = {"Max stack size", "bytes"},
-	[RLIMIT_CORE] = {"Max core file size", "bytes"},
-	[RLIMIT_RSS] = {"Max resident set", "bytes"},
-	[RLIMIT_NPROC] = {"Max processes", "processes"},
-	[RLIMIT_NOFILE] = {"Max open files", "files"},
-	[RLIMIT_MEMLOCK] = {"Max locked memory", "bytes"},
-	[RLIMIT_AS] = {"Max address space", "bytes"},
-	[RLIMIT_LOCKS] = {"Max file locks", "locks"},
-	[RLIMIT_SIGPENDING] = {"Max pending signals", "signals"},
-	[RLIMIT_MSGQUEUE] = {"Max msgqueue size", "bytes"},
-	[RLIMIT_NICE] = {"Max nice priority", NULL},
-	[RLIMIT_RTPRIO] = {"Max realtime priority", NULL},
-	[RLIMIT_RTTIME] = {"Max realtime timeout", "us"},
+	[RLIMIT_CPU] = {"Max cpu time", "ms", "cpu"},
+	[RLIMIT_FSIZE] = {"Max file size", "bytes", "fsize"},
+	[RLIMIT_DATA] = {"Max data size", "bytes", "data"},
+	[RLIMIT_STACK] = {"Max stack size", "bytes", "stack"},
+	[RLIMIT_CORE] = {"Max core file size", "bytes", "core"},
+	[RLIMIT_RSS] = {"Max resident set", "bytes", "rss"},
+	[RLIMIT_NPROC] = {"Max processes", "processes", "nproc"},
+	[RLIMIT_NOFILE] = {"Max open files", "files", "nofile"},
+	[RLIMIT_MEMLOCK] = {"Max locked memory", "bytes", "memlock"},
+	[RLIMIT_AS] = {"Max address space", "bytes", "as"},
+	[RLIMIT_LOCKS] = {"Max file locks", "locks", "locks"},
+	[RLIMIT_SIGPENDING] = {"Max pending signals", "signals", "sigpending"},
+	[RLIMIT_MSGQUEUE] = {"Max msgqueue size", "bytes", "msgqueue"},
+	[RLIMIT_NICE] = {"Max nice priority", NULL, "nice"},
+	[RLIMIT_RTPRIO] = {"Max realtime priority", NULL, "rtprio"},
+	[RLIMIT_RTTIME] = {"Max realtime timeout", "us", "rttime"},
 };
 
 /* Display limits for a process */
-static int proc_pid_limits(struct task_struct *task, char *buffer)
+static ssize_t proc_pid_limit_read(struct file *file, char __user *buf,
+		size_t count, loff_t *ppos)
 {
 	unsigned int i;
-	int count = 0;
 	unsigned long flags;
-	char *bufptr = buffer;
+	char *bufptr;
+	size_t bcount = 0;
+	size_t ccount = 0;
+	struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
 
 	struct rlimit rlim[RLIM_NLIMITS];
 
+	bufptr = kzalloc((RLIM_NLIMITS+1)*90, GFP_KERNEL);
+	if (!bufptr)
+		goto out;
+
 	if (!lock_task_sighand(task, &flags))
-		return 0;
+		goto out;
+
 	memcpy(rlim, task->signal->rlim, sizeof(struct rlimit) * RLIM_NLIMITS);
 	unlock_task_sighand(task, &flags);
 
 	/*
 	 * print the file header
 	 */
-	count += sprintf(&bufptr[count], "%-25s %-20s %-20s %-10s\n",
-			"Limit", "Soft Limit", "Hard Limit", "Units");
+	bcount += sprintf(&bufptr[bcount], "%-25s %-12s %-20s %-20s %-10s\n",
+			"Limit", "Set String", "Soft Limit", "Hard Limit", "Units");
 
 	for (i = 0; i < RLIM_NLIMITS; i++) {
 		if (rlim[i].rlim_cur == RLIM_INFINITY)
-			count += sprintf(&bufptr[count], "%-25s %-20s ",
-					 lnames[i].name, "unlimited");
+			bcount += sprintf(&bufptr[bcount], "%-25s %-12s %-20s ",
+					lnames[i].name ,lnames[i].match,
+					"unlimited");
 		else
-			count += sprintf(&bufptr[count], "%-25s %-20lu ",
-					 lnames[i].name, rlim[i].rlim_cur);
-
+			bcount += sprintf(&bufptr[bcount], "%-25s %-12s %-20lu ",
+					lnames[i].name, lnames[i].match,
+					rlim[i].rlim_cur);
 		if (rlim[i].rlim_max == RLIM_INFINITY)
-			count += sprintf(&bufptr[count], "%-20s ", "unlimited");
+			bcount += sprintf(&bufptr[bcount], "%-20s ",
+					"unlimited");
 		else
-			count += sprintf(&bufptr[count], "%-20lu ",
+			bcount += sprintf(&bufptr[bcount], "%-20lu ",
 					 rlim[i].rlim_max);
-
 		if (lnames[i].unit)
-			count += sprintf(&bufptr[count], "%-10s\n",
+			bcount += sprintf(&bufptr[bcount], "%-10s\n",
 					 lnames[i].unit);
 		else
-			count += sprintf(&bufptr[count], "\n");
+			bcount += sprintf(&bufptr[bcount], "\n");
 	}
+	if (*ppos >= bcount)
+		goto out_task;
+ 
+	ccount = min(count, (size_t)(bcount-(*ppos)));
+	ccount = ccount - copy_to_user(buf, &bufptr[*ppos], ccount);
+	*ppos += ccount;
+	kfree(bufptr);
+ out_task:
+	put_task_struct(task);
+ out:
+	return ccount;
+}
 
+static ssize_t proc_pid_limit_write(struct file *file, const char __user *buf,
+		size_t count, loff_t *ppos)
+{
+	char *buffer;
+	char *element, *vmc, *vmm;
+	struct rlimit new_rlim;
+	unsigned long flags;
+	int i;
+	int index = -1;
+	size_t wcount = 0;
+	struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
+ 
+ 
+	if (*ppos != 0)
+		goto out;
+ 
+	if (count > 128)
+		goto out;
+	buffer = kzalloc(128, GFP_KERNEL);
+ 
+	if (!buffer)
+		goto out;
+ 
+	element = kzalloc(sizeof(buffer), GFP_KERNEL);
+	vmc = kzalloc(sizeof(buffer), GFP_KERNEL);
+	vmm = kzalloc(sizeof(buffer), GFP_KERNEL);
+ 
+	if (!element || !vmm || !vmc)
+		goto out_free;
+ 
+	wcount = count - copy_from_user(buffer, buf, count);
+	if (wcount < count)
+		goto out_free;
+ 
+	i = sscanf(buffer, "%s %s %s", element, vmc, vmm);
+ 
+	if (i < 3)
+		goto out_free;
+ 
+	if (!strncmp(vmc, "unlimited", 9))
+		new_rlim.rlim_cur = RLIM_INFINITY;
+	else
+		new_rlim.rlim_cur = simple_strtoul(vmc, NULL, 10);
+ 
+	if (!strncmp(vmm, "unlimited", 9))
+		new_rlim.rlim_max = RLIM_INFINITY;
+	else
+		new_rlim.rlim_max = simple_strtoul(vmm, NULL, 10);
+ 
+	for (i = 0; i < RLIM_NLIMITS; i++) {
+		if (!strncmp(element, lnames[i].match,
+		     strlen(lnames[i].match))) {
+			index = i;
+			break;
+		}
+	}
+ 
+	if (!lock_task_sighand(task, &flags))
+		goto out_free;
+ 
+	if ((index >= 0) && (index < RLIM_NLIMITS))
+		do_setrlimit(index, &new_rlim, task);
+ 
+	unlock_task_sighand(task, &flags);
+ 
+  out_free:
+	kfree(element);
+	kfree(vmc);
+	kfree(vmm);
+	kfree(buffer);
+  out:
+	*ppos += count;
+	put_task_struct(task);
 	return count;
 }
 
+static const struct file_operations proc_limit_operations = {
+	.read           = proc_pid_limit_read,
+	.write          = proc_pid_limit_write,
+};
+
 #ifdef CONFIG_HAVE_ARCH_TRACEHOOK
 static int proc_pid_syscall(struct task_struct *task, char *buffer)
 {
@@ -2501,7 +2603,7 @@ static const struct pid_entry tgid_base_stuff[] = {
 	INF("auxv",       S_IRUSR, proc_pid_auxv),
 	ONE("status",     S_IRUGO, proc_pid_status),
 	ONE("personality", S_IRUSR, proc_pid_personality),
-	INF("limits",	  S_IRUSR, proc_pid_limits),
+	REG("limits",	  S_IRUSR|S_IWUSR, proc_limit_operations),
 #ifdef CONFIG_SCHED_DEBUG
 	REG("sched",      S_IRUGO|S_IWUSR, proc_pid_sched_operations),
 #endif
@@ -2836,7 +2938,7 @@ static const struct pid_entry tid_base_stuff[] = {
 	INF("auxv",      S_IRUSR, proc_pid_auxv),
 	ONE("status",    S_IRUGO, proc_pid_status),
 	ONE("personality", S_IRUSR, proc_pid_personality),
-	INF("limits",	 S_IRUSR, proc_pid_limits),
+	REG("limits",	 S_IRUSR|S_IWUSR, proc_limit_operations),
 #ifdef CONFIG_SCHED_DEBUG
 	REG("sched",     S_IRUGO|S_IWUSR, proc_pid_sched_operations),
 #endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2be3760..be54f28 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -672,6 +672,9 @@ struct signal_struct {
 	int oom_adj;	/* OOM kill score adjustment (bit shift) */
 };
 
+extern int do_setrlimit(unsigned int resource, struct rlimit *new_rlim,
+			struct task_struct *tsk);
+
 /* Context switch must be unlocked if interrupts are to be enabled */
 #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
 # define __ARCH_WANT_UNLOCKED_CTXSW
diff --git a/kernel/sys.c b/kernel/sys.c
index 1828f8d..0e210a4 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1238,41 +1238,41 @@ SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource,
 
 #endif
 
-SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
+int do_setrlimit(unsigned int resource, struct rlimit *new_rlim,
+		 struct task_struct *tsk)
 {
-	struct rlimit new_rlim, *old_rlim;
 	int retval;
+	struct rlimit *old_rlim;
 
-	if (resource >= RLIM_NLIMITS)
-		return -EINVAL;
-	if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
-		return -EFAULT;
-	if (new_rlim.rlim_cur > new_rlim.rlim_max)
+
+	if (new_rlim->rlim_cur > new_rlim->rlim_max)
 		return -EINVAL;
-	old_rlim = current->signal->rlim + resource;
-	if ((new_rlim.rlim_max > old_rlim->rlim_max) &&
+	old_rlim = tsk->signal->rlim + resource;
+
+	if ((new_rlim->rlim_max > old_rlim->rlim_max) &&
 	    !capable(CAP_SYS_RESOURCE))
 		return -EPERM;
-	if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > sysctl_nr_open)
+
+	if (resource == RLIMIT_NOFILE && new_rlim->rlim_max > sysctl_nr_open)
 		return -EPERM;
 
-	retval = security_task_setrlimit(resource, &new_rlim);
+	retval = security_task_setrlimit(resource, new_rlim);
 	if (retval)
 		return retval;
 
-	if (resource == RLIMIT_CPU && new_rlim.rlim_cur == 0) {
+	if (resource == RLIMIT_CPU && new_rlim->rlim_cur == 0) {
 		/*
 		 * The caller is asking for an immediate RLIMIT_CPU
 		 * expiry.  But we use the zero value to mean "it was
 		 * never set".  So let's cheat and make it one second
 		 * instead
 		 */
-		new_rlim.rlim_cur = 1;
+		new_rlim->rlim_cur = 1;
 	}
 
-	task_lock(current->group_leader);
-	*old_rlim = new_rlim;
-	task_unlock(current->group_leader);
+	task_lock(tsk->group_leader);
+	*old_rlim = *new_rlim;
+	task_unlock(tsk->group_leader);
 
 	if (resource != RLIMIT_CPU)
 		goto out;
@@ -1283,14 +1283,26 @@ SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
 	 * very long-standing error, and fixing it now risks breakage of
 	 * applications, so we live with it
 	 */
-	if (new_rlim.rlim_cur == RLIM_INFINITY)
+	if (new_rlim->rlim_cur == RLIM_INFINITY)
 		goto out;
 
-	update_rlimit_cpu(new_rlim.rlim_cur);
+	update_rlimit_cpu(new_rlim->rlim_cur);
 out:
 	return 0;
 }
 
+SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
+{
+	struct rlimit new_rlim;
+
+	if (resource >= RLIM_NLIMITS)
+		return -EINVAL;
+	if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
+		return -EFAULT;
+
+	return do_setrlimit(resource, &new_rlim, current);
+}
+
 /*
  * It would make sense to put struct rusage in the task_struct,
  * except that would make the task_struct be *really big*.  After

^ permalink raw reply related	[flat|nested] 107+ messages in thread

* Re: [PATCH 2/3] extend get/setrlimit to support setting rlimits external to a process (v5)
  2009-10-12 16:13   ` [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v5) Neil Horman
  2009-10-12 16:20     ` [PATCH 1/3] " Neil Horman
@ 2009-10-12 16:25     ` Neil Horman
  2009-10-12 16:27     ` [PATCH 3/3] " Neil Horman
                       ` (2 subsequent siblings)
  4 siblings, 0 replies; 107+ messages in thread
From: Neil Horman @ 2009-10-12 16:25 UTC (permalink / raw)
  To: linux-kernel; +Cc: akpm, marcin.slusarz

Add syscall infrastructure for getprlimit/setprlimit

This patch adds the definitions for the get/setprlimit syscalls.  They are
identical to the get/setrlimit calls, except that they allow the caller to
manipulate limits for processes other than themselves.

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>


 include/linux/syscalls.h |    4 ++
 kernel/sys.c             |   84 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 88 insertions(+)

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index a990ace..9f357ab 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -702,11 +702,15 @@ asmlinkage long sys_newuname(struct new_utsname __user *name);
 
 asmlinkage long sys_getrlimit(unsigned int resource,
 				struct rlimit __user *rlim);
+asmlinkage long sys_getprlimit(pid_t pid, unsigned int resource,
+				struct rlimit __user *rlim);
 #if defined(COMPAT_RLIM_OLD_INFINITY) || !(defined(CONFIG_IA64))
 asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit __user *rlim);
 #endif
 asmlinkage long sys_setrlimit(unsigned int resource,
 				struct rlimit __user *rlim);
+asmlinkage long	sys_setprlimit(pid_t pid, unsigned int resource,
+				struct rlimit __user *rlim);
 asmlinkage long sys_getrusage(int who, struct rusage __user *ru);
 asmlinkage long sys_umask(int mask);
 
diff --git a/kernel/sys.c b/kernel/sys.c
index 0e210a4..6ca9e7f 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1213,6 +1213,50 @@ SYSCALL_DEFINE2(getrlimit, unsigned int, resource, struct rlimit __user *, rlim)
 	}
 }
 
+SYSCALL_DEFINE3(getprlimit, pid_t, pid, unsigned int, resource,
+		struct rlimit __user *, rlim)
+{
+	unsigned long flags;
+	struct task_struct *tsk;
+	struct pid *ppid;
+	int retval = -EINVAL;
+
+	ppid = find_get_pid(pid);
+	if (!ppid)
+		goto out;
+
+	tsk = get_pid_task(ppid, PIDTYPE_PID);
+
+	if (!tsk)
+		goto out_put_pid;
+
+	if (resource >= RLIM_NLIMITS)
+		goto out_put_all;
+
+	retval = -EBUSY;
+	if (!lock_task_sighand(tsk, &flags))
+		goto out_put_all;
+
+	else {
+		struct rlimit val;
+
+		task_lock(tsk->group_leader);
+		val = current->signal->rlim[resource];
+		task_unlock(tsk->group_leader);
+		retval = copy_to_user(rlim, &val, sizeof(*rlim)) ? -EFAULT : 0;
+	}
+
+	unlock_task_sighand(tsk, &flags);
+
+out_put_all:
+	put_task_struct(tsk);
+out_put_pid:
+	put_pid(ppid);
+out:
+	return retval;
+}
+
+
 #ifdef __ARCH_WANT_SYS_OLD_GETRLIMIT
 
 /*
@@ -1303,6 +1347,46 @@ SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
 	return do_setrlimit(resource, &new_rlim, current);
 }
 
+SYSCALL_DEFINE3(setprlimit, pid_t, pid, unsigned int, resource,
+		struct rlimit __user *, rlim)
+{
+	struct task_struct *tsk;
+	struct pid *ppid;
+	unsigned long flags;
+	struct rlimit new_rlim;
+	int retval = -EINVAL;
+
+	ppid = find_get_pid(pid);
+	if (!ppid)
+		goto out;
+
+	tsk = get_pid_task(ppid, PIDTYPE_PID);
+
+	if (!tsk)
+		goto out_put_pid;
+
+	if (resource >= RLIM_NLIMITS)
+		goto out_put_all;
+
+	retval = -EFAULT;
+	if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
+		goto out_put_all;
+
+	if (!lock_task_sighand(tsk, &flags))
+		goto out_put_all;
+
+	retval = do_setrlimit(resource, &new_rlim, tsk);
+
+	unlock_task_sighand(tsk, &flags);
+
+out_put_all:
+	put_task_struct(tsk);
+out_put_pid:
+	put_pid(ppid);
+out:
+	return retval;
+}
+
 /*
  * It would make sense to put struct rusage in the task_struct,
  * except that would make the task_struct be *really big*.  After

^ permalink raw reply related	[flat|nested] 107+ messages in thread

* Re: [PATCH 3/3] extend get/setrlimit to support setting rlimits external to a process (v5)
  2009-10-12 16:13   ` [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v5) Neil Horman
  2009-10-12 16:20     ` [PATCH 1/3] " Neil Horman
  2009-10-12 16:25     ` [PATCH 2/3] " Neil Horman
@ 2009-10-12 16:27     ` Neil Horman
  2009-10-12 20:13     ` [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v6) Neil Horman
  2009-10-12 21:58     ` [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v5) Andrew Morton
  4 siblings, 0 replies; 107+ messages in thread
From: Neil Horman @ 2009-10-12 16:27 UTC (permalink / raw)
  To: linux-kernel; +Cc: akpm, marcin.slusarz, nhorman

Define __NR_getprlimit and __NR_setprlimit syscalls for asm-generic

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>

unistd.h |    7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)


 unistd.h |    7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/include/asm-generic/unistd.h b/include/asm-generic/unistd.h
index d76b66a..4912f71 100644
--- a/include/asm-generic/unistd.h
+++ b/include/asm-generic/unistd.h
@@ -623,8 +623,13 @@ __SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo)
 #define __NR_perf_event_open 241
 __SYSCALL(__NR_perf_event_open, sys_perf_event_open)
 
+#define __NR_getprlimit 242
+__SYSCALL(__NR_getprlimit, sys_getprlimit)
+#define __NR_setprlimit 243
+__SYSCALL(__NR_setprlimit, sys_setprlimit)
+
 #undef __NR_syscalls
-#define __NR_syscalls 242
+#define __NR_syscalls 244
 
 /*
  * All syscalls below here should go away really,

^ permalink raw reply related	[flat|nested] 107+ messages in thread

* Re: [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v6)
  2009-10-12 16:13   ` [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v5) Neil Horman
                       ` (2 preceding siblings ...)
  2009-10-12 16:27     ` [PATCH 3/3] " Neil Horman
@ 2009-10-12 20:13     ` Neil Horman
  2009-10-12 20:20       ` [PATCH 1/3] " Neil Horman
                         ` (3 more replies)
  2009-10-12 21:58     ` [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v5) Andrew Morton
  4 siblings, 4 replies; 107+ messages in thread
From: Neil Horman @ 2009-10-12 20:13 UTC (permalink / raw)
  To: linux-kernel; +Cc: akpm, marcin.slusarz, nhorman

Sigh, marcin just pointed out that I'm an idiot (thanks :) ).  I posted a
version of this patch that was incorrect.  Here's the right one, with all the
fixes he requested.

Neil

Summary:

It's been requested often that we have the ability to read and modify process
rlimit values from contexts external to the owning process.  Ideally this allows
sysadmins to adjust rlimits on long running processes without the need to stop
and restart those processes, which incurs undesirable downtime.  This patch
enables that functionality.  It does so in two places.  First, it enables process
limit setting by writing to the /proc/pid/limits file a string in the format:
<limit> <current limit> <max limit> > /proc/<pid>/limits
where limit is one of
[as,core,cpu,data,fsize,locks,memlock,msgqueue,nice,nofile,nproc,rss,rtprio,rttime,sigpending,stack]

Secondly, it allows for programmatic setting of these limits via two new syscalls,
getprlimit and setprlimit, which act in an identical fashion to getrlimit and
setrlimit respectively, except that they accept a process id as an extra
argument, to specify the process whose rlimit values you wish to
read/write.

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>




^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 1/3] extend get/setrlimit to support setting rlimits external to a process (v6)
  2009-10-12 20:13     ` [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v6) Neil Horman
@ 2009-10-12 20:20       ` Neil Horman
  2009-10-12 20:23       ` [PATCH 2/3] " Neil Horman
                         ` (2 subsequent siblings)
  3 siblings, 0 replies; 107+ messages in thread
From: Neil Horman @ 2009-10-12 20:20 UTC (permalink / raw)
  To: linux-kernel; +Cc: akpm, marcin.slusarz, nhorman

Augment /proc/<pid>/limits file to support limit setting

It was suggested to me recently that we support a mechanism by which we can set
various process limits from points external to the process.  The reasoning is
that some processes are very long lived, and it would be beneficial to these
long lived processes if we could modify their various limits without needing to
kill them, adjust the limits for the user, and restart them.  While individual
applications can certainly export this control on their own, it would be nice if
such functionality were available to a sysadmin, without needing to have each
application re-invent the wheel.

As such, I've implemented the below patch, which makes /proc/pid/limits writable
for each process.  By writing the following format:
<limit> <current value> <max value>
to the limits file, an administrator can now dynamically change the limits for
the respective process.  Tested by myself with good results.

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>


 Documentation/filesystems/proc.txt |   26 +++++
 fs/proc/base.c                     |  184 ++++++++++++++++++++++++++++++-------
 include/linux/sched.h              |    3 
 kernel/sys.c                       |   48 ++++++---
 4 files changed, 209 insertions(+), 52 deletions(-)


diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index 2c48f94..62fd7f5 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -38,6 +38,7 @@ Table of Contents
   3.3	/proc/<pid>/io - Display the IO accounting fields
   3.4	/proc/<pid>/coredump_filter - Core dump filtering settings
   3.5	/proc/<pid>/mountinfo - Information about mounts
+  3.6	/proc/<pid>/limits - Information about process rlimit value
 
 
 ------------------------------------------------------------------------------
@@ -1408,3 +1409,28 @@ For more information on mount propagation see:
 
   Documentation/filesystems/sharedsubtree.txt
 
+3.6	/proc/<pid>/limits - Information about rlimit values
+------------------------------------------------------------
+
+This file contains information regarding the process's rlimit settings.
+Normally this information is only available programmatically via the
+getrlimit/setrlimit syscalls.  This file exports it so that sysadmins may
+dynamically see their values.  This file contains lines of the form:
+
+Limit     Set String     Soft Limit     Hard Limit     Units 
+
+Limit - A description of the limit
+Set String - A concise string identifying the limit (used when setting it)
+Soft Limit - The rlim_cur value returned by getrlimit for the corresponding limit
+Hard Limit - The rlim_max value returned by getrlimit for the corresponding limit
+Units	   - The units that the given limit is measured in
+
+Limits for a given process can also be set by writing to this file a
+string in the following format:
+<Set String> [value|"unlimited"] [value|"unlimited"] > /proc/<pid>/limits
+
+For example, to set the maximum core file size for process 2000 to a soft limit
+of 1024 bytes and a hard limit of unlimited, we would do the following from a
+shell prompt:
+echo core 1024 unlimited > /proc/2000/limits
+
diff --git a/fs/proc/base.c b/fs/proc/base.c
index dd5bed0..69d2a55 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -49,6 +49,8 @@
 
 #include <asm/uaccess.h>
 
+#include <linux/string.h>
+#include <linux/ctype.h>
 #include <linux/errno.h>
 #include <linux/time.h>
 #include <linux/proc_fs.h>
@@ -456,72 +458,186 @@ static int proc_oom_score(struct task_struct *task, char *buffer)
 struct limit_names {
 	char *name;
 	char *unit;
+	char *match;
 };
 
 static const struct limit_names lnames[RLIM_NLIMITS] = {
-	[RLIMIT_CPU] = {"Max cpu time", "seconds"},
-	[RLIMIT_FSIZE] = {"Max file size", "bytes"},
-	[RLIMIT_DATA] = {"Max data size", "bytes"},
-	[RLIMIT_STACK] = {"Max stack size", "bytes"},
-	[RLIMIT_CORE] = {"Max core file size", "bytes"},
-	[RLIMIT_RSS] = {"Max resident set", "bytes"},
-	[RLIMIT_NPROC] = {"Max processes", "processes"},
-	[RLIMIT_NOFILE] = {"Max open files", "files"},
-	[RLIMIT_MEMLOCK] = {"Max locked memory", "bytes"},
-	[RLIMIT_AS] = {"Max address space", "bytes"},
-	[RLIMIT_LOCKS] = {"Max file locks", "locks"},
-	[RLIMIT_SIGPENDING] = {"Max pending signals", "signals"},
-	[RLIMIT_MSGQUEUE] = {"Max msgqueue size", "bytes"},
-	[RLIMIT_NICE] = {"Max nice priority", NULL},
-	[RLIMIT_RTPRIO] = {"Max realtime priority", NULL},
-	[RLIMIT_RTTIME] = {"Max realtime timeout", "us"},
+	[RLIMIT_CPU] = {"Max cpu time", "ms", "cpu"},
+	[RLIMIT_FSIZE] = {"Max file size", "bytes", "fsize"},
+	[RLIMIT_DATA] = {"Max data size", "bytes", "data"},
+	[RLIMIT_STACK] = {"Max stack size", "bytes", "stack"},
+	[RLIMIT_CORE] = {"Max core file size", "bytes", "core"},
+	[RLIMIT_RSS] = {"Max resident set", "bytes", "rss"},
+	[RLIMIT_NPROC] = {"Max processes", "processes", "nproc"},
+	[RLIMIT_NOFILE] = {"Max open files", "files", "nofile"},
+	[RLIMIT_MEMLOCK] = {"Max locked memory", "bytes", "memlock"},
+	[RLIMIT_AS] = {"Max address space", "bytes", "as"},
+	[RLIMIT_LOCKS] = {"Max file locks", "locks", "locks"},
+	[RLIMIT_SIGPENDING] = {"Max pending signals", "signals", "sigpending"},
+	[RLIMIT_MSGQUEUE] = {"Max msgqueue size", "bytes", "msgqueue"},
+	[RLIMIT_NICE] = {"Max nice priority", NULL, "nice"},
+	[RLIMIT_RTPRIO] = {"Max realtime priority", NULL, "rtprio"},
+	[RLIMIT_RTTIME] = {"Max realtime timeout", "us", "rttime"},
 };
 
 /* Display limits for a process */
-static int proc_pid_limits(struct task_struct *task, char *buffer)
+static ssize_t proc_pid_limit_read(struct file *file, char __user *buf,
+		size_t count, loff_t *ppos)
 {
 	unsigned int i;
-	int count = 0;
 	unsigned long flags;
-	char *bufptr = buffer;
+	char *bufptr;
+	size_t bcount = 0;
+	size_t ccount = -ENOMEM;
+	struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
 
 	struct rlimit rlim[RLIM_NLIMITS];
 
+	bufptr = kzalloc((RLIM_NLIMITS+1)*90, GFP_KERNEL);
+	if (!bufptr)
+		goto out;
+
+	ccount = -EBUSY;
+
 	if (!lock_task_sighand(task, &flags))
-		return 0;
+		goto out_free;
+
 	memcpy(rlim, task->signal->rlim, sizeof(struct rlimit) * RLIM_NLIMITS);
 	unlock_task_sighand(task, &flags);
 
 	/*
 	 * print the file header
 	 */
-	count += sprintf(&bufptr[count], "%-25s %-20s %-20s %-10s\n",
-			"Limit", "Soft Limit", "Hard Limit", "Units");
+	bcount += sprintf(&bufptr[bcount], "%-25s %-12s %-20s %-20s %-10s\n",
+			"Limit", "Set String", "Soft Limit", "Hard Limit", "Units");
 
 	for (i = 0; i < RLIM_NLIMITS; i++) {
 		if (rlim[i].rlim_cur == RLIM_INFINITY)
-			count += sprintf(&bufptr[count], "%-25s %-20s ",
-					 lnames[i].name, "unlimited");
+			bcount += sprintf(&bufptr[bcount], "%-25s %-12s %-20s ",
+					lnames[i].name ,lnames[i].match,
+					"unlimited");
 		else
-			count += sprintf(&bufptr[count], "%-25s %-20lu ",
-					 lnames[i].name, rlim[i].rlim_cur);
-
+			bcount += sprintf(&bufptr[bcount], "%-25s %-12s %-20lu ",
+					lnames[i].name, lnames[i].match,
+					rlim[i].rlim_cur);
 		if (rlim[i].rlim_max == RLIM_INFINITY)
-			count += sprintf(&bufptr[count], "%-20s ", "unlimited");
+			bcount += sprintf(&bufptr[bcount], "%-20s ",
+					"unlimited");
 		else
-			count += sprintf(&bufptr[count], "%-20lu ",
+			bcount += sprintf(&bufptr[bcount], "%-20lu ",
 					 rlim[i].rlim_max);
-
 		if (lnames[i].unit)
-			count += sprintf(&bufptr[count], "%-10s\n",
+			bcount += sprintf(&bufptr[bcount], "%-10s\n",
 					 lnames[i].unit);
 		else
-			count += sprintf(&bufptr[count], "\n");
+			bcount += sprintf(&bufptr[bcount], "\n");
+	}
+
+	ccount = -EMSGSIZE;
+
+	if (*ppos >= bcount)
+		goto out_task;
+ 
+	ccount = min(count, (size_t)(bcount-(*ppos)));
+	ccount = ccount - copy_to_user(buf, &bufptr[*ppos], ccount);
+	*ppos += ccount;
+out_task:
+	put_task_struct(task);
+out_free:
+	kfree(bufptr);
+out:
+	return ccount;
+}
+
+#define PROC_PID_BUF_SZ 128
+static ssize_t proc_pid_limit_write(struct file *file, const char __user *buf,
+		size_t count, loff_t *ppos)
+{
+	char *buffer;
+	char *element, *vmc, *vmm;
+	struct rlimit new_rlim;
+	unsigned long flags;
+	int i;
+	int index = -1;
+	size_t wcount = -EMSGSIZE;
+	struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
+ 
+ 
+	if (*ppos != 0)
+		goto out;
+ 
+	if (count > PROC_PID_BUF_SZ)
+		goto out;
+
+	wcount = -ENOMEM;
+	buffer = kzalloc(PROC_PID_BUF_SZ, GFP_KERNEL);
+ 
+	if (!buffer)
+		goto out;
+ 
+	element = kzalloc(PROC_PID_BUF_SZ, GFP_KERNEL);
+	vmc = kzalloc(PROC_PID_BUF_SZ, GFP_KERNEL);
+	vmm = kzalloc(PROC_PID_BUF_SZ, GFP_KERNEL);
+ 
+	if (!element || !vmm || !vmc)
+		goto out_free;
+
+	wcount = -EFAULT;
+ 
+	if (copy_from_user(buffer, buf, count))
+		goto out_free;
+ 
+	i = sscanf(buffer, "%s %s %s", element, vmc, vmm);
+ 
+	if (i < 3)
+		goto out_free;
+ 
+	if (!strncmp(vmc, "unlimited", 9))
+		new_rlim.rlim_cur = RLIM_INFINITY;
+	else
+		new_rlim.rlim_cur = simple_strtoul(vmc, NULL, 10);
+ 
+	if (!strncmp(vmm, "unlimited", 9))
+		new_rlim.rlim_max = RLIM_INFINITY;
+	else
+		new_rlim.rlim_max = simple_strtoul(vmm, NULL, 10);
+ 
+	for (i = 0; i < RLIM_NLIMITS; i++) {
+		if (!strncmp(element, lnames[i].match,
+		     strlen(lnames[i].match))) {
+			index = i;
+			break;
+		}
 	}
 
+	wcount = -EBUSY;
+ 
+	if (!lock_task_sighand(task, &flags))
+		goto out_free;
+
+	wcount = -ENOENT;
+ 
+	if ((index >= 0) && (index < RLIM_NLIMITS))
+		wcount = do_setrlimit(index, &new_rlim, task);
+ 
+	unlock_task_sighand(task, &flags);
+ 
+out_free:
+	kfree(element);
+	kfree(vmc);
+	kfree(vmm);
+	kfree(buffer);
+out:
+	*ppos += count;
+	put_task_struct(task);
 	return count;
 }
 
+static const struct file_operations proc_limit_operations = {
+	.read           = proc_pid_limit_read,
+	.write          = proc_pid_limit_write,
+};
+
 #ifdef CONFIG_HAVE_ARCH_TRACEHOOK
 static int proc_pid_syscall(struct task_struct *task, char *buffer)
 {
@@ -2501,7 +2617,7 @@ static const struct pid_entry tgid_base_stuff[] = {
 	INF("auxv",       S_IRUSR, proc_pid_auxv),
 	ONE("status",     S_IRUGO, proc_pid_status),
 	ONE("personality", S_IRUSR, proc_pid_personality),
-	INF("limits",	  S_IRUSR, proc_pid_limits),
+	REG("limits",	  S_IRUSR|S_IWUSR, proc_limit_operations),
 #ifdef CONFIG_SCHED_DEBUG
 	REG("sched",      S_IRUGO|S_IWUSR, proc_pid_sched_operations),
 #endif
@@ -2836,7 +2952,7 @@ static const struct pid_entry tid_base_stuff[] = {
 	INF("auxv",      S_IRUSR, proc_pid_auxv),
 	ONE("status",    S_IRUGO, proc_pid_status),
 	ONE("personality", S_IRUSR, proc_pid_personality),
-	INF("limits",	 S_IRUSR, proc_pid_limits),
+	REG("limits",	 S_IRUSR|S_IWUSR, proc_limit_operations),
 #ifdef CONFIG_SCHED_DEBUG
 	REG("sched",     S_IRUGO|S_IWUSR, proc_pid_sched_operations),
 #endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2be3760..be54f28 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -672,6 +672,9 @@ struct signal_struct {
 	int oom_adj;	/* OOM kill score adjustment (bit shift) */
 };
 
+extern int do_setrlimit(unsigned int resource, struct rlimit *new_rlim,
+			struct task_struct *tsk);
+
 /* Context switch must be unlocked if interrupts are to be enabled */
 #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
 # define __ARCH_WANT_UNLOCKED_CTXSW
diff --git a/kernel/sys.c b/kernel/sys.c
index 1828f8d..0e210a4 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1238,41 +1238,41 @@ SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource,
 
 #endif
 
-SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
+int do_setrlimit(unsigned int resource, struct rlimit *new_rlim,
+		 struct task_struct *tsk)
 {
-	struct rlimit new_rlim, *old_rlim;
 	int retval;
+	struct rlimit *old_rlim;
 
-	if (resource >= RLIM_NLIMITS)
-		return -EINVAL;
-	if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
-		return -EFAULT;
-	if (new_rlim.rlim_cur > new_rlim.rlim_max)
+
+	if (new_rlim->rlim_cur > new_rlim->rlim_max)
 		return -EINVAL;
-	old_rlim = current->signal->rlim + resource;
-	if ((new_rlim.rlim_max > old_rlim->rlim_max) &&
+	old_rlim = tsk->signal->rlim + resource;
+
+	if ((new_rlim->rlim_max > old_rlim->rlim_max) &&
 	    !capable(CAP_SYS_RESOURCE))
 		return -EPERM;
-	if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > sysctl_nr_open)
+
+	if (resource == RLIMIT_NOFILE && new_rlim->rlim_max > sysctl_nr_open)
 		return -EPERM;
 
-	retval = security_task_setrlimit(resource, &new_rlim);
+	retval = security_task_setrlimit(resource, new_rlim);
 	if (retval)
 		return retval;
 
-	if (resource == RLIMIT_CPU && new_rlim.rlim_cur == 0) {
+	if (resource == RLIMIT_CPU && new_rlim->rlim_cur == 0) {
 		/*
 		 * The caller is asking for an immediate RLIMIT_CPU
 		 * expiry.  But we use the zero value to mean "it was
 		 * never set".  So let's cheat and make it one second
 		 * instead
 		 */
-		new_rlim.rlim_cur = 1;
+		new_rlim->rlim_cur = 1;
 	}
 
-	task_lock(current->group_leader);
-	*old_rlim = new_rlim;
-	task_unlock(current->group_leader);
+	task_lock(tsk->group_leader);
+	*old_rlim = *new_rlim;
+	task_unlock(tsk->group_leader);
 
 	if (resource != RLIMIT_CPU)
 		goto out;
@@ -1283,14 +1283,26 @@ SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
 	 * very long-standing error, and fixing it now risks breakage of
 	 * applications, so we live with it
 	 */
-	if (new_rlim.rlim_cur == RLIM_INFINITY)
+	if (new_rlim->rlim_cur == RLIM_INFINITY)
 		goto out;
 
-	update_rlimit_cpu(new_rlim.rlim_cur);
+	update_rlimit_cpu(new_rlim->rlim_cur);
 out:
 	return 0;
 }
 
+SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
+{
+	struct rlimit new_rlim;
+
+	if (resource >= RLIM_NLIMITS)
+		return -EINVAL;
+	if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
+		return -EFAULT;
+
+	return do_setrlimit(resource, &new_rlim, current);
+}
+
 /*
  * It would make sense to put struct rusage in the task_struct,
  * except that would make the task_struct be *really big*.  After

^ permalink raw reply related	[flat|nested] 107+ messages in thread

* Re: [PATCH 2/3] extend get/setrlimit to support setting rlimits external to a process (v6)
  2009-10-12 20:13     ` [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v6) Neil Horman
  2009-10-12 20:20       ` [PATCH 1/3] " Neil Horman
@ 2009-10-12 20:23       ` Neil Horman
  2009-10-12 20:25       ` [PATCH 3/3] " Neil Horman
  2009-10-20  0:52       ` [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v7) Neil Horman
  3 siblings, 0 replies; 107+ messages in thread
From: Neil Horman @ 2009-10-12 20:23 UTC (permalink / raw)
  To: linux-kernel; +Cc: akpm, marcin.slusarz, nhorman

Add syscall infrastructure for getprlimit/setprlimit

This patch adds the definitions for the get/setprlimit syscalls.  They are
identical to the get/setrlimit calls, except that they allow the caller to
manipulate limits for processes other than themselves.

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>


 include/linux/syscalls.h |    4 ++
 kernel/sys.c             |   84 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 88 insertions(+)

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index a990ace..9f357ab 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -702,11 +702,15 @@ asmlinkage long sys_newuname(struct new_utsname __user *name);
 
 asmlinkage long sys_getrlimit(unsigned int resource,
 				struct rlimit __user *rlim);
+asmlinkage long sys_getprlimit(pid_t pid, unsigned int resource,
+				struct rlimit __user *rlim);
 #if defined(COMPAT_RLIM_OLD_INFINITY) || !(defined(CONFIG_IA64))
 asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit __user *rlim);
 #endif
 asmlinkage long sys_setrlimit(unsigned int resource,
 				struct rlimit __user *rlim);
+asmlinkage long	sys_setprlimit(pid_t pid, unsigned int resource,
+				struct rlimit __user *rlim);
 asmlinkage long sys_getrusage(int who, struct rusage __user *ru);
 asmlinkage long sys_umask(int mask);
 
diff --git a/kernel/sys.c b/kernel/sys.c
index 0e210a4..6ca9e7f 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1213,6 +1213,50 @@ SYSCALL_DEFINE2(getrlimit, unsigned int, resource, struct rlimit __user *, rlim)
 	}
 }
 
+SYSCALL_DEFINE3(getprlimit, pid_t, pid, unsigned int, resource,
+		struct rlimit __user *, rlim)
+{
+	unsigned long flags;
+	struct task_struct *tsk;
+	struct pid *ppid;
+	int retval = -EINVAL;
+
+	ppid = find_get_pid(pid);
+	if (!ppid)
+		goto out;
+
+	tsk = get_pid_task(ppid, PIDTYPE_PID);
+
+	if (!tsk)
+		goto out_put_pid;
+
+	if (resource >= RLIM_NLIMITS)
+		goto out_put_all;
+
+	retval = -EBUSY;
+	if (!lock_task_sighand(tsk, &flags))
+		goto out_put_all;
+
+	else {
+		struct rlimit val;
+
+		task_lock(tsk->group_leader);
+		val = current->signal->rlim[resource];
+		task_unlock(tsk->group_leader);
+		retval = copy_to_user(rlim, &val, sizeof(*rlim)) ? -EFAULT : 0;
+	}
+
+	unlock_task_sighand(tsk, &flags);
+
+out_put_all:
+	put_task_struct(tsk);
+out_put_pid:
+	put_pid(ppid);
+out:
+	return retval;
+}
+
+
 #ifdef __ARCH_WANT_SYS_OLD_GETRLIMIT
 
 /*
@@ -1303,6 +1347,46 @@ SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
 	return do_setrlimit(resource, &new_rlim, current);
 }
 
+SYSCALL_DEFINE3(setprlimit, pid_t, pid, unsigned int, resource,
+		struct rlimit __user *, rlim)
+{
+	struct task_struct *tsk;
+	struct pid *ppid;
+	unsigned long flags;
+	struct rlimit new_rlim;
+	int retval = -EINVAL;
+
+	ppid = find_get_pid(pid);
+	if (!ppid)
+		goto out;
+
+	tsk = get_pid_task(ppid, PIDTYPE_PID);
+
+	if (!tsk)
+		goto out_put_pid;
+
+	if (resource >= RLIM_NLIMITS)
+		goto out_put_all;
+
+	retval = -EFAULT;
+	if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
+		goto out_put_all;
+
+	if (!lock_task_sighand(tsk, &flags))
+		goto out_put_all;
+
+	retval = do_setrlimit(resource, &new_rlim, tsk);
+
+	unlock_task_sighand(tsk, &flags);
+
+out_put_all:
+	put_task_struct(tsk);
+out_put_pid:
+	put_pid(ppid);
+out:
+	return retval;
+}
+
 /*
  * It would make sense to put struct rusage in the task_struct,
  * except that would make the task_struct be *really big*.  After

^ permalink raw reply related	[flat|nested] 107+ messages in thread

* Re: [PATCH 3/3] extend get/setrlimit to support setting rlimits external to a process (v6)
  2009-10-12 20:13     ` [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v6) Neil Horman
  2009-10-12 20:20       ` [PATCH 1/3] " Neil Horman
  2009-10-12 20:23       ` [PATCH 2/3] " Neil Horman
@ 2009-10-12 20:25       ` Neil Horman
  2009-10-20  0:52       ` [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v7) Neil Horman
  3 siblings, 0 replies; 107+ messages in thread
From: Neil Horman @ 2009-10-12 20:25 UTC (permalink / raw)
  To: linux-kernel; +Cc: akpm, marcin.slusarz

Define __NR_getprlimit and __NR_setprlimit syscalls for asm-generic

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>


 unistd.h |    7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/include/asm-generic/unistd.h b/include/asm-generic/unistd.h
index d76b66a..4912f71 100644
--- a/include/asm-generic/unistd.h
+++ b/include/asm-generic/unistd.h
@@ -623,8 +623,13 @@ __SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo)
 #define __NR_perf_event_open 241
 __SYSCALL(__NR_perf_event_open, sys_perf_event_open)
 
+#define __NR_getprlimit 242
+__SYSCALL(__NR_getprlimit, sys_getprlimit)
+#define __NR_setprlimit 243
+__SYSCALL(__NR_setprlimit, sys_setprlimit)
+
 #undef __NR_syscalls
-#define __NR_syscalls 242
+#define __NR_syscalls 244
 
 /*
  * All syscalls below here should go away really,

^ permalink raw reply related	[flat|nested] 107+ messages in thread

* Re: [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v5)
  2009-10-12 16:13   ` [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v5) Neil Horman
                       ` (3 preceding siblings ...)
  2009-10-12 20:13     ` [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v6) Neil Horman
@ 2009-10-12 21:58     ` Andrew Morton
  2009-10-13  0:06       ` Neil Horman
  4 siblings, 1 reply; 107+ messages in thread
From: Andrew Morton @ 2009-10-12 21:58 UTC (permalink / raw)
  To: Neil Horman; +Cc: linux-kernel, marcin.slusarz

On Mon, 12 Oct 2009 12:13:42 -0400
Neil Horman <nhorman@tuxdriver.com> wrote:

> It's been requested often that we have the ability to read and modify process
> rlimit values from contexts external to the owning process.  Ideally this allows
> sysadmins to adjust rlimits on long-running processes without the need to stop
> and restart those processes, which incurs undesirable downtime.  This patch
> enables that functionality.  It does so in two places.  First, it enables process
> limit setting by writing to the /proc/pid/limits file a string in the format:
> <limit> <current limit> <max limit> > /proc/<pid>/limits
> where limit is one of
> [as,core,cpu,data,fsize,locks,memlock,msgqueue,nice,nofile,nproc,rss,rtprio,rttime]
> 
> Secondly, it allows for programmatic setting of these limits via 2 new syscalls,
> getprlimit and setprlimit, which act in an identical fashion to getrlimit and
> setrlimit respectively, except that they accept a process id as an extra
> argument, to specify the process whose rlimit values you wish to
> read/write.

I'm still not seeing why we need the /proc interface.

We've been using a syscall to set rlimits for ever and we've survived.

It just adds bloat and complexity to the kernel because putting a
100-line tool into util-linux is All Too Hard.


^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v5)
  2009-10-12 21:58     ` [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v5) Andrew Morton
@ 2009-10-13  0:06       ` Neil Horman
  0 siblings, 0 replies; 107+ messages in thread
From: Neil Horman @ 2009-10-13  0:06 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel, marcin.slusarz

On Mon, Oct 12, 2009 at 02:58:10PM -0700, Andrew Morton wrote:
> On Mon, 12 Oct 2009 12:13:42 -0400
> Neil Horman <nhorman@tuxdriver.com> wrote:
> 
> > It's been requested often that we have the ability to read and modify process
> > rlimit values from contexts external to the owning process.  Ideally this allows
> > sysadmins to adjust rlimits on long-running processes without the need to stop
> > and restart those processes, which incurs undesirable downtime.  This patch
> > enables that functionality.  It does so in two places.  First, it enables process
> > limit setting by writing to the /proc/pid/limits file a string in the format:
> > <limit> <current limit> <max limit> > /proc/<pid>/limits
> > where limit is one of
> > [as,core,cpu,data,fsize,locks,memlock,msgqueue,nice,nofile,nproc,rss,rtprio,rttime]
> > 
> > Secondly, it allows for programmatic setting of these limits via 2 new syscalls,
> > getprlimit and setprlimit, which act in an identical fashion to getrlimit and
> > setrlimit respectively, except that they accept a process id as an extra
> > argument, to specify the process whose rlimit values you wish to
> > read/write.
> 
> I'm still not seeing why we need the /proc interface.
> 
> We've been using a syscall to set rlimits for ever and we've survived.
> 
Except that we haven't.  We've had the read side of the proc interface for years
now, simply because people asked for it.  We could have added getprlimit back
then, but we didn't because users liked this.  It's easy, it's obvious, and it
doesn't require a sysadmin to remember the name of another utility.

> It just adds bloat and complexity to the kernel because putting a
> 100-line tool into util-linux is All Too Hard.
> 
And, with the adjustments to support the getprlimit/setprlimit syscalls in this
series, the net additional code to support a writeable /proc interface totals
about 80 lines.  I really think 'bloat' is a bit of an overstatement here.  If
you're really that worried about it, we can surround the proc interface with a
config option.  But at this point people have become accustomed to having
/proc/pid/limits available; I don't see why 80 lines to add the ability to set
limits is really that much of a barrier.

Neil


^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v7)
  2009-10-12 20:13     ` [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v6) Neil Horman
                         ` (2 preceding siblings ...)
  2009-10-12 20:25       ` [PATCH 3/3] " Neil Horman
@ 2009-10-20  0:52       ` Neil Horman
  2009-10-20  0:53         ` [PATCH 1/3] " Neil Horman
                           ` (4 more replies)
  3 siblings, 5 replies; 107+ messages in thread
From: Neil Horman @ 2009-10-20  0:52 UTC (permalink / raw)
  To: linux-kernel; +Cc: akpm, marcin.slusarz, nhorman, tglx, mingo, hpa

Ok, I give.  I was hoping that some of the requestors of this feature would pipe
up and support the use case for the proc file interface to set limits.  Clearly
they're not that interested, but I still think there's merit in the patch.  So
here's version 7 of this patch set.  It's the same as before, but the proc
interface has been dropped, leaving only the syscall interface behind.  I've
tested the interface on Intel 32 and 64 bit, with success.

Summary:

It's been requested often that we have the ability to read and modify process
rlimit values from contexts external to the owning process.  Ideally this allows
sysadmins to adjust rlimits on long-running processes without the need to stop
and restart those processes, which incurs undesirable downtime.  This patch
enables that functionality.  It does so in two places.  First, it enables process
limit setting by writing to the /proc/pid/limits file a string in the format:
<limit> <current limit> <max limit> > /proc/<pid>/limits
where limit is one of
[as,core,cpu,data,fsize,locks,memlock,msgqueue,nice,nofile,nproc,rss,rtprio,rttime]

Secondly, it allows for programmatic setting of these limits via 2 new syscalls,
getprlimit and setprlimit, which act in an identical fashion to getrlimit and
setrlimit respectively, except that they accept a process id as an extra
argument, to specify the process whose rlimit values you wish to
read/write.

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
 
 
 
 

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 1/3] extend get/setrlimit to support setting rlimits external to a process (v7)
  2009-10-20  0:52       ` [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v7) Neil Horman
@ 2009-10-20  0:53         ` Neil Horman
  2009-10-20  0:54         ` [PATCH 2/3] " Neil Horman
                           ` (3 subsequent siblings)
  4 siblings, 0 replies; 107+ messages in thread
From: Neil Horman @ 2009-10-20  0:53 UTC (permalink / raw)
  To: linux-kernel; +Cc: akpm, marcin.slusarz, tglx, mingo, hpa, nhorman

    Modify setrlimit syscall to accommodate various usages.
    
    Split the sys_setrlimit syscall into two parts:
    1) a SYSCALL_DEFINE wrapper that implements the sys_setrlimit function
    2) a core do_setrlimit function that accepts as a parameter the task whose
    limits it operates on
    
    This allows us later to implement sys_setprlimit, which allows us to change the
    limits of any process from userspace
    
    Signed-off-by: Neil Horman <nhorman@tuxdriver.com>


 include/linux/sched.h |    3 +++
 kernel/sys.c          |   48 ++++++++++++++++++++++++++++++------------------
 2 files changed, 33 insertions(+), 18 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7755763..ce95005 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -672,6 +672,9 @@ struct signal_struct {
 	int oom_adj;	/* OOM kill score adjustment (bit shift) */
 };
 
+extern int do_setrlimit(unsigned int resource, struct rlimit *new_rlim,
+			struct task_struct *tsk);
+
 /* Context switch must be unlocked if interrupts are to be enabled */
 #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
 # define __ARCH_WANT_UNLOCKED_CTXSW
diff --git a/kernel/sys.c b/kernel/sys.c
index 1828f8d..0e210a4 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1238,41 +1238,41 @@ SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource,
 
 #endif
 
-SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
+int do_setrlimit(unsigned int resource, struct rlimit *new_rlim,
+		 struct task_struct *tsk)
 {
-	struct rlimit new_rlim, *old_rlim;
 	int retval;
+	struct rlimit *old_rlim;
 
-	if (resource >= RLIM_NLIMITS)
-		return -EINVAL;
-	if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
-		return -EFAULT;
-	if (new_rlim.rlim_cur > new_rlim.rlim_max)
+
+	if (new_rlim->rlim_cur > new_rlim->rlim_max)
 		return -EINVAL;
-	old_rlim = current->signal->rlim + resource;
-	if ((new_rlim.rlim_max > old_rlim->rlim_max) &&
+	old_rlim = tsk->signal->rlim + resource;
+
+	if ((new_rlim->rlim_max > old_rlim->rlim_max) &&
 	    !capable(CAP_SYS_RESOURCE))
 		return -EPERM;
-	if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > sysctl_nr_open)
+
+	if (resource == RLIMIT_NOFILE && new_rlim->rlim_max > sysctl_nr_open)
 		return -EPERM;
 
-	retval = security_task_setrlimit(resource, &new_rlim);
+	retval = security_task_setrlimit(resource, new_rlim);
 	if (retval)
 		return retval;
 
-	if (resource == RLIMIT_CPU && new_rlim.rlim_cur == 0) {
+	if (resource == RLIMIT_CPU && new_rlim->rlim_cur == 0) {
 		/*
 		 * The caller is asking for an immediate RLIMIT_CPU
 		 * expiry.  But we use the zero value to mean "it was
 		 * never set".  So let's cheat and make it one second
 		 * instead
 		 */
-		new_rlim.rlim_cur = 1;
+		new_rlim->rlim_cur = 1;
 	}
 
-	task_lock(current->group_leader);
-	*old_rlim = new_rlim;
-	task_unlock(current->group_leader);
+	task_lock(tsk->group_leader);
+	*old_rlim = *new_rlim;
+	task_unlock(tsk->group_leader);
 
 	if (resource != RLIMIT_CPU)
 		goto out;
@@ -1283,14 +1283,26 @@ SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
 	 * very long-standing error, and fixing it now risks breakage of
 	 * applications, so we live with it
 	 */
-	if (new_rlim.rlim_cur == RLIM_INFINITY)
+	if (new_rlim->rlim_cur == RLIM_INFINITY)
 		goto out;
 
-	update_rlimit_cpu(new_rlim.rlim_cur);
+	update_rlimit_cpu(new_rlim->rlim_cur);
 out:
 	return 0;
 }
 
+SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
+{
+	struct rlimit new_rlim;
+
+	if (resource >= RLIM_NLIMITS)
+		return -EINVAL;
+	if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
+		return -EFAULT;
+
+	return do_setrlimit(resource, &new_rlim, current);
+}
+
 /*
  * It would make sense to put struct rusage in the task_struct,
  * except that would make the task_struct be *really big*.  After

^ permalink raw reply related	[flat|nested] 107+ messages in thread

* Re: [PATCH 2/3] extend get/setrlimit to support setting rlimits external to a process (v7)
  2009-10-20  0:52       ` [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v7) Neil Horman
  2009-10-20  0:53         ` [PATCH 1/3] " Neil Horman
@ 2009-10-20  0:54         ` Neil Horman
  2009-11-02 15:10           ` Ingo Molnar
  2009-10-20  0:55         ` [PATCH 3/3] " Neil Horman
                           ` (2 subsequent siblings)
  4 siblings, 1 reply; 107+ messages in thread
From: Neil Horman @ 2009-10-20  0:54 UTC (permalink / raw)
  To: linux-kernel; +Cc: akpm, marcin.slusarz, tglx, mingo, hpa, nhorman

    Implement sys_getprlimit and sys_setprlimit syscalls
    
    This patch adds the code to support the sys_setprlimit and sys_getprlimit
    syscalls, which read and modify the rlim values of a selected process
    
    Signed-off-by: Neil Horman <nhorman@tuxdriver.com>


 include/linux/syscalls.h |    4 ++
 kernel/sys.c             |   84 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 88 insertions(+)

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index a990ace..9f357ab 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -702,11 +702,15 @@ asmlinkage long sys_newuname(struct new_utsname __user *name);
 
 asmlinkage long sys_getrlimit(unsigned int resource,
 				struct rlimit __user *rlim);
+asmlinkage long sys_getprlimit(pid_t pid, unsigned int resource,
+				struct rlimit __user *rlim);
 #if defined(COMPAT_RLIM_OLD_INFINITY) || !(defined(CONFIG_IA64))
 asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit __user *rlim);
 #endif
 asmlinkage long sys_setrlimit(unsigned int resource,
 				struct rlimit __user *rlim);
+asmlinkage long	sys_setprlimit(pid_t pid, unsigned int resource,
+				struct rlimit __user *rlim);
 asmlinkage long sys_getrusage(int who, struct rusage __user *ru);
 asmlinkage long sys_umask(int mask);
 
diff --git a/kernel/sys.c b/kernel/sys.c
index 0e210a4..6ca9e7f 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1213,6 +1213,50 @@ SYSCALL_DEFINE2(getrlimit, unsigned int, resource, struct rlimit __user *, rlim)
 	}
 }
 
+SYSCALL_DEFINE3(getprlimit, pid_t, pid, unsigned int, resource,
+		struct rlimit __user *, rlim)
+{
+	unsigned long flags;
+	struct task_struct *tsk;
+	struct pid *ppid;
+	int retval = -EINVAL;
+
+	ppid = find_get_pid(pid);
+	if (!ppid)
+		goto out;
+
+	tsk = get_pid_task(ppid, PIDTYPE_PID);
+
+	if (!tsk)
+		goto out_put_pid;
+
+	if (resource >= RLIM_NLIMITS)
+		goto out_put_all;
+
+	retval = -EBUSY;
+	if (!lock_task_sighand(tsk, &flags))
+		goto out_put_all;
+
+	else {
+		struct rlimit val;
+
+		task_lock(tsk->group_leader);
+		val = current->signal->rlim[resource];
+		task_unlock(tsk->group_leader);
+		retval = copy_to_user(rlim, &val, sizeof(*rlim)) ? -EFAULT : 0;
+	}
+
+	unlock_task_sighand(tsk, &flags);
+
+out_put_all:
+	put_task_struct(tsk);
+out_put_pid:
+	put_pid(ppid);
+out:
+	return retval;
+}
+
+
 #ifdef __ARCH_WANT_SYS_OLD_GETRLIMIT
 
 /*
@@ -1303,6 +1347,46 @@ SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
 	return do_setrlimit(resource, &new_rlim, current);
 }
 
+SYSCALL_DEFINE3(setprlimit, pid_t, pid, unsigned int, resource,
+		struct rlimit __user *, rlim)
+{
+	struct task_struct *tsk;
+	struct pid *ppid;
+	unsigned long flags;
+	struct rlimit new_rlim;
+	int retval = -EINVAL;
+
+	ppid = find_get_pid(pid);
+	if (!ppid)
+		goto out;
+
+	tsk = get_pid_task(ppid, PIDTYPE_PID);
+
+	if (!tsk)
+		goto out_put_pid;
+
+	if (resource >= RLIM_NLIMITS)
+		goto out_put_all;
+
+	retval = -EFAULT;
+	if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
+		goto out_put_all;
+
+	if (!lock_task_sighand(tsk, &flags))
+		goto out_put_all;
+
+	retval = do_setrlimit(resource, &new_rlim, tsk);
+
+	unlock_task_sighand(tsk, &flags);
+
+out_put_all:
+	put_task_struct(tsk);
+out_put_pid:
+	put_pid(ppid);
+out:
+	return retval;
+}
+
 /*
  * It would make sense to put struct rusage in the task_struct,
  * except that would make the task_struct be *really big*.  After

^ permalink raw reply related	[flat|nested] 107+ messages in thread

* Re: [PATCH 3/3] extend get/setrlimit to support setting rlimits external to a process (v7)
  2009-10-20  0:52       ` [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v7) Neil Horman
  2009-10-20  0:53         ` [PATCH 1/3] " Neil Horman
  2009-10-20  0:54         ` [PATCH 2/3] " Neil Horman
@ 2009-10-20  0:55         ` Neil Horman
  2009-10-28 14:44         ` [PATCH 0/3] " Neil Horman
  2009-11-02 15:25         ` Ingo Molnar
  4 siblings, 0 replies; 107+ messages in thread
From: Neil Horman @ 2009-10-20  0:55 UTC (permalink / raw)
  To: linux-kernel; +Cc: akpm, marcin.slusarz, tglx, mingo, hpa, nhorman

    Add __NR_[get|set]prlimit syscall numbers to asm-generic.  I'm also adding them
    to asm-x86 since I was able to test on 32 and 64 bit intel locally here.
    
    Signed-off-by: Neil Horman <nhorman@tuxdriver.com>


 arch/x86/include/asm/unistd_32.h |    4 +++-
 arch/x86/include/asm/unistd_64.h |    4 ++++
 include/asm-generic/unistd.h     |    7 ++++++-
 include/asm-x86/asm-offsets.h    |    2 +-
 4 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
index 6fb3c20..06dbb34 100644
--- a/arch/x86/include/asm/unistd_32.h
+++ b/arch/x86/include/asm/unistd_32.h
@@ -342,10 +342,12 @@
 #define __NR_pwritev		334
 #define __NR_rt_tgsigqueueinfo	335
 #define __NR_perf_event_open	336
+#define __NR_getprlimit		337
+#define __NR_setprlimit		338
 
 #ifdef __KERNEL__
 
-#define NR_syscalls 337
+#define NR_syscalls 339
 
 #define __ARCH_WANT_IPC_PARSE_VERSION
 #define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index 8d3ad0a..48ea56c 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -661,6 +661,10 @@ __SYSCALL(__NR_pwritev, sys_pwritev)
 __SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo)
 #define __NR_perf_event_open			298
 __SYSCALL(__NR_perf_event_open, sys_perf_event_open)
+#define __NR_getprlimit				299
+__SYSCALL(__NR_getprlimit, sys_getprlimit)
+#define __NR_setprlimit				300
+__SYSCALL(__NR_setprlimit, sys_setprlimit)
 
 #ifndef __NO_STUBS
 #define __ARCH_WANT_OLD_READDIR
diff --git a/include/asm-generic/unistd.h b/include/asm-generic/unistd.h
index d76b66a..4912f71 100644
--- a/include/asm-generic/unistd.h
+++ b/include/asm-generic/unistd.h
@@ -623,8 +623,13 @@ __SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo)
 #define __NR_perf_event_open 241
 __SYSCALL(__NR_perf_event_open, sys_perf_event_open)
 
+#define __NR_getprlimit 242
+__SYSCALL(__NR_getprlimit, sys_getprlimit)
+#define __NR_setprlimit 243
+__SYSCALL(__NR_setprlimit, sys_setprlimit)
+
 #undef __NR_syscalls
-#define __NR_syscalls 242
+#define __NR_syscalls 244
 
 /*
  * All syscalls below here should go away really,
diff --git a/include/asm-x86/asm-offsets.h b/include/asm-x86/asm-offsets.h
index 9858ff0..6a26521 100644
--- a/include/asm-x86/asm-offsets.h
+++ b/include/asm-x86/asm-offsets.h
@@ -74,7 +74,7 @@
 
 #define crypto_tfm_ctx_offset 88 /* offsetof(struct crypto_tfm, __crt_ctx)	# */
 
-#define __NR_syscall_max 298 /* sizeof(syscalls) - 1	# */
+#define __NR_syscall_max 300 /* sizeof(syscalls) - 1	# */
 
 #define BP_scratch 484 /* offsetof(struct boot_params, scratch)	# */
 #define BP_loadflags 529 /* offsetof(struct boot_params, hdr.loadflags)	# */

^ permalink raw reply related	[flat|nested] 107+ messages in thread

* Re: [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v7)
  2009-10-20  0:52       ` [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v7) Neil Horman
                           ` (2 preceding siblings ...)
  2009-10-20  0:55         ` [PATCH 3/3] " Neil Horman
@ 2009-10-28 14:44         ` Neil Horman
  2009-10-30 18:24           ` Neil Horman
  2009-11-02 15:25         ` Ingo Molnar
  4 siblings, 1 reply; 107+ messages in thread
From: Neil Horman @ 2009-10-28 14:44 UTC (permalink / raw)
  To: linux-kernel; +Cc: akpm, marcin.slusarz, tglx, mingo, hpa, nhorman

On Mon, Oct 19, 2009 at 08:52:15PM -0400, Neil Horman wrote:
> Ok, I give.  I was hoping that some of the requestors of this feature would pipe
> up and support the use case for the proc file interface to set limits.  Clearly
> they're not that interested, but I still think there's merit in the patch.  So
> here's version 7 of this patch set.  It's the same as before, but the proc
> interface has been dropped, leaving only the syscall interface behind.  I've
> tested the interface on Intel 32 and 64 bit, with success.
> 
> Summary:
> 
> It's been requested often that we have the ability to read and modify process
> rlimit values from contexts external to the owning process.  Ideally this allows
> sysadmins to adjust rlimits on long-running processes without the need to stop
> and restart those processes, which incurs undesirable downtime.  This patch
> enables that functionality.  It does so in two places.  First, it enables process
> limit setting by writing to the /proc/pid/limits file a string in the format:
> <limit> <current limit> <max limit> > /proc/<pid>/limits
> where limit is one of
> [as,core,cpu,data,fsize,locks,memlock,msgqueue,nice,nofile,nproc,rss,rtprio,rttime]
> 
> Secondly, it allows for programmatic setting of these limits via 2 new syscalls,
> getprlimit and setprlimit, which act in an identical fashion to getrlimit and
> setrlimit respectively, except that they accept a process id as an extra
> argument, to specify the process whose rlimit values you wish to
> read/write.
> 
> Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
>  
>  
>  
>  
> 

Ping?  Any thoughts
Regards
Neil


^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v7)
  2009-10-28 14:44         ` [PATCH 0/3] " Neil Horman
@ 2009-10-30 18:24           ` Neil Horman
  0 siblings, 0 replies; 107+ messages in thread
From: Neil Horman @ 2009-10-30 18:24 UTC (permalink / raw)
  To: linux-kernel; +Cc: akpm, marcin.slusarz, tglx, mingo, hpa

On Wed, Oct 28, 2009 at 10:44:26AM -0400, Neil Horman wrote:
> On Mon, Oct 19, 2009 at 08:52:15PM -0400, Neil Horman wrote:
> > Ok, I give.  I was hoping that some of the requestors of this feature would pipe
> > up and support the use case for the proc file interface to set limits.  Clearly
> > they're not that interested, but I still think there's merit in the patch.  So
> > here's version 7 of this patch set.  It's the same as before, but the proc
> > interface has been dropped, leaving only the syscall interface behind.  I've
> > tested the interface on Intel 32 and 64 bit, with success.
> > 
> > Summary:
> > 
> > It's been requested often that we have the ability to read and modify process
> > rlimit values from contexts external to the owning process.  Ideally this allows
> > sysadmins to adjust rlimits on long-running processes without the need to stop
> > and restart those processes, which incurs undesirable downtime.  This patch
> > enables that functionality.  It does so in two places.  First, it enables process
> > limit setting by writing to the /proc/pid/limits file a string in the format:
> > <limit> <current limit> <max limit> > /proc/<pid>/limits
> > where limit is one of
> > [as,core,cpu,data,fsize,locks,memlock,msgqueue,nice,nofile,nproc,rss,rtprio,rttime]
> > 
> > Secondly, it allows for programmatic setting of these limits via 2 new syscalls,
> > getprlimit and setprlimit, which act in an identical fashion to getrlimit and
> > setrlimit respectively, except that they accept a process id as an extra
> > argument, to specify the process whose rlimit values you wish to
> > read/write.
> > 
> > Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
> >  
> >  
> >  
> >  
> > 
> 
> Ping?  Any thoughts
> Regards
> Neil

Ping?
Neil


^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 2/3] extend get/setrlimit to support setting rlimits external to a process (v7)
  2009-10-20  0:54         ` [PATCH 2/3] " Neil Horman
@ 2009-11-02 15:10           ` Ingo Molnar
  2009-11-02 17:40             ` Neil Horman
  0 siblings, 1 reply; 107+ messages in thread
From: Ingo Molnar @ 2009-11-02 15:10 UTC (permalink / raw)
  To: Neil Horman; +Cc: linux-kernel, akpm, marcin.slusarz, tglx, mingo, hpa


two small comments:

this is a really dangerous pattern:

> +	if (!lock_task_sighand(tsk, &flags))
> +		goto out_put_all;
> +

as it's followed by:

> +	else {
> +		struct rlimit val;
> +
> +		task_lock(tsk->group_leader);
> +		val = current->signal->rlim[resource];
> +		task_unlock(tsk->group_leader);
> +		retval = copy_to_user(rlim, &val, sizeof(*rlim)) ? -EFAULT : 0;
> +	}

please turn that into the regular:

	if () {
		...
	} else {
		...
	}

pattern that is a lot harder to mess up later on.

> +	return retval;
> +}
> +
> +
>  #ifdef __ARCH_WANT_SYS_OLD_GETRLIMIT

one too many newlines?

	Ingo

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v7)
  2009-10-20  0:52       ` [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v7) Neil Horman
                           ` (3 preceding siblings ...)
  2009-10-28 14:44         ` [PATCH 0/3] " Neil Horman
@ 2009-11-02 15:25         ` Ingo Molnar
  2009-11-02 17:54           ` Neil Horman
  4 siblings, 1 reply; 107+ messages in thread
From: Ingo Molnar @ 2009-11-02 15:25 UTC (permalink / raw)
  To: Neil Horman
  Cc: linux-kernel, akpm, marcin.slusarz, tglx, mingo, hpa, Linus Torvalds


* Neil Horman <nhorman@tuxdriver.com> wrote:

> Ok, I give.  I was hoping that some of the requestors of this feature 
> would pipe up and support the use case for the proc file interface to 
> set limits.  clearly they're not that interested, but I still think 
> theres merit in the patch.  So heres version 7 of this patch set.  Its 
> the same as before, but the proc interface has been dropped, leaving 
> only the syscall interface behind.  I've tested the interface on intel 
> 32 and 64 bit, with success
> 
> Summary:
> 
> It's been requested often that we have the ability to read and modify 
> process rlimit values from contexts external to the owning process.  
> Ideally this allows sysadmins to adjust rlimits on long running 
> processes without the need to stop and restart those processes, which 
> incurs undesirable downtime.  This patch enables that functionality. 
> It does so in two places.  First it enables process limit setting by 
> writing to the /proc/pid/limits file a string in the format: <limit> 
> <current limit> <max limit> > /proc/<pid>/limits where limit is one of 
> [as,core,cpu,data,fsize,locks,memlock,msgqueue,nice,nofile,nproc,rss,rtprio,rttime]
> 
> Secondly it allows for programmatic setting of these limits via 2 new 
> syscalls, getprlimit, and setprlimit, which act in an identical 
> fashion to getrlimit and setrlimit respectively, except that they 
> accept a process id as an extra argument, to specify the process id of 
> the rlimit values that you wish to read/write

This looks potentially useful but i think the implementation might be 
too optimistic, from a security POV.

Have you ensured that no rlimit gets propagated during task init into 
some other value - under the previously correct assumption that rlimits 
dont change asynchronously under the feet of tasks?

Also, there's SMP safety: right now all the accesses to 
current->signal->rlim[] are unlocked and assume that if we are executing 
in a syscall those values cannot change. Is this a safe assumption on 
all SMP architectures?

Plus, the locking looks structured in a weird way: why is the 
sighand-lock taken in the procfs code instead of moving it where the 
data structure is updated (the resource limits code).

Also, a patch submission observation: every single patch you submitted 
here had a messed up title that had a 'Re: ' in it, making it hard to 
sort out what is the latest. Some of the patches also had their 
changelog indented. Please use the standard patch submission methods.

So this patch-set needs more work.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 2/3] extend get/setrlimit to support setting rlimits external to a process (v7)
  2009-11-02 15:10           ` Ingo Molnar
@ 2009-11-02 17:40             ` Neil Horman
  0 siblings, 0 replies; 107+ messages in thread
From: Neil Horman @ 2009-11-02 17:40 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: linux-kernel, akpm, marcin.slusarz, tglx, mingo, hpa

On Mon, Nov 02, 2009 at 04:10:32PM +0100, Ingo Molnar wrote:
> 
> two small comments:
> 
> this is a really dangerous pattern:
> 
> > +	if (!lock_task_sighand(tsk, &flags))
> > +		goto out_put_all;
> > +
> 
> as it's followed by:
> 
> > +	else {
> > +		struct rlimit val;
> > +
> > +		task_lock(tsk->group_leader);
> > +		val = current->signal->rlim[resource];
> > +		task_unlock(tsk->group_leader);
> > +		retval = copy_to_user(rlim, &val, sizeof(*rlim)) ? -EFAULT : 0;
> > +	}
> 
> please turn that into the regular:
> 
> 	if () {
> 		...
> 	} else {
> 		...
> 	}
> 
> pattern that is a lot harder to mess up later on.
> 
Yeah, ok, I can do that.

> > +	return retval;
> > +}
> > +
> > +
> >  #ifdef __ARCH_WANT_SYS_OLD_GETRLIMIT
> 
> one too many newlines?
> 
Probably, I'll trim it.  Thanks.
Neil

> 	Ingo
> 

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v7)
  2009-11-02 15:25         ` Ingo Molnar
@ 2009-11-02 17:54           ` Neil Horman
  2009-11-02 18:51             ` Ingo Molnar
  0 siblings, 1 reply; 107+ messages in thread
From: Neil Horman @ 2009-11-02 17:54 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: linux-kernel, akpm, marcin.slusarz, tglx, mingo, hpa, Linus Torvalds

On Mon, Nov 02, 2009 at 04:25:20PM +0100, Ingo Molnar wrote:
> 
> * Neil Horman <nhorman@tuxdriver.com> wrote:
> 
> > Ok, I give.  I was hoping that some of the requestors of this feature 
> > would pipe up and support the use case for the proc file interface to 
> > set limits.  clearly they're not that interested, but I still think 
> > theres merit in the patch.  So heres version 7 of this patch set.  Its 
> > the same as before, but the proc interface has been dropped, leaving 
> > only the syscall interface behind.  I've tested the interface on intel 
> > 32 and 64 bit, with success
> > 
> > Summary:
> > 
> > It's been requested often that we have the ability to read and modify 
> > process rlimit values from contexts external to the owning process.  
> > Ideally this allows sysadmins to adjust rlimits on long running 
> > processes without the need to stop and restart those processes, which 
> > incurs undesirable downtime.  This patch enables that functionality. 
> > It does so in two places.  First it enables process limit setting by 
> > writing to the /proc/pid/limits file a string in the format: <limit> 
> > <current limit> <max limit> > /proc/<pid>/limits where limit is one of 
> > [as,core,cpu,data,fsize,locks,memlock,msgqueue,nice,nofile,nproc,rss,rtprio,rttime]
> > 
> > Secondly it allows for programmatic setting of these limits via 2 new 
> > syscalls, getprlimit, and setprlimit, which act in an identical 
> > fashion to getrlimit and setrlimit respectively, except that they 
> > accept a process id as an extra argument, to specify the process id of 
> > the rlimit values that you wish to read/write
> 
> This looks potentially useful but i think the implementation might be 
> too optimistic, from a security POV.
> 
> Have you ensured that no rlimit gets propagated during task init into 
> some other value - under the previously correct assumption that rlimits 
> dont change asynchronously under the feet of tasks?
> 
I've looked, and the only place that I see the rlim array getting copied is via
copy_signal when we're in the clone path.  The entire rlim array is copied from
old task_struct to new task_struct under the protection of the
current->group_leader task lock, which I also hold when updating via
sys_setprlimit, so I think we're safe in this case.

> Also, there's SMP safety: right now all the accesses to 
> current->signal->rlim[] are unlocked and assume that if we are executing 
> in a syscall those values cannot change. Is this a safe assumption on 
> all SMP architectures?
> 
I was concerned about this too a bit, but looking at it all the RLIM's are 32
bit values.  So this is going to be an atomic write on any arch that is 32 bits
or larger (or am I mistaken there?).  Sure, there might be some caching effects
that result in an old value getting read on another processor, but such
inconsistencies should be short lived, shouldn't they?  Just thinking that
introducing locking to prevent such temporary inconsistencies might not be worth
the performance hit.  Certainly open to opinion on this though.

> Plus, the locking looks structured in a weird way: why is the 
> sighand-lock taken in the procfs code instead of moving it where the 
> data structure is updated (the resource limits code).
> 
Lack of change is really the answer.  That's the way it was previously, and since
Andrew was interested in just going with the syscall implementation, I didn't
introduce more changes to the proc path.  I figured I would update the proc path
when/if this got accepted.

> Also, a patch submission observation: every single patch you submitted 
> here had a messed up title that had a 'Re: ' in it, making it hard to 
> sort out what is the latest. Some of the patches also had their 
Thats why I appended a version number to the end.

> changelog indented. Please use the standard patch submission methods.
> 
Yeah, sorry about that.

I'll make the changes you requested in your first note, let me know what you
think of the above, and I'll make further changes as needed.
Thanks!
Neil


^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v7)
  2009-11-02 17:54           ` Neil Horman
@ 2009-11-02 18:51             ` Ingo Molnar
  2009-11-03  0:23               ` Neil Horman
  0 siblings, 1 reply; 107+ messages in thread
From: Ingo Molnar @ 2009-11-02 18:51 UTC (permalink / raw)
  To: Neil Horman
  Cc: linux-kernel, akpm, marcin.slusarz, tglx, mingo, hpa, Linus Torvalds


* Neil Horman <nhorman@tuxdriver.com> wrote:

> > Have you ensured that no rlimit gets propagated during task init 
> > into some other value - under the previously correct assumption that 
> > rlimits dont change asynchronously under the feet of tasks?
> 
> I've looked, and the only place that I see the rlim array getting 
> copied is via copy_signal when we're in the clone path.  The entire 
> rlim array is copied from old task_struct to new task_struct under the 
> protection of the current->group_leader task lock, which I also hold 
> when updating via sys_setprlimit, so I think we're safe in this case.

I mean - do we set up any data structure based on a particular rlimit, 
that can get out of sync with the rlimit being updated?

A prominent example would be the stack limit - we base address layout 
decisions on it. Check arch/x86/mm/mmap.c. RLIM_INFINITY has a special 
meaning plus we also set mmap_base() based on the rlim.

Also, there appears to be almost no security checks in the new syscall! 
We look up a PID but that's it - this code will allow unprivileged users 
to lower various rlimits of system daemons - as if it were their own 
limit. That's a rather big security hole.

	Ingo

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v7)
  2009-11-02 18:51             ` Ingo Molnar
@ 2009-11-03  0:23               ` Neil Horman
  2009-11-04 11:26                 ` Ingo Molnar
  0 siblings, 1 reply; 107+ messages in thread
From: Neil Horman @ 2009-11-03  0:23 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: linux-kernel, akpm, marcin.slusarz, tglx, mingo, hpa, Linus Torvalds

On Mon, Nov 02, 2009 at 07:51:37PM +0100, Ingo Molnar wrote:
> 
> * Neil Horman <nhorman@tuxdriver.com> wrote:
> 
> > > Have you ensured that no rlimit gets propagated during task init 
> > > into some other value - under the previously correct assumption that 
> > > rlimits dont change asynchronously under the feet of tasks?
> > 
> > I've looked, and the only place that I see the rlim array getting 
> > copied is via copy_signal when we're in the clone path.  The entire 
> > rlim array is copied from old task_struct to new task_struct under the 
> > protection of the current->group_leader task lock, which I also hold 
> > when updating via sys_setprlimit, so I think we're safe in this case.
> 
> I mean - do we set up any data structure based on a particular rlimit, 
> that can get out of sync with the rlimit being updated?
> 
> A prominent example would be the stack limit - we base address layout 
> decisions on it. Check arch/x86/mm/mmap.c. RLIM_INFINITY has a special 
> meaning plus we also set mmap_base() based on the rlim.
> 
Ah, I didn't consider those.  Yes it looks like some locking might be needed for
cases like that.  What would you suggest, simply grabbing the task lock before
looking at the rlim array?  That seems a bit heavy handed, especially if we want
to use the locking consistently.  What if we just converted the int array of
rlimit to atomic_t's?  Would that be sufficient, or still too heavy?

> Also, there appears to be almost no security checks in the new syscall! 
> We look up a PID but that's it - this code will allow unprivileged users 
> to lower various rlimits of system daemons - as if it were their own 
> limit. That's a rather big security hole.
> 
Yeah, I kept all the old checks in place, but didn't consider that other
processes might need additional security checks.  I guess the rule needs to be
that the caller's uid needs to have CAP_SYS_RESOURCE and must match the uid of
the process being modified or be 0/root.  Is that about right?

Regards
Neil

> 	Ingo
> 

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v7)
  2009-11-03  0:23               ` Neil Horman
@ 2009-11-04 11:26                 ` Ingo Molnar
  2009-11-05 20:48                   ` Neil Horman
  0 siblings, 1 reply; 107+ messages in thread
From: Ingo Molnar @ 2009-11-04 11:26 UTC (permalink / raw)
  To: Neil Horman
  Cc: linux-kernel, akpm, marcin.slusarz, tglx, mingo, hpa, Linus Torvalds


* Neil Horman <nhorman@tuxdriver.com> wrote:

> On Mon, Nov 02, 2009 at 07:51:37PM +0100, Ingo Molnar wrote:
> > 
> > * Neil Horman <nhorman@tuxdriver.com> wrote:
> > 
> > > > Have you ensured that no rlimit gets propagated during task init 
> > > > into some other value - under the previously correct assumption that 
> > > > rlimits dont change asynchronously under the feet of tasks?
> > > 
> > > I've looked, and the only place that I see the rlim array getting 
> > > copied is via copy_signal when we're in the clone path.  The 
> > > entire rlim array is copied from old task_struct to new 
> > > task_struct under the protection of the current->group_leader task 
> > > lock, which I also hold when updating via sys_setprlimit, so I 
> > > think we're safe in this case.
> > 
> > I mean - do we set up any data structure based on a particular 
> > rlimit, that can get out of sync with the rlimit being updated?
> > 
> > A prominent example would be the stack limit - we base address 
> > layout decisions on it. Check arch/x86/mm/mmap.c. RLIM_INFINITY has 
> > a special meaning plus we also set mmap_base() based on the rlim.
> 
> Ah, I didn't consider those.  Yes it looks like some locking might be 
> needed for cases like that.  what would you suggest, simply grabbing 
> the task lock before looking at the rlim array?  That seems a bit 
> heavy handed, especially if we want to use the locking consistently.  
> What if we just converted the int array of rlimit to atomic_t's?  
> Would that be sufficient, or still to heavy?

The main problem isn't even atomicity (word sized, naturally aligned 
variables are read/written atomically already), but logical coherency and 
races: how robust is it to change the rlimit 'under' a task that is 
running those VM routines on another CPU right now? How robust is it to 
change a task away from RLIM_INFINITY, thereby affecting fundamental 
properties of its layout?

The answer might easily be: "it causes no security problems and we dont 
care about self-inflicted damage" - but we have to consider each usage 
site individually and list them in the changelog i suspect.

I checked some other rlimit uses (the VFS ones) and most of them seemed 
to be fine, at first glance.

What we do here is to introduce a completely new mode of access to an 
ancient and quite fundamental data structure of the kernel, so i think 
all the usage sites and side-effects should be thought through.

I wouldnt go so far to suggest explicit, heavy-handed locking - _most_ 
of the uses are single-use. I just wanted to point out the possibilities 
that should be considered before we can have warm fuzzy feelings about 
your patch.

Maybe a read wrapper that does an ACCESS_ONCE() would be prudent, in 
case compilers do something silly in the future.

> > Also, there appears to be almost no security checks in the new 
> > syscall! We look up a PID but that's it - this code will allow 
> > unprivileged users to lower various rlimits of system daemons - as 
> > if it were their own limit. That's a rather big security hole.
> 
> Yeah, I kept all the old checks in place, but didn't consider that 
> other processes might need additional security checks, I guess the 
> rule needs to be that the callers uid needs to have CAP_SYS_RESOURCE 
> and must match the uid of the process being modified or be 0/root.  Is 
> that about right?

I think the regular ptrace or signal security checks could be reused 
(sans the legacy components).
 
Those tend to be a (tiny) bit more than just a uid+capability check - 
they are a [fse]uid check, i.e. the path of denial should be something 
like:

        if ((cred->uid != tcred->euid ||
             cred->uid != tcred->suid ||
             cred->uid != tcred->uid  ||
             cred->gid != tcred->egid ||
             cred->gid != tcred->sgid ||
             cred->gid != tcred->gid) &&
            !capable(CAP_SYS_RESOURCE)) {

	Ingo

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v7)
  2009-11-04 11:26                 ` Ingo Molnar
@ 2009-11-05 20:48                   ` Neil Horman
  2009-11-06  9:26                     ` Ingo Molnar
  0 siblings, 1 reply; 107+ messages in thread
From: Neil Horman @ 2009-11-05 20:48 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: linux-kernel, akpm, marcin.slusarz, tglx, mingo, hpa, Linus Torvalds

On Wed, Nov 04, 2009 at 12:26:32PM +0100, Ingo Molnar wrote:
> 
> * Neil Horman <nhorman@tuxdriver.com> wrote:
> 
> > On Mon, Nov 02, 2009 at 07:51:37PM +0100, Ingo Molnar wrote:
> > > 
> > > * Neil Horman <nhorman@tuxdriver.com> wrote:
> > > 
> > > > > Have you ensured that no rlimit gets propagated during task init 
> > > > > into some other value - under the previously correct assumption that 
> > > > > rlimits dont change asynchronously under the feet of tasks?
> > > > 
> > > > I've looked, and the only place that I see the rlim array getting 
> > > > copied is via copy_signal when we're in the clone path.  The 
> > > > entire rlim array is copied from old task_struct to new 
> > > > task_struct under the protection of the current->group_leader task 
> > > > lock, which I also hold when updating via sys_setprlimit, so I 
> > > > think we're safe in this case.
> > > 
> > > I mean - do we set up any data structure based on a particular 
> > > rlimit, that can get out of sync with the rlimit being updated?
> > > 
> > > A prominent example would be the stack limit - we base address 
> > > layout decisions on it. Check arch/x86/mm/mmap.c. RLIM_INFINITY has 
> > > a special meaning plus we also set mmap_base() based on the rlim.
> > 
> > Ah, I didn't consider those.  Yes it looks like some locking might be 
> > needed for cases like that.  what would you suggest, simply grabbing 
> > the task lock before looking at the rlim array?  That seems a bit 
> > heavy handed, especially if we want to use the locking consistently.  
> > What if we just converted the int array of rlimit to atomic_t's?  
> > Would that be sufficient, or still to heavy?

Just to provide a quick update on this, it appears that (unbeknownst to me), 
Jiri Slaby got almost this exact same feature in via the linux-next tree:
commits
ba9ba971a9241250646091935d77d2f31b7c15af
4a4a4e5f51d866284db401ea4d8ba5f0c91cc1eb
c1b9b7eaf7386a7f142d59a2bb433ac8217b0ad1

It still likely needs an audit to make sure there's no race with task access on
the rlimit array, but it doesn't currently require additional security checks
because the only access for a process to another process's limits is by writing
to the /proc/<pid>/limits file, as I had initially proposed.  I think there's still
value in the syscall, so I'll keep going with that aspect, but the rest of the
work appears done.

Regards

Neil


^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v7)
  2009-11-05 20:48                   ` Neil Horman
@ 2009-11-06  9:26                     ` Ingo Molnar
  2009-11-06 10:00                       ` Jiri Slaby
  2009-11-09  8:54                       ` Jiri Slaby
  0 siblings, 2 replies; 107+ messages in thread
From: Ingo Molnar @ 2009-11-06  9:26 UTC (permalink / raw)
  To: Neil Horman, Jiri Slaby, Stephen Rothwell
  Cc: linux-kernel, akpm, marcin.slusarz, tglx, mingo, hpa, Linus Torvalds


* Neil Horman <nhorman@tuxdriver.com> wrote:

> On Wed, Nov 04, 2009 at 12:26:32PM +0100, Ingo Molnar wrote:
> > 
> > * Neil Horman <nhorman@tuxdriver.com> wrote:
> > 
> > > On Mon, Nov 02, 2009 at 07:51:37PM +0100, Ingo Molnar wrote:
> > > > 
> > > > * Neil Horman <nhorman@tuxdriver.com> wrote:
> > > > 
> > > > > > Have you ensured that no rlimit gets propagated during task init 
> > > > > > into some other value - under the previously correct assumption that 
> > > > > > rlimits dont change asynchronously under the feet of tasks?
> > > > > 
> > > > > I've looked, and the only place that I see the rlim array getting 
> > > > > copied is via copy_signal when we're in the clone path.  The 
> > > > > entire rlim array is copied from old task_struct to new 
> > > > > task_struct under the protection of the current->group_leader task 
> > > > > lock, which I also hold when updating via sys_setprlimit, so I 
> > > > > think we're safe in this case.
> > > > 
> > > > I mean - do we set up any data structure based on a particular 
> > > > rlimit, that can get out of sync with the rlimit being updated?
> > > > 
> > > > A prominent example would be the stack limit - we base address 
> > > > layout decisions on it. Check arch/x86/mm/mmap.c. RLIM_INFINITY has 
> > > > a special meaning plus we also set mmap_base() based on the rlim.
> > > 
> > > Ah, I didn't consider those.  Yes it looks like some locking might be 
> > > needed for cases like that.  what would you suggest, simply grabbing 
> > > the task lock before looking at the rlim array?  That seems a bit 
> > > heavy handed, especially if we want to use the locking consistently.  
> > > What if we just converted the int array of rlimit to atomic_t's?  
> > > Would that be sufficient, or still to heavy?
> 
> Just to provide a quick update on this, it appears that (unbeknowst to me), 
> Jiri Slaby got almost this exact same feature in via the linux-next tree:
> commits
> ba9ba971a9241250646091935d77d2f31b7c15af
> 4a4a4e5f51d866284db401ea4d8ba5f0c91cc1eb
> c1b9b7eaf7386a7f142d59a2bb433ac8217b0ad1
> 
> It still likely needs an audit to make sure theres no race with task 
> access on the rlimit array, but it doesn't currently require 
> additional security checks because the only access for a process to 
> another processes limits is by writing to the /proc/<pid>/limits file, 
> as I had initial proposed.  I think theres still value in the 
> sysscall, so I'll keep going with that aspect, but the rest of the 
> work appears done.

(Cc:-ed Jiri)

Jiri, i think your patches are incomplete for the same reasons i 
outlined to Neil.

Also, the locking there looks messy:

+       /* optimization: 'current' doesn't need locking, e.g. setrlimit */
+       if (tsk != current) {
+               /* protect tsk->signal and tsk->sighand from disappearing */
+               read_lock(&tasklist_lock);
+               if (!tsk->sighand) {
+                       retval = -ESRCH;
+                       goto out;
+               }
        }

Neil's splitup into a helper function looks _far_ cleaner.

I'm also wondering, how did these commits get into linux-next? It 
appears that the 'writable_limits' tree got added by sfr to 
linux-next on Oct 26 just based on Jiri's request, without acks/review 
from the people generally involved with this code.

Stephen, this is the Nth incident of linux-next merging random new 
feature trees on its own, without apparently having pinged/Cc:-ed the 
maintainers/developers involved and without you having thought through 
the stuff you merge. (Perfmon was perhaps the worst incident, about a 
year ago - but there's been other cases as well since then.)

As things stand now you are treating linux-next as your own tree in 
essence, merging/unmerging trees to your own desire, allowing 
unreviewed/unacked commits into linux-next - which is fine but then 
please lets not call it the 'next Linux' but sfr-next or so ...

Btw., this is not against Jiri's tree - i think out of Jiri's and Neil's 
patches a nice rlimits feature could be done for 2.6.33 - but IMHO this 
chaotic (non-)quality merge process of linux-next cannot go on like this 
...

	Ingo

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v7)
  2009-11-06  9:26                     ` Ingo Molnar
@ 2009-11-06 10:00                       ` Jiri Slaby
  2009-11-08 10:36                         ` Ingo Molnar
  2009-11-09  8:54                       ` Jiri Slaby
  1 sibling, 1 reply; 107+ messages in thread
From: Jiri Slaby @ 2009-11-06 10:00 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Neil Horman, Stephen Rothwell, linux-kernel, akpm,
	marcin.slusarz, tglx, mingo, hpa, Linus Torvalds

On 11/06/2009 10:26 AM, Ingo Molnar wrote:
> Jiri, i think your patches are incomplete for the same reasons i 
> outlined to Neil.

I'll examine that. Thanks for pointing out.

> Also, the locking there looks messy:
> 
> +       /* optimization: 'current' doesn't need locking, e.g. setrlimit */
> +       if (tsk != current) {
> +               /* protect tsk->signal and tsk->sighand from disappearing */
> +               read_lock(&tasklist_lock);
> +               if (!tsk->sighand) {
> +                       retval = -ESRCH;
> +                       goto out;
> +               }
>         }
> 
> Neil's splitup into a helper function looks _far_ cleaner.

Then, I think, we should join our efforts.

> I'm also wondering, how did these commits get into linux-next? It 
> appears that that the 'writable_limits' tree got added by sfr to 
> linux-next on Oct 26 just based on Jiri's request, without acks/review 
> from the people generally involved with this code.

I posted the patches three times. The first time, we discussed the whole
thing with Oleg Nesterov (with you in CC btw) and I re-sent the changed code
(v2) based on Oleg's input. Then, after a month and a half I reposted the
whole patchset simply because nobody cared/commented. Waited another 10
days and got pissed off (that I'm ignored for no obvious reason) so that
I asked Stephen (publicly) to include it in the -next. He did, I
wouldn't say it's all his fault. I must add that selinux security guys
cooperated with me on the first patches.

I hoped for anybody's raised voice: nobody's :(. Is there anything I did
wrong? Who are the people to get an ACK from in this case?

Thanks.

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v7)
  2009-11-06 10:00                       ` Jiri Slaby
@ 2009-11-08 10:36                         ` Ingo Molnar
  2009-11-09  0:10                           ` Neil Horman
  0 siblings, 1 reply; 107+ messages in thread
From: Ingo Molnar @ 2009-11-08 10:36 UTC (permalink / raw)
  To: Jiri Slaby
  Cc: Neil Horman, Stephen Rothwell, linux-kernel, akpm,
	marcin.slusarz, tglx, mingo, hpa, Linus Torvalds


* Jiri Slaby <jirislaby@gmail.com> wrote:

> On 11/06/2009 10:26 AM, Ingo Molnar wrote:
> > Jiri, i think your patches are incomplete for the same reasons i 
> > outlined to Neil.
> 
> I'll examine that. Thanks for pointing out.
> 
> > Also, the locking there looks messy:
> > 
> > +       /* optimization: 'current' doesn't need locking, e.g. setrlimit */
> > +       if (tsk != current) {
> > +               /* protect tsk->signal and tsk->sighand from disappearing */
> > +               read_lock(&tasklist_lock);
> > +               if (!tsk->sighand) {
> > +                       retval = -ESRCH;
> > +                       goto out;
> > +               }
> >         }
> > 
> > Neil's splitup into a helper function looks _far_ cleaner.
> 
> Then, I think, we should join our efforts.

i think your commits could be enhanced to include Neil's splitup (and 
keeping your write extension for /proc/*/limits), and the new syscall 
(with a security check), hm?

Without dropping your current commits - they already have testing value.

> > I'm also wondering, how did these commits get into linux-next? It 
> > appears that that the 'writable_limits' tree got added by sfr to 
> > linux-next on Oct 26 just based on Jiri's request, without 
> > acks/review from the people generally involved with this code.
> 
> I posted the patches three times. The first, we discussed with Oleg 
> Nesterov the whole thing (with you in CC btw) and I resent changed 
> code (v2) based on Oleg's input. Then, after a month and a half I 
> reposted whole patchset simply because nobody cared/commented. Waited 
> another 10 days and got pissed off (that I'm ignored for no obvious 
> reason) so that I asked Stephen (publicly) to include it in the -next. 
> He did, I wouldn't say it's all his fault. I must add that selinux 
> security guys cooperated with me on the first patches.
> 
> I hoped for anybody's raised voice: nobody's :(. Is there anything I 
> did wrong? Who are the people to get an ACK from in this case?

Nah, it's just me grumbling about the sieve that our review process is 
;-) This command could be useful in the future for constructing Cc: 
lines:

  scripts/get_maintainer.pl -f kernel/sys.c

that's all.

	Ingo

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v7)
  2009-11-08 10:36                         ` Ingo Molnar
@ 2009-11-09  0:10                           ` Neil Horman
  2009-11-09  8:32                             ` Jiri Slaby
  0 siblings, 1 reply; 107+ messages in thread
From: Neil Horman @ 2009-11-09  0:10 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Jiri Slaby, Stephen Rothwell, linux-kernel, akpm, marcin.slusarz,
	tglx, mingo, hpa, Linus Torvalds

On Sun, Nov 08, 2009 at 11:36:29AM +0100, Ingo Molnar wrote:
> 
> * Jiri Slaby <jirislaby@gmail.com> wrote:
> 
> > On 11/06/2009 10:26 AM, Ingo Molnar wrote:
> > > Jiri, i think your patches are incomplete for the same reasons i 
> > > outlined to Neil.
> > 
> > I'll examine that. Thanks for pointing out.
> > 
> > > Also, the locking there looks messy:
> > > 
> > > +       /* optimization: 'current' doesn't need locking, e.g. setrlimit */
> > > +       if (tsk != current) {
> > > +               /* protect tsk->signal and tsk->sighand from disappearing */
> > > +               read_lock(&tasklist_lock);
> > > +               if (!tsk->sighand) {
> > > +                       retval = -ESRCH;
> > > +                       goto out;
> > > +               }
> > >         }
> > > 
> > > Neil's splitup into a helper function looks _far_ cleaner.
> > 
> > Then, I think, we should join our efforts.
> 
> i think your commits could be enhanced to include Neil's splitup (and 
> keeping your write extension for /proc/*/limits), and the new syscall 
> (with a security check), hm?
> 
> Without dropping your current commits - they already have testing value.
> 
That seems like a reasonable approach to me.  Jiri, would you like to do that or
shall I?  I'm happy to but it will take me a few days (I've got a bug I need to
focus on first).

Regards
Neil


^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v7)
  2009-11-09  0:10                           ` Neil Horman
@ 2009-11-09  8:32                             ` Jiri Slaby
  2009-11-09 13:34                               ` Neil Horman
  0 siblings, 1 reply; 107+ messages in thread
From: Jiri Slaby @ 2009-11-09  8:32 UTC (permalink / raw)
  To: Neil Horman
  Cc: Ingo Molnar, Stephen Rothwell, linux-kernel, akpm,
	marcin.slusarz, tglx, mingo, hpa, Linus Torvalds

On 11/09/2009 01:10 AM, Neil Horman wrote:
> That seems like a reasonable approach to me.  Jiri, would you like to do that or
> shall I?  I'm happy to but it will take me a few days (I've got a bug I need to
> focus on first).

Yup, I'll do that. v7 is the newest version you sent, right?

Thanks.

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v7)
  2009-11-06  9:26                     ` Ingo Molnar
  2009-11-06 10:00                       ` Jiri Slaby
@ 2009-11-09  8:54                       ` Jiri Slaby
  2009-11-09  9:01                         ` Ingo Molnar
  1 sibling, 1 reply; 107+ messages in thread
From: Jiri Slaby @ 2009-11-09  8:54 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Neil Horman, Stephen Rothwell, linux-kernel, akpm,
	marcin.slusarz, tglx, mingo, hpa, Linus Torvalds

On 11/06/2009 10:26 AM, Ingo Molnar wrote:
> Also, the locking there looks messy:
> 
> +       /* optimization: 'current' doesn't need locking, e.g. setrlimit */
> +       if (tsk != current) {
> +               /* protect tsk->signal and tsk->sighand from disappearing */
> +               read_lock(&tasklist_lock);
> +               if (!tsk->sighand) {
> +                       retval = -ESRCH;
> +                       goto out;
> +               }
>         }
> 
> Neil's splitup into a helper function looks _far_ cleaner.

Oops, I don't understand here. Looking at
http://patchwork.kernel.org/patch/54863/
and
http://decibel.fi.muni.cz/cgi-bin/gitweb.cgi?p=linux.git;a=commitdiff;h=ba9ba971a9241250646091935d77d2f31b7c15af

They are almost identical. That 'if' above is added by:
http://decibel.fi.muni.cz/cgi-bin/gitweb.cgi?p=linux.git;a=commitdiff;h=bab65b1e6c0fd797a1ecdb32911faa82947effd0

The latter commit is to avoid the performance penalty introduced for the
setrlimit syscall by the added lock (as it's unneeded for 'current'). If
you find that crappy, there is no problem with dropping it, indeed.

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v7)
  2009-11-09  8:54                       ` Jiri Slaby
@ 2009-11-09  9:01                         ` Ingo Molnar
  2009-11-09  9:22                           ` Jiri Slaby
  2009-11-09 15:56                           ` Jiri Slaby
  0 siblings, 2 replies; 107+ messages in thread
From: Ingo Molnar @ 2009-11-09  9:01 UTC (permalink / raw)
  To: Jiri Slaby
  Cc: Neil Horman, Stephen Rothwell, linux-kernel, akpm,
	marcin.slusarz, tglx, mingo, hpa, Linus Torvalds


* Jiri Slaby <jirislaby@gmail.com> wrote:

> On 11/06/2009 10:26 AM, Ingo Molnar wrote:
> > Also, the locking there looks messy:
> > 
> > +       /* optimization: 'current' doesn't need locking, e.g. setrlimit */
> > +       if (tsk != current) {
> > +               /* protect tsk->signal and tsk->sighand from disappearing */
> > +               read_lock(&tasklist_lock);
> > +               if (!tsk->sighand) {
> > +                       retval = -ESRCH;
> > +                       goto out;
> > +               }
> >         }
> > 
> > Neil's splitup into a helper function looks _far_ cleaner.
> 
> Oops, I don't understand here. Looking at
> http://patchwork.kernel.org/patch/54863/
> and
> http://decibel.fi.muni.cz/cgi-bin/gitweb.cgi?p=linux.git;a=commitdiff;h=ba9ba971a9241250646091935d77d2f31b7c15af
> 
> They are almost identical. That 'if' above is added by:
> http://decibel.fi.muni.cz/cgi-bin/gitweb.cgi?p=linux.git;a=commitdiff;h=bab65b1e6c0fd797a1ecdb32911faa82947effd0
> 
> The latter commit is to avoid performance penalty introduced for 
> setrlimit syscall by the added lock (as it's unneeded for 'current'). 
> If you find that crappy, there is no problem to drop it, indeed.

Looks a bit ugly but i agree that it's a real speedup as before this 
change we never had to take the tasklist lock for current task rlimit 
setting. (which was the only method possible.)

So i guess renaming setrlimit to do_setrlimit and adding the syscall 
from Neil's patch should bring the two series into sync, right?

	Ingo

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v7)
  2009-11-09  9:01                         ` Ingo Molnar
@ 2009-11-09  9:22                           ` Jiri Slaby
  2009-11-09  9:26                             ` Ingo Molnar
  2009-11-09 15:56                           ` Jiri Slaby
  1 sibling, 1 reply; 107+ messages in thread
From: Jiri Slaby @ 2009-11-09  9:22 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Neil Horman, Stephen Rothwell, linux-kernel, akpm,
	marcin.slusarz, tglx, mingo, hpa, Linus Torvalds

On 11/09/2009 10:01 AM, Ingo Molnar wrote:
> So i guess renaming setrlimit to do_setrlimit and adding the syscall 
> from Neil's patch should bring the two series into sync, right?

Looks like that. I have a nit, 32-bit programs on 64-bit x86 won't work.
I think we should add compat handlers for ia32 emulation too?

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v7)
  2009-11-09  9:22                           ` Jiri Slaby
@ 2009-11-09  9:26                             ` Ingo Molnar
  2009-11-09 13:35                               ` Neil Horman
  0 siblings, 1 reply; 107+ messages in thread
From: Ingo Molnar @ 2009-11-09  9:26 UTC (permalink / raw)
  To: Jiri Slaby
  Cc: Neil Horman, Stephen Rothwell, linux-kernel, akpm,
	marcin.slusarz, tglx, mingo, hpa, Linus Torvalds


* Jiri Slaby <jirislaby@gmail.com> wrote:

> On 11/09/2009 10:01 AM, Ingo Molnar wrote:
> > So i guess renaming setrlimit to do_setrlimit and adding the syscall 
> > from Neil's patch should bring the two series into sync, right?
> 
> Looks like that. I have a nit, 32-bit programs on 64-bit x86 won't 
> work. I think we should add compat handlers for ia32 emulation too?

Yeah.

	Ingo

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v7)
  2009-11-09  8:32                             ` Jiri Slaby
@ 2009-11-09 13:34                               ` Neil Horman
  0 siblings, 0 replies; 107+ messages in thread
From: Neil Horman @ 2009-11-09 13:34 UTC (permalink / raw)
  To: Jiri Slaby
  Cc: Ingo Molnar, Stephen Rothwell, linux-kernel, akpm,
	marcin.slusarz, tglx, mingo, hpa, Linus Torvalds

On Mon, Nov 09, 2009 at 09:32:22AM +0100, Jiri Slaby wrote:
> On 11/09/2009 01:10 AM, Neil Horman wrote:
> > That seems like a reasonable approach to me.  Jiri, would you like to do that or
> > shall I?  I'm happy to but it will take me a few days (I've got a bug I need to
> > focus on first).
> 
> Yup, I'll do that. v7 is the newest version you sent, right?
> 
Correct, thanks!
Neil


^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v7)
  2009-11-09  9:26                             ` Ingo Molnar
@ 2009-11-09 13:35                               ` Neil Horman
  0 siblings, 0 replies; 107+ messages in thread
From: Neil Horman @ 2009-11-09 13:35 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Jiri Slaby, Stephen Rothwell, linux-kernel, akpm, marcin.slusarz,
	tglx, mingo, hpa, Linus Torvalds

On Mon, Nov 09, 2009 at 10:26:29AM +0100, Ingo Molnar wrote:
> 
> * Jiri Slaby <jirislaby@gmail.com> wrote:
> 
> > On 11/09/2009 10:01 AM, Ingo Molnar wrote:
> > > So i guess renaming setrlimit to do_setrlimit and adding the syscall 
> > > from Neil's patch should bring the two series into sync, right?
> > 
> > Looks like that. I have a nit, 32-bit programs on 64-bit x86 won't 
> > work. I think we should add compat handlers for ia32 emulation too?
> 
> Yeah.
> 
> 	Ingo
> 

Concur.
Neil


^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v7)
  2009-11-09  9:01                         ` Ingo Molnar
  2009-11-09  9:22                           ` Jiri Slaby
@ 2009-11-09 15:56                           ` Jiri Slaby
  2009-11-09 16:40                             ` Oleg Nesterov
  1 sibling, 1 reply; 107+ messages in thread
From: Jiri Slaby @ 2009-11-09 15:56 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Neil Horman, Stephen Rothwell, linux-kernel, akpm,
	marcin.slusarz, tglx, mingo, hpa, Linus Torvalds, Oleg Nesterov

On 11/09/2009 10:01 AM, Ingo Molnar wrote:
> So i guess renaming setrlimit to do_setrlimit and adding the syscall 
> from Neil's patch should bring the two series into sync, right?

Actually, not really. It can't work right now, for a few reasons (below).

+SYSCALL_DEFINE3(getprlimit, pid_t, pid, unsigned int, resource,
+		struct rlimit __user *, rlim)
+{
+	unsigned long flags;
+	struct task_struct *tsk;
+	struct pid *ppid;
+	int retval = -EINVAL;

(There should be some sort of security check here, as Ingo already
spotted.) And I would move the out-of-bounds resource check here as well,
to reduce the failure-path handling.

+	ppid = find_get_pid(pid);
+	if (!ppid)
+		goto out;
+
+	tsk = get_pid_task(ppid, PIDTYPE_PID);
+
+	if (!tsk)
+		goto out_put_pid;
+
+	if (resource >= RLIM_NLIMITS)
+		goto out_put_all;
+
+	retval = -EBUSY;
+	if (!lock_task_sighand(tsk, &flags))

X task_lock below cannot nest inside sighand (according to Oleg)
X ->sighand/signal might be NULL here (and below) AFAICT
So we need tasklist_lock for reading and check sighand != NULL.

+		goto out_put_all;
+
+	else {
+		struct rlimit val;
+
+		task_lock(tsk->group_leader);
+		val = current->signal->rlim[resource];

Well, you meant tsk->signal->rlim[resource] :).

+		task_unlock(tsk->group_leader);
+		retval = copy_to_user(rlim, &val, sizeof(*rlim)) ? -EFAULT : 0;
+	}

If I'm totally overlooking something, please let me know, otherwise I'll
fix that in the way I wrote above. (The same holds for setprlimit.)

Thanks.

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v7)
  2009-11-09 15:56                           ` Jiri Slaby
@ 2009-11-09 16:40                             ` Oleg Nesterov
  2009-11-09 17:15                               ` Jiri Slaby
  0 siblings, 1 reply; 107+ messages in thread
From: Oleg Nesterov @ 2009-11-09 16:40 UTC (permalink / raw)
  To: Jiri Slaby
  Cc: Ingo Molnar, Neil Horman, Stephen Rothwell, linux-kernel, akpm,
	marcin.slusarz, tglx, mingo, hpa, Linus Torvalds

On 11/09, Jiri Slaby wrote:
>
> +	ppid = find_get_pid(pid);
> +	if (!ppid)
> +		goto out;
> +
> +	tsk = get_pid_task(ppid, PIDTYPE_PID);
> +
> +	if (!tsk)
> +		goto out_put_pid;
> +
> +	if (resource >= RLIM_NLIMITS)
> +		goto out_put_all;
> +
> +	retval = -EBUSY;
> +	if (!lock_task_sighand(tsk, &flags))
>
> X task_lock below cannot nest inside sighand (according to Oleg)

Yes, this is deadlockable.

Also, I don't understand why this code gets both pid and task_struct().

And the "if (resource >= RLIM_NLIMITS)" check is racy afaics, see
http://marc.info/?l=linux-kernel&m=125200862124872

Oleg.


^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v7)
  2009-11-09 16:40                             ` Oleg Nesterov
@ 2009-11-09 17:15                               ` Jiri Slaby
  2009-11-09 17:26                                 ` Linus Torvalds
  2009-11-09 17:36                                 ` Oleg Nesterov
  0 siblings, 2 replies; 107+ messages in thread
From: Jiri Slaby @ 2009-11-09 17:15 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: Ingo Molnar, Neil Horman, Stephen Rothwell, linux-kernel, akpm,
	marcin.slusarz, tglx, mingo, hpa, Linus Torvalds

On 11/09/2009 05:40 PM, Oleg Nesterov wrote:
> Also, I don't understand why wthis code get's both pid and task_struct().

And what do you suggest? With my knowledge, and after a quick
investigation, I see no other option.

> And the "if (resource >= RLIM_NLIMITS)" check is racy afaics, see
> http://marc.info/?l=linux-kernel&m=125200862124872

Your patch (which I have in my series, btw) is likely needed for the
setprlimit syscall; I'll keep that in mind from now on. But the
'if' above is a different story: 'resource' is an index here.

And as a bonus, what I found out now is that /proc/*/limits
(proc_info_read->proc_pid_limits) doesn't necessarily reflect current
limits. Since task_lock(current->group_leader) is not held, values of
one limit may be from the old as well as the currently updated one.

Am I right and do we care at all (since it's not atomic anyway in the
sense of reading 2 small chunks from that file)?

Thanks for the input.

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v7)
  2009-11-09 17:15                               ` Jiri Slaby
@ 2009-11-09 17:26                                 ` Linus Torvalds
  2009-11-09 17:36                                 ` Oleg Nesterov
  1 sibling, 0 replies; 107+ messages in thread
From: Linus Torvalds @ 2009-11-09 17:26 UTC (permalink / raw)
  To: Jiri Slaby
  Cc: Oleg Nesterov, Ingo Molnar, Neil Horman, Stephen Rothwell,
	linux-kernel, akpm, marcin.slusarz, tglx, mingo, hpa



On Mon, 9 Nov 2009, Jiri Slaby wrote:

> On 11/09/2009 05:40 PM, Oleg Nesterov wrote:
> > Also, I don't understand why wthis code get's both pid and task_struct().
> 
> And what do you suggest? I, with my knowledge and after fast
> investigation, see no other option.

Isn't

	task = find_task_by_vpid(pid);

what you want?

		Linus

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v7)
  2009-11-09 17:15                               ` Jiri Slaby
  2009-11-09 17:26                                 ` Linus Torvalds
@ 2009-11-09 17:36                                 ` Oleg Nesterov
  2009-11-18 14:51                                   ` Jiri Slaby
  1 sibling, 1 reply; 107+ messages in thread
From: Oleg Nesterov @ 2009-11-09 17:36 UTC (permalink / raw)
  To: Jiri Slaby
  Cc: Ingo Molnar, Neil Horman, Stephen Rothwell, linux-kernel, akpm,
	marcin.slusarz, tglx, mingo, hpa, Linus Torvalds

On 11/09, Jiri Slaby wrote:
>
> On 11/09/2009 05:40 PM, Oleg Nesterov wrote:
> > Also, I don't understand why wthis code get's both pid and task_struct().
>
> And what do you suggest?

Well, I didn't see this patch, except the part quoted in your email...
(btw, thanks for ccing me).

We can call find_task_by_vpid() under rcu and do get_task_struct().
Or, given that we need tasklist anyway we can do find_task_by_vpid()
under tasklist and do not get() at all.

> > And the "if (resource >= RLIM_NLIMITS)" check is racy afaics, see
> > http://marc.info/?l=linux-kernel&m=125200862124872
>
> Your patch (which I have in my series btw) is likely needed for the
> setprlimit syscall, having this on my mind again from now on. But the
> 'if' above is a different story. 'resource' is an index here.

Yes, thanks, I misread this check ;)

> And as a bonus, what I found out now is that /proc/*/limits
> (proc_info_read->proc_pid_limits) doesn't necessarily reflect current
> limits. Since task_lock(current->group_leader) is not held, values of
> one limit may be from the old as well as the currently updated one.
>
> Am I right and do we care at all

I don't know, but personally I think we don't care.

Oleg.


^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v7)
  2009-11-09 17:36                                 ` Oleg Nesterov
@ 2009-11-18 14:51                                   ` Jiri Slaby
  2009-11-18 14:51                                     ` [PATCH 01/16] core: posix-cpu-timers, cleanup rlimits usage Jiri Slaby
                                                       ` (16 more replies)
  0 siblings, 17 replies; 107+ messages in thread
From: Jiri Slaby @ 2009-11-18 14:51 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: Ingo Molnar, Neil Horman, Stephen Rothwell, linux-kernel, akpm,
	marcin.slusarz, tglx, mingo, hpa, Linus Torvalds

On 11/09/2009 06:36 PM, Oleg Nesterov wrote:
> We can call find_task_by_vpid() under rcu and do get_task_struct().
> Or, given that we need tasklist anyway we can do find_task_by_vpid()
> under tasklist and do not get() at all.

Thanks. I'll post the patches as replies to this email. They are on top
of the previous series, which was already in -next. That series is here
(on the same thread level):
http://lkml.org/lkml/2009/9/3/296

All patches available as:
git://decibel.fi.muni.cz/~xslaby/linux writable_limits-testing
or on web:
http://decibel.fi.muni.cz/cgi-bin/gitweb.cgi?p=linux.git;a=shortlog;h=refs/heads/writable_limits-testing

If somebody would like to pick them up, I also have a -next based
version (fanotify+recvmsg syscalls collision).

What I did with Neil's patches:
- changed syscall bodies according to comments
- added entries to syscall_table_32.S
- implemented compat syscalls
- cleaned up rlim usage in posix cpu timers. Note that the cpu and
  rttime limits are not guaranteed (a concurrent setrlimit update
  might be overwritten by the stores there).
- reviewed rlim usages all over the tree

On the last point: I added explicit ACCESS_ONCE all over the place. If
you don't like it, I will trash it. The only remaining weird user is in
kernel/acct.c:
        /*
         * Accounting records are not subject to resource limits.
         */
        flim = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
        current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
        file->f_op->write(file, (char *)&ac,
                               sizeof(acct_t), &file->f_pos);
        current->signal->rlim[RLIMIT_FSIZE].rlim_cur = flim;

It means that threads of the process with PACCT caps have unlimited file
size for a short while. If there is a setrlimit in between, it gets wiped
out as well. I don't know what to do with that.

Please review.

thanks,
-- 
js
Faculty of Informatics, Masaryk University
Suse Labs, Novell

^ permalink raw reply	[flat|nested] 107+ messages in thread

* [PATCH 01/16] core: posix-cpu-timers, cleanup rlimits usage
  2009-11-18 14:51                                   ` Jiri Slaby
@ 2009-11-18 14:51                                     ` Jiri Slaby
  2009-11-18 16:48                                       ` Peter Zijlstra
  2009-11-18 14:51                                     ` [PATCH 02/16] core: do security check under task_lock Jiri Slaby
                                                       ` (15 subsequent siblings)
  16 siblings, 1 reply; 107+ messages in thread
From: Jiri Slaby @ 2009-11-18 14:51 UTC (permalink / raw)
  To: jirislaby
  Cc: mingo, nhorman, sfr, linux-kernel, akpm, marcin.slusarz, tglx,
	mingo, hpa, torvalds, Jiri Slaby, James Morris, Heiko Carstens,
	Peter Zijlstra

Fetch rlimit (both hard and soft) values only once and work on them.
It removes many accesses through sig structure and makes the code
cleaner.

Mostly a preparation for writable resource limits support.

Signed-off-by: Jiri Slaby <jslaby@novell.com>
Cc: James Morris <jmorris@namei.org>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <peterz@infradead.org>
---
 kernel/posix-cpu-timers.c |   32 +++++++++++++++++---------------
 1 files changed, 17 insertions(+), 15 deletions(-)

diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 102c345..a7dcce1 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -983,6 +983,7 @@ static void check_thread_timers(struct task_struct *tsk,
 	int maxfire;
 	struct list_head *timers = tsk->cpu_timers;
 	struct signal_struct *const sig = tsk->signal;
+	unsigned long soft;
 
 	maxfire = 20;
 	tsk->cputime_expires.prof_exp = cputime_zero;
@@ -1031,9 +1032,9 @@ static void check_thread_timers(struct task_struct *tsk,
 	/*
 	 * Check for the special case thread timers.
 	 */
-	if (sig->rlim[RLIMIT_RTTIME].rlim_cur != RLIM_INFINITY) {
+	soft = sig->rlim[RLIMIT_RTTIME].rlim_cur;
+	if (soft != RLIM_INFINITY) {
 		unsigned long hard = sig->rlim[RLIMIT_RTTIME].rlim_max;
-		unsigned long *soft = &sig->rlim[RLIMIT_RTTIME].rlim_cur;
 
 		if (hard != RLIM_INFINITY &&
 		    tsk->rt.timeout > DIV_ROUND_UP(hard, USEC_PER_SEC/HZ)) {
@@ -1044,14 +1045,13 @@ static void check_thread_timers(struct task_struct *tsk,
 			__group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk);
 			return;
 		}
-		if (tsk->rt.timeout > DIV_ROUND_UP(*soft, USEC_PER_SEC/HZ)) {
+		if (tsk->rt.timeout > DIV_ROUND_UP(soft, USEC_PER_SEC/HZ)) {
 			/*
 			 * At the soft limit, send a SIGXCPU every second.
 			 */
-			if (sig->rlim[RLIMIT_RTTIME].rlim_cur
-			    < sig->rlim[RLIMIT_RTTIME].rlim_max) {
-				sig->rlim[RLIMIT_RTTIME].rlim_cur +=
-								USEC_PER_SEC;
+			if (soft < hard) {
+				soft += USEC_PER_SEC;
+				sig->rlim[RLIMIT_RTTIME].rlim_cur = soft;
 			}
 			printk(KERN_INFO
 				"RT Watchdog Timeout: %s[%d]\n",
@@ -1122,13 +1122,14 @@ static void check_process_timers(struct task_struct *tsk,
 	unsigned long long sum_sched_runtime, sched_expires;
 	struct list_head *timers = sig->cpu_timers;
 	struct task_cputime cputime;
+	unsigned long cpu_cur_lim = sig->rlim[RLIMIT_CPU].rlim_cur;
 
 	/*
 	 * Don't sample the current process CPU clocks if there are no timers.
 	 */
 	if (list_empty(&timers[CPUCLOCK_PROF]) &&
 	    cputime_eq(sig->it[CPUCLOCK_PROF].expires, cputime_zero) &&
-	    sig->rlim[RLIMIT_CPU].rlim_cur == RLIM_INFINITY &&
+	    cpu_cur_lim == RLIM_INFINITY &&
 	    list_empty(&timers[CPUCLOCK_VIRT]) &&
 	    cputime_eq(sig->it[CPUCLOCK_VIRT].expires, cputime_zero) &&
 	    list_empty(&timers[CPUCLOCK_SCHED])) {
@@ -1195,10 +1196,11 @@ static void check_process_timers(struct task_struct *tsk,
 	check_cpu_itimer(tsk, &sig->it[CPUCLOCK_VIRT], &virt_expires, utime,
 			 SIGVTALRM);
 
-	if (sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) {
+	if (cpu_cur_lim != RLIM_INFINITY) {
 		unsigned long psecs = cputime_to_secs(ptime);
+		unsigned long hard = sig->rlim[RLIMIT_CPU].rlim_max;
 		cputime_t x;
-		if (psecs >= sig->rlim[RLIMIT_CPU].rlim_max) {
+		if (psecs >= hard) {
 			/*
 			 * At the hard limit, we just die.
 			 * No need to calculate anything else now.
@@ -1206,17 +1208,17 @@ static void check_process_timers(struct task_struct *tsk,
 			__group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk);
 			return;
 		}
-		if (psecs >= sig->rlim[RLIMIT_CPU].rlim_cur) {
+		if (psecs >= cpu_cur_lim) {
 			/*
 			 * At the soft limit, send a SIGXCPU every second.
 			 */
 			__group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
-			if (sig->rlim[RLIMIT_CPU].rlim_cur
-			    < sig->rlim[RLIMIT_CPU].rlim_max) {
-				sig->rlim[RLIMIT_CPU].rlim_cur++;
+			if (cpu_cur_lim < hard) {
+				cpu_cur_lim++;
+				sig->rlim[RLIMIT_CPU].rlim_cur = cpu_cur_lim;
 			}
 		}
-		x = secs_to_cputime(sig->rlim[RLIMIT_CPU].rlim_cur);
+		x = secs_to_cputime(cpu_cur_lim);
 		if (cputime_eq(prof_expires, cputime_zero) ||
 		    cputime_lt(x, prof_expires)) {
 			prof_expires = x;
-- 
1.6.4.2


^ permalink raw reply related	[flat|nested] 107+ messages in thread

* [PATCH 02/16] core: do security check under task_lock
  2009-11-18 14:51                                   ` Jiri Slaby
  2009-11-18 14:51                                     ` [PATCH 01/16] core: posix-cpu-timers, cleanup rlimits usage Jiri Slaby
@ 2009-11-18 14:51                                     ` Jiri Slaby
  2009-11-18 21:47                                       ` James Morris
  2009-11-18 14:51                                       ` Jiri Slaby
                                                       ` (14 subsequent siblings)
  16 siblings, 1 reply; 107+ messages in thread
From: Jiri Slaby @ 2009-11-18 14:51 UTC (permalink / raw)
  To: jirislaby
  Cc: mingo, nhorman, sfr, linux-kernel, akpm, marcin.slusarz, tglx,
	mingo, hpa, torvalds, Jiri Slaby, James Morris, Heiko Carstens

Do security_task_setrlimit under task_lock. Other tasks may
change limits under our hands while we are checking limits
inside the function. From now on, they can't.

Signed-off-by: Jiri Slaby <jslaby@novell.com>
Cc: James Morris <jmorris@namei.org>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Ingo Molnar <mingo@elte.hu>
---
 kernel/sys.c |   16 +++++++---------
 1 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/kernel/sys.c b/kernel/sys.c
index 605ab9c..0f86199 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1243,7 +1243,7 @@ int setrlimit(struct task_struct *tsk, unsigned int resource,
 		struct rlimit *new_rlim)
 {
 	struct rlimit *old_rlim;
-	int retval;
+	int retval = 0;
 
 	if (new_rlim->rlim_cur > new_rlim->rlim_max)
 		return -EINVAL;
@@ -1260,10 +1260,6 @@ int setrlimit(struct task_struct *tsk, unsigned int resource,
 		}
 	}
 
-	retval = security_task_setrlimit(tsk, resource, new_rlim);
-	if (retval)
-		goto out;
-
 	if (resource == RLIMIT_CPU && new_rlim->rlim_cur == 0) {
 		/*
 		 * The caller is asking for an immediate RLIMIT_CPU
@@ -1276,11 +1272,13 @@ int setrlimit(struct task_struct *tsk, unsigned int resource,
 
 	old_rlim = tsk->signal->rlim + resource;
 	task_lock(tsk->group_leader);
-	if ((new_rlim->rlim_max <= old_rlim->rlim_max) ||
-				capable(CAP_SYS_RESOURCE))
-		*old_rlim = *new_rlim;
-	else
+	if ((new_rlim->rlim_max > old_rlim->rlim_max) &&
+				!capable(CAP_SYS_RESOURCE))
 		retval = -EPERM;
+	if (!retval)
+		retval = security_task_setrlimit(tsk, resource, new_rlim);
+	if (!retval)
+		*old_rlim = *new_rlim;
 	task_unlock(tsk->group_leader);
 
 	if (retval || resource != RLIMIT_CPU)
-- 
1.6.4.2


^ permalink raw reply related	[flat|nested] 107+ messages in thread

* [PATCH 03/16] IA64: use ACCESS_ONCE for rlimits
  2009-11-18 14:51                                   ` Jiri Slaby
@ 2009-11-18 14:51                                       ` Jiri Slaby
  2009-11-18 14:51                                     ` [PATCH 02/16] core: do security check under task_lock Jiri Slaby
                                                         ` (15 subsequent siblings)
  16 siblings, 0 replies; 107+ messages in thread
From: Jiri Slaby @ 2009-11-18 14:51 UTC (permalink / raw)
  To: jirislaby
  Cc: mingo, nhorman, sfr, linux-kernel, akpm, marcin.slusarz, tglx,
	mingo, hpa, torvalds, Jiri Slaby, James Morris, Heiko Carstens,
	linux-ia64

Make sure the compiler won't do weird things with limits. E.g. fetching
them twice may return 2 different values after writable limits are
implemented.

Signed-off-by: Jiri Slaby <jslaby@novell.com>
Cc: James Morris <jmorris@namei.org>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: linux-ia64@vger.kernel.org
---
 arch/ia64/kernel/perfmon.c  |    2 +-
 arch/ia64/kernel/sys_ia64.c |    2 +-
 arch/ia64/mm/init.c         |    3 ++-
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index f178270..91b8607 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -2298,7 +2298,7 @@ pfm_smpl_buffer_alloc(struct task_struct *task, struct file *filp, pfm_context_t
 	 * if ((mm->total_vm << PAGE_SHIFT) + len> task->rlim[RLIMIT_AS].rlim_cur)
 	 * 	return -ENOMEM;
 	 */
-	if (size > task->signal->rlim[RLIMIT_MEMLOCK].rlim_cur)
+	if (size > ACCESS_ONCE(task->signal->rlim[RLIMIT_MEMLOCK].rlim_cur))
 		return -ENOMEM;
 
 	/*
diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c
index 92ed83f..6a2b5d9 100644
--- a/arch/ia64/kernel/sys_ia64.c
+++ b/arch/ia64/kernel/sys_ia64.c
@@ -129,7 +129,7 @@ ia64_brk (unsigned long brk)
 		goto out;
 
 	/* Check against rlimit.. */
-	rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur;
+	rlim = ACCESS_ONCE(current->signal->rlim[RLIMIT_DATA].rlim_cur);
 	if (rlim < RLIM_INFINITY && brk - mm->start_data > rlim)
 		goto out;
 
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 1857766..fe6d63f 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -91,7 +91,8 @@ dma_mark_clean(void *addr, size_t size)
 inline void
 ia64_set_rbs_bot (void)
 {
-	unsigned long stack_size = current->signal->rlim[RLIMIT_STACK].rlim_max & -16;
+	unsigned long stack_size = ACCESS_ONCE(current->signal->
+			rlim[RLIMIT_STACK].rlim_max) & -16;
 
 	if (stack_size > MAX_USER_STACK_SIZE)
 		stack_size = MAX_USER_STACK_SIZE;
-- 
1.6.4.2


^ permalink raw reply related	[flat|nested] 107+ messages in thread

* [PATCH 03/16] IA64: use ACCESS_ONCE for rlimits
@ 2009-11-18 14:51                                       ` Jiri Slaby
  0 siblings, 0 replies; 107+ messages in thread
From: Jiri Slaby @ 2009-11-18 14:51 UTC (permalink / raw)
  To: jirislaby
  Cc: mingo, nhorman, sfr, linux-kernel, akpm, marcin.slusarz, tglx,
	mingo, hpa, torvalds, Jiri Slaby, James Morris, Heiko Carstens,
	linux-ia64

Make sure the compiler won't do weird things with limits. E.g. fetching
them twice may return 2 different values after writable limits are
implemented.

Signed-off-by: Jiri Slaby <jslaby@novell.com>
Cc: James Morris <jmorris@namei.org>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: linux-ia64@vger.kernel.org
---
 arch/ia64/kernel/perfmon.c  |    2 +-
 arch/ia64/kernel/sys_ia64.c |    2 +-
 arch/ia64/mm/init.c         |    3 ++-
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index f178270..91b8607 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -2298,7 +2298,7 @@ pfm_smpl_buffer_alloc(struct task_struct *task, struct file *filp, pfm_context_t
 	 * if ((mm->total_vm << PAGE_SHIFT) + len> task->rlim[RLIMIT_AS].rlim_cur)
 	 * 	return -ENOMEM;
 	 */
-	if (size > task->signal->rlim[RLIMIT_MEMLOCK].rlim_cur)
+	if (size > ACCESS_ONCE(task->signal->rlim[RLIMIT_MEMLOCK].rlim_cur))
 		return -ENOMEM;
 
 	/*
diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c
index 92ed83f..6a2b5d9 100644
--- a/arch/ia64/kernel/sys_ia64.c
+++ b/arch/ia64/kernel/sys_ia64.c
@@ -129,7 +129,7 @@ ia64_brk (unsigned long brk)
 		goto out;
 
 	/* Check against rlimit.. */
-	rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur;
+	rlim = ACCESS_ONCE(current->signal->rlim[RLIMIT_DATA].rlim_cur);
 	if (rlim < RLIM_INFINITY && brk - mm->start_data > rlim)
 		goto out;
 
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 1857766..fe6d63f 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -91,7 +91,8 @@ dma_mark_clean(void *addr, size_t size)
 inline void
 ia64_set_rbs_bot (void)
 {
-	unsigned long stack_size = current->signal->rlim[RLIMIT_STACK].rlim_max & -16;
+	unsigned long stack_size = ACCESS_ONCE(current->signal->
+			rlim[RLIMIT_STACK].rlim_max) & -16;
 
 	if (stack_size > MAX_USER_STACK_SIZE)
 		stack_size = MAX_USER_STACK_SIZE;
-- 
1.6.4.2


^ permalink raw reply related	[flat|nested] 107+ messages in thread

* [PATCH 04/16] PPC: use ACCESS_ONCE for rlimits
  2009-11-18 14:51                                   ` Jiri Slaby
@ 2009-11-18 14:51                                       ` Jiri Slaby
  2009-11-18 14:51                                     ` [PATCH 02/16] core: do security check under task_lock Jiri Slaby
                                                         ` (15 subsequent siblings)
  16 siblings, 0 replies; 107+ messages in thread
From: Jiri Slaby @ 2009-11-18 14:51 UTC (permalink / raw)
  To: jirislaby
  Cc: mingo, nhorman, sfr, linux-kernel, akpm, marcin.slusarz, tglx,
	mingo, hpa, torvalds, Jiri Slaby, James Morris, Heiko Carstens,
	Benjamin Herrenschmidt, Paul Mackerras, linuxppc-dev

Make sure compiler won't do weird things with limits. E.g. fetching
them twice may return 2 different values after writable limits are
implemented.

Signed-off-by: Jiri Slaby <jslaby@novell.com>
Cc: James Morris <jmorris@namei.org>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: linuxppc-dev@ozlabs.org
---
 arch/powerpc/mm/mmap_64.c                    |    6 ++++--
 arch/powerpc/platforms/cell/spufs/coredump.c |    3 ++-
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/mm/mmap_64.c b/arch/powerpc/mm/mmap_64.c
index 0d957a4..e96a5f6 100644
--- a/arch/powerpc/mm/mmap_64.c
+++ b/arch/powerpc/mm/mmap_64.c
@@ -47,7 +47,8 @@ static inline int mmap_is_legacy(void)
 	if (current->personality & ADDR_COMPAT_LAYOUT)
 		return 1;
 
-	if (current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY)
+	if (ACCESS_ONCE(current->signal->rlim[RLIMIT_STACK].rlim_cur) ==
+			RLIM_INFINITY)
 		return 1;
 
 	return sysctl_legacy_va_layout;
@@ -77,7 +78,8 @@ static unsigned long mmap_rnd(void)
 
 static inline unsigned long mmap_base(void)
 {
-	unsigned long gap = current->signal->rlim[RLIMIT_STACK].rlim_cur;
+	unsigned long gap = ACCESS_ONCE(current->signal->
+			rlim[RLIMIT_STACK].rlim_cur);
 
 	if (gap < MIN_GAP)
 		gap = MIN_GAP;
diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c
index c4d4a19..2abf290 100644
--- a/arch/powerpc/platforms/cell/spufs/coredump.c
+++ b/arch/powerpc/platforms/cell/spufs/coredump.c
@@ -54,7 +54,8 @@ static ssize_t do_coredump_read(int num, struct spu_context *ctx, void *buffer,
  */
 static int spufs_dump_write(struct file *file, const void *addr, int nr, loff_t *foffset)
 {
-	unsigned long limit = current->signal->rlim[RLIMIT_CORE].rlim_cur;
+	unsigned long limit = ACCESS_ONCE(current->signal->
+			rlim[RLIMIT_CORE].rlim_cur);
 	ssize_t written;
 
 	if (*foffset + nr > limit)
-- 
1.6.4.2


^ permalink raw reply related	[flat|nested] 107+ messages in thread

* [PATCH 04/16] PPC: use ACCESS_ONCE for rlimits
@ 2009-11-18 14:51                                       ` Jiri Slaby
  0 siblings, 0 replies; 107+ messages in thread
From: Jiri Slaby @ 2009-11-18 14:51 UTC (permalink / raw)
  To: jirislaby
  Cc: sfr, Jiri Slaby, nhorman, Heiko Carstens, linux-kernel,
	James Morris, linuxppc-dev, mingo, tglx, marcin.slusarz, hpa,
	Paul Mackerras, akpm, torvalds, mingo

Make sure compiler won't do weird things with limits. E.g. fetching
them twice may return 2 different values after writable limits are
implemented.

Signed-off-by: Jiri Slaby <jslaby@novell.com>
Cc: James Morris <jmorris@namei.org>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: linuxppc-dev@ozlabs.org
---
 arch/powerpc/mm/mmap_64.c                    |    6 ++++--
 arch/powerpc/platforms/cell/spufs/coredump.c |    3 ++-
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/mm/mmap_64.c b/arch/powerpc/mm/mmap_64.c
index 0d957a4..e96a5f6 100644
--- a/arch/powerpc/mm/mmap_64.c
+++ b/arch/powerpc/mm/mmap_64.c
@@ -47,7 +47,8 @@ static inline int mmap_is_legacy(void)
 	if (current->personality & ADDR_COMPAT_LAYOUT)
 		return 1;
 
-	if (current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY)
+	if (ACCESS_ONCE(current->signal->rlim[RLIMIT_STACK].rlim_cur) ==
+			RLIM_INFINITY)
 		return 1;
 
 	return sysctl_legacy_va_layout;
@@ -77,7 +78,8 @@ static unsigned long mmap_rnd(void)
 
 static inline unsigned long mmap_base(void)
 {
-	unsigned long gap = current->signal->rlim[RLIMIT_STACK].rlim_cur;
+	unsigned long gap = ACCESS_ONCE(current->signal->
+			rlim[RLIMIT_STACK].rlim_cur);
 
 	if (gap < MIN_GAP)
 		gap = MIN_GAP;
diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c
index c4d4a19..2abf290 100644
--- a/arch/powerpc/platforms/cell/spufs/coredump.c
+++ b/arch/powerpc/platforms/cell/spufs/coredump.c
@@ -54,7 +54,8 @@ static ssize_t do_coredump_read(int num, struct spu_context *ctx, void *buffer,
  */
 static int spufs_dump_write(struct file *file, const void *addr, int nr, loff_t *foffset)
 {
-	unsigned long limit = current->signal->rlim[RLIMIT_CORE].rlim_cur;
+	unsigned long limit = ACCESS_ONCE(current->signal->
+			rlim[RLIMIT_CORE].rlim_cur);
 	ssize_t written;
 
 	if (*foffset + nr > limit)
-- 
1.6.4.2

^ permalink raw reply related	[flat|nested] 107+ messages in thread

* [PATCH 05/16] S390: use ACCESS_ONCE for rlimits
  2009-11-18 14:51                                   ` Jiri Slaby
                                                       ` (3 preceding siblings ...)
  2009-11-18 14:51                                       ` Jiri Slaby
@ 2009-11-18 14:51                                     ` Jiri Slaby
  2009-11-18 14:51                                       ` Jiri Slaby
                                                       ` (11 subsequent siblings)
  16 siblings, 0 replies; 107+ messages in thread
From: Jiri Slaby @ 2009-11-18 14:51 UTC (permalink / raw)
  To: jirislaby
  Cc: mingo, nhorman, sfr, linux-kernel, akpm, marcin.slusarz, tglx,
	mingo, hpa, torvalds, Jiri Slaby, James Morris, Heiko Carstens,
	Martin Schwidefsky, linux390, linux-s390

Make sure compiler won't do weird things with limits. E.g. fetching
them twice may return 2 different values after writable limits are
implemented.

Signed-off-by: Jiri Slaby <jslaby@novell.com>
Cc: James Morris <jmorris@namei.org>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: linux390@de.ibm.com
Cc: linux-s390@vger.kernel.org
---
 arch/s390/mm/mmap.c |    9 ++++++---
 1 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index f4558cc..90d3216 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -40,7 +40,8 @@
 
 static inline unsigned long mmap_base(void)
 {
-	unsigned long gap = current->signal->rlim[RLIMIT_STACK].rlim_cur;
+	unsigned long gap = ACCESS_ONCE(current->signal->
+			rlim[RLIMIT_STACK].rlim_cur);
 
 	if (gap < MIN_GAP)
 		gap = MIN_GAP;
@@ -59,9 +60,11 @@ static inline int mmap_is_legacy(void)
 	if (!is_compat_task())
 		return 1;
 #endif
+	if (ACCESS_ONCE(current->signal->rlim[RLIMIT_STACK].rlim_cur) ==
+			RLIM_INFINITY)
+		return 1;
 	return sysctl_legacy_va_layout ||
-	    (current->personality & ADDR_COMPAT_LAYOUT) ||
-	    current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY;
+	    (current->personality & ADDR_COMPAT_LAYOUT);
 }
 
 #ifndef CONFIG_64BIT
-- 
1.6.4.2


^ permalink raw reply related	[flat|nested] 107+ messages in thread

* [PATCH 06/16] SPARC: use ACCESS_ONCE for rlimits
  2009-11-18 14:51                                   ` Jiri Slaby
@ 2009-11-18 14:51                                       ` Jiri Slaby
  2009-11-18 14:51                                     ` [PATCH 02/16] core: do security check under task_lock Jiri Slaby
                                                         ` (15 subsequent siblings)
  16 siblings, 0 replies; 107+ messages in thread
From: Jiri Slaby @ 2009-11-18 14:51 UTC (permalink / raw)
  To: jirislaby
  Cc: mingo, nhorman, sfr, linux-kernel, akpm, marcin.slusarz, tglx,
	mingo, hpa, torvalds, Jiri Slaby, James Morris, Heiko Carstens,
	David S. Miller, sparclinux

Make sure compiler won't do weird things with limits. E.g. fetching
them twice may return 2 different values after writable limits are
implemented.

Signed-off-by: Jiri Slaby <jslaby@novell.com>
Cc: James Morris <jmorris@namei.org>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: sparclinux@vger.kernel.org
---
 arch/sparc/kernel/sys_sparc_64.c |    6 +++---
 1 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c
index e2d1024..004ed47 100644
--- a/arch/sparc/kernel/sys_sparc_64.c
+++ b/arch/sparc/kernel/sys_sparc_64.c
@@ -361,6 +361,7 @@ EXPORT_SYMBOL(get_fb_unmapped_area);
 void arch_pick_mmap_layout(struct mm_struct *mm)
 {
 	unsigned long random_factor = 0UL;
+	unsigned long gap;
 
 	if (current->flags & PF_RANDOMIZE) {
 		random_factor = get_random_int();
@@ -375,9 +376,10 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
 	 * Fall back to the standard layout if the personality
 	 * bit is set, or if the expected stack growth is unlimited:
 	 */
+	gap = ACCESS_ONCE(current->signal->rlim[RLIMIT_STACK].rlim_cur);
 	if (!test_thread_flag(TIF_32BIT) ||
 	    (current->personality & ADDR_COMPAT_LAYOUT) ||
-	    current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY ||
+	    gap == RLIM_INFINITY ||
 	    sysctl_legacy_va_layout) {
 		mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
 		mm->get_unmapped_area = arch_get_unmapped_area;
@@ -385,9 +387,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
 	} else {
 		/* We know it's 32-bit */
 		unsigned long task_size = STACK_TOP32;
-		unsigned long gap;
 
-		gap = current->signal->rlim[RLIMIT_STACK].rlim_cur;
 		if (gap < 128 * 1024 * 1024)
 			gap = 128 * 1024 * 1024;
 		if (gap > (task_size / 6 * 5))
-- 
1.6.4.2


^ permalink raw reply related	[flat|nested] 107+ messages in thread

* [PATCH 06/16] SPARC: use ACCESS_ONCE for rlimits
@ 2009-11-18 14:51                                       ` Jiri Slaby
  0 siblings, 0 replies; 107+ messages in thread
From: Jiri Slaby @ 2009-11-18 14:51 UTC (permalink / raw)
  To: jirislaby
  Cc: mingo, nhorman, sfr, linux-kernel, akpm, marcin.slusarz, tglx,
	mingo, hpa, torvalds, Jiri Slaby, James Morris, Heiko Carstens,
	David S. Miller, sparclinux

Make sure compiler won't do weird things with limits. E.g. fetching
them twice may return 2 different values after writable limits are
implemented.

Signed-off-by: Jiri Slaby <jslaby@novell.com>
Cc: James Morris <jmorris@namei.org>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: sparclinux@vger.kernel.org
---
 arch/sparc/kernel/sys_sparc_64.c |    6 +++---
 1 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c
index e2d1024..004ed47 100644
--- a/arch/sparc/kernel/sys_sparc_64.c
+++ b/arch/sparc/kernel/sys_sparc_64.c
@@ -361,6 +361,7 @@ EXPORT_SYMBOL(get_fb_unmapped_area);
 void arch_pick_mmap_layout(struct mm_struct *mm)
 {
 	unsigned long random_factor = 0UL;
+	unsigned long gap;
 
 	if (current->flags & PF_RANDOMIZE) {
 		random_factor = get_random_int();
@@ -375,9 +376,10 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
 	 * Fall back to the standard layout if the personality
 	 * bit is set, or if the expected stack growth is unlimited:
 	 */
+	gap = ACCESS_ONCE(current->signal->rlim[RLIMIT_STACK].rlim_cur);
 	if (!test_thread_flag(TIF_32BIT) ||
 	    (current->personality & ADDR_COMPAT_LAYOUT) ||
-	    current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY ||
+	    gap == RLIM_INFINITY ||
 	    sysctl_legacy_va_layout) {
 		mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
 		mm->get_unmapped_area = arch_get_unmapped_area;
@@ -385,9 +387,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
 	} else {
 		/* We know it's 32-bit */
 		unsigned long task_size = STACK_TOP32;
-		unsigned long gap;
 
-		gap = current->signal->rlim[RLIMIT_STACK].rlim_cur;
 		if (gap < 128 * 1024 * 1024)
 			gap = 128 * 1024 * 1024;
 		if (gap > (task_size / 6 * 5))
-- 
1.6.4.2


^ permalink raw reply related	[flat|nested] 107+ messages in thread

* [PATCH 07/16] X86: use ACCESS_ONCE for rlimits
  2009-11-18 14:51                                   ` Jiri Slaby
                                                       ` (5 preceding siblings ...)
  2009-11-18 14:51                                       ` Jiri Slaby
@ 2009-11-18 14:51                                     ` Jiri Slaby
  2009-11-18 14:51                                     ` [PATCH 08/16] FS: " Jiri Slaby
                                                       ` (9 subsequent siblings)
  16 siblings, 0 replies; 107+ messages in thread
From: Jiri Slaby @ 2009-11-18 14:51 UTC (permalink / raw)
  To: jirislaby
  Cc: mingo, nhorman, sfr, linux-kernel, akpm, marcin.slusarz, tglx,
	mingo, hpa, torvalds, Jiri Slaby, James Morris, Heiko Carstens,
	x86

Make sure compiler won't do weird things with limits. E.g. fetching
them twice may return 2 different values after writable limits are
implemented.

Signed-off-by: Jiri Slaby <jslaby@novell.com>
Cc: James Morris <jmorris@namei.org>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: x86@kernel.org
---
 arch/x86/ia32/ia32_aout.c |    2 +-
 arch/x86/mm/mmap.c        |    6 ++++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index 2a4d073..239071b 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -297,7 +297,7 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 	 * size limits imposed on them by creating programs with large
 	 * arrays in the data or bss.
 	 */
-	rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur;
+	rlim = ACCESS_ONCE(current->signal->rlim[RLIMIT_DATA].rlim_cur);
 	if (rlim >= RLIM_INFINITY)
 		rlim = ~0;
 	if (ex.a_data + ex.a_bss > rlim)
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index c8191de..5578c84 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -71,7 +71,8 @@ static int mmap_is_legacy(void)
 	if (current->personality & ADDR_COMPAT_LAYOUT)
 		return 1;
 
-	if (current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY)
+	if (ACCESS_ONCE(current->signal->rlim[RLIMIT_STACK].rlim_cur)
+			== RLIM_INFINITY)
 		return 1;
 
 	return sysctl_legacy_va_layout;
@@ -96,7 +97,8 @@ static unsigned long mmap_rnd(void)
 
 static unsigned long mmap_base(void)
 {
-	unsigned long gap = current->signal->rlim[RLIMIT_STACK].rlim_cur;
+	unsigned long gap =
+		ACCESS_ONCE(current->signal->rlim[RLIMIT_STACK].rlim_cur);
 
 	if (gap < MIN_GAP)
 		gap = MIN_GAP;
-- 
1.6.4.2


^ permalink raw reply related	[flat|nested] 107+ messages in thread

* [PATCH 08/16] FS: use ACCESS_ONCE for rlimits
  2009-11-18 14:51                                   ` Jiri Slaby
                                                       ` (6 preceding siblings ...)
  2009-11-18 14:51                                     ` [PATCH 07/16] X86: " Jiri Slaby
@ 2009-11-18 14:51                                     ` Jiri Slaby
  2009-11-18 14:51                                       ` Jiri Slaby
                                                       ` (8 subsequent siblings)
  16 siblings, 0 replies; 107+ messages in thread
From: Jiri Slaby @ 2009-11-18 14:51 UTC (permalink / raw)
  To: jirislaby
  Cc: mingo, nhorman, sfr, linux-kernel, akpm, marcin.slusarz, tglx,
	mingo, hpa, torvalds, Jiri Slaby, James Morris, Heiko Carstens,
	Alexander Viro, linux-fsdevel

Make sure compiler won't do weird things with limits. E.g. fetching
them twice may return 2 different values after writable limits are
implemented.

Signed-off-by: Jiri Slaby <jslaby@novell.com>
Cc: James Morris <jmorris@namei.org>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: linux-fsdevel@vger.kernel.org
---
 fs/attr.c        |    3 ++-
 fs/binfmt_aout.c |    2 +-
 fs/binfmt_flat.c |    2 +-
 fs/exec.c        |   10 ++++++----
 fs/fcntl.c       |    3 ++-
 fs/file.c        |    2 +-
 fs/proc/array.c  |    4 ++--
 fs/select.c      |    2 +-
 8 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/fs/attr.c b/fs/attr.c
index 96d394b..4ac22be 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -82,7 +82,8 @@ int inode_newsize_ok(const struct inode *inode, loff_t offset)
 	if (inode->i_size < offset) {
 		unsigned long limit;
 
-		limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
+		limit = ACCESS_ONCE(current->signal->rlim[RLIMIT_FSIZE].
+				rlim_cur);
 		if (limit != RLIM_INFINITY && offset > limit)
 			goto out_sig;
 		if (offset > inode->i_sb->s_maxbytes)
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index b639dcf..331e78e 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -246,7 +246,7 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 	 * size limits imposed on them by creating programs with large
 	 * arrays in the data or bss.
 	 */
-	rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur;
+	rlim = ACCESS_ONCE(current->signal->rlim[RLIMIT_DATA].rlim_cur);
 	if (rlim >= RLIM_INFINITY)
 		rlim = ~0;
 	if (ex.a_data + ex.a_bss > rlim)
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index a279665..1b6e96b 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -501,7 +501,7 @@ static int load_flat_file(struct linux_binprm * bprm,
 	 * size limits imposed on them by creating programs with large
 	 * arrays in the data or bss.
 	 */
-	rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur;
+	rlim = ACCESS_ONCE(current->signal->rlim[RLIMIT_DATA].rlim_cur);
 	if (rlim >= RLIM_INFINITY)
 		rlim = ~0;
 	if (data_len + bss_len > rlim) {
diff --git a/fs/exec.c b/fs/exec.c
index ba112bd..3f89090 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -196,7 +196,7 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
 		 *    to work from.
 		 */
 		rlim = current->signal->rlim;
-		if (size > rlim[RLIMIT_STACK].rlim_cur / 4) {
+		if (size > ACCESS_ONCE(rlim[RLIMIT_STACK].rlim_cur) / 4) {
 			put_page(page);
 			return NULL;
 		}
@@ -575,7 +575,7 @@ int setup_arg_pages(struct linux_binprm *bprm,
 
 #ifdef CONFIG_STACK_GROWSUP
 	/* Limit stack size to 1GB */
-	stack_base = current->signal->rlim[RLIMIT_STACK].rlim_max;
+	stack_base = ACCESS_ONCE(current->signal->rlim[RLIMIT_STACK].rlim_max);
 	if (stack_base > (1 << 30))
 		stack_base = 1 << 30;
 
@@ -1503,7 +1503,8 @@ static int format_corename(char *corename, long signr)
 			/* core limit size */
 			case 'c':
 				rc = snprintf(out_ptr, out_end - out_ptr,
-					      "%lu", current->signal->rlim[RLIMIT_CORE].rlim_cur);
+					"%lu", ACCESS_ONCE(current->signal->
+						rlim[RLIMIT_CORE].rlim_cur));
 				if (rc > out_end - out_ptr)
 					goto out;
 				out_ptr += rc;
@@ -1762,7 +1763,8 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 	int retval = 0;
 	int flag = 0;
 	int ispipe = 0;
-	unsigned long core_limit = current->signal->rlim[RLIMIT_CORE].rlim_cur;
+	unsigned long core_limit = ACCESS_ONCE(current->signal->
+			rlim[RLIMIT_CORE].rlim_cur);
 	char **helper_argv = NULL;
 	int helper_argc = 0;
 	int dump_count = 0;
diff --git a/fs/fcntl.c b/fs/fcntl.c
index fc089f2..f03acb5 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -344,7 +344,8 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
 	switch (cmd) {
 	case F_DUPFD:
 	case F_DUPFD_CLOEXEC:
-		if (arg >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
+		if (arg >= ACCESS_ONCE(current->signal->rlim[RLIMIT_NOFILE].
+					rlim_cur))
 			break;
 		err = alloc_fd(arg, cmd == F_DUPFD_CLOEXEC ? O_CLOEXEC : 0);
 		if (err >= 0) {
diff --git a/fs/file.c b/fs/file.c
index 87e1290..a57fe40 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -257,7 +257,7 @@ int expand_files(struct files_struct *files, int nr)
 	 * N.B. For clone tasks sharing a files structure, this test
 	 * will limit the total number of files that can be opened.
 	 */
-	if (nr >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
+	if (nr >= ACCESS_ONCE(current->signal->rlim[RLIMIT_NOFILE].rlim_cur))
 		return -EMFILE;
 
 	/* Do we need to expand? */
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 07f77a7..a959763 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -266,7 +266,7 @@ static inline void task_sig(struct seq_file *m, struct task_struct *p)
 		collect_sigign_sigcatch(p, &ignored, &caught);
 		num_threads = atomic_read(&p->signal->count);
 		qsize = atomic_read(&__task_cred(p)->user->sigpending);
-		qlim = p->signal->rlim[RLIMIT_SIGPENDING].rlim_cur;
+		qlim = ACCESS_ONCE(p->signal->rlim[RLIMIT_SIGPENDING].rlim_cur);
 		unlock_task_sighand(p, &flags);
 	}
 
@@ -491,7 +491,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 		cutime = sig->cutime;
 		cstime = sig->cstime;
 		cgtime = sig->cgtime;
-		rsslim = sig->rlim[RLIMIT_RSS].rlim_cur;
+		rsslim = ACCESS_ONCE(sig->rlim[RLIMIT_RSS].rlim_cur);
 
 		/* add up live thread stats at the group level */
 		if (whole) {
diff --git a/fs/select.c b/fs/select.c
index fd38ce2..ac05132 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -821,7 +821,7 @@ int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds,
  	struct poll_list *walk = head;
  	unsigned long todo = nfds;
 
-	if (nfds > current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
+	if (nfds > ACCESS_ONCE(current->signal->rlim[RLIMIT_NOFILE].rlim_cur))
 		return -EINVAL;
 
 	len = min_t(unsigned int, nfds, N_STACK_PPS);
-- 
1.6.4.2


^ permalink raw reply related	[flat|nested] 107+ messages in thread

* [PATCH 09/16] MM: use ACCESS_ONCE for rlimits
  2009-11-18 14:51                                   ` Jiri Slaby
@ 2009-11-18 14:51                                       ` Jiri Slaby
  2009-11-18 14:51                                     ` [PATCH 02/16] core: do security check under task_lock Jiri Slaby
                                                         ` (15 subsequent siblings)
  16 siblings, 0 replies; 107+ messages in thread
From: Jiri Slaby @ 2009-11-18 14:51 UTC (permalink / raw)
  To: jirislaby
  Cc: mingo, nhorman, sfr, linux-kernel, akpm, marcin.slusarz, tglx,
	mingo, hpa, torvalds, Jiri Slaby, James Morris, Heiko Carstens,
	linux-mm

Make sure compiler won't do weird things with limits. E.g. fetching
them twice may return 2 different values after writable limits are
implemented.

Signed-off-by: Jiri Slaby <jslaby@novell.com>
Cc: James Morris <jmorris@namei.org>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: linux-mm@kvack.org
---
 mm/filemap.c |    3 ++-
 mm/mlock.c   |   15 +++++++++------
 mm/mmap.c    |   16 ++++++++++------
 mm/mremap.c  |    3 ++-
 4 files changed, 23 insertions(+), 14 deletions(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index ef169f3..667e62e 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1971,7 +1971,8 @@ EXPORT_SYMBOL(iov_iter_single_seg_count);
 inline int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk)
 {
 	struct inode *inode = file->f_mapping->host;
-	unsigned long limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
+	unsigned long limit = ACCESS_ONCE(current->signal->
+			rlim[RLIMIT_FSIZE].rlim_cur);
 
         if (unlikely(*pos < 0))
                 return -EINVAL;
diff --git a/mm/mlock.c b/mm/mlock.c
index bd6f0e4..9fcd392 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -25,7 +25,7 @@ int can_do_mlock(void)
 {
 	if (capable(CAP_IPC_LOCK))
 		return 1;
-	if (current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur != 0)
+	if (ACCESS_ONCE(current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur) != 0)
 		return 1;
 	return 0;
 }
@@ -490,7 +490,8 @@ SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len)
 	locked = len >> PAGE_SHIFT;
 	locked += current->mm->locked_vm;
 
-	lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
+	lock_limit = ACCESS_ONCE(current->signal->
+			rlim[RLIMIT_MEMLOCK].rlim_cur);
 	lock_limit >>= PAGE_SHIFT;
 
 	/* check against resource limits */
@@ -553,7 +554,8 @@ SYSCALL_DEFINE1(mlockall, int, flags)
 
 	down_write(&current->mm->mmap_sem);
 
-	lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
+	lock_limit = ACCESS_ONCE(current->signal->rlim[RLIMIT_MEMLOCK].
+			rlim_cur);
 	lock_limit >>= PAGE_SHIFT;
 
 	ret = -ENOMEM;
@@ -587,7 +589,8 @@ int user_shm_lock(size_t size, struct user_struct *user)
 	int allowed = 0;
 
 	locked = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
+	lock_limit = ACCESS_ONCE(current->signal->
+			rlim[RLIMIT_MEMLOCK].rlim_cur);
 	if (lock_limit == RLIM_INFINITY)
 		allowed = 1;
 	lock_limit >>= PAGE_SHIFT;
@@ -621,12 +624,12 @@ int account_locked_memory(struct mm_struct *mm, struct rlimit *rlim,
 
 	down_write(&mm->mmap_sem);
 
-	lim = rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
+	lim = ACCESS_ONCE(rlim[RLIMIT_AS].rlim_cur) >> PAGE_SHIFT;
 	vm   = mm->total_vm + pgsz;
 	if (lim < vm)
 		goto out;
 
-	lim = rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
+	lim = ACCESS_ONCE(rlim[RLIMIT_MEMLOCK].rlim_cur) >> PAGE_SHIFT;
 	vm   = mm->locked_vm + pgsz;
 	if (lim < vm)
 		goto out;
diff --git a/mm/mmap.c b/mm/mmap.c
index 73f5e4b..5017ed5 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -266,7 +266,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
 	 * segment grow beyond its set limit the in case where the limit is
 	 * not page aligned -Ram Gupta
 	 */
-	rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur;
+	rlim = ACCESS_ONCE(current->signal->rlim[RLIMIT_DATA].rlim_cur);
 	if (rlim < RLIM_INFINITY && (brk - mm->start_brk) +
 			(mm->end_data - mm->start_data) > rlim)
 		goto out;
@@ -990,7 +990,8 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
 		unsigned long locked, lock_limit;
 		locked = len >> PAGE_SHIFT;
 		locked += mm->locked_vm;
-		lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
+		lock_limit = ACCESS_ONCE(current->signal->
+				rlim[RLIMIT_MEMLOCK].rlim_cur);
 		lock_limit >>= PAGE_SHIFT;
 		if (locked > lock_limit && !capable(CAP_IPC_LOCK))
 			return -EAGAIN;
@@ -1565,7 +1566,7 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns
 		return -ENOMEM;
 
 	/* Stack limit test */
-	if (size > rlim[RLIMIT_STACK].rlim_cur)
+	if (size > ACCESS_ONCE(rlim[RLIMIT_STACK].rlim_cur))
 		return -ENOMEM;
 
 	/* mlock limit tests */
@@ -1573,7 +1574,8 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns
 		unsigned long locked;
 		unsigned long limit;
 		locked = mm->locked_vm + grow;
-		limit = rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
+		limit = ACCESS_ONCE(rlim[RLIMIT_MEMLOCK].rlim_cur);
+		limit >>= PAGE_SHIFT;
 		if (locked > limit && !capable(CAP_IPC_LOCK))
 			return -ENOMEM;
 	}
@@ -2026,7 +2028,8 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
 		unsigned long locked, lock_limit;
 		locked = len >> PAGE_SHIFT;
 		locked += mm->locked_vm;
-		lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
+		lock_limit = ACCESS_ONCE(current->signal->
+				rlim[RLIMIT_MEMLOCK].rlim_cur);
 		lock_limit >>= PAGE_SHIFT;
 		if (locked > lock_limit && !capable(CAP_IPC_LOCK))
 			return -EAGAIN;
@@ -2240,7 +2243,8 @@ int may_expand_vm(struct mm_struct *mm, unsigned long npages)
 	unsigned long cur = mm->total_vm;	/* pages */
 	unsigned long lim;
 
-	lim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
+	lim = ACCESS_ONCE(current->signal->rlim[RLIMIT_AS].rlim_cur);
+	lim >>= PAGE_SHIFT;
 
 	if (cur + npages > lim)
 		return 0;
diff --git a/mm/mremap.c b/mm/mremap.c
index 97bff25..809641b 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -358,7 +358,8 @@ unsigned long do_mremap(unsigned long addr,
 	if (vma->vm_flags & VM_LOCKED) {
 		unsigned long locked, lock_limit;
 		locked = mm->locked_vm << PAGE_SHIFT;
-		lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
+		lock_limit = ACCESS_ONCE(current->signal->
+				rlim[RLIMIT_MEMLOCK].rlim_cur);
 		locked += new_len - old_len;
 		ret = -EAGAIN;
 		if (locked > lock_limit && !capable(CAP_IPC_LOCK))
-- 
1.6.4.2


^ permalink raw reply related	[flat|nested] 107+ messages in thread

* [PATCH 09/16] MM: use ACCESS_ONCE for rlimits
@ 2009-11-18 14:51                                       ` Jiri Slaby
  0 siblings, 0 replies; 107+ messages in thread
From: Jiri Slaby @ 2009-11-18 14:51 UTC (permalink / raw)
  To: jirislaby
  Cc: mingo, nhorman, sfr, linux-kernel, akpm, marcin.slusarz, tglx,
	mingo, hpa, torvalds, Jiri Slaby, James Morris, Heiko Carstens,
	linux-mm

Make sure compiler won't do weird things with limits. E.g. fetching
them twice may return 2 different values after writable limits are
implemented.

Signed-off-by: Jiri Slaby <jslaby@novell.com>
Cc: James Morris <jmorris@namei.org>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: linux-mm@kvack.org
---
 mm/filemap.c |    3 ++-
 mm/mlock.c   |   15 +++++++++------
 mm/mmap.c    |   16 ++++++++++------
 mm/mremap.c  |    3 ++-
 4 files changed, 23 insertions(+), 14 deletions(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index ef169f3..667e62e 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1971,7 +1971,8 @@ EXPORT_SYMBOL(iov_iter_single_seg_count);
 inline int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk)
 {
 	struct inode *inode = file->f_mapping->host;
-	unsigned long limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
+	unsigned long limit = ACCESS_ONCE(current->signal->
+			rlim[RLIMIT_FSIZE].rlim_cur);
 
         if (unlikely(*pos < 0))
                 return -EINVAL;
diff --git a/mm/mlock.c b/mm/mlock.c
index bd6f0e4..9fcd392 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -25,7 +25,7 @@ int can_do_mlock(void)
 {
 	if (capable(CAP_IPC_LOCK))
 		return 1;
-	if (current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur != 0)
+	if (ACCESS_ONCE(current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur) != 0)
 		return 1;
 	return 0;
 }
@@ -490,7 +490,8 @@ SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len)
 	locked = len >> PAGE_SHIFT;
 	locked += current->mm->locked_vm;
 
-	lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
+	lock_limit = ACCESS_ONCE(current->signal->
+			rlim[RLIMIT_MEMLOCK].rlim_cur);
 	lock_limit >>= PAGE_SHIFT;
 
 	/* check against resource limits */
@@ -553,7 +554,8 @@ SYSCALL_DEFINE1(mlockall, int, flags)
 
 	down_write(&current->mm->mmap_sem);
 
-	lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
+	lock_limit = ACCESS_ONCE(current->signal->rlim[RLIMIT_MEMLOCK].
+			rlim_cur);
 	lock_limit >>= PAGE_SHIFT;
 
 	ret = -ENOMEM;
@@ -587,7 +589,8 @@ int user_shm_lock(size_t size, struct user_struct *user)
 	int allowed = 0;
 
 	locked = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
+	lock_limit = ACCESS_ONCE(current->signal->
+			rlim[RLIMIT_MEMLOCK].rlim_cur);
 	if (lock_limit == RLIM_INFINITY)
 		allowed = 1;
 	lock_limit >>= PAGE_SHIFT;
@@ -621,12 +624,12 @@ int account_locked_memory(struct mm_struct *mm, struct rlimit *rlim,
 
 	down_write(&mm->mmap_sem);
 
-	lim = rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
+	lim = ACCESS_ONCE(rlim[RLIMIT_AS].rlim_cur) >> PAGE_SHIFT;
 	vm   = mm->total_vm + pgsz;
 	if (lim < vm)
 		goto out;
 
-	lim = rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
+	lim = ACCESS_ONCE(rlim[RLIMIT_MEMLOCK].rlim_cur) >> PAGE_SHIFT;
 	vm   = mm->locked_vm + pgsz;
 	if (lim < vm)
 		goto out;
diff --git a/mm/mmap.c b/mm/mmap.c
index 73f5e4b..5017ed5 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -266,7 +266,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
 	 * segment grow beyond its set limit the in case where the limit is
 	 * not page aligned -Ram Gupta
 	 */
-	rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur;
+	rlim = ACCESS_ONCE(current->signal->rlim[RLIMIT_DATA].rlim_cur);
 	if (rlim < RLIM_INFINITY && (brk - mm->start_brk) +
 			(mm->end_data - mm->start_data) > rlim)
 		goto out;
@@ -990,7 +990,8 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
 		unsigned long locked, lock_limit;
 		locked = len >> PAGE_SHIFT;
 		locked += mm->locked_vm;
-		lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
+		lock_limit = ACCESS_ONCE(current->signal->
+				rlim[RLIMIT_MEMLOCK].rlim_cur);
 		lock_limit >>= PAGE_SHIFT;
 		if (locked > lock_limit && !capable(CAP_IPC_LOCK))
 			return -EAGAIN;
@@ -1565,7 +1566,7 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns
 		return -ENOMEM;
 
 	/* Stack limit test */
-	if (size > rlim[RLIMIT_STACK].rlim_cur)
+	if (size > ACCESS_ONCE(rlim[RLIMIT_STACK].rlim_cur))
 		return -ENOMEM;
 
 	/* mlock limit tests */
@@ -1573,7 +1574,8 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns
 		unsigned long locked;
 		unsigned long limit;
 		locked = mm->locked_vm + grow;
-		limit = rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
+		limit = ACCESS_ONCE(rlim[RLIMIT_MEMLOCK].rlim_cur);
+		limit >>= PAGE_SHIFT;
 		if (locked > limit && !capable(CAP_IPC_LOCK))
 			return -ENOMEM;
 	}
@@ -2026,7 +2028,8 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
 		unsigned long locked, lock_limit;
 		locked = len >> PAGE_SHIFT;
 		locked += mm->locked_vm;
-		lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
+		lock_limit = ACCESS_ONCE(current->signal->
+				rlim[RLIMIT_MEMLOCK].rlim_cur);
 		lock_limit >>= PAGE_SHIFT;
 		if (locked > lock_limit && !capable(CAP_IPC_LOCK))
 			return -EAGAIN;
@@ -2240,7 +2243,8 @@ int may_expand_vm(struct mm_struct *mm, unsigned long npages)
 	unsigned long cur = mm->total_vm;	/* pages */
 	unsigned long lim;
 
-	lim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
+	lim = ACCESS_ONCE(current->signal->rlim[RLIMIT_AS].rlim_cur);
+	lim >>= PAGE_SHIFT;
 
 	if (cur + npages > lim)
 		return 0;
diff --git a/mm/mremap.c b/mm/mremap.c
index 97bff25..809641b 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -358,7 +358,8 @@ unsigned long do_mremap(unsigned long addr,
 	if (vma->vm_flags & VM_LOCKED) {
 		unsigned long locked, lock_limit;
 		locked = mm->locked_vm << PAGE_SHIFT;
-		lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
+		lock_limit = ACCESS_ONCE(current->signal->
+				rlim[RLIMIT_MEMLOCK].rlim_cur);
 		locked += new_len - old_len;
 		ret = -EAGAIN;
 		if (locked > lock_limit && !capable(CAP_IPC_LOCK))
-- 
1.6.4.2

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 107+ messages in thread

* [PATCH 10/16] core: use ACCESS_ONCE for rlimits
  2009-11-18 14:51                                   ` Jiri Slaby
                                                       ` (8 preceding siblings ...)
  2009-11-18 14:51                                       ` Jiri Slaby
@ 2009-11-18 14:51                                     ` Jiri Slaby
       [not found]                                     ` <4B040A03.2020508-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
                                                       ` (6 subsequent siblings)
  16 siblings, 0 replies; 107+ messages in thread
From: Jiri Slaby @ 2009-11-18 14:51 UTC (permalink / raw)
  To: jirislaby
  Cc: mingo, nhorman, sfr, linux-kernel, akpm, marcin.slusarz, tglx,
	mingo, hpa, torvalds, Jiri Slaby, James Morris, Heiko Carstens,
	Peter Zijlstra

Make sure the compiler won't do weird things with limits. E.g. fetching
them twice may return two different values once writable limits are
implemented.

Signed-off-by: Jiri Slaby <jslaby@novell.com>
Cc: James Morris <jmorris@namei.org>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <peterz@infradead.org>
---
 kernel/fork.c             |   10 ++++++----
 kernel/perf_event.c       |    3 ++-
 kernel/posix-cpu-timers.c |   16 +++++++++-------
 kernel/sched.c            |    6 ++++--
 kernel/sched_rt.c         |    5 +++--
 kernel/signal.c           |    4 ++--
 kernel/sys.c              |    4 ++--
 7 files changed, 28 insertions(+), 20 deletions(-)

diff --git a/kernel/fork.c b/kernel/fork.c
index 166b8c4..dab13f2 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -822,6 +822,8 @@ void __cleanup_sighand(struct sighand_struct *sighand)
  */
 static void posix_cpu_timers_init_group(struct signal_struct *sig)
 {
+	unsigned long cpu_limit;
+
 	/* Thread group counters. */
 	thread_group_cputime_init(sig);
 
@@ -836,9 +838,9 @@ static void posix_cpu_timers_init_group(struct signal_struct *sig)
 	sig->cputime_expires.virt_exp = cputime_zero;
 	sig->cputime_expires.sched_exp = 0;
 
-	if (sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) {
-		sig->cputime_expires.prof_exp =
-			secs_to_cputime(sig->rlim[RLIMIT_CPU].rlim_cur);
+	cpu_limit = ACCESS_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur);
+	if (cpu_limit != RLIM_INFINITY) {
+		sig->cputime_expires.prof_exp = secs_to_cputime(cpu_limit);
 		sig->cputimer.running = 1;
 	}
 
@@ -1028,7 +1030,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 #endif
 	retval = -EAGAIN;
 	if (atomic_read(&p->real_cred->user->processes) >=
-			p->signal->rlim[RLIMIT_NPROC].rlim_cur) {
+			ACCESS_ONCE(p->signal->rlim[RLIMIT_NPROC].rlim_cur)) {
 		if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) &&
 		    p->real_cred->user != INIT_USER)
 			goto bad_fork_free;
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 7f29643..229ce9a 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -2420,7 +2420,8 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 	if (user_locked > user_lock_limit)
 		extra = user_locked - user_lock_limit;
 
-	lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
+	lock_limit = ACCESS_ONCE(current->signal->
+			rlim[RLIMIT_MEMLOCK].rlim_cur);
 	lock_limit >>= PAGE_SHIFT;
 	locked = vma->vm_mm->locked_vm + extra;
 
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index a7dcce1..4c11521 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -640,7 +640,7 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now)
 				if (expires_le(sig->it[CPUCLOCK_PROF].expires,
 					       exp->cpu))
 					break;
-				i = sig->rlim[RLIMIT_CPU].rlim_cur;
+				i = ACCESS_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur);
 				if (i != RLIM_INFINITY &&
 				    i <= cputime_to_secs(exp->cpu))
 					break;
@@ -1032,9 +1032,10 @@ static void check_thread_timers(struct task_struct *tsk,
 	/*
 	 * Check for the special case thread timers.
 	 */
-	soft = sig->rlim[RLIMIT_RTTIME].rlim_cur;
+	soft = ACCESS_ONCE(sig->rlim[RLIMIT_RTTIME].rlim_cur);
 	if (soft != RLIM_INFINITY) {
-		unsigned long hard = sig->rlim[RLIMIT_RTTIME].rlim_max;
+		unsigned long hard = ACCESS_ONCE(sig->rlim[RLIMIT_RTTIME].
+				rlim_max);
 
 		if (hard != RLIM_INFINITY &&
 		    tsk->rt.timeout > DIV_ROUND_UP(hard, USEC_PER_SEC/HZ)) {
@@ -1122,7 +1123,7 @@ static void check_process_timers(struct task_struct *tsk,
 	unsigned long long sum_sched_runtime, sched_expires;
 	struct list_head *timers = sig->cpu_timers;
 	struct task_cputime cputime;
-	unsigned long cpu_cur_lim = sig->rlim[RLIMIT_CPU].rlim_cur;
+	unsigned long cpu_cur_lim = ACCESS_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur);
 
 	/*
 	 * Don't sample the current process CPU clocks if there are no timers.
@@ -1198,7 +1199,8 @@ static void check_process_timers(struct task_struct *tsk,
 
 	if (cpu_cur_lim != RLIM_INFINITY) {
 		unsigned long psecs = cputime_to_secs(ptime);
-		unsigned long hard = sig->rlim[RLIMIT_CPU].rlim_max;
+		unsigned long hard =
+			ACCESS_ONCE(sig->rlim[RLIMIT_CPU].rlim_max);
 		cputime_t x;
 		if (psecs >= hard) {
 			/*
@@ -1385,7 +1387,7 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
 			return 1;
 	}
 
-	return sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY;
+	return ACCESS_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur) != RLIM_INFINITY;
 }
 
 /*
@@ -1483,7 +1485,7 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
 		 * If the RLIMIT_CPU timer will expire before the
 		 * ITIMER_PROF timer, we have nothing else to do.
 		 */
-		if (tsk->signal->rlim[RLIMIT_CPU].rlim_cur
+		if (ACCESS_ONCE(tsk->signal->rlim[RLIMIT_CPU].rlim_cur)
 		    < cputime_to_secs(*newval))
 			return;
 	}
diff --git a/kernel/sched.c b/kernel/sched.c
index 3c11ae0..15172ea 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -6072,7 +6072,8 @@ int can_nice(const struct task_struct *p, const int nice)
 	/* convert nice value [19,-20] to rlimit style value [1,40] */
 	int nice_rlim = 20 - nice;
 
-	return (nice_rlim <= p->signal->rlim[RLIMIT_NICE].rlim_cur ||
+	return (nice_rlim <= ACCESS_ONCE(p->signal->
+				rlim[RLIMIT_NICE].rlim_cur) ||
 		capable(CAP_SYS_NICE));
 }
 
@@ -6257,7 +6258,8 @@ recheck:
 
 			if (!lock_task_sighand(p, &flags))
 				return -ESRCH;
-			rlim_rtprio = p->signal->rlim[RLIMIT_RTPRIO].rlim_cur;
+			rlim_rtprio = ACCESS_ONCE(p->signal->
+					rlim[RLIMIT_RTPRIO].rlim_cur);
 			unlock_task_sighand(p, &flags);
 
 			/* can't set/change the rt policy */
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index a4d790c..99d4490 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -1683,8 +1683,9 @@ static void watchdog(struct rq *rq, struct task_struct *p)
 	if (!p->signal)
 		return;
 
-	soft = p->signal->rlim[RLIMIT_RTTIME].rlim_cur;
-	hard = p->signal->rlim[RLIMIT_RTTIME].rlim_max;
+	/* max may change after cur was read, this will be fixed next tick */
+	soft = ACCESS_ONCE(p->signal->rlim[RLIMIT_RTTIME].rlim_cur);
+	hard = ACCESS_ONCE(p->signal->rlim[RLIMIT_RTTIME].rlim_max);
 
 	if (soft != RLIM_INFINITY) {
 		unsigned long next;
diff --git a/kernel/signal.c b/kernel/signal.c
index 6705320..e33ece0 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -208,8 +208,8 @@ static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags,
 	user = get_uid(__task_cred(t)->user);
 	atomic_inc(&user->sigpending);
 	if (override_rlimit ||
-	    atomic_read(&user->sigpending) <=
-			t->signal->rlim[RLIMIT_SIGPENDING].rlim_cur)
+	    atomic_read(&user->sigpending) <= ACCESS_ONCE(t->signal->
+	    rlim[RLIMIT_SIGPENDING].rlim_cur))
 		q = kmem_cache_alloc(sigqueue_cachep, flags);
 	if (unlikely(q == NULL)) {
 		atomic_dec(&user->sigpending);
diff --git a/kernel/sys.c b/kernel/sys.c
index 0f86199..52200d4 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -572,8 +572,8 @@ static int set_user(struct cred *new)
 		return -EINVAL;
 	}
 
-	if (atomic_read(&new_user->processes) >=
-				current->signal->rlim[RLIMIT_NPROC].rlim_cur &&
+	if (atomic_read(&new_user->processes) >= ACCESS_ONCE(current->signal->
+				rlim[RLIMIT_NPROC].rlim_cur) &&
 			new_user != INIT_USER) {
 		free_uid(new_user);
 		return -EAGAIN;
-- 
1.6.4.2


^ permalink raw reply related	[flat|nested] 107+ messages in thread

* [PATCH 11/16] misc: use ACCESS_ONCE for rlimits
  2009-11-18 14:51                                   ` Jiri Slaby
@ 2009-11-18 14:51                                         ` Jiri Slaby
  2009-11-18 14:51                                     ` [PATCH 02/16] core: do security check under task_lock Jiri Slaby
                                                           ` (15 subsequent siblings)
  16 siblings, 0 replies; 107+ messages in thread
From: Jiri Slaby @ 2009-11-18 14:51 UTC (permalink / raw)
  To: jirislaby-Re5JQEeQqe8AvxtiuMwx3w
  Cc: mingo-X9Un+BFzKDI, nhorman-2XuSBdqkA4R54TAoqtyWWQ,
	sfr-3FnU+UHB4dNDw9hX6IcOSA, linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b,
	marcin.slusarz-Re5JQEeQqe8AvxtiuMwx3w,
	tglx-hfZtesqFncYOwBW4kG4KsQ, mingo-H+wXaHxf7aLQT0dZR+AlfA,
	hpa-YMNOUZJC4hwAvxtiuMwx3w,
	torvalds-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b, Jiri Slaby,
	James Morris, Heiko Carstens, Roland Dreier, Sean Hefty,
	Hal Rosenstock, linux-rdma-u79uwXL29TY76Z2rM5mHXA

Make sure the compiler won't do weird things with limits. E.g. fetching
them twice may return two different values once writable limits are
implemented.

Signed-off-by: Jiri Slaby <jslaby-Et1tbQHTxzrQT0dZR+AlfA@public.gmane.org>
Cc: James Morris <jmorris-gx6/JNMH7DfYtjvyW6yDsg@public.gmane.org>
Cc: Heiko Carstens <heiko.carstens-tA70FqPdS9bQT0dZR+AlfA@public.gmane.org>
Cc: Andrew Morton <akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b@public.gmane.org>
Cc: Ingo Molnar <mingo-X9Un+BFzKDI@public.gmane.org>
Cc: Roland Dreier <rolandd-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org>
Cc: Sean Hefty <sean.hefty-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Cc: Hal Rosenstock <hal.rosenstock-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
---
 drivers/infiniband/core/umem.c                 |    3 ++-
 drivers/infiniband/hw/ipath/ipath_user_pages.c |    4 ++--
 ipc/mqueue.c                                   |    4 ++--
 ipc/shm.c                                      |    4 ++--
 4 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 6f7c096..90d806b 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -136,7 +136,8 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
 	down_write(&current->mm->mmap_sem);
 
 	locked     = npages + current->mm->locked_vm;
-	lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
+	lock_limit = ACCESS_ONCE(current->signal->
+			rlim[RLIMIT_MEMLOCK].rlim_cur) >> PAGE_SHIFT;
 
 	if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
 		ret = -ENOMEM;
diff --git a/drivers/infiniband/hw/ipath/ipath_user_pages.c b/drivers/infiniband/hw/ipath/ipath_user_pages.c
index 82878e3..f7db156 100644
--- a/drivers/infiniband/hw/ipath/ipath_user_pages.c
+++ b/drivers/infiniband/hw/ipath/ipath_user_pages.c
@@ -59,8 +59,8 @@ static int __get_user_pages(unsigned long start_page, size_t num_pages,
 	size_t got;
 	int ret;
 
-	lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >>
-		PAGE_SHIFT;
+	lock_limit = ACCESS_ONCE(current->signal->
+			rlim[RLIMIT_MEMLOCK].rlim_cur) >> PAGE_SHIFT;
 
 	if (num_pages > lock_limit) {
 		ret = -ENOMEM;
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index ee9d697..d04869f 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -152,8 +152,8 @@ static struct inode *mqueue_get_inode(struct super_block *sb,
 
 			spin_lock(&mq_lock);
 			if (u->mq_bytes + mq_bytes < u->mq_bytes ||
-		 	    u->mq_bytes + mq_bytes >
-			    p->signal->rlim[RLIMIT_MSGQUEUE].rlim_cur) {
+			    u->mq_bytes + mq_bytes > ACCESS_ONCE(p->signal->
+			    rlim[RLIMIT_MSGQUEUE].rlim_cur)) {
 				spin_unlock(&mq_lock);
 				goto out_inode;
 			}
diff --git a/ipc/shm.c b/ipc/shm.c
index 464694e..99de87c 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -761,8 +761,8 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
 			if (euid != shp->shm_perm.uid &&
 			    euid != shp->shm_perm.cuid)
 				goto out_unlock;
-			if (cmd == SHM_LOCK &&
-			    !current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur)
+			if (cmd == SHM_LOCK && !ACCESS_ONCE(current->signal->
+					rlim[RLIMIT_MEMLOCK].rlim_cur))
 				goto out_unlock;
 		}
 
-- 
1.6.4.2

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 107+ messages in thread

* [PATCH 11/16] misc: use ACCESS_ONCE for rlimits
@ 2009-11-18 14:51                                         ` Jiri Slaby
  0 siblings, 0 replies; 107+ messages in thread
From: Jiri Slaby @ 2009-11-18 14:51 UTC (permalink / raw)
  To: jirislaby
  Cc: mingo, nhorman, sfr, linux-kernel, akpm, marcin.slusarz, tglx,
	mingo, hpa, torvalds, Jiri Slaby, James Morris, Heiko Carstens,
	Roland Dreier, Sean Hefty, Hal Rosenstock, linux-rdma

Make sure the compiler won't do weird things with limits. E.g. fetching
them twice may return two different values once writable limits are
implemented.

Signed-off-by: Jiri Slaby <jslaby@novell.com>
Cc: James Morris <jmorris@namei.org>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Roland Dreier <rolandd@cisco.com>
Cc: Sean Hefty <sean.hefty@intel.com>
Cc: Hal Rosenstock <hal.rosenstock@gmail.com>
Cc: linux-rdma@vger.kernel.org
---
 drivers/infiniband/core/umem.c                 |    3 ++-
 drivers/infiniband/hw/ipath/ipath_user_pages.c |    4 ++--
 ipc/mqueue.c                                   |    4 ++--
 ipc/shm.c                                      |    4 ++--
 4 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 6f7c096..90d806b 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -136,7 +136,8 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
 	down_write(&current->mm->mmap_sem);
 
 	locked     = npages + current->mm->locked_vm;
-	lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
+	lock_limit = ACCESS_ONCE(current->signal->
+			rlim[RLIMIT_MEMLOCK].rlim_cur) >> PAGE_SHIFT;
 
 	if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
 		ret = -ENOMEM;
diff --git a/drivers/infiniband/hw/ipath/ipath_user_pages.c b/drivers/infiniband/hw/ipath/ipath_user_pages.c
index 82878e3..f7db156 100644
--- a/drivers/infiniband/hw/ipath/ipath_user_pages.c
+++ b/drivers/infiniband/hw/ipath/ipath_user_pages.c
@@ -59,8 +59,8 @@ static int __get_user_pages(unsigned long start_page, size_t num_pages,
 	size_t got;
 	int ret;
 
-	lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >>
-		PAGE_SHIFT;
+	lock_limit = ACCESS_ONCE(current->signal->
+			rlim[RLIMIT_MEMLOCK].rlim_cur) >> PAGE_SHIFT;
 
 	if (num_pages > lock_limit) {
 		ret = -ENOMEM;
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index ee9d697..d04869f 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -152,8 +152,8 @@ static struct inode *mqueue_get_inode(struct super_block *sb,
 
 			spin_lock(&mq_lock);
 			if (u->mq_bytes + mq_bytes < u->mq_bytes ||
-		 	    u->mq_bytes + mq_bytes >
-			    p->signal->rlim[RLIMIT_MSGQUEUE].rlim_cur) {
+			    u->mq_bytes + mq_bytes > ACCESS_ONCE(p->signal->
+			    rlim[RLIMIT_MSGQUEUE].rlim_cur)) {
 				spin_unlock(&mq_lock);
 				goto out_inode;
 			}
diff --git a/ipc/shm.c b/ipc/shm.c
index 464694e..99de87c 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -761,8 +761,8 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
 			if (euid != shp->shm_perm.uid &&
 			    euid != shp->shm_perm.cuid)
 				goto out_unlock;
-			if (cmd == SHM_LOCK &&
-			    !current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur)
+			if (cmd == SHM_LOCK && !ACCESS_ONCE(current->signal->
+					rlim[RLIMIT_MEMLOCK].rlim_cur))
 				goto out_unlock;
 		}
 
-- 
1.6.4.2


^ permalink raw reply related	[flat|nested] 107+ messages in thread

* [PATCH 12/16] core: rename setrlimit to do_setrlimit
  2009-11-18 14:51                                   ` Jiri Slaby
                                                       ` (10 preceding siblings ...)
       [not found]                                     ` <4B040A03.2020508-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2009-11-18 14:51                                     ` Jiri Slaby
  2009-11-20  6:10                                       ` Américo Wang
  2009-11-18 14:51                                     ` [PATCH 13/16] core: implement getprlimit and setprlimit syscalls Jiri Slaby
                                                       ` (4 subsequent siblings)
  16 siblings, 1 reply; 107+ messages in thread
From: Jiri Slaby @ 2009-11-18 14:51 UTC (permalink / raw)
  To: jirislaby
  Cc: mingo, nhorman, sfr, linux-kernel, akpm, marcin.slusarz, tglx,
	mingo, hpa, torvalds, Jiri Slaby, James Morris, Heiko Carstens

Rename setrlimit to do_setrlimit so that the name makes more sense in
the context of syscalls (i.e. do_* is used for functions that are called
by syscall wrappers but also from other paths).

Signed-off-by: Jiri Slaby <jslaby@novell.com>
Cc: James Morris <jmorris@namei.org>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Ingo Molnar <mingo@elte.hu>
---
 fs/proc/base.c           |    2 +-
 include/linux/resource.h |    2 +-
 kernel/sys.c             |    4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/proc/base.c b/fs/proc/base.c
index b894170..c6589fb 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -576,7 +576,7 @@ static ssize_t limits_write(struct file *file, const char __user *buf,
 		goto put_task;
 	}
 
-	ret = setrlimit(task, i, &new_rlimit);
+	ret = do_setrlimit(task, i, &new_rlimit);
 	if (ret)
 		count = ret;
 
diff --git a/include/linux/resource.h b/include/linux/resource.h
index 4301d67..08beb1a 100644
--- a/include/linux/resource.h
+++ b/include/linux/resource.h
@@ -71,7 +71,7 @@ struct rlimit {
 #include <asm/resource.h>
 
 int getrusage(struct task_struct *p, int who, struct rusage __user *ru);
-int setrlimit(struct task_struct *tsk, unsigned int resource,
+int do_setrlimit(struct task_struct *tsk, unsigned int resource,
 		struct rlimit *new_rlim);
 
 #endif
diff --git a/kernel/sys.c b/kernel/sys.c
index 52200d4..4db6ba6 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1239,7 +1239,7 @@ SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource,
 #endif
 
 /* make sure you are allowed to change @tsk limits before calling this */
-int setrlimit(struct task_struct *tsk, unsigned int resource,
+int do_setrlimit(struct task_struct *tsk, unsigned int resource,
 		struct rlimit *new_rlim)
 {
 	struct rlimit *old_rlim;
@@ -1308,7 +1308,7 @@ SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
 		return -EINVAL;
 	if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
 		return -EFAULT;
-	return setrlimit(current, resource, &new_rlim);
+	return do_setrlimit(current, resource, &new_rlim);
 }
 
 /*
-- 
1.6.4.2


^ permalink raw reply related	[flat|nested] 107+ messages in thread

* [PATCH 13/16] core: implement getprlimit and setprlimit syscalls
  2009-11-18 14:51                                   ` Jiri Slaby
                                                       ` (11 preceding siblings ...)
  2009-11-18 14:51                                     ` [PATCH 12/16] core: rename setrlimit to do_setrlimit Jiri Slaby
@ 2009-11-18 14:51                                     ` Jiri Slaby
  2009-11-20 13:14                                       ` Neil Horman
  2009-11-18 14:52                                     ` [PATCH 14/16] unistd: add __NR_[get|set]prlimit syscall numbers Jiri Slaby
                                                       ` (3 subsequent siblings)
  16 siblings, 1 reply; 107+ messages in thread
From: Jiri Slaby @ 2009-11-18 14:51 UTC (permalink / raw)
  To: jirislaby
  Cc: mingo, nhorman, sfr, linux-kernel, akpm, marcin.slusarz, tglx,
	mingo, hpa, torvalds, Jiri Slaby, James Morris, Heiko Carstens

This patch adds the code to support the sys_setprlimit and sys_getprlimit
syscalls, which modify and read the rlim values of a selected process.

Based on Neil's work. Thank him.

Signed-off-by: Jiri Slaby <jslaby@novell.com>
Cc: Neil Horman <nhorman@tuxdriver.com>
Cc: James Morris <jmorris@namei.org>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Ingo Molnar <mingo@elte.hu>
---
 include/linux/syscalls.h |    4 ++
 kernel/sys.c             |   86 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 90 insertions(+), 0 deletions(-)

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index a990ace..6fd7ba6 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -702,11 +702,15 @@ asmlinkage long sys_newuname(struct new_utsname __user *name);
 
 asmlinkage long sys_getrlimit(unsigned int resource,
 				struct rlimit __user *rlim);
+asmlinkage long sys_getprlimit(pid_t pid, unsigned int resource,
+				struct rlimit __user *rlim);
 #if defined(COMPAT_RLIM_OLD_INFINITY) || !(defined(CONFIG_IA64))
 asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit __user *rlim);
 #endif
 asmlinkage long sys_setrlimit(unsigned int resource,
 				struct rlimit __user *rlim);
+asmlinkage long sys_setprlimit(pid_t pid, unsigned int resource,
+				struct rlimit __user *rlim);
 asmlinkage long sys_getrusage(int who, struct rusage __user *ru);
 asmlinkage long sys_umask(int mask);
 
diff --git a/kernel/sys.c b/kernel/sys.c
index 4db6ba6..273cb21 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1213,6 +1213,61 @@ SYSCALL_DEFINE2(getrlimit, unsigned int, resource, struct rlimit __user *, rlim)
 	}
 }
 
+static int check_prlimit_permission(struct task_struct *task)
+{
+	const struct cred *cred = current_cred(), *tcred;
+	int ret = 0;
+
+	rcu_read_lock();
+	tcred = __task_cred(task);
+	if ((cred->uid != tcred->euid ||
+	     cred->uid != tcred->suid ||
+	     cred->uid != tcred->uid  ||
+	     cred->gid != tcred->egid ||
+	     cred->gid != tcred->sgid ||
+	     cred->gid != tcred->gid) &&
+	     !capable(CAP_SYS_RESOURCE)) {
+		ret = -EPERM;
+	}
+	rcu_read_unlock();
+	return ret;
+}
+
+SYSCALL_DEFINE3(getprlimit, pid_t, pid, unsigned int, resource,
+		struct rlimit __user *, rlim)
+{
+	struct rlimit val;
+	struct task_struct *tsk;
+	int ret;
+
+	if (resource >= RLIM_NLIMITS)
+		return -EINVAL;
+
+	read_lock(&tasklist_lock);
+
+	tsk = find_task_by_vpid(pid);
+	if (!tsk || !tsk->sighand) {
+		ret = -ESRCH;
+		goto err_unlock;
+	}
+
+	ret = check_prlimit_permission(tsk);
+	if (ret)
+		goto err_unlock;
+
+	task_lock(tsk->group_leader);
+	val = tsk->signal->rlim[resource];
+	task_unlock(tsk->group_leader);
+
+	read_unlock(&tasklist_lock);
+
+	return copy_to_user(rlim, &val, sizeof(*rlim)) ? -EFAULT : 0;
+err_unlock:
+	read_unlock(&tasklist_lock);
+	return ret;
+}
+
+
 #ifdef __ARCH_WANT_SYS_OLD_GETRLIMIT
 
 /*
@@ -1311,6 +1366,37 @@ SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
 	return do_setrlimit(current, resource, &new_rlim);
 }
 
+SYSCALL_DEFINE3(setprlimit, pid_t, pid, unsigned int, resource,
+		struct rlimit __user *, rlim)
+{
+	struct task_struct *tsk;
+	struct rlimit new_rlim;
+	int ret;
+
+	if (resource >= RLIM_NLIMITS)
+		return -EINVAL;
+
+	if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
+		return -EFAULT;
+
+	rcu_read_lock();
+	tsk = find_task_by_vpid(pid);
+	if (!tsk) {
+		rcu_read_unlock();
+		return -ESRCH;
+	}
+	get_task_struct(tsk);
+	rcu_read_unlock();
+
+	ret = check_prlimit_permission(tsk);
+	if (!ret)
+		ret = do_setrlimit(tsk, resource, &new_rlim);
+
+	put_task_struct(tsk);
+
+	return ret;
+}
+
 /*
  * It would make sense to put struct rusage in the task_struct,
  * except that would make the task_struct be *really big*.  After
-- 
1.6.4.2


^ permalink raw reply related	[flat|nested] 107+ messages in thread

* [PATCH 14/16] unistd: add __NR_[get|set]prlimit syscall numbers
  2009-11-18 14:51                                   ` Jiri Slaby
                                                       ` (12 preceding siblings ...)
  2009-11-18 14:51                                     ` [PATCH 13/16] core: implement getprlimit and setprlimit syscalls Jiri Slaby
@ 2009-11-18 14:52                                     ` Jiri Slaby
  2009-11-18 14:52                                     ` [PATCH 15/16] COMPAT: add get/put_compat_rlimit Jiri Slaby
                                                       ` (2 subsequent siblings)
  16 siblings, 0 replies; 107+ messages in thread
From: Jiri Slaby @ 2009-11-18 14:52 UTC (permalink / raw)
  To: jirislaby
  Cc: mingo, nhorman, sfr, linux-kernel, akpm, marcin.slusarz, tglx,
	mingo, hpa, torvalds, Jiri Slaby, James Morris, Heiko Carstens

From: Neil Horman <nhorman@tuxdriver.com>

Add __NR_[get|set]prlimit syscall numbers to asm-generic. Add them
also to asm-x86.

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: Jiri Slaby <jslaby@novell.com>
Cc: James Morris <jmorris@namei.org>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/unistd_32.h   |    4 +++-
 arch/x86/include/asm/unistd_64.h   |    4 ++++
 arch/x86/kernel/syscall_table_32.S |    2 ++
 include/asm-generic/unistd.h       |    6 +++++-
 4 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
index 6fb3c20..06dbb34 100644
--- a/arch/x86/include/asm/unistd_32.h
+++ b/arch/x86/include/asm/unistd_32.h
@@ -342,10 +342,12 @@
 #define __NR_pwritev		334
 #define __NR_rt_tgsigqueueinfo	335
 #define __NR_perf_event_open	336
+#define __NR_getprlimit		337
+#define __NR_setprlimit		338
 
 #ifdef __KERNEL__
 
-#define NR_syscalls 337
+#define NR_syscalls 339
 
 #define __ARCH_WANT_IPC_PARSE_VERSION
 #define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index 8d3ad0a..48ea56c 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -661,6 +661,10 @@ __SYSCALL(__NR_pwritev, sys_pwritev)
 __SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo)
 #define __NR_perf_event_open			298
 __SYSCALL(__NR_perf_event_open, sys_perf_event_open)
+#define __NR_getprlimit				299
+__SYSCALL(__NR_getprlimit, sys_getprlimit)
+#define __NR_setprlimit				300
+__SYSCALL(__NR_setprlimit, sys_setprlimit)
 
 #ifndef __NO_STUBS
 #define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index 0157cd2..2dd45cd 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -336,3 +336,5 @@ ENTRY(sys_call_table)
 	.long sys_pwritev
 	.long sys_rt_tgsigqueueinfo	/* 335 */
 	.long sys_perf_event_open
+	.long sys_getprlimit
+	.long sys_setprlimit
diff --git a/include/asm-generic/unistd.h b/include/asm-generic/unistd.h
index d76b66a..950587d 100644
--- a/include/asm-generic/unistd.h
+++ b/include/asm-generic/unistd.h
@@ -622,9 +622,13 @@ __SYSCALL(__NR_move_pages, sys_move_pages)
 __SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo)
 #define __NR_perf_event_open 241
 __SYSCALL(__NR_perf_event_open, sys_perf_event_open)
+#define __NR_getprlimit 242
+__SYSCALL(__NR_getprlimit, sys_getprlimit)
+#define __NR_setprlimit 243
+__SYSCALL(__NR_setprlimit, sys_setprlimit)
 
 #undef __NR_syscalls
-#define __NR_syscalls 242
+#define __NR_syscalls 244
 
 /*
  * All syscalls below here should go away really,
-- 
1.6.4.2


^ permalink raw reply related	[flat|nested] 107+ messages in thread

* [PATCH 15/16] COMPAT: add get/put_compat_rlimit
  2009-11-18 14:51                                   ` Jiri Slaby
                                                       ` (13 preceding siblings ...)
  2009-11-18 14:52                                     ` [PATCH 14/16] unistd: add __NR_[get|set]prlimit syscall numbers Jiri Slaby
@ 2009-11-18 14:52                                     ` Jiri Slaby
  2009-12-30 23:55                                       ` Arnd Bergmann
  2009-11-18 14:52                                     ` [PATCH 16/16] x86: add ia32 compat prlimit syscalls Jiri Slaby
  2009-11-18 23:15                                     ` [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v7) Oleg Nesterov
  16 siblings, 1 reply; 107+ messages in thread
From: Jiri Slaby @ 2009-11-18 14:52 UTC (permalink / raw)
  To: jirislaby
  Cc: mingo, nhorman, sfr, linux-kernel, akpm, marcin.slusarz, tglx,
	mingo, hpa, torvalds, Jiri Slaby, James Morris, Heiko Carstens

Extract get_compat_rlimit() and put_compat_rlimit() from
compat_sys_[gs]etrlimit for later use with the newly added prlimit
syscalls.

Signed-off-by: Jiri Slaby <jslaby@novell.com>
Cc: James Morris <jmorris@namei.org>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Ingo Molnar <mingo@elte.hu>
---
 kernel/compat.c |   61 ++++++++++++++++++++++++++++++++++++-------------------
 1 files changed, 40 insertions(+), 21 deletions(-)

diff --git a/kernel/compat.c b/kernel/compat.c
index f6c204f..af15719 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -274,6 +274,39 @@ asmlinkage long compat_sys_sigprocmask(int how, compat_old_sigset_t __user *set,
 	return ret;
 }
 
+static int get_compat_rlimit(struct rlimit *dst,
+		const struct compat_rlimit __user *src)
+{
+	if (!access_ok(VERIFY_READ, src, sizeof(*src)) ||
+			__get_user(dst->rlim_cur, &src->rlim_cur) ||
+			__get_user(dst->rlim_max, &src->rlim_max))
+		return -EFAULT;
+
+	if (dst->rlim_cur == COMPAT_RLIM_INFINITY)
+		dst->rlim_cur = RLIM_INFINITY;
+	if (dst->rlim_max == COMPAT_RLIM_INFINITY)
+		dst->rlim_max = RLIM_INFINITY;
+	return 0;
+}
+
+static int put_compat_rlimit(const struct rlimit *src,
+		struct compat_rlimit __user *dst)
+{
+	struct rlimit r = *src;
+
+	if (r.rlim_cur > COMPAT_RLIM_INFINITY)
+		r.rlim_cur = COMPAT_RLIM_INFINITY;
+	if (r.rlim_max > COMPAT_RLIM_INFINITY)
+		r.rlim_max = COMPAT_RLIM_INFINITY;
+
+	if (!access_ok(VERIFY_WRITE, dst, sizeof(*dst)) ||
+	    __put_user(r.rlim_cur, &dst->rlim_cur) ||
+	    __put_user(r.rlim_max, &dst->rlim_max))
+		return -EFAULT;
+
+	return 0;
+}
+
 asmlinkage long compat_sys_setrlimit(unsigned int resource,
 		struct compat_rlimit __user *rlim)
 {
@@ -284,17 +317,12 @@ asmlinkage long compat_sys_setrlimit(unsigned int resource,
 	if (resource >= RLIM_NLIMITS)
 		return -EINVAL;
 
-	if (!access_ok(VERIFY_READ, rlim, sizeof(*rlim)) ||
-	    __get_user(r.rlim_cur, &rlim->rlim_cur) ||
-	    __get_user(r.rlim_max, &rlim->rlim_max))
-		return -EFAULT;
+	ret = get_compat_rlimit(&r, rlim);
+	if (ret)
+		return ret;
 
-	if (r.rlim_cur == COMPAT_RLIM_INFINITY)
-		r.rlim_cur = RLIM_INFINITY;
-	if (r.rlim_max == COMPAT_RLIM_INFINITY)
-		r.rlim_max = RLIM_INFINITY;
 	set_fs(KERNEL_DS);
-	ret = sys_setrlimit(resource, (struct rlimit __user *) &r);
+	ret = sys_setrlimit(resource, (struct rlimit __force __user *)&r);
 	set_fs(old_fs);
 	return ret;
 }
@@ -336,19 +364,10 @@ asmlinkage long compat_sys_getrlimit (unsigned int resource,
 	mm_segment_t old_fs = get_fs();
 
 	set_fs(KERNEL_DS);
-	ret = sys_getrlimit(resource, (struct rlimit __user *) &r);
+	ret = sys_getrlimit(resource, (struct rlimit __force __user *)&r);
 	set_fs(old_fs);
-	if (!ret) {
-		if (r.rlim_cur > COMPAT_RLIM_INFINITY)
-			r.rlim_cur = COMPAT_RLIM_INFINITY;
-		if (r.rlim_max > COMPAT_RLIM_INFINITY)
-			r.rlim_max = COMPAT_RLIM_INFINITY;
-
-		if (!access_ok(VERIFY_WRITE, rlim, sizeof(*rlim)) ||
-		    __put_user(r.rlim_cur, &rlim->rlim_cur) ||
-		    __put_user(r.rlim_max, &rlim->rlim_max))
-			return -EFAULT;
-	}
+	if (!ret)
+		ret = put_compat_rlimit(&r, rlim);
 	return ret;
 }
 
-- 
1.6.4.2


^ permalink raw reply related	[flat|nested] 107+ messages in thread

* [PATCH 16/16] x86: add ia32 compat prlimit syscalls
  2009-11-18 14:51                                   ` Jiri Slaby
                                                       ` (14 preceding siblings ...)
  2009-11-18 14:52                                     ` [PATCH 15/16] COMPAT: add get/put_compat_rlimit Jiri Slaby
@ 2009-11-18 14:52                                     ` Jiri Slaby
  2009-11-18 23:15                                     ` [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v7) Oleg Nesterov
  16 siblings, 0 replies; 107+ messages in thread
From: Jiri Slaby @ 2009-11-18 14:52 UTC (permalink / raw)
  To: jirislaby
  Cc: mingo, nhorman, sfr, linux-kernel, akpm, marcin.slusarz, tglx,
	mingo, hpa, torvalds, Jiri Slaby, James Morris, Heiko Carstens

To support 32/64-bit compatibility (rlimit structure contains 2 longs)
for prlimit syscalls, add compat wrappers for them.

Signed-off-by: Jiri Slaby <jslaby@novell.com>
Cc: James Morris <jmorris@namei.org>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/ia32/ia32entry.S |    2 ++
 kernel/compat.c           |   32 ++++++++++++++++++++++++++++++++
 2 files changed, 34 insertions(+), 0 deletions(-)

diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 581b056..c61ced2 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -841,4 +841,6 @@ ia32_sys_call_table:
 	.quad compat_sys_pwritev
 	.quad compat_sys_rt_tgsigqueueinfo	/* 335 */
 	.quad sys_perf_event_open
+	.quad compat_sys_getprlimit
+	.quad compat_sys_setprlimit
 ia32_syscall_end:
diff --git a/kernel/compat.c b/kernel/compat.c
index af15719..97fda34 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -371,6 +371,38 @@ asmlinkage long compat_sys_getrlimit (unsigned int resource,
 	return ret;
 }
 
+asmlinkage long compat_sys_setprlimit(pid_t pid, unsigned int resource,
+		struct compat_rlimit __user *rlim)
+{
+	mm_segment_t old_fs = get_fs ();
+	struct rlimit r;
+	int ret;
+
+	ret = get_compat_rlimit(&r, rlim);
+	if (ret)
+		return ret;
+
+	set_fs(KERNEL_DS);
+	ret = sys_setprlimit(pid, resource, (struct rlimit __force __user *)&r);
+	set_fs(old_fs);
+	return ret;
+}
+
+asmlinkage long compat_sys_getprlimit(pid_t pid, unsigned int resource,
+		struct compat_rlimit __user *rlim)
+{
+	mm_segment_t old_fs = get_fs();
+	struct rlimit r;
+	int ret;
+
+	set_fs(KERNEL_DS);
+	ret = sys_getprlimit(pid, resource, (struct rlimit __force __user *)&r);
+	set_fs(old_fs);
+	if (!ret)
+		ret = put_compat_rlimit(&r, rlim);
+	return ret;
+}
+
 int put_compat_rusage(const struct rusage *r, struct compat_rusage __user *ru)
 {
 	if (!access_ok(VERIFY_WRITE, ru, sizeof(*ru)) ||
-- 
1.6.4.2


^ permalink raw reply related	[flat|nested] 107+ messages in thread

* Re: [PATCH 09/16] MM: use ACCESS_ONCE for rlimits
  2009-11-18 14:51                                       ` Jiri Slaby
@ 2009-11-18 15:29                                         ` Linus Torvalds
  -1 siblings, 0 replies; 107+ messages in thread
From: Linus Torvalds @ 2009-11-18 15:29 UTC (permalink / raw)
  To: Jiri Slaby
  Cc: jirislaby, Ingo Molnar, nhorman, sfr, Linux Kernel Mailing List,
	Andrew Morton, marcin.slusarz, tglx, mingo, H. Peter Anvin,
	James Morris, Heiko Carstens, linux-mm


I hate these patches, but not because they start using ACCESS_ONCE() per 
se, but because they turn an already much too complex expression into the 
ExpressionFromHell(tm).

The fact that you had to split a single expression over multiple lines in 
multiple places should really have made you realize that something is 
wrong.

So I really would suggest that rather than this kind of mess:

On Wed, 18 Nov 2009, Jiri Slaby wrote:
>
> -	unsigned long limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
> +	unsigned long limit = ACCESS_ONCE(current->signal->
> +			rlim[RLIMIT_FSIZE].rlim_cur);

you turn it into something more like

	static inline unsigned long tsk_get_rlimit(struct task_struct *tsk, int limit)
	{
		return ACCESS_ONCE(tsk->signal->rlim[limit].rlim_cur);
	}

	static inline unsigned long get_rlimit(int limit)
	{
		return tsk_get_rlimit(current, limit);
	}

and then instead of adding ACCESS_ONCE() to many places that are already 
ugly, you'd have made the above kind of expression be

	unsigned long limit = get_rlimit(RLIMIT_FSIZE);

instead.

Doesn't that look saner?

Yeah, yeah, there's a few places that actually take the address of 
'tsk->signal->rlim' and do this all by hand, so you'd not get rid of all 
of these things and it's not a matter of wrapping things in some new fancy 
abstraction layer, but at least you'd get rid of the overly complex 
expressions that span multiple lines.

With that, I'd probably like the series a whole lot better.

Which is not to say that I'm entirely convinced about get/setprlimit() in 
the first place, but that's a whole different issue.

		Linus

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 09/16] MM: use ACCESS_ONCE for rlimits
@ 2009-11-18 15:29                                         ` Linus Torvalds
  0 siblings, 0 replies; 107+ messages in thread
From: Linus Torvalds @ 2009-11-18 15:29 UTC (permalink / raw)
  To: Jiri Slaby
  Cc: jirislaby, Ingo Molnar, nhorman, sfr, Linux Kernel Mailing List,
	Andrew Morton, marcin.slusarz, tglx, mingo, H. Peter Anvin,
	James Morris, Heiko Carstens, linux-mm


I hate these patches, but not because they start using ACCESS_ONCE() per 
se, but because they turn an already much too complex expression into the 
ExpressionFromHell(tm).

The fact that you had to split a single expression over multiple lines in 
multiple places should really have made you realize that something is 
wrong.

So I really would suggest that rather than this kind of mess:

On Wed, 18 Nov 2009, Jiri Slaby wrote:
>
> -	unsigned long limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
> +	unsigned long limit = ACCESS_ONCE(current->signal->
> +			rlim[RLIMIT_FSIZE].rlim_cur);

you turn it into something more like

	static inline unsigned long tsk_get_rlimit(struct task_struct *tsk, int limit)
	{
		return ACCESS_ONCE(tsk->signal->rlim[limit].rlim_cur);
	}

	static inline unsigned long get_rlimit(int limit)
	{
		return tsk_get_rlimit(current, limit);
	}

and then instead of adding ACCESS_ONCE() to many places that are already 
ugly, you'd have made the above kind of expression be

	unsigned long limit = get_rlimit(RLIMIT_FSIZE);

instead.

Doesn't that look saner?

Yeah, yeah, there's a few places that actually take the address of 
'tsk->signal->rlim' and do this all by hand, so you'd not get rid of all 
of these things and it's not a matter of wrapping things in some new fancy 
abstraction layer, but at least you'd get rid of the overly complex 
expressions that span multiple lines.

With that, I'd probably like the series a whole lot better.

Which is not to say that I'm entirely convinced about get/setprlimit() in 
the first place, but that's a whole different issue.

		Linus

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 01/16] core: posix-cpu-timers, cleanup rlimits usage
  2009-11-18 14:51                                     ` [PATCH 01/16] core: posix-cpu-timers, cleanup rlimits usage Jiri Slaby
@ 2009-11-18 16:48                                       ` Peter Zijlstra
  0 siblings, 0 replies; 107+ messages in thread
From: Peter Zijlstra @ 2009-11-18 16:48 UTC (permalink / raw)
  To: Jiri Slaby
  Cc: jirislaby, mingo, nhorman, sfr, linux-kernel, akpm,
	marcin.slusarz, tglx, mingo, hpa, torvalds, James Morris,
	Heiko Carstens


Would've been nice to start a new thread for the new series.


^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 06/16] SPARC: use ACCESS_ONCE for rlimits
  2009-11-18 14:51                                       ` Jiri Slaby
@ 2009-11-18 17:55                                         ` David Miller
  -1 siblings, 0 replies; 107+ messages in thread
From: David Miller @ 2009-11-18 17:55 UTC (permalink / raw)
  To: jslaby
  Cc: jirislaby, mingo, nhorman, sfr, linux-kernel, akpm,
	marcin.slusarz, tglx, mingo, hpa, torvalds, jmorris,
	heiko.carstens, sparclinux

From: Jiri Slaby <jslaby@novell.com>
Date: Wed, 18 Nov 2009 15:51:52 +0100

> Make sure compiler won't do weird things with limits. E.g. fetching
> them twice may return 2 different values after writable limits are
> implemented.
> 
> Signed-off-by: Jiri Slaby <jslaby@novell.com>

Acked-by: David S. Miller <davem@davemloft.net>

But I wonder have we really seen the compiler create this
kind of situation?  Or is this patch series based upon the
fact that it "could happen"?

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 06/16] SPARC: use ACCESS_ONCE for rlimits
@ 2009-11-18 17:55                                         ` David Miller
  0 siblings, 0 replies; 107+ messages in thread
From: David Miller @ 2009-11-18 17:55 UTC (permalink / raw)
  To: jslaby
  Cc: jirislaby, mingo, nhorman, sfr, linux-kernel, akpm,
	marcin.slusarz, tglx, mingo, hpa, torvalds, jmorris,
	heiko.carstens, sparclinux

From: Jiri Slaby <jslaby@novell.com>
Date: Wed, 18 Nov 2009 15:51:52 +0100

> Make sure compiler won't do weird things with limits. E.g. fetching
> them twice may return 2 different values after writable limits are
> implemented.
> 
> Signed-off-by: Jiri Slaby <jslaby@novell.com>

Acked-by: David S. Miller <davem@davemloft.net>

But I wonder have we really seen the compiler create this
kind of situation?  Or is this patch series based upon the
fact that it "could happen"?

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 06/16] SPARC: use ACCESS_ONCE for rlimits
  2009-11-18 17:55                                         ` David Miller
@ 2009-11-18 18:09                                           ` Linus Torvalds
  -1 siblings, 0 replies; 107+ messages in thread
From: Linus Torvalds @ 2009-11-18 18:09 UTC (permalink / raw)
  To: David Miller
  Cc: jslaby, jirislaby, mingo, nhorman, sfr, linux-kernel, akpm,
	marcin.slusarz, tglx, mingo, hpa, jmorris, heiko.carstens,
	sparclinux



On Wed, 18 Nov 2009, David Miller wrote:
> 
> But I wonder have we really seen the compiler create this
> kind of situation?  Or is this patch series based upon the
> fact that it "could happen"?

We have seen things like that in practice - where the compiler re-loads a 
value twice, rather than use a copy like the source code did.

That said, it's rare, to the point of being _almost_ unheard of. It's much 
more common that gcc generates bad code by doing the reverse (trying to 
keep things in registers and spilling, instead of just re-generating the 
value). There are very very few cases where ACCESS_ONCE() actually matters 
for correctness.

Because in practice, the value is either modified some way (and spilling 
it is cheaper than re-computing the modification), or there's just some 
operation that might act as a memory barrier and alias the original memory 
location so gcc wouldn't dare re-load anyway.

However, one of the nice things about ACCESS_ONCE() is that it's also a 
big flag for "this value is loaded without locking, on purpose".

So even if it doesn't then actually change code generation significantly 
(most common end result especially on x86 that has most ALU instructions 
taking memory operations: gcc generates slightly worse code due to getting 
nervous about 'volatile' and not combining instructions), it's a big 
honking piece of programmer documentation: look out!

It's basically a heads-up for lockless programming like RCU. As such, it 
can be something scary, but when it's done right, it's a good thing. And I 
think that for rlimits, we do have a good reason to say "sure, somebody 
else may change the limit values concurrently, but we don't really care: 
we just want _one_ value, whether it's the old or the new one".

That said, the patch you Ack'ed is in the series of patches that I hated, 
and Nak'ed for other reasons (namely "-EEXPRESSIONTOOCOMPLICATEDTOLIVE").

			Linus

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 06/16] SPARC: use ACCESS_ONCE for rlimits
@ 2009-11-18 18:09                                           ` Linus Torvalds
  0 siblings, 0 replies; 107+ messages in thread
From: Linus Torvalds @ 2009-11-18 18:09 UTC (permalink / raw)
  To: David Miller
  Cc: jslaby, jirislaby, mingo, nhorman, sfr, linux-kernel, akpm,
	marcin.slusarz, tglx, mingo, hpa, jmorris, heiko.carstens,
	sparclinux



On Wed, 18 Nov 2009, David Miller wrote:
> 
> But I wonder have we really seen the compiler create this
> kind of situation?  Or is this patch series based upon the
> fact that it "could happen"?

We have seen things like that in practice - where the compiler re-loads a 
value twice, rather than use a copy like the source code did.

That said, it's rare, to the point of being _almost_ unheard of. It's much 
more common that gcc generates bad code by doing the reverse (trying to 
keep things in registers and spilling, instead of just re-generating the 
value). There are very very few cases where ACCESS_ONCE() actually matters 
for correctness.

Because in practice, the value is either modified some way (and spilling 
it is cheaper than re-computing the modification), or there's just some 
operation that might act as a memory barrier and alias the original memory 
location so gcc wouldn't dare re-load anyway.

However, one of the nice things about ACCESS_ONCE() is that it's also a 
big flag for "this value is loaded without locking, on purpose".

So even if it doesn't then actually change code generation significantly 
(most common end result especially on x86 that has most ALU instructions 
taking memory operations: gcc generates slightly worse code due to getting 
nervous about 'volatile' and not combining instructions), it's a big 
honking piece of programmer documentation: look out!

It's basically a heads-up for lockless programming like RCU. As such, it 
can be something scary, but when it's done right, it's a good thing. And I 
think that for rlimits, we do have a good reason to say "sure, somebody 
else may change the limit values concurrently, but we don't really care: 
we just want _one_ value, whether it's the old or the new one".

That said, the patch you Ack'ed is in the series of patches that I hated, 
and Nak'ed for other reasons (namely "-EEXPRESSIONTOOCOMPLICATEDTOLIVE").

			Linus

^ permalink raw reply	[flat|nested] 107+ messages in thread

* RE: [PATCH 03/16] IA64: use ACCESS_ONCE for rlimits
  2009-11-18 14:51                                       ` Jiri Slaby
@ 2009-11-18 18:56                                         ` Luck, Tony
  -1 siblings, 0 replies; 107+ messages in thread
From: Luck, Tony @ 2009-11-18 18:56 UTC (permalink / raw)
  To: Jiri Slaby, jirislaby
  Cc: mingo, nhorman, sfr, linux-kernel, akpm, marcin.slusarz, tglx,
	mingo, hpa, torvalds, James Morris, Heiko Carstens, linux-ia64

> Make sure compiler won't do weird things with limits. E.g. fetching
> them twice may return 2 different values after writable limits are
> implemented.

-	if (size > task->signal->rlim[RLIMIT_MEMLOCK].rlim_cur)
+	if (size > ACCESS_ONCE(task->signal->rlim[RLIMIT_MEMLOCK].rlim_cur))

I don't see how this helps.  If someone else is changing limits while
we are looking at them, then there is a race.  We either get the old
or the new value.  Using ACCESS_ONCE (which on ia64 forces a "volatile"
access, which will make the compiler generate "ld.acq" rather than a
plain "ld") won't make any difference to this race.

Please explain what issue you see with the current code.

-Tony


^ permalink raw reply	[flat|nested] 107+ messages in thread

* RE: [PATCH 03/16] IA64: use ACCESS_ONCE for rlimits
@ 2009-11-18 18:56                                         ` Luck, Tony
  0 siblings, 0 replies; 107+ messages in thread
From: Luck, Tony @ 2009-11-18 18:56 UTC (permalink / raw)
  To: Jiri Slaby, jirislaby
  Cc: mingo, nhorman, sfr, linux-kernel, akpm, marcin.slusarz, tglx,
	mingo, hpa, torvalds, James Morris, Heiko Carstens, linux-ia64

> Make sure compiler won't do weird things with limits. E.g. fetching
> them twice may return 2 different values after writable limits are
> implemented.

-	if (size > task->signal->rlim[RLIMIT_MEMLOCK].rlim_cur)
+	if (size > ACCESS_ONCE(task->signal->rlim[RLIMIT_MEMLOCK].rlim_cur))

I don't see how this helps.  If someone else is changing limits while
we are looking at them, then there is a race.  We either get the old
or the new value.  Using ACCESS_ONCE (which on ia64 forces a "volatile"
access, which will make the compiler generate "ld.acq" rather than a
plain "ld") won't make any difference to this race.

Please explain what issue you see with the current code.

-Tony


^ permalink raw reply	[flat|nested] 107+ messages in thread

* RE: [PATCH 03/16] IA64: use ACCESS_ONCE for rlimits
  2009-11-18 18:56                                         ` Luck, Tony
@ 2009-11-18 19:48                                           ` Linus Torvalds
  -1 siblings, 0 replies; 107+ messages in thread
From: Linus Torvalds @ 2009-11-18 19:48 UTC (permalink / raw)
  To: Luck, Tony
  Cc: Jiri Slaby, jirislaby, mingo, nhorman, sfr, linux-kernel, akpm,
	marcin.slusarz, tglx, mingo, hpa, James Morris, Heiko Carstens,
	linux-ia64



On Wed, 18 Nov 2009, Luck, Tony wrote:
>
> > Make sure compiler won't do weird things with limits. E.g. fetching
> > them twice may return 2 different values after writable limits are
> > implemented.
> 
> -	if (size > task->signal->rlim[RLIMIT_MEMLOCK].rlim_cur)
> +	if (size > ACCESS_ONCE(task->signal->rlim[RLIMIT_MEMLOCK].rlim_cur))
> 
> I don't see how this helps.  If someone else is changing limits while
> we are looking at them, then there is a race.  We either get the old
> or the new value.  Using ACCESS_ONCE (which on ia64 forces a "volatile"
> access, which will make the compiler generate "ld.acq" rather than a
> plain "ld") won't make any difference to this race.
> 
> Please explain what issue you see with the current code.

The problem may not be in _that_ particular code, but imagine code like 
this:

	if (a > MEMORY) {
		do1;
		do2;
		do3;
	} else {
		do2;
	}

where the compiler could actually turn this into (having noticed that 
neither "do1" nor "do2" can alias with MEMORY):

	if (a > MEMORY)
		do1;
	do2;
	if (a > MEMORY)
		do3;

and now what you end up having is a situation where it's possible that 
"do1" gets executed but "do3" does not (or vice versa).

Notice how when you look at the code, it looks impossible, and then you 
get subtle security bugs.

Now, you may say that "but _my_ code doesn't have that "else" statement", 
and maybe you're right. In fact, maybe the source code was really just

	if (a > MEMORY)
		return something();
	return do_something_else();

and you are _sure_ that the ACCESS_ONCE() cannot possibly be needed. But 
what if those 'something()' and 'do_something_else()' were inlines, and 
the compiler internally turns it into

	if (a > MEMORY) {
		ret = something();
	} else {
		ret = do_something_else();
	}
	return ret;

and you now hit the case above where part of it was shared after all, and 
the compiler for some strange reason (register reload, whatever) ends up 
doing it as two conditionals after all?

The thing is, you can't _prove_ that the compiler won't do it, especially 
if you end up changing the code later (without thinking about the fact 
that you're loading things without locking).

So the rule is: if you access unlocked values, you use ACCESS_ONCE(). You 
don't say "but it can't matter". Because you simply don't know.

			Linus

^ permalink raw reply	[flat|nested] 107+ messages in thread

* RE: [PATCH 03/16] IA64: use ACCESS_ONCE for rlimits
@ 2009-11-18 19:48                                           ` Linus Torvalds
  0 siblings, 0 replies; 107+ messages in thread
From: Linus Torvalds @ 2009-11-18 19:48 UTC (permalink / raw)
  To: Luck, Tony
  Cc: Jiri Slaby, jirislaby, mingo, nhorman, sfr, linux-kernel, akpm,
	marcin.slusarz, tglx, mingo, hpa, James Morris, Heiko Carstens,
	linux-ia64



On Wed, 18 Nov 2009, Luck, Tony wrote:
>
> > Make sure compiler won't do weird things with limits. E.g. fetching
> > them twice may return 2 different values after writable limits are
> > implemented.
> 
> -	if (size > task->signal->rlim[RLIMIT_MEMLOCK].rlim_cur)
> +	if (size > ACCESS_ONCE(task->signal->rlim[RLIMIT_MEMLOCK].rlim_cur))
> 
> I don't see how this helps.  If someone else is changing limits while
> we are looking at them, then there is a race.  We either get the old
> or the new value.  Using ACCESS_ONCE (which on ia64 forces a "volatile"
> access, which will make the compiler generate "ld.acq" rather than a
> plain "ld") won't make any difference to this race.
> 
> Please explain what issue you see with the current code.

The problem may not be in _that_ particular code, but imagine code like 
this:

	if (a > MEMORY) {
		do1;
		do2;
		do3;
	} else {
		do2;
	}

where the compiler could actually turn this into (having noticed that 
neither "do1" nor "do2" can alias with MEMORY):

	if (a > MEMORY)
		do1;
	do2;
	if (a > MEMORY)
		do3;

and now what you end up having is a situation where it's possible that 
"do1" gets executed but "do3" does not (or vice versa).

Notice how when you look at the code, it looks impossible, and then you 
get subtle security bugs.

Now, you may say that "but _my_ code doesn't have that "else" statement", 
and maybe you're right. In fact, maybe the source code was really just

	if (a > MEMORY)
		return something();
	return do_something_else();

and you are _sure_ that the ACCESS_ONCE() cannot possibly be needed. But 
what if those 'something()' and 'do_something_else()' were inlines, and 
the compiler internally turns it into

	if (a > MEMORY) {
		ret = something();
	} else {
		ret = do_something_else();
	}
	return ret;

and you now hit the case above where part of it was shared after all, and 
the compiler for some strange reason (register reload, whatever) ends up 
doing it as two conditionals after all?

The thing is, you can't _prove_ that the compiler won't do it, especially 
if you end up changing the code later (without thinking about the fact 
that you're loading things without locking).

So the rule is: if you access unlocked values, you use ACCESS_ONCE(). You 
don't say "but it can't matter". Because you simply don't know.

			Linus

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 02/16] core: do security check under task_lock
  2009-11-18 14:51                                     ` [PATCH 02/16] core: do security check under task_lock Jiri Slaby
@ 2009-11-18 21:47                                       ` James Morris
  0 siblings, 0 replies; 107+ messages in thread
From: James Morris @ 2009-11-18 21:47 UTC (permalink / raw)
  To: Jiri Slaby
  Cc: jirislaby, Ingo Molnar, nhorman, Stephen Rothwell, linux-kernel,
	Andrew Morton, marcin.slusarz, tglx, mingo, hpa, Linus Torvalds,
	Heiko Carstens, linux-security-module

On Wed, 18 Nov 2009, Jiri Slaby wrote:

> Do security_task_setrlimit under task_lock. Other tasks may
> change limits under our hands while we are checking limits
> inside the function. From now on, they can't.
> 

Acked-by: James Morris <jmorris@namei.org>


> Signed-off-by: Jiri Slaby <jslaby@novell.com>
> Cc: James Morris <jmorris@namei.org>
> Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
> Cc: Andrew Morton <akpm@linux-foundation.org>
> Cc: Ingo Molnar <mingo@elte.hu>
> ---
>  kernel/sys.c |   16 +++++++---------
>  1 files changed, 7 insertions(+), 9 deletions(-)
> 
> diff --git a/kernel/sys.c b/kernel/sys.c
> index 605ab9c..0f86199 100644
> --- a/kernel/sys.c
> +++ b/kernel/sys.c
> @@ -1243,7 +1243,7 @@ int setrlimit(struct task_struct *tsk, unsigned int resource,
>  		struct rlimit *new_rlim)
>  {
>  	struct rlimit *old_rlim;
> -	int retval;
> +	int retval = 0;
>  
>  	if (new_rlim->rlim_cur > new_rlim->rlim_max)
>  		return -EINVAL;
> @@ -1260,10 +1260,6 @@ int setrlimit(struct task_struct *tsk, unsigned int resource,
>  		}
>  	}
>  
> -	retval = security_task_setrlimit(tsk, resource, new_rlim);
> -	if (retval)
> -		goto out;
> -
>  	if (resource == RLIMIT_CPU && new_rlim->rlim_cur == 0) {
>  		/*
>  		 * The caller is asking for an immediate RLIMIT_CPU
> @@ -1276,11 +1272,13 @@ int setrlimit(struct task_struct *tsk, unsigned int resource,
>  
>  	old_rlim = tsk->signal->rlim + resource;
>  	task_lock(tsk->group_leader);
> -	if ((new_rlim->rlim_max <= old_rlim->rlim_max) ||
> -				capable(CAP_SYS_RESOURCE))
> -		*old_rlim = *new_rlim;
> -	else
> +	if ((new_rlim->rlim_max > old_rlim->rlim_max) &&
> +				!capable(CAP_SYS_RESOURCE))
>  		retval = -EPERM;
> +	if (!retval)
> +		retval = security_task_setrlimit(tsk, resource, new_rlim);
> +	if (!retval)
> +		*old_rlim = *new_rlim;
>  	task_unlock(tsk->group_leader);
>  
>  	if (retval || resource != RLIMIT_CPU)
> -- 
> 1.6.4.2
> 

-- 
James Morris
<jmorris@namei.org>

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v7)
  2009-11-18 14:51                                   ` Jiri Slaby
                                                       ` (15 preceding siblings ...)
  2009-11-18 14:52                                     ` [PATCH 16/16] x86: add ia32 compat prlimit syscalls Jiri Slaby
@ 2009-11-18 23:15                                     ` Oleg Nesterov
  2009-11-19 15:43                                       ` Jiri Slaby
  16 siblings, 1 reply; 107+ messages in thread
From: Oleg Nesterov @ 2009-11-18 23:15 UTC (permalink / raw)
  To: Jiri Slaby
  Cc: Ingo Molnar, Neil Horman, Stephen Rothwell, linux-kernel, akpm,
	marcin.slusarz, tglx, mingo, hpa, Linus Torvalds

On 11/18, Jiri Slaby wrote:
>
> On the last point: I added explicit ACCESS_ONCE all over there. If you
> don't like I will trash it.

Just curious, why?

I mean, do you have any example of the "bad" behaviour which is
fixed by ACCESS_ONCE() ?

> The only remaining weird user is in
> kernel/acct.c:
>         /*
>          * Accounting records are not subject to resource limits.
>          */
>         flim = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
>         current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
>         file->f_op->write(file, (char *)&ac,
>                                sizeof(acct_t), &file->f_pos);
>         current->signal->rlim[RLIMIT_FSIZE].rlim_cur = flim;
>
> It means that threads of the process with PACCT caps have unlimited file
> size for a short while. If there is setrlimit in between, it gets wiped
> out as well.

This is called when the whole thread-group exits, there are no
live threads except current.

We don't care if the new rlimit is lost afaics, but if RLIMIT_FSIZE
is changed in between, ->write() can fail. Not sure what we can do,
perhaps just ignore this problem ;)

At least, given that do_acct_process() does override_creds(), an
ordinary user can't fool the accounting.

Oleg.


^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 03/16] IA64: use ACCESS_ONCE for rlimits
  2009-11-18 19:48                                           ` Linus Torvalds
@ 2009-11-19  2:28                                             ` Ingo Molnar
  -1 siblings, 0 replies; 107+ messages in thread
From: Ingo Molnar @ 2009-11-19  2:28 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Luck, Tony, Jiri Slaby, jirislaby, nhorman, sfr, linux-kernel,
	akpm, marcin.slusarz, tglx, mingo, hpa, James Morris,
	Heiko Carstens, linux-ia64


* Linus Torvalds <torvalds@linux-foundation.org> wrote:

> So the rule is: if you access unlocked values, you use ACCESS_ONCE(). 
> You don't say "but it can't matter". Because you simply don't know.

Most of the time we are being lax about it, especially when it's some 
global value we are accessing, which can only be changed as a sysadmin 
via a sysctl or so.

[ For example we access pid_max in kernel/pid.c, outside of any lock and 
  without ACCESS_ONCE() - but that particular case is not a big deal 
  because changes to pid_max via a sysctl are so rare and are 
  privileged, and because the effects of any race there are benign. ]

But this patch series is about setrlimit, which makes the per task 
rlimit value pretty SMP-volatile (a parallel, unprivileged setrlimit can 
race with usage of the value elsewhere) - and the rlimits have security 
relevance as well so some extra care in accessing them outside of locks 
is prudent IMO.

	Ingo

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 03/16] IA64: use ACCESS_ONCE for rlimits
@ 2009-11-19  2:28                                             ` Ingo Molnar
  0 siblings, 0 replies; 107+ messages in thread
From: Ingo Molnar @ 2009-11-19  2:28 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Luck, Tony, Jiri Slaby, jirislaby, nhorman, sfr, linux-kernel,
	akpm, marcin.slusarz, tglx, mingo, hpa, James Morris,
	Heiko Carstens, linux-ia64


* Linus Torvalds <torvalds@linux-foundation.org> wrote:

> So the rule is: if you access unlocked values, you use ACCESS_ONCE(). 
> You don't say "but it can't matter". Because you simply don't know.

Most of the time we are being lax about it, especially when it's some 
global value we are accessing, which can only be changed as a sysadmin 
via a sysctl or so.

[ For example we access pid_max in kernel/pid.c, outside of any lock and 
  without ACCESS_ONCE() - but that particular case is not a big deal 
  because changes to pid_max via a sysctl are so rare and are 
  privileged, and because the effects of any race there are benign. ]

But this patch series is about setrlimit, which makes the per task 
rlimit value pretty SMP-volatile (a parallel, unprivileged setrlimit can 
race with usage of the value elsewhere) - and the rlimits have security 
relevance as well so some extra care in accessing them outside of locks 
is prudent IMO.

	Ingo

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v7)
  2009-11-18 23:15                                     ` [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v7) Oleg Nesterov
@ 2009-11-19 15:43                                       ` Jiri Slaby
  2009-11-20  2:11                                         ` acct_file_reopen() && do_acct_process() (Was: [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v7)) Oleg Nesterov
  0 siblings, 1 reply; 107+ messages in thread
From: Jiri Slaby @ 2009-11-19 15:43 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: Ingo Molnar, Neil Horman, Stephen Rothwell, linux-kernel, akpm,
	marcin.slusarz, tglx, mingo, hpa, Linus Torvalds

On 11/19/2009 12:15 AM, Oleg Nesterov wrote:
> On 11/18, Jiri Slaby wrote:
>>
>> On the last point: I added explicit ACCESS_ONCE all over there. If you
>> don't like I will trash it.
> 
> Just curious, why?
> 
> I mean, do you have any example of the "bad" behaviour which is
> fixed by ACCESS_ONCE() ?

Hi,

no, I haven't seen any errors caused by that yet. I added those in the
"just in case compiler starts to do weird things" manner.

>> The only remaining weird user is in
>> kernel/acct.c:
>>         /*
>>          * Accounting records are not subject to resource limits.
>>          */
>>         flim = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
>>         current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
>>         file->f_op->write(file, (char *)&ac,
>>                                sizeof(acct_t), &file->f_pos);
>>         current->signal->rlim[RLIMIT_FSIZE].rlim_cur = flim;
>>
>> It means that threads of the process with PACCT caps have unlimited file
>> size for a short while. If there is setrlimit in between, it gets wiped
>> out as well.
> 
> This is called when the whole thread-group exits, there are no
> live threads except current.

Not really, it is called from umount, sys_acct and other paths.

> At least, given that do_acct_process() does override_creds(), an
> ordinary user can't fool the accounting.

Agreed. That path can be executed only by a user with (at least) PACCT
or SYS_ADMIN caps. Hopefully.

Thanks.

^ permalink raw reply	[flat|nested] 107+ messages in thread

* acct_file_reopen() && do_acct_process() (Was: [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v7))
  2009-11-19 15:43                                       ` Jiri Slaby
@ 2009-11-20  2:11                                         ` Oleg Nesterov
  2009-11-20 10:27                                           ` Jiri Slaby
  0 siblings, 1 reply; 107+ messages in thread
From: Oleg Nesterov @ 2009-11-20  2:11 UTC (permalink / raw)
  To: Jiri Slaby
  Cc: Ingo Molnar, Neil Horman, Stephen Rothwell, linux-kernel, akpm,
	marcin.slusarz, tglx, mingo, hpa, Linus Torvalds

On 11/19, Jiri Slaby wrote:
>
> On 11/19/2009 12:15 AM, Oleg Nesterov wrote:
> > On 11/18, Jiri Slaby wrote:
> >>
> >> kernel/acct.c:
> >>         /*
> >>          * Accounting records are not subject to resource limits.
> >>          */
> >>         flim = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
> >>         current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
> >>         file->f_op->write(file, (char *)&ac,
> >>                                sizeof(acct_t), &file->f_pos);
> >>         current->signal->rlim[RLIMIT_FSIZE].rlim_cur = flim;
> >>
> >> It means that threads of the process with PACCT caps have unlimited file
> >> size for a short while. If there is setrlimit in between, it gets wiped
> >> out as well.
> >
> > This is called when the whole thread-group exits, there are no
> > live threads except current.
>
> Not really, it is called from umount, sys_acct and other paths.

Hmm. you are right. Do you know why acct_file_reopen() does

	if (old_acct)
		do_acct_process();

???

This looks just strange. What is the point ? If the caller doesn't
exit, we shouldn't account it?

And this is just wrong, no? Even if we forget about rlim, since
do_acct_process() does override_creds() + revert_creds(), any
__task_cred() in between is fooled?

Probably I greatly misread something in acct.c, otherwise I can't
see why, say, mntput() should ever record the caller in acct file.

IOW: could someone explain why the patch below is wrong?

Oleg.

--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -206,7 +206,6 @@ static void acct_file_reopen(struct bsd_
 	if (old_acct) {
 		mnt_unpin(old_acct->f_path.mnt);
 		spin_unlock(&acct_lock);
-		do_acct_process(acct, old_ns, old_acct);
 		filp_close(old_acct, NULL);
 		spin_lock(&acct_lock);
 	}


^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 12/16] core: rename setrlimit to do_setrlimit
  2009-11-18 14:51                                     ` [PATCH 12/16] core: rename setrlimit to do_setrlimit Jiri Slaby
@ 2009-11-20  6:10                                       ` Américo Wang
  0 siblings, 0 replies; 107+ messages in thread
From: Américo Wang @ 2009-11-20  6:10 UTC (permalink / raw)
  To: Jiri Slaby
  Cc: jirislaby, mingo, nhorman, sfr, linux-kernel, akpm,
	marcin.slusarz, tglx, mingo, hpa, torvalds, James Morris,
	Heiko Carstens

On Wed, Nov 18, 2009 at 10:51 PM, Jiri Slaby <jslaby@novell.com> wrote:
> Rename it so that it makes more sense in the field of syscalls
> (i.e. do_* is used for functions called by syscall wrappers but
> also when called from other paths).
>
> Signed-off-by: Jiri Slaby <jslaby@novell.com>
> Cc: James Morris <jmorris@namei.org>
> Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
> Cc: Andrew Morton <akpm@linux-foundation.org>
> Cc: Ingo Molnar <mingo@elte.hu>


Sounds good.

Reviewed-by: WANG Cong <xiyou.wangcong@gmail.com>

> ---
>  fs/proc/base.c           |    2 +-
>  include/linux/resource.h |    2 +-
>  kernel/sys.c             |    4 ++--
>  3 files changed, 4 insertions(+), 4 deletions(-)
>
> diff --git a/fs/proc/base.c b/fs/proc/base.c
> index b894170..c6589fb 100644
> --- a/fs/proc/base.c
> +++ b/fs/proc/base.c
> @@ -576,7 +576,7 @@ static ssize_t limits_write(struct file *file, const char __user *buf,
>                goto put_task;
>        }
>
> -       ret = setrlimit(task, i, &new_rlimit);
> +       ret = do_setrlimit(task, i, &new_rlimit);
>        if (ret)
>                count = ret;
>
> diff --git a/include/linux/resource.h b/include/linux/resource.h
> index 4301d67..08beb1a 100644
> --- a/include/linux/resource.h
> +++ b/include/linux/resource.h
> @@ -71,7 +71,7 @@ struct rlimit {
>  #include <asm/resource.h>
>
>  int getrusage(struct task_struct *p, int who, struct rusage __user *ru);
> -int setrlimit(struct task_struct *tsk, unsigned int resource,
> +int do_setrlimit(struct task_struct *tsk, unsigned int resource,
>                struct rlimit *new_rlim);
>
>  #endif
> diff --git a/kernel/sys.c b/kernel/sys.c
> index 52200d4..4db6ba6 100644
> --- a/kernel/sys.c
> +++ b/kernel/sys.c
> @@ -1239,7 +1239,7 @@ SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource,
>  #endif
>
>  /* make sure you are allowed to change @tsk limits before calling this */
> -int setrlimit(struct task_struct *tsk, unsigned int resource,
> +int do_setrlimit(struct task_struct *tsk, unsigned int resource,
>                struct rlimit *new_rlim)
>  {
>        struct rlimit *old_rlim;
> @@ -1308,7 +1308,7 @@ SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
>                return -EINVAL;
>        if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
>                return -EFAULT;
> -       return setrlimit(current, resource, &new_rlim);
> +       return do_setrlimit(current, resource, &new_rlim);
>  }
>
>  /*

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: acct_file_reopen() && do_acct_process() (Was: [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v7))
  2009-11-20  2:11                                         ` acct_file_reopen() && do_acct_process() (Was: [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v7)) Oleg Nesterov
@ 2009-11-20 10:27                                           ` Jiri Slaby
  0 siblings, 0 replies; 107+ messages in thread
From: Jiri Slaby @ 2009-11-20 10:27 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: Ingo Molnar, Neil Horman, Stephen Rothwell, linux-kernel, akpm,
	marcin.slusarz, tglx, mingo, hpa, Linus Torvalds

On 11/20/2009 03:11 AM, Oleg Nesterov wrote:
> Hmm. you are right. Do you know why acct_file_reopen() does
> 
> 	if (old_acct)
> 		do_acct_process();
> 
> ???
> 
> This looks just strange. What is the point ? If the caller doesn't
> exit, we shouldn't account it?

I have no idea. I just checked FreeBSD and NetBSD and they record only
exiting tasks. Maybe someone added it to record the process which
disabled the acct (last entry). Dunno.

> And this is just wrong, no? Even if we forget about rlim, since
> do_acct_process() does override_creds() + revert_creds(), any
> __task_cred() in between is fooled?
> 
> Probably I greatly misread something in acct.c, otherwise I can't
> see why, say, mntput() should ever record the caller in acct file.

From how I understand the code, it is the last mntput before the fs gets
unmounted. It's to close the acct file. But I don't understand why it
accounts.

> IOW: could someone explain why the patch below is wrong?

For me, it makes sense. But that's not important ;).

> --- a/kernel/acct.c
> +++ b/kernel/acct.c
> @@ -206,7 +206,6 @@ static void acct_file_reopen(struct bsd_
>  	if (old_acct) {
>  		mnt_unpin(old_acct->f_path.mnt);
>  		spin_unlock(&acct_lock);
> -		do_acct_process(acct, old_ns, old_acct);
>  		filp_close(old_acct, NULL);
>  		spin_lock(&acct_lock);

thanks,
-- 
js
Faculty of Informatics, Masaryk University
Suse Labs, Novell

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 13/16] core: implement getprlimit and setprlimit syscalls
  2009-11-18 14:51                                     ` [PATCH 13/16] core: implement getprlimit and setprlimit syscalls Jiri Slaby
@ 2009-11-20 13:14                                       ` Neil Horman
  0 siblings, 0 replies; 107+ messages in thread
From: Neil Horman @ 2009-11-20 13:14 UTC (permalink / raw)
  To: Jiri Slaby
  Cc: jirislaby, mingo, sfr, linux-kernel, akpm, marcin.slusarz, tglx,
	mingo, hpa, torvalds, James Morris, Heiko Carstens

On Wed, Nov 18, 2009 at 03:51:59PM +0100, Jiri Slaby wrote:
> This patch adds the code to support the sys_setprlimit and sys_getprlimit
> syscalls which modify the rlim values of a selected process.
> 
> Based on Neil's work. Thank him.
> 
> Signed-off-by: Jiri Slaby <jslaby@novell.com>
> Cc: Neil Horman <nhorman@tuxdriver.com>
> Cc: James Morris <jmorris@namei.org>
> Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
> Cc: Andrew Morton <akpm@linux-foundation.org>
> Cc: Ingo Molnar <mingo@elte.hu>
> ---

Looks good, thanks.
Acked-by: Neil Horman <nhorman@tuxdriver.com>

> 

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 15/16] COMPAT: add get/put_compat_rlimit
  2009-11-18 14:52                                     ` [PATCH 15/16] COMPAT: add get/put_compat_rlimit Jiri Slaby
@ 2009-12-30 23:55                                       ` Arnd Bergmann
  2010-01-06  9:35                                         ` Jiri Slaby
  0 siblings, 1 reply; 107+ messages in thread
From: Arnd Bergmann @ 2009-12-30 23:55 UTC (permalink / raw)
  To: Jiri Slaby
  Cc: jirislaby, mingo, nhorman, sfr, linux-kernel, akpm,
	marcin.slusarz, tglx, mingo, hpa, torvalds, James Morris,
	Heiko Carstens

On Wednesday 18 November 2009, Jiri Slaby wrote:
> @@ -284,17 +317,12 @@ asmlinkage long compat_sys_setrlimit(unsigned int resource,
>         if (resource >= RLIM_NLIMITS)
>                 return -EINVAL;
>  
> -       if (!access_ok(VERIFY_READ, rlim, sizeof(*rlim)) ||
> -           __get_user(r.rlim_cur, &rlim->rlim_cur) ||
> -           __get_user(r.rlim_max, &rlim->rlim_max))
> -               return -EFAULT;
> +       ret = get_compat_rlimit(&r, rlim);
> +       if (ret)
> +               return ret;
>  
> -       if (r.rlim_cur == COMPAT_RLIM_INFINITY)
> -               r.rlim_cur = RLIM_INFINITY;
> -       if (r.rlim_max == COMPAT_RLIM_INFINITY)
> -               r.rlim_max = RLIM_INFINITY;
>         set_fs(KERNEL_DS);
> -       ret = sys_setrlimit(resource, (struct rlimit __user *) &r);
> +       ret = sys_setrlimit(resource, (struct rlimit __force __user *)&r);
>         set_fs(old_fs);
>         return ret;

Since you are already rewriting the whole function here, it would be
nice if you could just call do_setrlimit() with the kernel pointer
instead of the set_fs() and __force tricks. For getrlimit, it may
be easier to just open-code the whole function, and for your new
functions, you could pass the pid into do_setrlimit instead of the
task in order to reduce code duplication between compat_sys_setprlimit
and sys_setprlimit.
If you interpret a pid argument of zero as 'current', we can deprecate
the kernel implementation of setrlimit and getrlimit for future
architecture ports.

Yes, I realize my reply is late in this thread, but I assume your patch
is still current since it hasn't made it into 2.6.33.

	Arnd

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 15/16] COMPAT: add get/put_compat_rlimit
  2009-12-30 23:55                                       ` Arnd Bergmann
@ 2010-01-06  9:35                                         ` Jiri Slaby
  0 siblings, 0 replies; 107+ messages in thread
From: Jiri Slaby @ 2010-01-06  9:35 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: mingo, nhorman, sfr, linux-kernel, akpm, marcin.slusarz, tglx,
	mingo, hpa, torvalds, James Morris, Heiko Carstens

On 12/31/2009 12:55 AM, Arnd Bergmann wrote:
> On Wednesday 18 November 2009, Jiri Slaby wrote:
>>         set_fs(KERNEL_DS);
>> -       ret = sys_setrlimit(resource, (struct rlimit __user *) &r);
>> +       ret = sys_setrlimit(resource, (struct rlimit __force __user *)&r);
>>         set_fs(old_fs);
>>         return ret;
> 
> Since you are already rewriting the whole function here, it would be
> nice if you could just call do_setrlimit() with the kernel pointer
> instead of the set_fs() and __force tricks. For getrlimit, it may
> be easier to just open-code the whole function, and for your new
> functions, you could pass the pid into do_setrlimit instead of the
> task in order to reduce code duplication between compat_sys_setprlimit
> and sys_setprlimit.

Hmm, using pid_t wouldn't work well with pid namespaces. E.g. a call
from /proc code. But certainly some cleanups may be performed, at least
in {compat_,}sys_setrlimit case: pushing (resource >= RLIM_NLIMITS) test
down and calling do_setrlimit from compat_sys_setrlimit is
straightforward. Will look at the rest too.

> Yes, I realize my reply is late in this thread, but I assume your patch
> is still current since it hasn't made it into 2.6.33.

Yup, as you expressed, it's still not upstream, hence it can be tuned up
easily.

Thanks,
-- 
js

^ permalink raw reply	[flat|nested] 107+ messages in thread

end of thread, other threads:[~2010-01-06  9:35 UTC | newest]

Thread overview: 107+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-09-28 20:06 [PATCH] proc: augment /proc/pid/limits to allow setting of process limits Neil Horman
2009-09-28 22:44 ` Andrew Morton
2009-09-29  1:14   ` Neil Horman
2009-09-29 20:25   ` [PATCH] proc: augment /proc/pid/limits to allow setting of process limits (v2) Neil Horman
2009-09-29 20:46     ` Andrew Morton
2009-09-30  0:59       ` Neil Horman
2009-10-01 17:15 ` [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v3) Neil Horman
2009-10-01 17:16   ` [PATCH 1/3] " Neil Horman
2009-10-04 12:14     ` Marcin Slusarz
2009-10-04 16:50       ` Neil Horman
2009-10-04 20:04         ` Marcin Slusarz
2009-10-04 23:10           ` Neil Horman
2009-10-04 20:30     ` Marcin Slusarz
2009-10-01 17:21   ` [PATCH 2/3] " Neil Horman
2009-10-01 17:22   ` [PATCH 3/3] " Neil Horman
2009-10-05  0:26   ` [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v4) Neil Horman
2009-10-05  0:53     ` [PATCH 1/3] " Neil Horman
2009-10-08 21:32       ` Marcin Slusarz
2009-10-09  2:00         ` Neil Horman
2009-10-05  0:54     ` [PATCH 2/3] " Neil Horman
2009-10-05  1:57       ` Américo Wang
2009-10-05 12:32         ` Neil Horman
2009-10-05  0:54     ` [PATCH 3/3] " Neil Horman
2009-10-12 16:13   ` [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v5) Neil Horman
2009-10-12 16:20     ` [PATCH 1/3] " Neil Horman
2009-10-12 16:25     ` [PATCH 2/3] " Neil Horman
2009-10-12 16:27     ` [PATCH 3/3] " Neil Horman
2009-10-12 20:13     ` [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v6) Neil Horman
2009-10-12 20:20       ` [PATCH 1/3] " Neil Horman
2009-10-12 20:23       ` [PATCH 2/3] " Neil Horman
2009-10-12 20:25       ` [PATCH 3/3] " Neil Horman
2009-10-20  0:52       ` [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v7) Neil Horman
2009-10-20  0:53         ` [PATCH 1/3] " Neil Horman
2009-10-20  0:54         ` [PATCH 2/3] " Neil Horman
2009-11-02 15:10           ` Ingo Molnar
2009-11-02 17:40             ` Neil Horman
2009-10-20  0:55         ` [PATCH 3/3] " Neil Horman
2009-10-28 14:44         ` [PATCH 0/3] " Neil Horman
2009-10-30 18:24           ` Neil Horman
2009-11-02 15:25         ` Ingo Molnar
2009-11-02 17:54           ` Neil Horman
2009-11-02 18:51             ` Ingo Molnar
2009-11-03  0:23               ` Neil Horman
2009-11-04 11:26                 ` Ingo Molnar
2009-11-05 20:48                   ` Neil Horman
2009-11-06  9:26                     ` Ingo Molnar
2009-11-06 10:00                       ` Jiri Slaby
2009-11-08 10:36                         ` Ingo Molnar
2009-11-09  0:10                           ` Neil Horman
2009-11-09  8:32                             ` Jiri Slaby
2009-11-09 13:34                               ` Neil Horman
2009-11-09  8:54                       ` Jiri Slaby
2009-11-09  9:01                         ` Ingo Molnar
2009-11-09  9:22                           ` Jiri Slaby
2009-11-09  9:26                             ` Ingo Molnar
2009-11-09 13:35                               ` Neil Horman
2009-11-09 15:56                           ` Jiri Slaby
2009-11-09 16:40                             ` Oleg Nesterov
2009-11-09 17:15                               ` Jiri Slaby
2009-11-09 17:26                                 ` Linus Torvalds
2009-11-09 17:36                                 ` Oleg Nesterov
2009-11-18 14:51                                   ` Jiri Slaby
2009-11-18 14:51                                     ` [PATCH 01/16] core: posix-cpu-timers, cleanup rlimits usage Jiri Slaby
2009-11-18 16:48                                       ` Peter Zijlstra
2009-11-18 14:51                                     ` [PATCH 02/16] core: do security check under task_lock Jiri Slaby
2009-11-18 21:47                                       ` James Morris
2009-11-18 14:51                                     ` [PATCH 03/16] IA64: use ACCESS_ONCE for rlimits Jiri Slaby
2009-11-18 14:51                                       ` Jiri Slaby
2009-11-18 18:56                                       ` Luck, Tony
2009-11-18 18:56                                         ` Luck, Tony
2009-11-18 19:48                                         ` Linus Torvalds
2009-11-18 19:48                                           ` Linus Torvalds
2009-11-19  2:28                                           ` Ingo Molnar
2009-11-19  2:28                                             ` Ingo Molnar
2009-11-18 14:51                                     ` [PATCH 04/16] PPC: " Jiri Slaby
2009-11-18 14:51                                       ` Jiri Slaby
2009-11-18 14:51                                     ` [PATCH 05/16] S390: " Jiri Slaby
2009-11-18 14:51                                     ` [PATCH 06/16] SPARC: " Jiri Slaby
2009-11-18 14:51                                       ` Jiri Slaby
2009-11-18 17:55                                       ` David Miller
2009-11-18 17:55                                         ` David Miller
2009-11-18 18:09                                         ` Linus Torvalds
2009-11-18 18:09                                           ` Linus Torvalds
2009-11-18 14:51                                     ` [PATCH 07/16] X86: " Jiri Slaby
2009-11-18 14:51                                     ` [PATCH 08/16] FS: " Jiri Slaby
2009-11-18 14:51                                     ` [PATCH 09/16] MM: " Jiri Slaby
2009-11-18 14:51                                       ` Jiri Slaby
2009-11-18 15:29                                       ` Linus Torvalds
2009-11-18 15:29                                         ` Linus Torvalds
2009-11-18 14:51                                     ` [PATCH 10/16] core: " Jiri Slaby
     [not found]                                     ` <4B040A03.2020508-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2009-11-18 14:51                                       ` [PATCH 11/16] misc: " Jiri Slaby
2009-11-18 14:51                                         ` Jiri Slaby
2009-11-18 14:51                                     ` [PATCH 12/16] core: rename setrlimit to do_setrlimit Jiri Slaby
2009-11-20  6:10                                       ` Américo Wang
2009-11-18 14:51                                     ` [PATCH 13/16] core: implement getprlimit and setprlimit syscalls Jiri Slaby
2009-11-20 13:14                                       ` Neil Horman
2009-11-18 14:52                                     ` [PATCH 14/16] unistd: add __NR_[get|set]prlimit syscall numbers Jiri Slaby
2009-11-18 14:52                                     ` [PATCH 15/16] COMPAT: add get/put_compat_rlimit Jiri Slaby
2009-12-30 23:55                                       ` Arnd Bergmann
2010-01-06  9:35                                         ` Jiri Slaby
2009-11-18 14:52                                     ` [PATCH 16/16] x86: add ia32 compat prlimit syscalls Jiri Slaby
2009-11-18 23:15                                     ` [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v7) Oleg Nesterov
2009-11-19 15:43                                       ` Jiri Slaby
2009-11-20  2:11                                         ` acct_file_reopen() && do_acct_process() (Was: [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v7)) Oleg Nesterov
2009-11-20 10:27                                           ` Jiri Slaby
2009-10-12 21:58     ` [PATCH 0/3] extend get/setrlimit to support setting rlimits external to a process (v5) Andrew Morton
2009-10-13  0:06       ` Neil Horman

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.