linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/1] x86: syscalls: sys_setrlimit64/sys_getrlimit64 calls to provide FSIZE  limits > 2^32-1
@ 2009-01-23 17:59 narendramind
  2009-01-26 19:04 ` Arnd Bergmann
  2009-01-26 19:23 ` Arnd Bergmann
  0 siblings, 2 replies; 8+ messages in thread
From: narendramind @ 2009-01-23 17:59 UTC (permalink / raw)
  To: linux-kernel, linux-fsdevel; +Cc: torvalds

x86: syscalls: sys_setrlimit64/sys_getrlimit64 calls to provide FSIZE  limits > 2^32-1
Problem Description:
  The following issue affects the setrlimit() and getrlimit() system calls on
  Linux 2.6.13 (and earlier) on x86. The Problem is filed at kernel.org bug 5042
  (http://bugzilla.kernel.org/show_bug.cgi?id=5042)

  With setrlimit()/getrlimit(), resource limits can not be set > 2^32-1 on
  x86 as internally, resource limits are represented in the 'rlimit' structure 
  (defined in include/linux/resource.h) as unsigned longs, meaning 32 bits on
  x86.  The most pertinent limit here is RLIMIT_FSIZE, which specifies the
  maximum size to which a file can grow: to be useful, this limit must be
  represented using a type that is as wide as the type used to represent 
  file offsets, i.e., as wide as a 64-bit off_t.

  Current versions of glibc (e.g., 2.3.5) deal with this situation somewhat 
  strangely: if a program compiled with FILE_OFFSET_BITS set to 64 
  (i.e., off_t is thus 'long long' -- 64 bits) tries to set a resource
  limit to a value larger than can be represented in a 32-bit unsigned long,then
  the glibc wrapper for setrlimit() silently converts the limit value to 
  RLIM_INFINITY. In other words, the requested resource limit setting is
  silently ignored. One could argue that perhaps the glibc wrapper should
  give an error, rather than silently turning a very large limit into infinity;
  however, the glibc developers instead seem to have decided on the current
  behaviour as a means of dealing with what is fundamentally a kernel problem.)

  (NOTE: This problem is not merely a theoretical one facing programmers 
  developing new applications. Since many x86 distributions compile all file
  utilities with -D_FILE_OFFSET_BITS=64, this issue can bite end-users as well,
  if they expect to be able to set resource limits greater than 2^32-1.)

  The solution to this problem would require new setrlimit64() and 
  getrlimit64() system calls on x86, and the existing 32-bit system calls 
  would need to be retained so that existing binaries would still run.

Design Approach:
    Add two system calls sys_setrlimit64()/sys_getrlimit64().
    And a type 'struct rlimit64' to accomodate more no. of limits <= 2^64-1

Implementation Details:
    Inclusions: struct rlimit64, struct rlimit64 rlim64[RLIM64_NRLIMITS] to 
        task_struct

Test Results:
  Test results are posted as Comment#6 to 
      http://bugzilla.kernel.org/show_bug.cgi?id=5042
  System Info (uname -a): 
    Linux infinity 2.6.29-rc2rlim64 #2 SMP 
        Sat Jan 17 16:57:07 IST 2009 i686 GNU/Linux
    CPU: Intel Centrino Duo
getrlimit64: Limits in the Kernel ....
retval            : 0
rlim              | max64 = ffffffffffffffff
rlim              | cur64 = ffffffffffffffff

setrlimit64: setting the following limits ...
retval            : 0
rlim              | max64 = 1122334455667788
rlim              | cur64 = 1122334455667788

getrlimit64: Limits in the Kernel set ....
retval            : 0
rlim              | max64 = 1122334455667788
rlim              | cur64 = 1122334455667788

    Signed-off-by: Narendra Prasad Madanapalli <narendramind@gmail.com>
diff -uNrp -X linux-2.6.29-rc2/Documentation/dontdiff linux-2.6.29-rc2/arch/x86/kernel/syscall_table_32.S linux-2.6.29-rc2-rlim64/arch/x86/kernel/syscall_table_32.S
--- linux-2.6.29-rc2/arch/x86/kernel/syscall_table_32.S	2009-01-17 09:54:06.000000000 +0530
+++ linux-2.6.29-rc2-rlim64/arch/x86/kernel/syscall_table_32.S	2009-01-17 19:15:52.000000000 +0530
@@ -332,3 +332,5 @@ ENTRY(sys_call_table)
 	.long sys_dup3			/* 330 */
 	.long sys_pipe2
 	.long sys_inotify_init1
+	.long sys_setrlimit64
+	.long sys_getrlimit64
diff -uNrp -X linux-2.6.29-rc2/Documentation/dontdiff linux-2.6.29-rc2/include/asm-generic/resource.h linux-2.6.29-rc2-rlim64/include/asm-generic/resource.h
--- linux-2.6.29-rc2/include/asm-generic/resource.h	2009-01-17 09:54:59.000000000 +0530
+++ linux-2.6.29-rc2-rlim64/include/asm-generic/resource.h	2009-01-17 19:16:48.000000000 +0530
@@ -46,6 +46,7 @@
 #define RLIMIT_RTPRIO		14	/* maximum realtime priority */
 #define RLIMIT_RTTIME		15	/* timeout for RT tasks in us */
 #define RLIM_NLIMITS		16
+#define RLIM64_NLIMITS		2
 
 /*
  * SuS says limits have to be unsigned.
@@ -56,6 +57,9 @@
 #ifndef RLIM_INFINITY
 # define RLIM_INFINITY		(~0UL)
 #endif
+#ifndef RLIM64_INFINITY
+# define RLIM64_INFINITY        (~0ULL)
+#endif
 
 /*
  * RLIMIT_STACK default maximum - some architectures override it:
@@ -89,6 +93,12 @@
 	[RLIMIT_RTTIME]		= {  RLIM_INFINITY,  RLIM_INFINITY },	\
 }
 
+#define INIT_RLIMITS64                                                    \
+{                                                                         \
+	[0]                     = {  RLIM64_INFINITY,  RLIM64_INFINITY }, \
+	[RLIMIT_FSIZE]          = {  RLIM64_INFINITY,  RLIM64_INFINITY }, \
+}
+
 #endif	/* __KERNEL__ */
 
 #endif
diff -uNrp -X linux-2.6.29-rc2/Documentation/dontdiff linux-2.6.29-rc2/include/linux/init_task.h linux-2.6.29-rc2-rlim64/include/linux/init_task.h
--- linux-2.6.29-rc2/include/linux/init_task.h	2009-01-17 09:55:08.000000000 +0530
+++ linux-2.6.29-rc2-rlim64/include/linux/init_task.h	2009-01-17 19:16:55.000000000 +0530
@@ -48,6 +48,7 @@ extern struct fs_struct init_fs;
 	.posix_timers	 = LIST_HEAD_INIT(sig.posix_timers),		\
 	.cpu_timers	= INIT_CPU_TIMERS(sig.cpu_timers),		\
 	.rlim		= INIT_RLIMITS,					\
+	.rlim64         = INIT_RLIMITS64,                               \
 }
 
 extern struct nsproxy init_nsproxy;
diff -uNrp -X linux-2.6.29-rc2/Documentation/dontdiff linux-2.6.29-rc2/include/linux/resource.h linux-2.6.29-rc2-rlim64/include/linux/resource.h
--- linux-2.6.29-rc2/include/linux/resource.h	2009-01-17 09:55:04.000000000 +0530
+++ linux-2.6.29-rc2-rlim64/include/linux/resource.h	2009-01-17 19:16:52.000000000 +0530
@@ -45,6 +45,11 @@ struct rlimit {
 	unsigned long	rlim_max;
 };
 
+struct rlimit64 {
+	u64   rlim64_cur;
+	u64   rlim64_max;
+};
+
 #define	PRIO_MIN	(-20)
 #define	PRIO_MAX	20
 
diff -uNrp -X linux-2.6.29-rc2/Documentation/dontdiff linux-2.6.29-rc2/include/linux/sched.h linux-2.6.29-rc2-rlim64/include/linux/sched.h
--- linux-2.6.29-rc2/include/linux/sched.h	2009-01-17 09:55:10.000000000 +0530
+++ linux-2.6.29-rc2-rlim64/include/linux/sched.h	2009-01-17 19:16:57.000000000 +0530
@@ -572,6 +572,7 @@ struct signal_struct {
 	 * have no need to disable irqs.
 	 */
 	struct rlimit rlim[RLIM_NLIMITS];
+	struct rlimit64 rlim64[RLIM64_NLIMITS];
 
 #ifdef CONFIG_BSD_PROCESS_ACCT
 	struct pacct_struct pacct;	/* per-process accounting information */
diff -uNrp -X linux-2.6.29-rc2/Documentation/dontdiff linux-2.6.29-rc2/kernel/ChangeLog linux-2.6.29-rc2-rlim64/kernel/ChangeLog
--- linux-2.6.29-rc2/kernel/ChangeLog	1970-01-01 05:30:00.000000000 +0530
+++ linux-2.6.29-rc2-rlim64/kernel/ChangeLog	2009-01-17 19:15:50.000000000 +0530
@@ -0,0 +1,10 @@
+2008-01-17  Narendra Prasad <narendramind@gmail.com>
+    Problem Description:
+        The following issue affects the setrlimit() and getrlimit() system calls on Linux 2.6.13 (and earlier) on x86.
+        The Problem is filed at kernel.org bug 5042 (http://bugzilla.kernel.org/show_bug.cgi?id=5042)
+    Design Approach:
+        Add two system calls sys_setrlimit64()/sys_getrlimit64().
+        And a type 'struct rlimit64' to accomodate more no. of limits <= 2^64-1
+    Implementation Details:
+        Inclusions: struct rlimit64, struct rlimit64
+        rlim64[RLIM64_NRLIMITS] to task_struct
diff -uNrp -X linux-2.6.29-rc2/Documentation/dontdiff linux-2.6.29-rc2/kernel/fork.c linux-2.6.29-rc2-rlim64/kernel/fork.c
--- linux-2.6.29-rc2/kernel/fork.c	2009-01-17 09:54:04.000000000 +0530
+++ linux-2.6.29-rc2-rlim64/kernel/fork.c	2009-01-17 19:15:49.000000000 +0530
@@ -862,6 +862,7 @@ static int copy_signal(unsigned long clo
 
 	task_lock(current->group_leader);
 	memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim);
+	memcpy(sig->rlim64, current->signal->rlim64, sizeof sig->rlim64);
 	task_unlock(current->group_leader);
 
 	posix_cpu_timers_init_group(sig);
diff -uNrp -X linux-2.6.29-rc2/Documentation/dontdiff linux-2.6.29-rc2/kernel/sys.c linux-2.6.29-rc2-rlim64/kernel/sys.c
--- linux-2.6.29-rc2/kernel/sys.c	2009-01-17 09:54:04.000000000 +0530
+++ linux-2.6.29-rc2-rlim64/kernel/sys.c	2009-01-17 19:42:08.000000000 +0530
@@ -1577,6 +1577,132 @@ out:
 	return 0;
 }
 
+SYSCALL_DEFINE2(getrlimit64, unsigned int, resource,
+				struct rlimit64 __user *, rlim)
+{
+	struct rlimit64  value;
+
+	if (resource >= RLIM_NLIMITS)
+		return -EINVAL;
+
+	if (resource == RLIMIT_FSIZE) {
+		task_lock(current->group_leader);
+		value = current->signal->rlim64[resource];
+		task_unlock(current->group_leader);
+		return copy_to_user(rlim, &value, sizeof(*rlim)) ? -EFAULT : 0;
+	} else {
+		task_lock(current->group_leader);
+		value.rlim64_max = current->signal->rlim[resource].rlim_max;
+		value.rlim64_cur = current->signal->rlim[resource].rlim_cur;
+		task_unlock(current->group_leader);
+		if (value.rlim64_cur == RLIM_INFINITY)
+			value.rlim64_cur = RLIM64_INFINITY;
+		if (value.rlim64_max == RLIM_INFINITY)
+			value.rlim64_max = RLIM64_INFINITY;
+	/* XX: RLIM_SAVED_MAX ? RLIM_SAVED_CUR ? (See Large-File-Summit) */
+	}
+	return copy_to_user(rlim, &value, sizeof(*rlim)) ? -EFAULT : 0;
+}
+
+SYSCALL_DEFINE2(setrlimit64, unsigned int, resource,
+				struct rlimit64 __user *, rlim)
+{
+	struct rlimit64  new_rlim;
+	struct rlimit    *old_rlim, new_value;
+	unsigned long    it_prof_secs;
+	int              retval;
+
+	if (resource >= RLIM_NLIMITS)
+		return -EINVAL;
+	if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
+		return -EFAULT;
+
+	if (resource == RLIMIT_FSIZE) {
+		struct rlimit64  *old_rlim;
+		struct rlimit    *old_value;
+
+		old_rlim = current->signal->rlim64 + resource;
+		if (((new_rlim.rlim64_cur > old_rlim->rlim64_max) ||
+			(new_rlim.rlim64_max > old_rlim->rlim64_max)) &&
+			!capable(CAP_SYS_RESOURCE))
+			return -EPERM;
+		*old_rlim = new_rlim;
+		if (new_rlim.rlim64_cur > RLIM_INFINITY)
+			new_rlim.rlim64_cur = RLIM_INFINITY;
+		if (new_rlim.rlim64_max > RLIM_INFINITY)
+			new_rlim.rlim64_max = RLIM_INFINITY;
+
+		task_lock(current->group_leader);
+		old_value = (current->signal->rlim + resource);
+		old_value->rlim_max = new_rlim.rlim64_max;
+		old_value->rlim_cur = new_rlim.rlim64_cur;
+		task_unlock(current->group_leader);
+
+		return 0;
+	}
+
+	old_rlim = current->signal->rlim + resource;
+	if (new_rlim.rlim64_cur > RLIM_INFINITY)
+		new_rlim.rlim64_cur = RLIM_INFINITY;
+	if (new_rlim.rlim64_max > RLIM_INFINITY)
+		new_rlim.rlim64_max = RLIM_INFINITY;
+	if (((new_rlim.rlim64_cur > old_rlim->rlim_max) ||
+		(new_rlim.rlim64_max > old_rlim->rlim_max)) &&
+		!capable(CAP_SYS_RESOURCE))
+		return -EPERM;
+	if (resource == RLIMIT_NOFILE) {
+		if (new_rlim.rlim64_cur > INR_OPEN ||
+			new_rlim.rlim64_max > INR_OPEN)
+			return -EPERM;
+	}
+	new_value.rlim_max = new_rlim.rlim64_max;
+	new_value.rlim_cur = new_rlim.rlim64_cur;
+	retval = security_task_setrlimit(resource, &new_value);
+	if (retval)
+		return retval;
+
+	if (resource == RLIMIT_CPU && new_value.rlim_cur == 0) {
+		/*
+		 * The caller is asking for an immediate RLIMIT_CPU
+		 * expiry.  But we use the zero value to mean "it was
+		 * never set".  So let's cheat and make it one second
+		 * instead
+		 */
+		new_value.rlim_cur = 1;
+	}
+
+	task_lock(current->group_leader);
+	*old_rlim = new_value;
+	task_unlock(current->group_leader);
+
+	if (resource != RLIMIT_CPU)
+		goto out;
+
+	/*
+	 * RLIMIT_CPU handling.   Note that the kernel fails to return an error
+	 * code if it rejected the user's attempt to set RLIMIT_CPU.  This is a
+	 * very long-standing error, and fixing it now risks breakage of
+	 * applications, so we live with it
+	 */
+	if (new_value.rlim_cur == RLIM_INFINITY)
+		goto out;
+
+	it_prof_secs = cputime_to_secs(current->signal->it_prof_expires);
+	if (it_prof_secs == 0 || new_value.rlim_cur <= it_prof_secs) {
+		unsigned long  rlim_cur = new_value.rlim_cur;
+		cputime_t      cputime;
+
+		cputime = secs_to_cputime(rlim_cur);
+		read_lock(&tasklist_lock);
+		spin_lock_irq(&current->sighand->siglock);
+		set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL);
+		spin_unlock_irq(&current->sighand->siglock);
+		read_unlock(&tasklist_lock);
+	}
+out:
+	return 0;
+}
+
 /*
  * It would make sense to put struct rusage in the task_struct,
  * except that would make the task_struct be *really big*.  After

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/1] x86: syscalls: sys_setrlimit64/sys_getrlimit64 calls to provide FSIZE  limits > 2^32-1
  2009-01-23 17:59 [PATCH 1/1] x86: syscalls: sys_setrlimit64/sys_getrlimit64 calls to provide FSIZE limits > 2^32-1 narendramind
@ 2009-01-26 19:04 ` Arnd Bergmann
  2009-01-27  0:46   ` Michael Kerrisk
  2009-01-26 19:23 ` Arnd Bergmann
  1 sibling, 1 reply; 8+ messages in thread
From: Arnd Bergmann @ 2009-01-26 19:04 UTC (permalink / raw)
  To: narendramind; +Cc: linux-kernel, linux-fsdevel, torvalds, linux-abi, linux-arch

On Friday 23 January 2009, narendramind@spikesource.com wrote:
> 
>   The solution to this problem would require new setrlimit64() and 
>   getrlimit64() system calls on x86, and the existing 32-bit system calls 
>   would need to be retained so that existing binaries would still run.

When adding new syscalls, please Cc: linux-abi@vger.kernel.org and
linux-arch@vger.kernel.org to get attention from all parties that are
involved.

> diff -uNrp -X linux-2.6.29-rc2/Documentation/dontdiff linux-2.6.29-rc2/arch/x86/kernel/syscall_table_32.S linux-2.6.29-rc2-rlim64/arch/x86/kernel/syscall_table_32.S
> --- linux-2.6.29-rc2/arch/x86/kernel/syscall_table_32.S	2009-01-17 09:54:06.000000000 +0530
> +++ linux-2.6.29-rc2-rlim64/arch/x86/kernel/syscall_table_32.S	2009-01-17 19:15:52.000000000 +0530
> @@ -332,3 +332,5 @@ ENTRY(sys_call_table)
>  	.long sys_dup3			/* 330 */
>  	.long sys_pipe2
>  	.long sys_inotify_init1
> +	.long sys_setrlimit64
> +	.long sys_getrlimit64

This only adds the calls to the native 32 bit build, but not to
the 32-on-64 compat code in arch/x86/ia32/ia32entry.S (or any of the
other architectures.

> --- linux-2.6.29-rc2/kernel/ChangeLog	1970-01-01 05:30:00.000000000 +0530
> +++ linux-2.6.29-rc2-rlim64/kernel/ChangeLog	2009-01-17 19:15:50.000000000 +0530
> @@ -0,0 +1,10 @@
> +2008-01-17  Narendra Prasad <narendramind@gmail.com>
> +    Problem Description:
> +        The following issue affects the setrlimit() and getrlimit() system calls on Linux 2.6.13 (and earlier) on x86.
> +        The Problem is filed at kernel.org bug 5042 (http://bugzilla.kernel.org/show_bug.cgi?id=5042)
> +    Design Approach:
> +        Add two system calls sys_setrlimit64()/sys_getrlimit64().
> +        And a type 'struct rlimit64' to accomodate more no. of limits <= 2^64-1
> +    Implementation Details:
> +        Inclusions: struct rlimit64, struct rlimit64
> +        rlim64[RLIM64_NRLIMITS] to task_struct

The changelog is the git history, please don't add other files for this.

> +SYSCALL_DEFINE2(setrlimit64, unsigned int, resource,
> +				struct rlimit64 __user *, rlim)
> +{
> +	struct rlimit64  new_rlim;
> +	struct rlimit    *old_rlim, new_value;
> +	unsigned long    it_prof_secs;
> +	int              retval;
> +
> +	if (resource >= RLIM_NLIMITS)
> +		return -EINVAL;
> +	if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
> +		return -EFAULT;
> +
> +	if (resource == RLIMIT_FSIZE) {
> +		struct rlimit64  *old_rlim;
> +		struct rlimit    *old_value;
> +
> +		old_rlim = current->signal->rlim64 + resource;
> +		if (((new_rlim.rlim64_cur > old_rlim->rlim64_max) ||
> +			(new_rlim.rlim64_max > old_rlim->rlim64_max)) &&
> +			!capable(CAP_SYS_RESOURCE))
> +			return -EPERM;
> +		*old_rlim = new_rlim;
> +		if (new_rlim.rlim64_cur > RLIM_INFINITY)
> +			new_rlim.rlim64_cur = RLIM_INFINITY;
> +		if (new_rlim.rlim64_max > RLIM_INFINITY)
> +			new_rlim.rlim64_max = RLIM_INFINITY;
> +
> +		task_lock(current->group_leader);
> +		old_value = (current->signal->rlim + resource);
> +		old_value->rlim_max = new_rlim.rlim64_max;
> +		old_value->rlim_cur = new_rlim.rlim64_cur;
> +		task_unlock(current->group_leader);
> +
> +		return 0;
> +	}
> +
> +	old_rlim = current->signal->rlim + resource;
> +	if (new_rlim.rlim64_cur > RLIM_INFINITY)
> +		new_rlim.rlim64_cur = RLIM_INFINITY;
> +	if (new_rlim.rlim64_max > RLIM_INFINITY)
> +		new_rlim.rlim64_max = RLIM_INFINITY;
> +	if (((new_rlim.rlim64_cur > old_rlim->rlim_max) ||
> +		(new_rlim.rlim64_max > old_rlim->rlim_max)) &&
> +		!capable(CAP_SYS_RESOURCE))
> +		return -EPERM;
> +	if (resource == RLIMIT_NOFILE) {
> +		if (new_rlim.rlim64_cur > INR_OPEN ||
> +			new_rlim.rlim64_max > INR_OPEN)
> +			return -EPERM;
> +	}
> +	new_value.rlim_max = new_rlim.rlim64_max;
> +	new_value.rlim_cur = new_rlim.rlim64_cur;
> +	retval = security_task_setrlimit(resource, &new_value);
> +	if (retval)
> +		return retval;
> +
> +	if (resource == RLIMIT_CPU && new_value.rlim_cur == 0) {
> +		/*
> +		 * The caller is asking for an immediate RLIMIT_CPU
> +		 * expiry.  But we use the zero value to mean "it was
> +		 * never set".  So let's cheat and make it one second
> +		 * instead
> +		 */
> +		new_value.rlim_cur = 1;
> +	}
> +
> +	task_lock(current->group_leader);
> +	*old_rlim = new_value;
> +	task_unlock(current->group_leader);
> +
> +	if (resource != RLIMIT_CPU)
> +		goto out;
> +
> +	/*
> +	 * RLIMIT_CPU handling.   Note that the kernel fails to return an error
> +	 * code if it rejected the user's attempt to set RLIMIT_CPU.  This is a
> +	 * very long-standing error, and fixing it now risks breakage of
> +	 * applications, so we live with it
> +	 */
> +	if (new_value.rlim_cur == RLIM_INFINITY)
> +		goto out;
> +
> +	it_prof_secs = cputime_to_secs(current->signal->it_prof_expires);
> +	if (it_prof_secs == 0 || new_value.rlim_cur <= it_prof_secs) {
> +		unsigned long  rlim_cur = new_value.rlim_cur;
> +		cputime_t      cputime;
> +
> +		cputime = secs_to_cputime(rlim_cur);
> +		read_lock(&tasklist_lock);
> +		spin_lock_irq(&current->sighand->siglock);
> +		set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL);
> +		spin_unlock_irq(&current->sighand->siglock);
> +		read_unlock(&tasklist_lock);
> +	}
> +out:
> +	return 0;
> +}

This function is rather long, and duplicates most of the existing
set_rlimit syscall. You should consolidate the two so you get no
duplication. You can probably add a
static do_setrlimit(unsigned int resource, struct rlimit64 *rlim);
helper function that gets called by both setrlimit and setrlimit64
(also compat_sys_setrlimit) after the copy_from_user().

	Arnd <><

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/1] x86: syscalls: sys_setrlimit64/sys_getrlimit64 calls to provide FSIZE  limits > 2^32-1
  2009-01-23 17:59 [PATCH 1/1] x86: syscalls: sys_setrlimit64/sys_getrlimit64 calls to provide FSIZE limits > 2^32-1 narendramind
  2009-01-26 19:04 ` Arnd Bergmann
@ 2009-01-26 19:23 ` Arnd Bergmann
  1 sibling, 0 replies; 8+ messages in thread
From: Arnd Bergmann @ 2009-01-26 19:23 UTC (permalink / raw)
  To: Narendra Prasad Madanapalli
  Cc: linux-kernel, linux-fsdevel, linux-api, linux-arch

[hmm: "550 5.7.1 recipient <narendramind@spikesource.com> unknown #291",
trying your @gmail.com. I also need to learn that it's linux-api, not
linux-abi]

On Friday 23 January 2009, narendramind@spikesource.com wrote:
> 
>   The solution to this problem would require new setrlimit64() and 
>   getrlimit64() system calls on x86, and the existing 32-bit system calls 
>   would need to be retained so that existing binaries would still run.

When adding new syscalls, please Cc: linux-abi@vger.kernel.org and
linux-arch@vger.kernel.org to get attention from all parties that are
involved.

> diff -uNrp -X linux-2.6.29-rc2/Documentation/dontdiff linux-2.6.29-rc2/arch/x86/kernel/syscall_table_32.S linux-2.6.29-rc2-rlim64/arch/x86/kernel/syscall_table_32.S
> --- linux-2.6.29-rc2/arch/x86/kernel/syscall_table_32.S	2009-01-17 09:54:06.000000000 +0530
> +++ linux-2.6.29-rc2-rlim64/arch/x86/kernel/syscall_table_32.S	2009-01-17 19:15:52.000000000 +0530
> @@ -332,3 +332,5 @@ ENTRY(sys_call_table)
>  	.long sys_dup3			/* 330 */
>  	.long sys_pipe2
>  	.long sys_inotify_init1
> +	.long sys_setrlimit64
> +	.long sys_getrlimit64

This only adds the calls to the native 32 bit build, but not to
the 32-on-64 compat code in arch/x86/ia32/ia32entry.S (or any of the
other architectures.

> --- linux-2.6.29-rc2/kernel/ChangeLog	1970-01-01 05:30:00.000000000 +0530
> +++ linux-2.6.29-rc2-rlim64/kernel/ChangeLog	2009-01-17 19:15:50.000000000 +0530
> @@ -0,0 +1,10 @@
> +2008-01-17  Narendra Prasad <narendramind@gmail.com>
> +    Problem Description:
> +        The following issue affects the setrlimit() and getrlimit() system calls on Linux 2.6.13 (and earlier) on x86.
> +        The Problem is filed at kernel.org bug 5042 (http://bugzilla.kernel.org/show_bug.cgi?id=5042)
> +    Design Approach:
> +        Add two system calls sys_setrlimit64()/sys_getrlimit64().
> +        And a type 'struct rlimit64' to accomodate more no. of limits <= 2^64-1
> +    Implementation Details:
> +        Inclusions: struct rlimit64, struct rlimit64
> +        rlim64[RLIM64_NRLIMITS] to task_struct

The changelog is the git history, please don't add other files for this.

> +SYSCALL_DEFINE2(setrlimit64, unsigned int, resource,
> +				struct rlimit64 __user *, rlim)
> +{
> +	struct rlimit64  new_rlim;
> +	struct rlimit    *old_rlim, new_value;
> +	unsigned long    it_prof_secs;
> +	int              retval;
> +
> +	if (resource >= RLIM_NLIMITS)
> +		return -EINVAL;
> +	if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
> +		return -EFAULT;
> +
> +	if (resource == RLIMIT_FSIZE) {
> +		struct rlimit64  *old_rlim;
> +		struct rlimit    *old_value;
> +
> +		old_rlim = current->signal->rlim64 + resource;
> +		if (((new_rlim.rlim64_cur > old_rlim->rlim64_max) ||
> +			(new_rlim.rlim64_max > old_rlim->rlim64_max)) &&
> +			!capable(CAP_SYS_RESOURCE))
> +			return -EPERM;
> +		*old_rlim = new_rlim;
> +		if (new_rlim.rlim64_cur > RLIM_INFINITY)
> +			new_rlim.rlim64_cur = RLIM_INFINITY;
> +		if (new_rlim.rlim64_max > RLIM_INFINITY)
> +			new_rlim.rlim64_max = RLIM_INFINITY;
> +
> +		task_lock(current->group_leader);
> +		old_value = (current->signal->rlim + resource);
> +		old_value->rlim_max = new_rlim.rlim64_max;
> +		old_value->rlim_cur = new_rlim.rlim64_cur;
> +		task_unlock(current->group_leader);
> +
> +		return 0;
> +	}
> +
> +	old_rlim = current->signal->rlim + resource;
> +	if (new_rlim.rlim64_cur > RLIM_INFINITY)
> +		new_rlim.rlim64_cur = RLIM_INFINITY;
> +	if (new_rlim.rlim64_max > RLIM_INFINITY)
> +		new_rlim.rlim64_max = RLIM_INFINITY;
> +	if (((new_rlim.rlim64_cur > old_rlim->rlim_max) ||
> +		(new_rlim.rlim64_max > old_rlim->rlim_max)) &&
> +		!capable(CAP_SYS_RESOURCE))
> +		return -EPERM;
> +	if (resource == RLIMIT_NOFILE) {
> +		if (new_rlim.rlim64_cur > INR_OPEN ||
> +			new_rlim.rlim64_max > INR_OPEN)
> +			return -EPERM;
> +	}
> +	new_value.rlim_max = new_rlim.rlim64_max;
> +	new_value.rlim_cur = new_rlim.rlim64_cur;
> +	retval = security_task_setrlimit(resource, &new_value);
> +	if (retval)
> +		return retval;
> +
> +	if (resource == RLIMIT_CPU && new_value.rlim_cur == 0) {
> +		/*
> +		 * The caller is asking for an immediate RLIMIT_CPU
> +		 * expiry.  But we use the zero value to mean "it was
> +		 * never set".  So let's cheat and make it one second
> +		 * instead
> +		 */
> +		new_value.rlim_cur = 1;
> +	}
> +
> +	task_lock(current->group_leader);
> +	*old_rlim = new_value;
> +	task_unlock(current->group_leader);
> +
> +	if (resource != RLIMIT_CPU)
> +		goto out;
> +
> +	/*
> +	 * RLIMIT_CPU handling.   Note that the kernel fails to return an error
> +	 * code if it rejected the user's attempt to set RLIMIT_CPU.  This is a
> +	 * very long-standing error, and fixing it now risks breakage of
> +	 * applications, so we live with it
> +	 */
> +	if (new_value.rlim_cur == RLIM_INFINITY)
> +		goto out;
> +
> +	it_prof_secs = cputime_to_secs(current->signal->it_prof_expires);
> +	if (it_prof_secs == 0 || new_value.rlim_cur <= it_prof_secs) {
> +		unsigned long  rlim_cur = new_value.rlim_cur;
> +		cputime_t      cputime;
> +
> +		cputime = secs_to_cputime(rlim_cur);
> +		read_lock(&tasklist_lock);
> +		spin_lock_irq(&current->sighand->siglock);
> +		set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL);
> +		spin_unlock_irq(&current->sighand->siglock);
> +		read_unlock(&tasklist_lock);
> +	}
> +out:
> +	return 0;
> +}

This function is rather long, and duplicates most of the existing
set_rlimit syscall. You should consolidate the two so you get no
duplication. You can probably add a
static do_setrlimit(unsigned int resource, struct rlimit64 *rlim);
helper function that gets called by both setrlimit and setrlimit64
(also compat_sys_setrlimit) after the copy_from_user().

	Arnd <><

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/1] x86: syscalls: sys_setrlimit64/sys_getrlimit64 calls to provide FSIZE  limits > 2^32-1
  2009-01-26 19:04 ` Arnd Bergmann
@ 2009-01-27  0:46   ` Michael Kerrisk
  2009-01-30 17:57     ` Narendra Prasad Madanapalli
  0 siblings, 1 reply; 8+ messages in thread
From: Michael Kerrisk @ 2009-01-27  0:46 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: narendramind, linux-kernel, linux-fsdevel, torvalds, linux-abi,
	linux-arch

Arnd Bergmann wrote:
> On Friday 23 January 2009, narendramind@spikesource.com wrote:
>>   The solution to this problem would require new setrlimit64() and 
>>   getrlimit64() system calls on x86, and the existing 32-bit system calls 
>>   would need to be retained so that existing binaries would still run.
> 
> When adding new syscalls, please Cc: linux-abi@vger.kernel.org and

Arnd -- I think you meant linux-api, not linux-abi.

-- 
Michael Kerrisk
Linux man-pages maintainer; http://www.kernel.org/doc/man-pages/
git://git.kernel.org/pub/scm/docs/man-pages/man-pages.git
man-pages online: http://www.kernel.org/doc/man-pages/online_pages.html
Found a bug? http://www.kernel.org/doc/man-pages/reporting_bugs.html

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/1] x86: syscalls: sys_setrlimit64/sys_getrlimit64 calls  to provide FSIZE limits > 2^32-1
  2009-01-27  0:46   ` Michael Kerrisk
@ 2009-01-30 17:57     ` Narendra Prasad Madanapalli
  2009-01-30 18:04       ` Arnd Bergmann
  0 siblings, 1 reply; 8+ messages in thread
From: Narendra Prasad Madanapalli @ 2009-01-30 17:57 UTC (permalink / raw)
  To: arnd; +Cc: linux-kernel, linux-fsdevel, torvalds, linux-api, mtk.manpages

Hi Arnd,

Can you provide me more information on how to port these system calls
to 32-on-64 kernel.

Thanks,
Narendra.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/1] x86: syscalls: sys_setrlimit64/sys_getrlimit64 calls  to provide FSIZE limits > 2^32-1
  2009-01-30 17:57     ` Narendra Prasad Madanapalli
@ 2009-01-30 18:04       ` Arnd Bergmann
  0 siblings, 0 replies; 8+ messages in thread
From: Arnd Bergmann @ 2009-01-30 18:04 UTC (permalink / raw)
  To: Narendra Prasad Madanapalli
  Cc: linux-kernel, linux-fsdevel, torvalds, linux-api, mtk.manpages

On Friday 30 January 2009, Narendra Prasad Madanapalli wrote:
> Can you provide me more information on how to port these system calls
> to 32-on-64 kernel.

Your interface has compatible calling conventions between 32 and 64
bit user programs, so for x86, all you need to do is to add them to
arch/x86/ia32/ia32entry.S in the same way as you did for
arch/x86/kernel/syscall_table_32.S.
For the other architectures, the respective maintainers will take
care of this if your patch gets accepted. Unfortunately, each
architecture has a different way of adding the compat system
calls.

	Arnd <><

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 1/1] x86: syscalls: sys_setrlimit64/sys_getrlimit64 calls to provide FSIZE  limits > 2^32-1
@ 2009-01-23 17:53 narendramind
  0 siblings, 0 replies; 8+ messages in thread
From: narendramind @ 2009-01-23 17:53 UTC (permalink / raw)
  To: linux-kernel, linux-fsdevel

x86: syscalls: sys_setrlimit64/sys_getrlimit64 calls to provide FSIZE  limits > 2^32-1
Problem Description:
  The following issue affects the setrlimit() and getrlimit() system calls on
  Linux 2.6.13 (and earlier) on x86. The Problem is filed at kernel.org bug 5042
  (http://bugzilla.kernel.org/show_bug.cgi?id=5042)

  With setrlimit()/getrlimit(), resource limits can not be set > 2^32-1 on
  x86 as internally, resource limits are represented in the 'rlimit' structure 
  (defined in include/linux/resource.h) as unsigned longs, meaning 32 bits on
  x86.  The most pertinent limit here is RLIMIT_FSIZE, which specifies the
  maximum size to which a file can grow: to be useful, this limit must be
  represented using a type that is as wide as the type used to represent 
  file offsets, i.e., as wide as a 64-bit off_t.

  Current versions of glibc (e.g., 2.3.5) deal with this situation somewhat 
  strangely: if a program compiled with FILE_OFFSET_BITS set to 64 
  (i.e., off_t is thus 'long long' -- 64 bits) tries to set a resource
  limit to a value larger than can be represented in a 32-bit unsigned long,then
  the glibc wrapper for setrlimit() silently converts the limit value to 
  RLIM_INFINITY. In other words, the requested resource limit setting is
  silently ignored. One could argue that perhaps the glibc wrapper should
  give an error, rather than silently turning a very large limit into infinity;
  however, the glibc developers instead seem to have decided on the current
  behaviour as a means of dealing with what is fundamentally a kernel problem.)

  (NOTE: This problem is not merely a theoretical one facing programmers 
  developing new applications. Since many x86 distributions compile all file
  utilities with -D_FILE_OFFSET_BITS=64, this issue can bite end-users as well,
  if they expect to be able to set resource limits greater than 2^32-1.)

  The solution to this problem would require new setrlimit64() and 
  getrlimit64() system calls on x86, and the existing 32-bit system calls 
  would need to be retained so that existing binaries would still run.

Design Approach:
    Add two system calls sys_setrlimit64()/sys_getrlimit64().
    And a type 'struct rlimit64' to accomodate more no. of limits <= 2^64-1

Implementation Details:
    Inclusions: struct rlimit64, struct rlimit64 rlim64[RLIM64_NRLIMITS] to 
        task_struct

Test Results:
  Test results are posted as Comment#6 to 
      http://bugzilla.kernel.org/show_bug.cgi?id=5042
  System Info (uname -a): 
    Linux infinity 2.6.29-rc2rlim64 #2 SMP 
        Sat Jan 17 16:57:07 IST 2009 i686 GNU/Linux
    CPU: Intel Centrino Duo
getrlimit64: Limits in the Kernel ....
retval            : 0
rlim              | max64 = ffffffffffffffff
rlim              | cur64 = ffffffffffffffff

setrlimit64: setting the following limits ...
retval            : 0
rlim              | max64 = 1122334455667788
rlim              | cur64 = 1122334455667788

getrlimit64: Limits in the Kernel set ....
retval            : 0
rlim              | max64 = 1122334455667788
rlim              | cur64 = 1122334455667788

    Signed-off-by: Narendra Prasad Madanapalli <narendramind@gmail.com>
diff -uNrp -X linux-2.6.29-rc2/Documentation/dontdiff linux-2.6.29-rc2/arch/x86/kernel/syscall_table_32.S linux-2.6.29-rc2-rlim64/arch/x86/kernel/syscall_table_32.S
--- linux-2.6.29-rc2/arch/x86/kernel/syscall_table_32.S	2009-01-17 09:54:06.000000000 +0530
+++ linux-2.6.29-rc2-rlim64/arch/x86/kernel/syscall_table_32.S	2009-01-17 19:15:52.000000000 +0530
@@ -332,3 +332,5 @@ ENTRY(sys_call_table)
 	.long sys_dup3			/* 330 */
 	.long sys_pipe2
 	.long sys_inotify_init1
+	.long sys_setrlimit64
+	.long sys_getrlimit64
diff -uNrp -X linux-2.6.29-rc2/Documentation/dontdiff linux-2.6.29-rc2/include/asm-generic/resource.h linux-2.6.29-rc2-rlim64/include/asm-generic/resource.h
--- linux-2.6.29-rc2/include/asm-generic/resource.h	2009-01-17 09:54:59.000000000 +0530
+++ linux-2.6.29-rc2-rlim64/include/asm-generic/resource.h	2009-01-17 19:16:48.000000000 +0530
@@ -46,6 +46,7 @@
 #define RLIMIT_RTPRIO		14	/* maximum realtime priority */
 #define RLIMIT_RTTIME		15	/* timeout for RT tasks in us */
 #define RLIM_NLIMITS		16
+#define RLIM64_NLIMITS		2
 
 /*
  * SuS says limits have to be unsigned.
@@ -56,6 +57,9 @@
 #ifndef RLIM_INFINITY
 # define RLIM_INFINITY		(~0UL)
 #endif
+#ifndef RLIM64_INFINITY
+# define RLIM64_INFINITY        (~0ULL)
+#endif
 
 /*
  * RLIMIT_STACK default maximum - some architectures override it:
@@ -89,6 +93,12 @@
 	[RLIMIT_RTTIME]		= {  RLIM_INFINITY,  RLIM_INFINITY },	\
 }
 
+#define INIT_RLIMITS64                                                    \
+{                                                                         \
+	[0]                     = {  RLIM64_INFINITY,  RLIM64_INFINITY }, \
+	[RLIMIT_FSIZE]          = {  RLIM64_INFINITY,  RLIM64_INFINITY }, \
+}
+
 #endif	/* __KERNEL__ */
 
 #endif
diff -uNrp -X linux-2.6.29-rc2/Documentation/dontdiff linux-2.6.29-rc2/include/linux/init_task.h linux-2.6.29-rc2-rlim64/include/linux/init_task.h
--- linux-2.6.29-rc2/include/linux/init_task.h	2009-01-17 09:55:08.000000000 +0530
+++ linux-2.6.29-rc2-rlim64/include/linux/init_task.h	2009-01-17 19:16:55.000000000 +0530
@@ -48,6 +48,7 @@ extern struct fs_struct init_fs;
 	.posix_timers	 = LIST_HEAD_INIT(sig.posix_timers),		\
 	.cpu_timers	= INIT_CPU_TIMERS(sig.cpu_timers),		\
 	.rlim		= INIT_RLIMITS,					\
+	.rlim64         = INIT_RLIMITS64,                               \
 }
 
 extern struct nsproxy init_nsproxy;
diff -uNrp -X linux-2.6.29-rc2/Documentation/dontdiff linux-2.6.29-rc2/include/linux/resource.h linux-2.6.29-rc2-rlim64/include/linux/resource.h
--- linux-2.6.29-rc2/include/linux/resource.h	2009-01-17 09:55:04.000000000 +0530
+++ linux-2.6.29-rc2-rlim64/include/linux/resource.h	2009-01-17 19:16:52.000000000 +0530
@@ -45,6 +45,11 @@ struct rlimit {
 	unsigned long	rlim_max;
 };
 
+struct rlimit64 {
+	u64   rlim64_cur;
+	u64   rlim64_max;
+};
+
 #define	PRIO_MIN	(-20)
 #define	PRIO_MAX	20
 
diff -uNrp -X linux-2.6.29-rc2/Documentation/dontdiff linux-2.6.29-rc2/include/linux/sched.h linux-2.6.29-rc2-rlim64/include/linux/sched.h
--- linux-2.6.29-rc2/include/linux/sched.h	2009-01-17 09:55:10.000000000 +0530
+++ linux-2.6.29-rc2-rlim64/include/linux/sched.h	2009-01-17 19:16:57.000000000 +0530
@@ -572,6 +572,7 @@ struct signal_struct {
 	 * have no need to disable irqs.
 	 */
 	struct rlimit rlim[RLIM_NLIMITS];
+	struct rlimit64 rlim64[RLIM64_NLIMITS];
 
 #ifdef CONFIG_BSD_PROCESS_ACCT
 	struct pacct_struct pacct;	/* per-process accounting information */
diff -uNrp -X linux-2.6.29-rc2/Documentation/dontdiff linux-2.6.29-rc2/kernel/ChangeLog linux-2.6.29-rc2-rlim64/kernel/ChangeLog
--- linux-2.6.29-rc2/kernel/ChangeLog	1970-01-01 05:30:00.000000000 +0530
+++ linux-2.6.29-rc2-rlim64/kernel/ChangeLog	2009-01-17 19:15:50.000000000 +0530
@@ -0,0 +1,10 @@
+2008-01-17  Narendra Prasad <narendramind@gmail.com>
+    Problem Description:
+        The following issue affects the setrlimit() and getrlimit() system calls on Linux 2.6.13 (and earlier) on x86.
+        The Problem is filed at kernel.org bug 5042 (http://bugzilla.kernel.org/show_bug.cgi?id=5042)
+    Design Approach:
+        Add two system calls sys_setrlimit64()/sys_getrlimit64().
+        And a type 'struct rlimit64' to accomodate more no. of limits <= 2^64-1
+    Implementation Details:
+        Inclusions: struct rlimit64, struct rlimit64
+        rlim64[RLIM64_NRLIMITS] to task_struct
diff -uNrp -X linux-2.6.29-rc2/Documentation/dontdiff linux-2.6.29-rc2/kernel/fork.c linux-2.6.29-rc2-rlim64/kernel/fork.c
--- linux-2.6.29-rc2/kernel/fork.c	2009-01-17 09:54:04.000000000 +0530
+++ linux-2.6.29-rc2-rlim64/kernel/fork.c	2009-01-17 19:15:49.000000000 +0530
@@ -862,6 +862,7 @@ static int copy_signal(unsigned long clo
 
 	task_lock(current->group_leader);
 	memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim);
+	memcpy(sig->rlim64, current->signal->rlim64, sizeof sig->rlim64);
 	task_unlock(current->group_leader);
 
 	posix_cpu_timers_init_group(sig);
diff -uNrp -X linux-2.6.29-rc2/Documentation/dontdiff linux-2.6.29-rc2/kernel/sys.c linux-2.6.29-rc2-rlim64/kernel/sys.c
--- linux-2.6.29-rc2/kernel/sys.c	2009-01-17 09:54:04.000000000 +0530
+++ linux-2.6.29-rc2-rlim64/kernel/sys.c	2009-01-17 19:42:08.000000000 +0530
@@ -1577,6 +1577,132 @@ out:
 	return 0;
 }
 
+SYSCALL_DEFINE2(getrlimit64, unsigned int, resource,
+				struct rlimit64 __user *, rlim)
+{
+	struct rlimit64  value;
+
+	if (resource >= RLIM_NLIMITS)
+		return -EINVAL;
+
+	if (resource == RLIMIT_FSIZE) {
+		task_lock(current->group_leader);
+		value = current->signal->rlim64[resource];
+		task_unlock(current->group_leader);
+		return copy_to_user(rlim, &value, sizeof(*rlim)) ? -EFAULT : 0;
+	} else {
+		task_lock(current->group_leader);
+		value.rlim64_max = current->signal->rlim[resource].rlim_max;
+		value.rlim64_cur = current->signal->rlim[resource].rlim_cur;
+		task_unlock(current->group_leader);
+		if (value.rlim64_cur == RLIM_INFINITY)
+			value.rlim64_cur = RLIM64_INFINITY;
+		if (value.rlim64_max == RLIM_INFINITY)
+			value.rlim64_max = RLIM64_INFINITY;
+	/* XX: RLIM_SAVED_MAX ? RLIM_SAVED_CUR ? (See Large-File-Summit) */
+	}
+	return copy_to_user(rlim, &value, sizeof(*rlim)) ? -EFAULT : 0;
+}
+
+SYSCALL_DEFINE2(setrlimit64, unsigned int, resource,
+				struct rlimit64 __user *, rlim)
+{
+	struct rlimit64  new_rlim;
+	struct rlimit    *old_rlim, new_value;
+	unsigned long    it_prof_secs;
+	int              retval;
+
+	if (resource >= RLIM_NLIMITS)
+		return -EINVAL;
+	if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
+		return -EFAULT;
+
+	if (resource == RLIMIT_FSIZE) {
+		struct rlimit64  *old_rlim;
+		struct rlimit    *old_value;
+
+		old_rlim = current->signal->rlim64 + resource;
+		if (((new_rlim.rlim64_cur > old_rlim->rlim64_max) ||
+			(new_rlim.rlim64_max > old_rlim->rlim64_max)) &&
+			!capable(CAP_SYS_RESOURCE))
+			return -EPERM;
+		*old_rlim = new_rlim;
+		if (new_rlim.rlim64_cur > RLIM_INFINITY)
+			new_rlim.rlim64_cur = RLIM_INFINITY;
+		if (new_rlim.rlim64_max > RLIM_INFINITY)
+			new_rlim.rlim64_max = RLIM_INFINITY;
+
+		task_lock(current->group_leader);
+		old_value = (current->signal->rlim + resource);
+		old_value->rlim_max = new_rlim.rlim64_max;
+		old_value->rlim_cur = new_rlim.rlim64_cur;
+		task_unlock(current->group_leader);
+
+		return 0;
+	}
+
+	old_rlim = current->signal->rlim + resource;
+	if (new_rlim.rlim64_cur > RLIM_INFINITY)
+		new_rlim.rlim64_cur = RLIM_INFINITY;
+	if (new_rlim.rlim64_max > RLIM_INFINITY)
+		new_rlim.rlim64_max = RLIM_INFINITY;
+	if (((new_rlim.rlim64_cur > old_rlim->rlim_max) ||
+		(new_rlim.rlim64_max > old_rlim->rlim_max)) &&
+		!capable(CAP_SYS_RESOURCE))
+		return -EPERM;
+	if (resource == RLIMIT_NOFILE) {
+		if (new_rlim.rlim64_cur > INR_OPEN ||
+			new_rlim.rlim64_max > INR_OPEN)
+			return -EPERM;
+	}
+	new_value.rlim_max = new_rlim.rlim64_max;
+	new_value.rlim_cur = new_rlim.rlim64_cur;
+	retval = security_task_setrlimit(resource, &new_value);
+	if (retval)
+		return retval;
+
+	if (resource == RLIMIT_CPU && new_value.rlim_cur == 0) {
+		/*
+		 * The caller is asking for an immediate RLIMIT_CPU
+		 * expiry.  But we use the zero value to mean "it was
+		 * never set".  So let's cheat and make it one second
+		 * instead
+		 */
+		new_value.rlim_cur = 1;
+	}
+
+	task_lock(current->group_leader);
+	*old_rlim = new_value;
+	task_unlock(current->group_leader);
+
+	if (resource != RLIMIT_CPU)
+		goto out;
+
+	/*
+	 * RLIMIT_CPU handling.   Note that the kernel fails to return an error
+	 * code if it rejected the user's attempt to set RLIMIT_CPU.  This is a
+	 * very long-standing error, and fixing it now risks breakage of
+	 * applications, so we live with it
+	 */
+	if (new_value.rlim_cur == RLIM_INFINITY)
+		goto out;
+
+	it_prof_secs = cputime_to_secs(current->signal->it_prof_expires);
+	if (it_prof_secs == 0 || new_value.rlim_cur <= it_prof_secs) {
+		unsigned long  rlim_cur = new_value.rlim_cur;
+		cputime_t      cputime;
+
+		cputime = secs_to_cputime(rlim_cur);
+		read_lock(&tasklist_lock);
+		spin_lock_irq(&current->sighand->siglock);
+		set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL);
+		spin_unlock_irq(&current->sighand->siglock);
+		read_unlock(&tasklist_lock);
+	}
+out:
+	return 0;
+}
+
 /*
  * It would make sense to put struct rusage in the task_struct,
  * except that would make the task_struct be *really big*.  After

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 1/1] x86: syscalls: sys_setrlimit64/sys_getrlimit64 calls to provide FSIZE  limits > 2^32-1
@ 2009-01-23 17:49 narendramind
  0 siblings, 0 replies; 8+ messages in thread
From: narendramind @ 2009-01-23 17:49 UTC (permalink / raw)
  To: linux-kernel, linux-fsdevel

x86: syscalls: sys_setrlimit64/sys_getrlimit64 calls to provide FSIZE  limits > 2^32-1
Problem Description:
  The following issue affects the setrlimit() and getrlimit() system calls on
  Linux 2.6.13 (and earlier) on x86. The Problem is filed at kernel.org bug 5042
  (http://bugzilla.kernel.org/show_bug.cgi?id=5042)

  With setrlimit()/getrlimit(), resource limits can not be set > 2^32-1 on
  x86 as internally, resource limits are represented in the 'rlimit' structure 
  (defined in include/linux/resource.h) as unsigned longs, meaning 32 bits on
  x86.  The most pertinent limit here is RLIMIT_FSIZE, which specifies the
  maximum size to which a file can grow: to be useful, this limit must be
  represented using a type that is as wide as the type used to represent 
  file offsets, i.e., as wide as a 64-bit off_t.

  Current versions of glibc (e.g., 2.3.5) deal with this situation somewhat 
  strangely: if a program compiled with FILE_OFFSET_BITS set to 64 
  (i.e., off_t is thus 'long long' -- 64 bits) tries to set a resource
  limit to a value larger than can be represented in a 32-bit unsigned long,then
  the glibc wrapper for setrlimit() silently converts the limit value to 
  RLIM_INFINITY. In other words, the requested resource limit setting is
  silently ignored. One could argue that perhaps the glibc wrapper should
  give an error, rather than silently turning a very large limit into infinity;
  however, the glibc developers instead seem to have decided on the current
  behaviour as a means of dealing with what is fundamentally a kernel problem.)

  (NOTE: This problem is not merely a theoretical one facing programmers 
  developing new applications. Since many x86 distributions compile all file
  utilities with -D_FILE_OFFSET_BITS=64, this issue can bite end-users as well,
  if they expect to be able to set resource limits greater than 2^32-1.)

  The solution to this problem would require new setrlimit64() and 
  getrlimit64() system calls on x86, and the existing 32-bit system calls 
  would need to be retained so that existing binaries would still run.

Design Approach:
    Add two system calls sys_setrlimit64()/sys_getrlimit64().
    And a type 'struct rlimit64' to accomodate more no. of limits <= 2^64-1

Implementation Details:
    Inclusions: struct rlimit64, struct rlimit64 rlim64[RLIM64_NRLIMITS] to 
        task_struct

Test Results:
  Test results are posted as Comment#6 to 
      http://bugzilla.kernel.org/show_bug.cgi?id=5042
  System Info (uname -a): 
    Linux infinity 2.6.29-rc2rlim64 #2 SMP 
        Sat Jan 17 16:57:07 IST 2009 i686 GNU/Linux
    CPU: Intel Centrino Duo
getrlimit64: Limits in the Kernel ....
retval            : 0
rlim              | max64 = ffffffffffffffff
rlim              | cur64 = ffffffffffffffff

setrlimit64: setting the following limits ...
retval            : 0
rlim              | max64 = 1122334455667788
rlim              | cur64 = 1122334455667788

getrlimit64: Limits in the Kernel set ....
retval            : 0
rlim              | max64 = 1122334455667788
rlim              | cur64 = 1122334455667788

    Signed-off-by: Narendra Prasad Madanapalli <narendramind@gmail.com>
diff -uNrp -X linux-2.6.29-rc2/Documentation/dontdiff linux-2.6.29-rc2/arch/x86/kernel/syscall_table_32.S linux-2.6.29-rc2-rlim64/arch/x86/kernel/syscall_table_32.S
--- linux-2.6.29-rc2/arch/x86/kernel/syscall_table_32.S	2009-01-17 09:54:06.000000000 +0530
+++ linux-2.6.29-rc2-rlim64/arch/x86/kernel/syscall_table_32.S	2009-01-17 19:15:52.000000000 +0530
@@ -332,3 +332,5 @@ ENTRY(sys_call_table)
 	.long sys_dup3			/* 330 */
 	.long sys_pipe2
 	.long sys_inotify_init1
+	.long sys_setrlimit64
+	.long sys_getrlimit64
diff -uNrp -X linux-2.6.29-rc2/Documentation/dontdiff linux-2.6.29-rc2/include/asm-generic/resource.h linux-2.6.29-rc2-rlim64/include/asm-generic/resource.h
--- linux-2.6.29-rc2/include/asm-generic/resource.h	2009-01-17 09:54:59.000000000 +0530
+++ linux-2.6.29-rc2-rlim64/include/asm-generic/resource.h	2009-01-17 19:16:48.000000000 +0530
@@ -46,6 +46,7 @@
 #define RLIMIT_RTPRIO		14	/* maximum realtime priority */
 #define RLIMIT_RTTIME		15	/* timeout for RT tasks in us */
 #define RLIM_NLIMITS		16
+#define RLIM64_NLIMITS		2
 
 /*
  * SuS says limits have to be unsigned.
@@ -56,6 +57,9 @@
 #ifndef RLIM_INFINITY
 # define RLIM_INFINITY		(~0UL)
 #endif
+#ifndef RLIM64_INFINITY
+# define RLIM64_INFINITY        (~0ULL)
+#endif
 
 /*
  * RLIMIT_STACK default maximum - some architectures override it:
@@ -89,6 +93,12 @@
 	[RLIMIT_RTTIME]		= {  RLIM_INFINITY,  RLIM_INFINITY },	\
 }
 
+#define INIT_RLIMITS64                                                    \
+{                                                                         \
+	[0]                     = {  RLIM64_INFINITY,  RLIM64_INFINITY }, \
+	[RLIMIT_FSIZE]          = {  RLIM64_INFINITY,  RLIM64_INFINITY }, \
+}
+
 #endif	/* __KERNEL__ */
 
 #endif
diff -uNrp -X linux-2.6.29-rc2/Documentation/dontdiff linux-2.6.29-rc2/include/linux/init_task.h linux-2.6.29-rc2-rlim64/include/linux/init_task.h
--- linux-2.6.29-rc2/include/linux/init_task.h	2009-01-17 09:55:08.000000000 +0530
+++ linux-2.6.29-rc2-rlim64/include/linux/init_task.h	2009-01-17 19:16:55.000000000 +0530
@@ -48,6 +48,7 @@ extern struct fs_struct init_fs;
 	.posix_timers	 = LIST_HEAD_INIT(sig.posix_timers),		\
 	.cpu_timers	= INIT_CPU_TIMERS(sig.cpu_timers),		\
 	.rlim		= INIT_RLIMITS,					\
+	.rlim64         = INIT_RLIMITS64,                               \
 }
 
 extern struct nsproxy init_nsproxy;
diff -uNrp -X linux-2.6.29-rc2/Documentation/dontdiff linux-2.6.29-rc2/include/linux/resource.h linux-2.6.29-rc2-rlim64/include/linux/resource.h
--- linux-2.6.29-rc2/include/linux/resource.h	2009-01-17 09:55:04.000000000 +0530
+++ linux-2.6.29-rc2-rlim64/include/linux/resource.h	2009-01-17 19:16:52.000000000 +0530
@@ -45,6 +45,11 @@ struct rlimit {
 	unsigned long	rlim_max;
 };
 
+struct rlimit64 {
+	u64   rlim64_cur;
+	u64   rlim64_max;
+};
+
 #define	PRIO_MIN	(-20)
 #define	PRIO_MAX	20
 
diff -uNrp -X linux-2.6.29-rc2/Documentation/dontdiff linux-2.6.29-rc2/include/linux/sched.h linux-2.6.29-rc2-rlim64/include/linux/sched.h
--- linux-2.6.29-rc2/include/linux/sched.h	2009-01-17 09:55:10.000000000 +0530
+++ linux-2.6.29-rc2-rlim64/include/linux/sched.h	2009-01-17 19:16:57.000000000 +0530
@@ -572,6 +572,7 @@ struct signal_struct {
 	 * have no need to disable irqs.
 	 */
 	struct rlimit rlim[RLIM_NLIMITS];
+	struct rlimit64 rlim64[RLIM64_NLIMITS];
 
 #ifdef CONFIG_BSD_PROCESS_ACCT
 	struct pacct_struct pacct;	/* per-process accounting information */
diff -uNrp -X linux-2.6.29-rc2/Documentation/dontdiff linux-2.6.29-rc2/kernel/ChangeLog linux-2.6.29-rc2-rlim64/kernel/ChangeLog
--- linux-2.6.29-rc2/kernel/ChangeLog	1970-01-01 05:30:00.000000000 +0530
+++ linux-2.6.29-rc2-rlim64/kernel/ChangeLog	2009-01-17 19:15:50.000000000 +0530
@@ -0,0 +1,10 @@
+2008-01-17  Narendra Prasad <narendramind@gmail.com>
+    Problem Description:
+        The following issue affects the setrlimit() and getrlimit() system calls on Linux 2.6.13 (and earlier) on x86.
+        The Problem is filed at kernel.org bug 5042 (http://bugzilla.kernel.org/show_bug.cgi?id=5042)
+    Design Approach:
+        Add two system calls sys_setrlimit64()/sys_getrlimit64().
+        And a type 'struct rlimit64' to accomodate more no. of limits <= 2^64-1
+    Implementation Details:
+        Inclusions: struct rlimit64, struct rlimit64
+        rlim64[RLIM64_NRLIMITS] to task_struct
diff -uNrp -X linux-2.6.29-rc2/Documentation/dontdiff linux-2.6.29-rc2/kernel/fork.c linux-2.6.29-rc2-rlim64/kernel/fork.c
--- linux-2.6.29-rc2/kernel/fork.c	2009-01-17 09:54:04.000000000 +0530
+++ linux-2.6.29-rc2-rlim64/kernel/fork.c	2009-01-17 19:15:49.000000000 +0530
@@ -862,6 +862,7 @@ static int copy_signal(unsigned long clo
 
 	task_lock(current->group_leader);
 	memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim);
+	memcpy(sig->rlim64, current->signal->rlim64, sizeof sig->rlim64);
 	task_unlock(current->group_leader);
 
 	posix_cpu_timers_init_group(sig);
diff -uNrp -X linux-2.6.29-rc2/Documentation/dontdiff linux-2.6.29-rc2/kernel/sys.c linux-2.6.29-rc2-rlim64/kernel/sys.c
--- linux-2.6.29-rc2/kernel/sys.c	2009-01-17 09:54:04.000000000 +0530
+++ linux-2.6.29-rc2-rlim64/kernel/sys.c	2009-01-17 19:42:08.000000000 +0530
@@ -1577,6 +1577,132 @@ out:
 	return 0;
 }
 
+SYSCALL_DEFINE2(getrlimit64, unsigned int, resource,
+				struct rlimit64 __user *, rlim)
+{
+	struct rlimit64  value;
+
+	if (resource >= RLIM_NLIMITS)
+		return -EINVAL;
+
+	if (resource == RLIMIT_FSIZE) {
+		task_lock(current->group_leader);
+		value = current->signal->rlim64[resource];
+		task_unlock(current->group_leader);
+		return copy_to_user(rlim, &value, sizeof(*rlim)) ? -EFAULT : 0;
+	} else {
+		task_lock(current->group_leader);
+		value.rlim64_max = current->signal->rlim[resource].rlim_max;
+		value.rlim64_cur = current->signal->rlim[resource].rlim_cur;
+		task_unlock(current->group_leader);
+		if (value.rlim64_cur == RLIM_INFINITY)
+			value.rlim64_cur = RLIM64_INFINITY;
+		if (value.rlim64_max == RLIM_INFINITY)
+			value.rlim64_max = RLIM64_INFINITY;
+	/* XX: RLIM_SAVED_MAX ? RLIM_SAVED_CUR ? (See Large-File-Summit) */
+	}
+	return copy_to_user(rlim, &value, sizeof(*rlim)) ? -EFAULT : 0;
+}
+
+SYSCALL_DEFINE2(setrlimit64, unsigned int, resource,
+				struct rlimit64 __user *, rlim)
+{
+	struct rlimit64  new_rlim;
+	struct rlimit    *old_rlim, new_value;
+	unsigned long    it_prof_secs;
+	int              retval;
+
+	if (resource >= RLIM_NLIMITS)
+		return -EINVAL;
+	if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
+		return -EFAULT;
+
+	if (resource == RLIMIT_FSIZE) {
+		struct rlimit64  *old_rlim;
+		struct rlimit    *old_value;
+
+		old_rlim = current->signal->rlim64 + resource;
+		if (((new_rlim.rlim64_cur > old_rlim->rlim64_max) ||
+			(new_rlim.rlim64_max > old_rlim->rlim64_max)) &&
+			!capable(CAP_SYS_RESOURCE))
+			return -EPERM;
+		*old_rlim = new_rlim;
+		if (new_rlim.rlim64_cur > RLIM_INFINITY)
+			new_rlim.rlim64_cur = RLIM_INFINITY;
+		if (new_rlim.rlim64_max > RLIM_INFINITY)
+			new_rlim.rlim64_max = RLIM_INFINITY;
+
+		task_lock(current->group_leader);
+		old_value = (current->signal->rlim + resource);
+		old_value->rlim_max = new_rlim.rlim64_max;
+		old_value->rlim_cur = new_rlim.rlim64_cur;
+		task_unlock(current->group_leader);
+
+		return 0;
+	}
+
+	old_rlim = current->signal->rlim + resource;
+	if (new_rlim.rlim64_cur > RLIM_INFINITY)
+		new_rlim.rlim64_cur = RLIM_INFINITY;
+	if (new_rlim.rlim64_max > RLIM_INFINITY)
+		new_rlim.rlim64_max = RLIM_INFINITY;
+	if (((new_rlim.rlim64_cur > old_rlim->rlim_max) ||
+		(new_rlim.rlim64_max > old_rlim->rlim_max)) &&
+		!capable(CAP_SYS_RESOURCE))
+		return -EPERM;
+	if (resource == RLIMIT_NOFILE) {
+		if (new_rlim.rlim64_cur > INR_OPEN ||
+			new_rlim.rlim64_max > INR_OPEN)
+			return -EPERM;
+	}
+	new_value.rlim_max = new_rlim.rlim64_max;
+	new_value.rlim_cur = new_rlim.rlim64_cur;
+	retval = security_task_setrlimit(resource, &new_value);
+	if (retval)
+		return retval;
+
+	if (resource == RLIMIT_CPU && new_value.rlim_cur == 0) {
+		/*
+		 * The caller is asking for an immediate RLIMIT_CPU
+		 * expiry.  But we use the zero value to mean "it was
+		 * never set".  So let's cheat and make it one second
+		 * instead
+		 */
+		new_value.rlim_cur = 1;
+	}
+
+	task_lock(current->group_leader);
+	*old_rlim = new_value;
+	task_unlock(current->group_leader);
+
+	if (resource != RLIMIT_CPU)
+		goto out;
+
+	/*
+	 * RLIMIT_CPU handling.   Note that the kernel fails to return an error
+	 * code if it rejected the user's attempt to set RLIMIT_CPU.  This is a
+	 * very long-standing error, and fixing it now risks breakage of
+	 * applications, so we live with it
+	 */
+	if (new_value.rlim_cur == RLIM_INFINITY)
+		goto out;
+
+	it_prof_secs = cputime_to_secs(current->signal->it_prof_expires);
+	if (it_prof_secs == 0 || new_value.rlim_cur <= it_prof_secs) {
+		unsigned long  rlim_cur = new_value.rlim_cur;
+		cputime_t      cputime;
+
+		cputime = secs_to_cputime(rlim_cur);
+		read_lock(&tasklist_lock);
+		spin_lock_irq(&current->sighand->siglock);
+		set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL);
+		spin_unlock_irq(&current->sighand->siglock);
+		read_unlock(&tasklist_lock);
+	}
+out:
+	return 0;
+}
+
 /*
  * It would make sense to put struct rusage in the task_struct,
  * except that would make the task_struct be *really big*.  After

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2009-01-30 18:05 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-01-23 17:59 [PATCH 1/1] x86: syscalls: sys_setrlimit64/sys_getrlimit64 calls to provide FSIZE limits > 2^32-1 narendramind
2009-01-26 19:04 ` Arnd Bergmann
2009-01-27  0:46   ` Michael Kerrisk
2009-01-30 17:57     ` Narendra Prasad Madanapalli
2009-01-30 18:04       ` Arnd Bergmann
2009-01-26 19:23 ` Arnd Bergmann
  -- strict thread matches above, loose matches on Subject: below --
2009-01-23 17:53 narendramind
2009-01-23 17:49 narendramind

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).