linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RFC 01/18] capabilities: track actually used capabilities
       [not found] <1465847065-3577-1-git-send-email-toiwoton@gmail.com>
@ 2016-06-13 19:44 ` Topi Miettinen
  2016-06-13 20:32   ` Andy Lutomirski
  2016-06-13 19:44 ` [RFC 05/18] limits: track and present RLIMIT_NOFILE actual max Topi Miettinen
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 13+ messages in thread
From: Topi Miettinen @ 2016-06-13 19:44 UTC (permalink / raw)
  To: linux-kernel
  Cc: Topi Miettinen, Alexander Viro, Ingo Molnar, Peter Zijlstra,
	Serge Hallyn, Andrew Morton, Kees Cook, Christoph Lameter,
	Serge E. Hallyn, Andy Shevchenko, Richard W.M. Jones,
	Iago López Galeiras, Chris Metcalf, Andy Lutomirski,
	Jann Horn, open list:FILESYSTEMS (VFS and infrastructure),
	open list:CAPABILITIES

Track what capabilities are actually used and present the current
situation in /proc/self/status.

Signed-off-by: Topi Miettinen <toiwoton@gmail.com>
---
 fs/exec.c             | 1 +
 fs/proc/array.c       | 1 +
 include/linux/sched.h | 1 +
 kernel/capability.c   | 1 +
 4 files changed, 4 insertions(+)

diff --git a/fs/exec.c b/fs/exec.c
index 887c1c9..ff6f644 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1269,6 +1269,7 @@ void setup_new_exec(struct linux_binprm * bprm)
 		if (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP)
 			set_dumpable(current->mm, suid_dumpable);
 	}
+	cap_clear(current->cap_used);
 
 	/* An exec changes our domain. We are no longer part of the thread
 	   group */
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 88c7de1..cccc9ee 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -343,6 +343,7 @@ static inline void task_cap(struct seq_file *m, struct task_struct *p)
 	render_cap_t(m, "CapEff:\t", &cap_effective);
 	render_cap_t(m, "CapBnd:\t", &cap_bset);
 	render_cap_t(m, "CapAmb:\t", &cap_ambient);
+	render_cap_t(m, "CapUsd:\t", &p->cap_used);
 }
 
 static inline void task_seccomp(struct seq_file *m, struct task_struct *p)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6e42ada..9c48a08 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1918,6 +1918,7 @@ struct task_struct {
 #ifdef CONFIG_MMU
 	struct task_struct *oom_reaper_list;
 #endif
+	kernel_cap_t	cap_used;	/* Capabilities actually used */
 /* CPU-specific state of this task */
 	struct thread_struct thread;
 /*
diff --git a/kernel/capability.c b/kernel/capability.c
index 45432b5..aad8854 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -380,6 +380,7 @@ bool ns_capable(struct user_namespace *ns, int cap)
 	}
 
 	if (security_capable(current_cred(), ns, cap) == 0) {
+		cap_raise(current->cap_used, cap);
 		current->flags |= PF_SUPERPRIV;
 		return true;
 	}
-- 
2.8.1


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [RFC 05/18] limits: track and present RLIMIT_NOFILE actual max
       [not found] <1465847065-3577-1-git-send-email-toiwoton@gmail.com>
  2016-06-13 19:44 ` [RFC 01/18] capabilities: track actually used capabilities Topi Miettinen
@ 2016-06-13 19:44 ` Topi Miettinen
  2016-06-13 20:40   ` Andy Lutomirski
  2016-06-13 19:44 ` [RFC 07/18] limits: track RLIMIT_FSIZE " Topi Miettinen
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 13+ messages in thread
From: Topi Miettinen @ 2016-06-13 19:44 UTC (permalink / raw)
  To: linux-kernel
  Cc: Topi Miettinen, Alexander Viro, Ingo Molnar, Peter Zijlstra,
	Andrew Morton, Kees Cook, Cyrill Gorcunov, Alexey Dobriyan,
	John Stultz, Janis Danisevskis, Calvin Owens, Jann Horn,
	open list:FILESYSTEMS (VFS and infrastructure)

Track maximum number of files for the process, present current maximum
in /proc/self/limits.

Signed-off-by: Topi Miettinen <toiwoton@gmail.com>
---
 fs/file.c             |  4 ++++
 fs/proc/base.c        | 10 ++++++----
 include/linux/sched.h |  7 +++++++
 3 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/fs/file.c b/fs/file.c
index 6b1acdf..2d0d206 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -547,6 +547,8 @@ repeat:
 	}
 #endif
 
+	bump_rlimit(RLIMIT_NOFILE, fd);
+
 out:
 	spin_unlock(&files->file_lock);
 	return error;
@@ -857,6 +859,8 @@ __releases(&files->file_lock)
 	if (tofree)
 		filp_close(tofree, files);
 
+	bump_rlimit(RLIMIT_NOFILE, fd);
+
 	return fd;
 
 Ebusy:
diff --git a/fs/proc/base.c b/fs/proc/base.c
index a11eb71..227997b 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -630,8 +630,8 @@ static int proc_pid_limits(struct seq_file *m, struct pid_namespace *ns,
 	/*
 	 * print the file header
 	 */
-       seq_printf(m, "%-25s %-20s %-20s %-10s\n",
-		  "Limit", "Soft Limit", "Hard Limit", "Units");
+	seq_printf(m, "%-25s %-20s %-20s %-10s %-20s\n",
+		   "Limit", "Soft Limit", "Hard Limit", "Units", "Max");
 
 	for (i = 0; i < RLIM_NLIMITS; i++) {
 		if (rlim[i].rlim_cur == RLIM_INFINITY)
@@ -647,9 +647,11 @@ static int proc_pid_limits(struct seq_file *m, struct pid_namespace *ns,
 			seq_printf(m, "%-20lu ", rlim[i].rlim_max);
 
 		if (lnames[i].unit)
-			seq_printf(m, "%-10s\n", lnames[i].unit);
+			seq_printf(m, "%-10s", lnames[i].unit);
 		else
-			seq_putc(m, '\n');
+			seq_printf(m, "%-10s", "");
+		seq_printf(m, "%-20lu\n",
+			   task->signal->rlim_curmax[i]);
 	}
 
 	return 0;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9c48a08..0150380 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -782,6 +782,7 @@ struct signal_struct {
 	 * have no need to disable irqs.
 	 */
 	struct rlimit rlim[RLIM_NLIMITS];
+	unsigned long rlim_curmax[RLIM_NLIMITS];
 
 #ifdef CONFIG_BSD_PROCESS_ACCT
 	struct pacct_struct pacct;	/* per-process accounting information */
@@ -3376,6 +3377,12 @@ static inline unsigned long rlimit_max(unsigned int limit)
 	return task_rlimit_max(current, limit);
 }
 
+static inline void bump_rlimit(unsigned int limit, unsigned long r)
+{
+	if (READ_ONCE(current->signal->rlim_curmax[limit]) < r)
+		current->signal->rlim_curmax[limit] = r;
+}
+
 #ifdef CONFIG_CPU_FREQ
 struct update_util_data {
 	void (*func)(struct update_util_data *data,
-- 
2.8.1

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [RFC 07/18] limits: track RLIMIT_FSIZE actual max
       [not found] <1465847065-3577-1-git-send-email-toiwoton@gmail.com>
  2016-06-13 19:44 ` [RFC 01/18] capabilities: track actually used capabilities Topi Miettinen
  2016-06-13 19:44 ` [RFC 05/18] limits: track and present RLIMIT_NOFILE actual max Topi Miettinen
@ 2016-06-13 19:44 ` Topi Miettinen
  2016-06-13 19:44 ` [RFC 08/18] limits: track RLIMIT_DATA " Topi Miettinen
  2016-06-13 19:44 ` [RFC 09/18] limits: track RLIMIT_CORE " Topi Miettinen
  4 siblings, 0 replies; 13+ messages in thread
From: Topi Miettinen @ 2016-06-13 19:44 UTC (permalink / raw)
  To: linux-kernel
  Cc: Topi Miettinen, Alexander Viro, Andrew Morton, Jan Kara,
	Johannes Weiner, Michal Hocko, Ross Zwisler, Kirill A. Shutemov,
	Mel Gorman, Junichi Nomura, Matthew Wilcox,
	open list:FILESYSTEMS (VFS and infrastructure),
	open list:MEMORY MANAGEMENT

Track maximum file size, presented in /proc/self/limits.

Signed-off-by: Topi Miettinen <toiwoton@gmail.com>
---
 fs/attr.c    | 2 ++
 mm/filemap.c | 1 +
 2 files changed, 3 insertions(+)

diff --git a/fs/attr.c b/fs/attr.c
index 25b24d0..1b620f7 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -116,6 +116,8 @@ int inode_newsize_ok(const struct inode *inode, loff_t offset)
 			return -ETXTBSY;
 	}
 
+	bump_rlimit(RLIMIT_FSIZE, offset);
+
 	return 0;
 out_sig:
 	send_sig(SIGXFSZ, current, 0);
diff --git a/mm/filemap.c b/mm/filemap.c
index 00ae878..1fa9864 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2447,6 +2447,7 @@ inline ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from)
 			send_sig(SIGXFSZ, current, 0);
 			return -EFBIG;
 		}
+		bump_rlimit(RLIMIT_FSIZE, iocb->ki_pos);
 		iov_iter_truncate(from, limit - (unsigned long)pos);
 	}
 
-- 
2.8.1

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [RFC 08/18] limits: track RLIMIT_DATA actual max
       [not found] <1465847065-3577-1-git-send-email-toiwoton@gmail.com>
                   ` (2 preceding siblings ...)
  2016-06-13 19:44 ` [RFC 07/18] limits: track RLIMIT_FSIZE " Topi Miettinen
@ 2016-06-13 19:44 ` Topi Miettinen
  2016-06-13 19:44 ` [RFC 09/18] limits: track RLIMIT_CORE " Topi Miettinen
  4 siblings, 0 replies; 13+ messages in thread
From: Topi Miettinen @ 2016-06-13 19:44 UTC (permalink / raw)
  To: linux-kernel
  Cc: Topi Miettinen, Thomas Gleixner, Ingo Molnar, H. Peter Anvin,
	maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	Alexander Viro, Michal Hocko, Andrew Morton, Vlastimil Babka,
	Cyrill Gorcunov, Eric W. Biederman, Mateusz Guzik, John Stultz,
	Ben Segall, Alexey Dobriyan, Kirill A. Shutemov, Oleg Nesterov,
	Chen Gang, Konstantin Khlebnikov, Andrea Arcangeli,
	Andrey Ryabinin, open list:FILESYSTEMS (VFS and infrastructure),
	open list:MEMORY MANAGEMENT

Track maximum size of data VM, presented in /proc/self/limits.

Signed-off-by: Topi Miettinen <toiwoton@gmail.com>
---
 arch/x86/ia32/ia32_aout.c | 1 +
 fs/binfmt_aout.c          | 1 +
 fs/binfmt_flat.c          | 1 +
 kernel/sys.c              | 2 ++
 mm/mmap.c                 | 6 +++++-
 5 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index cb26f18..8a7d502 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -398,6 +398,7 @@ beyond_if:
 	regs->r8 = regs->r9 = regs->r10 = regs->r11 =
 	regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0;
 	set_fs(USER_DS);
+	bump_limit(RLIMIT_DATA, ex.a_data + ex.a_bss);
 	return 0;
 }
 
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index ae1b540..86c6548 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -330,6 +330,7 @@ beyond_if:
 	regs->gp = ex.a_gpvalue;
 #endif
 	start_thread(regs, ex.a_entry, current->mm->start_stack);
+	bump_limit(RLIMIT_DATA, ex.a_data + ex.a_bss);
 	return 0;
 }
 
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index caf9e39..e309dad 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -792,6 +792,7 @@ static int load_flat_file(struct linux_binprm * bprm,
 			libinfo->lib_list[id].start_brk) +	/* start brk */
 			stack_len);
 
+	bump_limit(RLIMIT_DATA, data_len + bss_len);
 	return 0;
 err:
 	return ret;
diff --git a/kernel/sys.c b/kernel/sys.c
index 89d5be4..6629f6f 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1896,6 +1896,8 @@ static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data
 	if (prctl_map.auxv_size)
 		memcpy(mm->saved_auxv, user_auxv, sizeof(user_auxv));
 
+	bump_limit(RLIMIT_DATA, mm->end_data - mm->start_data);
+
 	up_write(&mm->mmap_sem);
 	return 0;
 }
diff --git a/mm/mmap.c b/mm/mmap.c
index de2c176..61867de 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -228,6 +228,8 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
 		goto out;
 
 set_brk:
+	bump_rlimit(RLIMIT_DATA, (brk - mm->start_brk) +
+		    (mm->end_data - mm->start_data));
 	mm->brk = brk;
 	populate = newbrk > oldbrk && (mm->def_flags & VM_LOCKED) != 0;
 	up_write(&mm->mmap_sem);
@@ -2924,8 +2926,10 @@ void vm_stat_account(struct mm_struct *mm, vm_flags_t flags, long npages)
 		mm->exec_vm += npages;
 	else if (is_stack_mapping(flags))
 		mm->stack_vm += npages;
-	else if (is_data_mapping(flags))
+	else if (is_data_mapping(flags)) {
 		mm->data_vm += npages;
+		bump_rlimit(RLIMIT_DATA, mm->data_vm << PAGE_SHIFT);
+	}
 }
 
 static int special_mapping_fault(struct vm_area_struct *vma,
-- 
2.8.1

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [RFC 09/18] limits: track RLIMIT_CORE actual max
       [not found] <1465847065-3577-1-git-send-email-toiwoton@gmail.com>
                   ` (3 preceding siblings ...)
  2016-06-13 19:44 ` [RFC 08/18] limits: track RLIMIT_DATA " Topi Miettinen
@ 2016-06-13 19:44 ` Topi Miettinen
  4 siblings, 0 replies; 13+ messages in thread
From: Topi Miettinen @ 2016-06-13 19:44 UTC (permalink / raw)
  To: linux-kernel
  Cc: Topi Miettinen, Alexander Viro,
	open list:FILESYSTEMS (VFS and infrastructure)

Track maximum size of core dump written, presented in /proc/self/limits.

Signed-off-by: Topi Miettinen <toiwoton@gmail.com>
---
 fs/coredump.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/fs/coredump.c b/fs/coredump.c
index 281b768..abedc99 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -784,20 +784,25 @@ int dump_emit(struct coredump_params *cprm, const void *addr, int nr)
 	struct file *file = cprm->file;
 	loff_t pos = file->f_pos;
 	ssize_t n;
+	int r = 0;
+
 	if (cprm->written + nr > cprm->limit)
 		return 0;
 	while (nr) {
 		if (dump_interrupted())
-			return 0;
+			goto err;
 		n = __kernel_write(file, addr, nr, &pos);
 		if (n <= 0)
-			return 0;
+			goto err;
 		file->f_pos = pos;
 		cprm->written += n;
 		cprm->pos += n;
 		nr -= n;
 	}
-	return 1;
+	r = 1;
+ err:
+	bump_rlimit(RLIMIT_CORE, cprm->written);
+	return r;
 }
 EXPORT_SYMBOL(dump_emit);
 
-- 
2.8.1


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [RFC 01/18] capabilities: track actually used capabilities
  2016-06-13 19:44 ` [RFC 01/18] capabilities: track actually used capabilities Topi Miettinen
@ 2016-06-13 20:32   ` Andy Lutomirski
  2016-06-13 20:45     ` Topi Miettinen
  0 siblings, 1 reply; 13+ messages in thread
From: Andy Lutomirski @ 2016-06-13 20:32 UTC (permalink / raw)
  To: Topi Miettinen
  Cc: linux-kernel, Alexander Viro, Ingo Molnar, Peter Zijlstra,
	Serge Hallyn, Andrew Morton, Kees Cook, Christoph Lameter,
	Serge E. Hallyn, Andy Shevchenko, Richard W.M. Jones,
	Iago López Galeiras, Chris Metcalf, Andy Lutomirski,
	Jann Horn, open list:FILESYSTEMS (VFS and infrastructure),
	open list:CAPABILITIES

On Mon, Jun 13, 2016 at 12:44 PM, Topi Miettinen <toiwoton@gmail.com> wrote:
> Track what capabilities are actually used and present the current
> situation in /proc/self/status.

What for?

What is the intended behavior on fork()?  Whatever the intended
behavior is, there should IMO be a selftest for it.

--Andy

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC 05/18] limits: track and present RLIMIT_NOFILE actual max
  2016-06-13 19:44 ` [RFC 05/18] limits: track and present RLIMIT_NOFILE actual max Topi Miettinen
@ 2016-06-13 20:40   ` Andy Lutomirski
  2016-06-13 21:13     ` Topi Miettinen
  0 siblings, 1 reply; 13+ messages in thread
From: Andy Lutomirski @ 2016-06-13 20:40 UTC (permalink / raw)
  To: Topi Miettinen, linux-kernel
  Cc: Alexander Viro, Ingo Molnar, Peter Zijlstra, Andrew Morton,
	Kees Cook, Cyrill Gorcunov, Alexey Dobriyan, John Stultz,
	Janis Danisevskis, Calvin Owens, Jann Horn,
	open list:FILESYSTEMS (VFS and infrastructure)

On 06/13/2016 12:44 PM, Topi Miettinen wrote:
> Track maximum number of files for the process, present current maximum
> in /proc/self/limits.

The core part should be its own patch.

Also, you have this weirdly named (and racy!) function bump_rlimit. 
Wouldn't this be nicer if you taught the rlimit code to track the 
*current* usage generically and to derive the max usage from that?

> diff --git a/fs/proc/base.c b/fs/proc/base.c
> index a11eb71..227997b 100644
> --- a/fs/proc/base.c
> +++ b/fs/proc/base.c
> @@ -630,8 +630,8 @@ static int proc_pid_limits(struct seq_file *m, struct pid_namespace *ns,
>  	/*
>  	 * print the file header
>  	 */
> -       seq_printf(m, "%-25s %-20s %-20s %-10s\n",
> -		  "Limit", "Soft Limit", "Hard Limit", "Units");
> +	seq_printf(m, "%-25s %-20s %-20s %-10s %-20s\n",
> +		   "Limit", "Soft Limit", "Hard Limit", "Units", "Max");

What existing programs, if any, does this break?

>
>  	for (i = 0; i < RLIM_NLIMITS; i++) {
>  		if (rlim[i].rlim_cur == RLIM_INFINITY)
> @@ -647,9 +647,11 @@ static int proc_pid_limits(struct seq_file *m, struct pid_namespace *ns,
>  			seq_printf(m, "%-20lu ", rlim[i].rlim_max);
>
>  		if (lnames[i].unit)
> -			seq_printf(m, "%-10s\n", lnames[i].unit);
> +			seq_printf(m, "%-10s", lnames[i].unit);
>  		else
> -			seq_putc(m, '\n');
> +			seq_printf(m, "%-10s", "");
> +		seq_printf(m, "%-20lu\n",
> +			   task->signal->rlim_curmax[i]);
>  	}
>
>  	return 0;
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 9c48a08..0150380 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -782,6 +782,7 @@ struct signal_struct {
>  	 * have no need to disable irqs.
>  	 */
>  	struct rlimit rlim[RLIM_NLIMITS];
> +	unsigned long rlim_curmax[RLIM_NLIMITS];
>
>  #ifdef CONFIG_BSD_PROCESS_ACCT
>  	struct pacct_struct pacct;	/* per-process accounting information */
> @@ -3376,6 +3377,12 @@ static inline unsigned long rlimit_max(unsigned int limit)
>  	return task_rlimit_max(current, limit);
>  }
>
> +static inline void bump_rlimit(unsigned int limit, unsigned long r)
> +{
> +	if (READ_ONCE(current->signal->rlim_curmax[limit]) < r)
> +		current->signal->rlim_curmax[limit] = r;
> +}
> +
>  #ifdef CONFIG_CPU_FREQ
>  struct update_util_data {
>  	void (*func)(struct update_util_data *data,
>


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC 01/18] capabilities: track actually used capabilities
  2016-06-13 20:32   ` Andy Lutomirski
@ 2016-06-13 20:45     ` Topi Miettinen
  2016-06-13 21:12       ` Andy Lutomirski
  0 siblings, 1 reply; 13+ messages in thread
From: Topi Miettinen @ 2016-06-13 20:45 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: linux-kernel, Alexander Viro, Ingo Molnar, Peter Zijlstra,
	Serge Hallyn, Andrew Morton, Kees Cook, Christoph Lameter,
	Serge E. Hallyn, Andy Shevchenko, Richard W.M. Jones,
	Iago López Galeiras, Chris Metcalf, Andy Lutomirski,
	Jann Horn, open list:FILESYSTEMS (VFS and infrastructure),
	open list:CAPABILITIES

On 06/13/16 20:32, Andy Lutomirski wrote:
> On Mon, Jun 13, 2016 at 12:44 PM, Topi Miettinen <toiwoton@gmail.com> wrote:
>> Track what capabilities are actually used and present the current
>> situation in /proc/self/status.
> 
> What for?

Excerpt from the cover letter:

"There are many basic ways to control processes, including capabilities,
cgroups and resource limits. However, there are far fewer ways to find out
useful values for the limits, except blind trial and error.

This patch series attempts to fix that by giving at least a nice starting
point from the actual maximum values. I looked where each limit is checked
and added a call to limit bump nearby.


Capabilities
[RFC 01/18] capabilities: track actually used capabilities

Currently, there is no way to know which capabilities are actually used.
Even the source code is only implicit; in-depth knowledge of each
capability must be used when analyzing a program to judge which
capabilities the program will exercise."

Should I perhaps cite some of this in the commit?

>
> What is the intended behavior on fork()?  Whatever the intended
> behavior is, there should IMO be a selftest for it.
>
> --Andy
>

The capabilities could be tracked from three points of the daemon
initialization sequence onwards:
fork()
setpcap()
exec()

fork() case would be logical as the /proc entry is per task. But if you
consider the tools to set the capabilities (for example systemd unit
files), there can be between fork() and exec() further preparations
which need more capabilities than the program itself needs.

setpcap() is probably the real point after which we are interested in
whether the capabilities are enough.

The amount of setup between setpcap() and exec() is probably very low.

-Topi

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC 01/18] capabilities: track actually used capabilities
  2016-06-13 20:45     ` Topi Miettinen
@ 2016-06-13 21:12       ` Andy Lutomirski
  2016-06-13 21:48         ` Topi Miettinen
  0 siblings, 1 reply; 13+ messages in thread
From: Andy Lutomirski @ 2016-06-13 21:12 UTC (permalink / raw)
  To: Topi Miettinen
  Cc: linux-kernel, Alexander Viro, Ingo Molnar, Peter Zijlstra,
	Serge Hallyn, Andrew Morton, Kees Cook, Christoph Lameter,
	Serge E. Hallyn, Andy Shevchenko, Richard W.M. Jones,
	Iago López Galeiras, Chris Metcalf, Andy Lutomirski,
	Jann Horn, open list:FILESYSTEMS (VFS and infrastructure),
	open list:CAPABILITIES

On Mon, Jun 13, 2016 at 1:45 PM, Topi Miettinen <toiwoton@gmail.com> wrote:
> On 06/13/16 20:32, Andy Lutomirski wrote:
>> On Mon, Jun 13, 2016 at 12:44 PM, Topi Miettinen <toiwoton@gmail.com> wrote:
>>> Track what capabilities are actually used and present the current
>>> situation in /proc/self/status.
>>
>> What for?
>

>
> Capabilities
> [RFC 01/18] capabilities: track actually used capabilities
>
> Currently, there is no way to know which capabilities are actually used.
> Even
> the source code is only implicit, in-depth knowledge of each capability must
> be used when analyzing a program to judge which capabilities the program
> will
> exercise."
>
> Should I perhaps cite some of this in the commit?

Yes, but you should also clarify what users are supposed to do with
this.  Given ambient capabilities, I suspect that you'll find that
your patch doesn't actually work very well.  For example, if you run a
shell script with ambient caps, then you won't notice caps used by
short-lived helper processes.

>
>>
>> What is the intended behavior on fork()?  Whatever the intended
>> behavior is, there should IMO be a selftest for it.
>>
>> --Andy
>>
>
> The capabilities could be tracked from three points of daemon
> initialization sequence onwards:
> fork()
> setpcap()
> exec()
>
> fork() case would be logical as the /proc entry is per task. But if you
> consider the tools to set the capabilities (for example systemd unit
> files), there can be between fork() and exec() further preparations
> which need more capabilities than the program itself needs.
>
> setpcap() is probably the real point after which we are interested if
> the capabilities are enough.
>
> The amount of setup between setpcap() and exec() is probably very low.

When I asked "what is the intended behavior on fork()?", I mean "what
should CapUsed be after fork()?".  The answer should be about four
words long and should have a test case.  There should maybe also be an
explanation of why the intended behavior is useful.

But, as I said above, I think that you may need to rethink this
entirely to make it useful.  You might need to do it per process tree
or per cgroup or something.

--Andy

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC 05/18] limits: track and present RLIMIT_NOFILE actual max
  2016-06-13 20:40   ` Andy Lutomirski
@ 2016-06-13 21:13     ` Topi Miettinen
  2016-06-13 21:16       ` Andy Lutomirski
  0 siblings, 1 reply; 13+ messages in thread
From: Topi Miettinen @ 2016-06-13 21:13 UTC (permalink / raw)
  To: Andy Lutomirski, linux-kernel
  Cc: Alexander Viro, Ingo Molnar, Peter Zijlstra, Andrew Morton,
	Kees Cook, Cyrill Gorcunov, Alexey Dobriyan, John Stultz,
	Janis Danisevskis, Calvin Owens, Jann Horn,
	open list:FILESYSTEMS (VFS and infrastructure)

On 06/13/16 20:40, Andy Lutomirski wrote:
> On 06/13/2016 12:44 PM, Topi Miettinen wrote:
>> Track maximum number of files for the process, present current maximum
>> in /proc/self/limits.
> 
> The core part should be its own patch.
> 
> Also, you have this weirdly named (and racy!) function bump_rlimit.

I can change the name if you have better suggestions. rlimit_track_max?

The max value is written often but read seldom, if ever. What kind of
locking should I use then?

> Wouldn't this be nicer if you taught the rlimit code to track the
> *current* usage generically and to derive the max usage from that?

Current rlimit code performs checks against current limits. These are
typically done early in the calling function and further checks could
also fail. Thus max should not be updated until much later. Maybe these
could be combined, but not easily if at all.

> 
>> diff --git a/fs/proc/base.c b/fs/proc/base.c
>> index a11eb71..227997b 100644
>> --- a/fs/proc/base.c
>> +++ b/fs/proc/base.c
>> @@ -630,8 +630,8 @@ static int proc_pid_limits(struct seq_file *m,
>> struct pid_namespace *ns,
>>      /*
>>       * print the file header
>>       */
>> -       seq_printf(m, "%-25s %-20s %-20s %-10s\n",
>> -          "Limit", "Soft Limit", "Hard Limit", "Units");
>> +    seq_printf(m, "%-25s %-20s %-20s %-10s %-20s\n",
>> +           "Limit", "Soft Limit", "Hard Limit", "Units", "Max");
> 
> What existing programs, if any, does this break?

Using Debian codesearch for the "/limits" string, I'd check pam_limits and
rtkit. The max values could be put into a new file if you prefer.

> 
>>
>>      for (i = 0; i < RLIM_NLIMITS; i++) {
>>          if (rlim[i].rlim_cur == RLIM_INFINITY)
>> @@ -647,9 +647,11 @@ static int proc_pid_limits(struct seq_file *m,
>> struct pid_namespace *ns,
>>              seq_printf(m, "%-20lu ", rlim[i].rlim_max);
>>
>>          if (lnames[i].unit)
>> -            seq_printf(m, "%-10s\n", lnames[i].unit);
>> +            seq_printf(m, "%-10s", lnames[i].unit);
>>          else
>> -            seq_putc(m, '\n');
>> +            seq_printf(m, "%-10s", "");
>> +        seq_printf(m, "%-20lu\n",
>> +               task->signal->rlim_curmax[i]);
>>      }
>>
>>      return 0;
>> diff --git a/include/linux/sched.h b/include/linux/sched.h
>> index 9c48a08..0150380 100644
>> --- a/include/linux/sched.h
>> +++ b/include/linux/sched.h
>> @@ -782,6 +782,7 @@ struct signal_struct {
>>       * have no need to disable irqs.
>>       */
>>      struct rlimit rlim[RLIM_NLIMITS];
>> +    unsigned long rlim_curmax[RLIM_NLIMITS];
>>
>>  #ifdef CONFIG_BSD_PROCESS_ACCT
>>      struct pacct_struct pacct;    /* per-process accounting
>> information */
>> @@ -3376,6 +3377,12 @@ static inline unsigned long rlimit_max(unsigned
>> int limit)
>>      return task_rlimit_max(current, limit);
>>  }
>>
>> +static inline void bump_rlimit(unsigned int limit, unsigned long r)
>> +{
>> +    if (READ_ONCE(current->signal->rlim_curmax[limit]) < r)
>> +        current->signal->rlim_curmax[limit] = r;
>> +}
>> +
>>  #ifdef CONFIG_CPU_FREQ
>>  struct update_util_data {
>>      void (*func)(struct update_util_data *data,
>>
> 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC 05/18] limits: track and present RLIMIT_NOFILE actual max
  2016-06-13 21:13     ` Topi Miettinen
@ 2016-06-13 21:16       ` Andy Lutomirski
  2016-06-14 15:21         ` Topi Miettinen
  0 siblings, 1 reply; 13+ messages in thread
From: Andy Lutomirski @ 2016-06-13 21:16 UTC (permalink / raw)
  To: Topi Miettinen
  Cc: Andy Lutomirski, linux-kernel, Alexander Viro, Ingo Molnar,
	Peter Zijlstra, Andrew Morton, Kees Cook, Cyrill Gorcunov,
	Alexey Dobriyan, John Stultz, Janis Danisevskis, Calvin Owens,
	Jann Horn, open list:FILESYSTEMS (VFS and infrastructure)

On Mon, Jun 13, 2016 at 2:13 PM, Topi Miettinen <toiwoton@gmail.com> wrote:
> On 06/13/16 20:40, Andy Lutomirski wrote:
>> On 06/13/2016 12:44 PM, Topi Miettinen wrote:
>>> Track maximum number of files for the process, present current maximum
>>> in /proc/self/limits.
>>
>> The core part should be its own patch.
>>
>> Also, you have this weirdly named (and racy!) function bump_rlimit.
>
> I can change the name if you have better suggestions. rlimit_track_max?
>
> The max value is written often but read seldom, if ever. What kind of
> locking should I use then?

Possibly none, but WRITE_ONCE would be good as would a comment
indicating that your code is intentionally racy.  Or you could use
atomic_cmpxchg if that won't kill performance.

rlimit_track_max sounds like a better name to me.

>
>> Wouldn't this be nicer if you taught the rlimit code to track the
>> *current* usage generically and to derive the max usage from that?
>
> Current rlimit code performs checks against current limits. These are
> typically done early in the calling function and further checks could
> also fail. Thus max should not be updated until much later. Maybe these
> could be combined, but not easily if at all.

I mean:  why not actually show the current value in /proc/pid/limits
and track the max via whatever teaches proc about the current value?

>
>>
>>> diff --git a/fs/proc/base.c b/fs/proc/base.c
>>> index a11eb71..227997b 100644
>>> --- a/fs/proc/base.c
>>> +++ b/fs/proc/base.c
>>> @@ -630,8 +630,8 @@ static int proc_pid_limits(struct seq_file *m,
>>> struct pid_namespace *ns,
>>>      /*
>>>       * print the file header
>>>       */
>>> -       seq_printf(m, "%-25s %-20s %-20s %-10s\n",
>>> -          "Limit", "Soft Limit", "Hard Limit", "Units");
>>> +    seq_printf(m, "%-25s %-20s %-20s %-10s %-20s\n",
>>> +           "Limit", "Soft Limit", "Hard Limit", "Units", "Max");
>>
>> What existing programs, if any, does this break?
>
> Using Debian codesearch for /limits" string, I'd check pam_limits and
> rtkit. The max values could be put into a new file if you prefer.

If it actually breaks them, then you need to change the patch so you
don't break them.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC 01/18] capabilities: track actually used capabilities
  2016-06-13 21:12       ` Andy Lutomirski
@ 2016-06-13 21:48         ` Topi Miettinen
  0 siblings, 0 replies; 13+ messages in thread
From: Topi Miettinen @ 2016-06-13 21:48 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: linux-kernel, Alexander Viro, Ingo Molnar, Peter Zijlstra,
	Serge Hallyn, Andrew Morton, Kees Cook, Christoph Lameter,
	Serge E. Hallyn, Andy Shevchenko, Richard W.M. Jones,
	Iago López Galeiras, Chris Metcalf, Andy Lutomirski,
	Jann Horn, open list:FILESYSTEMS (VFS and infrastructure),
	open list:CAPABILITIES

On 06/13/16 21:12, Andy Lutomirski wrote:
> On Mon, Jun 13, 2016 at 1:45 PM, Topi Miettinen <toiwoton@gmail.com> wrote:
>> On 06/13/16 20:32, Andy Lutomirski wrote:
>>> On Mon, Jun 13, 2016 at 12:44 PM, Topi Miettinen <toiwoton@gmail.com> wrote:
>>>> Track what capabilities are actually used and present the current
>>>> situation in /proc/self/status.
>>>
>>> What for?
>>
> 
>>
>> Capabilities
>> [RFC 01/18] capabilities: track actually used capabilities
>>
>> Currently, there is no way to know which capabilities are actually used.
>> Even
>> the source code is only implicit, in-depth knowledge of each capability must
>> be used when analyzing a program to judge which capabilities the program
>> will
>> exercise."
>>
>> Should I perhaps cite some of this in the commit?
> 
> Yes, but you should also clarify what users are supposed to do with
> this.  Given ambient capabilities, I suspect that you'll find that
> your patch doesn't actually work very well.  For example, if you run a
> shell script with ambient caps, then you won't notice caps used by
> short-lived helper processes.
> 

Right, I suppose this model works well only within a single process, or
where the helper processes are always unprivileged (like Xorg runs
xkbcomp) or less privileged.

>>
>>>
>>> What is the intended behavior on fork()?  Whatever the intended
>>> behavior is, there should IMO be a selftest for it.
>>>
>>> --Andy
>>>
>>
>> The capabilities could be tracked from three points of daemon
>> initialization sequence onwards:
>> fork()
>> setpcap()
>> exec()
>>
>> fork() case would be logical as the /proc entry is per task. But if you
>> consider the tools to set the capabilities (for example systemd unit
>> files), there can be between fork() and exec() further preparations
>> which need more capabilities than the program itself needs.
>>
>> setpcap() is probably the real point after which we are interested if
>> the capabilities are enough.
>>
>> The amount of setup between setpcap() and exec() is probably very low.
> 
> When I asked "what is the intended behavior on fork()?", I mean "what
> should CapUsed be after fork()?".  The answer should be about four
> words long and should have a test case.  There should maybe also be an
> explanation of why the intended behavior is useful.

In this model:
fork: no change
setpcap: no change
exec: reset

But I hadn't thought that much about where the reset happens.

> 
> But, as I said above, I think that you may need to rethink this
> entirely to make it useful.  You might need to do it per process tree
> or per cgroup or something.
> 
> --Andy
> 

I'd actually prefer the cgroup approach. Though that's much more work
than this simple patch which already gives somewhat useful information
in limited cases (once the logic is correct).

-Topi


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC 05/18] limits: track and present RLIMIT_NOFILE actual max
  2016-06-13 21:16       ` Andy Lutomirski
@ 2016-06-14 15:21         ` Topi Miettinen
  0 siblings, 0 replies; 13+ messages in thread
From: Topi Miettinen @ 2016-06-14 15:21 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Andy Lutomirski, linux-kernel, Alexander Viro, Ingo Molnar,
	Peter Zijlstra, Andrew Morton, Kees Cook, Cyrill Gorcunov,
	Alexey Dobriyan, John Stultz, Janis Danisevskis, Calvin Owens,
	Jann Horn, open list:FILESYSTEMS (VFS and infrastructure)

On 06/13/16 21:16, Andy Lutomirski wrote:
> On Mon, Jun 13, 2016 at 2:13 PM, Topi Miettinen <toiwoton@gmail.com> wrote:
>> On 06/13/16 20:40, Andy Lutomirski wrote:
>>> On 06/13/2016 12:44 PM, Topi Miettinen wrote:
>>>> Track maximum number of files for the process, present current maximum
>>>> in /proc/self/limits.
>>>
>>> The core part should be its own patch.
>>>
>>> Also, you have this weirdly named (and racy!) function bump_rlimit.
>>
>> I can change the name if you have better suggestions. rlimit_track_max?
>>
>> The max value is written often but read seldom, if ever. What kind of
>> locking should I use then?
> 
> Possibly none, but WRITE_ONCE would be good as would a comment
> indicating that your code is intentionally racy.  Or you could use
> atomic_cmpxchg if that won't kill performance.
> 
> rlimit_track_max sounds like a better name to me.
> 
>>
>>> Wouldn't this be nicer if you taught the rlimit code to track the
>>> *current* usage generically and to derive the max usage from that?
>>
>> Current rlimit code performs checks against current limits. These are
>> typically done early in the calling function and further checks could
>> also fail. Thus max should not be updated until much later. Maybe these
>> could be combined, but not easily if at all.
> 
> I mean:  why not actually show the current value in /proc/pid/limits
> and track the max via whatever teaches proc about the current value?
> 

That could be interesting data too. In other comments, a new file was
proposed and then your model would be a good choice.

>>
>>>
>>>> diff --git a/fs/proc/base.c b/fs/proc/base.c
>>>> index a11eb71..227997b 100644
>>>> --- a/fs/proc/base.c
>>>> +++ b/fs/proc/base.c
>>>> @@ -630,8 +630,8 @@ static int proc_pid_limits(struct seq_file *m,
>>>> struct pid_namespace *ns,
>>>>      /*
>>>>       * print the file header
>>>>       */
>>>> -       seq_printf(m, "%-25s %-20s %-20s %-10s\n",
>>>> -          "Limit", "Soft Limit", "Hard Limit", "Units");
>>>> +    seq_printf(m, "%-25s %-20s %-20s %-10s %-20s\n",
>>>> +           "Limit", "Soft Limit", "Hard Limit", "Units", "Max");
>>>
>>> What existing programs, if any, does this break?
>>
>> Using Debian codesearch for /limits" string, I'd check pam_limits and
>> rtkit. The max values could be put into a new file if you prefer.
> 
> If it actually breaks them, then you need to change the patch so you
> don't break them.
> 


^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2016-06-14 15:21 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <1465847065-3577-1-git-send-email-toiwoton@gmail.com>
2016-06-13 19:44 ` [RFC 01/18] capabilities: track actually used capabilities Topi Miettinen
2016-06-13 20:32   ` Andy Lutomirski
2016-06-13 20:45     ` Topi Miettinen
2016-06-13 21:12       ` Andy Lutomirski
2016-06-13 21:48         ` Topi Miettinen
2016-06-13 19:44 ` [RFC 05/18] limits: track and present RLIMIT_NOFILE actual max Topi Miettinen
2016-06-13 20:40   ` Andy Lutomirski
2016-06-13 21:13     ` Topi Miettinen
2016-06-13 21:16       ` Andy Lutomirski
2016-06-14 15:21         ` Topi Miettinen
2016-06-13 19:44 ` [RFC 07/18] limits: track RLIMIT_FSIZE " Topi Miettinen
2016-06-13 19:44 ` [RFC 08/18] limits: track RLIMIT_DATA " Topi Miettinen
2016-06-13 19:44 ` [RFC 09/18] limits: track RLIMIT_CORE " Topi Miettinen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).