Linux-Fsdevel Archive on lore.kernel.org
 help / color / Atom feed
From: Jann Horn <jann@thejh.net>
To: Alexander Viro <viro@zeniv.linux.org.uk>,
	Roland McGrath <roland@hack.frob.com>,
	Oleg Nesterov <oleg@redhat.com>,
	John Johansen <john.johansen@canonical.com>,
	James Morris <james.l.morris@oracle.com>,
	"Serge E. Hallyn" <serge@hallyn.com>,
	Paul Moore <aul@paul-moore.com>,
	Stephen Smalley <sds@tycho.nsa.gov>,
	Eric Paris <eparis@parisplace.org>,
	Casey Schaufler <casey@schaufler-ca.com>,
	Kees Cook <keescook@chromium.org>,
	Andrew Morton <akpm@linux-foundation.org>,
	Janis Danisevskis <jdanis@google.com>,
	Seth Forshee <seth.forshee@canonical.com>,
	"Eric . Biederman" <ebiederm@xmission.com>,
	Thomas Gleixner <tglx@linutronix.de>,
	Benjamin LaHaise <bcrl@kvack.org>,
	Ben Hutchings <ben@decadent.org.uk>,
	Andy Lutomirski <luto@amacapital.net>,
	Linus Torvalds <torvalds@linux-foundation.org>
Cc: linux-fsdevel@vger.kernel.org,
	linux-security-module@vger.kernel.org, security@kernel.org
Subject: [PATCH v2 2/8] exec: turn self_exec_id into self_privunit
Date: Fri, 23 Sep 2016 22:40:32 +0200
Message-ID: <1474663238-22134-3-git-send-email-jann@thejh.net> (raw)
In-Reply-To: <1474663238-22134-1-git-send-email-jann@thejh.net>

This ensures that self_privunit ("privilege unit locally unique ID")
is only shared by processes that share the mm_struct and the signal_struct;
not just spatially, but also temporally. In other words, if you do execve()
or clone() without CLONE_THREAD, you get a new privunit that has never
been used before.

One reason for doing this is that it prevents an attacker from sending an
arbitrary signal to a parent process after performing 2^32-1 execve()
calls.

The second reason for this is that it permits using self_privunit in
a later patch to check during a ptrace access whether subject and object
are temporally and spatially equal for privilege checking purposes.

The implementation of locally unique IDs is in sched.h and exec.c for now
because those are the only users so far - if anything else wants to use
them in the future, they can be moved elsewhere.

changed in v2:
 - have 2^64 IDs per CPU instead of 2^64 shared ones (luid scheme,
   suggested by Andy Lutomirski)
 - take tasklist_lock for reading in setup_new_exec() while bumping the LUID

Signed-off-by: Jann Horn <jann@thejh.net>
---
 fs/exec.c             | 41 +++++++++++++++++++++++++++++++++++++++--
 include/linux/sched.h | 17 +++++++++++++++--
 kernel/fork.c         |  5 +++--
 kernel/signal.c       |  5 ++++-
 4 files changed, 61 insertions(+), 7 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index 84430ee..fcc11f0 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1281,6 +1281,34 @@ void would_dump(struct linux_binprm *bprm, struct file *file)
 }
 EXPORT_SYMBOL(would_dump);
 
+static DEFINE_PER_CPU(u64, luid_counters);
+
+static int __init init_luid_counters(void)
+{
+	unsigned int cpu;
+
+	for_each_possible_cpu(cpu) {
+		/* value 0 is reserved for init */
+		per_cpu(luid_counters, cpu) = 1;
+	}
+
+	return 0;
+}
+early_initcall(init_luid_counters);
+
+/*
+ * Allocates a new LUID and writes the allocated LUID to @out.
+ * This function must not be called from IRQ context.
+ */
+void fill_luid(struct luid *out)
+{
+	preempt_disable();
+	out->count = raw_cpu_read(luid_counters);
+	raw_cpu_add(luid_counters, 1);
+	out->cpu = smp_processor_id();
+	preempt_enable();
+}
+
 void setup_new_exec(struct linux_binprm * bprm)
 {
 	arch_pick_mmap_layout(current->mm);
@@ -1313,8 +1341,17 @@ void setup_new_exec(struct linux_binprm * bprm)
 	}
 
 	/* An exec changes our domain. We are no longer part of the thread
-	   group */
-	current->self_exec_id++;
+	 * group.
+	 * The privunit luid is regenerated with the tasklist_lock held for
+	 * reading to allow do_notify_parent() (which only runs with
+	 * tasklist_lock held for writing) to inspect privunit IDs of other
+	 * tasks without taking the cred_guard_light (which wouldn't work
+	 * because the tasklist_lock is held).
+	 */
+	read_lock(&tasklist_lock);
+	fill_luid(&current->self_privunit);
+	read_unlock(&tasklist_lock);
+
 	flush_signal_handlers(current, 0);
 	do_close_on_exec(current->files);
 }
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2a1df2f..fa90e36 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1467,6 +1467,19 @@ struct tlbflush_unmap_batch {
 	bool writable;
 };
 
+/* locally unique ID */
+struct luid {
+	u64 count;
+	unsigned int cpu;
+};
+
+void fill_luid(struct luid *out);
+
+static inline bool luid_eq(const struct luid *a, const struct luid *b)
+{
+	return a->count == b->count && a->cpu == b->cpu;
+}
+
 struct task_struct {
 	volatile long state;	/* -1 unrunnable, 0 runnable, >0 stopped */
 	void *stack;
@@ -1688,8 +1701,8 @@ struct task_struct {
 	struct seccomp seccomp;
 
 /* Thread group tracking */
-   	u32 parent_exec_id;
-   	u32 self_exec_id;
+	struct luid parent_privunit;
+	struct luid self_privunit;
 /* Protection of (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed,
  * mempolicy */
 	spinlock_t alloc_lock;
diff --git a/kernel/fork.c b/kernel/fork.c
index 2d46f3a..e1bd501 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1567,6 +1567,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 			p->exit_signal = (clone_flags & CSIGNAL);
 		p->group_leader = p;
 		p->tgid = p->pid;
+		fill_luid(&p->self_privunit);
 	}
 
 	p->nr_dirtied = 0;
@@ -1597,10 +1598,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	/* CLONE_PARENT re-uses the old parent */
 	if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) {
 		p->real_parent = current->real_parent;
-		p->parent_exec_id = current->parent_exec_id;
+		p->parent_privunit = current->parent_privunit;
 	} else {
 		p->real_parent = current;
-		p->parent_exec_id = current->self_exec_id;
+		p->parent_privunit = current->self_privunit;
 	}
 
 	spin_lock(&current->sighand->siglock);
diff --git a/kernel/signal.c b/kernel/signal.c
index af21afc..3dbd25b 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1566,6 +1566,8 @@ ret:
  * Let a parent know about the death of a child.
  * For a stopped/continued status change, use do_notify_parent_cldstop instead.
  *
+ * Must be called with tasklist_lock held for writing.
+ *
  * Returns true if our parent ignored us and so we've switched to
  * self-reaping.
  */
@@ -1590,7 +1592,8 @@ bool do_notify_parent(struct task_struct *tsk, int sig)
 		 * This is only possible if parent == real_parent.
 		 * Check if it has changed security domain.
 		 */
-		if (tsk->parent_exec_id != tsk->parent->self_exec_id)
+		if (!luid_eq(&tsk->parent_privunit,
+			     &tsk->parent->self_privunit))
 			sig = SIGCHLD;
 	}
 
-- 
2.1.4


  parent reply index

Thread overview: 30+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-09-23 20:40 [PATCH v2 0/8] Various fixes related to ptrace_may_access() Jann Horn
2016-09-23 20:40 ` [PATCH v2 1/8] exec: introduce cred_guard_light Jann Horn
2016-09-30 15:35   ` Oleg Nesterov
2016-09-30 18:27     ` Eric W. Biederman
2016-10-03 16:02       ` Oleg Nesterov
2016-10-30 21:12     ` Jann Horn
2016-09-23 20:40 ` Jann Horn [this message]
2016-09-23 21:04   ` [PATCH v2 2/8] exec: turn self_exec_id into self_privunit Andy Lutomirski
2016-09-23 21:33     ` Jann Horn
2016-09-30 13:20   ` Oleg Nesterov
2016-09-30 13:44     ` Oleg Nesterov
2016-09-30 18:30       ` Kees Cook
2016-09-30 18:59         ` Jann Horn
2016-09-30 19:05           ` Kees Cook
2016-10-03 16:37         ` Oleg Nesterov
2016-09-23 20:40 ` [PATCH v2 3/8] proc: use open()-time creds for ptrace checks Jann Horn
2016-09-23 20:40 ` [PATCH v2 4/8] futex: don't leak robust_list pointer Jann Horn
2016-09-30 14:52   ` Oleg Nesterov
2016-10-30 17:16     ` Jann Horn
2016-11-02 21:39       ` Jann Horn
2016-11-02 22:47         ` Jann Horn
2016-09-23 20:40 ` [PATCH v2 5/8] proc: lock properly in ptrace_may_access callers Jann Horn
2016-09-23 20:40 ` [PATCH v2 6/8] ptrace: warn on ptrace_may_access without proper locking Jann Horn
2016-09-23 20:40 ` [PATCH v2 7/8] fs/proc: fix attr access check Jann Horn
2016-09-23 20:40 ` [PATCH v2 8/8] Documentation: add security/ptrace_checks.txt Jann Horn
2016-10-02  3:16   ` Krister Johansen
2016-10-30 19:09     ` Jann Horn
2016-10-31  4:14       ` Eric W. Biederman
2016-10-31 13:39         ` Jann Horn
2016-11-03 20:43         ` Krister Johansen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1474663238-22134-3-git-send-email-jann@thejh.net \
    --to=jann@thejh.net \
    --cc=akpm@linux-foundation.org \
    --cc=aul@paul-moore.com \
    --cc=bcrl@kvack.org \
    --cc=ben@decadent.org.uk \
    --cc=casey@schaufler-ca.com \
    --cc=ebiederm@xmission.com \
    --cc=eparis@parisplace.org \
    --cc=james.l.morris@oracle.com \
    --cc=jdanis@google.com \
    --cc=john.johansen@canonical.com \
    --cc=keescook@chromium.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-security-module@vger.kernel.org \
    --cc=luto@amacapital.net \
    --cc=oleg@redhat.com \
    --cc=roland@hack.frob.com \
    --cc=sds@tycho.nsa.gov \
    --cc=security@kernel.org \
    --cc=serge@hallyn.com \
    --cc=seth.forshee@canonical.com \
    --cc=tglx@linutronix.de \
    --cc=torvalds@linux-foundation.org \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Linux-Fsdevel Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-fsdevel/0 linux-fsdevel/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-fsdevel linux-fsdevel/ https://lore.kernel.org/linux-fsdevel \
		linux-fsdevel@vger.kernel.org
	public-inbox-index linux-fsdevel

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-fsdevel


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git