linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH V4] binder: ipc namespace support for android binder
@ 2018-11-12  9:37 chouryzhou(周威)
  2018-11-12 16:45 ` Todd Kjos
  2018-11-15 22:33 ` Andrew Morton
  0 siblings, 2 replies; 10+ messages in thread
From: chouryzhou(周威) @ 2018-11-12  9:37 UTC (permalink / raw)
  To: tkjos, gregkh
  Cc: arve, tkjos, akpm, dave, devel, linux-kernel,
	chouryzhou(周威)

Currently android's binder is not isolated by ipc namespace. Since binder 
is a form of IPC and therefore should be tied to ipc namespace. With this 
patch, we can run multiple instances of  android container on one host.

This patch move "binder_procs" and "binder_context" into ipc_namespace,
driver will find the context from it when opening. For debugfs, binder_proc
is namespace-aware, but not for binder dead nodes, binder_stats and 
binder_transaction_log_entry (we added ipc inum to trace it).

Signed-off-by: chouryzhou <chouryzhou@tencent.com>
Reviewed-by: Davidlohr Bueso <dave@stgolabs.net>
---
 drivers/android/binder.c      | 133 ++++++++++++++++++++++++++++++++----------
 include/linux/ipc_namespace.h |  15 +++++
 ipc/namespace.c               |  10 +++-
 3 files changed, 125 insertions(+), 33 deletions(-)

diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index cb30a524d16d..453265505b04 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -67,6 +67,8 @@
 #include <linux/sched/mm.h>
 #include <linux/seq_file.h>
 #include <linux/uaccess.h>
+#include <linux/proc_ns.h>
+#include <linux/ipc_namespace.h>
 #include <linux/pid_namespace.h>
 #include <linux/security.h>
 #include <linux/spinlock.h>
@@ -80,13 +82,21 @@
 #include "binder_alloc.h"
 #include "binder_trace.h"
 
+
+#ifndef CONFIG_IPC_NS
+static struct ipc_namespace binder_ipc_ns = {
+       .ns.inum = PROC_IPC_INIT_INO,
+};
+
+#define ipcns  (&binder_ipc_ns)
+#else
+#define ipcns  (current->nsproxy->ipc_ns)
+#endif
+
 static HLIST_HEAD(binder_deferred_list);
 static DEFINE_MUTEX(binder_deferred_lock);
 
 static HLIST_HEAD(binder_devices);
-static HLIST_HEAD(binder_procs);
-static DEFINE_MUTEX(binder_procs_lock);
-
 static HLIST_HEAD(binder_dead_nodes);
 static DEFINE_SPINLOCK(binder_dead_nodes_lock);
 
@@ -233,6 +243,7 @@ struct binder_transaction_log_entry {
        uint32_t return_error;
        uint32_t return_error_param;
        const char *context_name;
+       unsigned int ipc_inum;
 };
 struct binder_transaction_log {
        atomic_t cur;
@@ -263,19 +274,66 @@ static struct binder_transaction_log_entry *binder_transaction_log_add(
 }
 
 struct binder_context {
+       struct hlist_node hlist;
        struct binder_node *binder_context_mgr_node;
        struct mutex context_mgr_node_lock;
 
        kuid_t binder_context_mgr_uid;
+       int   device;
        const char *name;
 };
 
 struct binder_device {
        struct hlist_node hlist;
        struct miscdevice miscdev;
-       struct binder_context context;
 };
 
+void binder_exit_ns(struct ipc_namespace *ns)
+{
+       struct binder_context *context;
+       struct hlist_node *tmp;
+
+       mutex_destroy(&ns->binder_procs_lock);
+       hlist_for_each_entry_safe(context, tmp, &ns->binder_contexts, hlist) {
+               mutex_destroy(&context->context_mgr_node_lock);
+               hlist_del(&context->hlist);
+               kfree(context);
+       }
+}
+
+int binder_init_ns(struct ipc_namespace *ns)
+{
+       int ret;
+       struct binder_device *device;
+
+       mutex_init(&ns->binder_procs_lock);
+       INIT_HLIST_HEAD(&ns->binder_procs);
+       INIT_HLIST_HEAD(&ns->binder_contexts);
+
+       hlist_for_each_entry(device, &binder_devices, hlist) {
+               struct binder_context *context;
+
+               context = kzalloc(sizeof(*context), GFP_KERNEL);
+               if (!context) {
+                       ret = -ENOMEM;
+                       goto err;
+               }
+
+               context->device = device->miscdev.minor;
+               context->name = device->miscdev.name;
+               context->binder_context_mgr_uid = INVALID_UID;
+               mutex_init(&context->context_mgr_node_lock);
+
+               hlist_add_head(&context->hlist, &ns->binder_contexts);
+       }
+
+       return 0;
+err:
+       binder_exit_ns(ns);
+       return ret;
+}
+
+
 /**
  * struct binder_work - work enqueued on a worklist
  * @entry:             node enqueued on list
@@ -2728,6 +2786,7 @@ static void binder_transaction(struct binder_proc *proc,
        e->data_size = tr->data_size;
        e->offsets_size = tr->offsets_size;
        e->context_name = proc->context->name;
+       e->ipc_inum = ipcns->ns.inum;
 
        if (reply) {
                binder_inner_proc_lock(proc);
@@ -4922,6 +4981,7 @@ static int binder_open(struct inode *nodp, struct file *filp)
 {
        struct binder_proc *proc;
        struct binder_device *binder_dev;
+       struct binder_context *context;
 
        binder_debug(BINDER_DEBUG_OPEN_CLOSE, "%s: %d:%d\n", __func__,
                     current->group_leader->pid, current->pid);
@@ -4937,7 +4997,15 @@ static int binder_open(struct inode *nodp, struct file *filp)
        proc->default_priority = task_nice(current);
        binder_dev = container_of(filp->private_data, struct binder_device,
                                  miscdev);
-       proc->context = &binder_dev->context;
+       hlist_for_each_entry(context, &ipcns->binder_contexts, hlist) {
+               if (context->device == binder_dev->miscdev.minor) {
+                       proc->context = context;
+                       break;
+               }
+       }
+       if (!proc->context)
+               return -ENOENT;
+
        binder_alloc_init(&proc->alloc);
 
        binder_stats_created(BINDER_STAT_PROC);
@@ -4946,9 +5014,9 @@ static int binder_open(struct inode *nodp, struct file *filp)
        INIT_LIST_HEAD(&proc->waiting_threads);
        filp->private_data = proc;
 
-       mutex_lock(&binder_procs_lock);
-       hlist_add_head(&proc->proc_node, &binder_procs);
-       mutex_unlock(&binder_procs_lock);
+       mutex_lock(&ipcns->binder_procs_lock);
+       hlist_add_head(&proc->proc_node, &ipcns->binder_procs);
+       mutex_unlock(&ipcns->binder_procs_lock);
 
        if (binder_debugfs_dir_entry_proc) {
                char strbuf[11];
@@ -5082,9 +5150,9 @@ static void binder_deferred_release(struct binder_proc *proc)
        struct rb_node *n;
        int threads, nodes, incoming_refs, outgoing_refs, active_transactions;
 
-       mutex_lock(&binder_procs_lock);
+       mutex_lock(&ipcns->binder_procs_lock);
        hlist_del(&proc->proc_node);
-       mutex_unlock(&binder_procs_lock);
+       mutex_unlock(&ipcns->binder_procs_lock);
 
        mutex_lock(&context->context_mgr_node_lock);
        if (context->binder_context_mgr_node &&
@@ -5623,10 +5691,10 @@ static int binder_state_show(struct seq_file *m, void *unused)
        if (last_node)
                binder_put_node(last_node);
 
-       mutex_lock(&binder_procs_lock);
-       hlist_for_each_entry(proc, &binder_procs, proc_node)
+       mutex_lock(&ipcns->binder_procs_lock);
+       hlist_for_each_entry(proc, &ipcns->binder_procs, proc_node)
                print_binder_proc(m, proc, 1);
-       mutex_unlock(&binder_procs_lock);
+       mutex_unlock(&ipcns->binder_procs_lock);
 
        return 0;
 }
@@ -5639,10 +5707,10 @@ static int binder_stats_show(struct seq_file *m, void *unused)
 
        print_binder_stats(m, "", &binder_stats);
 
-       mutex_lock(&binder_procs_lock);
-       hlist_for_each_entry(proc, &binder_procs, proc_node)
+       mutex_lock(&ipcns->binder_procs_lock);
+       hlist_for_each_entry(proc, &ipcns->binder_procs, proc_node)
                print_binder_proc_stats(m, proc);
-       mutex_unlock(&binder_procs_lock);
+       mutex_unlock(&ipcns->binder_procs_lock);
 
        return 0;
 }
@@ -5652,10 +5720,10 @@ static int binder_transactions_show(struct seq_file *m, void *unused)
        struct binder_proc *proc;
 
        seq_puts(m, "binder transactions:\n");
-       mutex_lock(&binder_procs_lock);
-       hlist_for_each_entry(proc, &binder_procs, proc_node)
+       mutex_lock(&ipcns->binder_procs_lock);
+       hlist_for_each_entry(proc, &ipcns->binder_procs, proc_node)
                print_binder_proc(m, proc, 0);
-       mutex_unlock(&binder_procs_lock);
+       mutex_unlock(&ipcns->binder_procs_lock);
 
        return 0;
 }
@@ -5665,14 +5733,14 @@ static int binder_proc_show(struct seq_file *m, void *unused)
        struct binder_proc *itr;
        int pid = (unsigned long)m->private;
 
-       mutex_lock(&binder_procs_lock);
-       hlist_for_each_entry(itr, &binder_procs, proc_node) {
+       mutex_lock(&ipcns->binder_procs_lock);
+       hlist_for_each_entry(itr, &ipcns->binder_procs, proc_node) {
                if (itr->pid == pid) {
                        seq_puts(m, "binder proc state:\n");
                        print_binder_proc(m, itr, 1);
                }
        }
-       mutex_unlock(&binder_procs_lock);
+       mutex_unlock(&ipcns->binder_procs_lock);
 
        return 0;
 }
@@ -5687,12 +5755,12 @@ static void print_binder_transaction_log_entry(struct seq_file *m,
         */
        smp_rmb();
        seq_printf(m,
-                  "%d: %s from %d:%d to %d:%d context %s node %d handle %d size %d:%d ret %d/%d l=%d",
+                  "%d: %s from %d:%d to %d:%d ipc %d context %s node %d handle %d size %d:%d ret %d/%d l=%d",
                   e->debug_id, (e->call_type == 2) ? "reply" :
                   ((e->call_type == 1) ? "async" : "call "), e->from_proc,
-                  e->from_thread, e->to_proc, e->to_thread, e->context_name,
-                  e->to_node, e->target_handle, e->data_size, e->offsets_size,
-                  e->return_error, e->return_error_param,
+                  e->from_thread, e->to_proc, e->to_thread, e->ipc_inum,
+                  e->context_name, e->to_node, e->target_handle, e->data_size,
+                  e->offsets_size, e->return_error, e->return_error_param,
                   e->return_error_line);
        /*
         * read-barrier to guarantee read of debug_id_done after
@@ -5753,10 +5821,6 @@ static int __init init_binder_device(const char *name)
        binder_device->miscdev.minor = MISC_DYNAMIC_MINOR;
        binder_device->miscdev.name = name;
 
-       binder_device->context.binder_context_mgr_uid = INVALID_UID;
-       binder_device->context.name = name;
-       mutex_init(&binder_device->context.context_mgr_node_lock);
-
        ret = misc_register(&binder_device->miscdev);
        if (ret < 0) {
                kfree(binder_device);
@@ -5831,9 +5895,16 @@ static int __init binder_init(void)
                if (ret)
                        goto err_init_binder_device_failed;
        }
+#ifdef CONFIG_IPC_NS
+       ret = binder_init_ns(&init_ipc_ns);
+#else
+       ret = binder_init_ns(&binder_ipc_ns);
+#endif
+       if (ret)
+               goto err_init_namespace_failed;
 
        return ret;
-
+err_init_namespace_failed:
 err_init_binder_device_failed:
        hlist_for_each_entry_safe(device, tmp, &binder_devices, hlist) {
                misc_deregister(&device->miscdev);
diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index 6ab8c1bada3f..d7f850a2ded8 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -63,6 +63,13 @@ struct ipc_namespace {
        unsigned int    mq_msg_default;
        unsigned int    mq_msgsize_default;
 
+#ifdef CONFIG_ANDROID_BINDER_IPC
+       /* next fields are for binder */
+       struct mutex      binder_procs_lock;
+       struct hlist_head binder_procs;
+       struct hlist_head binder_contexts;
+#endif
+
        /* user_ns which owns the ipc ns */
        struct user_namespace *user_ns;
        struct ucounts *ucounts;
@@ -118,6 +125,14 @@ extern int mq_init_ns(struct ipc_namespace *ns);
 static inline int mq_init_ns(struct ipc_namespace *ns) { return 0; }
 #endif
 
+#ifdef CONFIG_ANDROID_BINDER_IPC
+extern int binder_init_ns(struct ipc_namespace *ns);
+extern void binder_exit_ns(struct ipc_namespace *ns);
+#else
+static inline int binder_init_ns(struct ipc_namespace *ns) { return 0; }
+static inline void binder_exit_ns(struct ipc_namespace *ns) { }
+#endif
+
 #if defined(CONFIG_IPC_NS)
 extern struct ipc_namespace *copy_ipcs(unsigned long flags,
        struct user_namespace *user_ns, struct ipc_namespace *ns);
diff --git a/ipc/namespace.c b/ipc/namespace.c
index 21607791d62c..68c6e983b002 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -57,7 +57,10 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
 
        err = mq_init_ns(ns);
        if (err)
-               goto fail_put;
+               goto fail_init_mq;
+       err = binder_init_ns(ns);
+       if (err)
+               goto fail_init_binder;
 
        sem_init_ns(ns);
        msg_init_ns(ns);
@@ -65,7 +68,9 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
 
        return ns;
 
-fail_put:
+fail_init_binder:
+       mq_put_mnt(ns);
+fail_init_mq:
        put_user_ns(ns->user_ns);
        ns_free_inum(&ns->ns);
 fail_free:
@@ -120,6 +125,7 @@ static void free_ipc_ns(struct ipc_namespace *ns)
        sem_exit_ns(ns);
        msg_exit_ns(ns);
        shm_exit_ns(ns);
+       binder_exit_ns(ns);
 
        dec_ipc_namespaces(ns->ucounts);
        put_user_ns(ns->user_ns);
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH V4] binder: ipc namespace support for android binder
  2018-11-12  9:37 [PATCH V4] binder: ipc namespace support for android binder chouryzhou(周威)
@ 2018-11-12 16:45 ` Todd Kjos
  2018-11-12 19:24   ` Christian Brauner
  2018-11-15 22:33 ` Andrew Morton
  1 sibling, 1 reply; 10+ messages in thread
From: Todd Kjos @ 2018-11-12 16:45 UTC (permalink / raw)
  To: chouryzhou, christian, Martijn Coenen
  Cc: Greg Kroah-Hartman, Arve Hjønnevåg, Todd Kjos, akpm,
	dave, open list:ANDROID DRIVERS, LKML

+christian@brauner.io +Martijn Coenen

Christian,

Does this patch work for your container use-cases? If not, please
comment on this thread. Let's discuss at LPC this week.

-Todd

On Mon, Nov 12, 2018 at 1:38 AM chouryzhou(周威) <chouryzhou@tencent.com> wrote:
>
> Currently android's binder is not isolated by ipc namespace. Since binder
> is a form of IPC and therefore should be tied to ipc namespace. With this
> patch, we can run multiple instances of  android container on one host.
>
> This patch move "binder_procs" and "binder_context" into ipc_namespace,
> driver will find the context from it when opening. For debugfs, binder_proc
> is namespace-aware, but not for binder dead nodes, binder_stats and
> binder_transaction_log_entry (we added ipc inum to trace it).
>
> Signed-off-by: chouryzhou <chouryzhou@tencent.com>
> Reviewed-by: Davidlohr Bueso <dave@stgolabs.net>
> ---
>  drivers/android/binder.c      | 133 ++++++++++++++++++++++++++++++++----------
>  include/linux/ipc_namespace.h |  15 +++++
>  ipc/namespace.c               |  10 +++-
>  3 files changed, 125 insertions(+), 33 deletions(-)
>
> diff --git a/drivers/android/binder.c b/drivers/android/binder.c
> index cb30a524d16d..453265505b04 100644
> --- a/drivers/android/binder.c
> +++ b/drivers/android/binder.c
> @@ -67,6 +67,8 @@
>  #include <linux/sched/mm.h>
>  #include <linux/seq_file.h>
>  #include <linux/uaccess.h>
> +#include <linux/proc_ns.h>
> +#include <linux/ipc_namespace.h>
>  #include <linux/pid_namespace.h>
>  #include <linux/security.h>
>  #include <linux/spinlock.h>
> @@ -80,13 +82,21 @@
>  #include "binder_alloc.h"
>  #include "binder_trace.h"
>
> +
> +#ifndef CONFIG_IPC_NS
> +static struct ipc_namespace binder_ipc_ns = {
> +       .ns.inum = PROC_IPC_INIT_INO,
> +};
> +
> +#define ipcns  (&binder_ipc_ns)
> +#else
> +#define ipcns  (current->nsproxy->ipc_ns)
> +#endif
> +
>  static HLIST_HEAD(binder_deferred_list);
>  static DEFINE_MUTEX(binder_deferred_lock);
>
>  static HLIST_HEAD(binder_devices);
> -static HLIST_HEAD(binder_procs);
> -static DEFINE_MUTEX(binder_procs_lock);
> -
>  static HLIST_HEAD(binder_dead_nodes);
>  static DEFINE_SPINLOCK(binder_dead_nodes_lock);
>
> @@ -233,6 +243,7 @@ struct binder_transaction_log_entry {
>         uint32_t return_error;
>         uint32_t return_error_param;
>         const char *context_name;
> +       unsigned int ipc_inum;
>  };
>  struct binder_transaction_log {
>         atomic_t cur;
> @@ -263,19 +274,66 @@ static struct binder_transaction_log_entry *binder_transaction_log_add(
>  }
>
>  struct binder_context {
> +       struct hlist_node hlist;
>         struct binder_node *binder_context_mgr_node;
>         struct mutex context_mgr_node_lock;
>
>         kuid_t binder_context_mgr_uid;
> +       int   device;
>         const char *name;
>  };
>
>  struct binder_device {
>         struct hlist_node hlist;
>         struct miscdevice miscdev;
> -       struct binder_context context;
>  };
>
> +void binder_exit_ns(struct ipc_namespace *ns)
> +{
> +       struct binder_context *context;
> +       struct hlist_node *tmp;
> +
> +       mutex_destroy(&ns->binder_procs_lock);
> +       hlist_for_each_entry_safe(context, tmp, &ns->binder_contexts, hlist) {
> +               mutex_destroy(&context->context_mgr_node_lock);
> +               hlist_del(&context->hlist);
> +               kfree(context);
> +       }
> +}
> +
> +int binder_init_ns(struct ipc_namespace *ns)
> +{
> +       int ret;
> +       struct binder_device *device;
> +
> +       mutex_init(&ns->binder_procs_lock);
> +       INIT_HLIST_HEAD(&ns->binder_procs);
> +       INIT_HLIST_HEAD(&ns->binder_contexts);
> +
> +       hlist_for_each_entry(device, &binder_devices, hlist) {
> +               struct binder_context *context;
> +
> +               context = kzalloc(sizeof(*context), GFP_KERNEL);
> +               if (!context) {
> +                       ret = -ENOMEM;
> +                       goto err;
> +               }
> +
> +               context->device = device->miscdev.minor;
> +               context->name = device->miscdev.name;
> +               context->binder_context_mgr_uid = INVALID_UID;
> +               mutex_init(&context->context_mgr_node_lock);
> +
> +               hlist_add_head(&context->hlist, &ns->binder_contexts);
> +       }
> +
> +       return 0;
> +err:
> +       binder_exit_ns(ns);
> +       return ret;
> +}
> +
> +
>  /**
>   * struct binder_work - work enqueued on a worklist
>   * @entry:             node enqueued on list
> @@ -2728,6 +2786,7 @@ static void binder_transaction(struct binder_proc *proc,
>         e->data_size = tr->data_size;
>         e->offsets_size = tr->offsets_size;
>         e->context_name = proc->context->name;
> +       e->ipc_inum = ipcns->ns.inum;
>
>         if (reply) {
>                 binder_inner_proc_lock(proc);
> @@ -4922,6 +4981,7 @@ static int binder_open(struct inode *nodp, struct file *filp)
>  {
>         struct binder_proc *proc;
>         struct binder_device *binder_dev;
> +       struct binder_context *context;
>
>         binder_debug(BINDER_DEBUG_OPEN_CLOSE, "%s: %d:%d\n", __func__,
>                      current->group_leader->pid, current->pid);
> @@ -4937,7 +4997,15 @@ static int binder_open(struct inode *nodp, struct file *filp)
>         proc->default_priority = task_nice(current);
>         binder_dev = container_of(filp->private_data, struct binder_device,
>                                   miscdev);
> -       proc->context = &binder_dev->context;
> +       hlist_for_each_entry(context, &ipcns->binder_contexts, hlist) {
> +               if (context->device == binder_dev->miscdev.minor) {
> +                       proc->context = context;
> +                       break;
> +               }
> +       }
> +       if (!proc->context)
> +               return -ENOENT;
> +
>         binder_alloc_init(&proc->alloc);
>
>         binder_stats_created(BINDER_STAT_PROC);
> @@ -4946,9 +5014,9 @@ static int binder_open(struct inode *nodp, struct file *filp)
>         INIT_LIST_HEAD(&proc->waiting_threads);
>         filp->private_data = proc;
>
> -       mutex_lock(&binder_procs_lock);
> -       hlist_add_head(&proc->proc_node, &binder_procs);
> -       mutex_unlock(&binder_procs_lock);
> +       mutex_lock(&ipcns->binder_procs_lock);
> +       hlist_add_head(&proc->proc_node, &ipcns->binder_procs);
> +       mutex_unlock(&ipcns->binder_procs_lock);
>
>         if (binder_debugfs_dir_entry_proc) {
>                 char strbuf[11];
> @@ -5082,9 +5150,9 @@ static void binder_deferred_release(struct binder_proc *proc)
>         struct rb_node *n;
>         int threads, nodes, incoming_refs, outgoing_refs, active_transactions;
>
> -       mutex_lock(&binder_procs_lock);
> +       mutex_lock(&ipcns->binder_procs_lock);
>         hlist_del(&proc->proc_node);
> -       mutex_unlock(&binder_procs_lock);
> +       mutex_unlock(&ipcns->binder_procs_lock);
>
>         mutex_lock(&context->context_mgr_node_lock);
>         if (context->binder_context_mgr_node &&
> @@ -5623,10 +5691,10 @@ static int binder_state_show(struct seq_file *m, void *unused)
>         if (last_node)
>                 binder_put_node(last_node);
>
> -       mutex_lock(&binder_procs_lock);
> -       hlist_for_each_entry(proc, &binder_procs, proc_node)
> +       mutex_lock(&ipcns->binder_procs_lock);
> +       hlist_for_each_entry(proc, &ipcns->binder_procs, proc_node)
>                 print_binder_proc(m, proc, 1);
> -       mutex_unlock(&binder_procs_lock);
> +       mutex_unlock(&ipcns->binder_procs_lock);
>
>         return 0;
>  }
> @@ -5639,10 +5707,10 @@ static int binder_stats_show(struct seq_file *m, void *unused)
>
>         print_binder_stats(m, "", &binder_stats);
>
> -       mutex_lock(&binder_procs_lock);
> -       hlist_for_each_entry(proc, &binder_procs, proc_node)
> +       mutex_lock(&ipcns->binder_procs_lock);
> +       hlist_for_each_entry(proc, &ipcns->binder_procs, proc_node)
>                 print_binder_proc_stats(m, proc);
> -       mutex_unlock(&binder_procs_lock);
> +       mutex_unlock(&ipcns->binder_procs_lock);
>
>         return 0;
>  }
> @@ -5652,10 +5720,10 @@ static int binder_transactions_show(struct seq_file *m, void *unused)
>         struct binder_proc *proc;
>
>         seq_puts(m, "binder transactions:\n");
> -       mutex_lock(&binder_procs_lock);
> -       hlist_for_each_entry(proc, &binder_procs, proc_node)
> +       mutex_lock(&ipcns->binder_procs_lock);
> +       hlist_for_each_entry(proc, &ipcns->binder_procs, proc_node)
>                 print_binder_proc(m, proc, 0);
> -       mutex_unlock(&binder_procs_lock);
> +       mutex_unlock(&ipcns->binder_procs_lock);
>
>         return 0;
>  }
> @@ -5665,14 +5733,14 @@ static int binder_proc_show(struct seq_file *m, void *unused)
>         struct binder_proc *itr;
>         int pid = (unsigned long)m->private;
>
> -       mutex_lock(&binder_procs_lock);
> -       hlist_for_each_entry(itr, &binder_procs, proc_node) {
> +       mutex_lock(&ipcns->binder_procs_lock);
> +       hlist_for_each_entry(itr, &ipcns->binder_procs, proc_node) {
>                 if (itr->pid == pid) {
>                         seq_puts(m, "binder proc state:\n");
>                         print_binder_proc(m, itr, 1);
>                 }
>         }
> -       mutex_unlock(&binder_procs_lock);
> +       mutex_unlock(&ipcns->binder_procs_lock);
>
>         return 0;
>  }
> @@ -5687,12 +5755,12 @@ static void print_binder_transaction_log_entry(struct seq_file *m,
>          */
>         smp_rmb();
>         seq_printf(m,
> -                  "%d: %s from %d:%d to %d:%d context %s node %d handle %d size %d:%d ret %d/%d l=%d",
> +                  "%d: %s from %d:%d to %d:%d ipc %d context %s node %d handle %d size %d:%d ret %d/%d l=%d",
>                    e->debug_id, (e->call_type == 2) ? "reply" :
>                    ((e->call_type == 1) ? "async" : "call "), e->from_proc,
> -                  e->from_thread, e->to_proc, e->to_thread, e->context_name,
> -                  e->to_node, e->target_handle, e->data_size, e->offsets_size,
> -                  e->return_error, e->return_error_param,
> +                  e->from_thread, e->to_proc, e->to_thread, e->ipc_inum,
> +                  e->context_name, e->to_node, e->target_handle, e->data_size,
> +                  e->offsets_size, e->return_error, e->return_error_param,
>                    e->return_error_line);
>         /*
>          * read-barrier to guarantee read of debug_id_done after
> @@ -5753,10 +5821,6 @@ static int __init init_binder_device(const char *name)
>         binder_device->miscdev.minor = MISC_DYNAMIC_MINOR;
>         binder_device->miscdev.name = name;
>
> -       binder_device->context.binder_context_mgr_uid = INVALID_UID;
> -       binder_device->context.name = name;
> -       mutex_init(&binder_device->context.context_mgr_node_lock);
> -
>         ret = misc_register(&binder_device->miscdev);
>         if (ret < 0) {
>                 kfree(binder_device);
> @@ -5831,9 +5895,16 @@ static int __init binder_init(void)
>                 if (ret)
>                         goto err_init_binder_device_failed;
>         }
> +#ifdef CONFIG_IPC_NS
> +       ret = binder_init_ns(&init_ipc_ns);
> +#else
> +       ret = binder_init_ns(&binder_ipc_ns);
> +#endif
> +       if (ret)
> +               goto err_init_namespace_failed;
>
>         return ret;
> -
> +err_init_namespace_failed:
>  err_init_binder_device_failed:
>         hlist_for_each_entry_safe(device, tmp, &binder_devices, hlist) {
>                 misc_deregister(&device->miscdev);
> diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
> index 6ab8c1bada3f..d7f850a2ded8 100644
> --- a/include/linux/ipc_namespace.h
> +++ b/include/linux/ipc_namespace.h
> @@ -63,6 +63,13 @@ struct ipc_namespace {
>         unsigned int    mq_msg_default;
>         unsigned int    mq_msgsize_default;
>
> +#ifdef CONFIG_ANDROID_BINDER_IPC
> +       /* next fields are for binder */
> +       struct mutex      binder_procs_lock;
> +       struct hlist_head binder_procs;
> +       struct hlist_head binder_contexts;
> +#endif
> +
>         /* user_ns which owns the ipc ns */
>         struct user_namespace *user_ns;
>         struct ucounts *ucounts;
> @@ -118,6 +125,14 @@ extern int mq_init_ns(struct ipc_namespace *ns);
>  static inline int mq_init_ns(struct ipc_namespace *ns) { return 0; }
>  #endif
>
> +#ifdef CONFIG_ANDROID_BINDER_IPC
> +extern int binder_init_ns(struct ipc_namespace *ns);
> +extern void binder_exit_ns(struct ipc_namespace *ns);
> +#else
> +static inline int binder_init_ns(struct ipc_namespace *ns) { return 0; }
> +static inline void binder_exit_ns(struct ipc_namespace *ns) { }
> +#endif
> +
>  #if defined(CONFIG_IPC_NS)
>  extern struct ipc_namespace *copy_ipcs(unsigned long flags,
>         struct user_namespace *user_ns, struct ipc_namespace *ns);
> diff --git a/ipc/namespace.c b/ipc/namespace.c
> index 21607791d62c..68c6e983b002 100644
> --- a/ipc/namespace.c
> +++ b/ipc/namespace.c
> @@ -57,7 +57,10 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
>
>         err = mq_init_ns(ns);
>         if (err)
> -               goto fail_put;
> +               goto fail_init_mq;
> +       err = binder_init_ns(ns);
> +       if (err)
> +               goto fail_init_binder;
>
>         sem_init_ns(ns);
>         msg_init_ns(ns);
> @@ -65,7 +68,9 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
>
>         return ns;
>
> -fail_put:
> +fail_init_binder:
> +       mq_put_mnt(ns);
> +fail_init_mq:
>         put_user_ns(ns->user_ns);
>         ns_free_inum(&ns->ns);
>  fail_free:
> @@ -120,6 +125,7 @@ static void free_ipc_ns(struct ipc_namespace *ns)
>         sem_exit_ns(ns);
>         msg_exit_ns(ns);
>         shm_exit_ns(ns);
> +       binder_exit_ns(ns);
>
>         dec_ipc_namespaces(ns->ucounts);
>         put_user_ns(ns->user_ns);
> --
> 2.11.0

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH V4] binder: ipc namespace support for android binder
  2018-11-12 16:45 ` Todd Kjos
@ 2018-11-12 19:24   ` Christian Brauner
  0 siblings, 0 replies; 10+ messages in thread
From: Christian Brauner @ 2018-11-12 19:24 UTC (permalink / raw)
  To: Todd Kjos, chouryzhou, Martijn Coenen
  Cc: Greg Kroah-Hartman, Arve Hjønnevåg, Todd Kjos, akpm,
	dave, open list:ANDROID DRIVERS, LKML

On November 12, 2018 8:45:07 AM PST, Todd Kjos <tkjos@google.com> wrote:
>+christian@brauner.io +Martijn Coenen
>
>Christian,
>
>Does this patch work for your container use-cases? If not, please
>comment on this thread. Let's discuss at LPC this week.

I have not received an answer to my questions in the last version of this patch set. Also it would be good if I could be Cc'ed by default. I can't hunt down all patches.
I do not know of any kernel entity, specifically devices, that change namespaces on open().
This seems like an invitation for all kinds of security bugs.
A device node belongs to one namespace only which is attached to the underlying kobject. Opening the device should never change that.
Please look at how mqueue or shm are doing this. They don't change namespaces on open either.
I have to say that is one of the main reasons why I disagree with that design.


>
>-Todd
>
>On Mon, Nov 12, 2018 at 1:38 AM chouryzhou(周威) <chouryzhou@tencent.com>
>wrote:
>>
>> Currently android's binder is not isolated by ipc namespace. Since
>binder
>> is a form of IPC and therefore should be tied to ipc namespace. With
>this
>> patch, we can run multiple instances of  android container on one
>host.
>>
>> This patch move "binder_procs" and "binder_context" into
>ipc_namespace,
>> driver will find the context from it when opening. For debugfs,
>binder_proc
>> is namespace-aware, but not for binder dead nodes, binder_stats and
>> binder_transaction_log_entry (we added ipc inum to trace it).
>>
>> Signed-off-by: chouryzhou <chouryzhou@tencent.com>
>> Reviewed-by: Davidlohr Bueso <dave@stgolabs.net>
>> ---
>>  drivers/android/binder.c      | 133
>++++++++++++++++++++++++++++++++----------
>>  include/linux/ipc_namespace.h |  15 +++++
>>  ipc/namespace.c               |  10 +++-
>>  3 files changed, 125 insertions(+), 33 deletions(-)
>>
>> diff --git a/drivers/android/binder.c b/drivers/android/binder.c
>> index cb30a524d16d..453265505b04 100644
>> --- a/drivers/android/binder.c
>> +++ b/drivers/android/binder.c
>> @@ -67,6 +67,8 @@
>>  #include <linux/sched/mm.h>
>>  #include <linux/seq_file.h>
>>  #include <linux/uaccess.h>
>> +#include <linux/proc_ns.h>
>> +#include <linux/ipc_namespace.h>
>>  #include <linux/pid_namespace.h>
>>  #include <linux/security.h>
>>  #include <linux/spinlock.h>
>> @@ -80,13 +82,21 @@
>>  #include "binder_alloc.h"
>>  #include "binder_trace.h"
>>
>> +
>> +#ifndef CONFIG_IPC_NS
>> +static struct ipc_namespace binder_ipc_ns = {
>> +       .ns.inum = PROC_IPC_INIT_INO,
>> +};
>> +
>> +#define ipcns  (&binder_ipc_ns)
>> +#else
>> +#define ipcns  (current->nsproxy->ipc_ns)
>> +#endif
>> +
>>  static HLIST_HEAD(binder_deferred_list);
>>  static DEFINE_MUTEX(binder_deferred_lock);
>>
>>  static HLIST_HEAD(binder_devices);
>> -static HLIST_HEAD(binder_procs);
>> -static DEFINE_MUTEX(binder_procs_lock);
>> -
>>  static HLIST_HEAD(binder_dead_nodes);
>>  static DEFINE_SPINLOCK(binder_dead_nodes_lock);
>>
>> @@ -233,6 +243,7 @@ struct binder_transaction_log_entry {
>>         uint32_t return_error;
>>         uint32_t return_error_param;
>>         const char *context_name;
>> +       unsigned int ipc_inum;
>>  };
>>  struct binder_transaction_log {
>>         atomic_t cur;
>> @@ -263,19 +274,66 @@ static struct binder_transaction_log_entry
>*binder_transaction_log_add(
>>  }
>>
>>  struct binder_context {
>> +       struct hlist_node hlist;
>>         struct binder_node *binder_context_mgr_node;
>>         struct mutex context_mgr_node_lock;
>>
>>         kuid_t binder_context_mgr_uid;
>> +       int   device;
>>         const char *name;
>>  };
>>
>>  struct binder_device {
>>         struct hlist_node hlist;
>>         struct miscdevice miscdev;
>> -       struct binder_context context;
>>  };
>>
>> +void binder_exit_ns(struct ipc_namespace *ns)
>> +{
>> +       struct binder_context *context;
>> +       struct hlist_node *tmp;
>> +
>> +       mutex_destroy(&ns->binder_procs_lock);
>> +       hlist_for_each_entry_safe(context, tmp, &ns->binder_contexts,
>hlist) {
>> +               mutex_destroy(&context->context_mgr_node_lock);
>> +               hlist_del(&context->hlist);
>> +               kfree(context);
>> +       }
>> +}
>> +
>> +int binder_init_ns(struct ipc_namespace *ns)
>> +{
>> +       int ret;
>> +       struct binder_device *device;
>> +
>> +       mutex_init(&ns->binder_procs_lock);
>> +       INIT_HLIST_HEAD(&ns->binder_procs);
>> +       INIT_HLIST_HEAD(&ns->binder_contexts);
>> +
>> +       hlist_for_each_entry(device, &binder_devices, hlist) {
>> +               struct binder_context *context;
>> +
>> +               context = kzalloc(sizeof(*context), GFP_KERNEL);
>> +               if (!context) {
>> +                       ret = -ENOMEM;
>> +                       goto err;
>> +               }
>> +
>> +               context->device = device->miscdev.minor;
>> +               context->name = device->miscdev.name;
>> +               context->binder_context_mgr_uid = INVALID_UID;
>> +               mutex_init(&context->context_mgr_node_lock);
>> +
>> +               hlist_add_head(&context->hlist,
>&ns->binder_contexts);
>> +       }
>> +
>> +       return 0;
>> +err:
>> +       binder_exit_ns(ns);
>> +       return ret;
>> +}
>> +
>> +
>>  /**
>>   * struct binder_work - work enqueued on a worklist
>>   * @entry:             node enqueued on list
>> @@ -2728,6 +2786,7 @@ static void binder_transaction(struct
>binder_proc *proc,
>>         e->data_size = tr->data_size;
>>         e->offsets_size = tr->offsets_size;
>>         e->context_name = proc->context->name;
>> +       e->ipc_inum = ipcns->ns.inum;
>>
>>         if (reply) {
>>                 binder_inner_proc_lock(proc);
>> @@ -4922,6 +4981,7 @@ static int binder_open(struct inode *nodp,
>struct file *filp)
>>  {
>>         struct binder_proc *proc;
>>         struct binder_device *binder_dev;
>> +       struct binder_context *context;
>>
>>         binder_debug(BINDER_DEBUG_OPEN_CLOSE, "%s: %d:%d\n",
>__func__,
>>                      current->group_leader->pid, current->pid);
>> @@ -4937,7 +4997,15 @@ static int binder_open(struct inode *nodp,
>struct file *filp)
>>         proc->default_priority = task_nice(current);
>>         binder_dev = container_of(filp->private_data, struct
>binder_device,
>>                                   miscdev);
>> -       proc->context = &binder_dev->context;
>> +       hlist_for_each_entry(context, &ipcns->binder_contexts, hlist)
>{
>> +               if (context->device == binder_dev->miscdev.minor) {
>> +                       proc->context = context;
>> +                       break;
>> +               }
>> +       }
>> +       if (!proc->context)
>> +               return -ENOENT;
>> +
>>         binder_alloc_init(&proc->alloc);
>>
>>         binder_stats_created(BINDER_STAT_PROC);
>> @@ -4946,9 +5014,9 @@ static int binder_open(struct inode *nodp,
>struct file *filp)
>>         INIT_LIST_HEAD(&proc->waiting_threads);
>>         filp->private_data = proc;
>>
>> -       mutex_lock(&binder_procs_lock);
>> -       hlist_add_head(&proc->proc_node, &binder_procs);
>> -       mutex_unlock(&binder_procs_lock);
>> +       mutex_lock(&ipcns->binder_procs_lock);
>> +       hlist_add_head(&proc->proc_node, &ipcns->binder_procs);
>> +       mutex_unlock(&ipcns->binder_procs_lock);
>>
>>         if (binder_debugfs_dir_entry_proc) {
>>                 char strbuf[11];
>> @@ -5082,9 +5150,9 @@ static void binder_deferred_release(struct
>binder_proc *proc)
>>         struct rb_node *n;
>>         int threads, nodes, incoming_refs, outgoing_refs,
>active_transactions;
>>
>> -       mutex_lock(&binder_procs_lock);
>> +       mutex_lock(&ipcns->binder_procs_lock);
>>         hlist_del(&proc->proc_node);
>> -       mutex_unlock(&binder_procs_lock);
>> +       mutex_unlock(&ipcns->binder_procs_lock);
>>
>>         mutex_lock(&context->context_mgr_node_lock);
>>         if (context->binder_context_mgr_node &&
>> @@ -5623,10 +5691,10 @@ static int binder_state_show(struct seq_file
>*m, void *unused)
>>         if (last_node)
>>                 binder_put_node(last_node);
>>
>> -       mutex_lock(&binder_procs_lock);
>> -       hlist_for_each_entry(proc, &binder_procs, proc_node)
>> +       mutex_lock(&ipcns->binder_procs_lock);
>> +       hlist_for_each_entry(proc, &ipcns->binder_procs, proc_node)
>>                 print_binder_proc(m, proc, 1);
>> -       mutex_unlock(&binder_procs_lock);
>> +       mutex_unlock(&ipcns->binder_procs_lock);
>>
>>         return 0;
>>  }
>> @@ -5639,10 +5707,10 @@ static int binder_stats_show(struct seq_file
>*m, void *unused)
>>
>>         print_binder_stats(m, "", &binder_stats);
>>
>> -       mutex_lock(&binder_procs_lock);
>> -       hlist_for_each_entry(proc, &binder_procs, proc_node)
>> +       mutex_lock(&ipcns->binder_procs_lock);
>> +       hlist_for_each_entry(proc, &ipcns->binder_procs, proc_node)
>>                 print_binder_proc_stats(m, proc);
>> -       mutex_unlock(&binder_procs_lock);
>> +       mutex_unlock(&ipcns->binder_procs_lock);
>>
>>         return 0;
>>  }
>> @@ -5652,10 +5720,10 @@ static int binder_transactions_show(struct
>seq_file *m, void *unused)
>>         struct binder_proc *proc;
>>
>>         seq_puts(m, "binder transactions:\n");
>> -       mutex_lock(&binder_procs_lock);
>> -       hlist_for_each_entry(proc, &binder_procs, proc_node)
>> +       mutex_lock(&ipcns->binder_procs_lock);
>> +       hlist_for_each_entry(proc, &ipcns->binder_procs, proc_node)
>>                 print_binder_proc(m, proc, 0);
>> -       mutex_unlock(&binder_procs_lock);
>> +       mutex_unlock(&ipcns->binder_procs_lock);
>>
>>         return 0;
>>  }
>> @@ -5665,14 +5733,14 @@ static int binder_proc_show(struct seq_file
>*m, void *unused)
>>         struct binder_proc *itr;
>>         int pid = (unsigned long)m->private;
>>
>> -       mutex_lock(&binder_procs_lock);
>> -       hlist_for_each_entry(itr, &binder_procs, proc_node) {
>> +       mutex_lock(&ipcns->binder_procs_lock);
>> +       hlist_for_each_entry(itr, &ipcns->binder_procs, proc_node) {
>>                 if (itr->pid == pid) {
>>                         seq_puts(m, "binder proc state:\n");
>>                         print_binder_proc(m, itr, 1);
>>                 }
>>         }
>> -       mutex_unlock(&binder_procs_lock);
>> +       mutex_unlock(&ipcns->binder_procs_lock);
>>
>>         return 0;
>>  }
>> @@ -5687,12 +5755,12 @@ static void
>print_binder_transaction_log_entry(struct seq_file *m,
>>          */
>>         smp_rmb();
>>         seq_printf(m,
>> -                  "%d: %s from %d:%d to %d:%d context %s node %d
>handle %d size %d:%d ret %d/%d l=%d",
>> +                  "%d: %s from %d:%d to %d:%d ipc %d context %s node
>%d handle %d size %d:%d ret %d/%d l=%d",
>>                    e->debug_id, (e->call_type == 2) ? "reply" :
>>                    ((e->call_type == 1) ? "async" : "call "),
>e->from_proc,
>> -                  e->from_thread, e->to_proc, e->to_thread,
>e->context_name,
>> -                  e->to_node, e->target_handle, e->data_size,
>e->offsets_size,
>> -                  e->return_error, e->return_error_param,
>> +                  e->from_thread, e->to_proc, e->to_thread,
>e->ipc_inum,
>> +                  e->context_name, e->to_node, e->target_handle,
>e->data_size,
>> +                  e->offsets_size, e->return_error,
>e->return_error_param,
>>                    e->return_error_line);
>>         /*
>>          * read-barrier to guarantee read of debug_id_done after
>> @@ -5753,10 +5821,6 @@ static int __init init_binder_device(const
>char *name)
>>         binder_device->miscdev.minor = MISC_DYNAMIC_MINOR;
>>         binder_device->miscdev.name = name;
>>
>> -       binder_device->context.binder_context_mgr_uid = INVALID_UID;
>> -       binder_device->context.name = name;
>> -       mutex_init(&binder_device->context.context_mgr_node_lock);
>> -
>>         ret = misc_register(&binder_device->miscdev);
>>         if (ret < 0) {
>>                 kfree(binder_device);
>> @@ -5831,9 +5895,16 @@ static int __init binder_init(void)
>>                 if (ret)
>>                         goto err_init_binder_device_failed;
>>         }
>> +#ifdef CONFIG_IPC_NS
>> +       ret = binder_init_ns(&init_ipc_ns);
>> +#else
>> +       ret = binder_init_ns(&binder_ipc_ns);
>> +#endif
>> +       if (ret)
>> +               goto err_init_namespace_failed;
>>
>>         return ret;
>> -
>> +err_init_namespace_failed:
>>  err_init_binder_device_failed:
>>         hlist_for_each_entry_safe(device, tmp, &binder_devices,
>hlist) {
>>                 misc_deregister(&device->miscdev);
>> diff --git a/include/linux/ipc_namespace.h
>b/include/linux/ipc_namespace.h
>> index 6ab8c1bada3f..d7f850a2ded8 100644
>> --- a/include/linux/ipc_namespace.h
>> +++ b/include/linux/ipc_namespace.h
>> @@ -63,6 +63,13 @@ struct ipc_namespace {
>>         unsigned int    mq_msg_default;
>>         unsigned int    mq_msgsize_default;
>>
>> +#ifdef CONFIG_ANDROID_BINDER_IPC
>> +       /* next fields are for binder */
>> +       struct mutex      binder_procs_lock;
>> +       struct hlist_head binder_procs;
>> +       struct hlist_head binder_contexts;
>> +#endif
>> +
>>         /* user_ns which owns the ipc ns */
>>         struct user_namespace *user_ns;
>>         struct ucounts *ucounts;
>> @@ -118,6 +125,14 @@ extern int mq_init_ns(struct ipc_namespace *ns);
>>  static inline int mq_init_ns(struct ipc_namespace *ns) { return 0; }
>>  #endif
>>
>> +#ifdef CONFIG_ANDROID_BINDER_IPC
>> +extern int binder_init_ns(struct ipc_namespace *ns);
>> +extern void binder_exit_ns(struct ipc_namespace *ns);
>> +#else
>> +static inline int binder_init_ns(struct ipc_namespace *ns) { return
>0; }
>> +static inline void binder_exit_ns(struct ipc_namespace *ns) { }
>> +#endif
>> +
>>  #if defined(CONFIG_IPC_NS)
>>  extern struct ipc_namespace *copy_ipcs(unsigned long flags,
>>         struct user_namespace *user_ns, struct ipc_namespace *ns);
>> diff --git a/ipc/namespace.c b/ipc/namespace.c
>> index 21607791d62c..68c6e983b002 100644
>> --- a/ipc/namespace.c
>> +++ b/ipc/namespace.c
>> @@ -57,7 +57,10 @@ static struct ipc_namespace *create_ipc_ns(struct
>user_namespace *user_ns,
>>
>>         err = mq_init_ns(ns);
>>         if (err)
>> -               goto fail_put;
>> +               goto fail_init_mq;
>> +       err = binder_init_ns(ns);
>> +       if (err)
>> +               goto fail_init_binder;
>>
>>         sem_init_ns(ns);
>>         msg_init_ns(ns);
>> @@ -65,7 +68,9 @@ static struct ipc_namespace *create_ipc_ns(struct
>user_namespace *user_ns,
>>
>>         return ns;
>>
>> -fail_put:
>> +fail_init_binder:
>> +       mq_put_mnt(ns);
>> +fail_init_mq:
>>         put_user_ns(ns->user_ns);
>>         ns_free_inum(&ns->ns);
>>  fail_free:
>> @@ -120,6 +125,7 @@ static void free_ipc_ns(struct ipc_namespace *ns)
>>         sem_exit_ns(ns);
>>         msg_exit_ns(ns);
>>         shm_exit_ns(ns);
>> +       binder_exit_ns(ns);
>>
>>         dec_ipc_namespaces(ns->ucounts);
>>         put_user_ns(ns->user_ns);
>> --
>> 2.11.0


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH V4] binder: ipc namespace support for android binder
  2018-11-12  9:37 [PATCH V4] binder: ipc namespace support for android binder chouryzhou(周威)
  2018-11-12 16:45 ` Todd Kjos
@ 2018-11-15 22:33 ` Andrew Morton
  2018-11-15 22:54   ` gregkh
  1 sibling, 1 reply; 10+ messages in thread
From: Andrew Morton @ 2018-11-15 22:33 UTC (permalink / raw)
  To: chouryzhou; +Cc: tkjos, gregkh, arve, tkjos, dave, devel, linux-kernel

On Mon, 12 Nov 2018 09:37:51 +0000 chouryzhou(周威) <chouryzhou@tencent.com> wrote:

> Currently android's binder is not isolated by ipc namespace. Since binder 
> is a form of IPC and therefore should be tied to ipc namespace. With this 
> patch, we can run multiple instances of  android container on one host.
> 
> This patch move "binder_procs" and "binder_context" into ipc_namespace,
> driver will find the context from it when opening. For debugfs, binder_proc
> is namespace-aware, but not for binder dead nodes, binder_stats and 
> binder_transaction_log_entry (we added ipc inum to trace it).
> 
> ...
>
>  drivers/android/binder.c      | 133 ++++++++++++++++++++++++++++++++----------
>  include/linux/ipc_namespace.h |  15 +++++
>  ipc/namespace.c               |  10 +++-
>  3 files changed, 125 insertions(+), 33 deletions(-)

Well, it's mainly an android patch so I suggest this be taken via the
android tree.

Acked-by: Andrew Morton <akpm@linux-foundation.org>


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH V4] binder: ipc namespace support for android binder
  2018-11-15 22:33 ` Andrew Morton
@ 2018-11-15 22:54   ` gregkh
  2018-11-16 19:19     ` Todd Kjos
  0 siblings, 1 reply; 10+ messages in thread
From: gregkh @ 2018-11-15 22:54 UTC (permalink / raw)
  To: Andrew Morton; +Cc: chouryzhou, tkjos, arve, tkjos, dave, devel, linux-kernel

On Thu, Nov 15, 2018 at 02:33:49PM -0800, Andrew Morton wrote:
> On Mon, 12 Nov 2018 09:37:51 +0000 chouryzhou(周威) <chouryzhou@tencent.com> wrote:
> 
> > Currently android's binder is not isolated by ipc namespace. Since binder 
> > is a form of IPC and therefore should be tied to ipc namespace. With this 
> > patch, we can run multiple instances of  android container on one host.
> > 
> > This patch move "binder_procs" and "binder_context" into ipc_namespace,
> > driver will find the context from it when opening. For debugfs, binder_proc
> > is namespace-aware, but not for binder dead nodes, binder_stats and 
> > binder_transaction_log_entry (we added ipc inum to trace it).
> > 
> > ...
> >
> >  drivers/android/binder.c      | 133 ++++++++++++++++++++++++++++++++----------
> >  include/linux/ipc_namespace.h |  15 +++++
> >  ipc/namespace.c               |  10 +++-
> >  3 files changed, 125 insertions(+), 33 deletions(-)
> 
> Well, it's mainly an android patch so I suggest this be taken via the
> android tree.
> 
> Acked-by: Andrew Morton <akpm@linux-foundation.org>
> 

A number of us have talked about this in the plumbers Android track, and
a different proposal for how to solve this has been made that should be
much more resiliant.  So I will drop this patch from my queue and wait
for the patches based on the discussions we had there.

I think there's some notes/slides on the discussion online somewhere,
but it hasn't been published as the conference is still happening,
otherwise I would link to it here...

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH V4] binder: ipc namespace support for android binder
  2018-11-15 22:54   ` gregkh
@ 2018-11-16 19:19     ` Todd Kjos
  2018-11-20  0:23       ` Christian Brauner
  0 siblings, 1 reply; 10+ messages in thread
From: Todd Kjos @ 2018-11-16 19:19 UTC (permalink / raw)
  To: Greg Kroah-Hartman
  Cc: akpm, chouryzhou, Arve Hjønnevåg, Todd Kjos, dave,
	open list:ANDROID DRIVERS, LKML, christian

On Thu, Nov 15, 2018 at 2:54 PM gregkh@linuxfoundation.org
<gregkh@linuxfoundation.org> wrote:
...
>
> A number of us have talked about this in the plumbers Android track, and
> a different proposal for how to solve this has been made that should be
> much more resiliant.  So I will drop this patch from my queue and wait
> for the patches based on the discussions we had there.
>
> I think there's some notes/slides on the discussion online somewhere,
> but it hasn't been published as the conference is still happening,
> otherwise I would link to it here...

Here is a link to the session where you can look at the slides [1].
There was consensus that "binderfs" (slide 5) was the most promising
-- but would be behind a config option to provide backwards
compatibility for non-container use-cases.

The etherpad notes are at [2] (look at "Dynamically Allocated Binder
Devices" section)

Christian Brauner will be sending out more details.

-Todd

[1] https://linuxplumbersconf.org/event/2/contributions/222/
[2] https://etherpad.openstack.org/p/Android

>
> thanks,
>
> greg k-h

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH V4] binder: ipc namespace support for android binder
  2018-11-16 19:19     ` Todd Kjos
@ 2018-11-20  0:23       ` Christian Brauner
  0 siblings, 0 replies; 10+ messages in thread
From: Christian Brauner @ 2018-11-20  0:23 UTC (permalink / raw)
  To: Todd Kjos
  Cc: Greg Kroah-Hartman, akpm, chouryzhou, Arve Hjønnevåg,
	Todd Kjos, dave, open list:ANDROID DRIVERS, LKML

On Fri, Nov 16, 2018 at 11:19:41AM -0800, Todd Kjos wrote:
> On Thu, Nov 15, 2018 at 2:54 PM gregkh@linuxfoundation.org
> <gregkh@linuxfoundation.org> wrote:
> ...
> >
> > A number of us have talked about this in the plumbers Android track, and
> > a different proposal for how to solve this has been made that should be
> > much more resiliant.  So I will drop this patch from my queue and wait
> > for the patches based on the discussions we had there.
> >
> > I think there's some notes/slides on the discussion online somewhere,
> > but it hasn't been published as the conference is still happening,
> > otherwise I would link to it here...
> 
> Here is a link to the session where you can look at the slides [1].
> There was consensus that "binderfs" (slide 5) was the most promising
> -- but would be behind a config option to provide backwards
> compatibility for non-container use-cases.
> 
> The etherpad notes are at [2] (look at "Dynamically Allocated Binder
> Devices" section)
> 
> Christian Brauner will be sending out more details.

Ok, sorry for the delay I got caught up in other work.

The idea is to implement binderfs which I'm starting to work on.
binderfs will be a separate pseudo-filesystem that will be mountable
per-ipc namespace.
This has the advantage that the ipc namespace is attached to the
superblock of the mount and can be easily retrieved by the binder
driver. It also implies that - in contrast to the proposed patch here -
an open() on a given binder device will not be able to change the ipc
namespace of said devices. The obvious corollary is that you can
bind-mount binder devices or the whole binderfs mount between different
ipc namespaces and given the right permissions (CAP_IPC_OWNER in the
owning userns of the ipcns) can see services on the host which is
something that people wanted to be able to do.
Additionally, each binderfs mount will come with a binder-control
device. This device is functionally similar to loop-control and will
allow for dynamic allocation (and possibly deallocation) of binder
devices. With this we can remove the restriction to hard-code the number
of binder devices at compile time.
Backwards compatibility can either be guaranteed by hiding binderfs
behind a compile flag or by special-casing the inital binderfs mount to
pre-create the standard binder devices. The jury is still out on this.

Christian

> 
> -Todd
> 
> [1] https://linuxplumbersconf.org/event/2/contributions/222/
> [2] https://etherpad.openstack.org/p/Android
> 
> >
> > thanks,
> >
> > greg k-h

^ permalink raw reply	[flat|nested] 10+ messages in thread

* RE: [PATCH V4] binder: ipc namespace support for android binder
@ 2018-11-20  3:56 chouryzhou(周威)
  0 siblings, 0 replies; 10+ messages in thread
From: chouryzhou(周威) @ 2018-11-20  3:56 UTC (permalink / raw)
  To: Christian Brauner, Todd Kjos
  Cc: Greg Kroah-Hartman, akpm, Arve Hjønnevåg, Todd Kjos,
	dave, open list:ANDROID DRIVERS, LKML

> On Fri, Nov 16, 2018 at 11:19:41AM -0800, Todd Kjos wrote:
> > On Thu, Nov 15, 2018 at 2:54 PM gregkh@linuxfoundation.org
> > <gregkh@linuxfoundation.org> wrote:
> > ...
> > >
> > > A number of us have talked about this in the plumbers Android track, and
> > > a different proposal for how to solve this has been made that should be
> > > much more resiliant.  So I will drop this patch from my queue and wait
> > > for the patches based on the discussions we had there.
> > >
> > > I think there's some notes/slides on the discussion online somewhere,
> > > but it hasn't been published as the conference is still happening,
> > > otherwise I would link to it here...
> >
> > Here is a link to the session where you can look at the slides [1].
> > There was consensus that "binderfs" (slide 5) was the most promising
> > -- but would be behind a config option to provide backwards
> > compatibility for non-container use-cases.
> >
> > The etherpad notes are at [2] (look at "Dynamically Allocated Binder
> > Devices" section)
> >
> > Christian Brauner will be sending out more details.
> 
> Ok, sorry for the delay I got caught up in other work.
> 
> The idea is to implement binderfs which I'm starting to work on.
> binderfs will be a separate pseudo-filesystem that will be mountable
> per-ipc namespace.
> This has the advantage that the ipc namespace is attached to the
> superblock of the mount and can be easily retrieved by the binder
> driver. It also implies that - in contrast to the proposed patch here -
> an open() on a given binder device will not be able to change the ipc
> namespace of said devices. The obvious corollary is that you can
> bind-mount binder devices or the whole binderfs mount between different
> ipc namespaces and given the right permissions (CAP_IPC_OWNER in the
> owning userns of the ipcns) can see services on the host which is
> something that people wanted to be able to do.
> Additionally, each binderfs mount will come with a binder-control
> device. This device is functionally similar to loop-control and will
> allow for dynamic allocation (and possibly deallocation) of binder
> devices. With this we can remove the restriction to hard-code the number
> of binder devices at compile time.
> Backwards compatibility can either be guaranteed by hiding binderfs
> behind a compile flag or by special-casing the inital binderfs mount to
> pre-create the standard binder devices. The jury is still out on this.
> 
> Christian
> 

Since you are working on this, I will withdraw this patch. We will evaluate 
whether to your solution in our environment after you implement it. 

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH V4] binder: ipc namespace support for android binder
  2018-11-13  8:12 chouryzhou(周威)
@ 2018-11-13 16:10 ` Todd Kjos
  0 siblings, 0 replies; 10+ messages in thread
From: Todd Kjos @ 2018-11-13 16:10 UTC (permalink / raw)
  To: chouryzhou
  Cc: christian, Martijn Coenen, Greg Kroah-Hartman,
	Arve Hjønnevåg, Todd Kjos, akpm, dave,
	open list:ANDROID DRIVERS, LKML

On Tue, Nov 13, 2018 at 12:12 AM chouryzhou(周威) <chouryzhou@tencent.com> wrote:
>
> > I have not received an answer to my questions in the last version of this patch
> > set. Also it would be good if I could be Cc'ed by default. I can't hunt down all
> > patches.
> > I do not know of any kernel entity, specifically devices, that change namespaces
> > on open().
> > This seems like an invitation for all kinds of security bugs.
> > A device node belongs to one namespace only which is attached to the
> > underlying kobject. Opening the device should never change that.
> > Please look at how mqueue or shm are doing this. They don't change
> > namespaces on open either.
> > I have to say that is one of the main reasons why I disagree with that design.
> >
> >
>
> If we must return the same context when every open in proc, we can only isolate
> binder with mnt namespace instead of ipc namespace, what do you think, Todd?

I don't have strong feelings on this -- it seems like a bizarre
use-case to send the fd through a backchannel as christian describes,
but I do agree it is strange behavior (though it seems safe to me
since it prevents communication between unrelated entities). I don't
know how mqueue and shm work, its worth a look since this patch is
modelling their behavior...

We'll talk about it here at LPC and then on this thread.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* RE: [PATCH V4] binder: ipc namespace support for android binder
@ 2018-11-13  8:12 chouryzhou(周威)
  2018-11-13 16:10 ` Todd Kjos
  0 siblings, 1 reply; 10+ messages in thread
From: chouryzhou(周威) @ 2018-11-13  8:12 UTC (permalink / raw)
  To: Christian Brauner, Todd Kjos, Martijn Coenen
  Cc: Greg Kroah-Hartman, Arve Hjønnevåg, Todd Kjos, akpm,
	dave, open list:ANDROID DRIVERS, LKML

> I have not received an answer to my questions in the last version of this patch
> set. Also it would be good if I could be Cc'ed by default. I can't hunt down all
> patches.
> I do not know of any kernel entity, specifically devices, that change namespaces
> on open().
> This seems like an invitation for all kinds of security bugs.
> A device node belongs to one namespace only which is attached to the
> underlying kobject. Opening the device should never change that.
> Please look at how mqueue or shm are doing this. They don't change
> namespaces on open either.
> I have to say that is one of the main reasons why I disagree with that design.
> 
> 

If we must return the same context when every open in proc, we can only isolate
binder with mnt namespace instead of ipc namespace, what do you think, Todd?

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2018-11-20  3:56 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-11-12  9:37 [PATCH V4] binder: ipc namespace support for android binder chouryzhou(周威)
2018-11-12 16:45 ` Todd Kjos
2018-11-12 19:24   ` Christian Brauner
2018-11-15 22:33 ` Andrew Morton
2018-11-15 22:54   ` gregkh
2018-11-16 19:19     ` Todd Kjos
2018-11-20  0:23       ` Christian Brauner
2018-11-13  8:12 chouryzhou(周威)
2018-11-13 16:10 ` Todd Kjos
2018-11-20  3:56 chouryzhou(周威)

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).