linux-kernel.vger.kernel.org archive mirror
* [PATCH V3] binder: ipc namespace support for android binder
@ 2018-11-08 13:02 chouryzhou(周威)
  2018-11-08 14:28 ` Christian Brauner
                   ` (2 more replies)
  0 siblings, 3 replies; 12+ messages in thread
From: chouryzhou(周威) @ 2018-11-08 13:02 UTC (permalink / raw)
  To: gregkh
  Cc: arve, tkjos, akpm, dave, devel, linux-kernel,
	chouryzhou(周威)

  We are working on running Android in containers, but we found that binder
is not isolated by the ipc namespace, even though binder is a form of IPC
and should therefore be tied to it. With this patch, we can run more than
one Android container on one host.
  This patch moves "binder_procs" and "binder_context" into ipc_namespace;
the driver looks the context up there on open(). Statistics in debugfs
remain global.

Signed-off-by: chouryzhou <chouryzhou@tencent.com>
---
 drivers/android/binder.c      | 128 +++++++++++++++++++++++++++++++-----------
 include/linux/ipc_namespace.h |  15 +++++
 ipc/namespace.c               |  10 +++-
 3 files changed, 117 insertions(+), 36 deletions(-)

diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index cb30a524d16d..22e45bb937e6 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -68,6 +68,7 @@
 #include <linux/seq_file.h>
 #include <linux/uaccess.h>
 #include <linux/pid_namespace.h>
+#include <linux/ipc_namespace.h>
 #include <linux/security.h>
 #include <linux/spinlock.h>
 #include <linux/ratelimit.h>
@@ -80,13 +81,18 @@
 #include "binder_alloc.h"
 #include "binder_trace.h"
 
+
+#if !defined(CONFIG_SYSVIPC) &&  !defined(CONFIG_POSIX_MQUEUE)
+struct ipc_namespace init_ipc_ns;
+#define ipcns  (&init_ipc_ns)
+#else
+#define ipcns  (current->nsproxy->ipc_ns)
+#endif
+
 static HLIST_HEAD(binder_deferred_list);
 static DEFINE_MUTEX(binder_deferred_lock);
 
 static HLIST_HEAD(binder_devices);
-static HLIST_HEAD(binder_procs);
-static DEFINE_MUTEX(binder_procs_lock);
-
 static HLIST_HEAD(binder_dead_nodes);
 static DEFINE_SPINLOCK(binder_dead_nodes_lock);
 
@@ -232,7 +238,7 @@ struct binder_transaction_log_entry {
        int return_error_line;
        uint32_t return_error;
        uint32_t return_error_param;
-       const char *context_name;
+       int context_device;
 };
 struct binder_transaction_log {
        atomic_t cur;
@@ -263,19 +269,64 @@ static struct binder_transaction_log_entry *binder_transaction_log_add(
 }
 
 struct binder_context {
+       struct hlist_node hlist;
        struct binder_node *binder_context_mgr_node;
        struct mutex context_mgr_node_lock;
 
        kuid_t binder_context_mgr_uid;
-       const char *name;
+       int    device;
 };
 
 struct binder_device {
        struct hlist_node hlist;
        struct miscdevice miscdev;
-       struct binder_context context;
 };
 
+void binder_exit_ns(struct ipc_namespace *ns)
+{
+       struct binder_context *context;
+       struct hlist_node *tmp;
+
+       mutex_destroy(&ns->binder_procs_lock);
+       hlist_for_each_entry_safe(context, tmp, &ns->binder_contexts, hlist) {
+               mutex_destroy(&context->context_mgr_node_lock);
+               hlist_del(&context->hlist);
+               kfree(context);
+       }
+}
+
+int binder_init_ns(struct ipc_namespace *ns)
+{
+       int ret;
+       struct binder_device *device;
+
+       mutex_init(&ns->binder_procs_lock);
+       INIT_HLIST_HEAD(&ns->binder_procs);
+       INIT_HLIST_HEAD(&ns->binder_contexts);
+
+       hlist_for_each_entry(device, &binder_devices, hlist) {
+               struct binder_context *context;
+
+               context = kzalloc(sizeof(*context), GFP_KERNEL);
+               if (!context) {
+                       ret = -ENOMEM;
+                       goto err;
+               }
+
+               context->device = device->miscdev.minor;
+               context->binder_context_mgr_uid = INVALID_UID;
+               mutex_init(&context->context_mgr_node_lock);
+
+               hlist_add_head(&context->hlist, &ns->binder_contexts);
+       }
+
+       return 0;
+err:
+       binder_exit_ns(ns);
+       return ret;
+}
+
+
 /**
  * struct binder_work - work enqueued on a worklist
  * @entry:             node enqueued on list
@@ -2727,7 +2778,7 @@ static void binder_transaction(struct binder_proc *proc,
        e->target_handle = tr->target.handle;
        e->data_size = tr->data_size;
        e->offsets_size = tr->offsets_size;
-       e->context_name = proc->context->name;
+       e->context_device = proc->context->device;
 
        if (reply) {
                binder_inner_proc_lock(proc);
@@ -4922,6 +4973,7 @@ static int binder_open(struct inode *nodp, struct file *filp)
 {
        struct binder_proc *proc;
        struct binder_device *binder_dev;
+       struct binder_context *context;
 
        binder_debug(BINDER_DEBUG_OPEN_CLOSE, "%s: %d:%d\n", __func__,
                     current->group_leader->pid, current->pid);
@@ -4937,7 +4989,15 @@ static int binder_open(struct inode *nodp, struct file *filp)
        proc->default_priority = task_nice(current);
        binder_dev = container_of(filp->private_data, struct binder_device,
                                  miscdev);
-       proc->context = &binder_dev->context;
+       hlist_for_each_entry(context, &ipcns->binder_contexts, hlist) {
+               if (context->device == binder_dev->miscdev.minor) {
+                       proc->context = context;
+                       break;
+               }
+       }
+       if (!proc->context)
+               return -ENOENT;
+
        binder_alloc_init(&proc->alloc);
 
        binder_stats_created(BINDER_STAT_PROC);
@@ -4946,9 +5006,9 @@ static int binder_open(struct inode *nodp, struct file *filp)
        INIT_LIST_HEAD(&proc->waiting_threads);
        filp->private_data = proc;
 
-       mutex_lock(&binder_procs_lock);
-       hlist_add_head(&proc->proc_node, &binder_procs);
-       mutex_unlock(&binder_procs_lock);
+       mutex_lock(&ipcns->binder_procs_lock);
+       hlist_add_head(&proc->proc_node, &ipcns->binder_procs);
+       mutex_unlock(&ipcns->binder_procs_lock);
 
        if (binder_debugfs_dir_entry_proc) {
                char strbuf[11];
@@ -5082,9 +5142,9 @@ static void binder_deferred_release(struct binder_proc *proc)
        struct rb_node *n;
        int threads, nodes, incoming_refs, outgoing_refs, active_transactions;
 
-       mutex_lock(&binder_procs_lock);
+       mutex_lock(&ipcns->binder_procs_lock);
        hlist_del(&proc->proc_node);
-       mutex_unlock(&binder_procs_lock);
+       mutex_unlock(&ipcns->binder_procs_lock);
 
        mutex_lock(&context->context_mgr_node_lock);
        if (context->binder_context_mgr_node &&
@@ -5377,7 +5437,7 @@ static void print_binder_proc(struct seq_file *m,
        struct binder_node *last_node = NULL;
 
        seq_printf(m, "proc %d\n", proc->pid);
-       seq_printf(m, "context %s\n", proc->context->name);
+       seq_printf(m, "context %d\n", proc->context->device);
        header_pos = m->count;
 
        binder_inner_proc_lock(proc);
@@ -5538,7 +5598,7 @@ static void print_binder_proc_stats(struct seq_file *m,
                binder_alloc_get_free_async_space(&proc->alloc);
 
        seq_printf(m, "proc %d\n", proc->pid);
-       seq_printf(m, "context %s\n", proc->context->name);
+       seq_printf(m, "context %d\n", proc->context->device);
        count = 0;
        ready_threads = 0;
        binder_inner_proc_lock(proc);
@@ -5623,10 +5683,10 @@ static int binder_state_show(struct seq_file *m, void *unused)
        if (last_node)
                binder_put_node(last_node);
 
-       mutex_lock(&binder_procs_lock);
-       hlist_for_each_entry(proc, &binder_procs, proc_node)
+       mutex_lock(&ipcns->binder_procs_lock);
+       hlist_for_each_entry(proc, &ipcns->binder_procs, proc_node)
                print_binder_proc(m, proc, 1);
-       mutex_unlock(&binder_procs_lock);
+       mutex_unlock(&ipcns->binder_procs_lock);
 
        return 0;
 }
@@ -5639,10 +5699,10 @@ static int binder_stats_show(struct seq_file *m, void *unused)
 
        print_binder_stats(m, "", &binder_stats);
 
-       mutex_lock(&binder_procs_lock);
-       hlist_for_each_entry(proc, &binder_procs, proc_node)
+       mutex_lock(&ipcns->binder_procs_lock);
+       hlist_for_each_entry(proc, &ipcns->binder_procs, proc_node)
                print_binder_proc_stats(m, proc);
-       mutex_unlock(&binder_procs_lock);
+       mutex_unlock(&ipcns->binder_procs_lock);
 
        return 0;
 }
@@ -5652,10 +5712,10 @@ static int binder_transactions_show(struct seq_file *m, void *unused)
        struct binder_proc *proc;
 
        seq_puts(m, "binder transactions:\n");
-       mutex_lock(&binder_procs_lock);
-       hlist_for_each_entry(proc, &binder_procs, proc_node)
+       mutex_lock(&ipcns->binder_procs_lock);
+       hlist_for_each_entry(proc, &ipcns->binder_procs, proc_node)
                print_binder_proc(m, proc, 0);
-       mutex_unlock(&binder_procs_lock);
+       mutex_unlock(&ipcns->binder_procs_lock);
 
        return 0;
 }
@@ -5665,14 +5725,14 @@ static int binder_proc_show(struct seq_file *m, void *unused)
        struct binder_proc *itr;
        int pid = (unsigned long)m->private;
 
-       mutex_lock(&binder_procs_lock);
-       hlist_for_each_entry(itr, &binder_procs, proc_node) {
+       mutex_lock(&ipcns->binder_procs_lock);
+       hlist_for_each_entry(itr, &ipcns->binder_procs, proc_node) {
                if (itr->pid == pid) {
                        seq_puts(m, "binder proc state:\n");
                        print_binder_proc(m, itr, 1);
                }
        }
-       mutex_unlock(&binder_procs_lock);
+       mutex_unlock(&ipcns->binder_procs_lock);
 
        return 0;
 }
@@ -5687,10 +5747,10 @@ static void print_binder_transaction_log_entry(struct seq_file *m,
         */
        smp_rmb();
        seq_printf(m,
-                  "%d: %s from %d:%d to %d:%d context %s node %d handle %d size %d:%d ret %d/%d l=%d",
+                  "%d: %s from %d:%d to %d:%d context %d node %d handle %d size %d:%d ret %d/%d l=%d",
                   e->debug_id, (e->call_type == 2) ? "reply" :
                   ((e->call_type == 1) ? "async" : "call "), e->from_proc,
-                  e->from_thread, e->to_proc, e->to_thread, e->context_name,
+                  e->from_thread, e->to_proc, e->to_thread, e->context_device,
                   e->to_node, e->target_handle, e->data_size, e->offsets_size,
                   e->return_error, e->return_error_param,
                   e->return_error_line);
@@ -5753,10 +5813,6 @@ static int __init init_binder_device(const char *name)
        binder_device->miscdev.minor = MISC_DYNAMIC_MINOR;
        binder_device->miscdev.name = name;
 
-       binder_device->context.binder_context_mgr_uid = INVALID_UID;
-       binder_device->context.name = name;
-       mutex_init(&binder_device->context.context_mgr_node_lock);
-
        ret = misc_register(&binder_device->miscdev);
        if (ret < 0) {
                kfree(binder_device);
@@ -5832,8 +5888,12 @@ static int __init binder_init(void)
                        goto err_init_binder_device_failed;
        }
 
-       return ret;
+       ret = binder_init_ns(&init_ipc_ns);
+       if (ret)
+               goto err_init_namespace_failed;
 
+       return ret;
+err_init_namespace_failed:
 err_init_binder_device_failed:
        hlist_for_each_entry_safe(device, tmp, &binder_devices, hlist) {
                misc_deregister(&device->miscdev);
diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index 6ab8c1bada3f..d7f850a2ded8 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -63,6 +63,13 @@ struct ipc_namespace {
        unsigned int    mq_msg_default;
        unsigned int    mq_msgsize_default;
 
+#ifdef CONFIG_ANDROID_BINDER_IPC
+       /* next fields are for binder */
+       struct mutex      binder_procs_lock;
+       struct hlist_head binder_procs;
+       struct hlist_head binder_contexts;
+#endif
+
        /* user_ns which owns the ipc ns */
        struct user_namespace *user_ns;
        struct ucounts *ucounts;
@@ -118,6 +125,14 @@ extern int mq_init_ns(struct ipc_namespace *ns);
 static inline int mq_init_ns(struct ipc_namespace *ns) { return 0; }
 #endif
 
+#ifdef CONFIG_ANDROID_BINDER_IPC
+extern int binder_init_ns(struct ipc_namespace *ns);
+extern void binder_exit_ns(struct ipc_namespace *ns);
+#else
+static inline int binder_init_ns(struct ipc_namespace *ns) { return 0; }
+static inline void binder_exit_ns(struct ipc_namespace *ns) { }
+#endif
+
 #if defined(CONFIG_IPC_NS)
 extern struct ipc_namespace *copy_ipcs(unsigned long flags,
        struct user_namespace *user_ns, struct ipc_namespace *ns);
diff --git a/ipc/namespace.c b/ipc/namespace.c
index 21607791d62c..68c6e983b002 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -57,7 +57,10 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
 
        err = mq_init_ns(ns);
        if (err)
-               goto fail_put;
+               goto fail_init_mq;
+       err = binder_init_ns(ns);
+       if (err)
+               goto fail_init_binder;
 
        sem_init_ns(ns);
        msg_init_ns(ns);
@@ -65,7 +68,9 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
 
        return ns;
 
-fail_put:
+fail_init_binder:
+       mq_put_mnt(ns);
+fail_init_mq:
        put_user_ns(ns->user_ns);
        ns_free_inum(&ns->ns);
 fail_free:
@@ -120,6 +125,7 @@ static void free_ipc_ns(struct ipc_namespace *ns)
        sem_exit_ns(ns);
        msg_exit_ns(ns);
        shm_exit_ns(ns);
+       binder_exit_ns(ns);
 
        dec_ipc_namespaces(ns->ucounts);
        put_user_ns(ns->user_ns);
-- 
2.11.0

* Re: [PATCH V3] binder: ipc namespace support for android binder
  2018-11-08 13:02 [PATCH V3] binder: ipc namespace support for android binder chouryzhou(周威)
@ 2018-11-08 14:28 ` Christian Brauner
  2018-11-09 17:54 ` Todd Kjos
  2018-11-09 18:26 ` [PATCH V3] binder: ipc namespace support for android binder Davidlohr Bueso
  2 siblings, 0 replies; 12+ messages in thread
From: Christian Brauner @ 2018-11-08 14:28 UTC (permalink / raw)
  To: chouryzhou(周威)
  Cc: gregkh, arve, tkjos, akpm, dave, devel, linux-kernel

On Thu, Nov 08, 2018 at 01:02:32PM +0000, chouryzhou(周威) wrote:
>   We are working for running android in container, but we found that binder is
> not isolated by ipc namespace. Since binder is a form of IPC and therefore should
> be tied to ipc namespace. With this patch, we can run more than one android
> container on one host.
>   This patch move "binder_procs" and "binder_context" into ipc_namespace,
> driver will find the context from it when opening. Although statistics in debugfs
> remain global.
> 
> Signed-off-by: chouryzhou <chouryzhou@tencent.com>
> ---
>  drivers/android/binder.c      | 128 +++++++++++++++++++++++++++++++-----------
>  include/linux/ipc_namespace.h |  15 +++++
>  ipc/namespace.c               |  10 +++-
>  3 files changed, 117 insertions(+), 36 deletions(-)
> 
> diff --git a/drivers/android/binder.c b/drivers/android/binder.c
> index cb30a524d16d..22e45bb937e6 100644
> --- a/drivers/android/binder.c
> +++ b/drivers/android/binder.c
> @@ -68,6 +68,7 @@
>  #include <linux/seq_file.h>
>  #include <linux/uaccess.h>
>  #include <linux/pid_namespace.h>
> +#include <linux/ipc_namespace.h>
>  #include <linux/security.h>
>  #include <linux/spinlock.h>
>  #include <linux/ratelimit.h>
> @@ -80,13 +81,18 @@
>  #include "binder_alloc.h"
>  #include "binder_trace.h"
>  
> +
> +#if !defined(CONFIG_SYSVIPC) &&  !defined(CONFIG_POSIX_MQUEUE)
> +struct ipc_namespace init_ipc_ns;
> +#define ipcns  (&init_ipc_ns)
> +#else
> +#define ipcns  (current->nsproxy->ipc_ns)
> +#endif
> +
>  static HLIST_HEAD(binder_deferred_list);
>  static DEFINE_MUTEX(binder_deferred_lock);
>  
>  static HLIST_HEAD(binder_devices);
> -static HLIST_HEAD(binder_procs);
> -static DEFINE_MUTEX(binder_procs_lock);
> -
>  static HLIST_HEAD(binder_dead_nodes);
>  static DEFINE_SPINLOCK(binder_dead_nodes_lock);
>  
> @@ -232,7 +238,7 @@ struct binder_transaction_log_entry {
>         int return_error_line;
>         uint32_t return_error;
>         uint32_t return_error_param;
> -       const char *context_name;
> +       int context_device;
>  };
>  struct binder_transaction_log {
>         atomic_t cur;
> @@ -263,19 +269,64 @@ static struct binder_transaction_log_entry *binder_transaction_log_add(
>  }
>  
>  struct binder_context {
> +       struct hlist_node hlist;
>         struct binder_node *binder_context_mgr_node;
>         struct mutex context_mgr_node_lock;
>  
>         kuid_t binder_context_mgr_uid;
> -       const char *name;
> +       int    device;
>  };
>  
>  struct binder_device {
>         struct hlist_node hlist;
>         struct miscdevice miscdev;
> -       struct binder_context context;
>  };
>  
> +void binder_exit_ns(struct ipc_namespace *ns)
> +{
> +       struct binder_context *context;
> +       struct hlist_node *tmp;
> +
> +       mutex_destroy(&ns->binder_procs_lock);
> +       hlist_for_each_entry_safe(context, tmp, &ns->binder_contexts, hlist) {
> +               mutex_destroy(&context->context_mgr_node_lock);
> +               hlist_del(&context->hlist);
> +               kfree(context);
> +       }
> +}
> +
> +int binder_init_ns(struct ipc_namespace *ns)
> +{
> +       int ret;
> +       struct binder_device *device;
> +
> +       mutex_init(&ns->binder_procs_lock);
> +       INIT_HLIST_HEAD(&ns->binder_procs);
> +       INIT_HLIST_HEAD(&ns->binder_contexts);
> +
> +       hlist_for_each_entry(device, &binder_devices, hlist) {
> +               struct binder_context *context;
> +
> +               context = kzalloc(sizeof(*context), GFP_KERNEL);
> +               if (!context) {
> +                       ret = -ENOMEM;
> +                       goto err;
> +               }
> +
> +               context->device = device->miscdev.minor;
> +               context->binder_context_mgr_uid = INVALID_UID;
> +               mutex_init(&context->context_mgr_node_lock);
> +
> +               hlist_add_head(&context->hlist, &ns->binder_contexts);
> +       }
> +
> +       return 0;
> +err:
> +       binder_exit_ns(ns);
> +       return ret;
> +}
> +
> +
>  /**
>   * struct binder_work - work enqueued on a worklist
>   * @entry:             node enqueued on list
> @@ -2727,7 +2778,7 @@ static void binder_transaction(struct binder_proc *proc,
>         e->target_handle = tr->target.handle;
>         e->data_size = tr->data_size;
>         e->offsets_size = tr->offsets_size;
> -       e->context_name = proc->context->name;
> +       e->context_device = proc->context->device;
>  
>         if (reply) {
>                 binder_inner_proc_lock(proc);
> @@ -4922,6 +4973,7 @@ static int binder_open(struct inode *nodp, struct file *filp)
>  {
>         struct binder_proc *proc;
>         struct binder_device *binder_dev;
> +       struct binder_context *context;
>  
>         binder_debug(BINDER_DEBUG_OPEN_CLOSE, "%s: %d:%d\n", __func__,
>                      current->group_leader->pid, current->pid);
> @@ -4937,7 +4989,15 @@ static int binder_open(struct inode *nodp, struct file *filp)

So the idea is that on open() of a binder device you attach an ipc
namespace to the opening process?
So you'd basically be able to create, in another ipc namespace, a new
device node with the same major and minor numbers as the one in the host
ipc namespace, and it would give you a different context.
That's basically namespacing binder devices without *properly*
namespacing them.

What happens if I do:

/* in initial ipc namespace */
binder_fd = open(/dev/binder0)

Now I send binder_fd via SCM_RIGHTs from initial ipc namespace to
non-initial ipc namespace.

/* in non-initial ipc namespace */
new_binder_fd = open(/proc/self/fd/<binder_fd>);

What ipc namespace do you intend new_binder_fd to refer to? It seems
a re-open on such an fd would switch ipc namespaces. I find that odd
semantics. Are there other things that behave that way? If you send a
pty fd to another namespace it won't switch mount and user namespaces
just because you re-open it via /proc.
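
For concreteness, a minimal userspace sketch of that scenario (illustrative
only, not part of the patch; error handling and the recvmsg() side are
omitted):

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <unistd.h>

/* sender, running in the initial ipc namespace */
static void send_binder_fd(int unix_sock)
{
        int binder_fd = open("/dev/binder0", O_RDWR | O_CLOEXEC);
        char cbuf[CMSG_SPACE(sizeof(int))];
        char dummy = 'x';
        struct iovec iov = { .iov_base = &dummy, .iov_len = 1 };
        struct msghdr msg = { 0 };
        struct cmsghdr *cmsg;

        memset(cbuf, 0, sizeof(cbuf));
        msg.msg_iov = &iov;
        msg.msg_iovlen = 1;
        msg.msg_control = cbuf;
        msg.msg_controllen = sizeof(cbuf);

        cmsg = CMSG_FIRSTHDR(&msg);
        cmsg->cmsg_level = SOL_SOCKET;
        cmsg->cmsg_type = SCM_RIGHTS;
        cmsg->cmsg_len = CMSG_LEN(sizeof(int));
        memcpy(CMSG_DATA(cmsg), &binder_fd, sizeof(int));

        sendmsg(unix_sock, &msg, 0);
}

/* receiver, running in a non-initial ipc namespace, after recvmsg() has
 * delivered the passed fd as received_fd */
static int reopen_binder_fd(int received_fd)
{
        char path[64];

        snprintf(path, sizeof(path), "/proc/self/fd/%d", received_fd);
        /* With the patch as posted, this open() resolves the binder
         * context against the *receiver's* ipc namespace, which is the
         * namespace switch being questioned above. */
        return open(path, O_RDWR | O_CLOEXEC);
}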

>         proc->default_priority = task_nice(current);
>         binder_dev = container_of(filp->private_data, struct binder_device,
>                                   miscdev);
> -       proc->context = &binder_dev->context;
> +       hlist_for_each_entry(context, &ipcns->binder_contexts, hlist) {
> +               if (context->device == binder_dev->miscdev.minor) {
> +                       proc->context = context;
> +                       break;
> +               }
> +       }
> +       if (!proc->context)
> +               return -ENOENT;
> +
>         binder_alloc_init(&proc->alloc);
>  
>         binder_stats_created(BINDER_STAT_PROC);
> @@ -4946,9 +5006,9 @@ static int binder_open(struct inode *nodp, struct file *filp)
>         INIT_LIST_HEAD(&proc->waiting_threads);
>         filp->private_data = proc;
>  
> -       mutex_lock(&binder_procs_lock);
> -       hlist_add_head(&proc->proc_node, &binder_procs);
> -       mutex_unlock(&binder_procs_lock);
> +       mutex_lock(&ipcns->binder_procs_lock);
> +       hlist_add_head(&proc->proc_node, &ipcns->binder_procs);
> +       mutex_unlock(&ipcns->binder_procs_lock);
>  
>         if (binder_debugfs_dir_entry_proc) {
>                 char strbuf[11];
> @@ -5082,9 +5142,9 @@ static void binder_deferred_release(struct binder_proc *proc)
>         struct rb_node *n;
>         int threads, nodes, incoming_refs, outgoing_refs, active_transactions;
>  
> -       mutex_lock(&binder_procs_lock);
> +       mutex_lock(&ipcns->binder_procs_lock);
>         hlist_del(&proc->proc_node);
> -       mutex_unlock(&binder_procs_lock);
> +       mutex_unlock(&ipcns->binder_procs_lock);
>  
>         mutex_lock(&context->context_mgr_node_lock);
>         if (context->binder_context_mgr_node &&
> @@ -5377,7 +5437,7 @@ static void print_binder_proc(struct seq_file *m,
>         struct binder_node *last_node = NULL;
>  
>         seq_printf(m, "proc %d\n", proc->pid);
> -       seq_printf(m, "context %s\n", proc->context->name);
> +       seq_printf(m, "context %d\n", proc->context->device);
>         header_pos = m->count;
>  
>         binder_inner_proc_lock(proc);
> @@ -5538,7 +5598,7 @@ static void print_binder_proc_stats(struct seq_file *m,
>                 binder_alloc_get_free_async_space(&proc->alloc);
>  
>         seq_printf(m, "proc %d\n", proc->pid);
> -       seq_printf(m, "context %s\n", proc->context->name);
> +       seq_printf(m, "context %d\n", proc->context->device);
>         count = 0;
>         ready_threads = 0;
>         binder_inner_proc_lock(proc);
> @@ -5623,10 +5683,10 @@ static int binder_state_show(struct seq_file *m, void *unused)
>         if (last_node)
>                 binder_put_node(last_node);
>  
> -       mutex_lock(&binder_procs_lock);
> -       hlist_for_each_entry(proc, &binder_procs, proc_node)
> +       mutex_lock(&ipcns->binder_procs_lock);
> +       hlist_for_each_entry(proc, &ipcns->binder_procs, proc_node)
>                 print_binder_proc(m, proc, 1);
> -       mutex_unlock(&binder_procs_lock);
> +       mutex_unlock(&ipcns->binder_procs_lock);
>  
>         return 0;
>  }
> @@ -5639,10 +5699,10 @@ static int binder_stats_show(struct seq_file *m, void *unused)
>  
>         print_binder_stats(m, "", &binder_stats);
>  
> -       mutex_lock(&binder_procs_lock);
> -       hlist_for_each_entry(proc, &binder_procs, proc_node)
> +       mutex_lock(&ipcns->binder_procs_lock);
> +       hlist_for_each_entry(proc, &ipcns->binder_procs, proc_node)
>                 print_binder_proc_stats(m, proc);
> -       mutex_unlock(&binder_procs_lock);
> +       mutex_unlock(&ipcns->binder_procs_lock);
>  
>         return 0;
>  }
> @@ -5652,10 +5712,10 @@ static int binder_transactions_show(struct seq_file *m, void *unused)
>         struct binder_proc *proc;
>  
>         seq_puts(m, "binder transactions:\n");
> -       mutex_lock(&binder_procs_lock);
> -       hlist_for_each_entry(proc, &binder_procs, proc_node)
> +       mutex_lock(&ipcns->binder_procs_lock);
> +       hlist_for_each_entry(proc, &ipcns->binder_procs, proc_node)
>                 print_binder_proc(m, proc, 0);
> -       mutex_unlock(&binder_procs_lock);
> +       mutex_unlock(&ipcns->binder_procs_lock);
>  
>         return 0;
>  }
> @@ -5665,14 +5725,14 @@ static int binder_proc_show(struct seq_file *m, void *unused)
>         struct binder_proc *itr;
>         int pid = (unsigned long)m->private;
>  
> -       mutex_lock(&binder_procs_lock);
> -       hlist_for_each_entry(itr, &binder_procs, proc_node) {
> +       mutex_lock(&ipcns->binder_procs_lock);
> +       hlist_for_each_entry(itr, &ipcns->binder_procs, proc_node) {
>                 if (itr->pid == pid) {
>                         seq_puts(m, "binder proc state:\n");
>                         print_binder_proc(m, itr, 1);
>                 }
>         }
> -       mutex_unlock(&binder_procs_lock);
> +       mutex_unlock(&ipcns->binder_procs_lock);
>  
>         return 0;
>  }
> @@ -5687,10 +5747,10 @@ static void print_binder_transaction_log_entry(struct seq_file *m,
>          */
>         smp_rmb();
>         seq_printf(m,
> -                  "%d: %s from %d:%d to %d:%d context %s node %d handle %d size %d:%d ret %d/%d l=%d",
> +                  "%d: %s from %d:%d to %d:%d context %d node %d handle %d size %d:%d ret %d/%d l=%d",
>                    e->debug_id, (e->call_type == 2) ? "reply" :
>                    ((e->call_type == 1) ? "async" : "call "), e->from_proc,
> -                  e->from_thread, e->to_proc, e->to_thread, e->context_name,
> +                  e->from_thread, e->to_proc, e->to_thread, e->context_device,
>                    e->to_node, e->target_handle, e->data_size, e->offsets_size,
>                    e->return_error, e->return_error_param,
>                    e->return_error_line);
> @@ -5753,10 +5813,6 @@ static int __init init_binder_device(const char *name)
>         binder_device->miscdev.minor = MISC_DYNAMIC_MINOR;
>         binder_device->miscdev.name = name;
>  
> -       binder_device->context.binder_context_mgr_uid = INVALID_UID;
> -       binder_device->context.name = name;
> -       mutex_init(&binder_device->context.context_mgr_node_lock);
> -
>         ret = misc_register(&binder_device->miscdev);
>         if (ret < 0) {
>                 kfree(binder_device);
> @@ -5832,8 +5888,12 @@ static int __init binder_init(void)
>                         goto err_init_binder_device_failed;
>         }
>  
> -       return ret;
> +       ret = binder_init_ns(&init_ipc_ns);
> +       if (ret)
> +               goto err_init_namespace_failed;
>  
> +       return ret;
> +err_init_namespace_failed:
>  err_init_binder_device_failed:
>         hlist_for_each_entry_safe(device, tmp, &binder_devices, hlist) {
>                 misc_deregister(&device->miscdev);
> diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
> index 6ab8c1bada3f..d7f850a2ded8 100644
> --- a/include/linux/ipc_namespace.h
> +++ b/include/linux/ipc_namespace.h
> @@ -63,6 +63,13 @@ struct ipc_namespace {
>         unsigned int    mq_msg_default;
>         unsigned int    mq_msgsize_default;
>  
> +#ifdef CONFIG_ANDROID_BINDER_IPC
> +       /* next fields are for binder */
> +       struct mutex      binder_procs_lock;
> +       struct hlist_head binder_procs;
> +       struct hlist_head binder_contexts;
> +#endif
> +
>         /* user_ns which owns the ipc ns */
>         struct user_namespace *user_ns;
>         struct ucounts *ucounts;
> @@ -118,6 +125,14 @@ extern int mq_init_ns(struct ipc_namespace *ns);
>  static inline int mq_init_ns(struct ipc_namespace *ns) { return 0; }
>  #endif
>  
> +#ifdef CONFIG_ANDROID_BINDER_IPC
> +extern int binder_init_ns(struct ipc_namespace *ns);
> +extern void binder_exit_ns(struct ipc_namespace *ns);
> +#else
> +static inline int binder_init_ns(struct ipc_namespace *ns) { return 0; }
> +static inline void binder_exit_ns(struct ipc_namespace *ns) { }
> +#endif
> +
>  #if defined(CONFIG_IPC_NS)
>  extern struct ipc_namespace *copy_ipcs(unsigned long flags,
>         struct user_namespace *user_ns, struct ipc_namespace *ns);
> diff --git a/ipc/namespace.c b/ipc/namespace.c
> index 21607791d62c..68c6e983b002 100644
> --- a/ipc/namespace.c
> +++ b/ipc/namespace.c
> @@ -57,7 +57,10 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
>  
>         err = mq_init_ns(ns);
>         if (err)
> -               goto fail_put;
> +               goto fail_init_mq;
> +       err = binder_init_ns(ns);
> +       if (err)
> +               goto fail_init_binder;
>  
>         sem_init_ns(ns);
>         msg_init_ns(ns);
> @@ -65,7 +68,9 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
>  
>         return ns;
>  
> -fail_put:
> +fail_init_binder:
> +       mq_put_mnt(ns);
> +fail_init_mq:
>         put_user_ns(ns->user_ns);
>         ns_free_inum(&ns->ns);
>  fail_free:
> @@ -120,6 +125,7 @@ static void free_ipc_ns(struct ipc_namespace *ns)
>         sem_exit_ns(ns);
>         msg_exit_ns(ns);
>         shm_exit_ns(ns);
> +       binder_exit_ns(ns);
>  
>         dec_ipc_namespaces(ns->ucounts);
>         put_user_ns(ns->user_ns);
> -- 
> 2.11.0

* Re: [PATCH V3] binder: ipc namespace support for android binder
  2018-11-08 13:02 [PATCH V3] binder: ipc namespace support for android binder chouryzhou(周威)
  2018-11-08 14:28 ` Christian Brauner
@ 2018-11-09 17:54 ` Todd Kjos
  2018-11-10  3:09   ` chouryzhou(周威)
  2018-11-09 18:26 ` [PATCH V3] binder: ipc namespace support for android binder Davidlohr Bueso
  2 siblings, 1 reply; 12+ messages in thread
From: Todd Kjos @ 2018-11-09 17:54 UTC (permalink / raw)
  To: chouryzhou
  Cc: Greg Kroah-Hartman, Arve Hjønnevåg, Todd Kjos, akpm,
	dave, open list:ANDROID DRIVERS, LKML

On Thu, Nov 8, 2018 at 5:02 AM chouryzhou(周威) <chouryzhou@tencent.com> wrote:
>
>   We are working for running android in container, but we found that binder is
> not isolated by ipc namespace. Since binder is a form of IPC and therefore should
> be tied to ipc namespace. With this patch, we can run more than one android
> container on one host.
>   This patch move "binder_procs" and "binder_context" into ipc_namespace,
> driver will find the context from it when opening. Although statistics in debugfs
> remain global.
>
> Signed-off-by: chouryzhou <chouryzhou@tencent.com>
> ---
>  drivers/android/binder.c      | 128 +++++++++++++++++++++++++++++++-----------
>  include/linux/ipc_namespace.h |  15 +++++
>  ipc/namespace.c               |  10 +++-
>  3 files changed, 117 insertions(+), 36 deletions(-)
>
> diff --git a/drivers/android/binder.c b/drivers/android/binder.c
> index cb30a524d16d..22e45bb937e6 100644
> --- a/drivers/android/binder.c
> +++ b/drivers/android/binder.c
> @@ -68,6 +68,7 @@
>  #include <linux/seq_file.h>
>  #include <linux/uaccess.h>
>  #include <linux/pid_namespace.h>
> +#include <linux/ipc_namespace.h>
>  #include <linux/security.h>
>  #include <linux/spinlock.h>
>  #include <linux/ratelimit.h>
> @@ -80,13 +81,18 @@
>  #include "binder_alloc.h"
>  #include "binder_trace.h"
>
> +
> +#if !defined(CONFIG_SYSVIPC) &&  !defined(CONFIG_POSIX_MQUEUE)

I still don't understand the dependencies on SYSVIPC or POSIX_MQUEUE.
It seems like this mechanism would work even if both are disabled --
as long as IPC_NS is enabled. Seems cleaner to change init/Kconfig and
allow IPC_NS if CONFIG_ANDROID_BINDER_IPC and change this line to
"#ifndef CONFIG_IPC_NS"

> +struct ipc_namespace init_ipc_ns;
> +#define ipcns  (&init_ipc_ns)
> +#else
> +#define ipcns  (current->nsproxy->ipc_ns)
> +#endif
> +
>  static HLIST_HEAD(binder_deferred_list);
>  static DEFINE_MUTEX(binder_deferred_lock);
>
>  static HLIST_HEAD(binder_devices);
> -static HLIST_HEAD(binder_procs);
> -static DEFINE_MUTEX(binder_procs_lock);
> -
>  static HLIST_HEAD(binder_dead_nodes);
>  static DEFINE_SPINLOCK(binder_dead_nodes_lock);
>
> @@ -232,7 +238,7 @@ struct binder_transaction_log_entry {
>         int return_error_line;
>         uint32_t return_error;
>         uint32_t return_error_param;
> -       const char *context_name;
> +       int context_device;
>  };
>  struct binder_transaction_log {
>         atomic_t cur;
> @@ -263,19 +269,64 @@ static struct binder_transaction_log_entry *binder_transaction_log_add(
>  }
>
>  struct binder_context {
> +       struct hlist_node hlist;
>         struct binder_node *binder_context_mgr_node;
>         struct mutex context_mgr_node_lock;
>
>         kuid_t binder_context_mgr_uid;
> -       const char *name;
> +       int    device;
>  };
>
>  struct binder_device {
>         struct hlist_node hlist;
>         struct miscdevice miscdev;
> -       struct binder_context context;
>  };
>
> +void binder_exit_ns(struct ipc_namespace *ns)
> +{
> +       struct binder_context *context;
> +       struct hlist_node *tmp;
> +
> +       mutex_destroy(&ns->binder_procs_lock);
> +       hlist_for_each_entry_safe(context, tmp, &ns->binder_contexts, hlist) {
> +               mutex_destroy(&context->context_mgr_node_lock);
> +               hlist_del(&context->hlist);
> +               kfree(context);
> +       }
> +}
> +
> +int binder_init_ns(struct ipc_namespace *ns)
> +{
> +       int ret;
> +       struct binder_device *device;
> +
> +       mutex_init(&ns->binder_procs_lock);
> +       INIT_HLIST_HEAD(&ns->binder_procs);
> +       INIT_HLIST_HEAD(&ns->binder_contexts);
> +
> +       hlist_for_each_entry(device, &binder_devices, hlist) {
> +               struct binder_context *context;
> +
> +               context = kzalloc(sizeof(*context), GFP_KERNEL);
> +               if (!context) {
> +                       ret = -ENOMEM;
> +                       goto err;
> +               }
> +
> +               context->device = device->miscdev.minor;
> +               context->binder_context_mgr_uid = INVALID_UID;
> +               mutex_init(&context->context_mgr_node_lock);
> +
> +               hlist_add_head(&context->hlist, &ns->binder_contexts);
> +       }
> +
> +       return 0;
> +err:
> +       binder_exit_ns(ns);
> +       return ret;
> +}
> +
> +
>  /**
>   * struct binder_work - work enqueued on a worklist
>   * @entry:             node enqueued on list
> @@ -2727,7 +2778,7 @@ static void binder_transaction(struct binder_proc *proc,
>         e->target_handle = tr->target.handle;
>         e->data_size = tr->data_size;
>         e->offsets_size = tr->offsets_size;
> -       e->context_name = proc->context->name;
> +       e->context_device = proc->context->device;

why eliminate name? The string name is very useful for differentiating
normal "framework" binder transactions vs "hal" or "vendor"
transactions. If we just have a device number it will be hard to tell
in the logs even which namespace it belongs to. We need to keep both
the "name" (for which there might be multiple in each ns) and some
indication of which namespace this is. Maybe we assign some sort of
namespace ID during binder_init_ns().

>
>         if (reply) {
>                 binder_inner_proc_lock(proc);
> @@ -4922,6 +4973,7 @@ static int binder_open(struct inode *nodp, struct file *filp)
>  {
>         struct binder_proc *proc;
>         struct binder_device *binder_dev;
> +       struct binder_context *context;
>
>         binder_debug(BINDER_DEBUG_OPEN_CLOSE, "%s: %d:%d\n", __func__,
>                      current->group_leader->pid, current->pid);
> @@ -4937,7 +4989,15 @@ static int binder_open(struct inode *nodp, struct file *filp)
>         proc->default_priority = task_nice(current);
>         binder_dev = container_of(filp->private_data, struct binder_device,
>                                   miscdev);
> -       proc->context = &binder_dev->context;
> +       hlist_for_each_entry(context, &ipcns->binder_contexts, hlist) {
> +               if (context->device == binder_dev->miscdev.minor) {
> +                       proc->context = context;
> +                       break;
> +               }
> +       }
> +       if (!proc->context)
> +               return -ENOENT;
> +
>         binder_alloc_init(&proc->alloc);
>
>         binder_stats_created(BINDER_STAT_PROC);
> @@ -4946,9 +5006,9 @@ static int binder_open(struct inode *nodp, struct file *filp)
>         INIT_LIST_HEAD(&proc->waiting_threads);
>         filp->private_data = proc;
>
> -       mutex_lock(&binder_procs_lock);
> -       hlist_add_head(&proc->proc_node, &binder_procs);
> -       mutex_unlock(&binder_procs_lock);
> +       mutex_lock(&ipcns->binder_procs_lock);
> +       hlist_add_head(&proc->proc_node, &ipcns->binder_procs);
> +       mutex_unlock(&ipcns->binder_procs_lock);
>
>         if (binder_debugfs_dir_entry_proc) {
>                 char strbuf[11];
> @@ -5082,9 +5142,9 @@ static void binder_deferred_release(struct binder_proc *proc)
>         struct rb_node *n;
>         int threads, nodes, incoming_refs, outgoing_refs, active_transactions;
>
> -       mutex_lock(&binder_procs_lock);
> +       mutex_lock(&ipcns->binder_procs_lock);
>         hlist_del(&proc->proc_node);
> -       mutex_unlock(&binder_procs_lock);
> +       mutex_unlock(&ipcns->binder_procs_lock);
>
>         mutex_lock(&context->context_mgr_node_lock);
>         if (context->binder_context_mgr_node &&
> @@ -5377,7 +5437,7 @@ static void print_binder_proc(struct seq_file *m,
>         struct binder_node *last_node = NULL;
>
>         seq_printf(m, "proc %d\n", proc->pid);
> -       seq_printf(m, "context %s\n", proc->context->name);
> +       seq_printf(m, "context %d\n", proc->context->device);

As mentioned above, we need to retain the name and probably also want a ns
id of some sort. The context then has three components under IPC_NS, so
maybe a helper function to print it, like:

static void binder_seq_print_context(struct seq_file *m,
                                     struct binder_context *context)
{
#ifdef CONFIG_IPC_NS
        seq_printf(m, "%d-%d-%s", context->ns_id, context->device,
                   context->name);
#else
        seq_printf(m, "%s", context->name);
#endif
}

(same comment below everywhere context is printed)

Should these debugfs nodes be ns aware and only print debugging
info for the context of the thread accessing the node? If so, we would
also want to be namespace-aware when printing pids.

>         header_pos = m->count;
>
>         binder_inner_proc_lock(proc);
> @@ -5538,7 +5598,7 @@ static void print_binder_proc_stats(struct seq_file *m,
>                 binder_alloc_get_free_async_space(&proc->alloc);
>
>         seq_printf(m, "proc %d\n", proc->pid);
> -       seq_printf(m, "context %s\n", proc->context->name);
> +       seq_printf(m, "context %d\n", proc->context->device);
>         count = 0;
>         ready_threads = 0;
>         binder_inner_proc_lock(proc);
> @@ -5623,10 +5683,10 @@ static int binder_state_show(struct seq_file *m, void *unused)
>         if (last_node)
>                 binder_put_node(last_node);
>
> -       mutex_lock(&binder_procs_lock);
> -       hlist_for_each_entry(proc, &binder_procs, proc_node)
> +       mutex_lock(&ipcns->binder_procs_lock);
> +       hlist_for_each_entry(proc, &ipcns->binder_procs, proc_node)
>                 print_binder_proc(m, proc, 1);
> -       mutex_unlock(&binder_procs_lock);
> +       mutex_unlock(&ipcns->binder_procs_lock);
>
>         return 0;
>  }
> @@ -5639,10 +5699,10 @@ static int binder_stats_show(struct seq_file *m, void *unused)
>
>         print_binder_stats(m, "", &binder_stats);
>
> -       mutex_lock(&binder_procs_lock);
> -       hlist_for_each_entry(proc, &binder_procs, proc_node)
> +       mutex_lock(&ipcns->binder_procs_lock);
> +       hlist_for_each_entry(proc, &ipcns->binder_procs, proc_node)
>                 print_binder_proc_stats(m, proc);
> -       mutex_unlock(&binder_procs_lock);
> +       mutex_unlock(&ipcns->binder_procs_lock);
>
>         return 0;
>  }
> @@ -5652,10 +5712,10 @@ static int binder_transactions_show(struct seq_file *m, void *unused)
>         struct binder_proc *proc;
>
>         seq_puts(m, "binder transactions:\n");
> -       mutex_lock(&binder_procs_lock);
> -       hlist_for_each_entry(proc, &binder_procs, proc_node)
> +       mutex_lock(&ipcns->binder_procs_lock);
> +       hlist_for_each_entry(proc, &ipcns->binder_procs, proc_node)
>                 print_binder_proc(m, proc, 0);
> -       mutex_unlock(&binder_procs_lock);
> +       mutex_unlock(&ipcns->binder_procs_lock);
>
>         return 0;
>  }
> @@ -5665,14 +5725,14 @@ static int binder_proc_show(struct seq_file *m, void *unused)
>         struct binder_proc *itr;
>         int pid = (unsigned long)m->private;
>
> -       mutex_lock(&binder_procs_lock);
> -       hlist_for_each_entry(itr, &binder_procs, proc_node) {
> +       mutex_lock(&ipcns->binder_procs_lock);
> +       hlist_for_each_entry(itr, &ipcns->binder_procs, proc_node) {
>                 if (itr->pid == pid) {
>                         seq_puts(m, "binder proc state:\n");
>                         print_binder_proc(m, itr, 1);
>                 }
>         }
> -       mutex_unlock(&binder_procs_lock);
> +       mutex_unlock(&ipcns->binder_procs_lock);
>
>         return 0;
>  }
> @@ -5687,10 +5747,10 @@ static void print_binder_transaction_log_entry(struct seq_file *m,
>          */
>         smp_rmb();
>         seq_printf(m,
> -                  "%d: %s from %d:%d to %d:%d context %s node %d handle %d size %d:%d ret %d/%d l=%d",
> +                  "%d: %s from %d:%d to %d:%d context %d node %d handle %d size %d:%d ret %d/%d l=%d",
>                    e->debug_id, (e->call_type == 2) ? "reply" :
>                    ((e->call_type == 1) ? "async" : "call "), e->from_proc,
> -                  e->from_thread, e->to_proc, e->to_thread, e->context_name,
> +                  e->from_thread, e->to_proc, e->to_thread, e->context_device,
>                    e->to_node, e->target_handle, e->data_size, e->offsets_size,
>                    e->return_error, e->return_error_param,
>                    e->return_error_line);
> @@ -5753,10 +5813,6 @@ static int __init init_binder_device(const char *name)
>         binder_device->miscdev.minor = MISC_DYNAMIC_MINOR;
>         binder_device->miscdev.name = name;
>
> -       binder_device->context.binder_context_mgr_uid = INVALID_UID;
> -       binder_device->context.name = name;
> -       mutex_init(&binder_device->context.context_mgr_node_lock);
> -
>         ret = misc_register(&binder_device->miscdev);
>         if (ret < 0) {
>                 kfree(binder_device);
> @@ -5832,8 +5888,12 @@ static int __init binder_init(void)
>                         goto err_init_binder_device_failed;
>         }
>
> -       return ret;
> +       ret = binder_init_ns(&init_ipc_ns);
> +       if (ret)
> +               goto err_init_namespace_failed;
>
> +       return ret;
> +err_init_namespace_failed:
>  err_init_binder_device_failed:
>         hlist_for_each_entry_safe(device, tmp, &binder_devices, hlist) {
>                 misc_deregister(&device->miscdev);
> diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
> index 6ab8c1bada3f..d7f850a2ded8 100644
> --- a/include/linux/ipc_namespace.h
> +++ b/include/linux/ipc_namespace.h
> @@ -63,6 +63,13 @@ struct ipc_namespace {
>         unsigned int    mq_msg_default;
>         unsigned int    mq_msgsize_default;
>
> +#ifdef CONFIG_ANDROID_BINDER_IPC
> +       /* next fields are for binder */
> +       struct mutex      binder_procs_lock;
> +       struct hlist_head binder_procs;
> +       struct hlist_head binder_contexts;
> +#endif
> +
>         /* user_ns which owns the ipc ns */
>         struct user_namespace *user_ns;
>         struct ucounts *ucounts;
> @@ -118,6 +125,14 @@ extern int mq_init_ns(struct ipc_namespace *ns);
>  static inline int mq_init_ns(struct ipc_namespace *ns) { return 0; }
>  #endif
>
> +#ifdef CONFIG_ANDROID_BINDER_IPC
> +extern int binder_init_ns(struct ipc_namespace *ns);
> +extern void binder_exit_ns(struct ipc_namespace *ns);
> +#else
> +static inline int binder_init_ns(struct ipc_namespace *ns) { return 0; }
> +static inline void binder_exit_ns(struct ipc_namespace *ns) { }
> +#endif

I'm fine with this if the namespace.[hc] maintainers are, but would it
be better to add some sort of registration function so we don't need
binder-specific stuff here?

A version of this was proposed by Oren Laadan in:

https://lists.linuxfoundation.org/pipermail/containers/2013-December/033834.html

and then updated for 4.9 in a proposal in AOSP:

https://android-review.googlesource.com/c/kernel/common/+/471961

There will be some discussions on containerized binder support at LPC,
so we'll want to wait until after LPC before moving ahead with this.

-Todd


> +
>  #if defined(CONFIG_IPC_NS)
>  extern struct ipc_namespace *copy_ipcs(unsigned long flags,
>         struct user_namespace *user_ns, struct ipc_namespace *ns);
> diff --git a/ipc/namespace.c b/ipc/namespace.c
> index 21607791d62c..68c6e983b002 100644
> --- a/ipc/namespace.c
> +++ b/ipc/namespace.c
> @@ -57,7 +57,10 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
>
>         err = mq_init_ns(ns);
>         if (err)
> -               goto fail_put;
> +               goto fail_init_mq;
> +       err = binder_init_ns(ns);
> +       if (err)
> +               goto fail_init_binder;
>
>         sem_init_ns(ns);
>         msg_init_ns(ns);
> @@ -65,7 +68,9 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
>
>         return ns;
>
> -fail_put:
> +fail_init_binder:
> +       mq_put_mnt(ns);
> +fail_init_mq:
>         put_user_ns(ns->user_ns);
>         ns_free_inum(&ns->ns);
>  fail_free:
> @@ -120,6 +125,7 @@ static void free_ipc_ns(struct ipc_namespace *ns)
>         sem_exit_ns(ns);
>         msg_exit_ns(ns);
>         shm_exit_ns(ns);
> +       binder_exit_ns(ns);
>
>         dec_ipc_namespaces(ns->ucounts);
>         put_user_ns(ns->user_ns);
> --
> 2.11.0

* Re: [PATCH V3] binder: ipc namespace support for android binder
  2018-11-08 13:02 [PATCH V3] binder: ipc namespace support for android binder chouryzhou(周威)
  2018-11-08 14:28 ` Christian Brauner
  2018-11-09 17:54 ` Todd Kjos
@ 2018-11-09 18:26 ` Davidlohr Bueso
  2018-11-09 19:03   ` Todd Kjos
  2 siblings, 1 reply; 12+ messages in thread
From: Davidlohr Bueso @ 2018-11-09 18:26 UTC (permalink / raw)
  To: chouryzhou(周威); +Cc: gregkh, arve, tkjos, akpm, devel, linux-kernel

On Thu, 08 Nov 2018, chouryzhou(周威) wrote:

>+#ifdef CONFIG_ANDROID_BINDER_IPC
>+       /* next fields are for binder */
>+       struct mutex      binder_procs_lock;
>+       struct hlist_head binder_procs;
>+       struct hlist_head binder_contexts;
>+#endif

Now, I took a look at how the binder_procs list is used; and no, what
follows isn't really related to this patch, but a general observation.

I think that a mutex is also overkill and you might wanna replace it
with a spinlock/rwlock. Can anything block while holding the binder_procs_lock?
I don't see anything... you mainly need it for walking the hlist and calling
print_binder_proc[_stat]() - which will take the proc->inner_lock anyway, so
no blocking there. Also, if this is about long hold times, dunno, the
rb_first_cached primitive might reduce some of it, although I don't know
how big the rbtrees in binder can get and if it matters at all.
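
For reference, a minimal sketch of what rb_first_cached usage looks like,
not tied to the binder code (the struct and function names here are made up
for illustration):

#include <linux/rbtree.h>
#include <linux/types.h>

struct example_thread {
        int pid;
        struct rb_node rb_node;
};

/* rb_root_cached keeps a pointer to the leftmost node, so the smallest
 * key is reachable in O(1) via rb_first_cached() instead of walking
 * down the tree. */
static struct rb_root_cached example_threads = RB_ROOT_CACHED;

static void example_insert(struct example_thread *new)
{
        struct rb_node **p = &example_threads.rb_root.rb_node;
        struct rb_node *parent = NULL;
        bool leftmost = true;

        while (*p) {
                struct example_thread *t;

                parent = *p;
                t = rb_entry(parent, struct example_thread, rb_node);
                if (new->pid < t->pid) {
                        p = &(*p)->rb_left;
                } else {
                        p = &(*p)->rb_right;
                        leftmost = false;
                }
        }
        rb_link_node(&new->rb_node, parent, p);
        rb_insert_color_cached(&new->rb_node, &example_threads, leftmost);
}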

Anyway, that said and along with addressing Todd's comments, the ipc/ bits look
good. Feel free to add my:

Reviewed-by: Davidlohr Bueso <dave@stgolabs.net>

Thanks,
Davidlohr

* Re: [PATCH V3] binder: ipc namespace support for android binder
  2018-11-09 18:26 ` [PATCH V3] binder: ipc namespace support for android binder Davidlohr Bueso
@ 2018-11-09 19:03   ` Todd Kjos
  2018-11-09 19:16     ` Davidlohr Bueso
  0 siblings, 1 reply; 12+ messages in thread
From: Todd Kjos @ 2018-11-09 19:03 UTC (permalink / raw)
  To: dave
  Cc: chouryzhou, Greg Kroah-Hartman, Arve Hjønnevåg,
	Todd Kjos, akpm, open list:ANDROID DRIVERS, LKML

On Fri, Nov 9, 2018 at 10:27 AM Davidlohr Bueso <dave@stgolabs.net> wrote:
>
> On Thu, 08 Nov 2018, chouryzhou(周威) wrote:
>
> >+#ifdef CONFIG_ANDROID_BINDER_IPC
> >+       /* next fields are for binder */
> >+       struct mutex      binder_procs_lock;
> >+       struct hlist_head binder_procs;
> >+       struct hlist_head binder_contexts;
> >+#endif
>
> Now, I took a look at how the binder_procs list is used; and no, what
> follows isn't really related to this patch, but a general observation.
>
> I think that a mutex is also an overkill and you might wanna replace it
> with a spinlock/rwlock. Can anything block while holding the binder_procs_lock?
> I don't see anything... you mainly need it for consulting the hlist calling
> print_binder_proc[_stat]() - which will take the proc->inner_lock anyway, so
> no blocking there.

print_binder_proc() drops proc->inner_lock and calls
binder_alloc_print_allocated() which acquires proc->alloc->mutex.
Likewise, print_binder_stats() calls print_binder_proc_stats() which
drops its locks to call binder_alloc_print_pages() which also acquires
proc->alloc->mutex. So binder_procs_lock needs to be a mutex since it
can block on proc->alloc->mutex.

> Also, if this is perhaps because of long hold times, dunno,
> the rb_first_cached primitive might reduce some of it, although I don't know
> how big the rbtrees in binder can get and if it matters at all.
>
> Anyway, that said and along with addressing Todd's comments, the ipc/ bits look
> good. Feel free to add my:
>
> Reviewed-by: Davidlohr Bueso <dave@stgolabs.net>
>
> Thanks,
> Davidlohr

* Re: [PATCH V3] binder: ipc namespace support for android binder
  2018-11-09 19:03   ` Todd Kjos
@ 2018-11-09 19:16     ` Davidlohr Bueso
  0 siblings, 0 replies; 12+ messages in thread
From: Davidlohr Bueso @ 2018-11-09 19:16 UTC (permalink / raw)
  To: Todd Kjos
  Cc: chouryzhou, Greg Kroah-Hartman, Arve Hjønnevåg, Todd Kjos, akpm,
	open list:ANDROID DRIVERS, LKML

On Fri, 09 Nov 2018, Todd Kjos wrote:
>
>print_binder_proc() drops proc->inner_lock and calls
>binder_alloc_print_allocated() which acquires proc->alloc->mutex.
>Likewise, print_binder_stats() calls print_binder_proc_stats() which
>drops its locks to call binder_alloc_print_pages() which also acquires
>proc->alloc->mutex. So binder_procs_lock needs to be a mutex since it
>can block on proc->alloc->mutex.

Ah, very good then.

Thanks,
Davidlohr

* Re: Re: [PATCH V3] binder: ipc namespace support for android binder
  2018-11-09 17:54 ` Todd Kjos
@ 2018-11-10  3:09   ` chouryzhou(周威)
  2018-11-10  4:16     ` Todd Kjos
  0 siblings, 1 reply; 12+ messages in thread
From: chouryzhou(周威) @ 2018-11-10  3:09 UTC (permalink / raw)
  To: tkjos; +Cc: gregkh, arve, tkjos, akpm, dave, devel, linux-kernel, chouryzhou

> 
> I still don't understand the dependencies on SYSVIPC or POSIX_MQUEUE.
> It seems like this mechanism would work even if both are disabled --
> as long as IPC_NS is enabled. Seems cleaner to change init/Kconfig and
> allow IPC_NS if CONFIG_ANDROID_BINDER_IPC and change this line to
> "#ifndef CONFIG_IPC_NS"

Let me explain it in detail. If SYSVIPC and IPC_NS are both defined,
current->nsproxy->ipc_ns holds the per-namespace ipc state, and we just use
it. If SYSVIPC (or POSIX_MQUEUE) is defined while IPC_NS is not set,
current->nsproxy->ipc_ns always refers to the init_ipc_ns in ipc/msgutil.c,
which is also fine for us. But if neither SYSVIPC nor POSIX_MQUEUE is set
(IPC_NS cannot be set in that situation), there is no current->nsproxy->ipc_ns
at all, so we define a fake init_ipc_ns in binder.c and use it.
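
For reference, this is how the ipcns macro from the patch resolves in each
of those cases (the comment is annotation only; the code itself is the hunk
already posted above):

/*
 *  SYSVIPC || POSIX_MQUEUE, IPC_NS=y : current->nsproxy->ipc_ns is the
 *                                      caller's own ipc namespace
 *  SYSVIPC || POSIX_MQUEUE, IPC_NS=n : current->nsproxy->ipc_ns always
 *                                      points at init_ipc_ns from
 *                                      ipc/msgutil.c
 *  neither (so IPC_NS cannot be set) : nsproxy has no ipc_ns at all, so
 *                                      binder.c supplies its own
 *                                      init_ipc_ns as a stand-in
 */
#if !defined(CONFIG_SYSVIPC) && !defined(CONFIG_POSIX_MQUEUE)
struct ipc_namespace init_ipc_ns;
#define ipcns  (&init_ipc_ns)
#else
#define ipcns  (current->nsproxy->ipc_ns)
#endif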

> why eliminate name? The string name is very useful for differentiating
> normal "framework" binder transactions vs "hal" or "vendor"
> transactions. If we just have a device number it will be hard to tell
> in the logs even which namespace it belongs to. We need to keep both
> the "name" (for which there might be multiple in each ns) and some
> indication of which namespace this is. Maybe we assign some sort of
> namespace ID during binder_init_ns().

 I will retain the name of the device. The inum of the ipc_ns can be
treated as the namespace ID.
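
A rough sketch of what that could look like when printing a context,
assuming a V4 binder_context that keeps the device name and a back-pointer
to its ipc_namespace (the ipc_ns and name fields below are hypothetical;
only ns.inum is an existing field):

static void binder_seq_print_context(struct seq_file *m,
                                     struct binder_context *context)
{
        /* ns.inum is the inode number identifying the ipc namespace,
         * i.e. the number visible through /proc/<pid>/ns/ipc */
        seq_printf(m, "%u-%s", context->ipc_ns->ns.inum, context->name);
}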

> As mentioned above, we need to retain name and probably also want a ns
> id of some sort. So context now has 3 components if IPC_NS, so maybe a
> helper function to print context like:
> 
> static void binder_seq_print_context(struct seq_file *m, struct
> binder_context *context)
> {
> #ifdef CONFIG_IPC_NS
>           seq_printf(m, "%d-%d-%s", context->ns_id, context->device,
> context->name);
> #else
>           seq_printf(m, "%d", context->name);
> #endif
> }
> 
> (same comment below everywhere context is printed)
> 
> Should these debugfs nodes should be ns aware and only print debugging
> info for the context of the thread accessing the node? If so, we would
> also want to be namespace-aware when printing pids.

Nowadays, debugfs is not namespace-aware, and the pid namespace is not
associated with the ipc namespace. Wouldn't it be more complicated to debug
if we only printed the info for the current thread's namespace, since we
would have to enter that ipc namespace first? Adding the ipc inum should be
no problem, though.

- choury -



* Re: Re: [PATCH V3] binder: ipc namespace support for android binder
  2018-11-10  3:09   ` chouryzhou(周威)
@ 2018-11-10  4:16     ` Todd Kjos
  2018-11-10  4:43       ` Re: [PATCH V3] binder: ipc namespace support for android binder(Internet mail) chouryzhou(周威)
  0 siblings, 1 reply; 12+ messages in thread
From: Todd Kjos @ 2018-11-10  4:16 UTC (permalink / raw)
  To: chouryzhou
  Cc: Greg Kroah-Hartman, Arve Hjønnevåg, Todd Kjos, akpm,
	dave, open list:ANDROID DRIVERS, LKML, chouryzhou

On Fri, Nov 9, 2018 at 7:09 PM chouryzhou(周威) <chouryzhou@tencent.com> wrote:
>
> >
> > I still don't understand the dependencies on SYSVIPC or POSIX_MQUEUE.
> > It seems like this mechanism would work even if both are disabled --
> > as long as IPC_NS is enabled. Seems cleaner to change init/Kconfig and
> > allow IPC_NS if CONFIG_ANDROID_BINDER_IPC and change this line to
> > "#ifndef CONFIG_IPC_NS"
>
> Let me explain it in detail. If SYSIPC and IPC_NS are both defined,
> current->nsproxy->ipc_ns will save the ipc namespace variables. We just use
> it. If SYSIPC (or POSIX_MQUEUE) is defined while IPC_NS is not set,
> current->nsproxy->ipc_ns will always refer to init_ipc_ns in ipc/msgutil.c,
> which is also fine to us. But if neither SYSIPC nor POSIX_MQUEUE is set
> (IPC_NS can't be set in this situation), there is no current->nsproxy->ipc_ns.
> We make a fack init_ipc_ns here and use it.

Yes, I can read the code. I'm wondering specifically about SYSVIPC and
POSIX_MQUEUE. Even with your code changes, binder has no dependency on
these configs. Why are you creating one? The actual dependency with
your changes is on "current->nsproxy->ipc_ns" being initialized for
binder -- which is dependent on CONFIG_IPC_NS being enabled, isn't it?

If SYSVIPC or POSIX_MQUEUE are enabled, but IPC_NS is disabled, does this work?

>
> > why eliminate name? The string name is very useful for differentiating
> > normal "framework" binder transactions vs "hal" or "vendor"
> > transactions. If we just have a device number it will be hard to tell
> > in the logs even which namespace it belongs to. We need to keep both
> > the "name" (for which there might be multiple in each ns) and some
> > indication of which namespace this is. Maybe we assign some sort of
> > namespace ID during binder_init_ns().
>
>  I will remain the name of device. The  inum of ipc_ns can be treated as
> namespace ID in ipc_ns.
>
> > As mentioned above, we need to retain name and probably also want a ns
> > id of some sort. So context now has 3 components if IPC_NS, so maybe a
> > helper function to print context like:
> >
> > static void binder_seq_print_context(struct seq_file *m, struct
> > binder_context *context)
> > {
> > #ifdef CONFIG_IPC_NS
> >           seq_printf(m, "%d-%d-%s", context->ns_id, context->device,
> > context->name);
> > #else
> >           seq_printf(m, "%d", context->name);
> > #endif
> > }
> >
> > (same comment below everywhere context is printed)
> >
> > Should these debugfs nodes be ns aware and only print debugging
> > info for the context of the thread accessing the node? If so, we would
> > also want to be namespace-aware when printing pids.
>
> Currently, debugfs is not namespace-aware, and the pid namespace is not associated
> with the ipc namespace. Wouldn't it be harder to debug if we only printed the
> info for the current thread's namespace, since we would first have to enter that
> ipc namespace? But adding the ipc inum should be no problem.

With the name and ns id, debugging would be fine from the host side. I
don't understand the container use cases enough to know whether you need to
be able to debug binder transaction failures from within the container
-- in which case it seems like you'd want the container's view of the
PIDs -- but obviously this depends on how the containers are set up
and what the use cases really are. I'm ok with leaving that for a
later patch.
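For what it's worth, a rough sketch of printing a container-relative pid,
assuming binder_proc keeps its task pointer (tsk) and that the pid namespace
of whoever is reading the debugfs file is the one to translate into (purely
illustrative, not part of the patch):

	/* Translate the proc's pid into the pid namespace of the task
	 * reading the debugfs file instead of the initial namespace. */
	pid_t vpid = task_pid_nr_ns(proc->tsk, task_active_pid_ns(current));

	seq_printf(m, "proc %d\n", vpid);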

>
> - choury -
>
>

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: Re: [PATCH V3] binder: ipc namespace support for android binder(Internet mail)
  2018-11-10  4:16     ` Todd Kjos
@ 2018-11-10  4:43       ` chouryzhou(周威)
  2018-11-10  5:24         ` Todd Kjos
  0 siblings, 1 reply; 12+ messages in thread
From: chouryzhou(周威) @ 2018-11-10  4:43 UTC (permalink / raw)
  To: tkjos; +Cc: gregkh, arve, tkjos, akpm, dave, devel, linux-kernel, chouryzhou


> > > I still don't understand the dependencies on SYSVIPC or POSIX_MQUEUE.
> > > It seems like this mechanism would work even if both are disabled --
> > > as long as IPC_NS is enabled. Seems cleaner to change init/Kconfig and
> > > allow IPC_NS if CONFIG_ANDROID_BINDER_IPC and change this line to
> > > "#ifndef CONFIG_IPC_NS"
> >
> > Let me explain it in detail. If SYSVIPC and IPC_NS are both defined,
> > current->nsproxy->ipc_ns holds the ipc namespace variables, and we simply use
> > it. If SYSVIPC (or POSIX_MQUEUE) is defined while IPC_NS is not set,
> > current->nsproxy->ipc_ns will always refer to init_ipc_ns in ipc/msgutil.c,
> > which is also fine for us. But if neither SYSVIPC nor POSIX_MQUEUE is set
> > (IPC_NS cannot be set in that case), there is no current->nsproxy->ipc_ns.
> > We make a fake init_ipc_ns here and use it.
> 
> Yes, I can read the code. I'm wondering specifically about SYSVIPC and
> POSIX_MQUEUE. Even with your code changes, binder has no dependency on
> these configs. Why are you creating one? The actual dependency with
> your changes is on "current->nsproxy->ipc_ns" being initialized for
> binder -- which is dependent on CONFIG_IPC_NS being enabled, isn't it?
> 
> If SYSVIPC or POSIX_MQUEUE are enabled, but IPC_NS is disabled, does this work? 

If IPC_NS is disabled, "current->nsproxy->ipc_ns" still exists; it is a static
reference to "init_ipc_ns" (the one in ipc/msgutil.c, not the one I define in
binder.c), with no namespace isolation. Every process gets the same one, so
everything behaves the same as without this patch.
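For reference, the reason this holds is the !CONFIG_IPC_NS stub of
copy_ipcs(); roughly (paraphrasing include/linux/ipc_namespace.h, not copied
verbatim):

static inline struct ipc_namespace *copy_ipcs(unsigned long flags,
	struct user_namespace *user_ns, struct ipc_namespace *ns)
{
	/* Without CONFIG_IPC_NS a new ipc namespace can never be
	 * created: clone()/unshare() just hand back the parent's
	 * namespace, which is ultimately init_ipc_ns for every task. */
	if (flags & CLONE_NEWIPC)
		return ERR_PTR(-EINVAL);
	return ns;
}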

- choury -


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: Re: [PATCH V3] binder: ipc namespace support for android binder(Internet mail)
  2018-11-10  4:43       ` Re: [PATCH V3] binder: ipc namespace support for android binder(Internet mail) chouryzhou(周威)
@ 2018-11-10  5:24         ` Todd Kjos
  2018-11-10  5:43           ` chouryzhou(周威)
  0 siblings, 1 reply; 12+ messages in thread
From: Todd Kjos @ 2018-11-10  5:24 UTC (permalink / raw)
  To: chouryzhou
  Cc: Greg Kroah-Hartman, Arve Hjønnevåg, Todd Kjos, akpm,
	dave, open list:ANDROID DRIVERS, LKML, chouryzhou

On Fri, Nov 9, 2018 at 8:43 PM chouryzhou(周威) <chouryzhou@tencent.com> wrote:

>
> If IPC_NS is disabled, "current->nsproxy->ipc_ns" still exists; it is a static
> reference to "init_ipc_ns" (the one in ipc/msgutil.c, not the one I define in
> binder.c), with no namespace isolation. Every process gets the same one, so
> everything behaves the same as without this patch.

Except, as far as I can tell, binder_init_ns() would never have been
called on it, so the mutex and list heads are not initialized and it's
completely broken. Am I missing something? How do those fields get
initialized in this case?

>
> - choury -
>

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: Re: [PATCH V3] binder: ipc namespace support for android binder(Internet mail)
  2018-11-10  5:24         ` Todd Kjos
@ 2018-11-10  5:43           ` chouryzhou(周威)
  2018-11-10  6:38             ` Todd Kjos
  0 siblings, 1 reply; 12+ messages in thread
From: chouryzhou(周威) @ 2018-11-10  5:43 UTC (permalink / raw)
  To: tkjos; +Cc: gregkh, arve, tkjos, akpm, dave, devel, linux-kernel, chouryzhou

> >
> > If IPC_NS is disabled, "current->nsproxy->ipc_ns" still exists; it is a static
> > reference to "init_ipc_ns" (the one in ipc/msgutil.c, not the one I define in
> > binder.c), with no namespace isolation. Every process gets the same one, so
> > everything behaves the same as without this patch.
> 
> Except, as far as I can tell, binder_init_ns() would never have been
> called on it, so the mutex and list heads are not initialized and it's
> completely broken. Am I missing something? How do those fields get
> initialized in this case?

> @@ -5832,8 +5888,12 @@ static int __init binder_init(void)
>                         goto err_init_binder_device_failed;
>         }
>
> -       return ret;
> +       ret = binder_init_ns(&init_ipc_ns);
> +       if (ret)
> +               goto err_init_namespace_failed;
>
> +       return ret;

They are initialized here.

- choury -


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: Re: [PATCH V3] binder: ipc namespace support for android binder(Internet mail)
  2018-11-10  5:43           ` chouryzhou(周威)
@ 2018-11-10  6:38             ` Todd Kjos
  0 siblings, 0 replies; 12+ messages in thread
From: Todd Kjos @ 2018-11-10  6:38 UTC (permalink / raw)
  To: chouryzhou
  Cc: Greg Kroah-Hartman, Arve Hjønnevåg, Todd Kjos, akpm,
	dave, open list:ANDROID DRIVERS, LKML, chouryzhou

On Fri, Nov 9, 2018 at 9:43 PM chouryzhou(周威) <chouryzhou@tencent.com> wrote:
>
> > >
> > > If IPC_NS is disabled, "current->nsproxy->ipc_ns" still exists; it is a static
> > > reference to "init_ipc_ns" (the one in ipc/msgutil.c, not the one I define in
> > > binder.c), with no namespace isolation. Every process gets the same one, so
> > > everything behaves the same as without this patch.
> >
> > Except, as far as I can tell, binder_init_ns() would never have been
> > called on it, so the mutex and list heads are not initialized and it's
> > completely broken. Am I missing something? How do those fields get
> > initialized in this case?
>
> > @@ -5832,8 +5888,12 @@ static int __init binder_init(void)
> >                         goto err_init_binder_device_failed;
> >         }
> >
> > -       return ret;
> > +       ret = binder_init_ns(&init_ipc_ns);
> > +       if (ret)
> > +               goto err_init_namespace_failed;
> >
> > +       return ret;
>
> They are initialized here.

Ok, this init_ipc_ns is a global declared in msgutil.c if SYSVIPC ||
POSIX_MQUEUE. This seems kinda hacky, but now I finally see why the
dependency... msgutil.c is the only file we can count on if !IPC_NS &&
(SYSVIPC || POSIX_MQUEUE). There must be a cleaner way to do this; I
really don't like this dependency... wouldn't it be cleaner to do:

#ifndef CONFIG_IPC_NS
static struct ipc_namespace binder_ipc_ns;
#define ipcns  (&binder_ipc_ns)
#else
#define ipcns  (current->nsproxy->ipc_ns)
#endif

(and make the initialization of binder_ipc_ns conditional on IPC_NS)

This gets us the same thing without the incestuous dependency on the
msgutil.c version of init_ipc_ns... and then binder doesn't rely on
SYSVIPC or POSIX_MQUEUE directly.
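A minimal sketch of that conditional initialization inside binder_init()
(just a fragment; binder_init_ns() and the error label come from this patch,
everything else is illustrative):

#ifndef CONFIG_IPC_NS
	/* No ipc namespace code will ever run binder_init_ns() for us,
	 * so set up the driver-private fallback namespace once here. */
	ret = binder_init_ns(&binder_ipc_ns);
	if (ret)
		goto err_init_namespace_failed;
#endif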

>
> - choury -
>

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2018-11-10  6:38 UTC | newest]

Thread overview: 12+ messages
-- links below jump to the message on this page --
2018-11-08 13:02 [PATCH V3] binder: ipc namespace support for android binder chouryzhou(周威)
2018-11-08 14:28 ` Christian Brauner
2018-11-09 17:54 ` Todd Kjos
2018-11-10  3:09   ` chouryzhou(周威)
2018-11-10  4:16     ` Todd Kjos
2018-11-10  4:43       ` Re: [PATCH V3] binder: ipc namespace support for android binder(Internet mail) chouryzhou(周威)
2018-11-10  5:24         ` Todd Kjos
2018-11-10  5:43           ` chouryzhou(周威)
2018-11-10  6:38             ` Todd Kjos
2018-11-09 18:26 ` [PATCH V3] binder: ipc namespace support for android binder Davidlohr Bueso
2018-11-09 19:03   ` Todd Kjos
2018-11-09 19:16     ` Davidlohr Bueso
