* [PATCH] android: binder: Disable preemption while holding the global binder lock
@ 2016-09-08 16:12 Todd Kjos
  2016-09-08 16:15 ` Todd Kjos
                   ` (2 more replies)
  0 siblings, 3 replies; 27+ messages in thread
From: Todd Kjos @ 2016-09-08 16:12 UTC (permalink / raw)
  To: Greg Kroah-Hartman, Arve Hjønnevåg, Riley Andrews,
	devel, linux-kernel

In Android systems, the display pipeline relies on low
latency binder transactions and is therefore sensitive to
delays caused by contention for the global binder lock.
Jank is significantly reduced by disabling preemption
while the global binder lock is held.

Originally-from: Riley Andrews <riandrews@google.com>
Signed-off-by: Todd Kjos <tkjos@google.com>
---
 drivers/android/binder.c | 194 +++++++++++++++++++++++++++++++++++------------
 1 file changed, 146 insertions(+), 48 deletions(-)

diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 16288e7..c36e420 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -379,6 +379,7 @@ static int task_get_unused_fd_flags(struct
binder_proc *proc, int flags)
  struct files_struct *files = proc->files;
  unsigned long rlim_cur;
  unsigned long irqs;
+ int ret;

  if (files == NULL)
  return -ESRCH;
@@ -389,7 +390,11 @@ static int task_get_unused_fd_flags(struct
binder_proc *proc, int flags)
  rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE);
  unlock_task_sighand(proc->tsk, &irqs);

- return __alloc_fd(files, 0, rlim_cur, flags);
+ preempt_enable_no_resched();
+ ret = __alloc_fd(files, 0, rlim_cur, flags);
+ preempt_disable();
+
+ return ret;
 }

 /*
@@ -398,8 +403,11 @@ static int task_get_unused_fd_flags(struct
binder_proc *proc, int flags)
 static void task_fd_install(
  struct binder_proc *proc, unsigned int fd, struct file *file)
 {
- if (proc->files)
+ if (proc->files) {
+ preempt_enable_no_resched();
  __fd_install(proc->files, fd, file);
+ preempt_disable();
+ }
 }

 /*
@@ -427,6 +435,7 @@ static inline void binder_lock(const char *tag)
 {
  trace_binder_lock(tag);
  mutex_lock(&binder_main_lock);
+ preempt_disable();
  trace_binder_locked(tag);
 }

@@ -434,8 +443,65 @@ static inline void binder_unlock(const char *tag)
 {
  trace_binder_unlock(tag);
  mutex_unlock(&binder_main_lock);
+ preempt_enable();
+}
+
+static inline void *kzalloc_nopreempt(size_t size)
+{
+ void *ptr;
+
+ ptr = kzalloc(size, GFP_NOWAIT);
+ if (ptr)
+ return ptr;
+
+ preempt_enable_no_resched();
+ ptr = kzalloc(size, GFP_KERNEL);
+ preempt_disable();
+
+ return ptr;
+}
+
+static inline long copy_to_user_nopreempt(void __user *to,
+  const void *from, long n)
+{
+ long ret;
+
+ preempt_enable_no_resched();
+ ret = copy_to_user(to, from, n);
+ preempt_disable();
+ return ret;
+}
+
+static inline long copy_from_user_nopreempt(void *to,
+    const void __user *from,
+    long n)
+{
+ long ret;
+
+ preempt_enable_no_resched();
+ ret = copy_from_user(to, from, n);
+ preempt_disable();
+ return ret;
 }

+#define get_user_nopreempt(x, ptr) \
+({ \
+ int __ret; \
+ preempt_enable_no_resched(); \
+ __ret = get_user(x, ptr); \
+ preempt_disable(); \
+ __ret; \
+})
+
+#define put_user_nopreempt(x, ptr) \
+({ \
+ int __ret; \
+ preempt_enable_no_resched(); \
+ __ret = put_user(x, ptr); \
+ preempt_disable(); \
+ __ret; \
+})
+
 static void binder_set_nice(long nice)
 {
  long min_nice;
@@ -568,6 +634,8 @@ static int binder_update_page_range(struct
binder_proc *proc, int allocate,
  else
  mm = get_task_mm(proc->tsk);

+ preempt_enable_no_resched();
+
  if (mm) {
  down_write(&mm->mmap_sem);
  vma = proc->vma;
@@ -622,6 +690,9 @@ static int binder_update_page_range(struct
binder_proc *proc, int allocate,
  up_write(&mm->mmap_sem);
  mmput(mm);
  }
+
+ preempt_disable();
+
  return 0;

 free_range:
@@ -644,6 +715,9 @@ err_no_vma:
  up_write(&mm->mmap_sem);
  mmput(mm);
  }
+
+ preempt_disable();
+
  return -ENOMEM;
 }

@@ -903,7 +977,7 @@ static struct binder_node *binder_new_node(struct
binder_proc *proc,
  return NULL;
  }

- node = kzalloc(sizeof(*node), GFP_KERNEL);
+ node = kzalloc_nopreempt(sizeof(*node));
  if (node == NULL)
  return NULL;
  binder_stats_created(BINDER_STAT_NODE);
@@ -1040,7 +1114,7 @@ static struct binder_ref
*binder_get_ref_for_node(struct binder_proc *proc,
  else
  return ref;
  }
- new_ref = kzalloc(sizeof(*ref), GFP_KERNEL);
+ new_ref = kzalloc_nopreempt(sizeof(*ref));
  if (new_ref == NULL)
  return NULL;
  binder_stats_created(BINDER_STAT_REF);
@@ -1438,14 +1512,14 @@ static void binder_transaction(struct binder_proc *proc,
  e->to_proc = target_proc->pid;

  /* TODO: reuse incoming transaction for reply */
- t = kzalloc(sizeof(*t), GFP_KERNEL);
+ t = kzalloc_nopreempt(sizeof(*t));
  if (t == NULL) {
  return_error = BR_FAILED_REPLY;
  goto err_alloc_t_failed;
  }
  binder_stats_created(BINDER_STAT_TRANSACTION);

- tcomplete = kzalloc(sizeof(*tcomplete), GFP_KERNEL);
+ tcomplete = kzalloc_nopreempt(sizeof(*tcomplete));
  if (tcomplete == NULL) {
  return_error = BR_FAILED_REPLY;
  goto err_alloc_tcomplete_failed;
@@ -1502,15 +1576,16 @@ static void binder_transaction(struct binder_proc *proc,
  offp = (binder_size_t *)(t->buffer->data +
  ALIGN(tr->data_size, sizeof(void *)));

- if (copy_from_user(t->buffer->data, (const void __user *)(uintptr_t)
-   tr->data.ptr.buffer, tr->data_size)) {
+ if (copy_from_user_nopreempt(t->buffer->data,
+ (const void __user *)(uintptr_t) tr->data.ptr.buffer,
+ tr->data_size)) {
  binder_user_error("%d:%d got transaction with invalid data ptr\n",
  proc->pid, thread->pid);
  return_error = BR_FAILED_REPLY;
  goto err_copy_data_failed;
  }
- if (copy_from_user(offp, (const void __user *)(uintptr_t)
-   tr->data.ptr.offsets, tr->offsets_size)) {
+ if (copy_from_user_nopreempt(offp, (const void __user *)(uintptr_t)
+     tr->data.ptr.offsets, tr->offsets_size)) {
  binder_user_error("%d:%d got transaction with invalid offsets ptr\n",
  proc->pid, thread->pid);
  return_error = BR_FAILED_REPLY;
@@ -1770,7 +1845,7 @@ static int binder_thread_write(struct binder_proc *proc,
  void __user *end = buffer + size;

  while (ptr < end && thread->return_error == BR_OK) {
- if (get_user(cmd, (uint32_t __user *)ptr))
+ if (get_user_nopreempt(cmd, (uint32_t __user *)ptr))
  return -EFAULT;
  ptr += sizeof(uint32_t);
  trace_binder_command(cmd);
@@ -1788,7 +1863,7 @@ static int binder_thread_write(struct binder_proc *proc,
  struct binder_ref *ref;
  const char *debug_string;

- if (get_user(target, (uint32_t __user *)ptr))
+ if (get_user_nopreempt(target, (uint32_t __user *)ptr))
  return -EFAULT;
  ptr += sizeof(uint32_t);
  if (target == 0 && binder_context_mgr_node &&
@@ -1838,10 +1913,12 @@ static int binder_thread_write(struct binder_proc *proc,
  binder_uintptr_t cookie;
  struct binder_node *node;

- if (get_user(node_ptr, (binder_uintptr_t __user *)ptr))
+ if (get_user_nopreempt(node_ptr,
+ (binder_uintptr_t __user *)ptr))
  return -EFAULT;
  ptr += sizeof(binder_uintptr_t);
- if (get_user(cookie, (binder_uintptr_t __user *)ptr))
+ if (get_user_nopreempt(cookie,
+ (binder_uintptr_t __user *)ptr))
  return -EFAULT;
  ptr += sizeof(binder_uintptr_t);
  node = binder_get_node(proc, node_ptr);
@@ -1899,7 +1976,8 @@ static int binder_thread_write(struct binder_proc *proc,
  binder_uintptr_t data_ptr;
  struct binder_buffer *buffer;

- if (get_user(data_ptr, (binder_uintptr_t __user *)ptr))
+ if (get_user_nopreempt(data_ptr,
+ (binder_uintptr_t __user *)ptr))
  return -EFAULT;
  ptr += sizeof(binder_uintptr_t);

@@ -1941,7 +2019,7 @@ static int binder_thread_write(struct binder_proc *proc,
  case BC_REPLY: {
  struct binder_transaction_data tr;

- if (copy_from_user(&tr, ptr, sizeof(tr)))
+ if (copy_from_user_nopreempt(&tr, ptr, sizeof(tr)))
  return -EFAULT;
  ptr += sizeof(tr);
  binder_transaction(proc, thread, &tr, cmd == BC_REPLY);
@@ -1991,10 +2069,12 @@ static int binder_thread_write(struct binder_proc *proc,
  struct binder_ref *ref;
  struct binder_ref_death *death;

- if (get_user(target, (uint32_t __user *)ptr))
+ if (get_user_nopreempt(target,
+ (uint32_t __user *)ptr))
  return -EFAULT;
  ptr += sizeof(uint32_t);
- if (get_user(cookie, (binder_uintptr_t __user *)ptr))
+ if (get_user_nopreempt(cookie,
+ (binder_uintptr_t __user *)ptr))
  return -EFAULT;
  ptr += sizeof(binder_uintptr_t);
  ref = binder_get_ref(proc, target);
@@ -2023,7 +2103,7 @@ static int binder_thread_write(struct binder_proc *proc,
  proc->pid, thread->pid);
  break;
  }
- death = kzalloc(sizeof(*death), GFP_KERNEL);
+ death = kzalloc_nopreempt(sizeof(*death));
  if (death == NULL) {
  thread->return_error = BR_ERROR;
  binder_debug(BINDER_DEBUG_FAILED_TRANSACTION,
@@ -2077,8 +2157,8 @@ static int binder_thread_write(struct binder_proc *proc,
  struct binder_work *w;
  binder_uintptr_t cookie;
  struct binder_ref_death *death = NULL;
-
- if (get_user(cookie, (binder_uintptr_t __user *)ptr))
+ if (get_user_nopreempt(cookie,
+ (binder_uintptr_t __user *)ptr))
  return -EFAULT;

  ptr += sizeof(cookie);
@@ -2159,7 +2239,7 @@ static int binder_thread_read(struct binder_proc *proc,
  int wait_for_proc_work;

  if (*consumed == 0) {
- if (put_user(BR_NOOP, (uint32_t __user *)ptr))
+ if (put_user_nopreempt(BR_NOOP, (uint32_t __user *)ptr))
  return -EFAULT;
  ptr += sizeof(uint32_t);
  }
@@ -2170,7 +2250,8 @@ retry:

  if (thread->return_error != BR_OK && ptr < end) {
  if (thread->return_error2 != BR_OK) {
- if (put_user(thread->return_error2, (uint32_t __user *)ptr))
+ if (put_user_nopreempt(thread->return_error2,
+       (uint32_t __user *)ptr))
  return -EFAULT;
  ptr += sizeof(uint32_t);
  binder_stat_br(proc, thread, thread->return_error2);
@@ -2178,7 +2259,8 @@ retry:
  goto done;
  thread->return_error2 = BR_OK;
  }
- if (put_user(thread->return_error, (uint32_t __user *)ptr))
+ if (put_user_nopreempt(thread->return_error,
+       (uint32_t __user *)ptr))
  return -EFAULT;
  ptr += sizeof(uint32_t);
  binder_stat_br(proc, thread, thread->return_error);
@@ -2256,7 +2338,7 @@ retry:
  } break;
  case BINDER_WORK_TRANSACTION_COMPLETE: {
  cmd = BR_TRANSACTION_COMPLETE;
- if (put_user(cmd, (uint32_t __user *)ptr))
+ if (put_user_nopreempt(cmd, (uint32_t __user *) ptr))
  return -EFAULT;
  ptr += sizeof(uint32_t);

@@ -2298,15 +2380,16 @@ retry:
  node->has_weak_ref = 0;
  }
  if (cmd != BR_NOOP) {
- if (put_user(cmd, (uint32_t __user *)ptr))
+ if (put_user_nopreempt(cmd,
+ (uint32_t __user *) ptr))
  return -EFAULT;
  ptr += sizeof(uint32_t);
- if (put_user(node->ptr,
-     (binder_uintptr_t __user *)ptr))
+ if (put_user_nopreempt(node->ptr,
+      (binder_uintptr_t __user *)ptr))
  return -EFAULT;
  ptr += sizeof(binder_uintptr_t);
- if (put_user(node->cookie,
-     (binder_uintptr_t __user *)ptr))
+ if (put_user_nopreempt(node->cookie,
+      (binder_uintptr_t __user *)ptr))
  return -EFAULT;
  ptr += sizeof(binder_uintptr_t);

@@ -2349,11 +2432,12 @@ retry:
  cmd = BR_CLEAR_DEATH_NOTIFICATION_DONE;
  else
  cmd = BR_DEAD_BINDER;
- if (put_user(cmd, (uint32_t __user *)ptr))
+ if (put_user_nopreempt(cmd,
+       (uint32_t __user *) ptr))
  return -EFAULT;
  ptr += sizeof(uint32_t);
- if (put_user(death->cookie,
-     (binder_uintptr_t __user *)ptr))
+ if (put_user_nopreempt(death->cookie,
+       (binder_uintptr_t __user *) ptr))
  return -EFAULT;
  ptr += sizeof(binder_uintptr_t);
  binder_stat_br(proc, thread, cmd);
@@ -2420,10 +2504,10 @@ retry:
  ALIGN(t->buffer->data_size,
     sizeof(void *));

- if (put_user(cmd, (uint32_t __user *)ptr))
+ if (put_user_nopreempt(cmd, (uint32_t __user *) ptr))
  return -EFAULT;
  ptr += sizeof(uint32_t);
- if (copy_to_user(ptr, &tr, sizeof(tr)))
+ if (copy_to_user_nopreempt(ptr, &tr, sizeof(tr)))
  return -EFAULT;
  ptr += sizeof(tr);

@@ -2465,7 +2549,8 @@ done:
  binder_debug(BINDER_DEBUG_THREADS,
      "%d:%d BR_SPAWN_LOOPER\n",
      proc->pid, thread->pid);
- if (put_user(BR_SPAWN_LOOPER, (uint32_t __user *)buffer))
+ if (put_user_nopreempt(BR_SPAWN_LOOPER,
+       (uint32_t __user *) buffer))
  return -EFAULT;
  binder_stat_br(proc, thread, BR_SPAWN_LOOPER);
  }
@@ -2540,7 +2625,7 @@ static struct binder_thread
*binder_get_thread(struct binder_proc *proc)
  break;
  }
  if (*p == NULL) {
- thread = kzalloc(sizeof(*thread), GFP_KERNEL);
+ thread = kzalloc_nopreempt(sizeof(*thread));
  if (thread == NULL)
  return NULL;
  binder_stats_created(BINDER_STAT_THREAD);
@@ -2644,7 +2729,7 @@ static int binder_ioctl_write_read(struct file *filp,
  ret = -EINVAL;
  goto out;
  }
- if (copy_from_user(&bwr, ubuf, sizeof(bwr))) {
+ if (copy_from_user_nopreempt(&bwr, ubuf, sizeof(bwr))) {
  ret = -EFAULT;
  goto out;
  }
@@ -2662,7 +2747,7 @@ static int binder_ioctl_write_read(struct file *filp,
  trace_binder_write_done(ret);
  if (ret < 0) {
  bwr.read_consumed = 0;
- if (copy_to_user(ubuf, &bwr, sizeof(bwr)))
+ if (copy_to_user_nopreempt(ubuf, &bwr, sizeof(bwr)))
  ret = -EFAULT;
  goto out;
  }
@@ -2676,7 +2761,7 @@ static int binder_ioctl_write_read(struct file *filp,
  if (!list_empty(&proc->todo))
  wake_up_interruptible(&proc->wait);
  if (ret < 0) {
- if (copy_to_user(ubuf, &bwr, sizeof(bwr)))
+ if (copy_to_user_nopreempt(ubuf, &bwr, sizeof(bwr)))
  ret = -EFAULT;
  goto out;
  }
@@ -2686,7 +2771,7 @@ static int binder_ioctl_write_read(struct file *filp,
      proc->pid, thread->pid,
      (u64)bwr.write_consumed, (u64)bwr.write_size,
      (u64)bwr.read_consumed, (u64)bwr.read_size);
- if (copy_to_user(ubuf, &bwr, sizeof(bwr))) {
+ if (copy_to_user_nopreempt(ubuf, &bwr, sizeof(bwr))) {
  ret = -EFAULT;
  goto out;
  }
@@ -2768,7 +2853,8 @@ static long binder_ioctl(struct file *filp,
unsigned int cmd, unsigned long arg)
  goto err;
  break;
  case BINDER_SET_MAX_THREADS:
- if (copy_from_user(&proc->max_threads, ubuf, sizeof(proc->max_threads))) {
+ if (copy_from_user_nopreempt(&proc->max_threads, ubuf,
+     sizeof(proc->max_threads))) {
  ret = -EINVAL;
  goto err;
  }
@@ -2791,9 +2877,9 @@ static long binder_ioctl(struct file *filp,
unsigned int cmd, unsigned long arg)
  ret = -EINVAL;
  goto err;
  }
- if (put_user(BINDER_CURRENT_PROTOCOL_VERSION,
-     &ver->protocol_version)) {
- ret = -EINVAL;
+ if (put_user_nopreempt(BINDER_CURRENT_PROTOCOL_VERSION,
+       &ver->protocol_version)) {
+ ret = -EINVAL;
  goto err;
  }
  break;
@@ -2854,6 +2940,7 @@ static const struct vm_operations_struct binder_vm_ops = {
 static int binder_mmap(struct file *filp, struct vm_area_struct *vma)
 {
  int ret;
+
  struct vm_struct *area;
  struct binder_proc *proc = filp->private_data;
  const char *failure_string;
@@ -2914,7 +3001,12 @@ static int binder_mmap(struct file *filp,
struct vm_area_struct *vma)
  vma->vm_ops = &binder_vm_ops;
  vma->vm_private_data = proc;

- if (binder_update_page_range(proc, 1, proc->buffer, proc->buffer +
PAGE_SIZE, vma)) {
+ /* binder_update_page_range assumes preemption is disabled */
+ preempt_disable();
+ ret = binder_update_page_range(proc, 1, proc->buffer,
+       proc->buffer + PAGE_SIZE, vma);
+ preempt_enable_no_resched();
+ if (ret) {
  ret = -ENOMEM;
  failure_string = "alloc small buf";
  goto err_alloc_small_buf_failed;
@@ -3185,8 +3277,12 @@ static void binder_deferred_func(struct
work_struct *work)
  int defer;

  do {
- binder_lock(__func__);
+ trace_binder_lock(__func__);
+ mutex_lock(&binder_main_lock);
+ trace_binder_locked(__func__);
+
  mutex_lock(&binder_deferred_lock);
+ preempt_disable();
  if (!hlist_empty(&binder_deferred_list)) {
  proc = hlist_entry(binder_deferred_list.first,
  struct binder_proc, deferred_work_node);
@@ -3212,7 +3308,9 @@ static void binder_deferred_func(struct work_struct *work)
  if (defer & BINDER_DEFERRED_RELEASE)
  binder_deferred_release(proc); /* frees proc */

- binder_unlock(__func__);
+ trace_binder_unlock(__func__);
+ mutex_unlock(&binder_main_lock);
+ preempt_enable_no_resched();
  if (files)
  put_files_struct(files);
  } while (proc);
-- 
2.8.0.rc3.226.g39d4020

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* Re: [PATCH] android: binder: Disable preemption while holding the global binder lock
  2016-09-08 16:12 [PATCH] android: binder: Disable preemption while holding the global binder lock Todd Kjos
@ 2016-09-08 16:15 ` Todd Kjos
  2016-09-08 17:46 ` Greg Kroah-Hartman
  2016-09-10 16:16 ` Christoph Hellwig
  2 siblings, 0 replies; 27+ messages in thread
From: Todd Kjos @ 2016-09-08 16:15 UTC (permalink / raw)
  To: Greg Kroah-Hartman, Arve Hjønnevåg, Riley Andrews,
	devel, linux-kernel

This was introduced in the 2015 Nexus devices and should have been
submitted to the kernel then, since we keep forward-porting it to each
new device.

On Thu, Sep 8, 2016 at 9:12 AM, Todd Kjos <tkjos@google.com> wrote:
> In Android systems, the display pipeline relies on low
> latency binder transactions and is therefore sensitive to
> delays caused by contention for the global binder lock.
> Jank is significantly reduced by disabling preemption
> while the global binder lock is held.
>
> Originally-from: Riley Andrews <riandrews@google.com>
> Signed-off-by: Todd Kjos <tkjos@google.com>
> [...]

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] android: binder: Disable preemption while holding the global binder lock
  2016-09-08 16:12 [PATCH] android: binder: Disable preemption while holding the global binder lock Todd Kjos
  2016-09-08 16:15 ` Todd Kjos
@ 2016-09-08 17:46 ` Greg Kroah-Hartman
  2016-09-10 16:16 ` Christoph Hellwig
  2 siblings, 0 replies; 27+ messages in thread
From: Greg Kroah-Hartman @ 2016-09-08 17:46 UTC (permalink / raw)
  To: Todd Kjos; +Cc: Arve Hjønnevåg, Riley Andrews, devel, linux-kernel

On Thu, Sep 08, 2016 at 09:12:50AM -0700, Todd Kjos wrote:
> In Android systems, the display pipeline relies on low
> latency binder transactions and is therefore sensitive to
> delays caused by contention for the global binder lock.
> Jank is significantly reduced by disabling preemption
> while the global binder lock is held.
> 
> Originally-from: Riley Andrews <riandrews@google.com>

So should the "From: " line be Riley?  Did Riley sign off on this?

> Signed-off-by: Todd Kjos <tkjos@google.com>
> ---
>  drivers/android/binder.c | 194 +++++++++++++++++++++++++++++++++++------------
>  1 file changed, 146 insertions(+), 48 deletions(-)
> 
> diff --git a/drivers/android/binder.c b/drivers/android/binder.c
> index 16288e7..c36e420 100644
> --- a/drivers/android/binder.c
> +++ b/drivers/android/binder.c
> @@ -379,6 +379,7 @@ static int task_get_unused_fd_flags(struct
> binder_proc *proc, int flags)
>   struct files_struct *files = proc->files;
>   unsigned long rlim_cur;
>   unsigned long irqs;
> + int ret;
> 
>   if (files == NULL)
>   return -ESRCH;
> @@ -389,7 +390,11 @@ static int task_get_unused_fd_flags(struct
> binder_proc *proc, int flags)
>   rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE);
>   unlock_task_sighand(proc->tsk, &irqs);
> 
> - return __alloc_fd(files, 0, rlim_cur, flags);
> + preempt_enable_no_resched();
> + ret = __alloc_fd(files, 0, rlim_cur, flags);
> + preempt_disable();
> +
> + return ret;
>  }

Your tabs are all eaten and broke the patch, so it can't be applied :(

Can you try again?

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] android: binder: Disable preemption while holding the global binder lock
  2016-09-08 16:12 [PATCH] android: binder: Disable preemption while holding the global binder lock Todd Kjos
  2016-09-08 16:15 ` Todd Kjos
  2016-09-08 17:46 ` Greg Kroah-Hartman
@ 2016-09-10 16:16 ` Christoph Hellwig
  2016-09-10 16:22   ` Peter Zijlstra
  2 siblings, 1 reply; 27+ messages in thread
From: Christoph Hellwig @ 2016-09-10 16:16 UTC (permalink / raw)
  To: Todd Kjos
  Cc: Greg Kroah-Hartman, Arve Hjønnevåg, Riley Andrews, devel,
	linux-kernel, Thomas Gleixner, Peter Zijlstra

On Thu, Sep 08, 2016 at 09:12:50AM -0700, Todd Kjos wrote:
> In Android systems, the display pipeline relies on low
> latency binder transactions and is therefore sensitive to
> delays caused by contention for the global binder lock.
> Jank is significantly reduced by disabling preemption
> while the global binder lock is held.

That's now how preempt_disable is supposed to use.  It is for critical
sections that use per-cpu or similar resources.
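
The pattern being described, as a minimal sketch (illustrative only; the
per-CPU variable and the function are invented for this example and are
not part of binder or of this patch):

#include <linux/percpu.h>
#include <linux/preempt.h>

/* Hypothetical per-CPU counter, used only to illustrate the idiom. */
static DEFINE_PER_CPU(unsigned long, demo_event_count);

static void demo_count_event(void)
{
	/*
	 * Disabling preemption pins the task to the current CPU, so
	 * "this CPU's" counter cannot change identity underneath us.
	 */
	preempt_disable();
	__this_cpu_inc(demo_event_count);
	preempt_enable();	/* may reschedule if need_resched was set */
}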

> 
> Originally-from: Riley Andrews <riandrews@google.com>
> Signed-off-by: Todd Kjos <tkjos@google.com>
> [...]
---end quoted text---

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] android: binder: Disable preemption while holding the global binder lock
  2016-09-10 16:16 ` Christoph Hellwig
@ 2016-09-10 16:22   ` Peter Zijlstra
  2016-09-10 16:37     ` Thomas Gleixner
  0 siblings, 1 reply; 27+ messages in thread
From: Peter Zijlstra @ 2016-09-10 16:22 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Todd Kjos, Greg Kroah-Hartman, Arve Hjønnevåg, Riley Andrews,
	devel, linux-kernel, Thomas Gleixner

On Sat, Sep 10, 2016 at 09:16:59AM -0700, Christoph Hellwig wrote:
> On Thu, Sep 08, 2016 at 09:12:50AM -0700, Todd Kjos wrote:
> > In Android systems, the display pipeline relies on low
> > latency binder transactions and is therefore sensitive to
> > delays caused by contention for the global binder lock.
> > Jank is significantly reduced by disabling preemption
> > while the global binder lock is held.
> 
> That's now how preempt_disable is supposed to use.  It is for critical

not, that's supposed to be _not_. Just to be absolutely clear, this is
NOT how you're supposed to use preempt_disable().

> sections that use per-cpu or similar resources.
> 
> > 
> > Originally-from: Riley Andrews <riandrews@google.com>
> > Signed-off-by: Todd Kjos <tkjos@google.com>

> > @@ -389,7 +390,11 @@ static int task_get_unused_fd_flags(struct
> > binder_proc *proc, int flags)
> >   rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE);
> >   unlock_task_sighand(proc->tsk, &irqs);
> > 
> > - return __alloc_fd(files, 0, rlim_cur, flags);
> > + preempt_enable_no_resched();
> > + ret = __alloc_fd(files, 0, rlim_cur, flags);
> > + preempt_disable();

And the fact that people want to use preempt_enable_no_resched() shows
that they're absolutely clueless.

This is so broken it's not funny.

NAK NAK NAK
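
The difference being objected to, sketched from the standard semantics
of the two helpers (hypothetical functions, not code from this patch):

#include <linux/preempt.h>

static void demo_leave_section(void)
{
	/*
	 * On a CONFIG_PREEMPT kernel, preempt_enable() drops the preempt
	 * count and reschedules right away if a wakeup set need_resched()
	 * while preemption was off.
	 */
	preempt_enable();
}

static void demo_leave_section_no_resched(void)
{
	/*
	 * preempt_enable_no_resched() only drops the count. A pending
	 * reschedule is left to some later, unrelated preemption point,
	 * so a woken high-priority task (such as the display pipeline
	 * this patch cares about) can sit runnable for longer, not shorter.
	 */
	preempt_enable_no_resched();
}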

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] android: binder: Disable preemption while holding the global binder lock
  2016-09-10 16:22   ` Peter Zijlstra
@ 2016-09-10 16:37     ` Thomas Gleixner
  2016-09-10 17:28       ` Greg Kroah-Hartman
  0 siblings, 1 reply; 27+ messages in thread
From: Thomas Gleixner @ 2016-09-10 16:37 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Christoph Hellwig, Todd Kjos, Greg Kroah-Hartman,
	Arve Hjønnevåg, Riley Andrews, devel, linux-kernel

On Sat, 10 Sep 2016, Peter Zijlstra wrote:

> On Sat, Sep 10, 2016 at 09:16:59AM -0700, Christoph Hellwig wrote:
> > On Thu, Sep 08, 2016 at 09:12:50AM -0700, Todd Kjos wrote:
> > > In Android systems, the display pipeline relies on low
> > > latency binder transactions and is therefore sensitive to
> > > delays caused by contention for the global binder lock.
> > > Jank is significantly reduced by disabling preemption
> > > while the global binder lock is held.
> > 
> > That's now how preempt_disable is supposed to use.  It is for critical
> 
> not, that's supposed to be _not_. Just to be absolutely clear, this is
> NOT how you're supposed to use preempt_disable().
> 
> > sections that use per-cpu or similar resources.
> > 
> > > 
> > > Originally-from: Riley Andrews <riandrews@google.com>
> > > Signed-off-by: Todd Kjos <tkjos@google.com>
> 
> > > @@ -389,7 +390,11 @@ static int task_get_unused_fd_flags(struct
> > > binder_proc *proc, int flags)
> > >   rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE);
> > >   unlock_task_sighand(proc->tsk, &irqs);
> > > 
> > > - return __alloc_fd(files, 0, rlim_cur, flags);
> > > + preempt_enable_no_resched();
> > > + ret = __alloc_fd(files, 0, rlim_cur, flags);
> > > + preempt_disable();
> 
> And the fact that people want to use preempt_enable_no_resched() shows
> that they're absolutely clueless.
> 
> This is so broken it's not funny.
> 
> NAK NAK NAK

Indeed. Sprinkling random preempt_enable/disable() pairs all over the place
documents clearly that this is tinkering and not proper software
engineering.

NAK from my side as well.

Thanks,

	Thomas

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] android: binder: Disable preemption while holding the global binder lock
  2016-09-10 16:37     ` Thomas Gleixner
@ 2016-09-10 17:28       ` Greg Kroah-Hartman
  2016-09-12 15:49         ` Todd Kjos
  2016-09-13  3:44         ` Arve Hjønnevåg
  0 siblings, 2 replies; 27+ messages in thread
From: Greg Kroah-Hartman @ 2016-09-10 17:28 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Peter Zijlstra, devel, Riley Andrews, linux-kernel,
	Christoph Hellwig, Arve Hjønnevåg, Todd Kjos

On Sat, Sep 10, 2016 at 06:37:29PM +0200, Thomas Gleixner wrote:
> On Sat, 10 Sep 2016, Peter Zijlstra wrote:
> 
> > On Sat, Sep 10, 2016 at 09:16:59AM -0700, Christoph Hellwig wrote:
> > > On Thu, Sep 08, 2016 at 09:12:50AM -0700, Todd Kjos wrote:
> > > > In Android systems, the display pipeline relies on low
> > > > latency binder transactions and is therefore sensitive to
> > > > delays caused by contention for the global binder lock.
> > > > Jank is siginificantly reduced by disabling preemption
> > > > while the global binder lock is held.
> > > 
> > > That's now how preempt_disable is supposed to use.  It is for critical
> > 
> > not, that's supposed to be _not_. Just to be absolutely clear, this is
> > NOT how you're supposed to use preempt_disable().
> > 
> > > sections that use per-cpu or similar resources.
> > > 
> > > > 
> > > > Originally-from: Riley Andrews <riandrews@google.com>
> > > > Signed-off-by: Todd Kjos <tkjos@google.com>
> > 
> > > > @@ -389,7 +390,11 @@ static int task_get_unused_fd_flags(struct
> > > > binder_proc *proc, int flags)
> > > >   rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE);
> > > >   unlock_task_sighand(proc->tsk, &irqs);
> > > > 
> > > > - return __alloc_fd(files, 0, rlim_cur, flags);
> > > > + preempt_enable_no_resched();
> > > > + ret = __alloc_fd(files, 0, rlim_cur, flags);
> > > > + preempt_disable();
> > 
> > And the fact that people want to use preempt_enable_no_resched() shows
> > that they're absolutely clueless.
> > 
> > This is so broken its not funny.
> > 
> > NAK NAK NAK
> 
> Indeed. Sprinkling random preempt_enabe/disable() pairs all over the place
> documents clearly that this is tinkering and not proper software
> engineering.

I have pointed out in the other thread for this patch (the one that had
a patch that could be applied) that the single lock in the binder code
is the main problem here; it should be solved instead of this messing
around with priorities.

So don't worry, I'm not taking this change :)

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] android: binder: Disable preemption while holding the global binder lock
  2016-09-10 17:28       ` Greg Kroah-Hartman
@ 2016-09-12 15:49         ` Todd Kjos
  2016-09-13  3:44         ` Arve Hjønnevåg
  1 sibling, 0 replies; 27+ messages in thread
From: Todd Kjos @ 2016-09-12 15:49 UTC (permalink / raw)
  To: Greg Kroah-Hartman
  Cc: Thomas Gleixner, Peter Zijlstra, devel, Riley Andrews,
	linux-kernel, Christoph Hellwig, Arve Hjønnevåg, Todd Kjos

Thanks for the reviews. We'll come up with a different solution.

On Sat, Sep 10, 2016 at 10:28 AM, Greg Kroah-Hartman
<gregkh@linuxfoundation.org> wrote:
> On Sat, Sep 10, 2016 at 06:37:29PM +0200, Thomas Gleixner wrote:
>> On Sat, 10 Sep 2016, Peter Zijlstra wrote:
>>
>> > On Sat, Sep 10, 2016 at 09:16:59AM -0700, Christoph Hellwig wrote:
>> > > On Thu, Sep 08, 2016 at 09:12:50AM -0700, Todd Kjos wrote:
>> > > > In Android systems, the display pipeline relies on low
>> > > > latency binder transactions and is therefore sensitive to
>> > > > delays caused by contention for the global binder lock.
>> > > > Jank is siginificantly reduced by disabling preemption
>> > > > while the global binder lock is held.
>> > >
>> > > That's now how preempt_disable is supposed to use.  It is for critical
>> >
>> > not, that's supposed to be _not_. Just to be absolutely clear, this is
>> > NOT how you're supposed to use preempt_disable().
>> >
>> > > sections that use per-cpu or similar resources.
>> > >
>> > > >
>> > > > Originally-from: Riley Andrews <riandrews@google.com>
>> > > > Signed-off-by: Todd Kjos <tkjos@google.com>
>> >
>> > > > @@ -389,7 +390,11 @@ static int task_get_unused_fd_flags(struct
>> > > > binder_proc *proc, int flags)
>> > > >   rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE);
>> > > >   unlock_task_sighand(proc->tsk, &irqs);
>> > > >
>> > > > - return __alloc_fd(files, 0, rlim_cur, flags);
>> > > > + preempt_enable_no_resched();
>> > > > + ret = __alloc_fd(files, 0, rlim_cur, flags);
>> > > > + preempt_disable();
>> >
>> > And the fact that people want to use preempt_enable_no_resched() shows
>> > that they're absolutely clueless.
>> >
>> > This is so broken its not funny.
>> >
>> > NAK NAK NAK
>>
>> Indeed. Sprinkling random preempt_enabe/disable() pairs all over the place
>> documents clearly that this is tinkering and not proper software
>> engineering.
>
> I have pointed out in the other thread for this patch (the one that had
> a patch that could be applied) that the single lock in the binder code
> is the main problem here, it should be solved instead of this messing
> around with priorities.
>
> So don't worry, I'm not taking this change :)
>
> thanks,
>
> greg k-h

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] android: binder: Disable preemption while holding the global binder lock
  2016-09-10 17:28       ` Greg Kroah-Hartman
  2016-09-12 15:49         ` Todd Kjos
@ 2016-09-13  3:44         ` Arve Hjønnevåg
  2016-09-13  6:42           ` Greg Kroah-Hartman
  2016-09-13  7:32           ` Peter Zijlstra
  1 sibling, 2 replies; 27+ messages in thread
From: Arve Hjønnevåg @ 2016-09-13  3:44 UTC (permalink / raw)
  To: Greg Kroah-Hartman
  Cc: Thomas Gleixner, Peter Zijlstra, devel, Riley Andrews, LKML,
	Christoph Hellwig, Todd Kjos

On Sat, Sep 10, 2016 at 10:28 AM, Greg Kroah-Hartman
<gregkh@linuxfoundation.org> wrote:
> On Sat, Sep 10, 2016 at 06:37:29PM +0200, Thomas Gleixner wrote:
>> On Sat, 10 Sep 2016, Peter Zijlstra wrote:
>>
>> > On Sat, Sep 10, 2016 at 09:16:59AM -0700, Christoph Hellwig wrote:
>> > > On Thu, Sep 08, 2016 at 09:12:50AM -0700, Todd Kjos wrote:
>> > > > In Android systems, the display pipeline relies on low
>> > > > latency binder transactions and is therefore sensitive to
>> > > > delays caused by contention for the global binder lock.
>> > > > Jank is siginificantly reduced by disabling preemption
>> > > > while the global binder lock is held.
>> > >
>> > > That's now how preempt_disable is supposed to use.  It is for critical
>> >
>> > not, that's supposed to be _not_. Just to be absolutely clear, this is
>> > NOT how you're supposed to use preempt_disable().
>> >
>> > > sections that use per-cpu or similar resources.
>> > >
>> > > >
>> > > > Originally-from: Riley Andrews <riandrews@google.com>
>> > > > Signed-off-by: Todd Kjos <tkjos@google.com>
>> >
>> > > > @@ -389,7 +390,11 @@ static int task_get_unused_fd_flags(struct
>> > > > binder_proc *proc, int flags)
>> > > >   rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE);
>> > > >   unlock_task_sighand(proc->tsk, &irqs);
>> > > >
>> > > > - return __alloc_fd(files, 0, rlim_cur, flags);
>> > > > + preempt_enable_no_resched();
>> > > > + ret = __alloc_fd(files, 0, rlim_cur, flags);
>> > > > + preempt_disable();
>> >
>> > And the fact that people want to use preempt_enable_no_resched() shows
>> > that they're absolutely clueless.
>> >
>> > This is so broken its not funny.
>> >
>> > NAK NAK NAK
>>
>> Indeed. Sprinkling random preempt_enabe/disable() pairs all over the place
>> documents clearly that this is tinkering and not proper software
>> engineering.
>
> I have pointed out in the other thread for this patch (the one that had
> a patch that could be applied) that the single lock in the binder code
> is the main problem here, it should be solved instead of this messing
> around with priorities.
>

While removing the single lock in the binder driver would help reduce
the problem that this patch tries to work around, it would not fix it.
The largest problems occur when a very low priority thread gets
preempted while holding the lock. When a high priority thread then
needs the same lock it can't get it. Changing the driver to use more
fine-grained locking would reduce the set of threads that can trigger
this problem, but there are processes that receive work from both high
and low priority threads and could still end up in the same situation.

A previous attempt to fix this problem changed the lock to use
rt_mutex instead of mutex, but this apparently did not work as well as
this patch. I believe the added overhead was noticeable, and it did
not work when the preempted thread was in a different cgroup (I don't
know if this is still the case).

It would be useful to have a generic solution to this problem.
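
For illustration only, a minimal sketch of what the rt_mutex variant of the
global lock might look like, assuming the existing binder_main_lock and the
binder_lock()/binder_unlock() helpers keep their names (this is a sketch, not
the earlier patch referred to above):

#include <linux/rtmutex.h>

/* Sketch: swap the global mutex for an rt_mutex so a blocked waiter
 * PI-boosts the current lock owner instead of just sleeping behind it. */
static DEFINE_RT_MUTEX(binder_main_lock);

static inline void binder_lock(const char *tag)
{
	trace_binder_lock(tag);
	rt_mutex_lock(&binder_main_lock);	/* blocking here boosts the owner */
	trace_binder_locked(tag);
}

static inline void binder_unlock(const char *tag)
{
	trace_binder_unlock(tag);
	rt_mutex_unlock(&binder_main_lock);
}

As the rest of the thread notes, the boost this buys only really helps when
the waiter is an RT task, so it would not by itself address the CFS/cgroup
case discussed below.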

> So don't worry, I'm not taking this change :)
>
> thanks,
>
> greg k-h



-- 
Arve Hjønnevåg

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] android: binder: Disable preemption while holding the global binder lock
  2016-09-13  3:44         ` Arve Hjønnevåg
@ 2016-09-13  6:42           ` Greg Kroah-Hartman
  2016-09-13 19:52             ` Arve Hjønnevåg
  2016-09-13  7:32           ` Peter Zijlstra
  1 sibling, 1 reply; 27+ messages in thread
From: Greg Kroah-Hartman @ 2016-09-13  6:42 UTC (permalink / raw)
  To: Arve Hjønnevåg
  Cc: devel, Peter Zijlstra, LKML, Christoph Hellwig, Riley Andrews,
	Thomas Gleixner, Todd Kjos

On Mon, Sep 12, 2016 at 08:44:09PM -0700, Arve Hjønnevåg wrote:
> On Sat, Sep 10, 2016 at 10:28 AM, Greg Kroah-Hartman
> <gregkh@linuxfoundation.org> wrote:
> > On Sat, Sep 10, 2016 at 06:37:29PM +0200, Thomas Gleixner wrote:
> >> On Sat, 10 Sep 2016, Peter Zijlstra wrote:
> >>
> >> > On Sat, Sep 10, 2016 at 09:16:59AM -0700, Christoph Hellwig wrote:
> >> > > On Thu, Sep 08, 2016 at 09:12:50AM -0700, Todd Kjos wrote:
> >> > > > In Android systems, the display pipeline relies on low
> >> > > > latency binder transactions and is therefore sensitive to
> >> > > > delays caused by contention for the global binder lock.
> >> > > > Jank is siginificantly reduced by disabling preemption
> >> > > > while the global binder lock is held.
> >> > >
> >> > > That's now how preempt_disable is supposed to use.  It is for critical
> >> >
> >> > not, that's supposed to be _not_. Just to be absolutely clear, this is
> >> > NOT how you're supposed to use preempt_disable().
> >> >
> >> > > sections that use per-cpu or similar resources.
> >> > >
> >> > > >
> >> > > > Originally-from: Riley Andrews <riandrews@google.com>
> >> > > > Signed-off-by: Todd Kjos <tkjos@google.com>
> >> >
> >> > > > @@ -389,7 +390,11 @@ static int task_get_unused_fd_flags(struct
> >> > > > binder_proc *proc, int flags)
> >> > > >   rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE);
> >> > > >   unlock_task_sighand(proc->tsk, &irqs);
> >> > > >
> >> > > > - return __alloc_fd(files, 0, rlim_cur, flags);
> >> > > > + preempt_enable_no_resched();
> >> > > > + ret = __alloc_fd(files, 0, rlim_cur, flags);
> >> > > > + preempt_disable();
> >> >
> >> > And the fact that people want to use preempt_enable_no_resched() shows
> >> > that they're absolutely clueless.
> >> >
> >> > This is so broken its not funny.
> >> >
> >> > NAK NAK NAK
> >>
> >> Indeed. Sprinkling random preempt_enabe/disable() pairs all over the place
> >> documents clearly that this is tinkering and not proper software
> >> engineering.
> >
> > I have pointed out in the other thread for this patch (the one that had
> > a patch that could be applied) that the single lock in the binder code
> > is the main problem here, it should be solved instead of this messing
> > around with priorities.
> >
> 
> While removing the single lock in the binder driver would help reduce
> the problem that this patch tries to work around, it would not fix it.
> The largest problems occur when a very low priority thread gets
> preempted while holding the lock. When a high priority thread then
> needs the same lock it can't get it. Changing the driver to use more
> fine-grained locking would reduce the set of threads that can trigger
> this problem, but there are processes that receive work from both high
> and low priority threads and could still end up in the same situation.

Ok, but how would this patch solve the problem?  It only reduces the
window in which the preemption could occur; it doesn't stop it from
happening entirely.

> A previous attempt to fix this problem, changed the lock to use
> rt_mutex instead of mutex, but this apparently did not work as well as
> this patch. I believe the added overhead was noticeable, and it did
> not work when the preempted thread was in a different cgroup (I don't
> know if this is still the case).

Why would rt_mutex solve this?

I worry that this is only going to get worse as more portions of the
Android userspace/HAL get rewritten to use binder more and more.  What
about trying to get rid of the lock entirely?  That way the task
priority levels would work "properly" here.

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] android: binder: Disable preemption while holding the global binder lock
  2016-09-13  3:44         ` Arve Hjønnevåg
  2016-09-13  6:42           ` Greg Kroah-Hartman
@ 2016-09-13  7:32           ` Peter Zijlstra
  2016-09-13 19:53             ` Arve Hjønnevåg
  1 sibling, 1 reply; 27+ messages in thread
From: Peter Zijlstra @ 2016-09-13  7:32 UTC (permalink / raw)
  To: Arve Hjønnevåg
  Cc: Greg Kroah-Hartman, Thomas Gleixner, devel, Riley Andrews, LKML,
	Christoph Hellwig, Todd Kjos

On Mon, Sep 12, 2016 at 08:44:09PM -0700, Arve Hjønnevåg wrote:

> A previous attempt to fix this problem, changed the lock to use
> rt_mutex instead of mutex, but this apparently did not work as well as
> this patch. I believe the added overhead was noticeable, and it did
> not work when the preempted thread was in a different cgroup (I don't
> know if this is still the case).

Do you actually have RR/FIFO/DL tasks? Currently PI isn't
defined/implemented for OTHER.

cgroups should be irrelevant, PI is unaware of them.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] android: binder: Disable preemption while holding the global binder lock
  2016-09-13  6:42           ` Greg Kroah-Hartman
@ 2016-09-13 19:52             ` Arve Hjønnevåg
  0 siblings, 0 replies; 27+ messages in thread
From: Arve Hjønnevåg @ 2016-09-13 19:52 UTC (permalink / raw)
  To: Greg Kroah-Hartman
  Cc: devel, Peter Zijlstra, LKML, Christoph Hellwig, Riley Andrews,
	Thomas Gleixner, Todd Kjos

On Mon, Sep 12, 2016 at 11:42 PM, Greg Kroah-Hartman
<gregkh@linuxfoundation.org> wrote:
> On Mon, Sep 12, 2016 at 08:44:09PM -0700, Arve Hjønnevåg wrote:
>> On Sat, Sep 10, 2016 at 10:28 AM, Greg Kroah-Hartman
>> <gregkh@linuxfoundation.org> wrote:
>> > On Sat, Sep 10, 2016 at 06:37:29PM +0200, Thomas Gleixner wrote:
>> >> On Sat, 10 Sep 2016, Peter Zijlstra wrote:
>> >>
>> >> > On Sat, Sep 10, 2016 at 09:16:59AM -0700, Christoph Hellwig wrote:
>> >> > > On Thu, Sep 08, 2016 at 09:12:50AM -0700, Todd Kjos wrote:
>> >> > > > In Android systems, the display pipeline relies on low
>> >> > > > latency binder transactions and is therefore sensitive to
>> >> > > > delays caused by contention for the global binder lock.
>> >> > > > Jank is siginificantly reduced by disabling preemption
>> >> > > > while the global binder lock is held.
>> >> > >
>> >> > > That's now how preempt_disable is supposed to use.  It is for critical
>> >> >
>> >> > not, that's supposed to be _not_. Just to be absolutely clear, this is
>> >> > NOT how you're supposed to use preempt_disable().
>> >> >
>> >> > > sections that use per-cpu or similar resources.
>> >> > >
>> >> > > >
>> >> > > > Originally-from: Riley Andrews <riandrews@google.com>
>> >> > > > Signed-off-by: Todd Kjos <tkjos@google.com>
>> >> >
>> >> > > > @@ -389,7 +390,11 @@ static int task_get_unused_fd_flags(struct
>> >> > > > binder_proc *proc, int flags)
>> >> > > >   rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE);
>> >> > > >   unlock_task_sighand(proc->tsk, &irqs);
>> >> > > >
>> >> > > > - return __alloc_fd(files, 0, rlim_cur, flags);
>> >> > > > + preempt_enable_no_resched();
>> >> > > > + ret = __alloc_fd(files, 0, rlim_cur, flags);
>> >> > > > + preempt_disable();
>> >> >
>> >> > And the fact that people want to use preempt_enable_no_resched() shows
>> >> > that they're absolutely clueless.
>> >> >
>> >> > This is so broken its not funny.
>> >> >
>> >> > NAK NAK NAK
>> >>
>> >> Indeed. Sprinkling random preempt_enabe/disable() pairs all over the place
>> >> documents clearly that this is tinkering and not proper software
>> >> engineering.
>> >
>> > I have pointed out in the other thread for this patch (the one that had
>> > a patch that could be applied) that the single lock in the binder code
>> > is the main problem here, it should be solved instead of this messing
>> > around with priorities.
>> >
>>
>> While removing the single lock in the binder driver would help reduce
>> the problem that this patch tries to work around, it would not fix it.
>> The largest problems occur when a very low priority thread gets
>> preempted while holding the lock. When a high priority thread then
>> needs the same lock it can't get it. Changing the driver to use more
>> fine-grained locking would reduce the set of threads that can trigger
>> this problem, but there are processes that receive work from both high
>> and low priority threads and could still end up in the same situation.
>
> Ok, but how would this patch solve the problem?  It only reduces the
> window of when the preemption could occur, it doesn't stop it from
> happening entirely.
>

I agree, this patch does not _solve_ the problem either. It only
reduces it, as there are many places where it re-enables preemption
for significant work.

>> A previous attempt to fix this problem, changed the lock to use
>> rt_mutex instead of mutex, but this apparently did not work as well as
>> this patch. I believe the added overhead was noticeable, and it did
>> not work when the preempted thread was in a different cgroup (I don't
>> know if this is still the case).
>
> Why would rt_mutex solve this?

If the task that holds the lock would run when you try to get the
lock, there would be no long delay to get the lock. rt_mutex seems to
be designed to solve this problem.

>
> I worry that this is only going to get worse as more portions of the
> Android userspace/HAL get rewritten to use binder more and more.  What
> about trying to get rid of the lock entirely?  That way the task
> priority levels would work "properly" here.
>

I think a good first step would be to switch to locks with a smaller
scope. I'm not sure how useful it would be to eliminate locks in this
driver since it calls into other code that uses locks.
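
As a purely hypothetical sketch of that direction (every name below is
invented for illustration and is not the driver's actual code), a
smaller-scope lock could live with the per-process state it guards:

#include <linux/list.h>
#include <linux/mutex.h>

/* Illustrative only: a per-process lock so unrelated processes no longer
 * contend on one global mutex. */
struct binder_proc_work {
	struct mutex lock;		/* guards the todo list below */
	struct list_head todo;		/* pending work for this process */
};

static void binder_proc_queue_work(struct binder_proc_work *proc,
				   struct list_head *work_entry)
{
	mutex_lock(&proc->lock);	/* only this process's clients contend */
	list_add_tail(work_entry, &proc->todo);
	mutex_unlock(&proc->lock);
}

Even with a smaller scope, a low priority owner can of course still be
preempted while holding such a lock, as pointed out earlier in the thread.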

> thanks,
>
> greg k-h

-- 
Arve Hjønnevåg

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] android: binder: Disable preemption while holding the global binder lock
  2016-09-13  7:32           ` Peter Zijlstra
@ 2016-09-13 19:53             ` Arve Hjønnevåg
  2016-09-14  7:10               ` Peter Zijlstra
  2016-09-14 16:11               ` Peter Zijlstra
  0 siblings, 2 replies; 27+ messages in thread
From: Arve Hjønnevåg @ 2016-09-13 19:53 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Greg Kroah-Hartman, Thomas Gleixner, devel, Riley Andrews, LKML,
	Christoph Hellwig, Todd Kjos

On Tue, Sep 13, 2016 at 12:32 AM, Peter Zijlstra <peterz@infradead.org> wrote:
> On Mon, Sep 12, 2016 at 08:44:09PM -0700, Arve Hjønnevåg wrote:
>
>> A previous attempt to fix this problem, changed the lock to use
>> rt_mutex instead of mutex, but this apparently did not work as well as
>> this patch. I believe the added overhead was noticeable, and it did
>> not work when the preempted thread was in a different cgroup (I don't
>> know if this is still the case).
>
> Do you actually have RR/FIFO/DL tasks? Currently PI isn't
> defined/implemented for OTHER.
>

Most of the tasks here are not RR/FIFO/DL tasks. I don't see anything
in the rtmutex code or documentation that indicates that they don't
work for normal tasks. From what I can tell the priority gets boosted
in every case. This may not work as well for CFS tasks as for realtime
tasks, but it should at least help when there is a large priority
difference.

> cgroups should be irrelevant, PI is unaware of them.

I don't think cgroups are irrelevant. PI being unaware of them
explains the problem I described. If the task that holds the lock is
in a cgroup that has a low cpu.shares value, then boosting the task's
priority within that group does not necessarily make it any more likely to
run.
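
For reference, the scaling behind that observation is roughly the
hierarchical load the scheduler already computes; a simplified sketch in the
style of task_h_load(), not actual scheduler source:

/*
 * Simplified sketch: with group scheduling, what a task contributes is
 * scaled by its group's share of the hierarchy, so boosting ->prio inside
 * a group with a tiny cpu.shares still leaves a tiny hierarchical load.
 */
static unsigned long hierarchical_load(struct task_struct *p)
{
	struct cfs_rq *cfs_rq = task_cfs_rq(p);

	update_cfs_rq_h_load(cfs_rq);
	return div64_ul(p->se.avg.load_avg * cfs_rq->h_load,
			cfs_rq_load_avg(cfs_rq) + 1);
}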

-- 
Arve Hjønnevåg

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] android: binder: Disable preemption while holding the global binder lock
  2016-09-13 19:53             ` Arve Hjønnevåg
@ 2016-09-14  7:10               ` Peter Zijlstra
  2016-09-14  7:41                 ` Peter Zijlstra
  2016-09-14 13:38                 ` Peter Zijlstra
  2016-09-14 16:11               ` Peter Zijlstra
  1 sibling, 2 replies; 27+ messages in thread
From: Peter Zijlstra @ 2016-09-14  7:10 UTC (permalink / raw)
  To: Arve Hjønnevåg
  Cc: Greg Kroah-Hartman, Thomas Gleixner, devel, Riley Andrews, LKML,
	Christoph Hellwig, Todd Kjos

On Tue, Sep 13, 2016 at 12:53:27PM -0700, Arve Hjønnevåg wrote:
> On Tue, Sep 13, 2016 at 12:32 AM, Peter Zijlstra <peterz@infradead.org> wrote:
> > On Mon, Sep 12, 2016 at 08:44:09PM -0700, Arve Hjønnevåg wrote:
> >
> >> A previous attempt to fix this problem, changed the lock to use
> >> rt_mutex instead of mutex, but this apparently did not work as well as
> >> this patch. I believe the added overhead was noticeable, and it did
> >> not work when the preempted thread was in a different cgroup (I don't
> >> know if this is still the case).
> >
> > Do you actually have RR/FIFO/DL tasks? Currently PI isn't
> > defined/implemented for OTHER.
> >
> 
> Most of the tasks here are not RR/FIFO/DL tasks. I don't see anything
> in the rtmutex code or documentation that indicates that they don't
> work for normal tasks. From what I can tell the priority gets boosted
> in every case. This may not work as well for CFS tasks as for realtime
> tasks, but it should at least help when there is a large priority
> difference.

It does something (it used to explicitly ignore OTHER), but it's not
something well defined or usable.

> > cgroups should be irrelevant, PI is unaware of them.
> 
> I don't think cgroups are irrelevant. PI being unaware of them
> explains the problem I described. If the task that holds the lock is
> in a cgroup that has a low cpu.shares value, then boosting the task's
> priority within that group does necessarily make it any more likely to
> run.

See, the problem is that 'priority' is a meaningless concept for
OTHER/CFS.

In any case, typically only RT tasks care about PI, and the traditional
Priority Inheritance algorithm only really works correctly for FIFO. As
is, RR has issues and DL is a crude hack; CFS is really just an accident
of not explicitly exempting it.

We could define a meaningful something for CFS and implement that, but
it isn't currently done.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] android: binder: Disable preemption while holding the global binder lock
  2016-09-14  7:10               ` Peter Zijlstra
@ 2016-09-14  7:41                 ` Peter Zijlstra
  2016-09-14 13:38                 ` Peter Zijlstra
  1 sibling, 0 replies; 27+ messages in thread
From: Peter Zijlstra @ 2016-09-14  7:41 UTC (permalink / raw)
  To: Arve Hjønnevåg
  Cc: Greg Kroah-Hartman, Thomas Gleixner, devel, Riley Andrews, LKML,
	Christoph Hellwig, Todd Kjos

On Wed, Sep 14, 2016 at 09:10:01AM +0200, Peter Zijlstra wrote:
> We could define a meaningful something for CFS and implement that, but
> it isn't currently done.

So the generalization of the Priority Inheritance Protocol is Proxy
Execution Protocol, which basically lets the boosted task run _as_ the
task on the block chain as picked by the schedule function (treating all
of them as runnable). Where 'as' means it really consumes scheduling
resources of the (blocked) donor task.

Since the scheduling function for FIFO is: pick the highest prio one and
go for it, this trivially reduces to PI for FIFO.

Now, Proxy Execution Protocol is 'trivial' to implement on UP, but has a
few wobbles on SMP.

But we can use it to derive a sensible definition for a WFQ class
scheduler (like CFS). For these the scheduling function basically runs
the boosted task as every donor task on the block chain gets their slice.
Alternatively, since it treats all 'blocked' tasks as runnable, the
total weight of the boosted task is its own weight plus the sum of
weight on the block chain.

Which is something that shouldn't be too hard to implement, but very
much isn't what happens today.
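
Spelled out as purely illustrative code (none of these names exist in any
tree), the WFQ reading above amounts to running the lock owner with the
summed weight of itself plus every blocked donor on the chain:

/* Illustrative sketch only. */
struct pep_donor {
	unsigned long weight;		/* the donor's normal CFS weight */
};

static unsigned long pep_effective_weight(unsigned long owner_weight,
					  const struct pep_donor *donors,
					  int nr_donors)
{
	unsigned long sum = owner_weight;
	int i;

	/* treat every blocked donor as runnable: the owner consumes their
	 * scheduling resources, i.e. runs with their weight as well */
	for (i = 0; i < nr_donors; i++)
		sum += donors[i].weight;

	return sum;
}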

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] android: binder: Disable preemption while holding the global binder lock
  2016-09-14  7:10               ` Peter Zijlstra
  2016-09-14  7:41                 ` Peter Zijlstra
@ 2016-09-14 13:38                 ` Peter Zijlstra
  1 sibling, 0 replies; 27+ messages in thread
From: Peter Zijlstra @ 2016-09-14 13:38 UTC (permalink / raw)
  To: Arve Hjønnevåg
  Cc: Greg Kroah-Hartman, Thomas Gleixner, devel, Riley Andrews, LKML,
	Christoph Hellwig, Todd Kjos

On Wed, Sep 14, 2016 at 09:10:01AM +0200, Peter Zijlstra wrote:
> On Tue, Sep 13, 2016 at 12:53:27PM -0700, Arve Hjønnevåg wrote:

> > Most of the tasks here are not RR/FIFO/DL tasks. I don't see anything
> > in the rtmutex code or documentation that indicates that they don't
> > work for normal tasks. From what I can tell the priority gets boosted
> > in every case. This may not work as well for CFS tasks as for realtime
> > tasks, but it should at least help when there is a large priority
> > difference.
> 
> It does something (it used to explicitly ignore OTHER) but its not
> something well defined or usable.

I looked again, and while it updates the ->prio field for OTHER tasks,
that does not seem to cause a change to the actual weight field (which
is derived from ->static_prio).

So it really should not do anything.. as I remembered it.
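
Concretely, the CFS weight lookup is keyed on ->static_prio, roughly as in
this simplified sketch of the set_load_weight() logic (not the actual
scheduler source), which is why a PI update of ->prio alone is invisible to
CFS:

/* Simplified sketch of the set_load_weight() style lookup. */
static unsigned long cfs_load_weight(struct task_struct *p)
{
	int idx = p->static_prio - MAX_RT_PRIO;	/* nice level index, 0..39 */

	/* PI only touches ->prio, so this value never changes under a boost */
	return scale_load(sched_prio_to_weight[idx]);
}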

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] android: binder: Disable preemption while holding the global binder lock
  2016-09-13 19:53             ` Arve Hjønnevåg
  2016-09-14  7:10               ` Peter Zijlstra
@ 2016-09-14 16:11               ` Peter Zijlstra
  2016-09-14 16:13                 ` Peter Zijlstra
  1 sibling, 1 reply; 27+ messages in thread
From: Peter Zijlstra @ 2016-09-14 16:11 UTC (permalink / raw)
  To: Arve Hjønnevåg
  Cc: Greg Kroah-Hartman, Thomas Gleixner, devel, Riley Andrews, LKML,
	Christoph Hellwig, Todd Kjos

On Tue, Sep 13, 2016 at 12:53:27PM -0700, Arve Hjønnevåg wrote:
> On Tue, Sep 13, 2016 at 12:32 AM, Peter Zijlstra <peterz@infradead.org> wrote:
> > cgroups should be irrelevant, PI is unaware of them.
> 
> I don't think cgroups are irrelevant. PI being unaware of them
> explains the problem I described. If the task that holds the lock is
> in a cgroup that has a low cpu.shares value, then boosting the task's
> priority within that group does necessarily make it any more likely to
> run.

Thing is, for FIFO/DL the important parameters (prio and deadline resp.)
are not cgroup dependent.

For CFS you're right, and as per usual, cgroups will be a royal pain.
While we can compute the total weight in the block chain, getting that
back to a task which is stuck in a cgroup is just not going to work
well.

The only 'solution' I can come up with in a hurry is, when the task is
boosted, move it to the root cgroup. That of course has a whole heap of
problems all on its own.

/me curses @ cgroups.. bloody stupid things.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] android: binder: Disable preemption while holding the global binder lock
  2016-09-14 16:11               ` Peter Zijlstra
@ 2016-09-14 16:13                 ` Peter Zijlstra
  2016-09-14 16:55                   ` Peter Zijlstra
  0 siblings, 1 reply; 27+ messages in thread
From: Peter Zijlstra @ 2016-09-14 16:13 UTC (permalink / raw)
  To: Arve Hjønnevåg
  Cc: Greg Kroah-Hartman, Thomas Gleixner, devel, Riley Andrews, LKML,
	Christoph Hellwig, Todd Kjos

On Wed, Sep 14, 2016 at 06:11:03PM +0200, Peter Zijlstra wrote:
> On Tue, Sep 13, 2016 at 12:53:27PM -0700, Arve Hjønnevåg wrote:
> > On Tue, Sep 13, 2016 at 12:32 AM, Peter Zijlstra <peterz@infradead.org> wrote:
> > > cgroups should be irrelevant, PI is unaware of them.
> > 
> > I don't think cgroups are irrelevant. PI being unaware of them
> > explains the problem I described. If the task that holds the lock is
> > in a cgroup that has a low cpu.shares value, then boosting the task's
> > priority within that group does necessarily make it any more likely to
> > run.
> 
> Thing is, for FIFO/DL the important parameters (prio and deadline resp.)
> are not cgroup dependent.
> 
> For CFS you're right, and as per usual, cgroups will be a royal pain.
> While we can compute the total weight in the block chain, getting that
> back to a task which is stuck in a cgroup is just not going to work
> well.

Not to mention the fact that the weight depends on the current running
state. Having those tasks block insta changes the actual weight.

> /me curses @ cgroups.. bloody stupid things.

More of that.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] android: binder: Disable preemption while holding the global binder lock
  2016-09-14 16:13                 ` Peter Zijlstra
@ 2016-09-14 16:55                   ` Peter Zijlstra
  2016-09-17  0:42                     ` Todd Kjos
  0 siblings, 1 reply; 27+ messages in thread
From: Peter Zijlstra @ 2016-09-14 16:55 UTC (permalink / raw)
  To: Arve Hjønnevåg
  Cc: Greg Kroah-Hartman, Thomas Gleixner, devel, Riley Andrews, LKML,
	Christoph Hellwig, Todd Kjos, Ingo Molnar

On Wed, Sep 14, 2016 at 06:13:40PM +0200, Peter Zijlstra wrote:
> On Wed, Sep 14, 2016 at 06:11:03PM +0200, Peter Zijlstra wrote:
> > On Tue, Sep 13, 2016 at 12:53:27PM -0700, Arve Hjønnevåg wrote:
> > > On Tue, Sep 13, 2016 at 12:32 AM, Peter Zijlstra <peterz@infradead.org> wrote:
> > > > cgroups should be irrelevant, PI is unaware of them.
> > > 
> > > I don't think cgroups are irrelevant. PI being unaware of them
> > > explains the problem I described. If the task that holds the lock is
> > > in a cgroup that has a low cpu.shares value, then boosting the task's
> > > priority within that group does necessarily make it any more likely to
> > > run.
> > 
> > Thing is, for FIFO/DL the important parameters (prio and deadline resp.)
> > are not cgroup dependent.
> > 
> > For CFS you're right, and as per usual, cgroups will be a royal pain.
> > While we can compute the total weight in the block chain, getting that
> > back to a task which is stuck in a cgroup is just not going to work
> > well.
> 
> Not to mention the fact that the weight depends on the current running
> state. Having those tasks block insta changes the actual weight.
> 
> > /me curses @ cgroups.. bloody stupid things.
> 
> More of that.


Something a little like so... completely untested, and has a fair number
of corner cases still left open I think..


--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1691,8 +1691,8 @@ struct task_struct {
 	struct seccomp seccomp;
 
 /* Thread group tracking */
-   	u32 parent_exec_id;
-   	u32 self_exec_id;
+	u32 parent_exec_id;
+	u32 self_exec_id;
 /* Protection of (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed,
  * mempolicy */
 	spinlock_t alloc_lock;
@@ -1710,6 +1710,11 @@ struct task_struct {
 	struct task_struct *pi_top_task;
 	/* Deadlock detection and priority inheritance handling */
 	struct rt_mutex_waiter *pi_blocked_on;
+	unsigned long pi_weight;
+#ifdef CONFIG_CGROUP_SCHED
+	struct task_group *orig_task_group;
+	unsigned long normal_weight;
+#endif
 #endif
 
 #ifdef CONFIG_DEBUG_MUTEXES
@@ -1977,6 +1982,8 @@ static inline int tsk_nr_cpus_allowed(st
 	return p->nr_cpus_allowed;
 }
 
+extern unsigned long task_pi_weight(struct task_struct *p);
+
 #define TNF_MIGRATED	0x01
 #define TNF_NO_GROUP	0x02
 #define TNF_SHARED	0x04
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -20,6 +20,19 @@
 #include "rtmutex_common.h"
 
 /*
+ * !(rt_prio || dl_prio)
+ */
+static inline bool other_prio(int prio)
+{
+	return prio >= MAX_RT_PRIO;
+}
+
+static inline bool other_task(struct task_struct *task)
+{
+	return other_prio(task->prio);
+}
+
+/*
  * lock->owner state tracking:
  *
  * lock->owner holds the task_struct pointer of the owner. Bit 0
@@ -226,6 +239,11 @@ rt_mutex_enqueue(struct rt_mutex *lock,
 
 	rb_link_node(&waiter->tree_entry, parent, link);
 	rb_insert_color(&waiter->tree_entry, &lock->waiters);
+	/*
+	 * We want the lock->waiter_weight to reflect the sum of all queued
+	 * waiters.
+	 */
+	lock->waiters_weight += waiter->weight;
 }
 
 static void
@@ -239,6 +257,7 @@ rt_mutex_dequeue(struct rt_mutex *lock,
 
 	rb_erase(&waiter->tree_entry, &lock->waiters);
 	RB_CLEAR_NODE(&waiter->tree_entry);
+	lock->waiters_weight -= waiter->weight;
 }
 
 static void
@@ -265,6 +284,18 @@ rt_mutex_enqueue_pi(struct task_struct *
 
 	rb_link_node(&waiter->pi_tree_entry, parent, link);
 	rb_insert_color(&waiter->pi_tree_entry, &task->pi_waiters);
+
+	/*
+	 * Since a task can own multiple locks, and we have one pi_waiter
+	 * for every lock, propagate the lock->waiter_weight, which is the sum
+	 * of all weights queued on that lock, into the top waiter, and add
+	 * that to the owning task's pi_weight.
+	 *
+	 * This results in pi_weight being the sum of all blocked waiters
+	 * on this task.
+	 */
+	waiter->top_weight = waiter->lock->waiters_weight;
+	task->pi_weight += waiter->top_weight;
 }
 
 static void
@@ -278,6 +309,7 @@ rt_mutex_dequeue_pi(struct task_struct *
 
 	rb_erase(&waiter->pi_tree_entry, &task->pi_waiters);
 	RB_CLEAR_NODE(&waiter->pi_tree_entry);
+	task->pi_weight -= waiter->top_weight;
 }
 
 static void rt_mutex_adjust_prio(struct task_struct *p)
@@ -497,7 +529,7 @@ static int rt_mutex_adjust_prio_chain(st
 		 * detection is enabled we continue, but stop the
 		 * requeueing in the chain walk.
 		 */
-		if (top_waiter != task_top_pi_waiter(task)) {
+		if (top_waiter != task_top_pi_waiter(task) && !other_task(task)) {
 			if (!detect_deadlock)
 				goto out_unlock_pi;
 			else
@@ -512,7 +544,7 @@ static int rt_mutex_adjust_prio_chain(st
 	 * enabled we continue, but stop the requeueing in the chain
 	 * walk.
 	 */
-	if (rt_mutex_waiter_equal(waiter, cmp_task(task))) {
+	if (rt_mutex_waiter_equal(waiter, cmp_task(task)) && !other_task(task)) {
 		if (!detect_deadlock)
 			goto out_unlock_pi;
 		else
@@ -627,6 +659,11 @@ static int rt_mutex_adjust_prio_chain(st
 	 */
 	waiter->prio = task->prio;
 	waiter->deadline = task->dl.deadline;
+	/*
+	 * The weight of the task depends on the block chain, since
+	 * we're iterating that, its likely to have changed.
+	 */
+	waiter->weight = task_pi_weight(task);
 
 	rt_mutex_enqueue(lock, waiter);
 
@@ -685,6 +722,15 @@ static int rt_mutex_adjust_prio_chain(st
 		waiter = rt_mutex_top_waiter(lock);
 		rt_mutex_enqueue_pi(task, waiter);
 		rt_mutex_adjust_prio(task);
+
+	} else if (other_task(task)) {
+		/*
+		 * Propagate the lock->waiters_weight into task->pi_weight
+		 */
+		rt_mutex_dequeue_pi(task, prerequeue_top_waiter);
+		rt_mutex_enqueue_pi(task, prerequeue_top_waiter);
+		rt_mutex_adjust_prio(task);
+
 	} else {
 		/*
 		 * Nothing changed. No need to do any priority
@@ -728,7 +774,7 @@ static int rt_mutex_adjust_prio_chain(st
 	 * then we can stop the chain walk here if we are not in full
 	 * deadlock detection mode.
 	 */
-	if (!detect_deadlock && waiter != top_waiter)
+	if (!detect_deadlock && waiter != top_waiter && !other_task(task))
 		goto out_put_task;
 
 	goto again;
@@ -904,6 +950,7 @@ static int task_blocks_on_rt_mutex(struc
 	waiter->lock = lock;
 	waiter->prio = task->prio;
 	waiter->deadline = task->dl.deadline;
+	waiter->weight = task_pi_weight(task);
 
 	/* Get the top priority waiter on the lock */
 	if (rt_mutex_has_waiters(lock))
@@ -918,7 +965,7 @@ static int task_blocks_on_rt_mutex(struc
 		return 0;
 
 	raw_spin_lock(&owner->pi_lock);
-	if (waiter == rt_mutex_top_waiter(lock)) {
+	if (other_task(owner) || waiter == rt_mutex_top_waiter(lock)) {
 		rt_mutex_dequeue_pi(owner, top_waiter);
 		rt_mutex_enqueue_pi(owner, waiter);
 
@@ -1017,7 +1064,8 @@ static void mark_wakeup_next_waiter(stru
 static void remove_waiter(struct rt_mutex *lock,
 			  struct rt_mutex_waiter *waiter)
 {
-	bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock));
+	struct rt_mutex_waiter *top_waiter = rt_mutex_top_waiter(lock);
+	bool is_top_waiter = (waiter == top_waiter);
 	struct task_struct *owner = rt_mutex_owner(lock);
 	struct rt_mutex *next_lock;
 
@@ -1032,12 +1080,12 @@ static void remove_waiter(struct rt_mute
 	 * Only update priority if the waiter was the highest priority
 	 * waiter of the lock and there is an owner to update.
 	 */
-	if (!owner || !is_top_waiter)
+	if (!owner || (!other_task(owner) && !is_top_waiter))
 		return;
 
 	raw_spin_lock(&owner->pi_lock);
 
-	rt_mutex_dequeue_pi(owner, waiter);
+	rt_mutex_dequeue_pi(owner, top_waiter);
 
 	if (rt_mutex_has_waiters(lock))
 		rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock));
--- a/kernel/locking/rtmutex_common.h
+++ b/kernel/locking/rtmutex_common.h
@@ -34,6 +34,7 @@ struct rt_mutex_waiter {
 #endif
 	int prio;
 	u64 deadline;
+	unsigned weight, top_weight;
 };
 
 /*
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3725,6 +3725,8 @@ void rt_mutex_setprio(struct task_struct
 		if (rt_prio(oldprio))
 			p->rt.timeout = 0;
 		p->sched_class = &fair_sched_class;
+
+		task_set_pi_weight(p);
 	}
 
 	p->prio = prio;
@@ -7933,6 +7935,12 @@ static void sched_change_group(struct ta
 	tg = container_of(task_css_check(tsk, cpu_cgrp_id, true),
 			  struct task_group, css);
 	tg = autogroup_task_group(tsk, tg);
+
+#ifdef CONFIG_RT_MUTEXES
+	tsk->orig_task_group = tg;
+
+	if (!tsk->pi_weight)
+#endif
 	tsk->sched_task_group = tg;
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6646,6 +6646,32 @@ static void attach_tasks(struct lb_env *
 	raw_spin_unlock(&env->dst_rq->lock);
 }
 
+#ifdef CONFIG_RT_MUTEXES
+/*
+ * See set_load_weight().
+ */
+static inline unsigned long __task_normal_weight(struct task_struct *p)
+{
+	int prio = p->static_prio - MAX_RT_PRIO;
+
+	/*
+	 * SCHED_IDLE tasks get minimal weight:
+	 */
+	if (idle_policy(p->policy))
+		return scale_load(WEIGHT_IDLEPRIO);
+
+	return scale_load(sched_prio_to_weight[prio]);
+}
+
+
+static unsigned long task_normal_weight(struct task_struct *p);
+
+unsigned long task_pi_weight(struct task_struct *p)
+{
+	return task_normal_weight(p) + p->pi_weight;
+}
+#endif
+
 #ifdef CONFIG_FAIR_GROUP_SCHED
 static void update_blocked_averages(int cpu)
 {
@@ -6717,7 +6743,80 @@ static unsigned long task_h_load(struct
 	return div64_ul(p->se.avg.load_avg * cfs_rq->h_load,
 			cfs_rq_load_avg(cfs_rq) + 1);
 }
-#else
+
+#ifdef CONFIG_RT_MUTEXES
+/*
+ * Humongous horrid hack.. because cgroups bloody stink.
+ *
+ * The idea with PI on CFS is to sum the weight of all blocked tasks but with
+ * cgroups the weight of a task depends on the running state of tasks. Blocking
+ * changes the weight.
+ *
+ * Paper over that problem by using the regular averages, and hoping boosting
+ * is short enough to not actually matter much.
+ *
+ * The next problem is that getting that weight back to the boosted task
+ * requires lifting it out of its cgroup. Ideally we'd place it in the first
+ * common parent, but *shees* then we'd have to compute that, so bail and stick
+ * it in the root group.
+ */
+
+static unsigned long task_normal_weight(struct task_struct *p)
+{
+	struct cfs_rq *cfs_rq = task_cfs_rq(p);
+
+	/*
+	 * Once we have ->pi_weight, we'll get boosted into the root group
+	 * and the below falls apart.
+	 */
+	if (!p->pi_weight) {
+		update_cfs_rq_h_load(cfs_rq);
+		p->normal_weight = 
+			div64_ul(__task_normal_weight(p) * cfs_rq->h_load,
+					cfs_rq_load_avg(cfs_rq) + 1);
+	}
+
+	return p->normal_weight;
+}
+
+static void detach_task_cfs_rq(struct task_struct *p);
+static void attach_task_cfs_rq(struct task_struct *p);
+
+void task_set_pi_weight(struct task_struct *p)
+{
+	unsigned long normal_weight = p->normal_weight;
+	struct task_group *tg = &root_task_group;
+	bool move_group;
+
+	if (!p->pi_weight) {
+		tg = p->orig_task_group;
+		normal_weight = __task_normal_weight(p);
+	}
+
+	move_group = p->sched_task_group != tg;
+
+	if (move_group) {
+		p->sched_task_group = tg;
+
+		detach_task_cfs_rq(p);
+		set_task_rq(p, task_cpu(p));
+
+#ifdef CONFIG_SMP
+		/* Tell se's cfs_rq has been changed -- migrated */
+		p->se.avg.last_update_time = 0;
+#endif
+	}
+
+	update_load_set(&p->se.load, normal_weight + p->pi_weight);
+
+	if (move_group)
+		attach_task_cfs_rq(p);
+}
+
+#endif
+
+#else /* CONFIG_FAIR_GROUP_SCHED */
+
 static inline void update_blocked_averages(int cpu)
 {
 	struct rq *rq = cpu_rq(cpu);
@@ -6734,8 +6833,21 @@ static unsigned long task_h_load(struct
 {
 	return p->se.avg.load_avg;
 }
+
+#ifdef CONFIG_RT_MUTEXES
+static unsigned long task_normal_weight(struct task_struct *p)
+{
+	return __task_normal_weight(p);
+}
+
+void task_set_pi_weight(struct task_struct *p)
+{
+	update_load_set(&p->se.load, task_pi_weight(p));
+}
 #endif
 
+#endif /* CONFIG_FAIR_GROUP_SCHED */
+
 /********** Helpers for find_busiest_group ************************/
 
 enum group_type {
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1827,3 +1827,5 @@ static inline void cpufreq_trigger_updat
 #else /* arch_scale_freq_capacity */
 #define arch_scale_freq_invariant()	(false)
 #endif
+
+extern void task_set_pi_weight(struct task_struct *p);

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] android: binder: Disable preemption while holding the global binder lock
  2016-09-14 16:55                   ` Peter Zijlstra
@ 2016-09-17  0:42                     ` Todd Kjos
  0 siblings, 0 replies; 27+ messages in thread
From: Todd Kjos @ 2016-09-17  0:42 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Arve Hjønnevåg, Greg Kroah-Hartman, Thomas Gleixner,
	devel, Riley Andrews, LKML, Christoph Hellwig, Todd Kjos,
	Ingo Molnar

Thanks, Peter. We'll give that patch a try as part of our refactoring.
We're looking at finer-grained locking, and we'll try going back to
rt_mutex plus this patch.

On Wed, Sep 14, 2016 at 9:55 AM, Peter Zijlstra <peterz@infradead.org> wrote:
> On Wed, Sep 14, 2016 at 06:13:40PM +0200, Peter Zijlstra wrote:
>> On Wed, Sep 14, 2016 at 06:11:03PM +0200, Peter Zijlstra wrote:
>> > On Tue, Sep 13, 2016 at 12:53:27PM -0700, Arve Hjønnevåg wrote:
>> > > On Tue, Sep 13, 2016 at 12:32 AM, Peter Zijlstra <peterz@infradead.org> wrote:
>> > > > cgroups should be irrelevant, PI is unaware of them.
>> > >
>> > > I don't think cgroups are irrelevant. PI being unaware of them
>> > > explains the problem I described. If the task that holds the lock is
>> > > in a cgroup that has a low cpu.shares value, then boosting the task's
>> > > priority within that group does necessarily make it any more likely to
>> > > run.
>> >
>> > Thing is, for FIFO/DL the important parameters (prio and deadline resp.)
>> > are not cgroup dependent.
>> >
>> > For CFS you're right, and as per usual, cgroups will be a royal pain.
>> > While we can compute the total weight in the block chain, getting that
>> > back to a task which is stuck in a cgroup is just not going to work
>> > well.
>>
>> Not to mention the fact that the weight depends on the current running
>> state. Having those tasks block insta changes the actual weight.
>>
>> > /me curses @ cgroups.. bloody stupid things.
>>
>> More of that.
>
>
> Something a little like so... completely untested, and has a fair number
> of corner cases still left open I think..
>
>
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1691,8 +1691,8 @@ struct task_struct {
>         struct seccomp seccomp;
>
>  /* Thread group tracking */
> -       u32 parent_exec_id;
> -       u32 self_exec_id;
> +       u32 parent_exec_id;
> +       u32 self_exec_id;
>  /* Protection of (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed,
>   * mempolicy */
>         spinlock_t alloc_lock;
> @@ -1710,6 +1710,11 @@ struct task_struct {
>         struct task_struct *pi_top_task;
>         /* Deadlock detection and priority inheritance handling */
>         struct rt_mutex_waiter *pi_blocked_on;
> +       unsigned long pi_weight;
> +#ifdef CONFIG_CGROUP_SCHED
> +       struct task_group *orig_task_group;
> +       unsigned long normal_weight;
> +#endif
>  #endif
>
>  #ifdef CONFIG_DEBUG_MUTEXES
> @@ -1977,6 +1982,8 @@ static inline int tsk_nr_cpus_allowed(st
>         return p->nr_cpus_allowed;
>  }
>
> +extern unsigned long task_pi_weight(struct task_struct *p);
> +
>  #define TNF_MIGRATED   0x01
>  #define TNF_NO_GROUP   0x02
>  #define TNF_SHARED     0x04
> --- a/kernel/locking/rtmutex.c
> +++ b/kernel/locking/rtmutex.c
> @@ -20,6 +20,19 @@
>  #include "rtmutex_common.h"
>
>  /*
> + * !(rt_prio || dl_prio)
> + */
> +static inline bool other_prio(int prio)
> +{
> +       return prio >= MAX_RT_PRIO;
> +}
> +
> +static inline bool other_task(struct task_struct *task)
> +{
> +       return other_prio(task->prio);
> +}
> +
> +/*
>   * lock->owner state tracking:
>   *
>   * lock->owner holds the task_struct pointer of the owner. Bit 0
> @@ -226,6 +239,11 @@ rt_mutex_enqueue(struct rt_mutex *lock,
>
>         rb_link_node(&waiter->tree_entry, parent, link);
>         rb_insert_color(&waiter->tree_entry, &lock->waiters);
> +       /*
> +        * We want the lock->waiter_weight to reflect the sum of all queued
> +        * waiters.
> +        */
> +       lock->waiters_weight += waiter->weight;
>  }
>
>  static void
> @@ -239,6 +257,7 @@ rt_mutex_dequeue(struct rt_mutex *lock,
>
>         rb_erase(&waiter->tree_entry, &lock->waiters);
>         RB_CLEAR_NODE(&waiter->tree_entry);
> +       lock->waiters_weight -= waiter->weight;
>  }
>
>  static void
> @@ -265,6 +284,18 @@ rt_mutex_enqueue_pi(struct task_struct *
>
>         rb_link_node(&waiter->pi_tree_entry, parent, link);
>         rb_insert_color(&waiter->pi_tree_entry, &task->pi_waiters);
> +
> +       /*
> +        * Since a task can own multiple locks, and we have one pi_waiter
> +        * for every lock, propagate the lock->waiter_weight, which is the sum
> +        * of all weights queued on that lock, into the top waiter, and add
> +        * that to the owning task's pi_weight.
> +        *
> +        * This results in pi_weight being the sum of all blocked waiters
> +        * on this task.
> +        */
> +       waiter->top_weight = waiter->lock->waiters_weight;
> +       task->pi_weight += waiter->top_weight;
>  }
>
>  static void
> @@ -278,6 +309,7 @@ rt_mutex_dequeue_pi(struct task_struct *
>
>         rb_erase(&waiter->pi_tree_entry, &task->pi_waiters);
>         RB_CLEAR_NODE(&waiter->pi_tree_entry);
> +       task->pi_weight -= waiter->top_weight;
>  }
>
>  static void rt_mutex_adjust_prio(struct task_struct *p)
> @@ -497,7 +529,7 @@ static int rt_mutex_adjust_prio_chain(st
>                  * detection is enabled we continue, but stop the
>                  * requeueing in the chain walk.
>                  */
> -               if (top_waiter != task_top_pi_waiter(task)) {
> +               if (top_waiter != task_top_pi_waiter(task) && !other_task(task)) {
>                         if (!detect_deadlock)
>                                 goto out_unlock_pi;
>                         else
> @@ -512,7 +544,7 @@ static int rt_mutex_adjust_prio_chain(st
>          * enabled we continue, but stop the requeueing in the chain
>          * walk.
>          */
> -       if (rt_mutex_waiter_equal(waiter, cmp_task(task))) {
> +       if (rt_mutex_waiter_equal(waiter, cmp_task(task)) && !other_task(task)) {
>                 if (!detect_deadlock)
>                         goto out_unlock_pi;
>                 else
> @@ -627,6 +659,11 @@ static int rt_mutex_adjust_prio_chain(st
>          */
>         waiter->prio = task->prio;
>         waiter->deadline = task->dl.deadline;
> +       /*
> +        * The weight of the task depends on the block chain, since
> +        * we're iterating that, its likely to have changed.
> +        */
> +       waiter->weight = task_pi_weight(task);
>
>         rt_mutex_enqueue(lock, waiter);
>
> @@ -685,6 +722,15 @@ static int rt_mutex_adjust_prio_chain(st
>                 waiter = rt_mutex_top_waiter(lock);
>                 rt_mutex_enqueue_pi(task, waiter);
>                 rt_mutex_adjust_prio(task);
> +
> +       } else if (other_task(task)) {
> +               /*
> +                * Propagate the lock->waiters_weight into task->pi_weight
> +                */
> +               rt_mutex_dequeue_pi(task, prerequeue_top_waiter);
> +               rt_mutex_enqueue_pi(task, prerequeue_top_waiter);
> +               rt_mutex_adjust_prio(task);
> +
>         } else {
>                 /*
>                  * Nothing changed. No need to do any priority
> @@ -728,7 +774,7 @@ static int rt_mutex_adjust_prio_chain(st
>          * then we can stop the chain walk here if we are not in full
>          * deadlock detection mode.
>          */
> -       if (!detect_deadlock && waiter != top_waiter)
> +       if (!detect_deadlock && waiter != top_waiter && !other_task(task))
>                 goto out_put_task;
>
>         goto again;
> @@ -904,6 +950,7 @@ static int task_blocks_on_rt_mutex(struc
>         waiter->lock = lock;
>         waiter->prio = task->prio;
>         waiter->deadline = task->dl.deadline;
> +       waiter->weight = task_pi_weight(task);
>
>         /* Get the top priority waiter on the lock */
>         if (rt_mutex_has_waiters(lock))
> @@ -918,7 +965,7 @@ static int task_blocks_on_rt_mutex(struc
>                 return 0;
>
>         raw_spin_lock(&owner->pi_lock);
> -       if (waiter == rt_mutex_top_waiter(lock)) {
> +       if (other_task(owner) || waiter == rt_mutex_top_waiter(lock)) {
>                 rt_mutex_dequeue_pi(owner, top_waiter);
>                 rt_mutex_enqueue_pi(owner, waiter);
>
> @@ -1017,7 +1064,8 @@ static void mark_wakeup_next_waiter(stru
>  static void remove_waiter(struct rt_mutex *lock,
>                           struct rt_mutex_waiter *waiter)
>  {
> -       bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock));
> +       struct rt_mutex_waiter *top_waiter = rt_mutex_top_waiter(lock);
> +       bool is_top_waiter = (waiter == top_waiter);
>         struct task_struct *owner = rt_mutex_owner(lock);
>         struct rt_mutex *next_lock;
>
> @@ -1032,12 +1080,12 @@ static void remove_waiter(struct rt_mute
>          * Only update priority if the waiter was the highest priority
>          * waiter of the lock and there is an owner to update.
>          */
> -       if (!owner || !is_top_waiter)
> +       if (!owner || (!other_task(owner) && !is_top_waiter))
>                 return;
>
>         raw_spin_lock(&owner->pi_lock);
>
> -       rt_mutex_dequeue_pi(owner, waiter);
> +       rt_mutex_dequeue_pi(owner, top_waiter);
>
>         if (rt_mutex_has_waiters(lock))
>                 rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock));
> --- a/kernel/locking/rtmutex_common.h
> +++ b/kernel/locking/rtmutex_common.h
> @@ -34,6 +34,7 @@ struct rt_mutex_waiter {
>  #endif
>         int prio;
>         u64 deadline;
> +       unsigned weight, top_weight;
>  };
>
>  /*
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -3725,6 +3725,8 @@ void rt_mutex_setprio(struct task_struct
>                 if (rt_prio(oldprio))
>                         p->rt.timeout = 0;
>                 p->sched_class = &fair_sched_class;
> +
> +               task_set_pi_weight(p);
>         }
>
>         p->prio = prio;
> @@ -7933,6 +7935,12 @@ static void sched_change_group(struct ta
>         tg = container_of(task_css_check(tsk, cpu_cgrp_id, true),
>                           struct task_group, css);
>         tg = autogroup_task_group(tsk, tg);
> +
> +#ifdef CONFIG_RT_MUTEXES
> +       tsk->orig_task_group = tg;
> +
> +       if (!tsk->pi_weight)
> +#endif
>         tsk->sched_task_group = tg;
>
>  #ifdef CONFIG_FAIR_GROUP_SCHED
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -6646,6 +6646,32 @@ static void attach_tasks(struct lb_env *
>         raw_spin_unlock(&env->dst_rq->lock);
>  }
>
> +#ifdef CONFIG_RT_MUTEXES
> +/*
> + * See set_load_weight().
> + */
> +static inline unsigned long __task_normal_weight(struct task_struct *p)
> +{
> +       int prio = p->static_prio - MAX_RT_PRIO;
> +
> +       /*
> +        * SCHED_IDLE tasks get minimal weight:
> +        */
> +       if (idle_policy(p->policy))
> +               return scale_load(WEIGHT_IDLEPRIO);
> +
> +       return scale_load(sched_prio_to_weight[prio]);
> +}
> +
> +
> +static unsigned long task_normal_weight(struct task_struct *p);
> +
> +unsigned long task_pi_weight(struct task_struct *p)
> +{
> +       return task_normal_weight(p) + p->pi_weight;
> +}
> +#endif
> +
>  #ifdef CONFIG_FAIR_GROUP_SCHED
>  static void update_blocked_averages(int cpu)
>  {
> @@ -6717,7 +6743,80 @@ static unsigned long task_h_load(struct
>         return div64_ul(p->se.avg.load_avg * cfs_rq->h_load,
>                         cfs_rq_load_avg(cfs_rq) + 1);
>  }
> -#else
> +
> +#ifdef CONFIG_RT_MUTEXES
> +/*
> + * Humongous horrid hack.. because cgroups bloody stink.
> + *
> + * The idea with PI on CFS is to sum the weight of all blocked tasks but with
> + * cgroups the weight of a task depends on the running state of tasks. Blocking
> + * changes the weight.
> + *
> + * Paper over that problem by using the regular averages, and hoping boosting
> + * is short enough to not actually matter much.
> + *
> + * The next problem is that getting that weight back to the boosted task
> + * requires lifting it out of its cgroup. Ideally we'd place it in the first
> + * common parent, but *shees* then we'd have to compute that, so bail and stick
> + * it in the root group.
> + */
> +
> +static unsigned long task_normal_weight(struct task_struct *p)
> +{
> +       struct cfs_rq *cfs_rq = task_cfs_rq(p);
> +
> +       /*
> +        * Once we have ->pi_weight, we'll get boosted into the root group
> +        * and the below falls apart.
> +        */
> +       if (!p->pi_weight) {
> +               update_cfs_rq_h_load(cfs_rq);
> +               p->normal_weight =
> +                       div64_ul(__task_normal_weight(p) * cfs_rq->h_load,
> +                                       cfs_rq_load_avg(cfs_rq) + 1);
> +       }
> +
> +       return p->normal_weight;
> +}
> +
> +static void detach_task_cfs_rq(struct task_struct *p);
> +static void attach_task_cfs_rq(struct task_struct *p);
> +
> +void task_set_pi_weight(struct task_struct *p)
> +{
> +       unsigned long normal_weight = p->normal_weight;
> +       struct task_group *tg = &root_task_group;
> +       bool move_group;
> +
> +       if (!p->pi_weight) {
> +               tg = p->orig_task_group;
> +               normal_weight = __task_normal_weight(p);
> +       }
> +
> +       move_group = p->sched_task_group != tg;
> +
> +       if (move_group) {
> +               p->sched_task_group = tg;
> +
> +               detach_task_cfs_rq(p);
> +               set_task_rq(p, task_cpu(p));
> +
> +#ifdef CONFIG_SMP
> +               /* Tell se's cfs_rq has been changed -- migrated */
> +               p->se.avg.last_update_time = 0;
> +#endif
> +       }
> +
> +       update_load_set(&p->se.load, normal_weight + p->pi_weight);
> +
> +       if (move_group)
> +               attach_task_cfs_rq(p);
> +}
> +
> +#endif
> +
> +#else /* CONFIG_FAIR_GROUP_SCHED */
> +
>  static inline void update_blocked_averages(int cpu)
>  {
>         struct rq *rq = cpu_rq(cpu);
> @@ -6734,8 +6833,21 @@ static unsigned long task_h_load(struct
>  {
>         return p->se.avg.load_avg;
>  }
> +
> +#ifdef CONFIG_RT_MUTEXES
> +static unsigned long task_normal_weight(struct task_struct *p)
> +{
> +       return __task_normal_weight(p);
> +}
> +
> +void task_set_pi_weight(struct task_struct *p)
> +{
> +       update_load_set(&p->se.load, task_pi_weight(p));
> +}
>  #endif
>
> +#endif /* CONFIG_FAIR_GROUP_SCHED */
> +
>  /********** Helpers for find_busiest_group ************************/
>
>  enum group_type {
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -1827,3 +1827,5 @@ static inline void cpufreq_trigger_updat
>  #else /* arch_scale_freq_capacity */
>  #define arch_scale_freq_invariant()    (false)
>  #endif
> +
> +extern void task_set_pi_weight(struct task_struct *p);

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] android: binder: Disable preemption while holding the global binder lock
  2016-09-10 11:18     ` Greg KH
@ 2016-09-10 11:25         ` Greg KH
  0 siblings, 0 replies; 27+ messages in thread
From: Greg KH @ 2016-09-10 11:25 UTC (permalink / raw)
  To: Todd Kjos; +Cc: arve, riandrews, devel, linux-kernel, linux-mm

On Sat, Sep 10, 2016 at 01:18:47PM +0200, Greg KH wrote:
> On Fri, Sep 09, 2016 at 10:39:32AM -0700, Todd Kjos wrote:
> > On Fri, Sep 9, 2016 at 8:44 AM, Greg KH <gregkh@linuxfoundation.org> wrote:
> > > On Fri, Sep 09, 2016 at 08:17:44AM -0700, Todd Kjos wrote:
> > >> From: Todd Kjos <tkjos@android.com>
> > >>
> > >> In Android systems, the display pipeline relies on low
> > >> latency binder transactions and is therefore sensitive to
> > >> delays caused by contention for the global binder lock.
> > >> Jank is significantly reduced by disabling preemption
> > >> while the global binder lock is held.
> > >
> > > What is the technical definition of "Jank"?  :)
> > 
> > I'll rephrase in the next version to "dropped or delayed frames".
> 
> Heh, thanks :)
> 
> Also in the next version can you fix the errors found by the 0-day build
> bot?
> 
> > >> This patch was originated by Riley Andrews <riandrews@android.com>
> > >> with tweaks and forward-porting by me.
> > >>
> > >> Originally-from: Riley Andrews <riandrews@android.com>
> > >> Signed-off-by: Todd Kjos <tkjos@android.com>
> > >> ---
> > >>  drivers/android/binder.c | 194 +++++++++++++++++++++++++++++++++++------------
> > >>  1 file changed, 146 insertions(+), 48 deletions(-)
> > >>
> > >> diff --git a/drivers/android/binder.c b/drivers/android/binder.c
> > >> index 16288e7..c36e420 100644
> > >> --- a/drivers/android/binder.c
> > >> +++ b/drivers/android/binder.c
> > >> @@ -379,6 +379,7 @@ static int task_get_unused_fd_flags(struct binder_proc *proc, int flags)
> > >>       struct files_struct *files = proc->files;
> > >>       unsigned long rlim_cur;
> > >>       unsigned long irqs;
> > >> +     int ret;
> > >>
> > >>       if (files == NULL)
> > >>               return -ESRCH;
> > >> @@ -389,7 +390,11 @@ static int task_get_unused_fd_flags(struct binder_proc *proc, int flags)
> > >>       rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE);
> > >>       unlock_task_sighand(proc->tsk, &irqs);
> > >>
> > >> -     return __alloc_fd(files, 0, rlim_cur, flags);
> > >> +     preempt_enable_no_resched();
> > >> +     ret = __alloc_fd(files, 0, rlim_cur, flags);
> > >> +     preempt_disable();
> > >> +
> > >> +     return ret;
> > >>  }
> > >>
> > >>  /*
> > >> @@ -398,8 +403,11 @@ static int task_get_unused_fd_flags(struct binder_proc *proc, int flags)
> > >>  static void task_fd_install(
> > >>       struct binder_proc *proc, unsigned int fd, struct file *file)
> > >>  {
> > >> -     if (proc->files)
> > >> +     if (proc->files) {
> > >> +             preempt_enable_no_resched();
> > >>               __fd_install(proc->files, fd, file);
> > >> +             preempt_disable();
> > >> +     }
> > >>  }
> > >>
> > >>  /*
> > >> @@ -427,6 +435,7 @@ static inline void binder_lock(const char *tag)
> > >>  {
> > >>       trace_binder_lock(tag);
> > >>       mutex_lock(&binder_main_lock);
> > >> +     preempt_disable();
> > >>       trace_binder_locked(tag);
> > >>  }
> > >>
> > >> @@ -434,8 +443,65 @@ static inline void binder_unlock(const char *tag)
> > >>  {
> > >>       trace_binder_unlock(tag);
> > >>       mutex_unlock(&binder_main_lock);
> > >> +     preempt_enable();
> > >> +}
> > >> +
> > >> +static inline void *kzalloc_nopreempt(size_t size)
> > >> +{
> > >> +     void *ptr;
> > >> +
> > >> +     ptr = kzalloc(size, GFP_NOWAIT);
> > >> +     if (ptr)
> > >> +             return ptr;
> > >> +
> > >> +     preempt_enable_no_resched();
> > >> +     ptr = kzalloc(size, GFP_KERNEL);
> > >> +     preempt_disable();
> > >
> > > Doesn't the allocator retry if the first one fails anyway?  Why not
> > > GFP_NOIO or GFP_ATOMIC?  Have you really hit the second GFP_KERNEL
> > > usage?
> > 
> > I suspect we have hit the second, since we do get into cases where
> > direct reclaim is needed. I can't confirm since I haven't instrumented
> > this case. As you say, if we use GFP_ATOMIC instead, maybe we
> > wouldn't, but even then I'd be concerned that we could deplete the
> > memory reserved for atomic. The general idea of trying for a fast,
> > nowait allocation and then enabling preempt for the rare potentially
> > blocking allocation seems reasonable, doesn't it?
> 
> Yes it is, so much so that I think there's a generic kernel function for
> it already.  Adding in the linux-mm mailing list to be told that I'm
> wrong about this :)

Ok, adding the correct linux-mm list address this time...

greg k-h

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] android: binder: Disable preemption while holding the global binder lock
  2016-09-09 17:39   ` Todd Kjos
@ 2016-09-10 11:18     ` Greg KH
  2016-09-10 11:25         ` Greg KH
  0 siblings, 1 reply; 27+ messages in thread
From: Greg KH @ 2016-09-10 11:18 UTC (permalink / raw)
  To: Todd Kjos; +Cc: arve, riandrews, devel, linux-kernel, linux-mm

On Fri, Sep 09, 2016 at 10:39:32AM -0700, Todd Kjos wrote:
> On Fri, Sep 9, 2016 at 8:44 AM, Greg KH <gregkh@linuxfoundation.org> wrote:
> > On Fri, Sep 09, 2016 at 08:17:44AM -0700, Todd Kjos wrote:
> >> From: Todd Kjos <tkjos@android.com>
> >>
> >> In Android systems, the display pipeline relies on low
> >> latency binder transactions and is therefore sensitive to
> >> delays caused by contention for the global binder lock.
> >> Jank is significantly reduced by disabling preemption
> >> while the global binder lock is held.
> >
> > What is the technical definition of "Jank"?  :)
> 
> I'll rephrase in the next version to "dropped or delayed frames".

Heh, thanks :)

Also in the next version can you fix the errors found by the 0-day build
bot?

> >> This patch was originated by Riley Andrews <riandrews@android.com>
> >> with tweaks and forward-porting by me.
> >>
> >> Originally-from: Riley Andrews <riandrews@android.com>
> >> Signed-off-by: Todd Kjos <tkjos@android.com>
> >> ---
> >>  drivers/android/binder.c | 194 +++++++++++++++++++++++++++++++++++------------
> >>  1 file changed, 146 insertions(+), 48 deletions(-)
> >>
> >> diff --git a/drivers/android/binder.c b/drivers/android/binder.c
> >> index 16288e7..c36e420 100644
> >> --- a/drivers/android/binder.c
> >> +++ b/drivers/android/binder.c
> >> @@ -379,6 +379,7 @@ static int task_get_unused_fd_flags(struct binder_proc *proc, int flags)
> >>       struct files_struct *files = proc->files;
> >>       unsigned long rlim_cur;
> >>       unsigned long irqs;
> >> +     int ret;
> >>
> >>       if (files == NULL)
> >>               return -ESRCH;
> >> @@ -389,7 +390,11 @@ static int task_get_unused_fd_flags(struct binder_proc *proc, int flags)
> >>       rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE);
> >>       unlock_task_sighand(proc->tsk, &irqs);
> >>
> >> -     return __alloc_fd(files, 0, rlim_cur, flags);
> >> +     preempt_enable_no_resched();
> >> +     ret = __alloc_fd(files, 0, rlim_cur, flags);
> >> +     preempt_disable();
> >> +
> >> +     return ret;
> >>  }
> >>
> >>  /*
> >> @@ -398,8 +403,11 @@ static int task_get_unused_fd_flags(struct binder_proc *proc, int flags)
> >>  static void task_fd_install(
> >>       struct binder_proc *proc, unsigned int fd, struct file *file)
> >>  {
> >> -     if (proc->files)
> >> +     if (proc->files) {
> >> +             preempt_enable_no_resched();
> >>               __fd_install(proc->files, fd, file);
> >> +             preempt_disable();
> >> +     }
> >>  }
> >>
> >>  /*
> >> @@ -427,6 +435,7 @@ static inline void binder_lock(const char *tag)
> >>  {
> >>       trace_binder_lock(tag);
> >>       mutex_lock(&binder_main_lock);
> >> +     preempt_disable();
> >>       trace_binder_locked(tag);
> >>  }
> >>
> >> @@ -434,8 +443,65 @@ static inline void binder_unlock(const char *tag)
> >>  {
> >>       trace_binder_unlock(tag);
> >>       mutex_unlock(&binder_main_lock);
> >> +     preempt_enable();
> >> +}
> >> +
> >> +static inline void *kzalloc_nopreempt(size_t size)
> >> +{
> >> +     void *ptr;
> >> +
> >> +     ptr = kzalloc(size, GFP_NOWAIT);
> >> +     if (ptr)
> >> +             return ptr;
> >> +
> >> +     preempt_enable_no_resched();
> >> +     ptr = kzalloc(size, GFP_KERNEL);
> >> +     preempt_disable();
> >
> > Doesn't the allocator retry if the first one fails anyway?  Why not
> > GFP_NOIO or GFP_ATOMIC?  Have you really hit the second GFP_KERNEL
> > usage?
> 
> I suspect we have hit the second, since we do get into cases where
> direct reclaim is needed. I can't confirm since I haven't instrumented
> this case. As you say, if we use GFP_ATOMIC instead, maybe we
> wouldn't, but even then I'd be concerned that we could deplete the
> memory reserved for atomic. The general idea of trying for a fast,
> nowait allocation and then enabling preempt for the rare potentially
> blocking allocation seems reasonable, doesn't it?

Yes it is, so much so that I think there's a generic kernel function for
it already.  Adding in the linux-mm mailing list to be told that I'm
wrong about this :)
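
For reference, a sketch of the GFP_ATOMIC variant being weighed here against
the patch's NOWAIT-plus-GFP_KERNEL helper (the helper name below is made up
for illustration; GFP_ATOMIC is roughly GFP_NOWAIT plus permission to dip
into the emergency reserves, which is exactly the depletion concern above):

#include <linux/slab.h>

static inline void *binder_kzalloc_atomic(size_t size)
{
        /*
         * Never blocks, so preemption could stay disabled across the
         * call, but a burst of transactions would eat into the atomic
         * reserves that other users depend on.
         */
        return kzalloc(size, GFP_ATOMIC);
}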

> >> +     return ptr;
> >> +}
> >> +
> >> +static inline long copy_to_user_nopreempt(void __user *to,
> >> +                                       const void *from, long n)
> >> +{
> >> +     long ret;
> >> +
> >> +     preempt_enable_no_resched();
> >> +     ret = copy_to_user(to, from, n);
> >> +     preempt_disable();
> >> +     return ret;
> >> +}
> >> +
> >> +static inline long copy_from_user_nopreempt(void *to,
> >> +                                         const void __user *from,
> >> +                                         long n)
> >> +{
> >> +     long ret;
> >> +
> >> +     preempt_enable_no_resched();
> >> +     ret = copy_from_user(to, from, n);
> >> +     preempt_disable();
> >> +     return ret;
> >>  }
> >>
> >> +#define get_user_nopreempt(x, ptr)   \
> >> +({                                           \
> >> +     int __ret;                              \
> >> +     preempt_enable_no_resched();            \
> >> +     __ret = get_user(x, ptr);               \
> >> +     preempt_disable();                      \
> >> +     __ret;                                  \
> >> +})
> >> +
> >> +#define put_user_nopreempt(x, ptr)   \
> >> +({                                           \
> >> +     int __ret;                              \
> >> +     preempt_enable_no_resched();            \
> >> +     __ret = put_user(x, ptr);               \
> >> +     preempt_disable();                      \
> >> +     __ret;                                  \
> >> +})
> >
> > Any reason some of these are #defines and some are static inline
> > functions?
> 
> Not that I know of. I'll change to static inline for the next version.
> 
> >
> > Anyway, these all seem a bit strange to me, what type of latency spikes
> > are you seeing that these changes resolve?  Shouldn't that be an issue
> > with the scheduler more than just the binder driver?
> 
> Well, it certainly is partially a scheduler issue in the sense that we
> are using SCHED_NORMAL for latency sensitive tasks that comprise the
> display pipeline (surfaceflinger, UI threads, Render threads). This
> means that a task being preempted might have to wait for a while
> before getting scheduled again. It would be nice to use SCHED_FIFO for
> these, but at the moment we can't do that since that opens us up to
> user code DOS'ing cores. There are some ideas being investigated here,
> but that's a ways off.
> 
> It is also partially a binder issue since there is a single global
> mutex that is acquired for any binder transaction. There are periods
> where the IPC rate is very high. Most of those binder calls are not so
> latency sensitive that a little contention for the binder lock
> matters, but the binder calls between RenderThreads and Surfaceflinger
> are sensitive since there are frame deadlines that need to be met. The
> more time spent holding the binder lock, the higher probability for
> contention and resulting delays which can cause us to miss the
> deadline for the frame.
> 
> Without this change, it is common for the task holding the mutex to be
> preempted, creating a backlog of tasks waiting on the mutex. We
> measured the lock being held for over 400us regularly in these cases,
> greatly increasing the likelihood of contention for the mutex. Since
> there are several binder calls in the pipeline as part of drawing a
> frame, this was observed to induce more than 10ms of frame delay and
> sometimes as high as 20ms which translates directly to missed frames.
> This change fixed that issue.

This single-mutex problem has always bothered me about binder, and it's
just going to get worse with the upcoming changes in the Android systems
to rely more on binder.

So perhaps that should be fixed instead of messing around with
scheduling issues?  Messing with the scheduler is just going to cause
more and more problems over time, and make it harder to "tune" a
system for good "jank", given the horrid hacks the vendors already do to
the kernel scheduler that aren't upstream :(

Can we break the mutex up a bit?  That seems like the best solution in
the long run (besides moving to a bus1-based solution, which is probably
much longer term...)
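
A very rough sketch of what "breaking the mutex up" could look like, say a
per-process lock alongside the existing tracepoints (the field and helper
names below are assumptions for illustration, not anything proposed in this
thread):

struct binder_proc {
        /* ... existing fields ... */
        struct mutex proc_lock;         /* would protect this proc's todo
                                         * list, threads and refs instead of
                                         * taking binder_main_lock for
                                         * everything */
};

static void binder_proc_lock(struct binder_proc *proc, const char *tag)
{
        trace_binder_lock(tag);
        mutex_lock(&proc->proc_lock);
        trace_binder_locked(tag);
}

static void binder_proc_unlock(struct binder_proc *proc, const char *tag)
{
        trace_binder_unlock(tag);
        mutex_unlock(&proc->proc_lock);
}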

> > I don't know of any other driver or IPC that does this type of thing
> > with the scheduler in order to make things "go faster", so it feels
> > wrong to me, and is probably why we don't have global functions like
> > put_user_nopreempt() :)
> 
> I suspect that for most folks, "go faster" means "increase
> throughput". In this case "go faster" means "decrease frame render
> times" (or, more precisely "meet frame deadlines"). Maybe binder is
> special.

Yes, binder is unique and special, just like everyone else :)

"meet frame deadlines" is a very good and specific measurement, so you
are doing well there, but messing with the preempt logic to try to
achieve this seems like an odd solution when the lock seems like a
better long-term issue to be solved.

> > And is enabling and disabling preemption around single byte copies
> > to/from userspace really a good idea?  That seems like a lot of overhead
> > you are now adding to your "fastpath" that you need to go even faster.
> 
> Don't the user copy functions assume that preemption is enabled?

Yes. Well, usually, but not always, they are messy :)

> If it is safe to avoid the toggling here, then I'd agree that it isn't
> a "good idea". The overhead of this toggle is in the noise compared a
> task being preempted while holding the binder lock.

Yes, but you are doing this just for single character read/writes; the
set-up/tear-down for doing that seems huge.  Shouldn't binder look to
make those a bit more "batched"?  That might improve the throughput
as well, as it would reduce the amount of time the global mutex was
held.
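
For illustration, the spots in the patch that emit a command word followed
by a cookie could be batched roughly like this (a sketch only; the packed
struct is an assumption and its layout has to match what userspace already
expects):

        struct {
                uint32_t cmd;
                binder_uintptr_t cookie;
        } __packed br = {
                .cmd    = cmd,
                .cookie = death->cookie,
        };

        /* One user copy instead of a put_user_nopreempt() per word. */
        if (copy_to_user_nopreempt(ptr, &br, sizeof(br)))
                return -EFAULT;
        ptr += sizeof(br);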

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] android: binder: Disable preemption while holding the global binder lock
  2016-09-09 15:44 ` Greg KH
@ 2016-09-09 17:39   ` Todd Kjos
  2016-09-10 11:18     ` Greg KH
  0 siblings, 1 reply; 27+ messages in thread
From: Todd Kjos @ 2016-09-09 17:39 UTC (permalink / raw)
  To: Greg KH; +Cc: arve, riandrews, devel, linux-kernel

On Fri, Sep 9, 2016 at 8:44 AM, Greg KH <gregkh@linuxfoundation.org> wrote:
> On Fri, Sep 09, 2016 at 08:17:44AM -0700, Todd Kjos wrote:
>> From: Todd Kjos <tkjos@android.com>
>>
>> In Android systems, the display pipeline relies on low
>> latency binder transactions and is therefore sensitive to
>> delays caused by contention for the global binder lock.
>> Jank is significantly reduced by disabling preemption
>> while the global binder lock is held.
>
> What is the technical definition of "Jank"?  :)

I'll rephrase in the next version to "dropped or delayed frames".

>
>>
>> This patch was originated by Riley Andrews <riandrews@android.com>
>> with tweaks and forward-porting by me.
>>
>> Originally-from: Riley Andrews <riandrews@android.com>
>> Signed-off-by: Todd Kjos <tkjos@android.com>
>> ---
>>  drivers/android/binder.c | 194 +++++++++++++++++++++++++++++++++++------------
>>  1 file changed, 146 insertions(+), 48 deletions(-)
>>
>> diff --git a/drivers/android/binder.c b/drivers/android/binder.c
>> index 16288e7..c36e420 100644
>> --- a/drivers/android/binder.c
>> +++ b/drivers/android/binder.c
>> @@ -379,6 +379,7 @@ static int task_get_unused_fd_flags(struct binder_proc *proc, int flags)
>>       struct files_struct *files = proc->files;
>>       unsigned long rlim_cur;
>>       unsigned long irqs;
>> +     int ret;
>>
>>       if (files == NULL)
>>               return -ESRCH;
>> @@ -389,7 +390,11 @@ static int task_get_unused_fd_flags(struct binder_proc *proc, int flags)
>>       rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE);
>>       unlock_task_sighand(proc->tsk, &irqs);
>>
>> -     return __alloc_fd(files, 0, rlim_cur, flags);
>> +     preempt_enable_no_resched();
>> +     ret = __alloc_fd(files, 0, rlim_cur, flags);
>> +     preempt_disable();
>> +
>> +     return ret;
>>  }
>>
>>  /*
>> @@ -398,8 +403,11 @@ static int task_get_unused_fd_flags(struct binder_proc *proc, int flags)
>>  static void task_fd_install(
>>       struct binder_proc *proc, unsigned int fd, struct file *file)
>>  {
>> -     if (proc->files)
>> +     if (proc->files) {
>> +             preempt_enable_no_resched();
>>               __fd_install(proc->files, fd, file);
>> +             preempt_disable();
>> +     }
>>  }
>>
>>  /*
>> @@ -427,6 +435,7 @@ static inline void binder_lock(const char *tag)
>>  {
>>       trace_binder_lock(tag);
>>       mutex_lock(&binder_main_lock);
>> +     preempt_disable();
>>       trace_binder_locked(tag);
>>  }
>>
>> @@ -434,8 +443,65 @@ static inline void binder_unlock(const char *tag)
>>  {
>>       trace_binder_unlock(tag);
>>       mutex_unlock(&binder_main_lock);
>> +     preempt_enable();
>> +}
>> +
>> +static inline void *kzalloc_nopreempt(size_t size)
>> +{
>> +     void *ptr;
>> +
>> +     ptr = kzalloc(size, GFP_NOWAIT);
>> +     if (ptr)
>> +             return ptr;
>> +
>> +     preempt_enable_no_resched();
>> +     ptr = kzalloc(size, GFP_KERNEL);
>> +     preempt_disable();
>
> Doesn't the allocator retry if the first one fails anyway?  Why not
> GFP_NOIO or GFP_ATOMIC?  Have you really hit the second GFP_KERNEL
> usage?

I suspect we have hit the second, since we do get into cases where
direct reclaim is needed. I can't confirm since I haven't instrumented
this case. As you say, if we use GFP_ATOMIC instead, maybe we
wouldn't, but even then I'd be concerned that we could deplete the
memory reserved for atomic. The general idea of trying for a fast,
nowait allocation and then enabling preempt for the rare potentially
blocking allocation seems reasonable, doesn't it?

>> +
>> +     return ptr;
>> +}
>> +
>> +static inline long copy_to_user_nopreempt(void __user *to,
>> +                                       const void *from, long n)
>> +{
>> +     long ret;
>> +
>> +     preempt_enable_no_resched();
>> +     ret = copy_to_user(to, from, n);
>> +     preempt_disable();
>> +     return ret;
>> +}
>> +
>> +static inline long copy_from_user_nopreempt(void *to,
>> +                                         const void __user *from,
>> +                                         long n)
>> +{
>> +     long ret;
>> +
>> +     preempt_enable_no_resched();
>> +     ret = copy_from_user(to, from, n);
>> +     preempt_disable();
>> +     return ret;
>>  }
>>
>> +#define get_user_nopreempt(x, ptr)   \
>> +({                                           \
>> +     int __ret;                              \
>> +     preempt_enable_no_resched();            \
>> +     __ret = get_user(x, ptr);               \
>> +     preempt_disable();                      \
>> +     __ret;                                  \
>> +})
>> +
>> +#define put_user_nopreempt(x, ptr)   \
>> +({                                           \
>> +     int __ret;                              \
>> +     preempt_enable_no_resched();            \
>> +     __ret = put_user(x, ptr);               \
>> +     preempt_disable();                      \
>> +     __ret;                                  \
>> +})
>
> Any reason some of these are #defines and some are static inline
> functions?

Not that I know of. I'll change to static inline for the next version.

>
> Anyway, these all seem a bit strange to me, what type of latency spikes
> are you seeing that these changes resolve?  Shouldn't that be an issue
> with the scheduler more than just the binder driver?

Well, it certainly is partially a scheduler issue in the sense that we
are using SCHED_NORMAL for latency sensitive tasks that comprise the
display pipeline (surfaceflinger, UI threads, Render threads). This
means that a task being preempted might have to wait for a while
before getting scheduled again. It would be nice to use SCHED_FIFO for
these, but at the moment we can't do that since that opens us up to
user code DOS'ing cores. There are some ideas being investigated here,
but that's a ways off.

It is also partially a binder issue since there is a single global
mutex that is acquired for any binder transaction. There are periods
where the IPC rate is very high. Most of those binder calls are not so
latency sensitive that a little contention for the binder lock
matters, but the binder calls between RenderThreads and Surfaceflinger
are sensitive since there are frame deadlines that need to be met. The
more time spent holding the binder lock, the higher probability for
contention and resulting delays which can cause us to miss the
deadline for the frame.

Without this change, it is common for the task holding the mutex to be
preempted, creating a backlog of tasks waiting on the mutex. We
measured the lock being held for over 400us regularly in these cases,
greatly increasing the likelihood of contention for the mutex. Since
there are several binder calls in the pipeline as part of drawing a
frame, this was observed to induce more than 10ms of frame delay and
sometimes as high as 20ms which translates directly to missed frames.
This change fixed that issue.

>
> I don't know of any other driver or IPC that does this type of thing
> with the scheduler in order to make things "go faster", so it feels
> wrong to me, and is probably why we don't have global functions like
> put_user_nopreempt() :)

I suspect that for most folks, "go faster" means "increase
throughput". In this case "go faster" means "decrease frame render
times" (or, more precisely "meet frame deadlines"). Maybe binder is
special.

>
> And is enabling and disabling preemption around single byte copies
> to/from userspace really a good idea?  That seems like a lot of overhead
> you are now adding to your "fastpath" that you need to go even faster.

Don't the user copy functions assume that preemption is enabled? If it
is safe to avoid the toggling here, then I'd agree that it isn't a
"good idea". The overhead of this toggle is in the noise compared a
task being preempted while holding the binder lock.

>
> And finally, I'm guessing this has passed the binder test suite that is
> out there for testing binder changes?  If so, can you please mention it
> in the changelog text?

Yes, it did. Will mention in the next version.

>
> thanks,
>
> greg k-h

-Todd

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] android: binder: Disable preemption while holding the global binder lock
  2016-09-09 15:17 Todd Kjos
  2016-09-09 15:44 ` Greg KH
@ 2016-09-09 16:37 ` kbuild test robot
  1 sibling, 0 replies; 27+ messages in thread
From: kbuild test robot @ 2016-09-09 16:37 UTC (permalink / raw)
  To: Todd Kjos
  Cc: kbuild-all, gregkh, arve, riandrews, devel, linux-kernel, Todd Kjos

[-- Attachment #1: Type: text/plain, Size: 4386 bytes --]

Hi Todd,

[auto build test WARNING on staging/staging-testing]
[also build test WARNING on v4.8-rc5 next-20160909]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]
[Suggest to use git(>=2.9.0) format-patch --base=<commit> (or --base=auto for convenience) to record what (public, well-known) commit your patch series was built on]
[Check https://git-scm.com/docs/git-format-patch for more information]

url:    https://github.com/0day-ci/linux/commits/Todd-Kjos/android-binder-Disable-preemption-while-holding-the-global-binder-lock/20160909-233333
config: x86_64-randconfig-x008-201636 (attached as .config)
compiler: gcc-6 (Debian 6.1.1-9) 6.1.1 20160705
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 

All warnings (new ones prefixed by >>):

   drivers/android/binder.c: In function 'binder_thread_read':
>> drivers/android/binder.c:2432:4: warning: this 'else' clause does not guard... [-Wmisleading-indentation]
       else
       ^~~~
   drivers/android/binder.c:2434:5: note: ...this statement, but the latter is misleadingly indented as if it is guarded by the 'else'
        if (put_user_nopreempt(cmd,
        ^~

vim +/else +2432 drivers/android/binder.c

da49889d drivers/staging/android/binder.c Arve Hjønnevåg     2014-02-21  2416  						     proc->pid, thread->pid,
da49889d drivers/staging/android/binder.c Arve Hjønnevåg     2014-02-21  2417  						     node->debug_id,
da49889d drivers/staging/android/binder.c Arve Hjønnevåg     2014-02-21  2418  						     (u64)node->ptr,
da49889d drivers/staging/android/binder.c Arve Hjønnevåg     2014-02-21  2419  						     (u64)node->cookie);
355b0502 drivers/staging/android/binder.c Greg Kroah-Hartman 2011-11-30  2420  				}
355b0502 drivers/staging/android/binder.c Greg Kroah-Hartman 2011-11-30  2421  			}
355b0502 drivers/staging/android/binder.c Greg Kroah-Hartman 2011-11-30  2422  		} break;
355b0502 drivers/staging/android/binder.c Greg Kroah-Hartman 2011-11-30  2423  		case BINDER_WORK_DEAD_BINDER:
355b0502 drivers/staging/android/binder.c Greg Kroah-Hartman 2011-11-30  2424  		case BINDER_WORK_DEAD_BINDER_AND_CLEAR:
355b0502 drivers/staging/android/binder.c Greg Kroah-Hartman 2011-11-30  2425  		case BINDER_WORK_CLEAR_DEATH_NOTIFICATION: {
355b0502 drivers/staging/android/binder.c Greg Kroah-Hartman 2011-11-30  2426  			struct binder_ref_death *death;
355b0502 drivers/staging/android/binder.c Greg Kroah-Hartman 2011-11-30  2427  			uint32_t cmd;
355b0502 drivers/staging/android/binder.c Greg Kroah-Hartman 2011-11-30  2428  
355b0502 drivers/staging/android/binder.c Greg Kroah-Hartman 2011-11-30  2429  			death = container_of(w, struct binder_ref_death, work);
355b0502 drivers/staging/android/binder.c Greg Kroah-Hartman 2011-11-30  2430  			if (w->type == BINDER_WORK_CLEAR_DEATH_NOTIFICATION)
355b0502 drivers/staging/android/binder.c Greg Kroah-Hartman 2011-11-30  2431  				cmd = BR_CLEAR_DEATH_NOTIFICATION_DONE;
355b0502 drivers/staging/android/binder.c Greg Kroah-Hartman 2011-11-30 @2432  			else
355b0502 drivers/staging/android/binder.c Greg Kroah-Hartman 2011-11-30  2433  				cmd = BR_DEAD_BINDER;
ddd4adb7 drivers/android/binder.c         Todd Kjos          2016-09-09  2434  				if (put_user_nopreempt(cmd,
ddd4adb7 drivers/android/binder.c         Todd Kjos          2016-09-09  2435  					       (uint32_t __user *) ptr))
355b0502 drivers/staging/android/binder.c Greg Kroah-Hartman 2011-11-30  2436  				return -EFAULT;
355b0502 drivers/staging/android/binder.c Greg Kroah-Hartman 2011-11-30  2437  			ptr += sizeof(uint32_t);
ddd4adb7 drivers/android/binder.c         Todd Kjos          2016-09-09  2438  			if (put_user_nopreempt(death->cookie,
da49889d drivers/staging/android/binder.c Arve Hjønnevåg     2014-02-21  2439  				       (binder_uintptr_t __user *) ptr))
355b0502 drivers/staging/android/binder.c Greg Kroah-Hartman 2011-11-30  2440  				return -EFAULT;

:::::: The code at line 2432 was first introduced by commit
:::::: 355b0502f6efea0ff9492753888772c96972d2a3 Revert "Staging: android: delete android drivers"

:::::: TO: Greg Kroah-Hartman <gregkh@suse.de>
:::::: CC: Greg Kroah-Hartman <gregkh@suse.de>
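
Judging from the pre-patch code (the put_user() this hunk replaced sat at
the same indentation as the if/else), the intended shape of the block is
presumably:

        if (w->type == BINDER_WORK_CLEAR_DEATH_NOTIFICATION)
                cmd = BR_CLEAR_DEATH_NOTIFICATION_DONE;
        else
                cmd = BR_DEAD_BINDER;
        if (put_user_nopreempt(cmd, (uint32_t __user *)ptr))
                return -EFAULT;
        ptr += sizeof(uint32_t);
        if (put_user_nopreempt(death->cookie,
                               (binder_uintptr_t __user *)ptr))
                return -EFAULT;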

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/octet-stream, Size: 21144 bytes --]

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] android: binder: Disable preemption while holding the global binder lock
  2016-09-09 15:17 Todd Kjos
@ 2016-09-09 15:44 ` Greg KH
  2016-09-09 17:39   ` Todd Kjos
  2016-09-09 16:37 ` kbuild test robot
  1 sibling, 1 reply; 27+ messages in thread
From: Greg KH @ 2016-09-09 15:44 UTC (permalink / raw)
  To: Todd Kjos; +Cc: arve, riandrews, devel, linux-kernel

On Fri, Sep 09, 2016 at 08:17:44AM -0700, Todd Kjos wrote:
> From: Todd Kjos <tkjos@android.com>
> 
> In Android systems, the display pipeline relies on low
> latency binder transactions and is therefore sensitive to
> delays caused by contention for the global binder lock.
> Jank is significantly reduced by disabling preemption
> while the global binder lock is held.

What is the technical definition of "Jank"?  :)

> 
> This patch was originated by Riley Andrews <riandrews@android.com>
> with tweaks and forward-porting by me.
> 
> Originally-from: Riley Andrews <riandrews@android.com>
> Signed-off-by: Todd Kjos <tkjos@android.com>
> ---
>  drivers/android/binder.c | 194 +++++++++++++++++++++++++++++++++++------------
>  1 file changed, 146 insertions(+), 48 deletions(-)
> 
> diff --git a/drivers/android/binder.c b/drivers/android/binder.c
> index 16288e7..c36e420 100644
> --- a/drivers/android/binder.c
> +++ b/drivers/android/binder.c
> @@ -379,6 +379,7 @@ static int task_get_unused_fd_flags(struct binder_proc *proc, int flags)
>  	struct files_struct *files = proc->files;
>  	unsigned long rlim_cur;
>  	unsigned long irqs;
> +	int ret;
>  
>  	if (files == NULL)
>  		return -ESRCH;
> @@ -389,7 +390,11 @@ static int task_get_unused_fd_flags(struct binder_proc *proc, int flags)
>  	rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE);
>  	unlock_task_sighand(proc->tsk, &irqs);
>  
> -	return __alloc_fd(files, 0, rlim_cur, flags);
> +	preempt_enable_no_resched();
> +	ret = __alloc_fd(files, 0, rlim_cur, flags);
> +	preempt_disable();
> +
> +	return ret;
>  }
>  
>  /*
> @@ -398,8 +403,11 @@ static int task_get_unused_fd_flags(struct binder_proc *proc, int flags)
>  static void task_fd_install(
>  	struct binder_proc *proc, unsigned int fd, struct file *file)
>  {
> -	if (proc->files)
> +	if (proc->files) {
> +		preempt_enable_no_resched();
>  		__fd_install(proc->files, fd, file);
> +		preempt_disable();
> +	}
>  }
>  
>  /*
> @@ -427,6 +435,7 @@ static inline void binder_lock(const char *tag)
>  {
>  	trace_binder_lock(tag);
>  	mutex_lock(&binder_main_lock);
> +	preempt_disable();
>  	trace_binder_locked(tag);
>  }
>  
> @@ -434,8 +443,65 @@ static inline void binder_unlock(const char *tag)
>  {
>  	trace_binder_unlock(tag);
>  	mutex_unlock(&binder_main_lock);
> +	preempt_enable();
> +}
> +
> +static inline void *kzalloc_nopreempt(size_t size)
> +{
> +	void *ptr;
> +
> +	ptr = kzalloc(size, GFP_NOWAIT);
> +	if (ptr)
> +		return ptr;
> +
> +	preempt_enable_no_resched();
> +	ptr = kzalloc(size, GFP_KERNEL);
> +	preempt_disable();

Doesn't the allocator retry if the first one fails anyway?  Why not
GFP_NOIO or GFP_ATOMIC?  Have you really hit the second GFP_KERNEL
usage?

> +
> +	return ptr;
> +}
> +
> +static inline long copy_to_user_nopreempt(void __user *to,
> +					  const void *from, long n)
> +{
> +	long ret;
> +
> +	preempt_enable_no_resched();
> +	ret = copy_to_user(to, from, n);
> +	preempt_disable();
> +	return ret;
> +}
> +
> +static inline long copy_from_user_nopreempt(void *to,
> +					    const void __user *from,
> +					    long n)
> +{
> +	long ret;
> +
> +	preempt_enable_no_resched();
> +	ret = copy_from_user(to, from, n);
> +	preempt_disable();
> +	return ret;
>  }
>  
> +#define get_user_nopreempt(x, ptr)	\
> +({						\
> +	int __ret;				\
> +	preempt_enable_no_resched();		\
> +	__ret = get_user(x, ptr);		\
> +	preempt_disable();			\
> +	__ret;					\
> +})
> +
> +#define put_user_nopreempt(x, ptr)	\
> +({						\
> +	int __ret;				\
> +	preempt_enable_no_resched();		\
> +	__ret = put_user(x, ptr);		\
> +	preempt_disable();			\
> +	__ret;					\
> +})

Any reason some of these are #defines and some are static inline
functions?

Anyway, these all seem a bit strange to me, what type of latency spikes
are you seeing that these changes resolve?  Shouldn't that be an issue
with the scheduler more than just the binder driver?

I don't know of any other driver or IPC that does this type of thing
with the scheduler in order to make things "go faster", so it feels
wrong to me, and is probably why we don't have global functions like
put_user_nopreempt() :)

And is enabling and disabling preemption around single byte copies
to/from userspace really a good idea?  That seems like a lot of overhead
you are now adding to your "fastpath" that you need to go even faster.

And finally, I'm guessing this has passed the binder test suite that is
out there for testing binder changes?  If so, can you please mention it
in the changelog text?

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [PATCH] android: binder: Disable preemption while holding the global binder lock
@ 2016-09-09 15:17 Todd Kjos
  2016-09-09 15:44 ` Greg KH
  2016-09-09 16:37 ` kbuild test robot
  0 siblings, 2 replies; 27+ messages in thread
From: Todd Kjos @ 2016-09-09 15:17 UTC (permalink / raw)
  To: gregkh, arve; +Cc: riandrews, devel, linux-kernel, Todd Kjos

From: Todd Kjos <tkjos@android.com>

In Android systems, the display pipeline relies on low
latency binder transactions and is therefore sensitive to
delays caused by contention for the global binder lock.
Jank is significantly reduced by disabling preemption
while the global binder lock is held.

This patch was originated by Riley Andrews <riandrews@android.com>
with tweaks and forward-porting by me.

Originally-from: Riley Andrews <riandrews@android.com>
Signed-off-by: Todd Kjos <tkjos@android.com>
---
 drivers/android/binder.c | 194 +++++++++++++++++++++++++++++++++++------------
 1 file changed, 146 insertions(+), 48 deletions(-)

diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 16288e7..c36e420 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -379,6 +379,7 @@ static int task_get_unused_fd_flags(struct binder_proc *proc, int flags)
 	struct files_struct *files = proc->files;
 	unsigned long rlim_cur;
 	unsigned long irqs;
+	int ret;
 
 	if (files == NULL)
 		return -ESRCH;
@@ -389,7 +390,11 @@ static int task_get_unused_fd_flags(struct binder_proc *proc, int flags)
 	rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE);
 	unlock_task_sighand(proc->tsk, &irqs);
 
-	return __alloc_fd(files, 0, rlim_cur, flags);
+	preempt_enable_no_resched();
+	ret = __alloc_fd(files, 0, rlim_cur, flags);
+	preempt_disable();
+
+	return ret;
 }
 
 /*
@@ -398,8 +403,11 @@ static int task_get_unused_fd_flags(struct binder_proc *proc, int flags)
 static void task_fd_install(
 	struct binder_proc *proc, unsigned int fd, struct file *file)
 {
-	if (proc->files)
+	if (proc->files) {
+		preempt_enable_no_resched();
 		__fd_install(proc->files, fd, file);
+		preempt_disable();
+	}
 }
 
 /*
@@ -427,6 +435,7 @@ static inline void binder_lock(const char *tag)
 {
 	trace_binder_lock(tag);
 	mutex_lock(&binder_main_lock);
+	preempt_disable();
 	trace_binder_locked(tag);
 }
 
@@ -434,8 +443,65 @@ static inline void binder_unlock(const char *tag)
 {
 	trace_binder_unlock(tag);
 	mutex_unlock(&binder_main_lock);
+	preempt_enable();
+}
+
+static inline void *kzalloc_nopreempt(size_t size)
+{
+	void *ptr;
+
+	ptr = kzalloc(size, GFP_NOWAIT);
+	if (ptr)
+		return ptr;
+
+	preempt_enable_no_resched();
+	ptr = kzalloc(size, GFP_KERNEL);
+	preempt_disable();
+
+	return ptr;
+}
+
+static inline long copy_to_user_nopreempt(void __user *to,
+					  const void *from, long n)
+{
+	long ret;
+
+	preempt_enable_no_resched();
+	ret = copy_to_user(to, from, n);
+	preempt_disable();
+	return ret;
+}
+
+static inline long copy_from_user_nopreempt(void *to,
+					    const void __user *from,
+					    long n)
+{
+	long ret;
+
+	preempt_enable_no_resched();
+	ret = copy_from_user(to, from, n);
+	preempt_disable();
+	return ret;
 }
 
+#define get_user_nopreempt(x, ptr)	\
+({						\
+	int __ret;				\
+	preempt_enable_no_resched();		\
+	__ret = get_user(x, ptr);		\
+	preempt_disable();			\
+	__ret;					\
+})
+
+#define put_user_nopreempt(x, ptr)	\
+({						\
+	int __ret;				\
+	preempt_enable_no_resched();		\
+	__ret = put_user(x, ptr);		\
+	preempt_disable();			\
+	__ret;					\
+})
+
 static void binder_set_nice(long nice)
 {
 	long min_nice;
@@ -568,6 +634,8 @@ static int binder_update_page_range(struct binder_proc *proc, int allocate,
 	else
 		mm = get_task_mm(proc->tsk);
 
+	preempt_enable_no_resched();
+
 	if (mm) {
 		down_write(&mm->mmap_sem);
 		vma = proc->vma;
@@ -622,6 +690,9 @@ static int binder_update_page_range(struct binder_proc *proc, int allocate,
 		up_write(&mm->mmap_sem);
 		mmput(mm);
 	}
+
+	preempt_disable();
+
 	return 0;
 
 free_range:
@@ -644,6 +715,9 @@ err_no_vma:
 		up_write(&mm->mmap_sem);
 		mmput(mm);
 	}
+
+	preempt_disable();
+
 	return -ENOMEM;
 }
 
@@ -903,7 +977,7 @@ static struct binder_node *binder_new_node(struct binder_proc *proc,
 			return NULL;
 	}
 
-	node = kzalloc(sizeof(*node), GFP_KERNEL);
+	node = kzalloc_nopreempt(sizeof(*node));
 	if (node == NULL)
 		return NULL;
 	binder_stats_created(BINDER_STAT_NODE);
@@ -1040,7 +1114,7 @@ static struct binder_ref *binder_get_ref_for_node(struct binder_proc *proc,
 		else
 			return ref;
 	}
-	new_ref = kzalloc(sizeof(*ref), GFP_KERNEL);
+	new_ref = kzalloc_nopreempt(sizeof(*ref));
 	if (new_ref == NULL)
 		return NULL;
 	binder_stats_created(BINDER_STAT_REF);
@@ -1438,14 +1512,14 @@ static void binder_transaction(struct binder_proc *proc,
 	e->to_proc = target_proc->pid;
 
 	/* TODO: reuse incoming transaction for reply */
-	t = kzalloc(sizeof(*t), GFP_KERNEL);
+	t = kzalloc_nopreempt(sizeof(*t));
 	if (t == NULL) {
 		return_error = BR_FAILED_REPLY;
 		goto err_alloc_t_failed;
 	}
 	binder_stats_created(BINDER_STAT_TRANSACTION);
 
-	tcomplete = kzalloc(sizeof(*tcomplete), GFP_KERNEL);
+	tcomplete = kzalloc_nopreempt(sizeof(*tcomplete));
 	if (tcomplete == NULL) {
 		return_error = BR_FAILED_REPLY;
 		goto err_alloc_tcomplete_failed;
@@ -1502,15 +1576,16 @@ static void binder_transaction(struct binder_proc *proc,
 	offp = (binder_size_t *)(t->buffer->data +
 				 ALIGN(tr->data_size, sizeof(void *)));
 
-	if (copy_from_user(t->buffer->data, (const void __user *)(uintptr_t)
-			   tr->data.ptr.buffer, tr->data_size)) {
+	if (copy_from_user_nopreempt(t->buffer->data,
+			(const void __user *)(uintptr_t) tr->data.ptr.buffer,
+			tr->data_size)) {
 		binder_user_error("%d:%d got transaction with invalid data ptr\n",
 				proc->pid, thread->pid);
 		return_error = BR_FAILED_REPLY;
 		goto err_copy_data_failed;
 	}
-	if (copy_from_user(offp, (const void __user *)(uintptr_t)
-			   tr->data.ptr.offsets, tr->offsets_size)) {
+	if (copy_from_user_nopreempt(offp, (const void __user *)(uintptr_t)
+				     tr->data.ptr.offsets, tr->offsets_size)) {
 		binder_user_error("%d:%d got transaction with invalid offsets ptr\n",
 				proc->pid, thread->pid);
 		return_error = BR_FAILED_REPLY;
@@ -1770,7 +1845,7 @@ static int binder_thread_write(struct binder_proc *proc,
 	void __user *end = buffer + size;
 
 	while (ptr < end && thread->return_error == BR_OK) {
-		if (get_user(cmd, (uint32_t __user *)ptr))
+		if (get_user_nopreempt(cmd, (uint32_t __user *)ptr))
 			return -EFAULT;
 		ptr += sizeof(uint32_t);
 		trace_binder_command(cmd);
@@ -1788,7 +1863,7 @@ static int binder_thread_write(struct binder_proc *proc,
 			struct binder_ref *ref;
 			const char *debug_string;
 
-			if (get_user(target, (uint32_t __user *)ptr))
+			if (get_user_nopreempt(target, (uint32_t __user *)ptr))
 				return -EFAULT;
 			ptr += sizeof(uint32_t);
 			if (target == 0 && binder_context_mgr_node &&
@@ -1838,10 +1913,12 @@ static int binder_thread_write(struct binder_proc *proc,
 			binder_uintptr_t cookie;
 			struct binder_node *node;
 
-			if (get_user(node_ptr, (binder_uintptr_t __user *)ptr))
+			if (get_user_nopreempt(node_ptr,
+					(binder_uintptr_t __user *)ptr))
 				return -EFAULT;
 			ptr += sizeof(binder_uintptr_t);
-			if (get_user(cookie, (binder_uintptr_t __user *)ptr))
+			if (get_user_nopreempt(cookie,
+					(binder_uintptr_t __user *)ptr))
 				return -EFAULT;
 			ptr += sizeof(binder_uintptr_t);
 			node = binder_get_node(proc, node_ptr);
@@ -1899,7 +1976,8 @@ static int binder_thread_write(struct binder_proc *proc,
 			binder_uintptr_t data_ptr;
 			struct binder_buffer *buffer;
 
-			if (get_user(data_ptr, (binder_uintptr_t __user *)ptr))
+			if (get_user_nopreempt(data_ptr,
+					(binder_uintptr_t __user *)ptr))
 				return -EFAULT;
 			ptr += sizeof(binder_uintptr_t);
 
@@ -1941,7 +2019,7 @@ static int binder_thread_write(struct binder_proc *proc,
 		case BC_REPLY: {
 			struct binder_transaction_data tr;
 
-			if (copy_from_user(&tr, ptr, sizeof(tr)))
+			if (copy_from_user_nopreempt(&tr, ptr, sizeof(tr)))
 				return -EFAULT;
 			ptr += sizeof(tr);
 			binder_transaction(proc, thread, &tr, cmd == BC_REPLY);
@@ -1991,10 +2069,12 @@ static int binder_thread_write(struct binder_proc *proc,
 			struct binder_ref *ref;
 			struct binder_ref_death *death;
 
-			if (get_user(target, (uint32_t __user *)ptr))
+			if (get_user_nopreempt(target,
+					(uint32_t __user *)ptr))
 				return -EFAULT;
 			ptr += sizeof(uint32_t);
-			if (get_user(cookie, (binder_uintptr_t __user *)ptr))
+			if (get_user_nopreempt(cookie,
+					(binder_uintptr_t __user *)ptr))
 				return -EFAULT;
 			ptr += sizeof(binder_uintptr_t);
 			ref = binder_get_ref(proc, target);
@@ -2023,7 +2103,7 @@ static int binder_thread_write(struct binder_proc *proc,
 						proc->pid, thread->pid);
 					break;
 				}
-				death = kzalloc(sizeof(*death), GFP_KERNEL);
+				death = kzalloc_nopreempt(sizeof(*death));
 				if (death == NULL) {
 					thread->return_error = BR_ERROR;
 					binder_debug(BINDER_DEBUG_FAILED_TRANSACTION,
@@ -2077,8 +2157,8 @@ static int binder_thread_write(struct binder_proc *proc,
 			struct binder_work *w;
 			binder_uintptr_t cookie;
 			struct binder_ref_death *death = NULL;
-
-			if (get_user(cookie, (binder_uintptr_t __user *)ptr))
+			if (get_user_nopreempt(cookie,
+					(binder_uintptr_t __user *)ptr))
 				return -EFAULT;
 
 			ptr += sizeof(cookie);
@@ -2159,7 +2239,7 @@ static int binder_thread_read(struct binder_proc *proc,
 	int wait_for_proc_work;
 
 	if (*consumed == 0) {
-		if (put_user(BR_NOOP, (uint32_t __user *)ptr))
+		if (put_user_nopreempt(BR_NOOP, (uint32_t __user *)ptr))
 			return -EFAULT;
 		ptr += sizeof(uint32_t);
 	}
@@ -2170,7 +2250,8 @@ retry:
 
 	if (thread->return_error != BR_OK && ptr < end) {
 		if (thread->return_error2 != BR_OK) {
-			if (put_user(thread->return_error2, (uint32_t __user *)ptr))
+			if (put_user_nopreempt(thread->return_error2,
+					       (uint32_t __user *)ptr))
 				return -EFAULT;
 			ptr += sizeof(uint32_t);
 			binder_stat_br(proc, thread, thread->return_error2);
@@ -2178,7 +2259,8 @@ retry:
 				goto done;
 			thread->return_error2 = BR_OK;
 		}
-		if (put_user(thread->return_error, (uint32_t __user *)ptr))
+		if (put_user_nopreempt(thread->return_error,
+				       (uint32_t __user *)ptr))
 			return -EFAULT;
 		ptr += sizeof(uint32_t);
 		binder_stat_br(proc, thread, thread->return_error);
@@ -2256,7 +2338,7 @@ retry:
 		} break;
 		case BINDER_WORK_TRANSACTION_COMPLETE: {
 			cmd = BR_TRANSACTION_COMPLETE;
-			if (put_user(cmd, (uint32_t __user *)ptr))
+			if (put_user_nopreempt(cmd, (uint32_t __user *) ptr))
 				return -EFAULT;
 			ptr += sizeof(uint32_t);
 
@@ -2298,15 +2380,16 @@ retry:
 				node->has_weak_ref = 0;
 			}
 			if (cmd != BR_NOOP) {
-				if (put_user(cmd, (uint32_t __user *)ptr))
+				if (put_user_nopreempt(cmd,
+						(uint32_t __user *) ptr))
 					return -EFAULT;
 				ptr += sizeof(uint32_t);
-				if (put_user(node->ptr,
-					     (binder_uintptr_t __user *)ptr))
+					if (put_user_nopreempt(node->ptr,
+					      (binder_uintptr_t __user *)ptr))
 					return -EFAULT;
 				ptr += sizeof(binder_uintptr_t);
-				if (put_user(node->cookie,
-					     (binder_uintptr_t __user *)ptr))
+					if (put_user_nopreempt(node->cookie,
+					      (binder_uintptr_t __user *)ptr))
 					return -EFAULT;
 				ptr += sizeof(binder_uintptr_t);
 
@@ -2349,11 +2432,12 @@ retry:
 				cmd = BR_CLEAR_DEATH_NOTIFICATION_DONE;
 			else
 				cmd = BR_DEAD_BINDER;
-			if (put_user(cmd, (uint32_t __user *)ptr))
+				if (put_user_nopreempt(cmd,
+					       (uint32_t __user *) ptr))
 				return -EFAULT;
 			ptr += sizeof(uint32_t);
-			if (put_user(death->cookie,
-				     (binder_uintptr_t __user *)ptr))
+			if (put_user_nopreempt(death->cookie,
+				       (binder_uintptr_t __user *) ptr))
 				return -EFAULT;
 			ptr += sizeof(binder_uintptr_t);
 			binder_stat_br(proc, thread, cmd);
@@ -2420,10 +2504,10 @@ retry:
 					ALIGN(t->buffer->data_size,
 					    sizeof(void *));
 
-		if (put_user(cmd, (uint32_t __user *)ptr))
+		if (put_user_nopreempt(cmd, (uint32_t __user *) ptr))
 			return -EFAULT;
 		ptr += sizeof(uint32_t);
-		if (copy_to_user(ptr, &tr, sizeof(tr)))
+		if (copy_to_user_nopreempt(ptr, &tr, sizeof(tr)))
 			return -EFAULT;
 		ptr += sizeof(tr);
 
@@ -2465,7 +2549,8 @@ done:
 		binder_debug(BINDER_DEBUG_THREADS,
 			     "%d:%d BR_SPAWN_LOOPER\n",
 			     proc->pid, thread->pid);
-		if (put_user(BR_SPAWN_LOOPER, (uint32_t __user *)buffer))
+		if (put_user_nopreempt(BR_SPAWN_LOOPER,
+				       (uint32_t __user *) buffer))
 			return -EFAULT;
 		binder_stat_br(proc, thread, BR_SPAWN_LOOPER);
 	}
@@ -2540,7 +2625,7 @@ static struct binder_thread *binder_get_thread(struct binder_proc *proc)
 			break;
 	}
 	if (*p == NULL) {
-		thread = kzalloc(sizeof(*thread), GFP_KERNEL);
+		thread = kzalloc_nopreempt(sizeof(*thread));
 		if (thread == NULL)
 			return NULL;
 		binder_stats_created(BINDER_STAT_THREAD);
@@ -2644,7 +2729,7 @@ static int binder_ioctl_write_read(struct file *filp,
 		ret = -EINVAL;
 		goto out;
 	}
-	if (copy_from_user(&bwr, ubuf, sizeof(bwr))) {
+	if (copy_from_user_nopreempt(&bwr, ubuf, sizeof(bwr))) {
 		ret = -EFAULT;
 		goto out;
 	}
@@ -2662,7 +2747,7 @@ static int binder_ioctl_write_read(struct file *filp,
 		trace_binder_write_done(ret);
 		if (ret < 0) {
 			bwr.read_consumed = 0;
-			if (copy_to_user(ubuf, &bwr, sizeof(bwr)))
+			if (copy_to_user_nopreempt(ubuf, &bwr, sizeof(bwr)))
 				ret = -EFAULT;
 			goto out;
 		}
@@ -2676,7 +2761,7 @@ static int binder_ioctl_write_read(struct file *filp,
 		if (!list_empty(&proc->todo))
 			wake_up_interruptible(&proc->wait);
 		if (ret < 0) {
-			if (copy_to_user(ubuf, &bwr, sizeof(bwr)))
+			if (copy_to_user_nopreempt(ubuf, &bwr, sizeof(bwr)))
 				ret = -EFAULT;
 			goto out;
 		}
@@ -2686,7 +2771,7 @@ static int binder_ioctl_write_read(struct file *filp,
 		     proc->pid, thread->pid,
 		     (u64)bwr.write_consumed, (u64)bwr.write_size,
 		     (u64)bwr.read_consumed, (u64)bwr.read_size);
-	if (copy_to_user(ubuf, &bwr, sizeof(bwr))) {
+	if (copy_to_user_nopreempt(ubuf, &bwr, sizeof(bwr))) {
 		ret = -EFAULT;
 		goto out;
 	}
@@ -2768,7 +2853,8 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 			goto err;
 		break;
 	case BINDER_SET_MAX_THREADS:
-		if (copy_from_user(&proc->max_threads, ubuf, sizeof(proc->max_threads))) {
+		if (copy_from_user_nopreempt(&proc->max_threads, ubuf,
+					     sizeof(proc->max_threads))) {
 			ret = -EINVAL;
 			goto err;
 		}
@@ -2791,9 +2877,9 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 			ret = -EINVAL;
 			goto err;
 		}
-		if (put_user(BINDER_CURRENT_PROTOCOL_VERSION,
-			     &ver->protocol_version)) {
-			ret = -EINVAL;
+			if (put_user_nopreempt(BINDER_CURRENT_PROTOCOL_VERSION,
+					       &ver->protocol_version)) {
+				ret = -EINVAL;
 			goto err;
 		}
 		break;
@@ -2854,6 +2940,7 @@ static const struct vm_operations_struct binder_vm_ops = {
 static int binder_mmap(struct file *filp, struct vm_area_struct *vma)
 {
 	int ret;
+
 	struct vm_struct *area;
 	struct binder_proc *proc = filp->private_data;
 	const char *failure_string;
@@ -2914,7 +3001,12 @@ static int binder_mmap(struct file *filp, struct vm_area_struct *vma)
 	vma->vm_ops = &binder_vm_ops;
 	vma->vm_private_data = proc;
 
-	if (binder_update_page_range(proc, 1, proc->buffer, proc->buffer + PAGE_SIZE, vma)) {
+	/* binder_update_page_range assumes preemption is disabled */
+	preempt_disable();
+	ret = binder_update_page_range(proc, 1, proc->buffer,
+				       proc->buffer + PAGE_SIZE, vma);
+	preempt_enable_no_resched();
+	if (ret) {
 		ret = -ENOMEM;
 		failure_string = "alloc small buf";
 		goto err_alloc_small_buf_failed;
@@ -3185,8 +3277,12 @@ static void binder_deferred_func(struct work_struct *work)
 	int defer;
 
 	do {
-		binder_lock(__func__);
+		trace_binder_lock(__func__);
+		mutex_lock(&binder_main_lock);
+		trace_binder_locked(__func__);
+
 		mutex_lock(&binder_deferred_lock);
+		preempt_disable();
 		if (!hlist_empty(&binder_deferred_list)) {
 			proc = hlist_entry(binder_deferred_list.first,
 					struct binder_proc, deferred_work_node);
@@ -3212,7 +3308,9 @@ static void binder_deferred_func(struct work_struct *work)
 		if (defer & BINDER_DEFERRED_RELEASE)
 			binder_deferred_release(proc); /* frees proc */
 
-		binder_unlock(__func__);
+		trace_binder_unlock(__func__);
+		mutex_unlock(&binder_main_lock);
+		preempt_enable_no_resched();
 		if (files)
 			put_files_struct(files);
 	} while (proc);
-- 
2.8.0.rc3.226.g39d4020

^ permalink raw reply related	[flat|nested] 27+ messages in thread

end of thread, other threads:[~2016-09-17  0:42 UTC | newest]

Thread overview: 27+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-09-08 16:12 [PATCH] android: binder: Disable preemption while holding the global binder lock Todd Kjos
2016-09-08 16:15 ` Todd Kjos
2016-09-08 17:46 ` Greg Kroah-Hartman
2016-09-10 16:16 ` Christoph Hellwig
2016-09-10 16:22   ` Peter Zijlstra
2016-09-10 16:37     ` Thomas Gleixner
2016-09-10 17:28       ` Greg Kroah-Hartman
2016-09-12 15:49         ` Todd Kjos
2016-09-13  3:44         ` Arve Hjønnevåg
2016-09-13  6:42           ` Greg Kroah-Hartman
2016-09-13 19:52             ` Arve Hjønnevåg
2016-09-13  7:32           ` Peter Zijlstra
2016-09-13 19:53             ` Arve Hjønnevåg
2016-09-14  7:10               ` Peter Zijlstra
2016-09-14  7:41                 ` Peter Zijlstra
2016-09-14 13:38                 ` Peter Zijlstra
2016-09-14 16:11               ` Peter Zijlstra
2016-09-14 16:13                 ` Peter Zijlstra
2016-09-14 16:55                   ` Peter Zijlstra
2016-09-17  0:42                     ` Todd Kjos
2016-09-09 15:17 Todd Kjos
2016-09-09 15:44 ` Greg KH
2016-09-09 17:39   ` Todd Kjos
2016-09-10 11:18     ` Greg KH
2016-09-10 11:25       ` Greg KH
2016-09-10 11:25         ` Greg KH
2016-09-09 16:37 ` kbuild test robot
