All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] binder: tell userspace to dump current backtrace when detecting oneway spamming
@ 2021-03-31  7:34 Hang Lu
  2021-03-31  7:44 ` Greg KH
  0 siblings, 1 reply; 12+ messages in thread
From: Hang Lu @ 2021-03-31  7:34 UTC (permalink / raw)
  To: gregkh, tkjos, maco
  Cc: arve, joel, christian, hridya, surenb, rdunlap, linux-kernel, Hang Lu

When async binder buffer got exhausted, some normal oneway transaction
will also be discarded and finally caused system/app stop. By that time,
the binder debug information we dump may not relevant to the root cause.
And this issue is difficult to debug if without the backtrace of thread
sending spam.

This change will send BR_ONEWAY_SPAM_SUSPECT to userspace when detecting
oneway spamming, request to dump current backtrace. The detection will
happened only once when exceeding the threshold (target process dips
below 80% of its oneway space, and current process is responsible for
either more than 50 transactions, or more than 50% of the oneway space).
And the detection will restart when the async buffer has returned to a
healthy state.

Signed-off-by: Hang Lu <hangl@codeaurora.org>
---
 drivers/android/binder.c            | 25 ++++++++++++++++++++++---
 drivers/android/binder_alloc.c      | 15 ++++++++++++---
 drivers/android/binder_alloc.h      |  8 +++++++-
 drivers/android/binder_internal.h   |  1 +
 include/uapi/linux/android/binder.h |  8 ++++++++
 5 files changed, 50 insertions(+), 7 deletions(-)

diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index c119736..28ceaf9 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -87,6 +87,7 @@ static DEFINE_SPINLOCK(binder_dead_nodes_lock);
 static struct dentry *binder_debugfs_dir_entry_root;
 static struct dentry *binder_debugfs_dir_entry_proc;
 static atomic_t binder_last_id;
+static bool oneway_spam_detection_enabled;
 
 static int proc_show(struct seq_file *m, void *unused);
 DEFINE_SHOW_ATTRIBUTE(proc);
@@ -3007,7 +3008,10 @@ static void binder_transaction(struct binder_proc *proc,
 			goto err_bad_object_type;
 		}
 	}
-	tcomplete->type = BINDER_WORK_TRANSACTION_COMPLETE;
+	if (t->buffer->oneway_spam_suspect)
+		tcomplete->type = BINDER_WORK_TRANSACTION_ONEWAY_SPAM_SUSPECT;
+	else
+		tcomplete->type = BINDER_WORK_TRANSACTION_COMPLETE;
 	t->work.type = BINDER_WORK_TRANSACTION;
 
 	if (reply) {
@@ -3875,9 +3879,14 @@ static int binder_thread_read(struct binder_proc *proc,
 
 			binder_stat_br(proc, thread, cmd);
 		} break;
-		case BINDER_WORK_TRANSACTION_COMPLETE: {
+		case BINDER_WORK_TRANSACTION_COMPLETE:
+		case BINDER_WORK_TRANSACTION_ONEWAY_SPAM_SUSPECT: {
 			binder_inner_proc_unlock(proc);
-			cmd = BR_TRANSACTION_COMPLETE;
+			if (oneway_spam_detection_enabled &&
+				   w->type == BINDER_WORK_TRANSACTION_ONEWAY_SPAM_SUSPECT)
+				cmd = BR_ONEWAY_SPAM_SUSPECT;
+			else
+				cmd = BR_TRANSACTION_COMPLETE;
 			kfree(w);
 			binder_stats_deleted(BINDER_STAT_TRANSACTION_COMPLETE);
 			if (put_user(cmd, (uint32_t __user *)ptr))
@@ -4727,6 +4736,16 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 		}
 		break;
 	}
+	case BINDER_ENABLE_ONEWAY_SPAM_DETECTION: {
+		uint32_t enable;
+
+		if (copy_from_user(&enable, ubuf, sizeof(enable))) {
+			ret = -EINVAL;
+			goto err;
+		}
+		oneway_spam_detection_enabled = (bool)enable;
+		break;
+	}
 	default:
 		ret = -EINVAL;
 		goto err;
diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c
index 7caf74a..a09872b 100644
--- a/drivers/android/binder_alloc.c
+++ b/drivers/android/binder_alloc.c
@@ -338,7 +338,7 @@ static inline struct vm_area_struct *binder_alloc_get_vma(
 	return vma;
 }
 
-static void debug_low_async_space_locked(struct binder_alloc *alloc, int pid)
+static bool debug_low_async_space_locked(struct binder_alloc *alloc, int pid)
 {
 	/*
 	 * Find the amount and size of buffers allocated by the current caller;
@@ -366,13 +366,19 @@ static void debug_low_async_space_locked(struct binder_alloc *alloc, int pid)
 
 	/*
 	 * Warn if this pid has more than 50 transactions, or more than 50% of
-	 * async space (which is 25% of total buffer size).
+	 * async space (which is 25% of total buffer size). Oneway spam only
+	 * detect once when exceed the threshold.
 	 */
 	if (num_buffers > 50 || total_alloc_size > alloc->buffer_size / 4) {
 		binder_alloc_debug(BINDER_DEBUG_USER_ERROR,
 			     "%d: pid %d spamming oneway? %zd buffers allocated for a total size of %zd\n",
 			      alloc->pid, pid, num_buffers, total_alloc_size);
+		if (!alloc->oneway_spam_detected) {
+			alloc->oneway_spam_detected = true;
+			return true;
+		}
 	}
+	return false;
 }
 
 static struct binder_buffer *binder_alloc_new_buf_locked(
@@ -525,6 +531,7 @@ static struct binder_buffer *binder_alloc_new_buf_locked(
 	buffer->async_transaction = is_async;
 	buffer->extra_buffers_size = extra_buffers_size;
 	buffer->pid = pid;
+	buffer->oneway_spam_suspect = false;
 	if (is_async) {
 		alloc->free_async_space -= size + sizeof(struct binder_buffer);
 		binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC_ASYNC,
@@ -536,7 +543,9 @@ static struct binder_buffer *binder_alloc_new_buf_locked(
 			 * of async space left (which is less than 10% of total
 			 * buffer size).
 			 */
-			debug_low_async_space_locked(alloc, pid);
+			buffer->oneway_spam_suspect = debug_low_async_space_locked(alloc, pid);
+		} else {
+			alloc->oneway_spam_detected = false;
 		}
 	}
 	return buffer;
diff --git a/drivers/android/binder_alloc.h b/drivers/android/binder_alloc.h
index 6e8e001..7dea57a 100644
--- a/drivers/android/binder_alloc.h
+++ b/drivers/android/binder_alloc.h
@@ -26,6 +26,8 @@ struct binder_transaction;
  * @clear_on_free:      %true if buffer must be zeroed after use
  * @allow_user_free:    %true if user is allowed to free buffer
  * @async_transaction:  %true if buffer is in use for an async txn
+ * @oneway_spam_suspect: %true if total async allocate size just exceed
+ * spamming detect threshold
  * @debug_id:           unique ID for debugging
  * @transaction:        pointer to associated struct binder_transaction
  * @target_node:        struct binder_node associated with this buffer
@@ -45,7 +47,8 @@ struct binder_buffer {
 	unsigned clear_on_free:1;
 	unsigned allow_user_free:1;
 	unsigned async_transaction:1;
-	unsigned debug_id:28;
+	unsigned oneway_spam_suspect:1;
+	unsigned debug_id:27;
 
 	struct binder_transaction *transaction;
 
@@ -87,6 +90,8 @@ struct binder_lru_page {
  * @buffer_size:        size of address space specified via mmap
  * @pid:                pid for associated binder_proc (invariant after init)
  * @pages_high:         high watermark of offset in @pages
+ * @oneway_spam_detected: %true if oneway spam detection fired, clear that
+ * flag once the async buffer has returned to a healthy state
  *
  * Bookkeeping structure for per-proc address space management for binder
  * buffers. It is normally initialized during binder_init() and binder_mmap()
@@ -107,6 +112,7 @@ struct binder_alloc {
 	uint32_t buffer_free;
 	int pid;
 	size_t pages_high;
+	bool oneway_spam_detected;
 };
 
 #ifdef CONFIG_ANDROID_BINDER_IPC_SELFTEST
diff --git a/drivers/android/binder_internal.h b/drivers/android/binder_internal.h
index 6cd7901..e380545 100644
--- a/drivers/android/binder_internal.h
+++ b/drivers/android/binder_internal.h
@@ -174,6 +174,7 @@ struct binder_work {
 	enum binder_work_type {
 		BINDER_WORK_TRANSACTION = 1,
 		BINDER_WORK_TRANSACTION_COMPLETE,
+		BINDER_WORK_TRANSACTION_ONEWAY_SPAM_SUSPECT,
 		BINDER_WORK_RETURN_ERROR,
 		BINDER_WORK_NODE,
 		BINDER_WORK_DEAD_BINDER,
diff --git a/include/uapi/linux/android/binder.h b/include/uapi/linux/android/binder.h
index ec84ad1..d0da772 100644
--- a/include/uapi/linux/android/binder.h
+++ b/include/uapi/linux/android/binder.h
@@ -227,6 +227,7 @@ struct binder_node_info_for_ref {
 #define BINDER_GET_NODE_DEBUG_INFO	_IOWR('b', 11, struct binder_node_debug_info)
 #define BINDER_GET_NODE_INFO_FOR_REF	_IOWR('b', 12, struct binder_node_info_for_ref)
 #define BINDER_SET_CONTEXT_MGR_EXT	_IOW('b', 13, struct flat_binder_object)
+#define BINDER_ENABLE_ONEWAY_SPAM_DETECTION	_IOW('b', 15, __u32)
 
 /*
  * NOTE: Two special error codes you should check for when calling
@@ -408,6 +409,13 @@ enum binder_driver_return_protocol {
 	 * The last transaction (either a bcTRANSACTION or
 	 * a bcATTEMPT_ACQUIRE) failed (e.g. out of memory).  No parameters.
 	 */
+
+	BR_ONEWAY_SPAM_SUSPECT = _IO('r', 19),
+	/*
+	 * Current process sent too many oneway calls to target, and the last
+	 * asynchronous transaction makes the allocated async buffer size exceed
+	 * detection threshold.  No parameters.
+	 */
 };
 
 enum binder_driver_command_protocol {
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH] binder: tell userspace to dump current backtrace when detecting oneway spamming
  2021-03-31  7:34 [PATCH] binder: tell userspace to dump current backtrace when detecting oneway spamming Hang Lu
@ 2021-03-31  7:44 ` Greg KH
  2021-04-01  8:05   ` Hang Lu
  0 siblings, 1 reply; 12+ messages in thread
From: Greg KH @ 2021-03-31  7:44 UTC (permalink / raw)
  To: Hang Lu
  Cc: tkjos, maco, arve, joel, christian, hridya, surenb, rdunlap,
	linux-kernel

On Wed, Mar 31, 2021 at 03:34:16PM +0800, Hang Lu wrote:
> When async binder buffer got exhausted, some normal oneway transaction
> will also be discarded and finally caused system/app stop. By that time,
> the binder debug information we dump may not relevant to the root cause.
> And this issue is difficult to debug if without the backtrace of thread
> sending spam.
> 
> This change will send BR_ONEWAY_SPAM_SUSPECT to userspace when detecting
> oneway spamming, request to dump current backtrace. The detection will
> happened only once when exceeding the threshold (target process dips
> below 80% of its oneway space, and current process is responsible for
> either more than 50 transactions, or more than 50% of the oneway space).
> And the detection will restart when the async buffer has returned to a
> healthy state.
> 
> Signed-off-by: Hang Lu <hangl@codeaurora.org>
> ---
>  drivers/android/binder.c            | 25 ++++++++++++++++++++++---
>  drivers/android/binder_alloc.c      | 15 ++++++++++++---
>  drivers/android/binder_alloc.h      |  8 +++++++-
>  drivers/android/binder_internal.h   |  1 +
>  include/uapi/linux/android/binder.h |  8 ++++++++
>  5 files changed, 50 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/android/binder.c b/drivers/android/binder.c
> index c119736..28ceaf9 100644
> --- a/drivers/android/binder.c
> +++ b/drivers/android/binder.c
> @@ -87,6 +87,7 @@ static DEFINE_SPINLOCK(binder_dead_nodes_lock);
>  static struct dentry *binder_debugfs_dir_entry_root;
>  static struct dentry *binder_debugfs_dir_entry_proc;
>  static atomic_t binder_last_id;
> +static bool oneway_spam_detection_enabled;

Why is this a "whole system" value and not a "per binder instance"
value?  You just allowed one binder instance to affect another one,
which does not seem like a good idea to me :(

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] binder: tell userspace to dump current backtrace when detecting oneway spamming
  2021-03-31  7:44 ` Greg KH
@ 2021-04-01  8:05   ` Hang Lu
  2021-04-01  8:28     ` [PATCH v2] " Hang Lu
  0 siblings, 1 reply; 12+ messages in thread
From: Hang Lu @ 2021-04-01  8:05 UTC (permalink / raw)
  To: Greg KH
  Cc: tkjos, maco, arve, joel, christian, hridya, surenb, rdunlap,
	linux-kernel

On 3/31/2021 3:44 PM, Greg KH wrote:
> On Wed, Mar 31, 2021 at 03:34:16PM +0800, Hang Lu wrote:
>> When async binder buffer got exhausted, some normal oneway transaction
>> will also be discarded and finally caused system/app stop. By that time,
>> the binder debug information we dump may not relevant to the root cause.
>> And this issue is difficult to debug if without the backtrace of thread
>> sending spam.
>>
>> This change will send BR_ONEWAY_SPAM_SUSPECT to userspace when detecting
>> oneway spamming, request to dump current backtrace. The detection will
>> happened only once when exceeding the threshold (target process dips
>> below 80% of its oneway space, and current process is responsible for
>> either more than 50 transactions, or more than 50% of the oneway space).
>> And the detection will restart when the async buffer has returned to a
>> healthy state.
>>
>> Signed-off-by: Hang Lu <hangl@codeaurora.org>
>> ---
>>  drivers/android/binder.c            | 25 ++++++++++++++++++++++---
>>  drivers/android/binder_alloc.c      | 15 ++++++++++++---
>>  drivers/android/binder_alloc.h      |  8 +++++++-
>>  drivers/android/binder_internal.h   |  1 +
>>  include/uapi/linux/android/binder.h |  8 ++++++++
>>  5 files changed, 50 insertions(+), 7 deletions(-)
>>
>> diff --git a/drivers/android/binder.c b/drivers/android/binder.c
>> index c119736..28ceaf9 100644
>> --- a/drivers/android/binder.c
>> +++ b/drivers/android/binder.c
>> @@ -87,6 +87,7 @@ static DEFINE_SPINLOCK(binder_dead_nodes_lock);
>>  static struct dentry *binder_debugfs_dir_entry_root;
>>  static struct dentry *binder_debugfs_dir_entry_proc;
>>  static atomic_t binder_last_id;
>> +static bool oneway_spam_detection_enabled;
> 
> Why is this a "whole system" value and not a "per binder instance"
> value?  You just allowed one binder instance to affect another one,
> which does not seem like a good idea to me :(

Sorry for the late reply. Actually I add this as it needs to be enabled to protect against user-space that doesn't understand the new command, so I make it a global setting. But making this flag per-proc will gain a finer control granularity, so I'll follow your suggestion and submit a v2 patch, thanks!

Regards,
Hang Lu


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH v2] binder: tell userspace to dump current backtrace when detecting oneway spamming
  2021-04-01  8:05   ` Hang Lu
@ 2021-04-01  8:28     ` Hang Lu
  2021-04-06 20:26       ` Todd Kjos
  0 siblings, 1 reply; 12+ messages in thread
From: Hang Lu @ 2021-04-01  8:28 UTC (permalink / raw)
  To: gregkh
  Cc: tkjos, maco, arve, joel, christian, hridya, surenb, rdunlap,
	linux-kernel, Hang Lu

When async binder buffer got exhausted, some normal oneway transaction
will also be discarded and finally caused system/app stop. By that time,
the binder debug information we dump may not relevant to the root cause.
And this issue is difficult to debug if without the backtrace of thread
sending spam.

This change will send BR_ONEWAY_SPAM_SUSPECT to userspace when detecting
oneway spamming, request to dump current backtrace. The detection will
happened only once when exceeding the threshold (target process dips
below 80% of its oneway space, and current process is responsible for
either more than 50 transactions, or more than 50% of the oneway space).
And the detection will restart when the async buffer has returned to a
healthy state.

Signed-off-by: Hang Lu <hangl@codeaurora.org>
---
v2: make the detection on/off switch to be per-proc

 drivers/android/binder.c            | 26 +++++++++++++++++++++++---
 drivers/android/binder_alloc.c      | 15 ++++++++++++---
 drivers/android/binder_alloc.h      |  8 +++++++-
 drivers/android/binder_internal.h   |  4 ++++
 include/uapi/linux/android/binder.h |  8 ++++++++
 5 files changed, 54 insertions(+), 7 deletions(-)

diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index c119736..93964d1 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -3007,7 +3007,10 @@ static void binder_transaction(struct binder_proc *proc,
 			goto err_bad_object_type;
 		}
 	}
-	tcomplete->type = BINDER_WORK_TRANSACTION_COMPLETE;
+	if (t->buffer->oneway_spam_suspect)
+		tcomplete->type = BINDER_WORK_TRANSACTION_ONEWAY_SPAM_SUSPECT;
+	else
+		tcomplete->type = BINDER_WORK_TRANSACTION_COMPLETE;
 	t->work.type = BINDER_WORK_TRANSACTION;
 
 	if (reply) {
@@ -3875,9 +3878,14 @@ static int binder_thread_read(struct binder_proc *proc,
 
 			binder_stat_br(proc, thread, cmd);
 		} break;
-		case BINDER_WORK_TRANSACTION_COMPLETE: {
+		case BINDER_WORK_TRANSACTION_COMPLETE:
+		case BINDER_WORK_TRANSACTION_ONEWAY_SPAM_SUSPECT: {
+			if (proc->oneway_spam_detection_enabled &&
+				   w->type == BINDER_WORK_TRANSACTION_ONEWAY_SPAM_SUSPECT)
+				cmd = BR_ONEWAY_SPAM_SUSPECT;
+			else
+				cmd = BR_TRANSACTION_COMPLETE;
 			binder_inner_proc_unlock(proc);
-			cmd = BR_TRANSACTION_COMPLETE;
 			kfree(w);
 			binder_stats_deleted(BINDER_STAT_TRANSACTION_COMPLETE);
 			if (put_user(cmd, (uint32_t __user *)ptr))
@@ -4727,6 +4735,18 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 		}
 		break;
 	}
+	case BINDER_ENABLE_ONEWAY_SPAM_DETECTION: {
+		uint32_t enable;
+
+		if (copy_from_user(&enable, ubuf, sizeof(enable))) {
+			ret = -EINVAL;
+			goto err;
+		}
+		binder_inner_proc_lock(proc);
+		proc->oneway_spam_detection_enabled = (bool)enable;
+		binder_inner_proc_unlock(proc);
+		break;
+	}
 	default:
 		ret = -EINVAL;
 		goto err;
diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c
index 7caf74a..a09872b 100644
--- a/drivers/android/binder_alloc.c
+++ b/drivers/android/binder_alloc.c
@@ -338,7 +338,7 @@ static inline struct vm_area_struct *binder_alloc_get_vma(
 	return vma;
 }
 
-static void debug_low_async_space_locked(struct binder_alloc *alloc, int pid)
+static bool debug_low_async_space_locked(struct binder_alloc *alloc, int pid)
 {
 	/*
 	 * Find the amount and size of buffers allocated by the current caller;
@@ -366,13 +366,19 @@ static void debug_low_async_space_locked(struct binder_alloc *alloc, int pid)
 
 	/*
 	 * Warn if this pid has more than 50 transactions, or more than 50% of
-	 * async space (which is 25% of total buffer size).
+	 * async space (which is 25% of total buffer size). Oneway spam only
+	 * detect once when exceed the threshold.
 	 */
 	if (num_buffers > 50 || total_alloc_size > alloc->buffer_size / 4) {
 		binder_alloc_debug(BINDER_DEBUG_USER_ERROR,
 			     "%d: pid %d spamming oneway? %zd buffers allocated for a total size of %zd\n",
 			      alloc->pid, pid, num_buffers, total_alloc_size);
+		if (!alloc->oneway_spam_detected) {
+			alloc->oneway_spam_detected = true;
+			return true;
+		}
 	}
+	return false;
 }
 
 static struct binder_buffer *binder_alloc_new_buf_locked(
@@ -525,6 +531,7 @@ static struct binder_buffer *binder_alloc_new_buf_locked(
 	buffer->async_transaction = is_async;
 	buffer->extra_buffers_size = extra_buffers_size;
 	buffer->pid = pid;
+	buffer->oneway_spam_suspect = false;
 	if (is_async) {
 		alloc->free_async_space -= size + sizeof(struct binder_buffer);
 		binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC_ASYNC,
@@ -536,7 +543,9 @@ static struct binder_buffer *binder_alloc_new_buf_locked(
 			 * of async space left (which is less than 10% of total
 			 * buffer size).
 			 */
-			debug_low_async_space_locked(alloc, pid);
+			buffer->oneway_spam_suspect = debug_low_async_space_locked(alloc, pid);
+		} else {
+			alloc->oneway_spam_detected = false;
 		}
 	}
 	return buffer;
diff --git a/drivers/android/binder_alloc.h b/drivers/android/binder_alloc.h
index 6e8e001..7dea57a 100644
--- a/drivers/android/binder_alloc.h
+++ b/drivers/android/binder_alloc.h
@@ -26,6 +26,8 @@ struct binder_transaction;
  * @clear_on_free:      %true if buffer must be zeroed after use
  * @allow_user_free:    %true if user is allowed to free buffer
  * @async_transaction:  %true if buffer is in use for an async txn
+ * @oneway_spam_suspect: %true if total async allocate size just exceed
+ * spamming detect threshold
  * @debug_id:           unique ID for debugging
  * @transaction:        pointer to associated struct binder_transaction
  * @target_node:        struct binder_node associated with this buffer
@@ -45,7 +47,8 @@ struct binder_buffer {
 	unsigned clear_on_free:1;
 	unsigned allow_user_free:1;
 	unsigned async_transaction:1;
-	unsigned debug_id:28;
+	unsigned oneway_spam_suspect:1;
+	unsigned debug_id:27;
 
 	struct binder_transaction *transaction;
 
@@ -87,6 +90,8 @@ struct binder_lru_page {
  * @buffer_size:        size of address space specified via mmap
  * @pid:                pid for associated binder_proc (invariant after init)
  * @pages_high:         high watermark of offset in @pages
+ * @oneway_spam_detected: %true if oneway spam detection fired, clear that
+ * flag once the async buffer has returned to a healthy state
  *
  * Bookkeeping structure for per-proc address space management for binder
  * buffers. It is normally initialized during binder_init() and binder_mmap()
@@ -107,6 +112,7 @@ struct binder_alloc {
 	uint32_t buffer_free;
 	int pid;
 	size_t pages_high;
+	bool oneway_spam_detected;
 };
 
 #ifdef CONFIG_ANDROID_BINDER_IPC_SELFTEST
diff --git a/drivers/android/binder_internal.h b/drivers/android/binder_internal.h
index 6cd7901..eb87669 100644
--- a/drivers/android/binder_internal.h
+++ b/drivers/android/binder_internal.h
@@ -174,6 +174,7 @@ struct binder_work {
 	enum binder_work_type {
 		BINDER_WORK_TRANSACTION = 1,
 		BINDER_WORK_TRANSACTION_COMPLETE,
+		BINDER_WORK_TRANSACTION_ONEWAY_SPAM_SUSPECT,
 		BINDER_WORK_RETURN_ERROR,
 		BINDER_WORK_NODE,
 		BINDER_WORK_DEAD_BINDER,
@@ -396,6 +397,8 @@ struct binder_ref {
  * @outer_lock:           no nesting under innor or node lock
  *                        Lock order: 1) outer, 2) node, 3) inner
  * @binderfs_entry:       process-specific binderfs log file
+ * @oneway_spam_detection_enabled: process enabled oneway spam detection
+ *                        or not
  *
  * Bookkeeping structure for binder processes
  */
@@ -426,6 +429,7 @@ struct binder_proc {
 	spinlock_t inner_lock;
 	spinlock_t outer_lock;
 	struct dentry *binderfs_entry;
+	bool oneway_spam_detection_enabled;
 };
 
 /**
diff --git a/include/uapi/linux/android/binder.h b/include/uapi/linux/android/binder.h
index ec84ad1..d0da772 100644
--- a/include/uapi/linux/android/binder.h
+++ b/include/uapi/linux/android/binder.h
@@ -227,6 +227,7 @@ struct binder_node_info_for_ref {
 #define BINDER_GET_NODE_DEBUG_INFO	_IOWR('b', 11, struct binder_node_debug_info)
 #define BINDER_GET_NODE_INFO_FOR_REF	_IOWR('b', 12, struct binder_node_info_for_ref)
 #define BINDER_SET_CONTEXT_MGR_EXT	_IOW('b', 13, struct flat_binder_object)
+#define BINDER_ENABLE_ONEWAY_SPAM_DETECTION	_IOW('b', 15, __u32)
 
 /*
  * NOTE: Two special error codes you should check for when calling
@@ -408,6 +409,13 @@ enum binder_driver_return_protocol {
 	 * The last transaction (either a bcTRANSACTION or
 	 * a bcATTEMPT_ACQUIRE) failed (e.g. out of memory).  No parameters.
 	 */
+
+	BR_ONEWAY_SPAM_SUSPECT = _IO('r', 19),
+	/*
+	 * Current process sent too many oneway calls to target, and the last
+	 * asynchronous transaction makes the allocated async buffer size exceed
+	 * detection threshold.  No parameters.
+	 */
 };
 
 enum binder_driver_command_protocol {
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH v2] binder: tell userspace to dump current backtrace when detecting oneway spamming
  2021-04-01  8:28     ` [PATCH v2] " Hang Lu
@ 2021-04-06 20:26       ` Todd Kjos
  2021-04-07  3:14         ` [PATCH v3] " Hang Lu
  0 siblings, 1 reply; 12+ messages in thread
From: Todd Kjos @ 2021-04-06 20:26 UTC (permalink / raw)
  To: Hang Lu
  Cc: Greg Kroah-Hartman, Todd Kjos, Martijn Coenen,
	Arve Hjønnevåg, Joel Fernandes (Google),
	Christian Brauner, Hridya Valsaraju, Suren Baghdasaryan, rdunlap,
	LKML

On Thu, Apr 1, 2021 at 1:29 AM Hang Lu <hangl@codeaurora.org> wrote:
>
> When async binder buffer got exhausted, some normal oneway transaction
> will also be discarded and finally caused system/app stop.

"...be discarded and may cause system or application failures" ?

> By that time,
> the binder debug information we dump may not relevant to the root cause.

"may not be relevant"

> And this issue is difficult to debug if without the backtrace of thread
> sending spam.

"...backtrace of the thread..."

>
> This change will send BR_ONEWAY_SPAM_SUSPECT to userspace when detecting
> oneway spamming, request to dump current backtrace.

"to userspace when oneway spamming is detected

> The detection will
> happened only once when exceeding the threshold (target process dips

"Oneway spamming will be reported only once..."

> below 80% of its oneway space, and current process is responsible for
> either more than 50 transactions, or more than 50% of the oneway space).
> And the detection will restart when the async buffer has returned to a
> healthy state.
>
> Signed-off-by: Hang Lu <hangl@codeaurora.org>
> ---
> v2: make the detection on/off switch to be per-proc
>
>  drivers/android/binder.c            | 26 +++++++++++++++++++++++---
>  drivers/android/binder_alloc.c      | 15 ++++++++++++---
>  drivers/android/binder_alloc.h      |  8 +++++++-
>  drivers/android/binder_internal.h   |  4 ++++
>  include/uapi/linux/android/binder.h |  8 ++++++++
>  5 files changed, 54 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/android/binder.c b/drivers/android/binder.c
> index c119736..93964d1 100644
> --- a/drivers/android/binder.c
> +++ b/drivers/android/binder.c
> @@ -3007,7 +3007,10 @@ static void binder_transaction(struct binder_proc *proc,
>                         goto err_bad_object_type;
>                 }
>         }
> -       tcomplete->type = BINDER_WORK_TRANSACTION_COMPLETE;
> +       if (t->buffer->oneway_spam_suspect)
> +               tcomplete->type = BINDER_WORK_TRANSACTION_ONEWAY_SPAM_SUSPECT;
> +       else
> +               tcomplete->type = BINDER_WORK_TRANSACTION_COMPLETE;
>         t->work.type = BINDER_WORK_TRANSACTION;
>
>         if (reply) {
> @@ -3875,9 +3878,14 @@ static int binder_thread_read(struct binder_proc *proc,
>
>                         binder_stat_br(proc, thread, cmd);
>                 } break;
> -               case BINDER_WORK_TRANSACTION_COMPLETE: {
> +               case BINDER_WORK_TRANSACTION_COMPLETE:
> +               case BINDER_WORK_TRANSACTION_ONEWAY_SPAM_SUSPECT: {
> +                       if (proc->oneway_spam_detection_enabled &&
> +                                  w->type == BINDER_WORK_TRANSACTION_ONEWAY_SPAM_SUSPECT)
> +                               cmd = BR_ONEWAY_SPAM_SUSPECT;

Doesn't "BR_ONEWAY_SPAM_SUSPECT" need to be added to binder_return_strings[]?

> +                       else
> +                               cmd = BR_TRANSACTION_COMPLETE;
>                         binder_inner_proc_unlock(proc);
> -                       cmd = BR_TRANSACTION_COMPLETE;
>                         kfree(w);
>                         binder_stats_deleted(BINDER_STAT_TRANSACTION_COMPLETE);
>                         if (put_user(cmd, (uint32_t __user *)ptr))
> @@ -4727,6 +4735,18 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
>                 }
>                 break;
>         }
> +       case BINDER_ENABLE_ONEWAY_SPAM_DETECTION: {
> +               uint32_t enable;
> +
> +               if (copy_from_user(&enable, ubuf, sizeof(enable))) {
> +                       ret = -EINVAL;
> +                       goto err;
> +               }
> +               binder_inner_proc_lock(proc);
> +               proc->oneway_spam_detection_enabled = (bool)enable;
> +               binder_inner_proc_unlock(proc);
> +               break;
> +       }
>         default:
>                 ret = -EINVAL;
>                 goto err;
> diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c
> index 7caf74a..a09872b 100644
> --- a/drivers/android/binder_alloc.c
> +++ b/drivers/android/binder_alloc.c
> @@ -338,7 +338,7 @@ static inline struct vm_area_struct *binder_alloc_get_vma(
>         return vma;
>  }
>
> -static void debug_low_async_space_locked(struct binder_alloc *alloc, int pid)
> +static bool debug_low_async_space_locked(struct binder_alloc *alloc, int pid)
>  {
>         /*
>          * Find the amount and size of buffers allocated by the current caller;
> @@ -366,13 +366,19 @@ static void debug_low_async_space_locked(struct binder_alloc *alloc, int pid)
>
>         /*
>          * Warn if this pid has more than 50 transactions, or more than 50% of
> -        * async space (which is 25% of total buffer size).
> +        * async space (which is 25% of total buffer size). Oneway spam only
> +        * detect once when exceed the threshold.

"Oneway spam is only detected when the threshold is exceeded"

>          */
>         if (num_buffers > 50 || total_alloc_size > alloc->buffer_size / 4) {
>                 binder_alloc_debug(BINDER_DEBUG_USER_ERROR,
>                              "%d: pid %d spamming oneway? %zd buffers allocated for a total size of %zd\n",
>                               alloc->pid, pid, num_buffers, total_alloc_size);
> +               if (!alloc->oneway_spam_detected) {
> +                       alloc->oneway_spam_detected = true;
> +                       return true;
> +               }
>         }
> +       return false;
>  }
>
>  static struct binder_buffer *binder_alloc_new_buf_locked(
> @@ -525,6 +531,7 @@ static struct binder_buffer *binder_alloc_new_buf_locked(
>         buffer->async_transaction = is_async;
>         buffer->extra_buffers_size = extra_buffers_size;
>         buffer->pid = pid;
> +       buffer->oneway_spam_suspect = false;
>         if (is_async) {
>                 alloc->free_async_space -= size + sizeof(struct binder_buffer);
>                 binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC_ASYNC,
> @@ -536,7 +543,9 @@ static struct binder_buffer *binder_alloc_new_buf_locked(
>                          * of async space left (which is less than 10% of total
>                          * buffer size).
>                          */
> -                       debug_low_async_space_locked(alloc, pid);
> +                       buffer->oneway_spam_suspect = debug_low_async_space_locked(alloc, pid);
> +               } else {
> +                       alloc->oneway_spam_detected = false;
>                 }
>         }
>         return buffer;
> diff --git a/drivers/android/binder_alloc.h b/drivers/android/binder_alloc.h
> index 6e8e001..7dea57a 100644
> --- a/drivers/android/binder_alloc.h
> +++ b/drivers/android/binder_alloc.h
> @@ -26,6 +26,8 @@ struct binder_transaction;
>   * @clear_on_free:      %true if buffer must be zeroed after use
>   * @allow_user_free:    %true if user is allowed to free buffer
>   * @async_transaction:  %true if buffer is in use for an async txn
> + * @oneway_spam_suspect: %true if total async allocate size just exceed
> + * spamming detect threshold
>   * @debug_id:           unique ID for debugging
>   * @transaction:        pointer to associated struct binder_transaction
>   * @target_node:        struct binder_node associated with this buffer
> @@ -45,7 +47,8 @@ struct binder_buffer {
>         unsigned clear_on_free:1;
>         unsigned allow_user_free:1;
>         unsigned async_transaction:1;
> -       unsigned debug_id:28;
> +       unsigned oneway_spam_suspect:1;
> +       unsigned debug_id:27;
>
>         struct binder_transaction *transaction;
>
> @@ -87,6 +90,8 @@ struct binder_lru_page {
>   * @buffer_size:        size of address space specified via mmap
>   * @pid:                pid for associated binder_proc (invariant after init)
>   * @pages_high:         high watermark of offset in @pages
> + * @oneway_spam_detected: %true if oneway spam detection fired, clear that
> + * flag once the async buffer has returned to a healthy state
>   *
>   * Bookkeeping structure for per-proc address space management for binder
>   * buffers. It is normally initialized during binder_init() and binder_mmap()
> @@ -107,6 +112,7 @@ struct binder_alloc {
>         uint32_t buffer_free;
>         int pid;
>         size_t pages_high;
> +       bool oneway_spam_detected;
>  };
>
>  #ifdef CONFIG_ANDROID_BINDER_IPC_SELFTEST
> diff --git a/drivers/android/binder_internal.h b/drivers/android/binder_internal.h
> index 6cd7901..eb87669 100644
> --- a/drivers/android/binder_internal.h
> +++ b/drivers/android/binder_internal.h
> @@ -174,6 +174,7 @@ struct binder_work {
>         enum binder_work_type {
>                 BINDER_WORK_TRANSACTION = 1,
>                 BINDER_WORK_TRANSACTION_COMPLETE,
> +               BINDER_WORK_TRANSACTION_ONEWAY_SPAM_SUSPECT,
>                 BINDER_WORK_RETURN_ERROR,
>                 BINDER_WORK_NODE,
>                 BINDER_WORK_DEAD_BINDER,
> @@ -396,6 +397,8 @@ struct binder_ref {
>   * @outer_lock:           no nesting under innor or node lock
>   *                        Lock order: 1) outer, 2) node, 3) inner
>   * @binderfs_entry:       process-specific binderfs log file
> + * @oneway_spam_detection_enabled: process enabled oneway spam detection
> + *                        or not
>   *
>   * Bookkeeping structure for binder processes
>   */
> @@ -426,6 +429,7 @@ struct binder_proc {
>         spinlock_t inner_lock;
>         spinlock_t outer_lock;
>         struct dentry *binderfs_entry;
> +       bool oneway_spam_detection_enabled;
>  };
>
>  /**
> diff --git a/include/uapi/linux/android/binder.h b/include/uapi/linux/android/binder.h
> index ec84ad1..d0da772 100644
> --- a/include/uapi/linux/android/binder.h
> +++ b/include/uapi/linux/android/binder.h
> @@ -227,6 +227,7 @@ struct binder_node_info_for_ref {
>  #define BINDER_GET_NODE_DEBUG_INFO     _IOWR('b', 11, struct binder_node_debug_info)
>  #define BINDER_GET_NODE_INFO_FOR_REF   _IOWR('b', 12, struct binder_node_info_for_ref)
>  #define BINDER_SET_CONTEXT_MGR_EXT     _IOW('b', 13, struct flat_binder_object)
> +#define BINDER_ENABLE_ONEWAY_SPAM_DETECTION    _IOW('b', 15, __u32)
>
>  /*
>   * NOTE: Two special error codes you should check for when calling
> @@ -408,6 +409,13 @@ enum binder_driver_return_protocol {
>          * The last transaction (either a bcTRANSACTION or
>          * a bcATTEMPT_ACQUIRE) failed (e.g. out of memory).  No parameters.
>          */
> +
> +       BR_ONEWAY_SPAM_SUSPECT = _IO('r', 19),
> +       /*
> +        * Current process sent too many oneway calls to target, and the last
> +        * asynchronous transaction makes the allocated async buffer size exceed
> +        * detection threshold.  No parameters.
> +        */
>  };
>
>  enum binder_driver_command_protocol {
> --
> The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
> a Linux Foundation Collaborative Project
>

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH v3] binder: tell userspace to dump current backtrace when detecting oneway spamming
  2021-04-06 20:26       ` Todd Kjos
@ 2021-04-07  3:14         ` Hang Lu
  2021-04-07  4:11           ` [PATCH v4] " Hang Lu
  0 siblings, 1 reply; 12+ messages in thread
From: Hang Lu @ 2021-04-07  3:14 UTC (permalink / raw)
  To: tkjos, gregkh
  Cc: tkjos, maco, arve, joel, christian, hridya, surenb, rdunlap,
	linux-kernel, Hang Lu

When async binder buffer got exhausted, some normal oneway transactions
will also be discarded and may cause system or application failures. By
that time, the binder debug information we dump may not be relevant to
the root cause. And this issue is difficult to debug if without the
backtrace of the thread sending spam.

This change will send BR_ONEWAY_SPAM_SUSPECT to userspace when oneway
spamming is detected, request to dump current backtrace. Oneway spamming
will be reported only once when exceeding the threshold (target process
dips below 80% of its oneway space, and current process is responsible for
either more than 50 transactions, or more than 50% of the oneway space).
And the detection will restart when the async buffer has returned to a
healthy state.

Signed-off-by: Hang Lu <hangl@codeaurora.org>
---
v3: add BR_ONEWAY_SPAM_SUSPECT to binder_return_strings

v2: make the detection on/off switch to be per-proc

 drivers/android/binder.c            | 29 +++++++++++++++++++++++++----
 drivers/android/binder_alloc.c      | 15 ++++++++++++---
 drivers/android/binder_alloc.h      |  8 +++++++-
 drivers/android/binder_internal.h   |  4 ++++
 include/uapi/linux/android/binder.h |  8 ++++++++
 5 files changed, 56 insertions(+), 8 deletions(-)

diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index c119736..7038924 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -3007,7 +3007,10 @@ static void binder_transaction(struct binder_proc *proc,
 			goto err_bad_object_type;
 		}
 	}
-	tcomplete->type = BINDER_WORK_TRANSACTION_COMPLETE;
+	if (t->buffer->oneway_spam_suspect)
+		tcomplete->type = BINDER_WORK_TRANSACTION_ONEWAY_SPAM_SUSPECT;
+	else
+		tcomplete->type = BINDER_WORK_TRANSACTION_COMPLETE;
 	t->work.type = BINDER_WORK_TRANSACTION;
 
 	if (reply) {
@@ -3875,9 +3878,14 @@ static int binder_thread_read(struct binder_proc *proc,
 
 			binder_stat_br(proc, thread, cmd);
 		} break;
-		case BINDER_WORK_TRANSACTION_COMPLETE: {
+		case BINDER_WORK_TRANSACTION_COMPLETE:
+		case BINDER_WORK_TRANSACTION_ONEWAY_SPAM_SUSPECT: {
+			if (proc->oneway_spam_detection_enabled &&
+				   w->type == BINDER_WORK_TRANSACTION_ONEWAY_SPAM_SUSPECT)
+				cmd = BR_ONEWAY_SPAM_SUSPECT;
+			else
+				cmd = BR_TRANSACTION_COMPLETE;
 			binder_inner_proc_unlock(proc);
-			cmd = BR_TRANSACTION_COMPLETE;
 			kfree(w);
 			binder_stats_deleted(BINDER_STAT_TRANSACTION_COMPLETE);
 			if (put_user(cmd, (uint32_t __user *)ptr))
@@ -4727,6 +4735,18 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 		}
 		break;
 	}
+	case BINDER_ENABLE_ONEWAY_SPAM_DETECTION: {
+		uint32_t enable;
+
+		if (copy_from_user(&enable, ubuf, sizeof(enable))) {
+			ret = -EINVAL;
+			goto err;
+		}
+		binder_inner_proc_lock(proc);
+		proc->oneway_spam_detection_enabled = (bool)enable;
+		binder_inner_proc_unlock(proc);
+		break;
+	}
 	default:
 		ret = -EINVAL;
 		goto err;
@@ -5385,7 +5405,8 @@ static const char * const binder_return_strings[] = {
 	"BR_FINISHED",
 	"BR_DEAD_BINDER",
 	"BR_CLEAR_DEATH_NOTIFICATION_DONE",
-	"BR_FAILED_REPLY"
+	"BR_FAILED_REPLY",
+	"BR_ONEWAY_SPAM_SUSPECT"
 };
 
 static const char * const binder_command_strings[] = {
diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c
index 7caf74a..340515f 100644
--- a/drivers/android/binder_alloc.c
+++ b/drivers/android/binder_alloc.c
@@ -338,7 +338,7 @@ static inline struct vm_area_struct *binder_alloc_get_vma(
 	return vma;
 }
 
-static void debug_low_async_space_locked(struct binder_alloc *alloc, int pid)
+static bool debug_low_async_space_locked(struct binder_alloc *alloc, int pid)
 {
 	/*
 	 * Find the amount and size of buffers allocated by the current caller;
@@ -366,13 +366,19 @@ static void debug_low_async_space_locked(struct binder_alloc *alloc, int pid)
 
 	/*
 	 * Warn if this pid has more than 50 transactions, or more than 50% of
-	 * async space (which is 25% of total buffer size).
+	 * async space (which is 25% of total buffer size). Oneway spam is only
+	 * detected when the threshold is exceeded.
 	 */
 	if (num_buffers > 50 || total_alloc_size > alloc->buffer_size / 4) {
 		binder_alloc_debug(BINDER_DEBUG_USER_ERROR,
 			     "%d: pid %d spamming oneway? %zd buffers allocated for a total size of %zd\n",
 			      alloc->pid, pid, num_buffers, total_alloc_size);
+		if (!alloc->oneway_spam_detected) {
+			alloc->oneway_spam_detected = true;
+			return true;
+		}
 	}
+	return false;
 }
 
 static struct binder_buffer *binder_alloc_new_buf_locked(
@@ -525,6 +531,7 @@ static struct binder_buffer *binder_alloc_new_buf_locked(
 	buffer->async_transaction = is_async;
 	buffer->extra_buffers_size = extra_buffers_size;
 	buffer->pid = pid;
+	buffer->oneway_spam_suspect = false;
 	if (is_async) {
 		alloc->free_async_space -= size + sizeof(struct binder_buffer);
 		binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC_ASYNC,
@@ -536,7 +543,9 @@ static struct binder_buffer *binder_alloc_new_buf_locked(
 			 * of async space left (which is less than 10% of total
 			 * buffer size).
 			 */
-			debug_low_async_space_locked(alloc, pid);
+			buffer->oneway_spam_suspect = debug_low_async_space_locked(alloc, pid);
+		} else {
+			alloc->oneway_spam_detected = false;
 		}
 	}
 	return buffer;
diff --git a/drivers/android/binder_alloc.h b/drivers/android/binder_alloc.h
index 6e8e001..7dea57a 100644
--- a/drivers/android/binder_alloc.h
+++ b/drivers/android/binder_alloc.h
@@ -26,6 +26,8 @@ struct binder_transaction;
  * @clear_on_free:      %true if buffer must be zeroed after use
  * @allow_user_free:    %true if user is allowed to free buffer
  * @async_transaction:  %true if buffer is in use for an async txn
+ * @oneway_spam_suspect: %true if total async allocate size just exceed
+ * spamming detect threshold
  * @debug_id:           unique ID for debugging
  * @transaction:        pointer to associated struct binder_transaction
  * @target_node:        struct binder_node associated with this buffer
@@ -45,7 +47,8 @@ struct binder_buffer {
 	unsigned clear_on_free:1;
 	unsigned allow_user_free:1;
 	unsigned async_transaction:1;
-	unsigned debug_id:28;
+	unsigned oneway_spam_suspect:1;
+	unsigned debug_id:27;
 
 	struct binder_transaction *transaction;
 
@@ -87,6 +90,8 @@ struct binder_lru_page {
  * @buffer_size:        size of address space specified via mmap
  * @pid:                pid for associated binder_proc (invariant after init)
  * @pages_high:         high watermark of offset in @pages
+ * @oneway_spam_detected: %true if oneway spam detection fired, clear that
+ * flag once the async buffer has returned to a healthy state
  *
  * Bookkeeping structure for per-proc address space management for binder
  * buffers. It is normally initialized during binder_init() and binder_mmap()
@@ -107,6 +112,7 @@ struct binder_alloc {
 	uint32_t buffer_free;
 	int pid;
 	size_t pages_high;
+	bool oneway_spam_detected;
 };
 
 #ifdef CONFIG_ANDROID_BINDER_IPC_SELFTEST
diff --git a/drivers/android/binder_internal.h b/drivers/android/binder_internal.h
index 6cd7901..eb87669 100644
--- a/drivers/android/binder_internal.h
+++ b/drivers/android/binder_internal.h
@@ -174,6 +174,7 @@ struct binder_work {
 	enum binder_work_type {
 		BINDER_WORK_TRANSACTION = 1,
 		BINDER_WORK_TRANSACTION_COMPLETE,
+		BINDER_WORK_TRANSACTION_ONEWAY_SPAM_SUSPECT,
 		BINDER_WORK_RETURN_ERROR,
 		BINDER_WORK_NODE,
 		BINDER_WORK_DEAD_BINDER,
@@ -396,6 +397,8 @@ struct binder_ref {
  * @outer_lock:           no nesting under innor or node lock
  *                        Lock order: 1) outer, 2) node, 3) inner
  * @binderfs_entry:       process-specific binderfs log file
+ * @oneway_spam_detection_enabled: process enabled oneway spam detection
+ *                        or not
  *
  * Bookkeeping structure for binder processes
  */
@@ -426,6 +429,7 @@ struct binder_proc {
 	spinlock_t inner_lock;
 	spinlock_t outer_lock;
 	struct dentry *binderfs_entry;
+	bool oneway_spam_detection_enabled;
 };
 
 /**
diff --git a/include/uapi/linux/android/binder.h b/include/uapi/linux/android/binder.h
index ec84ad1..d0da772 100644
--- a/include/uapi/linux/android/binder.h
+++ b/include/uapi/linux/android/binder.h
@@ -227,6 +227,7 @@ struct binder_node_info_for_ref {
 #define BINDER_GET_NODE_DEBUG_INFO	_IOWR('b', 11, struct binder_node_debug_info)
 #define BINDER_GET_NODE_INFO_FOR_REF	_IOWR('b', 12, struct binder_node_info_for_ref)
 #define BINDER_SET_CONTEXT_MGR_EXT	_IOW('b', 13, struct flat_binder_object)
+#define BINDER_ENABLE_ONEWAY_SPAM_DETECTION	_IOW('b', 15, __u32)
 
 /*
  * NOTE: Two special error codes you should check for when calling
@@ -408,6 +409,13 @@ enum binder_driver_return_protocol {
 	 * The last transaction (either a bcTRANSACTION or
 	 * a bcATTEMPT_ACQUIRE) failed (e.g. out of memory).  No parameters.
 	 */
+
+	BR_ONEWAY_SPAM_SUSPECT = _IO('r', 19),
+	/*
+	 * Current process sent too many oneway calls to target, and the last
+	 * asynchronous transaction makes the allocated async buffer size exceed
+	 * detection threshold.  No parameters.
+	 */
 };
 
 enum binder_driver_command_protocol {
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH v4] binder: tell userspace to dump current backtrace when detecting oneway spamming
  2021-04-07  3:14         ` [PATCH v3] " Hang Lu
@ 2021-04-07  4:11           ` Hang Lu
  2021-04-07 19:38             ` Todd Kjos
  0 siblings, 1 reply; 12+ messages in thread
From: Hang Lu @ 2021-04-07  4:11 UTC (permalink / raw)
  To: tkjos, gregkh
  Cc: tkjos, maco, arve, joel, christian, hridya, surenb, rdunlap,
	linux-kernel, Hang Lu

When async binder buffer got exhausted, some normal oneway transactions
will also be discarded and may cause system or application failures. By
that time, the binder debug information we dump may not be relevant to
the root cause. And this issue is difficult to debug if without the
backtrace of the thread sending spam.

This change will send BR_ONEWAY_SPAM_SUSPECT to userspace when oneway
spamming is detected, request to dump current backtrace. Oneway spamming
will be reported only once when exceeding the threshold (target process
dips below 80% of its oneway space, and current process is responsible for
either more than 50 transactions, or more than 50% of the oneway space).
And the detection will restart when the async buffer has returned to a
healthy state.

Signed-off-by: Hang Lu <hangl@codeaurora.org>
---
v4: add placeholder for BR_FROZEN_REPLY in binder_return_strings for not triggering BUG_ON in print_binder_stats

v3: add BR_ONEWAY_SPAM_SUSPECT to binder_return_strings

v2: make the detection on/off switch to be per-proc

 drivers/android/binder.c            | 31 +++++++++++++++++++++++++++----
 drivers/android/binder_alloc.c      | 15 ++++++++++++---
 drivers/android/binder_alloc.h      |  8 +++++++-
 drivers/android/binder_internal.h   |  6 +++++-
 include/uapi/linux/android/binder.h |  8 ++++++++
 5 files changed, 59 insertions(+), 9 deletions(-)

diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index c119736..7046af90 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -3007,7 +3007,10 @@ static void binder_transaction(struct binder_proc *proc,
 			goto err_bad_object_type;
 		}
 	}
-	tcomplete->type = BINDER_WORK_TRANSACTION_COMPLETE;
+	if (t->buffer->oneway_spam_suspect)
+		tcomplete->type = BINDER_WORK_TRANSACTION_ONEWAY_SPAM_SUSPECT;
+	else
+		tcomplete->type = BINDER_WORK_TRANSACTION_COMPLETE;
 	t->work.type = BINDER_WORK_TRANSACTION;
 
 	if (reply) {
@@ -3875,9 +3878,14 @@ static int binder_thread_read(struct binder_proc *proc,
 
 			binder_stat_br(proc, thread, cmd);
 		} break;
-		case BINDER_WORK_TRANSACTION_COMPLETE: {
+		case BINDER_WORK_TRANSACTION_COMPLETE:
+		case BINDER_WORK_TRANSACTION_ONEWAY_SPAM_SUSPECT: {
+			if (proc->oneway_spam_detection_enabled &&
+				   w->type == BINDER_WORK_TRANSACTION_ONEWAY_SPAM_SUSPECT)
+				cmd = BR_ONEWAY_SPAM_SUSPECT;
+			else
+				cmd = BR_TRANSACTION_COMPLETE;
 			binder_inner_proc_unlock(proc);
-			cmd = BR_TRANSACTION_COMPLETE;
 			kfree(w);
 			binder_stats_deleted(BINDER_STAT_TRANSACTION_COMPLETE);
 			if (put_user(cmd, (uint32_t __user *)ptr))
@@ -4727,6 +4735,18 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 		}
 		break;
 	}
+	case BINDER_ENABLE_ONEWAY_SPAM_DETECTION: {
+		uint32_t enable;
+
+		if (copy_from_user(&enable, ubuf, sizeof(enable))) {
+			ret = -EINVAL;
+			goto err;
+		}
+		binder_inner_proc_lock(proc);
+		proc->oneway_spam_detection_enabled = (bool)enable;
+		binder_inner_proc_unlock(proc);
+		break;
+	}
 	default:
 		ret = -EINVAL;
 		goto err;
@@ -5385,7 +5405,10 @@ static const char * const binder_return_strings[] = {
 	"BR_FINISHED",
 	"BR_DEAD_BINDER",
 	"BR_CLEAR_DEATH_NOTIFICATION_DONE",
-	"BR_FAILED_REPLY"
+	"BR_FAILED_REPLY",
+	/* set placeholder for BR_FROZEN_REPLY */
+	"PLACEHOLDER",
+	"BR_ONEWAY_SPAM_SUSPECT"
 };
 
 static const char * const binder_command_strings[] = {
diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c
index 7caf74a..340515f 100644
--- a/drivers/android/binder_alloc.c
+++ b/drivers/android/binder_alloc.c
@@ -338,7 +338,7 @@ static inline struct vm_area_struct *binder_alloc_get_vma(
 	return vma;
 }
 
-static void debug_low_async_space_locked(struct binder_alloc *alloc, int pid)
+static bool debug_low_async_space_locked(struct binder_alloc *alloc, int pid)
 {
 	/*
 	 * Find the amount and size of buffers allocated by the current caller;
@@ -366,13 +366,19 @@ static void debug_low_async_space_locked(struct binder_alloc *alloc, int pid)
 
 	/*
 	 * Warn if this pid has more than 50 transactions, or more than 50% of
-	 * async space (which is 25% of total buffer size).
+	 * async space (which is 25% of total buffer size). Oneway spam is only
+	 * detected when the threshold is exceeded.
 	 */
 	if (num_buffers > 50 || total_alloc_size > alloc->buffer_size / 4) {
 		binder_alloc_debug(BINDER_DEBUG_USER_ERROR,
 			     "%d: pid %d spamming oneway? %zd buffers allocated for a total size of %zd\n",
 			      alloc->pid, pid, num_buffers, total_alloc_size);
+		if (!alloc->oneway_spam_detected) {
+			alloc->oneway_spam_detected = true;
+			return true;
+		}
 	}
+	return false;
 }
 
 static struct binder_buffer *binder_alloc_new_buf_locked(
@@ -525,6 +531,7 @@ static struct binder_buffer *binder_alloc_new_buf_locked(
 	buffer->async_transaction = is_async;
 	buffer->extra_buffers_size = extra_buffers_size;
 	buffer->pid = pid;
+	buffer->oneway_spam_suspect = false;
 	if (is_async) {
 		alloc->free_async_space -= size + sizeof(struct binder_buffer);
 		binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC_ASYNC,
@@ -536,7 +543,9 @@ static struct binder_buffer *binder_alloc_new_buf_locked(
 			 * of async space left (which is less than 10% of total
 			 * buffer size).
 			 */
-			debug_low_async_space_locked(alloc, pid);
+			buffer->oneway_spam_suspect = debug_low_async_space_locked(alloc, pid);
+		} else {
+			alloc->oneway_spam_detected = false;
 		}
 	}
 	return buffer;
diff --git a/drivers/android/binder_alloc.h b/drivers/android/binder_alloc.h
index 6e8e001..7dea57a 100644
--- a/drivers/android/binder_alloc.h
+++ b/drivers/android/binder_alloc.h
@@ -26,6 +26,8 @@ struct binder_transaction;
  * @clear_on_free:      %true if buffer must be zeroed after use
  * @allow_user_free:    %true if user is allowed to free buffer
  * @async_transaction:  %true if buffer is in use for an async txn
+ * @oneway_spam_suspect: %true if total async allocate size just exceed
+ * spamming detect threshold
  * @debug_id:           unique ID for debugging
  * @transaction:        pointer to associated struct binder_transaction
  * @target_node:        struct binder_node associated with this buffer
@@ -45,7 +47,8 @@ struct binder_buffer {
 	unsigned clear_on_free:1;
 	unsigned allow_user_free:1;
 	unsigned async_transaction:1;
-	unsigned debug_id:28;
+	unsigned oneway_spam_suspect:1;
+	unsigned debug_id:27;
 
 	struct binder_transaction *transaction;
 
@@ -87,6 +90,8 @@ struct binder_lru_page {
  * @buffer_size:        size of address space specified via mmap
  * @pid:                pid for associated binder_proc (invariant after init)
  * @pages_high:         high watermark of offset in @pages
+ * @oneway_spam_detected: %true if oneway spam detection fired, clear that
+ * flag once the async buffer has returned to a healthy state
  *
  * Bookkeeping structure for per-proc address space management for binder
  * buffers. It is normally initialized during binder_init() and binder_mmap()
@@ -107,6 +112,7 @@ struct binder_alloc {
 	uint32_t buffer_free;
 	int pid;
 	size_t pages_high;
+	bool oneway_spam_detected;
 };
 
 #ifdef CONFIG_ANDROID_BINDER_IPC_SELFTEST
diff --git a/drivers/android/binder_internal.h b/drivers/android/binder_internal.h
index 6cd7901..94a9133 100644
--- a/drivers/android/binder_internal.h
+++ b/drivers/android/binder_internal.h
@@ -155,7 +155,7 @@ enum binder_stat_types {
 };
 
 struct binder_stats {
-	atomic_t br[_IOC_NR(BR_FAILED_REPLY) + 1];
+	atomic_t br[_IOC_NR(BR_ONEWAY_SPAM_SUSPECT) + 1];
 	atomic_t bc[_IOC_NR(BC_REPLY_SG) + 1];
 	atomic_t obj_created[BINDER_STAT_COUNT];
 	atomic_t obj_deleted[BINDER_STAT_COUNT];
@@ -174,6 +174,7 @@ struct binder_work {
 	enum binder_work_type {
 		BINDER_WORK_TRANSACTION = 1,
 		BINDER_WORK_TRANSACTION_COMPLETE,
+		BINDER_WORK_TRANSACTION_ONEWAY_SPAM_SUSPECT,
 		BINDER_WORK_RETURN_ERROR,
 		BINDER_WORK_NODE,
 		BINDER_WORK_DEAD_BINDER,
@@ -396,6 +397,8 @@ struct binder_ref {
  * @outer_lock:           no nesting under innor or node lock
  *                        Lock order: 1) outer, 2) node, 3) inner
  * @binderfs_entry:       process-specific binderfs log file
+ * @oneway_spam_detection_enabled: process enabled oneway spam detection
+ *                        or not
  *
  * Bookkeeping structure for binder processes
  */
@@ -426,6 +429,7 @@ struct binder_proc {
 	spinlock_t inner_lock;
 	spinlock_t outer_lock;
 	struct dentry *binderfs_entry;
+	bool oneway_spam_detection_enabled;
 };
 
 /**
diff --git a/include/uapi/linux/android/binder.h b/include/uapi/linux/android/binder.h
index ec84ad1..d0da772 100644
--- a/include/uapi/linux/android/binder.h
+++ b/include/uapi/linux/android/binder.h
@@ -227,6 +227,7 @@ struct binder_node_info_for_ref {
 #define BINDER_GET_NODE_DEBUG_INFO	_IOWR('b', 11, struct binder_node_debug_info)
 #define BINDER_GET_NODE_INFO_FOR_REF	_IOWR('b', 12, struct binder_node_info_for_ref)
 #define BINDER_SET_CONTEXT_MGR_EXT	_IOW('b', 13, struct flat_binder_object)
+#define BINDER_ENABLE_ONEWAY_SPAM_DETECTION	_IOW('b', 15, __u32)
 
 /*
  * NOTE: Two special error codes you should check for when calling
@@ -408,6 +409,13 @@ enum binder_driver_return_protocol {
 	 * The last transaction (either a bcTRANSACTION or
 	 * a bcATTEMPT_ACQUIRE) failed (e.g. out of memory).  No parameters.
 	 */
+
+	BR_ONEWAY_SPAM_SUSPECT = _IO('r', 19),
+	/*
+	 * Current process sent too many oneway calls to target, and the last
+	 * asynchronous transaction makes the allocated async buffer size exceed
+	 * detection threshold.  No parameters.
+	 */
 };
 
 enum binder_driver_command_protocol {
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH v4] binder: tell userspace to dump current backtrace when detecting oneway spamming
  2021-04-07  4:11           ` [PATCH v4] " Hang Lu
@ 2021-04-07 19:38             ` Todd Kjos
  2021-04-09  3:40               ` [PATCH v4] binder: tell userspace to dump current backtrace when detected " Hang Lu
  0 siblings, 1 reply; 12+ messages in thread
From: Todd Kjos @ 2021-04-07 19:38 UTC (permalink / raw)
  To: Hang Lu
  Cc: Greg Kroah-Hartman, Todd Kjos, Martijn Coenen,
	Arve Hjønnevåg, Joel Fernandes (Google),
	Christian Brauner, Hridya Valsaraju, Suren Baghdasaryan, rdunlap,
	LKML

On Tue, Apr 6, 2021 at 9:15 PM Hang Lu <hangl@codeaurora.org> wrote:
>
> When async binder buffer got exhausted, some normal oneway transactions
> will also be discarded and may cause system or application failures. By
> that time, the binder debug information we dump may not be relevant to
> the root cause. And this issue is difficult to debug if without the
> backtrace of the thread sending spam.
>
> This change will send BR_ONEWAY_SPAM_SUSPECT to userspace when oneway
> spamming is detected, request to dump current backtrace. Oneway spamming
> will be reported only once when exceeding the threshold (target process
> dips below 80% of its oneway space, and current process is responsible for
> either more than 50 transactions, or more than 50% of the oneway space).
> And the detection will restart when the async buffer has returned to a
> healthy state.
>
> Signed-off-by: Hang Lu <hangl@codeaurora.org>
> ---
> v4: add placeholder for BR_FROZEN_REPLY in binder_return_strings for not triggering BUG_ON in print_binder_stats

Instead of a placeholder, please rebase this series onto Greg's
char-misc-next branch in
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git and
add a new patch that fixes the missing "BR_FROZEN_REPLY".

>
> v3: add BR_ONEWAY_SPAM_SUSPECT to binder_return_strings
>
> v2: make the detection on/off switch to be per-proc
>
>  drivers/android/binder.c            | 31 +++++++++++++++++++++++++++----
>  drivers/android/binder_alloc.c      | 15 ++++++++++++---
>  drivers/android/binder_alloc.h      |  8 +++++++-
>  drivers/android/binder_internal.h   |  6 +++++-
>  include/uapi/linux/android/binder.h |  8 ++++++++
>  5 files changed, 59 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/android/binder.c b/drivers/android/binder.c
> index c119736..7046af90 100644
> --- a/drivers/android/binder.c
> +++ b/drivers/android/binder.c
> @@ -3007,7 +3007,10 @@ static void binder_transaction(struct binder_proc *proc,
>                         goto err_bad_object_type;
>                 }
>         }
> -       tcomplete->type = BINDER_WORK_TRANSACTION_COMPLETE;
> +       if (t->buffer->oneway_spam_suspect)
> +               tcomplete->type = BINDER_WORK_TRANSACTION_ONEWAY_SPAM_SUSPECT;
> +       else
> +               tcomplete->type = BINDER_WORK_TRANSACTION_COMPLETE;
>         t->work.type = BINDER_WORK_TRANSACTION;
>
>         if (reply) {
> @@ -3875,9 +3878,14 @@ static int binder_thread_read(struct binder_proc *proc,
>
>                         binder_stat_br(proc, thread, cmd);
>                 } break;
> -               case BINDER_WORK_TRANSACTION_COMPLETE: {
> +               case BINDER_WORK_TRANSACTION_COMPLETE:
> +               case BINDER_WORK_TRANSACTION_ONEWAY_SPAM_SUSPECT: {
> +                       if (proc->oneway_spam_detection_enabled &&
> +                                  w->type == BINDER_WORK_TRANSACTION_ONEWAY_SPAM_SUSPECT)
> +                               cmd = BR_ONEWAY_SPAM_SUSPECT;
> +                       else
> +                               cmd = BR_TRANSACTION_COMPLETE;
>                         binder_inner_proc_unlock(proc);
> -                       cmd = BR_TRANSACTION_COMPLETE;
>                         kfree(w);
>                         binder_stats_deleted(BINDER_STAT_TRANSACTION_COMPLETE);
>                         if (put_user(cmd, (uint32_t __user *)ptr))
> @@ -4727,6 +4735,18 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
>                 }
>                 break;
>         }
> +       case BINDER_ENABLE_ONEWAY_SPAM_DETECTION: {
> +               uint32_t enable;
> +
> +               if (copy_from_user(&enable, ubuf, sizeof(enable))) {
> +                       ret = -EINVAL;
> +                       goto err;
> +               }
> +               binder_inner_proc_lock(proc);
> +               proc->oneway_spam_detection_enabled = (bool)enable;
> +               binder_inner_proc_unlock(proc);
> +               break;
> +       }
>         default:
>                 ret = -EINVAL;
>                 goto err;
> @@ -5385,7 +5405,10 @@ static const char * const binder_return_strings[] = {
>         "BR_FINISHED",
>         "BR_DEAD_BINDER",
>         "BR_CLEAR_DEATH_NOTIFICATION_DONE",
> -       "BR_FAILED_REPLY"
> +       "BR_FAILED_REPLY",
> +       /* set placeholder for BR_FROZEN_REPLY */
> +       "PLACEHOLDER",

This should be in a new patch that fixes the issue for the previous patch.

> +       "BR_ONEWAY_SPAM_SUSPECT"
>  };
>
>  static const char * const binder_command_strings[] = {
> diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c
> index 7caf74a..340515f 100644
> --- a/drivers/android/binder_alloc.c
> +++ b/drivers/android/binder_alloc.c
> @@ -338,7 +338,7 @@ static inline struct vm_area_struct *binder_alloc_get_vma(
>         return vma;
>  }
>
> -static void debug_low_async_space_locked(struct binder_alloc *alloc, int pid)
> +static bool debug_low_async_space_locked(struct binder_alloc *alloc, int pid)
>  {
>         /*
>          * Find the amount and size of buffers allocated by the current caller;
> @@ -366,13 +366,19 @@ static void debug_low_async_space_locked(struct binder_alloc *alloc, int pid)
>
>         /*
>          * Warn if this pid has more than 50 transactions, or more than 50% of
> -        * async space (which is 25% of total buffer size).
> +        * async space (which is 25% of total buffer size). Oneway spam is only
> +        * detected when the threshold is exceeded.
>          */
>         if (num_buffers > 50 || total_alloc_size > alloc->buffer_size / 4) {
>                 binder_alloc_debug(BINDER_DEBUG_USER_ERROR,
>                              "%d: pid %d spamming oneway? %zd buffers allocated for a total size of %zd\n",
>                               alloc->pid, pid, num_buffers, total_alloc_size);
> +               if (!alloc->oneway_spam_detected) {
> +                       alloc->oneway_spam_detected = true;
> +                       return true;
> +               }
>         }
> +       return false;
>  }
>
>  static struct binder_buffer *binder_alloc_new_buf_locked(
> @@ -525,6 +531,7 @@ static struct binder_buffer *binder_alloc_new_buf_locked(
>         buffer->async_transaction = is_async;
>         buffer->extra_buffers_size = extra_buffers_size;
>         buffer->pid = pid;
> +       buffer->oneway_spam_suspect = false;
>         if (is_async) {
>                 alloc->free_async_space -= size + sizeof(struct binder_buffer);
>                 binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC_ASYNC,
> @@ -536,7 +543,9 @@ static struct binder_buffer *binder_alloc_new_buf_locked(
>                          * of async space left (which is less than 10% of total
>                          * buffer size).
>                          */
> -                       debug_low_async_space_locked(alloc, pid);
> +                       buffer->oneway_spam_suspect = debug_low_async_space_locked(alloc, pid);
> +               } else {
> +                       alloc->oneway_spam_detected = false;
>                 }
>         }
>         return buffer;
> diff --git a/drivers/android/binder_alloc.h b/drivers/android/binder_alloc.h
> index 6e8e001..7dea57a 100644
> --- a/drivers/android/binder_alloc.h
> +++ b/drivers/android/binder_alloc.h
> @@ -26,6 +26,8 @@ struct binder_transaction;
>   * @clear_on_free:      %true if buffer must be zeroed after use
>   * @allow_user_free:    %true if user is allowed to free buffer
>   * @async_transaction:  %true if buffer is in use for an async txn
> + * @oneway_spam_suspect: %true if total async allocate size just exceed
> + * spamming detect threshold
>   * @debug_id:           unique ID for debugging
>   * @transaction:        pointer to associated struct binder_transaction
>   * @target_node:        struct binder_node associated with this buffer
> @@ -45,7 +47,8 @@ struct binder_buffer {
>         unsigned clear_on_free:1;
>         unsigned allow_user_free:1;
>         unsigned async_transaction:1;
> -       unsigned debug_id:28;
> +       unsigned oneway_spam_suspect:1;
> +       unsigned debug_id:27;
>
>         struct binder_transaction *transaction;
>
> @@ -87,6 +90,8 @@ struct binder_lru_page {
>   * @buffer_size:        size of address space specified via mmap
>   * @pid:                pid for associated binder_proc (invariant after init)
>   * @pages_high:         high watermark of offset in @pages
> + * @oneway_spam_detected: %true if oneway spam detection fired, clear that
> + * flag once the async buffer has returned to a healthy state
>   *
>   * Bookkeeping structure for per-proc address space management for binder
>   * buffers. It is normally initialized during binder_init() and binder_mmap()
> @@ -107,6 +112,7 @@ struct binder_alloc {
>         uint32_t buffer_free;
>         int pid;
>         size_t pages_high;
> +       bool oneway_spam_detected;
>  };
>
>  #ifdef CONFIG_ANDROID_BINDER_IPC_SELFTEST
> diff --git a/drivers/android/binder_internal.h b/drivers/android/binder_internal.h
> index 6cd7901..94a9133 100644
> --- a/drivers/android/binder_internal.h
> +++ b/drivers/android/binder_internal.h
> @@ -155,7 +155,7 @@ enum binder_stat_types {
>  };
>
>  struct binder_stats {
> -       atomic_t br[_IOC_NR(BR_FAILED_REPLY) + 1];
> +       atomic_t br[_IOC_NR(BR_ONEWAY_SPAM_SUSPECT) + 1];
>         atomic_t bc[_IOC_NR(BC_REPLY_SG) + 1];
>         atomic_t obj_created[BINDER_STAT_COUNT];
>         atomic_t obj_deleted[BINDER_STAT_COUNT];
> @@ -174,6 +174,7 @@ struct binder_work {
>         enum binder_work_type {
>                 BINDER_WORK_TRANSACTION = 1,
>                 BINDER_WORK_TRANSACTION_COMPLETE,
> +               BINDER_WORK_TRANSACTION_ONEWAY_SPAM_SUSPECT,
>                 BINDER_WORK_RETURN_ERROR,
>                 BINDER_WORK_NODE,
>                 BINDER_WORK_DEAD_BINDER,
> @@ -396,6 +397,8 @@ struct binder_ref {
>   * @outer_lock:           no nesting under innor or node lock
>   *                        Lock order: 1) outer, 2) node, 3) inner
>   * @binderfs_entry:       process-specific binderfs log file
> + * @oneway_spam_detection_enabled: process enabled oneway spam detection
> + *                        or not
>   *
>   * Bookkeeping structure for binder processes
>   */
> @@ -426,6 +429,7 @@ struct binder_proc {
>         spinlock_t inner_lock;
>         spinlock_t outer_lock;
>         struct dentry *binderfs_entry;
> +       bool oneway_spam_detection_enabled;
>  };
>
>  /**
> diff --git a/include/uapi/linux/android/binder.h b/include/uapi/linux/android/binder.h
> index ec84ad1..d0da772 100644
> --- a/include/uapi/linux/android/binder.h
> +++ b/include/uapi/linux/android/binder.h
> @@ -227,6 +227,7 @@ struct binder_node_info_for_ref {
>  #define BINDER_GET_NODE_DEBUG_INFO     _IOWR('b', 11, struct binder_node_debug_info)
>  #define BINDER_GET_NODE_INFO_FOR_REF   _IOWR('b', 12, struct binder_node_info_for_ref)
>  #define BINDER_SET_CONTEXT_MGR_EXT     _IOW('b', 13, struct flat_binder_object)
> +#define BINDER_ENABLE_ONEWAY_SPAM_DETECTION    _IOW('b', 15, __u32)
>
>  /*
>   * NOTE: Two special error codes you should check for when calling
> @@ -408,6 +409,13 @@ enum binder_driver_return_protocol {
>          * The last transaction (either a bcTRANSACTION or
>          * a bcATTEMPT_ACQUIRE) failed (e.g. out of memory).  No parameters.
>          */
> +
> +       BR_ONEWAY_SPAM_SUSPECT = _IO('r', 19),
> +       /*
> +        * Current process sent too many oneway calls to target, and the last
> +        * asynchronous transaction makes the allocated async buffer size exceed
> +        * detection threshold.  No parameters.
> +        */
>  };
>
>  enum binder_driver_command_protocol {
> --
> The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
> a Linux Foundation Collaborative Project
>

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH v4] binder: tell userspace to dump current backtrace when detected oneway spamming
  2021-04-07 19:38             ` Todd Kjos
@ 2021-04-09  3:40               ` Hang Lu
  2021-04-09  6:08                 ` Greg KH
  0 siblings, 1 reply; 12+ messages in thread
From: Hang Lu @ 2021-04-09  3:40 UTC (permalink / raw)
  To: tkjos, gregkh
  Cc: tkjos, maco, arve, joel, christian, hridya, surenb, rdunlap,
	linux-kernel, Hang Lu

When async binder buffer got exhausted, some normal oneway transactions
will also be discarded and may cause system or application failures. By
that time, the binder debug information we dump may not be relevant to
the root cause. And this issue is difficult to debug if without the
backtrace of the thread sending spam.

This change will send BR_ONEWAY_SPAM_SUSPECT to userspace when oneway
spamming is detected, request to dump current backtrace. Oneway spamming
will be reported only once when exceeding the threshold (target process
dips below 80% of its oneway space, and current process is responsible for
either more than 50 transactions, or more than 50% of the oneway space).
And the detection will restart when the async buffer has returned to a
healthy state.

Signed-off-by: Hang Lu <hangl@codeaurora.org>
---
v4: add missing BR_FROZEN_REPLY in binder_return_strings and change the size of binder_stats.br array

v3: add BR_ONEWAY_SPAM_SUSPECT to binder_return_strings

v2: make the detection on/off switch to be per-proc

 drivers/android/binder.c            | 30 ++++++++++++++++++++++++++----
 drivers/android/binder_alloc.c      | 15 ++++++++++++---
 drivers/android/binder_alloc.h      |  8 +++++++-
 drivers/android/binder_internal.h   |  6 +++++-
 include/uapi/linux/android/binder.h |  8 ++++++++
 5 files changed, 58 insertions(+), 9 deletions(-)

diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index e1a484a..63d2c43 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -3020,7 +3020,10 @@ static void binder_transaction(struct binder_proc *proc,
 			goto err_bad_object_type;
 		}
 	}
-	tcomplete->type = BINDER_WORK_TRANSACTION_COMPLETE;
+	if (t->buffer->oneway_spam_suspect)
+		tcomplete->type = BINDER_WORK_TRANSACTION_ONEWAY_SPAM_SUSPECT;
+	else
+		tcomplete->type = BINDER_WORK_TRANSACTION_COMPLETE;
 	t->work.type = BINDER_WORK_TRANSACTION;
 
 	if (reply) {
@@ -3893,9 +3896,14 @@ static int binder_thread_read(struct binder_proc *proc,
 
 			binder_stat_br(proc, thread, cmd);
 		} break;
-		case BINDER_WORK_TRANSACTION_COMPLETE: {
+		case BINDER_WORK_TRANSACTION_COMPLETE:
+		case BINDER_WORK_TRANSACTION_ONEWAY_SPAM_SUSPECT: {
+			if (proc->oneway_spam_detection_enabled &&
+				   w->type == BINDER_WORK_TRANSACTION_ONEWAY_SPAM_SUSPECT)
+				cmd = BR_ONEWAY_SPAM_SUSPECT;
+			else
+				cmd = BR_TRANSACTION_COMPLETE;
 			binder_inner_proc_unlock(proc);
-			cmd = BR_TRANSACTION_COMPLETE;
 			kfree(w);
 			binder_stats_deleted(BINDER_STAT_TRANSACTION_COMPLETE);
 			if (put_user(cmd, (uint32_t __user *)ptr))
@@ -4897,6 +4905,18 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 		}
 		break;
 	}
+	case BINDER_ENABLE_ONEWAY_SPAM_DETECTION: {
+		uint32_t enable;
+
+		if (copy_from_user(&enable, ubuf, sizeof(enable))) {
+			ret = -EINVAL;
+			goto err;
+		}
+		binder_inner_proc_lock(proc);
+		proc->oneway_spam_detection_enabled = (bool)enable;
+		binder_inner_proc_unlock(proc);
+		break;
+	}
 	default:
 		ret = -EINVAL;
 		goto err;
@@ -5559,7 +5579,9 @@ static const char * const binder_return_strings[] = {
 	"BR_FINISHED",
 	"BR_DEAD_BINDER",
 	"BR_CLEAR_DEATH_NOTIFICATION_DONE",
-	"BR_FAILED_REPLY"
+	"BR_FAILED_REPLY",
+	"BR_FROZEN_REPLY",
+	"BR_ONEWAY_SPAM_SUSPECT",
 };
 
 static const char * const binder_command_strings[] = {
diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c
index 7caf74a..340515f 100644
--- a/drivers/android/binder_alloc.c
+++ b/drivers/android/binder_alloc.c
@@ -338,7 +338,7 @@ static inline struct vm_area_struct *binder_alloc_get_vma(
 	return vma;
 }
 
-static void debug_low_async_space_locked(struct binder_alloc *alloc, int pid)
+static bool debug_low_async_space_locked(struct binder_alloc *alloc, int pid)
 {
 	/*
 	 * Find the amount and size of buffers allocated by the current caller;
@@ -366,13 +366,19 @@ static void debug_low_async_space_locked(struct binder_alloc *alloc, int pid)
 
 	/*
 	 * Warn if this pid has more than 50 transactions, or more than 50% of
-	 * async space (which is 25% of total buffer size).
+	 * async space (which is 25% of total buffer size). Oneway spam is only
+	 * detected when the threshold is exceeded.
 	 */
 	if (num_buffers > 50 || total_alloc_size > alloc->buffer_size / 4) {
 		binder_alloc_debug(BINDER_DEBUG_USER_ERROR,
 			     "%d: pid %d spamming oneway? %zd buffers allocated for a total size of %zd\n",
 			      alloc->pid, pid, num_buffers, total_alloc_size);
+		if (!alloc->oneway_spam_detected) {
+			alloc->oneway_spam_detected = true;
+			return true;
+		}
 	}
+	return false;
 }
 
 static struct binder_buffer *binder_alloc_new_buf_locked(
@@ -525,6 +531,7 @@ static struct binder_buffer *binder_alloc_new_buf_locked(
 	buffer->async_transaction = is_async;
 	buffer->extra_buffers_size = extra_buffers_size;
 	buffer->pid = pid;
+	buffer->oneway_spam_suspect = false;
 	if (is_async) {
 		alloc->free_async_space -= size + sizeof(struct binder_buffer);
 		binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC_ASYNC,
@@ -536,7 +543,9 @@ static struct binder_buffer *binder_alloc_new_buf_locked(
 			 * of async space left (which is less than 10% of total
 			 * buffer size).
 			 */
-			debug_low_async_space_locked(alloc, pid);
+			buffer->oneway_spam_suspect = debug_low_async_space_locked(alloc, pid);
+		} else {
+			alloc->oneway_spam_detected = false;
 		}
 	}
 	return buffer;
diff --git a/drivers/android/binder_alloc.h b/drivers/android/binder_alloc.h
index 6e8e001..7dea57a 100644
--- a/drivers/android/binder_alloc.h
+++ b/drivers/android/binder_alloc.h
@@ -26,6 +26,8 @@ struct binder_transaction;
  * @clear_on_free:      %true if buffer must be zeroed after use
  * @allow_user_free:    %true if user is allowed to free buffer
  * @async_transaction:  %true if buffer is in use for an async txn
+ * @oneway_spam_suspect: %true if total async allocate size just exceed
+ * spamming detect threshold
  * @debug_id:           unique ID for debugging
  * @transaction:        pointer to associated struct binder_transaction
  * @target_node:        struct binder_node associated with this buffer
@@ -45,7 +47,8 @@ struct binder_buffer {
 	unsigned clear_on_free:1;
 	unsigned allow_user_free:1;
 	unsigned async_transaction:1;
-	unsigned debug_id:28;
+	unsigned oneway_spam_suspect:1;
+	unsigned debug_id:27;
 
 	struct binder_transaction *transaction;
 
@@ -87,6 +90,8 @@ struct binder_lru_page {
  * @buffer_size:        size of address space specified via mmap
  * @pid:                pid for associated binder_proc (invariant after init)
  * @pages_high:         high watermark of offset in @pages
+ * @oneway_spam_detected: %true if oneway spam detection fired, clear that
+ * flag once the async buffer has returned to a healthy state
  *
  * Bookkeeping structure for per-proc address space management for binder
  * buffers. It is normally initialized during binder_init() and binder_mmap()
@@ -107,6 +112,7 @@ struct binder_alloc {
 	uint32_t buffer_free;
 	int pid;
 	size_t pages_high;
+	bool oneway_spam_detected;
 };
 
 #ifdef CONFIG_ANDROID_BINDER_IPC_SELFTEST
diff --git a/drivers/android/binder_internal.h b/drivers/android/binder_internal.h
index 2872a7d..810c0b8 100644
--- a/drivers/android/binder_internal.h
+++ b/drivers/android/binder_internal.h
@@ -155,7 +155,7 @@ enum binder_stat_types {
 };
 
 struct binder_stats {
-	atomic_t br[_IOC_NR(BR_FAILED_REPLY) + 1];
+	atomic_t br[_IOC_NR(BR_ONEWAY_SPAM_SUSPECT) + 1];
 	atomic_t bc[_IOC_NR(BC_REPLY_SG) + 1];
 	atomic_t obj_created[BINDER_STAT_COUNT];
 	atomic_t obj_deleted[BINDER_STAT_COUNT];
@@ -174,6 +174,7 @@ struct binder_work {
 	enum binder_work_type {
 		BINDER_WORK_TRANSACTION = 1,
 		BINDER_WORK_TRANSACTION_COMPLETE,
+		BINDER_WORK_TRANSACTION_ONEWAY_SPAM_SUSPECT,
 		BINDER_WORK_RETURN_ERROR,
 		BINDER_WORK_NODE,
 		BINDER_WORK_DEAD_BINDER,
@@ -409,6 +410,8 @@ struct binder_ref {
  * @outer_lock:           no nesting under innor or node lock
  *                        Lock order: 1) outer, 2) node, 3) inner
  * @binderfs_entry:       process-specific binderfs log file
+ * @oneway_spam_detection_enabled: process enabled oneway spam detection
+ *                        or not
  *
  * Bookkeeping structure for binder processes
  */
@@ -444,6 +447,7 @@ struct binder_proc {
 	spinlock_t inner_lock;
 	spinlock_t outer_lock;
 	struct dentry *binderfs_entry;
+	bool oneway_spam_detection_enabled;
 };
 
 /**
diff --git a/include/uapi/linux/android/binder.h b/include/uapi/linux/android/binder.h
index 156070d..20e435f 100644
--- a/include/uapi/linux/android/binder.h
+++ b/include/uapi/linux/android/binder.h
@@ -241,6 +241,7 @@ struct binder_frozen_status_info {
 #define BINDER_SET_CONTEXT_MGR_EXT	_IOW('b', 13, struct flat_binder_object)
 #define BINDER_FREEZE			_IOW('b', 14, struct binder_freeze_info)
 #define BINDER_GET_FROZEN_INFO		_IOWR('b', 15, struct binder_frozen_status_info)
+#define BINDER_ENABLE_ONEWAY_SPAM_DETECTION	_IOW('b', 16, __u32)
 
 /*
  * NOTE: Two special error codes you should check for when calling
@@ -428,6 +429,13 @@ enum binder_driver_return_protocol {
 	 * The target of the last transaction (either a bcTRANSACTION or
 	 * a bcATTEMPT_ACQUIRE) is frozen.  No parameters.
 	 */
+
+	BR_ONEWAY_SPAM_SUSPECT = _IO('r', 19),
+	/*
+	 * Current process sent too many oneway calls to target, and the last
+	 * asynchronous transaction makes the allocated async buffer size exceed
+	 * detection threshold.  No parameters.
+	 */
 };
 
 enum binder_driver_command_protocol {
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH v4] binder: tell userspace to dump current backtrace when detected oneway spamming
  2021-04-09  3:40               ` [PATCH v4] binder: tell userspace to dump current backtrace when detected " Hang Lu
@ 2021-04-09  6:08                 ` Greg KH
  2021-04-09  6:21                   ` Hang Lu
  0 siblings, 1 reply; 12+ messages in thread
From: Greg KH @ 2021-04-09  6:08 UTC (permalink / raw)
  To: Hang Lu
  Cc: tkjos, tkjos, maco, arve, joel, christian, hridya, surenb,
	rdunlap, linux-kernel

On Fri, Apr 09, 2021 at 11:40:57AM +0800, Hang Lu wrote:
> When async binder buffer got exhausted, some normal oneway transactions
> will also be discarded and may cause system or application failures. By
> that time, the binder debug information we dump may not be relevant to
> the root cause. And this issue is difficult to debug if without the
> backtrace of the thread sending spam.
> 
> This change will send BR_ONEWAY_SPAM_SUSPECT to userspace when oneway
> spamming is detected, request to dump current backtrace. Oneway spamming
> will be reported only once when exceeding the threshold (target process
> dips below 80% of its oneway space, and current process is responsible for
> either more than 50 transactions, or more than 50% of the oneway space).
> And the detection will restart when the async buffer has returned to a
> healthy state.
> 
> Signed-off-by: Hang Lu <hangl@codeaurora.org>
> ---
> v4: add missing BR_FROZEN_REPLY in binder_return_strings and change the size of binder_stats.br array

Should the BR_FROZEN_REPLY string be a separate patch as it's a fix for
the "binder frozen feature", not this new feature, right?  Or does this
patch require that change and the frozen patch did not?

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v4] binder: tell userspace to dump current backtrace when detected oneway spamming
  2021-04-09  6:08                 ` Greg KH
@ 2021-04-09  6:21                   ` Hang Lu
  2021-04-09  6:25                     ` Greg KH
  0 siblings, 1 reply; 12+ messages in thread
From: Hang Lu @ 2021-04-09  6:21 UTC (permalink / raw)
  To: Greg KH
  Cc: tkjos, tkjos, maco, arve, joel, christian, hridya, surenb,
	rdunlap, linux-kernel

On 4/9/2021 2:08 PM, Greg KH wrote:
> On Fri, Apr 09, 2021 at 11:40:57AM +0800, Hang Lu wrote:
>> When async binder buffer got exhausted, some normal oneway transactions
>> will also be discarded and may cause system or application failures. By
>> that time, the binder debug information we dump may not be relevant to
>> the root cause. And this issue is difficult to debug if without the
>> backtrace of the thread sending spam.
>>
>> This change will send BR_ONEWAY_SPAM_SUSPECT to userspace when oneway
>> spamming is detected, request to dump current backtrace. Oneway spamming
>> will be reported only once when exceeding the threshold (target process
>> dips below 80% of its oneway space, and current process is responsible for
>> either more than 50 transactions, or more than 50% of the oneway space).
>> And the detection will restart when the async buffer has returned to a
>> healthy state.
>>
>> Signed-off-by: Hang Lu <hangl@codeaurora.org>
>> ---
>> v4: add missing BR_FROZEN_REPLY in binder_return_strings and change the size of binder_stats.br array
> 
> Should the BR_FROZEN_REPLY string be a separate patch as it's a fix for
> the "binder frozen feature", not this new feature, right?  Or does this
> patch require that change and the frozen patch did not?

Yes, BR_FROZEN_REPLY string is a fix and seems should to be separated from this new feature. But I'm still wondering how to submit these 2 separate patches as they edit the same place(maybe merge conflict). Do you know which of the following two commit methods is more suitable? Thanks!

1. char-misc-next HEAD --> BR_FROZEN_REPLY fix patch --> new feature patch

2. char-misc-next HEAD --> BR_FROZEN_REPLY fix patch
                   \-----> new feature patch


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v4] binder: tell userspace to dump current backtrace when detected oneway spamming
  2021-04-09  6:21                   ` Hang Lu
@ 2021-04-09  6:25                     ` Greg KH
  0 siblings, 0 replies; 12+ messages in thread
From: Greg KH @ 2021-04-09  6:25 UTC (permalink / raw)
  To: Hang Lu
  Cc: tkjos, tkjos, maco, arve, joel, christian, hridya, surenb,
	rdunlap, linux-kernel

On Fri, Apr 09, 2021 at 02:21:58PM +0800, Hang Lu wrote:
> On 4/9/2021 2:08 PM, Greg KH wrote:
> > On Fri, Apr 09, 2021 at 11:40:57AM +0800, Hang Lu wrote:
> >> When async binder buffer got exhausted, some normal oneway transactions
> >> will also be discarded and may cause system or application failures. By
> >> that time, the binder debug information we dump may not be relevant to
> >> the root cause. And this issue is difficult to debug if without the
> >> backtrace of the thread sending spam.
> >>
> >> This change will send BR_ONEWAY_SPAM_SUSPECT to userspace when oneway
> >> spamming is detected, request to dump current backtrace. Oneway spamming
> >> will be reported only once when exceeding the threshold (target process
> >> dips below 80% of its oneway space, and current process is responsible for
> >> either more than 50 transactions, or more than 50% of the oneway space).
> >> And the detection will restart when the async buffer has returned to a
> >> healthy state.
> >>
> >> Signed-off-by: Hang Lu <hangl@codeaurora.org>
> >> ---
> >> v4: add missing BR_FROZEN_REPLY in binder_return_strings and change the size of binder_stats.br array
> > 
> > Should the BR_FROZEN_REPLY string be a separate patch as it's a fix for
> > the "binder frozen feature", not this new feature, right?  Or does this
> > patch require that change and the frozen patch did not?
> 
> Yes, BR_FROZEN_REPLY string is a fix and seems should to be separated from this new feature. But I'm still wondering how to submit these 2 separate patches as they edit the same place(maybe merge conflict). Do you know which of the following two commit methods is more suitable? Thanks!
> 
> 1. char-misc-next HEAD --> BR_FROZEN_REPLY fix patch --> new feature patch

Yes, do it this way, a 2 patch series is fine.

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2021-04-09  6:25 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-03-31  7:34 [PATCH] binder: tell userspace to dump current backtrace when detecting oneway spamming Hang Lu
2021-03-31  7:44 ` Greg KH
2021-04-01  8:05   ` Hang Lu
2021-04-01  8:28     ` [PATCH v2] " Hang Lu
2021-04-06 20:26       ` Todd Kjos
2021-04-07  3:14         ` [PATCH v3] " Hang Lu
2021-04-07  4:11           ` [PATCH v4] " Hang Lu
2021-04-07 19:38             ` Todd Kjos
2021-04-09  3:40               ` [PATCH v4] binder: tell userspace to dump current backtrace when detected " Hang Lu
2021-04-09  6:08                 ` Greg KH
2021-04-09  6:21                   ` Hang Lu
2021-04-09  6:25                     ` Greg KH

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.