[PATCH v2] audit: report audit wait metric in audit status reply

* [PATCH v2] audit: report audit wait metric in audit status reply
@ 2020-07-01 21:32 Max Englander
  2020-07-02 20:42 ` Paul Moore
                   ` (2 more replies)
  0 siblings, 3 replies; 31+ messages in thread
From: Max Englander @ 2020-07-01 21:32 UTC (permalink / raw)
  To: Paul Moore, Eric Paris, linux-audit

In environments where the preservation of audit events and predictable
usage of system memory are prioritized, admins may use a combination of
--backlog_wait_time and -b options at the risk of degraded performance
resulting from backlog waiting. In some cases, this risk may be
preferred to lost events or unbounded memory usage. Ideally, this risk
can be mitigated by making adjustments when backlog waiting is detected.

However, detection can be diffult using the currently available metrics.
For example, an admin attempting to debug degraded performance may
falsely believe a full backlog indicates backlog waiting. It may turn
out the backlog frequently fills up but drains quickly.

To make it easier to reliably track degraded performance to backlog
waiting, this patch makes the following changes:

Add a new field backlog_wait_sum to the audit status reply. Initialize
this field to zero. Add to this field the total time spent by the
current task on scheduled timeouts while the backlog limit is exceeded.

Tested on Ubuntu 18.04 using complementary changes to the audit
userspace: https://github.com/linux-audit/audit-userspace/pull/134.

Signed-off-by: Max Englander <max.englander@gmail.com>
---
 Patch changelogs between v1 and v2:
 - Instead of printing a warning when backlog waiting occurs, add
   duration of backlog waiting to cumulative sum, and report this
   sum in audit status reply.

 include/uapi/linux/audit.h | 7 ++++++-
 kernel/audit.c             | 9 +++++++++
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index a534d71e689a..ea0cc364beca 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -340,6 +340,7 @@ enum {
 #define AUDIT_STATUS_BACKLOG_LIMIT	0x0010
 #define AUDIT_STATUS_BACKLOG_WAIT_TIME	0x0020
 #define AUDIT_STATUS_LOST		0x0040
+#define AUDIT_STATUS_BACKLOG_WAIT_SUM	0x0080
 
 #define AUDIT_FEATURE_BITMAP_BACKLOG_LIMIT	0x00000001
 #define AUDIT_FEATURE_BITMAP_BACKLOG_WAIT_TIME	0x00000002
@@ -348,6 +349,7 @@ enum {
 #define AUDIT_FEATURE_BITMAP_SESSIONID_FILTER	0x00000010
 #define AUDIT_FEATURE_BITMAP_LOST_RESET		0x00000020
 #define AUDIT_FEATURE_BITMAP_FILTER_FS		0x00000040
+#define AUDIT_FEATURE_BITMAP_BACKLOG_WAIT_SUM	0x00000080
 
 #define AUDIT_FEATURE_BITMAP_ALL (AUDIT_FEATURE_BITMAP_BACKLOG_LIMIT | \
 				  AUDIT_FEATURE_BITMAP_BACKLOG_WAIT_TIME | \
@@ -355,12 +357,14 @@ enum {
 				  AUDIT_FEATURE_BITMAP_EXCLUDE_EXTEND | \
 				  AUDIT_FEATURE_BITMAP_SESSIONID_FILTER | \
 				  AUDIT_FEATURE_BITMAP_LOST_RESET | \
-				  AUDIT_FEATURE_BITMAP_FILTER_FS)
+				  AUDIT_FEATURE_BITMAP_FILTER_FS | \
+				  AUDIT_FEATURE_BITMAP_BACKLOG_WAIT_SUM)
 
 /* deprecated: AUDIT_VERSION_* */
 #define AUDIT_VERSION_LATEST 		AUDIT_FEATURE_BITMAP_ALL
 #define AUDIT_VERSION_BACKLOG_LIMIT	AUDIT_FEATURE_BITMAP_BACKLOG_LIMIT
 #define AUDIT_VERSION_BACKLOG_WAIT_TIME	AUDIT_FEATURE_BITMAP_BACKLOG_WAIT_TIME
+#define AUDIT_VERSION_BACKLOG_WAIT_SUM	AUDIT_FEATURE_BITMAP_BACKLOG_WAIT_SUM
 
 				/* Failure-to-log actions */
 #define AUDIT_FAIL_SILENT	0
@@ -466,6 +470,7 @@ struct audit_status {
 		__u32	feature_bitmap;	/* bitmap of kernel audit features */
 	};
 	__u32		backlog_wait_time;/* message queue wait timeout */
+	__u32           backlog_wait_sum;/* time spent waiting while message limit exceeded */
 };
 
 struct audit_features {
diff --git a/kernel/audit.c b/kernel/audit.c
index 87f31bf1f0a0..301ea4f3d750 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -136,6 +136,11 @@ u32		audit_sig_sid = 0;
 */
 static atomic_t	audit_lost = ATOMIC_INIT(0);
 
+/* Monotonically increasing sum of time the kernel has spent
+ * waiting while the backlog limit is exceeded.
+ */
+static atomic_t audit_backlog_wait_sum = ATOMIC_INIT(0);
+
 /* Hash for inode-based rules */
 struct list_head audit_inode_hash[AUDIT_INODE_BUCKETS];
 
@@ -1204,6 +1209,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 		s.backlog		= skb_queue_len(&audit_queue);
 		s.feature_bitmap	= AUDIT_FEATURE_BITMAP_ALL;
 		s.backlog_wait_time	= audit_backlog_wait_time;
+		s.backlog_wait_sum      = atomic_read(&audit_backlog_wait_sum);
 		audit_send_reply(skb, seq, AUDIT_GET, 0, 0, &s, sizeof(s));
 		break;
 	}
@@ -1794,6 +1800,9 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask,
 				return NULL;
 			}
 		}
+
+		if (stime != audit_backlog_wait_time)
+			atomic_add(audit_backlog_wait_time - stime, &audit_backlog_wait_sum);
 	}
 
 	ab = audit_buffer_alloc(ctx, gfp_mask, type);
-- 
2.17.1

--
Linux-audit mailing list
Linux-audit@redhat.com
https://www.redhat.com/mailman/listinfo/linux-audit


^ permalink raw reply related	[flat|nested] 31+ messages in thread