[RFC PATCH] audit: reduce the number of kauditd_thread wakeups

Paul Moore paul at paul-moore.com
Sun Jun 6 03:23:13 UTC 2021


[NOTE: As this is an RFC patch, I wanted to add some commentary at
 the top of the patch description explaining where this patch came
 from and what testing has been done.  This patch is a derivative
 of another unreleased patch that removed all of the wake up calls
 from the audit_log_start() and audit_log_end() functions; while
 that patch worked well, there we some record losees with high
 volume burst traffic in the case of `auditctl -a task,never` or
 CONFIG_AUDITSYSCALL=n.  As this patch doesn't completely remove
 the wake up calls these two cases should behave similarly to how
 they do today.  As far as testing is concerned, this patch passes
 both the audit-testsuite and selinux-testsuite without problem and
 with expected audit queue losses (no losses outside of the tests
 which attempt to generate losses).  This patch also preserves the
 ability for the system to continue to function, in the
 `auditctl -a exit,always -S all` case, albeit very slowly.]

When audit is enabled, the kauditd_thread() function runs in its own
kernel thread, emptying the audit record queue and sending the
entries to userspace via different means.  As part of its normal
processing it goes to sleep after emptying the queue and waits for
the audit functions to wake it.

The current audit kernel code attempts to wake the kauditd thread
after each entry is added to the queue.  Considering that a single
syscall can have multiple audit records, with each wake attempt
involving locking and additional processing, this can be rather
wasteful.  In an effort to limit the number of wake attempts without
unnecessarily risking the audit queue size this patch does the
following:

* In the case of syscall auditing the wake up call is moved from
  audit_log_end() to audit_log_exit() meaning that the kauditd
  thread is only woken once, at the end of the syscall, after all of
  the syscall's audit records have been added to the queue.

* In the case where audit records are generated outside of a syscall
  context, the wake up call in audit_log_end() is preserved in order
  to ensure that these records do not suffer any additional latency
  or put unnecessary pressure on the queue.  This is done through
  some additional checking in audit_log_start() and an additional
  flag in the audit_buffer struct.

* The audit_log_start() function never attempts to wake the kauditd
  thread.  In the current code this is only done when the queue is
  under pressure, but at that point it is extremely likely that the
  thread is already active or scheduled, do we should be able to
  safely remove this wake attempt.

* Always wake the kauditd thread after processing management and
  user records in audit_receive_msg().  This should be relatively
  low frequency compared to the other audit sources, and doing a
  wake call here helps keep record latency low and the queue size
  in check.

* The kauditd thread itself is now a bit better at handling high
  volume audit record bursts.  Previously after emptying the queue
  the thread would wake every process that was blocked and then go
  to sleep; possibly going to sleep just a flood of new records
  are added to the queue.  The new kauditd thread approach wakes all
  of the blocked processes and then reschedules itself in
  anticipation of new records.  When the thread returns to execution
  it checks the queue and if there are any records present it
  immediately starts processing them, if the queue is empty the
  kauditd thread goes back to sleep.

Signed-off-by: Paul Moore <paul at paul-moore.com>
---
 kernel/audit.c   |   66 ++++++++++++++++++++++++++++++++++++++----------------
 kernel/audit.h   |    2 ++
 kernel/auditsc.c |    2 ++
 3 files changed, 51 insertions(+), 19 deletions(-)

diff --git a/kernel/audit.c b/kernel/audit.c
index 551a394bc8f42..7c3368835bb71 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -197,9 +197,10 @@ static struct audit_ctl_mutex {
  * to place it on a transmit queue.  Multiple audit_buffers can be in
  * use simultaneously. */
 struct audit_buffer {
-	struct sk_buff       *skb;	/* formatted skb ready to send */
-	struct audit_context *ctx;	/* NULL or associated context */
-	gfp_t		     gfp_mask;
+	struct sk_buff *skb;
+	struct audit_context *ctx;
+	gfp_t gfp_mask;
+	unsigned int kauditd_wake:1;
 };
 
 struct audit_reply {
@@ -804,6 +805,17 @@ static void kauditd_send_multicast_skb(struct sk_buff *skb)
 	nlmsg_multicast(sock, copy, 0, AUDIT_NLGRP_READLOG, GFP_KERNEL);
 }
 
+/**
+ * kauditd_wakeup - Wake the kauditd_thread
+ *
+ * Description:
+ * Wake up the kauditd_thread thread so that it can process the audit queues.
+ */
+void kauditd_wakeup(void)
+{
+	wake_up_interruptible(&kauditd_wait);
+}
+
 /**
  * kauditd_thread - Worker thread to send audit records to userspace
  * @dummy: unused
@@ -832,6 +844,7 @@ static int kauditd_thread(void *dummy)
 		portid = ac->portid;
 		rcu_read_unlock();
 
+all_queues:
 		/* attempt to flush the hold queue */
 		rc = kauditd_send_queue(sk, portid,
 					&audit_hold_queue, UNICAST_RETRIES,
@@ -861,25 +874,33 @@ static int kauditd_thread(void *dummy)
 					kauditd_send_multicast_skb,
 					(sk ?
 					 kauditd_retry_skb : kauditd_hold_skb));
-		if (ac && rc < 0)
+		if (ac && rc < 0) {
 			auditd_reset(ac);
-		sk = NULL;
+			goto disconnect;
+		}
+
+		/* we have processed all the queues so wake everyone who might
+		 * be waiting, reschedule ourselves to give others a chance to
+		 * run, and check the queue once again when we return; if
+		 * everything is quiet then we can go back to sleep */
+		wake_up(&audit_backlog_wait);
+		cond_resched();
+		if (skb_queue_len(&audit_queue))
+			goto all_queues;
 
+disconnect:
 		/* drop our netns reference, no auditd sends past this line */
+		sk = NULL;
 		if (net) {
 			put_net(net);
 			net = NULL;
 		}
 
-		/* we have processed all the queues so wake everyone */
-		wake_up(&audit_backlog_wait);
-
 		/* NOTE: we want to wake up if there is anything on the queue,
 		 *       regardless of if an auditd is connected, as we need to
 		 *       do the multicast send and rotate records from the
 		 *       main queue to the retry/hold queues */
-		wait_event_freezable(kauditd_wait,
-				     (skb_queue_len(&audit_queue) ? 1 : 0));
+		wait_event_freezable(kauditd_wait, skb_queue_len(&audit_queue));
 	}
 
 	return 0;
@@ -1509,6 +1530,9 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 		break;
 	}
 
+	/* poke the kauditd_thread to handle any records we generated */
+	kauditd_wakeup();
+
 	return err < 0 ? err : 0;
 }
 
@@ -1750,6 +1774,7 @@ static struct audit_buffer *audit_buffer_alloc(struct audit_context *ctx,
 
 	ab->ctx = ctx;
 	ab->gfp_mask = gfp_mask;
+	ab->kauditd_wake = 0;
 
 	return ab;
 
@@ -1831,14 +1856,10 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask,
 
 		while (audit_backlog_limit &&
 		       (skb_queue_len(&audit_queue) > audit_backlog_limit)) {
-			/* wake kauditd to try and flush the queue */
-			wake_up_interruptible(&kauditd_wait);
-
 			/* sleep if we are allowed and we haven't exhausted our
 			 * backlog wait limit */
 			if (gfpflags_allow_blocking(gfp_mask) && (stime > 0)) {
 				long rtime = stime;
-
 				DECLARE_WAITQUEUE(wait, current);
 
 				add_wait_queue_exclusive(&audit_backlog_wait,
@@ -1864,10 +1885,14 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask,
 		return NULL;
 	}
 
-	audit_get_stamp(ab->ctx, &t, &serial);
 	/* cancel dummy context to enable supporting records */
 	if (ctx)
 		ctx->dummy = 0;
+	/* force a kauditd wakeup if the normal syscall exit isn't doing it */
+	if (!ctx || !ctx->in_syscall)
+		ab->kauditd_wake = 1;
+
+	audit_get_stamp(ab->ctx, &t, &serial);
 	audit_log_format(ab, "audit(%llu.%03lu:%u): ",
 			 (unsigned long long)t.tv_sec, t.tv_nsec/1000000, serial);
 
@@ -2365,8 +2390,8 @@ int audit_signal_info(int sig, struct task_struct *t)
  *
  * We can not do a netlink send inside an irq context because it blocks (last
  * arg, flags, is not set to MSG_DONTWAIT), so the audit buffer is placed on a
- * queue and a kthread is scheduled to remove them from the queue outside the
- * irq context.  May be called in any context.
+ * queue where a kthread processes the buffer from outside the irq context.
+ * May be called in any context.
  */
 void audit_log_end(struct audit_buffer *ab)
 {
@@ -2385,9 +2410,12 @@ void audit_log_end(struct audit_buffer *ab)
 		nlh = nlmsg_hdr(skb);
 		nlh->nlmsg_len = skb->len - NLMSG_HDRLEN;
 
-		/* queue the netlink packet and poke the kauditd thread */
+		/* queue the netlink packet */
 		skb_queue_tail(&audit_queue, skb);
-		wake_up_interruptible(&kauditd_wait);
+
+		/* only wakeup kauditd if we can't do it later */
+		if (ab->kauditd_wake)
+			kauditd_wakeup();
 	} else
 		audit_log_lost("rate limit exceeded");
 
diff --git a/kernel/audit.h b/kernel/audit.h
index 3b9c0945225a1..b5f677017e975 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -213,6 +213,8 @@ static inline int audit_hash_ino(u32 ino)
 /* Indicates that audit should log the full pathname. */
 #define AUDIT_NAME_FULL -1
 
+extern void kauditd_wakeup(void);
+
 extern int audit_match_class(int class, unsigned syscall);
 extern int audit_comparator(const u32 left, const u32 op, const u32 right);
 extern int audit_uid_comparator(kuid_t left, u32 op, kuid_t right);
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 47fb48f42c934..bad776497438f 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1600,6 +1600,8 @@ static void audit_log_exit(void)
 		audit_log_end(ab);
 	if (call_panic)
 		audit_panic("error converting sid to string");
+
+	kauditd_wakeup();
 }
 
 /**




More information about the Linux-audit mailing list