[RFC PATCH 5/9] audit: rework the audit queue handling
Richard Guy Briggs
rgb at redhat.com
Thu Nov 24 06:35:36 UTC 2016
On 2016-11-23 20:41, Paul Moore wrote:
> From: Paul Moore <paul at paul-moore.com>
>
> The audit record backlog queue has always been a bit of a mess, and
> the moving the multicast send into kauditd_thread() from
> audit_log_end() only makes things worse. This patch attempts to fix
> the backlog queue with a better design that should hold up better
> under load and have less of a performance impact at syscall
> invocation time.
>
> While it looks like there is a log going on in this patch, the main
> change is the move from a single backlog queue to three queues:
>
> * A queue for holding records generated from audit_log_end() that
> haven't been consumed by kauditd_thread() (audit_queue).
>
> * A queue for holding records that have been sent via multicast but
> had a temporary failure when sending via unicast and need a resend
> (audit_retry_queue).
>
> * A queue for holding records that haven't been sent via unicast
> because no one is listening (audit_hold_queue).
>
> Special care is taken in this patch to ensure that the proper
> record ordering is preserved, e.g. we send everything in the hold
> queue first, then the retry queue, and finally the main queue.
>
> Signed-off-by: Paul Moore <paul at paul-moore.com>
> ---
> kernel/audit.c | 347 ++++++++++++++++++++++++++++++++++++--------------------
> 1 file changed, 226 insertions(+), 121 deletions(-)
>
> diff --git a/kernel/audit.c b/kernel/audit.c
> index 6ac1df1..f4056bc 100644
> --- a/kernel/audit.c
> +++ b/kernel/audit.c
> @@ -138,11 +138,18 @@ static DEFINE_SPINLOCK(audit_freelist_lock);
> static int audit_freelist_count;
> static LIST_HEAD(audit_freelist);
>
> +/* queue msgs to send via kauditd_task */
> static struct sk_buff_head audit_queue;
> -/* queue of skbs to send to auditd when/if it comes back */
> +/* queue msgs due to temporary unicast send problems */
> +static struct sk_buff_head audit_retry_queue;
> +/* queue msgs waiting for new auditd connection */
> static struct sk_buff_head audit_hold_queue;
> +
> +/* queue servicing thread */
> static struct task_struct *kauditd_task;
> static DECLARE_WAIT_QUEUE_HEAD(kauditd_wait);
> +
> +/* waitqueue for callers who are blocked on the audit backlog */
> static DECLARE_WAIT_QUEUE_HEAD(audit_backlog_wait);
>
> static struct audit_features af = {.vers = AUDIT_FEATURE_VERSION,
> @@ -365,25 +372,6 @@ static int audit_set_failure(u32 state)
> }
>
> /*
> - * Queue skbs to be sent to auditd when/if it comes back. These skbs should
> - * already have been sent via prink/syslog and so if these messages are dropped
> - * it is not a huge concern since we already passed the audit_log_lost()
> - * notification and stuff. This is just nice to get audit messages during
> - * boot before auditd is running or messages generated while auditd is stopped.
> - * This only holds messages is audit_default is set, aka booting with audit=1
> - * or building your kernel that way.
> - */
> -static void audit_hold_skb(struct sk_buff *skb)
> -{
> - if (audit_default &&
> - (!audit_backlog_limit ||
> - skb_queue_len(&audit_hold_queue) < audit_backlog_limit))
> - skb_queue_tail(&audit_hold_queue, skb);
> - else
> - kfree_skb(skb);
> -}
> -
> -/*
> * For one reason or another this nlh isn't getting delivered to the userspace
> * audit daemon, just send it to printk.
> */
> @@ -398,58 +386,114 @@ static void kauditd_printk_skb(struct sk_buff *skb)
> else
> audit_log_lost("printk limit exceeded");
> }
> +}
> +
> +/**
> + * kauditd_hold_skb - Queue an audit record, waiting for auditd
> + * @skb: audit record
> + *
> + * Description:
> + * Queue the audit record, waiting for an instance of auditd. When this
> + * function is called we haven't given up yet on sending the record, but things
> + * are not looking good. The first thing we want to do is try to write the
> + * record via printk and then see if we want to try and hold on to the record
> + * and queue it, if we have room. If we want to hold on to the record, but we
> + * don't have room, record a record lost message.
> + */
> +static void kauditd_hold_skb(struct sk_buff *skb)
> +{
> + /* at this point it is uncertain if we will ever send this to auditd so
> + * try to send the message via printk before we go any further */
> + kauditd_printk_skb(skb);
> +
> + /* can we just silently drop the message? */
> + if (!audit_default) {
> + kfree_skb(skb);
> + return;
> + }
> +
> + /* if we have room, queue the message */
> + if (!audit_backlog_limit ||
> + skb_queue_len(&audit_hold_queue) < audit_backlog_limit) {
> + skb_queue_tail(&audit_hold_queue, skb);
> + return;
> + }
>
> - audit_hold_skb(skb);
> + /* we have no other options - drop the message */
> + audit_log_lost("kauditd hold queue overflow");
> + kfree_skb(skb);
> }
>
> -static void kauditd_send_unicast_skb(struct sk_buff *skb)
> +/**
> + * kauditd_retry_skb - Queue an audit record, attempt to send again to auditd
> + * @skb: audit record
> + *
> + * Description:
> + * Not as serious as kauditd_hold_skb() as we still have a connected auditd,
> + * but for some reason we are having problems sending it audit records so
> + * queue the given record and attempt to resend.
> + */
> +static void kauditd_retry_skb(struct sk_buff *skb)
> {
> - int err;
> - int attempts = 0;
> -#define AUDITD_RETRIES 5
> + /* NOTE: because records should only live in the retry queue for a
> + * short period of time, before either being sent or moved to the hold
> + * queue, we don't currently enforce a limit on this queue */
> + skb_queue_tail(&audit_retry_queue, skb);
> +}
Can kauditd_retry_skb() be eliminated entirely and just call
skb_queue_tail(&audit_retry_queue, skb) directly? The comment is
helpful, but found the code harder to follow in kauditd_thread() as a
result.
> +
> +/**
> + * auditd_reset - Disconnect the auditd connection
> + *
> + * Description:
> + * Break the auditd/kauditd connection and move all the records in the retry
> + * queue into the hold queue in case auditd reconnects.
> + */
> +static void auditd_reset(void)
> +{
> + struct sk_buff *skb;
> +
> + /* break the connection */
> + audit_pid = 0;
> + audit_sock = NULL;
> +
> + /* flush all of the retry queue to the hold queue */
> + while ((skb = skb_dequeue(&audit_retry_queue)))
> + kauditd_hold_skb(skb);
> +}
> +
> +/**
> + * kauditd_send_unicast_skb - Send a record via unicast to auditd
> + * @skb: audit record
> + */
> +static int kauditd_send_unicast_skb(struct sk_buff *skb)
> +{
> + int rc;
>
> -restart:
> - /* take a reference in case we can't send it and we want to hold it */
> + /* get an extra skb reference in case we fail to send */
> skb_get(skb);
> - err = netlink_unicast(audit_sock, skb, audit_nlk_portid, 0);
> - if (err < 0) {
> - pr_err("netlink_unicast sending to audit_pid=%d returned error: %d\n",
> - audit_pid, err);
> - if (audit_pid) {
> - if (err == -ECONNREFUSED || err == -EPERM
> - || ++attempts >= AUDITD_RETRIES) {
> - char s[32];
> -
> - snprintf(s, sizeof(s), "audit_pid=%d reset", audit_pid);
> - audit_log_lost(s);
> - audit_pid = 0;
> - audit_sock = NULL;
> - } else {
> - pr_warn("re-scheduling(#%d) write to audit_pid=%d\n",
> - attempts, audit_pid);
> - set_current_state(TASK_INTERRUPTIBLE);
> - schedule();
> - goto restart;
> - }
> - }
> - /* we might get lucky and get this in the next auditd */
> - audit_hold_skb(skb);
> - } else
> - /* drop the extra reference if sent ok */
> + rc = netlink_unicast(audit_sock, skb, audit_nlk_portid, 0);
> + if (rc >= 0) {
> consume_skb(skb);
> + rc = 0;
> + }
> +
> + return rc;
> }
>
> /*
> - * kauditd_send_multicast_skb - send the skb to multicast userspace listeners
> + * kauditd_send_multicast_skb - Send a record to any multicast listeners
> + * @skb: audit record
> *
> + * Description:
> * This function doesn't consume an skb as might be expected since it has to
> * copy it anyways.
> */
> -static void kauditd_send_multicast_skb(struct sk_buff *skb, gfp_t gfp_mask)
> +static void kauditd_send_multicast_skb(struct sk_buff *skb)
> {
> - struct sk_buff *copy;
> - struct audit_net *aunet = net_generic(&init_net, audit_net_id);
> - struct sock *sock = aunet->nlsk;
> + struct sk_buff *copy;
> + struct audit_net *aunet = net_generic(&init_net, audit_net_id);
> + struct sock *sock = aunet->nlsk;
> + struct nlmsghdr *nlh;
>
> if (!netlink_has_listeners(sock, AUDIT_NLGRP_READLOG))
> return;
> @@ -464,94 +508,155 @@ static void kauditd_send_multicast_skb(struct sk_buff *skb, gfp_t gfp_mask)
> * no reason for new multicast clients to continue with this
> * non-compliance.
> */
> - copy = skb_copy(skb, gfp_mask);
> + copy = skb_copy(skb, GFP_KERNEL);
> if (!copy)
> return;
> + nlh = nlmsg_hdr(copy);
> + nlh->nlmsg_len = skb->len;
>
> - nlmsg_multicast(sock, copy, 0, AUDIT_NLGRP_READLOG, gfp_mask);
> + nlmsg_multicast(sock, copy, 0, AUDIT_NLGRP_READLOG, GFP_KERNEL);
> }
>
> -/*
> - * flush_hold_queue - empty the hold queue if auditd appears
> - *
> - * If auditd just started, drain the queue of messages already
> - * sent to syslog/printk. Remember loss here is ok. We already
> - * called audit_log_lost() if it didn't go out normally. so the
> - * race between the skb_dequeue and the next check for audit_pid
> - * doesn't matter.
> +/**
> + * kauditd_wake_condition - Return true when it is time to wake kauditd_thread
> *
> - * If you ever find kauditd to be too slow we can get a perf win
> - * by doing our own locking and keeping better track if there
> - * are messages in this queue. I don't see the need now, but
> - * in 5 years when I want to play with this again I'll see this
> - * note and still have no friggin idea what i'm thinking today.
> + * Description:
> + * This function is for use by the wait_event_freezable() call in
> + * kauditd_thread().
> */
> -static void flush_hold_queue(void)
> +static int kauditd_wake_condition(void)
> {
> - struct sk_buff *skb;
> -
> - if (!audit_default || !audit_pid)
> - return;
> -
> - skb = skb_dequeue(&audit_hold_queue);
> - if (likely(!skb))
> - return;
> + static int pid_last = 0;
> + int rc;
> + int pid = audit_pid;
>
> - while (skb && audit_pid) {
> - kauditd_send_unicast_skb(skb);
> - skb = skb_dequeue(&audit_hold_queue);
> - }
> + /* wake on new messages or a change in the connected auditd */
> + rc = skb_queue_len(&audit_queue) || (pid && pid != pid_last);
> + if (rc)
> + pid_last = pid;
>
> - /*
> - * if auditd just disappeared but we
> - * dequeued an skb we need to drop ref
> - */
> - consume_skb(skb);
> + return rc;
> }
>
> static int kauditd_thread(void *dummy)
> {
> + int rc;
> + int auditd = 0;
> + int reschedule = 0;
> struct sk_buff *skb;
> struct nlmsghdr *nlh;
>
> +#define UNICAST_RETRIES 5
> +#define AUDITD_BAD(x,y) \
> + ((x) == -ECONNREFUSED || (x) == -EPERM || ++(y) >= UNICAST_RETRIES)
> +
> + /* NOTE: we do invalidate the auditd connection flag on any sending
> + * errors, but we only "restore" the connection flag at specific places
> + * in the loop in order to help ensure proper ordering of audit
> + * records */
> +
> set_freezable();
> while (!kthread_should_stop()) {
> - flush_hold_queue();
> + /* NOTE: possible area for future improvement is to look at
> + * the hold and retry queues, since only this thread
> + * has access to these queues we might be able to do
> + * our own queuing and skip some/all of the locking */
> +
> + /* NOTE: it might be a fun experiment to split the hold and
> + * retry queue handling to another thread, but the
> + * synchronization issues and other overhead might kill
> + * any performance gains */
> +
> + /* attempt to flush the hold queue */
> + while (auditd && (skb = skb_dequeue(&audit_hold_queue))) {
> + rc = kauditd_send_unicast_skb(skb);
> + if (rc) {
> + /* requeue to the same spot */
> + skb_queue_head(&audit_hold_queue, skb);
> +
> + auditd = 0;
> + if (AUDITD_BAD(rc, reschedule)) {
> + auditd_reset();
> + reschedule = 0;
> + }
> + } else
> + /* we were able to send successfully */
> + reschedule = 0;
> + }
> +
> + /* attempt to flush the retry queue */
> + while (auditd && (skb = skb_dequeue(&audit_retry_queue))) {
> + rc = kauditd_send_unicast_skb(skb);
> + if (rc) {
> + auditd = 0;
> + if (AUDITD_BAD(rc, reschedule)) {
> + kauditd_hold_skb(skb);
> + auditd_reset();
> + reschedule = 0;
> + } else
> + /* temporary problem (we hope), queue
> + * to the same spot and retry */
> + skb_queue_head(&audit_retry_queue, skb);
> + } else
> + /* we were able to send successfully */
> + reschedule = 0;
> + }
>
> + /* standard queue processing, try to be as quick as possible */
> +quick_loop:
> skb = skb_dequeue(&audit_queue);
> if (skb) {
> + /* setup the netlink header, see the comments in
> + * kauditd_send_multicast_skb() for length quirks */
> nlh = nlmsg_hdr(skb);
> -
> - /* if nlh->nlmsg_len is zero then we haven't attempted
> - * to send the message to userspace yet, if nlmsg_len
> - * is non-zero then we have attempted to send it to
> - * the multicast listeners as well as auditd; keep
> - * trying to send to auditd but don't repeat the
> - * multicast send */
> - if (nlh->nlmsg_len == 0) {
> - nlh->nlmsg_len = skb->len;
> - kauditd_send_multicast_skb(skb, GFP_KERNEL);
> -
> - /* see the note in kauditd_send_multicast_skb
> - * regarding the nlh->nlmsg_len value and why
> - * it differs between the multicast and unicast
> - * clients */
> - nlh->nlmsg_len -= NLMSG_HDRLEN;
> - }
> -
> - if (audit_pid)
> - kauditd_send_unicast_skb(skb);
> + nlh->nlmsg_len = skb->len - NLMSG_HDRLEN;
> +
> + /* attempt to send to any multicast listeners */
> + kauditd_send_multicast_skb(skb);
> +
> + /* attempt to send to auditd, queue on failure */
> + if (auditd) {
> + rc = kauditd_send_unicast_skb(skb);
> + if (rc) {
> + auditd = 0;
> + if (AUDITD_BAD(rc, reschedule)) {
> + auditd_reset();
> + reschedule = 0;
> + }
> +
> + /* move to the retry queue */
> + kauditd_retry_skb(skb);
> + } else
> + /* everything is working so go fast! */
> + goto quick_loop;
> + } else if (reschedule)
> + /* we are currently having problems, move to
> + * the retry queue */
> + kauditd_retry_skb(skb);
> else
> - kauditd_printk_skb(skb);
> + /* dump the message via printk and hold it */
> + kauditd_hold_skb(skb);
> } else {
> - /* we have flushed the backlog so wake everyone up who
> - * is blocked and go to sleep until we have something
> - * in the backlog again */
> + /* we have flushed the backlog so wake everyone */
> wake_up(&audit_backlog_wait);
> - wait_event_freezable(kauditd_wait,
> - skb_queue_len(&audit_queue));
> +
> + /* if everything is okay with auditd (if present), go
> + * to sleep until there is something new in the queue
> + * or we have a change in the connected auditd;
> + * otherwise simply reschedule to give things a chance
> + * to recover */
> + if (reschedule) {
> + set_current_state(TASK_INTERRUPTIBLE);
> + schedule();
> + } else
> + wait_event_freezable(kauditd_wait,
> + kauditd_wake_condition());
> +
> + /* update the auditd connection status */
> + auditd = (audit_pid ? 1 : 0);
> }
> }
> +
> return 0;
> }
>
> @@ -616,6 +721,7 @@ static int audit_send_reply_thread(void *arg)
> kfree(reply);
> return 0;
> }
> +
> /**
> * audit_send_reply - send an audit reply message via netlink
> * @request_skb: skb of request we are replying to (used to target the reply)
> @@ -1171,10 +1277,8 @@ static void __net_exit audit_net_exit(struct net *net)
> {
> struct audit_net *aunet = net_generic(net, audit_net_id);
> struct sock *sock = aunet->nlsk;
> - if (sock == audit_sock) {
> - audit_pid = 0;
> - audit_sock = NULL;
> - }
> + if (sock == audit_sock)
> + auditd_reset();
>
> RCU_INIT_POINTER(aunet->nlsk, NULL);
> synchronize_net();
> @@ -1201,6 +1305,7 @@ static int __init audit_init(void)
> register_pernet_subsys(&audit_net_ops);
>
> skb_queue_head_init(&audit_queue);
> + skb_queue_head_init(&audit_retry_queue);
> skb_queue_head_init(&audit_hold_queue);
> audit_initialized = AUDIT_INITIALIZED;
> audit_enabled = audit_default;
>
> --
> Linux-audit mailing list
> Linux-audit at redhat.com
> https://www.redhat.com/mailman/listinfo/linux-audit
- RGB
--
Richard Guy Briggs <rgb at redhat.com>
Kernel Security Engineering, Base Operating Systems, Red Hat
Remote, Ottawa, Canada
Voice: +1.647.777.2635, Internal: (81) 32635
More information about the Linux-audit
mailing list