[PATCH v3] audit: report audit wait metric in audit status reply

Paul Moore paul at paul-moore.com
Wed Jul 8 23:13:08 UTC 2020


On Sat, Jul 4, 2020 at 11:15 AM Max Englander <max.englander at gmail.com> wrote:
>
> In environments where the preservation of audit events and predictable
> usage of system memory are prioritized, admins may use a combination of
> --backlog_wait_time and -b options at the risk of degraded performance
> resulting from backlog waiting. In some cases, this risk may be
> preferred to lost events or unbounded memory usage. Ideally, this risk
> can be mitigated by making adjustments when backlog waiting is detected.
>
> However, detection can be difficult using the currently available
> metrics. For example, an admin attempting to debug degraded performance
> may falsely believe a full backlog indicates backlog waiting. It may
> turn out the backlog frequently fills up but drains quickly.
>
> To make it easier to reliably track degraded performance to backlog
> waiting, this patch makes the following changes:
>
> Add a new field backlog_wait_time_total to the audit status reply.
> Initialize this field to zero. Add to this field the total time spent
> by the current task on scheduled timeouts while the backlog limit is
> exceeded. Reset field to zero upon request via AUDIT_SET.
>
> Tested on Ubuntu 18.04 using complementary changes to the
> audit-userspace and audit-testsuite:
> - https://github.com/linux-audit/audit-userspace/pull/134
> - https://github.com/linux-audit/audit-testsuite/pull/97
>
> Signed-off-by: Max Englander <max.englander at gmail.com>
> ---
> Patch changelogs between v1 and v2:
>   - Instead of printing a warning when backlog waiting occurs, add
>     duration of backlog waiting to cumulative sum, and report this
>     sum in audit status reply.
>
> Patch changelogs between v2 and v3:
>  - Rename backlog_wait_sum to backlog_wait_time_actual.
>  - Drop unneeded and unwanted header flags
>    AUDIT_FEATURE_BITMAP_BACKLOG_WAIT_SUM and
>    AUDIT_VERSION_BACKLOG_WAIT_SUM.
>  - Increment backlog_wait_time_actual counter after every call to
>    schedule_timeout rather than once after enqueuing (or losing) an
>    audit record.
>  - Add support for resetting backlog_wait_time_actual counter to zero
>    upon request via AUDIT_SET.
>
>  include/uapi/linux/audit.h | 18 +++++++++++-------
>  kernel/audit.c             | 35 +++++++++++++++++++++++++----------
>  2 files changed, 36 insertions(+), 17 deletions(-)

This looks okay to me, thanks for the fixes Max.

Steve, does the associated userspace patch look okay to you?

> diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
> index a534d71e689a..92d72965ad44 100644
> --- a/include/uapi/linux/audit.h
> +++ b/include/uapi/linux/audit.h
> @@ -332,14 +332,15 @@ enum {
>  };
>
>  /* Status symbols */
> -                               /* Mask values */
> -#define AUDIT_STATUS_ENABLED           0x0001
> -#define AUDIT_STATUS_FAILURE           0x0002
> -#define AUDIT_STATUS_PID               0x0004
> +                                               /* Mask values */
> +#define AUDIT_STATUS_ENABLED                   0x0001
> +#define AUDIT_STATUS_FAILURE                   0x0002
> +#define AUDIT_STATUS_PID                       0x0004
>  #define AUDIT_STATUS_RATE_LIMIT                0x0008
> -#define AUDIT_STATUS_BACKLOG_LIMIT     0x0010
> -#define AUDIT_STATUS_BACKLOG_WAIT_TIME 0x0020
> -#define AUDIT_STATUS_LOST              0x0040
> +#define AUDIT_STATUS_BACKLOG_LIMIT             0x0010
> +#define AUDIT_STATUS_BACKLOG_WAIT_TIME         0x0020
> +#define AUDIT_STATUS_LOST                      0x0040
> +#define AUDIT_STATUS_BACKLOG_WAIT_TIME_ACTUAL  0x0080
>
>  #define AUDIT_FEATURE_BITMAP_BACKLOG_LIMIT     0x00000001
>  #define AUDIT_FEATURE_BITMAP_BACKLOG_WAIT_TIME 0x00000002
> @@ -466,6 +467,9 @@ struct audit_status {
>                 __u32   feature_bitmap; /* bitmap of kernel audit features */
>         };
>         __u32           backlog_wait_time;/* message queue wait timeout */
> +       __u32           backlog_wait_time_actual;/* time spent waiting while
> +                                                 * message limit exceeded
> +                                                 */
>  };
>
>  struct audit_features {
> diff --git a/kernel/audit.c b/kernel/audit.c
> index 87f31bf1f0a0..33c640fdacf7 100644
> --- a/kernel/audit.c
> +++ b/kernel/audit.c
> @@ -136,6 +136,11 @@ u32                audit_sig_sid = 0;
>  */
>  static atomic_t        audit_lost = ATOMIC_INIT(0);
>
> +/* Monotonically increasing sum of time the kernel has spent
> + * waiting while the backlog limit is exceeded.
> + */
> +static atomic_t audit_backlog_wait_time_actual = ATOMIC_INIT(0);
> +
>  /* Hash for inode-based rules */
>  struct list_head audit_inode_hash[AUDIT_INODE_BUCKETS];
>
> @@ -1193,17 +1198,18 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
>         case AUDIT_GET: {
>                 struct audit_status     s;
>                 memset(&s, 0, sizeof(s));
> -               s.enabled               = audit_enabled;
> -               s.failure               = audit_failure;
> +               s.enabled                  = audit_enabled;
> +               s.failure                  = audit_failure;
>                 /* NOTE: use pid_vnr() so the PID is relative to the current
>                  *       namespace */
> -               s.pid                   = auditd_pid_vnr();
> -               s.rate_limit            = audit_rate_limit;
> -               s.backlog_limit         = audit_backlog_limit;
> -               s.lost                  = atomic_read(&audit_lost);
> -               s.backlog               = skb_queue_len(&audit_queue);
> -               s.feature_bitmap        = AUDIT_FEATURE_BITMAP_ALL;
> -               s.backlog_wait_time     = audit_backlog_wait_time;
> +               s.pid                      = auditd_pid_vnr();
> +               s.rate_limit               = audit_rate_limit;
> +               s.backlog_limit            = audit_backlog_limit;
> +               s.lost                     = atomic_read(&audit_lost);
> +               s.backlog                  = skb_queue_len(&audit_queue);
> +               s.feature_bitmap           = AUDIT_FEATURE_BITMAP_ALL;
> +               s.backlog_wait_time        = audit_backlog_wait_time;
> +               s.backlog_wait_time_actual = atomic_read(&audit_backlog_wait_time_actual);
>                 audit_send_reply(skb, seq, AUDIT_GET, 0, 0, &s, sizeof(s));
>                 break;
>         }
> @@ -1307,6 +1313,12 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
>                         audit_log_config_change("lost", 0, lost, 1);
>                         return lost;
>                 }
> +               if (s.mask == AUDIT_STATUS_BACKLOG_WAIT_TIME_ACTUAL) {
> +                       u32 actual = atomic_xchg(&audit_backlog_wait_time_actual, 0);
> +
> +                       audit_log_config_change("backlog_wait_time_actual", 0, actual, 1);
> +                       return actual;
> +               }
>                 break;
>         }
>         case AUDIT_GET_FEATURE:
> @@ -1778,12 +1790,15 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask,
>                         /* sleep if we are allowed and we haven't exhausted our
>                          * backlog wait limit */
>                         if (gfpflags_allow_blocking(gfp_mask) && (stime > 0)) {
> +                               long rtime = stime;
> +
>                                 DECLARE_WAITQUEUE(wait, current);
>
>                                 add_wait_queue_exclusive(&audit_backlog_wait,
>                                                          &wait);
>                                 set_current_state(TASK_UNINTERRUPTIBLE);
> -                               stime = schedule_timeout(stime);
> +                               stime = schedule_timeout(rtime);
> +                               atomic_add(rtime - stime, &audit_backlog_wait_time_actual);
>                                 remove_wait_queue(&audit_backlog_wait, &wait);
>                         } else {
>                                 if (audit_rate_check() && printk_ratelimit())
> --
> 2.17.1
>


-- 
paul moore
www.paul-moore.com




More information about the Linux-audit mailing list