[PATCH 13/14] audit: Replace chunk attached to mark instead of replacing mark
Richard Guy Briggs
rgb at redhat.com
Thu Oct 18 19:39:24 UTC 2018
On 2018-10-17 12:15, Jan Kara wrote:
> The audit tree code currently associates a new fsnotify mark with each new
> chunk. Because the chunk attached to an inode is replaced whenever a tag is
> added or removed, we also need to remove the old fsnotify mark and add a
> new one on each such occasion. This is cumbersome and makes the locking
> rules somewhat difficult to follow.
>
> Fix these problems by allocating the fsnotify mark independently of the
> chunk and keeping it for as long as some chunk is attached to the inode.
> Also add documentation about the locking rules so that things are easier
> to follow.
Reviewed-by: Richard Guy Briggs <rgb at redhat.com>
> Signed-off-by: Jan Kara <jack at suse.cz>
> ---
> kernel/audit_tree.c | 160 +++++++++++++++++++++++++++-------------------------
> 1 file changed, 83 insertions(+), 77 deletions(-)
>
> diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
> index 5deb4e1ed648..451d1b744e82 100644
> --- a/kernel/audit_tree.c
> +++ b/kernel/audit_tree.c
> @@ -27,7 +27,6 @@ struct audit_chunk {
> unsigned long key;
> struct fsnotify_mark *mark;
> struct list_head trees; /* with root here */
> - int dead;
> int count;
> atomic_long_t refs;
> struct rcu_head head;
> @@ -48,8 +47,15 @@ static LIST_HEAD(prune_list);
> static struct task_struct *prune_thread;
>
> /*
> - * One struct chunk is attached to each inode of interest.
> - * We replace struct chunk on tagging/untagging.
> + * One struct chunk is attached to each inode of interest through
> + * audit_tree_mark (fsnotify mark). We replace struct chunk on tagging /
> + * untagging, the mark is stable as long as there is chunk attached. The
> + * association between mark and chunk is protected by hash_lock and
> + * audit_tree_group->mark_mutex. Thus as long as we hold
> + * audit_tree_group->mark_mutex and check that the mark is alive by
> + * FSNOTIFY_MARK_FLAG_ATTACHED flag check, we are sure the mark points to
> + * the current chunk.
> + *
> * Rules have pointer to struct audit_tree.
> * Rules have struct list_head rlist forming a list of rules over
> * the same tree.
> @@ -68,8 +74,12 @@ static struct task_struct *prune_thread;
> * tree is refcounted; one reference for "some rules on rules_list refer to
> * it", one for each chunk with pointer to it.
> *
> - * chunk is refcounted by embedded fsnotify_mark + .refs (non-zero refcount
> - * of watch contributes 1 to .refs).
> + * chunk is refcounted by embedded .refs. Mark associated with the chunk holds
> + * one chunk reference. This reference is dropped either when a mark is going
> + * to be freed (corresponding inode goes away) or when chunk attached to the
> + * mark gets replaced. This reference must be dropped using
> + * audit_mark_put_chunk() to make sure the reference is dropped only after RCU
> + * grace period as it protects RCU readers of the hash table.
> *
> * node.index allows to get from node.list to containing chunk.
> * MSB of that sucker is stolen to mark taggings that we might have to
> @@ -160,8 +170,6 @@ static struct audit_chunk *mark_chunk(struct fsnotify_mark *mark)
>
> static void audit_tree_destroy_watch(struct fsnotify_mark *entry)
> {
> - struct audit_chunk *chunk = mark_chunk(entry);
> - audit_mark_put_chunk(chunk);
> kmem_cache_free(audit_tree_mark_cachep, audit_mark(entry));
> }
>
> @@ -188,13 +196,6 @@ static struct audit_chunk *alloc_chunk(int count)
> if (!chunk)
> return NULL;
>
> - chunk->mark = alloc_mark();
> - if (!chunk->mark) {
> - kfree(chunk);
> - return NULL;
> - }
> - audit_mark(chunk->mark)->chunk = chunk;
> -
> INIT_LIST_HEAD(&chunk->hash);
> INIT_LIST_HEAD(&chunk->trees);
> chunk->count = count;
> @@ -277,6 +278,20 @@ static struct audit_chunk *find_chunk(struct node *p)
> return container_of(p, struct audit_chunk, owners[0]);
> }
>
> +static void replace_mark_chunk(struct fsnotify_mark *entry,
> + struct audit_chunk *chunk)
> +{
> + struct audit_chunk *old;
> +
> + assert_spin_locked(&hash_lock);
> + old = mark_chunk(entry);
> + audit_mark(entry)->chunk = chunk;
> + if (chunk)
> + chunk->mark = entry;
> + if (old)
> + old->mark = NULL;
> +}
> +
> static void replace_chunk(struct audit_chunk *new, struct audit_chunk *old)
> {
> struct audit_tree *owner;
> @@ -299,6 +314,7 @@ static void replace_chunk(struct audit_chunk *new, struct audit_chunk *old)
> get_tree(owner);
> list_replace_init(&old->owners[j].list, &new->owners[i].list);
> }
> + replace_mark_chunk(old->mark, new);
> /*
> * Make sure chunk is fully initialized before making it visible in the
> * hash. Pairs with a data dependency barrier in READ_ONCE() in
> @@ -339,23 +355,25 @@ static void untag_chunk(struct audit_chunk *chunk, struct fsnotify_mark *entry)
>
> mutex_lock(&audit_tree_group->mark_mutex);
> /*
> - * mark_mutex protects mark from getting detached and thus also from
> - * mark->connector->obj getting NULL.
> + * mark_mutex stabilizes chunk attached to the mark so we can check
> + * whether it didn't change while we've dropped hash_lock.
> */
> - if (chunk->dead || !(entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
> + if (!(entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED) ||
> + mark_chunk(entry) != chunk) {
> mutex_unlock(&audit_tree_group->mark_mutex);
> return;
> }
>
> size = chunk_count_trees(chunk);
> if (!size) {
> - chunk->dead = 1;
> spin_lock(&hash_lock);
> list_del_init(&chunk->trees);
> list_del_rcu(&chunk->hash);
> + replace_mark_chunk(entry, NULL);
> spin_unlock(&hash_lock);
> fsnotify_detach_mark(entry);
> mutex_unlock(&audit_tree_group->mark_mutex);
> + audit_mark_put_chunk(chunk);
> fsnotify_free_mark(entry);
> return;
> }
> @@ -364,13 +382,6 @@ static void untag_chunk(struct audit_chunk *chunk, struct fsnotify_mark *entry)
> if (!new)
> goto out_mutex;
>
> - if (fsnotify_add_mark_locked(new->mark, entry->connector->obj,
> - FSNOTIFY_OBJ_TYPE_INODE, 1)) {
> - fsnotify_put_mark(new->mark);
> - goto out_mutex;
> - }
> -
> - chunk->dead = 1;
> spin_lock(&hash_lock);
> /*
> * This has to go last when updating chunk as once replace_chunk() is
> @@ -378,10 +389,8 @@ static void untag_chunk(struct audit_chunk *chunk, struct fsnotify_mark *entry)
> */
> replace_chunk(new, chunk);
> spin_unlock(&hash_lock);
> - fsnotify_detach_mark(entry);
> mutex_unlock(&audit_tree_group->mark_mutex);
> - fsnotify_free_mark(entry);
> - fsnotify_put_mark(new->mark); /* drop initial reference */
> + audit_mark_put_chunk(chunk);
> return;
>
> out_mutex:
> @@ -399,23 +408,31 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
> return -ENOMEM;
> }
>
> - entry = chunk->mark;
> + entry = alloc_mark();
> + if (!entry) {
> + mutex_unlock(&audit_tree_group->mark_mutex);
> + kfree(chunk);
> + return -ENOMEM;
> + }
> +
> if (fsnotify_add_inode_mark_locked(entry, inode, 0)) {
> mutex_unlock(&audit_tree_group->mark_mutex);
> fsnotify_put_mark(entry);
> + kfree(chunk);
> return -ENOSPC;
> }
>
> spin_lock(&hash_lock);
> if (tree->goner) {
> spin_unlock(&hash_lock);
> - chunk->dead = 1;
> fsnotify_detach_mark(entry);
> mutex_unlock(&audit_tree_group->mark_mutex);
> fsnotify_free_mark(entry);
> fsnotify_put_mark(entry);
> + kfree(chunk);
> return 0;
> }
> + replace_mark_chunk(entry, chunk);
> chunk->owners[0].index = (1U << 31);
> chunk->owners[0].owner = tree;
> get_tree(tree);
> @@ -432,33 +449,41 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
> insert_hash(chunk);
> spin_unlock(&hash_lock);
> mutex_unlock(&audit_tree_group->mark_mutex);
> - fsnotify_put_mark(entry); /* drop initial reference */
> + /*
> + * Drop our initial reference. When mark we point to is getting freed,
> + * we get notification through ->freeing_mark callback and cleanup
> + * chunk pointing to this mark.
> + */
> + fsnotify_put_mark(entry);
> return 0;
> }
>
> /* the first tagged inode becomes root of tree */
> static int tag_chunk(struct inode *inode, struct audit_tree *tree)
> {
> - struct fsnotify_mark *old_entry, *chunk_entry;
> + struct fsnotify_mark *entry;
> struct audit_chunk *chunk, *old;
> struct node *p;
> int n;
>
> mutex_lock(&audit_tree_group->mark_mutex);
> - old_entry = fsnotify_find_mark(&inode->i_fsnotify_marks,
> - audit_tree_group);
> - if (!old_entry)
> + entry = fsnotify_find_mark(&inode->i_fsnotify_marks, audit_tree_group);
> + if (!entry)
> return create_chunk(inode, tree);
>
> - old = mark_chunk(old_entry);
> -
> + /*
> + * Found mark is guaranteed to be attached and mark_mutex protects mark
> + * from getting detached and thus it makes sure there is chunk attached
> + * to the mark.
> + */
> /* are we already there? */
> spin_lock(&hash_lock);
> + old = mark_chunk(entry);
> for (n = 0; n < old->count; n++) {
> if (old->owners[n].owner == tree) {
> spin_unlock(&hash_lock);
> mutex_unlock(&audit_tree_group->mark_mutex);
> - fsnotify_put_mark(old_entry);
> + fsnotify_put_mark(entry);
> return 0;
> }
> }
> @@ -467,41 +492,16 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
> chunk = alloc_chunk(old->count + 1);
> if (!chunk) {
> mutex_unlock(&audit_tree_group->mark_mutex);
> - fsnotify_put_mark(old_entry);
> + fsnotify_put_mark(entry);
> return -ENOMEM;
> }
>
> - chunk_entry = chunk->mark;
> -
> - /*
> - * mark_mutex protects mark from getting detached and thus also from
> - * mark->connector->obj getting NULL.
> - */
> - if (!(old_entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
> - /* old_entry is being shot, lets just lie */
> - mutex_unlock(&audit_tree_group->mark_mutex);
> - fsnotify_put_mark(old_entry);
> - fsnotify_put_mark(chunk->mark);
> - return -ENOENT;
> - }
> -
> - if (fsnotify_add_mark_locked(chunk_entry, old_entry->connector->obj,
> - FSNOTIFY_OBJ_TYPE_INODE, 1)) {
> - mutex_unlock(&audit_tree_group->mark_mutex);
> - fsnotify_put_mark(chunk_entry);
> - fsnotify_put_mark(old_entry);
> - return -ENOSPC;
> - }
> -
> spin_lock(&hash_lock);
> if (tree->goner) {
> spin_unlock(&hash_lock);
> - chunk->dead = 1;
> - fsnotify_detach_mark(chunk_entry);
> mutex_unlock(&audit_tree_group->mark_mutex);
> - fsnotify_free_mark(chunk_entry);
> - fsnotify_put_mark(chunk_entry);
> - fsnotify_put_mark(old_entry);
> + fsnotify_put_mark(entry);
> + kfree(chunk);
> return 0;
> }
> p = &chunk->owners[chunk->count - 1];
> @@ -509,7 +509,6 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
> p->owner = tree;
> get_tree(tree);
> list_add(&p->list, &tree->chunks);
> - old->dead = 1;
> if (!tree->root) {
> tree->root = chunk;
> list_add(&tree->same_root, &chunk->trees);
> @@ -520,11 +519,10 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
> */
> replace_chunk(chunk, old);
> spin_unlock(&hash_lock);
> - fsnotify_detach_mark(old_entry);
> mutex_unlock(&audit_tree_group->mark_mutex);
> - fsnotify_free_mark(old_entry);
> - fsnotify_put_mark(chunk_entry); /* drop initial reference */
> - fsnotify_put_mark(old_entry); /* pair to fsnotify_find mark_entry */
> + fsnotify_put_mark(entry); /* pair to fsnotify_find mark_entry */
> + audit_mark_put_chunk(old);
> +
> return 0;
> }
>
> @@ -587,6 +585,9 @@ static void prune_tree_chunks(struct audit_tree *victim, bool tagged)
> chunk = find_chunk(p);
> mark = chunk->mark;
> remove_chunk_node(chunk, p);
> + /* Racing with audit_tree_freeing_mark()? */
> + if (!mark)
> + continue;
> fsnotify_get_mark(mark);
> spin_unlock(&hash_lock);
>
> @@ -1009,10 +1010,6 @@ static void evict_chunk(struct audit_chunk *chunk)
> int need_prune = 0;
> int n;
>
> - if (chunk->dead)
> - return;
> -
> - chunk->dead = 1;
> mutex_lock(&audit_filter_mutex);
> spin_lock(&hash_lock);
> while (!list_empty(&chunk->trees)) {
> @@ -1051,9 +1048,18 @@ static int audit_tree_handle_event(struct fsnotify_group *group,
>
> static void audit_tree_freeing_mark(struct fsnotify_mark *entry, struct fsnotify_group *group)
> {
> - struct audit_chunk *chunk = mark_chunk(entry);
> + struct audit_chunk *chunk;
>
> - evict_chunk(chunk);
> + mutex_lock(&entry->group->mark_mutex);
> + spin_lock(&hash_lock);
> + chunk = mark_chunk(entry);
> + replace_mark_chunk(entry, NULL);
> + spin_unlock(&hash_lock);
> + mutex_unlock(&entry->group->mark_mutex);
> + if (chunk) {
> + evict_chunk(chunk);
> + audit_mark_put_chunk(chunk);
> + }
>
> /*
> * We are guaranteed to have at least one reference to the mark from
> --
> 2.16.4
>
- RGB
--
Richard Guy Briggs <rgb at redhat.com>
Sr. S/W Engineer, Kernel Security, Base Operating Systems
Remote, Ottawa, Red Hat Canada
IRC: rgb, SunRaycer
Voice: +1.647.777.2635, Internal: (81) 32635
More information about the Linux-audit mailing list