[PATCH 13/14] audit: Replace chunk attached to mark instead of replacing mark

Richard Guy Briggs rgb at redhat.com
Thu Oct 18 19:39:24 UTC 2018


On 2018-10-17 12:15, Jan Kara wrote:
> The audit tree code currently associates a new fsnotify mark with each
> new chunk. As the chunk attached to an inode is replaced whenever a tag is
> added / removed, we also need to remove the old fsnotify mark and add a new
> one on each such occasion.  This is cumbersome and makes the locking rules
> somewhat difficult to follow.
> 
> Fix these problems by allocating the fsnotify mark independently of the
> chunk and keeping it for as long as there is some chunk attached to the
> inode. Also add documentation about the locking rules so that things are
> easier to follow.

Reviewed-by: Richard Guy Briggs <rgb at redhat.com>

> Signed-off-by: Jan Kara <jack at suse.cz>
> ---
>  kernel/audit_tree.c | 160 +++++++++++++++++++++++++++-------------------------
>  1 file changed, 83 insertions(+), 77 deletions(-)
> 
> diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
> index 5deb4e1ed648..451d1b744e82 100644
> --- a/kernel/audit_tree.c
> +++ b/kernel/audit_tree.c
> @@ -27,7 +27,6 @@ struct audit_chunk {
>  	unsigned long key;
>  	struct fsnotify_mark *mark;
>  	struct list_head trees;		/* with root here */
> -	int dead;
>  	int count;
>  	atomic_long_t refs;
>  	struct rcu_head head;
> @@ -48,8 +47,15 @@ static LIST_HEAD(prune_list);
>  static struct task_struct *prune_thread;
>  
>  /*
> - * One struct chunk is attached to each inode of interest.
> - * We replace struct chunk on tagging/untagging.
> + * One struct chunk is attached to each inode of interest through
> + * audit_tree_mark (fsnotify mark). We replace struct chunk on tagging /
> + * untagging, the mark is stable as long as there is chunk attached. The
> + * association between mark and chunk is protected by hash_lock and
> + * audit_tree_group->mark_mutex. Thus as long as we hold
> + * audit_tree_group->mark_mutex and check that the mark is alive by
> + * FSNOTIFY_MARK_FLAG_ATTACHED flag check, we are sure the mark points to
> + * the current chunk.
> + *
>   * Rules have pointer to struct audit_tree.
>   * Rules have struct list_head rlist forming a list of rules over
>   * the same tree.
> @@ -68,8 +74,12 @@ static struct task_struct *prune_thread;
>   * tree is refcounted; one reference for "some rules on rules_list refer to
>   * it", one for each chunk with pointer to it.
>   *
> - * chunk is refcounted by embedded fsnotify_mark + .refs (non-zero refcount
> - * of watch contributes 1 to .refs).
> + * chunk is refcounted by embedded .refs. Mark associated with the chunk holds
> + * one chunk reference. This reference is dropped either when a mark is going
> + * to be freed (corresponding inode goes away) or when chunk attached to the
> + * mark gets replaced. This reference must be dropped using
> + * audit_mark_put_chunk() to make sure the reference is dropped only after RCU
> + * grace period as it protects RCU readers of the hash table.
>   *
>   * node.index allows to get from node.list to containing chunk.
>   * MSB of that sucker is stolen to mark taggings that we might have to
> @@ -160,8 +170,6 @@ static struct audit_chunk *mark_chunk(struct fsnotify_mark *mark)
>  
>  static void audit_tree_destroy_watch(struct fsnotify_mark *entry)
>  {
> -	struct audit_chunk *chunk = mark_chunk(entry);
> -	audit_mark_put_chunk(chunk);
>  	kmem_cache_free(audit_tree_mark_cachep, audit_mark(entry));
>  }
>  
> @@ -188,13 +196,6 @@ static struct audit_chunk *alloc_chunk(int count)
>  	if (!chunk)
>  		return NULL;
>  
> -	chunk->mark = alloc_mark();
> -	if (!chunk->mark) {
> -		kfree(chunk);
> -		return NULL;
> -	}
> -	audit_mark(chunk->mark)->chunk = chunk;
> -
>  	INIT_LIST_HEAD(&chunk->hash);
>  	INIT_LIST_HEAD(&chunk->trees);
>  	chunk->count = count;
> @@ -277,6 +278,20 @@ static struct audit_chunk *find_chunk(struct node *p)
>  	return container_of(p, struct audit_chunk, owners[0]);
>  }
>  
> +static void replace_mark_chunk(struct fsnotify_mark *entry,
> +			       struct audit_chunk *chunk)
> +{
> +	struct audit_chunk *old;
> +
> +	assert_spin_locked(&hash_lock);
> +	old = mark_chunk(entry);
> +	audit_mark(entry)->chunk = chunk;
> +	if (chunk)
> +		chunk->mark = entry;
> +	if (old)
> +		old->mark = NULL;
> +}
> +
>  static void replace_chunk(struct audit_chunk *new, struct audit_chunk *old)
>  {
>  	struct audit_tree *owner;
> @@ -299,6 +314,7 @@ static void replace_chunk(struct audit_chunk *new, struct audit_chunk *old)
>  		get_tree(owner);
>  		list_replace_init(&old->owners[j].list, &new->owners[i].list);
>  	}
> +	replace_mark_chunk(old->mark, new);
>  	/*
>  	 * Make sure chunk is fully initialized before making it visible in the
>  	 * hash. Pairs with a data dependency barrier in READ_ONCE() in
> @@ -339,23 +355,25 @@ static void untag_chunk(struct audit_chunk *chunk, struct fsnotify_mark *entry)
>  
>  	mutex_lock(&audit_tree_group->mark_mutex);
>  	/*
> -	 * mark_mutex protects mark from getting detached and thus also from
> -	 * mark->connector->obj getting NULL.
> +	 * mark_mutex stabilizes chunk attached to the mark so we can check
> +	 * whether it didn't change while we've dropped hash_lock.
>  	 */
> -	if (chunk->dead || !(entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
> +	if (!(entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED) ||
> +	    mark_chunk(entry) != chunk) {
>  		mutex_unlock(&audit_tree_group->mark_mutex);
>  		return;
>  	}
>  
>  	size = chunk_count_trees(chunk);
>  	if (!size) {
> -		chunk->dead = 1;
>  		spin_lock(&hash_lock);
>  		list_del_init(&chunk->trees);
>  		list_del_rcu(&chunk->hash);
> +		replace_mark_chunk(entry, NULL);
>  		spin_unlock(&hash_lock);
>  		fsnotify_detach_mark(entry);
>  		mutex_unlock(&audit_tree_group->mark_mutex);
> +		audit_mark_put_chunk(chunk);
>  		fsnotify_free_mark(entry);
>  		return;
>  	}
> @@ -364,13 +382,6 @@ static void untag_chunk(struct audit_chunk *chunk, struct fsnotify_mark *entry)
>  	if (!new)
>  		goto out_mutex;
>  
> -	if (fsnotify_add_mark_locked(new->mark, entry->connector->obj,
> -				     FSNOTIFY_OBJ_TYPE_INODE, 1)) {
> -		fsnotify_put_mark(new->mark);
> -		goto out_mutex;
> -	}
> -
> -	chunk->dead = 1;
>  	spin_lock(&hash_lock);
>  	/*
>  	 * This has to go last when updating chunk as once replace_chunk() is
> @@ -378,10 +389,8 @@ static void untag_chunk(struct audit_chunk *chunk, struct fsnotify_mark *entry)
>  	 */
>  	replace_chunk(new, chunk);
>  	spin_unlock(&hash_lock);
> -	fsnotify_detach_mark(entry);
>  	mutex_unlock(&audit_tree_group->mark_mutex);
> -	fsnotify_free_mark(entry);
> -	fsnotify_put_mark(new->mark);	/* drop initial reference */
> +	audit_mark_put_chunk(chunk);
>  	return;
>  
>  out_mutex:
> @@ -399,23 +408,31 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
>  		return -ENOMEM;
>  	}
>  
> -	entry = chunk->mark;
> +	entry = alloc_mark();
> +	if (!entry) {
> +		mutex_unlock(&audit_tree_group->mark_mutex);
> +		kfree(chunk);
> +		return -ENOMEM;
> +	}
> +
>  	if (fsnotify_add_inode_mark_locked(entry, inode, 0)) {
>  		mutex_unlock(&audit_tree_group->mark_mutex);
>  		fsnotify_put_mark(entry);
> +		kfree(chunk);
>  		return -ENOSPC;
>  	}
>  
>  	spin_lock(&hash_lock);
>  	if (tree->goner) {
>  		spin_unlock(&hash_lock);
> -		chunk->dead = 1;
>  		fsnotify_detach_mark(entry);
>  		mutex_unlock(&audit_tree_group->mark_mutex);
>  		fsnotify_free_mark(entry);
>  		fsnotify_put_mark(entry);
> +		kfree(chunk);
>  		return 0;
>  	}
> +	replace_mark_chunk(entry, chunk);
>  	chunk->owners[0].index = (1U << 31);
>  	chunk->owners[0].owner = tree;
>  	get_tree(tree);
> @@ -432,33 +449,41 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
>  	insert_hash(chunk);
>  	spin_unlock(&hash_lock);
>  	mutex_unlock(&audit_tree_group->mark_mutex);
> -	fsnotify_put_mark(entry);	/* drop initial reference */
> +	/*
> +	 * Drop our initial reference. When mark we point to is getting freed,
> +	 * we get notification through ->freeing_mark callback and cleanup
> +	 * chunk pointing to this mark.
> +	 */
> +	fsnotify_put_mark(entry);
>  	return 0;
>  }
>  
>  /* the first tagged inode becomes root of tree */
>  static int tag_chunk(struct inode *inode, struct audit_tree *tree)
>  {
> -	struct fsnotify_mark *old_entry, *chunk_entry;
> +	struct fsnotify_mark *entry;
>  	struct audit_chunk *chunk, *old;
>  	struct node *p;
>  	int n;
>  
>  	mutex_lock(&audit_tree_group->mark_mutex);
> -	old_entry = fsnotify_find_mark(&inode->i_fsnotify_marks,
> -				       audit_tree_group);
> -	if (!old_entry)
> +	entry = fsnotify_find_mark(&inode->i_fsnotify_marks, audit_tree_group);
> +	if (!entry)
>  		return create_chunk(inode, tree);
>  
> -	old = mark_chunk(old_entry);
> -
> +	/*
> +	 * Found mark is guaranteed to be attached and mark_mutex protects mark
> +	 * from getting detached and thus it makes sure there is chunk attached
> +	 * to the mark.
> +	 */
>  	/* are we already there? */
>  	spin_lock(&hash_lock);
> +	old = mark_chunk(entry);
>  	for (n = 0; n < old->count; n++) {
>  		if (old->owners[n].owner == tree) {
>  			spin_unlock(&hash_lock);
>  			mutex_unlock(&audit_tree_group->mark_mutex);
> -			fsnotify_put_mark(old_entry);
> +			fsnotify_put_mark(entry);
>  			return 0;
>  		}
>  	}
> @@ -467,41 +492,16 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
>  	chunk = alloc_chunk(old->count + 1);
>  	if (!chunk) {
>  		mutex_unlock(&audit_tree_group->mark_mutex);
> -		fsnotify_put_mark(old_entry);
> +		fsnotify_put_mark(entry);
>  		return -ENOMEM;
>  	}
>  
> -	chunk_entry = chunk->mark;
> -
> -	/*
> -	 * mark_mutex protects mark from getting detached and thus also from
> -	 * mark->connector->obj getting NULL.
> -	 */
> -	if (!(old_entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
> -		/* old_entry is being shot, lets just lie */
> -		mutex_unlock(&audit_tree_group->mark_mutex);
> -		fsnotify_put_mark(old_entry);
> -		fsnotify_put_mark(chunk->mark);
> -		return -ENOENT;
> -	}
> -
> -	if (fsnotify_add_mark_locked(chunk_entry, old_entry->connector->obj,
> -				     FSNOTIFY_OBJ_TYPE_INODE, 1)) {
> -		mutex_unlock(&audit_tree_group->mark_mutex);
> -		fsnotify_put_mark(chunk_entry);
> -		fsnotify_put_mark(old_entry);
> -		return -ENOSPC;
> -	}
> -
>  	spin_lock(&hash_lock);
>  	if (tree->goner) {
>  		spin_unlock(&hash_lock);
> -		chunk->dead = 1;
> -		fsnotify_detach_mark(chunk_entry);
>  		mutex_unlock(&audit_tree_group->mark_mutex);
> -		fsnotify_free_mark(chunk_entry);
> -		fsnotify_put_mark(chunk_entry);
> -		fsnotify_put_mark(old_entry);
> +		fsnotify_put_mark(entry);
> +		kfree(chunk);
>  		return 0;
>  	}
>  	p = &chunk->owners[chunk->count - 1];
> @@ -509,7 +509,6 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
>  	p->owner = tree;
>  	get_tree(tree);
>  	list_add(&p->list, &tree->chunks);
> -	old->dead = 1;
>  	if (!tree->root) {
>  		tree->root = chunk;
>  		list_add(&tree->same_root, &chunk->trees);
> @@ -520,11 +519,10 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
>  	 */
>  	replace_chunk(chunk, old);
>  	spin_unlock(&hash_lock);
> -	fsnotify_detach_mark(old_entry);
>  	mutex_unlock(&audit_tree_group->mark_mutex);
> -	fsnotify_free_mark(old_entry);
> -	fsnotify_put_mark(chunk_entry);	/* drop initial reference */
> -	fsnotify_put_mark(old_entry); /* pair to fsnotify_find mark_entry */
> +	fsnotify_put_mark(entry); /* pair to fsnotify_find mark_entry */
> +	audit_mark_put_chunk(old);
> +
>  	return 0;
>  }
>  
> @@ -587,6 +585,9 @@ static void prune_tree_chunks(struct audit_tree *victim, bool tagged)
>  		chunk = find_chunk(p);
>  		mark = chunk->mark;
>  		remove_chunk_node(chunk, p);
> +		/* Racing with audit_tree_freeing_mark()? */
> +		if (!mark)
> +			continue;
>  		fsnotify_get_mark(mark);
>  		spin_unlock(&hash_lock);
>  
> @@ -1009,10 +1010,6 @@ static void evict_chunk(struct audit_chunk *chunk)
>  	int need_prune = 0;
>  	int n;
>  
> -	if (chunk->dead)
> -		return;
> -
> -	chunk->dead = 1;
>  	mutex_lock(&audit_filter_mutex);
>  	spin_lock(&hash_lock);
>  	while (!list_empty(&chunk->trees)) {
> @@ -1051,9 +1048,18 @@ static int audit_tree_handle_event(struct fsnotify_group *group,
>  
>  static void audit_tree_freeing_mark(struct fsnotify_mark *entry, struct fsnotify_group *group)
>  {
> -	struct audit_chunk *chunk = mark_chunk(entry);
> +	struct audit_chunk *chunk;
>  
> -	evict_chunk(chunk);
> +	mutex_lock(&entry->group->mark_mutex);
> +	spin_lock(&hash_lock);
> +	chunk = mark_chunk(entry);
> +	replace_mark_chunk(entry, NULL);
> +	spin_unlock(&hash_lock);
> +	mutex_unlock(&entry->group->mark_mutex);
> +	if (chunk) {
> +		evict_chunk(chunk);
> +		audit_mark_put_chunk(chunk);
> +	}
>  
>  	/*
>  	 * We are guaranteed to have at least one reference to the mark from
> -- 
> 2.16.4
> 

- RGB

--
Richard Guy Briggs <rgb at redhat.com>
Sr. S/W Engineer, Kernel Security, Base Operating Systems
Remote, Ottawa, Red Hat Canada
IRC: rgb, SunRaycer
Voice: +1.647.777.2635, Internal: (81) 32635




More information about the Linux-audit mailing list