[RFC][PATCH] Inotify kernel API
Timothy R. Chavez
tinytim at us.ibm.com
Tue Sep 6 22:29:32 UTC 2005
Some comments below...
On Tuesday 23 August 2005 15:37, Amy Griffis wrote:
> diff -r 8ecff93e704a -r 58e1301e9661 fs/inotify.c
> --- a/fs/inotify.c Thu Aug 18 19:53:59 2005
> +++ b/fs/inotify.c Thu Aug 18 23:19:52 2005
> @@ -83,14 +83,18 @@
> wait_queue_head_t wq; /* wait queue for i/o */
> struct idr idr; /* idr mapping wd -> watch */
> struct semaphore sem; /* protects this bad boy */
> - struct list_head events; /* list of queued events */
> struct list_head watches; /* list of watches */
> atomic_t count; /* reference count */
> + u32 last_wd; /* the last wd allocated */
> + /* userland consumer API */
> + struct list_head events; /* list of queued events */
> struct user_struct *user; /* user who opened this dev */
> unsigned int queue_size; /* size of the queue (bytes) */
> unsigned int event_count; /* number of pending events */
> unsigned int max_events; /* maximum number of events */
> - u32 last_wd; /* the last wd allocated */
> + /* kernel consumer API */
> + void (*callback)(struct inotify_event *, const char *,
> + void *); /* event callback */
> };
>
> /*
> @@ -122,6 +126,7 @@
> struct inode *inode; /* associated inode */
> s32 wd; /* watch descriptor */
> u32 mask; /* event mask for this watch */
> + void *callback_arg; /* callback argument - kernel API */
> };
>
> #ifdef CONFIG_SYSCTL
> @@ -173,8 +178,10 @@
> static inline void put_inotify_dev(struct inotify_device *dev)
> {
> if (atomic_dec_and_test(&dev->count)) {
> - atomic_dec(&dev->user->inotify_devs);
> - free_uid(dev->user);
> + if (dev->user) {
> + atomic_dec(&dev->user->inotify_devs);
> + free_uid(dev->user);
> + }
> kfree(dev);
> }
> }
> @@ -341,6 +348,23 @@
> }
>
> /*
> + * inotify_callback_event - notify kernel consumers of events
> + */
> +static void inotify_callback_event(struct inotify_device *dev,
> + struct inotify_watch *watch,
> + u32 mask, u32 cookie, const char *name)
> +{
> + struct inotify_event event;
> +
> + event.wd = watch->wd;
> + event.mask = mask;
> + event.cookie = cookie;
> + event.len = 0; /* kernel consumers don't need length */
> +
> + dev->callback(&event, name, watch->callback_arg);
> +}
> +
> +/*
> * inotify_dev_get_wd - returns the next WD for use by the given dev
> *
> * Callers must hold dev->sem. This function can sleep.
> @@ -383,12 +407,13 @@
> * Both 'dev' and 'inode' (by way of nameidata) need to be pinned.
> */
> static struct inotify_watch *create_watch(struct inotify_device *dev,
> - u32 mask, struct inode *inode)
> + u32 mask, struct inode *inode,
> + void *callback_arg)
> {
> struct inotify_watch *watch;
> int ret;
>
> - if (atomic_read(&dev->user->inotify_watches) >=
> + if (dev->user && atomic_read(&dev->user->inotify_watches) >=
> inotify_max_user_watches)
> return ERR_PTR(-ENOSPC);
>
> @@ -404,6 +429,7 @@
>
> dev->last_wd = watch->wd;
> watch->mask = mask;
> + watch->callback_arg = callback_arg;
> atomic_set(&watch->count, 0);
> INIT_LIST_HEAD(&watch->d_list);
> INIT_LIST_HEAD(&watch->i_list);
> @@ -421,7 +447,8 @@
> /* bump our own count, corresponding to our entry in dev->watches */
> get_inotify_watch(watch);
>
> - atomic_inc(&dev->user->inotify_watches);
> + if (dev->user)
> + atomic_inc(&dev->user->inotify_watches);
>
> return watch;
> }
> @@ -453,7 +480,8 @@
> list_del(&watch->i_list);
> list_del(&watch->d_list);
>
> - atomic_dec(&dev->user->inotify_watches);
> + if (dev->user)
> + atomic_dec(&dev->user->inotify_watches);
> idr_remove(&dev->idr, watch->wd);
> put_inotify_watch(watch);
> }
> @@ -471,7 +499,10 @@
> */
> static void remove_watch(struct inotify_watch *watch,struct inotify_device *dev)
> {
> - inotify_dev_queue_event(dev, watch, IN_IGNORED, 0, NULL);
> + if (dev->callback)
> + inotify_callback_event(dev, watch, IN_IGNORED, 0, NULL);
> + else
> + inotify_dev_queue_event(dev, watch, IN_IGNORED, 0, NULL);
> remove_watch_no_event(watch, dev);
> }
>
> @@ -484,7 +515,172 @@
> return !list_empty(&inode->inotify_watches);
> }
>
> -/* Kernel API */
> +/* Kernel consumer API */
> +
> +/*
> + * inotify_init - allocates and initializes an inotify device.
> + */
> +struct inotify_device *
> +inotify_init(void (*callback)(struct inotify_event *, const char *, void *))
> +{
> + struct inotify_device *dev;
> +
> + dev = kmalloc(sizeof(struct inotify_device), GFP_KERNEL);
> + if (unlikely(!dev))
> + return NULL;
> +
> + idr_init(&dev->idr);
> + INIT_LIST_HEAD(&dev->events);
> + INIT_LIST_HEAD(&dev->watches);
> + init_waitqueue_head(&dev->wq);
> + sema_init(&dev->sem, 1);
> + dev->event_count = 0;
> + dev->queue_size = 0;
> + dev->max_events = inotify_max_queued_events;
> + dev->user = NULL; /* set in sys_inotify_init */
> + dev->last_wd = 0;
> + dev->callback = callback;
> + atomic_set(&dev->count, 0);
> + get_inotify_dev(dev);
> +
> + return dev;
> +}
> +EXPORT_SYMBOL_GPL(inotify_init);
> +
> +/*
> + * inotify_free - clean up and free an inotify device.
> + */
> +int inotify_free(struct inotify_device *dev)
> +{
> + /*
> + * Destroy all of the watches on this device. Unfortunately, not very
> + * pretty. We cannot do a simple iteration over the list, because we
> + * do not know the inode until we iterate to the watch. But we need to
> + * hold inode->inotify_sem before dev->sem. The following works.
> + */
> + while (1) {
> + struct inotify_watch *watch;
> + struct list_head *watches;
> + struct inode *inode;
> +
> + down(&dev->sem);
> + watches = &dev->watches;
> + if (list_empty(watches)) {
> + up(&dev->sem);
> + break;
> + }
> + watch = list_entry(watches->next, struct inotify_watch, d_list);
> + get_inotify_watch(watch);
> + up(&dev->sem);
> +
> + inode = watch->inode;
> + down(&inode->inotify_sem);
> + down(&dev->sem);
> + remove_watch_no_event(watch, dev);
> + up(&dev->sem);
> + up(&inode->inotify_sem);
> + put_inotify_watch(watch);
> + }
> +
> + /* destroy all of the events on this device */
> + down(&dev->sem);
> + while (!list_empty(&dev->events))
> + inotify_dev_event_dequeue(dev);
> + up(&dev->sem);
> +
> + /* free this device: the put matching the get in inotify_init() */
> + put_inotify_dev(dev);
> +
> + return 0;
> +}
> +EXPORT_SYMBOL_GPL(inotify_free);
> +
> +/*
> + * inotify_add_watch - add a watch to this inotify instance.
> + */
> +s32 inotify_add_watch(struct inotify_device *dev, struct inode *inode,
> + u32 mask, void *callback_arg)
> +{
> + struct inotify_watch *watch, *old;
> + int ret;
> +
> + down(&inode->inotify_sem);
What happens if the inode is deleted while we are attempting to take the semaphore?
Perhaps add a comment reminding callers of this function that they must hold the
proper inode locks around it?
> + down(&dev->sem);
> +
> + /* don't let consumers set invalid bits: we don't want flags set */
> + mask &= IN_ALL_EVENTS;
> + if (unlikely(!mask)) {
> + ret = -EINVAL;
> + goto out;
> + }
> +
> + /*
> + * Handle the case of re-adding a watch on an (inode,dev) pair that we
> + * are already watching. We just update the mask and callback_arg and
> + * return its wd.
> + */
> + old = inode_find_dev(inode, dev);
> + if (unlikely(old)) {
> + old->mask = mask;
> + old->callback_arg = callback_arg;
You can leak memory here, right? Maybe there needs to be an old->callback_free
field that, when set, knows how to clean up old->callback_arg before pointing it
elsewhere. Not sure we can assume that callback_arg is always going to be
statically allocated.
> + ret = old->wd;
> + goto out;
> + }
> +
> + watch = create_watch(dev, mask, inode, callback_arg);
> + if (unlikely(IS_ERR(watch))) {
> + ret = PTR_ERR(watch);
> + goto out;
> + }
> +
> + /* Add the watch to the device's and the inode's list */
> + list_add(&watch->d_list, &dev->watches);
> + list_add(&watch->i_list, &inode->inotify_watches);
> + ret = watch->wd;
> +out:
> + up(&dev->sem);
> + up(&inode->inotify_sem);
> + return ret;
> +}
> +EXPORT_SYMBOL_GPL(inotify_add_watch);
> +
> +/*
> + * inotify_ignore - remove a given wd from this inotify instance.
> + *
> + * Can sleep.
> + */
> +int inotify_ignore(struct inotify_device *dev, s32 wd)
> +{
> + struct inotify_watch *watch;
> + struct inode *inode;
> +
> + down(&dev->sem);
> + watch = idr_find(&dev->idr, wd);
> + if (unlikely(!watch)) {
> + up(&dev->sem);
> + return -EINVAL;
> + }
> + get_inotify_watch(watch);
> + inode = watch->inode;
> + up(&dev->sem);
> +
> + down(&inode->inotify_sem);
> + down(&dev->sem);
> +
> + /* make sure that we did not race */
> + watch = idr_find(&dev->idr, wd);
> + if (likely(watch))
> + remove_watch(watch, dev);
> +
> + up(&dev->sem);
> + up(&inode->inotify_sem);
> + put_inotify_watch(watch);
> +
> + return 0;
> +}
> +EXPORT_SYMBOL_GPL(inotify_ignore);
> +
> +/* Kernel producer API */
>
> /**
> * inotify_inode_queue_event - queue an event to all watches on this inode
Since the purpose of this patch is to add a kernel API to inotify, I think it should
be as clean as possible. With this in mind, since we _might_ be calling
inotify_callback_event, the calling function should not be named
inotify_inode_queue_event; it's not all that intuitive. Instead, I think this
function should be renamed to something like inotify_inode_handle_event...
> @@ -508,7 +704,12 @@
> struct inotify_device *dev = watch->dev;
> get_inotify_watch(watch);
> down(&dev->sem);
> - inotify_dev_queue_event(dev, watch, mask, cookie, name);
> + if (dev->callback)
> + inotify_callback_event(dev, watch, mask,
> + cookie, name);
> + else
> + inotify_dev_queue_event(dev, watch, mask,
> + cookie, name);
> if (watch_mask & IN_ONESHOT)
> remove_watch_no_event(watch, dev);
> up(&dev->sem);
> @@ -622,7 +823,12 @@
> list_for_each_entry_safe(watch, next_w, watches, i_list) {
> struct inotify_device *dev = watch->dev;
> down(&dev->sem);
> - inotify_dev_queue_event(dev, watch, IN_UNMOUNT,0,NULL);
> + if (dev->callback)
> + inotify_callback_event(dev, watch, IN_UNMOUNT,
> + 0, NULL);
> + else
> + inotify_dev_queue_event(dev, watch, IN_UNMOUNT,
> + 0, NULL);
> remove_watch(watch, dev);
> up(&dev->sem);
> }
> @@ -748,83 +954,7 @@
>
> static int inotify_release(struct inode *ignored, struct file *file)
> {
> - struct inotify_device *dev = file->private_data;
> -
> - /*
> - * Destroy all of the watches on this device. Unfortunately, not very
> - * pretty. We cannot do a simple iteration over the list, because we
> - * do not know the inode until we iterate to the watch. But we need to
> - * hold inode->inotify_sem before dev->sem. The following works.
> - */
> - while (1) {
> - struct inotify_watch *watch;
> - struct list_head *watches;
> - struct inode *inode;
> -
> - down(&dev->sem);
> - watches = &dev->watches;
> - if (list_empty(watches)) {
> - up(&dev->sem);
> - break;
> - }
> - watch = list_entry(watches->next, struct inotify_watch, d_list);
> - get_inotify_watch(watch);
> - up(&dev->sem);
> -
> - inode = watch->inode;
> - down(&inode->inotify_sem);
> - down(&dev->sem);
> - remove_watch_no_event(watch, dev);
> - up(&dev->sem);
> - up(&inode->inotify_sem);
> - put_inotify_watch(watch);
> - }
> -
> - /* destroy all of the events on this device */
> - down(&dev->sem);
> - while (!list_empty(&dev->events))
> - inotify_dev_event_dequeue(dev);
> - up(&dev->sem);
> -
> - /* free this device: the put matching the get in inotify_init() */
> - put_inotify_dev(dev);
> -
> - return 0;
> -}
> -
> -/*
> - * inotify_ignore - remove a given wd from this inotify instance.
> - *
> - * Can sleep.
> - */
> -static int inotify_ignore(struct inotify_device *dev, s32 wd)
> -{
> - struct inotify_watch *watch;
> - struct inode *inode;
> -
> - down(&dev->sem);
> - watch = idr_find(&dev->idr, wd);
> - if (unlikely(!watch)) {
> - up(&dev->sem);
> - return -EINVAL;
> - }
> - get_inotify_watch(watch);
> - inode = watch->inode;
> - up(&dev->sem);
> -
> - down(&inode->inotify_sem);
> - down(&dev->sem);
> -
> - /* make sure that we did not race */
> - watch = idr_find(&dev->idr, wd);
> - if (likely(watch))
> - remove_watch(watch, dev);
> -
> - up(&dev->sem);
> - up(&inode->inotify_sem);
> - put_inotify_watch(watch);
> -
> - return 0;
> + return inotify_free(file->private_data);
> }
>
> static long inotify_ioctl(struct file *file, unsigned int cmd,
> @@ -878,11 +1008,14 @@
> goto out_free_uid;
> }
>
> - dev = kmalloc(sizeof(struct inotify_device), GFP_KERNEL);
> + dev = inotify_init(NULL);
> if (unlikely(!dev)) {
> ret = -ENOMEM;
> goto out_free_uid;
> }
> +
> + dev->user = user;
> + atomic_inc(&user->inotify_devs);
>
> filp->f_op = &inotify_fops;
> filp->f_vfsmnt = mntget(inotify_mnt);
> @@ -892,20 +1025,6 @@
> filp->f_flags = O_RDONLY;
> filp->private_data = dev;
>
> - idr_init(&dev->idr);
> - INIT_LIST_HEAD(&dev->events);
> - INIT_LIST_HEAD(&dev->watches);
> - init_waitqueue_head(&dev->wq);
> - sema_init(&dev->sem, 1);
> - dev->event_count = 0;
> - dev->queue_size = 0;
> - dev->max_events = inotify_max_queued_events;
> - dev->user = user;
> - dev->last_wd = 0;
> - atomic_set(&dev->count, 0);
> -
> - get_inotify_dev(dev);
> - atomic_inc(&user->inotify_devs);
> fd_install(fd, filp);
>
> return fd;
> @@ -919,7 +1038,6 @@
>
> asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask)
> {
> - struct inotify_watch *watch, *old;
> struct inode *inode;
> struct inotify_device *dev;
> struct nameidata nd;
> @@ -938,46 +1056,14 @@
>
> ret = find_inode(path, &nd);
> if (unlikely(ret))
> - goto fput_and_out;
> + goto out;
>
> /* inode held in place by reference to nd; dev by fget on fd */
> inode = nd.dentry->d_inode;
> dev = filp->private_data;
>
> - down(&inode->inotify_sem);
> - down(&dev->sem);
> -
> - /* don't let user-space set invalid bits: we don't want flags set */
> - mask &= IN_ALL_EVENTS;
> - if (unlikely(!mask)) {
> - ret = -EINVAL;
> - goto out;
> - }
> -
> - /*
> - * Handle the case of re-adding a watch on an (inode,dev) pair that we
> - * are already watching. We just update the mask and return its wd.
> - */
> - old = inode_find_dev(inode, dev);
> - if (unlikely(old)) {
> - old->mask = mask;
> - ret = old->wd;
> - goto out;
> - }
> -
> - watch = create_watch(dev, mask, inode);
> - if (unlikely(IS_ERR(watch))) {
> - ret = PTR_ERR(watch);
> - goto out;
> - }
> -
> - /* Add the watch to the device's and the inode's list */
> - list_add(&watch->d_list, &dev->watches);
> - list_add(&watch->i_list, &inode->inotify_watches);
> - ret = watch->wd;
> + ret = inotify_add_watch(dev, inode, mask, NULL);
> out:
> - up(&dev->sem);
> - up(&inode->inotify_sem);
> path_release(&nd);
> fput_and_out:
> fput_light(filp, fput_needed);
> diff -r 8ecff93e704a -r 58e1301e9661 include/linux/inotify.h
> --- a/include/linux/inotify.h Thu Aug 18 19:53:59 2005
> +++ b/include/linux/inotify.h Thu Aug 18 23:19:52 2005
> @@ -14,6 +14,9 @@
> *
> * When you are watching a directory, you will receive the filename for events
> * such as IN_CREATE, IN_DELETE, IN_OPEN, IN_CLOSE, ..., relative to the wd.
> + *
> + * When using inotify from the kernel, len will always be zero. Instead you
> + * should check the path for non-NULL in your callback.
> */
> struct inotify_event {
> __s32 wd; /* watch descriptor */
> @@ -68,6 +71,17 @@
>
> #ifdef CONFIG_INOTIFY
>
> +/* Kernel consumer API */
> +
> +extern struct inotify_device *inotify_init(void (*)(struct inotify_event *,
> + const char *, void *));
> +extern int inotify_free(struct inotify_device *);
> +extern __s32 inotify_add_watch(struct inotify_device *, struct inode *, __u32,
> + void *);
> +extern int inotify_ignore(struct inotify_device *, __s32);
> +
> +/* Kernel producer API */
> +
> extern void inotify_inode_queue_event(struct inode *, __u32, __u32,
> const char *);
> extern void inotify_dentry_parent_queue_event(struct dentry *, __u32, __u32,
> @@ -77,6 +91,8 @@
> extern u32 inotify_get_cookie(void);
>
> #else
> +
> +/* Kernel producer API stubs */
>
> static inline void inotify_inode_queue_event(struct inode *inode,
> __u32 mask, __u32 cookie,
More information about the Linux-audit
mailing list