[RFC][PATCH] Inotify kernel API

Timothy R. Chavez tinytim at us.ibm.com
Tue Sep 6 22:29:32 UTC 2005


Some comments below...

On Tuesday 23 August 2005 15:37, Amy Griffis wrote:
> diff -r 8ecff93e704a -r 58e1301e9661 fs/inotify.c
> --- a/fs/inotify.c      Thu Aug 18 19:53:59 2005
> +++ b/fs/inotify.c      Thu Aug 18 23:19:52 2005
> @@ -83,14 +83,18 @@
>         wait_queue_head_t       wq;             /* wait queue for i/o */
>         struct idr              idr;            /* idr mapping wd -> watch */
>         struct semaphore        sem;            /* protects this bad boy */
> -       struct list_head        events;         /* list of queued events */
>         struct list_head        watches;        /* list of watches */
>         atomic_t                count;          /* reference count */
> +       u32                     last_wd;        /* the last wd allocated */
> +       /* userland consumer API */
> +       struct list_head        events;         /* list of queued events */
>         struct user_struct      *user;          /* user who opened this dev */
>         unsigned int            queue_size;     /* size of the queue (bytes) */
>         unsigned int            event_count;    /* number of pending events */
>         unsigned int            max_events;     /* maximum number of events */
> -       u32                     last_wd;        /* the last wd allocated */
> +       /* kernel consumer API */
> +       void (*callback)(struct inotify_event *, const char *,
> +                        void *);               /* event callback */
>  };
>  
>  /*
> @@ -122,6 +126,7 @@
>         struct inode            *inode; /* associated inode */
>         s32                     wd;     /* watch descriptor */
>         u32                     mask;   /* event mask for this watch */
> +       void            *callback_arg;  /* callback argument - kernel API */
>  };
>  
>  #ifdef CONFIG_SYSCTL
> @@ -173,8 +178,10 @@
>  static inline void put_inotify_dev(struct inotify_device *dev)
>  {
>         if (atomic_dec_and_test(&dev->count)) {
> -               atomic_dec(&dev->user->inotify_devs);
> -               free_uid(dev->user);
> +               if (dev->user) {
> +                       atomic_dec(&dev->user->inotify_devs);
> +                       free_uid(dev->user);
> +               }
>                 kfree(dev);
>         }
>  }
> @@ -341,6 +348,23 @@
>  }
>  
>  /*
> + * inotify_callback_event - notify kernel consumers of events
> + */
> +static void inotify_callback_event(struct inotify_device *dev, 
> +                                 struct inotify_watch *watch, 
> +                                 u32 mask, u32 cookie, const char *name)
> +{
> +       struct inotify_event event;
> +
> +       event.wd = watch->wd;
> +       event.mask = mask;
> +       event.cookie = cookie;
> +       event.len = 0; /* kernel consumers don't need length */
> +
> +       dev->callback(&event, name, watch->callback_arg);
> +}
> +
> +/*
>   * inotify_dev_get_wd - returns the next WD for use by the given dev
>   *
>   * Callers must hold dev->sem.  This function can sleep.
> @@ -383,12 +407,13 @@
>   * Both 'dev' and 'inode' (by way of nameidata) need to be pinned.
>   */
>  static struct inotify_watch *create_watch(struct inotify_device *dev,
> -                                         u32 mask, struct inode *inode)
> +                                         u32 mask, struct inode *inode,
> +                                         void *callback_arg)
>  {
>         struct inotify_watch *watch;
>         int ret;
>  
> -       if (atomic_read(&dev->user->inotify_watches) >=
> +       if (dev->user && atomic_read(&dev->user->inotify_watches) >=
>                         inotify_max_user_watches)
>                 return ERR_PTR(-ENOSPC);
>  
> @@ -404,6 +429,7 @@
>  
>         dev->last_wd = watch->wd;
>         watch->mask = mask;
> +       watch->callback_arg = callback_arg;
>         atomic_set(&watch->count, 0);
>         INIT_LIST_HEAD(&watch->d_list);
>         INIT_LIST_HEAD(&watch->i_list);
> @@ -421,7 +447,8 @@
>         /* bump our own count, corresponding to our entry in dev->watches */
>         get_inotify_watch(watch);
>  
> -       atomic_inc(&dev->user->inotify_watches);
> +       if (dev->user)
> +               atomic_inc(&dev->user->inotify_watches);
>  
>         return watch;
>  }
> @@ -453,7 +480,8 @@
>         list_del(&watch->i_list);
>         list_del(&watch->d_list);
>  
> -       atomic_dec(&dev->user->inotify_watches);
> +       if (dev->user)
> +               atomic_dec(&dev->user->inotify_watches);
>         idr_remove(&dev->idr, watch->wd);
>         put_inotify_watch(watch);
>  }
> @@ -471,7 +499,10 @@
>   */
>  static void remove_watch(struct inotify_watch *watch,struct inotify_device *dev)
>  {
> -       inotify_dev_queue_event(dev, watch, IN_IGNORED, 0, NULL);
> +       if (dev->callback)
> +               inotify_callback_event(dev, watch, IN_IGNORED, 0, NULL);
> +       else
> +               inotify_dev_queue_event(dev, watch, IN_IGNORED, 0, NULL);
>         remove_watch_no_event(watch, dev);
>  }
>  
> @@ -484,7 +515,172 @@
>         return !list_empty(&inode->inotify_watches);
>  }
>  
> -/* Kernel API */
> +/* Kernel consumer API */
> +
> +/*
> + * inotify_init - allocates and initializes an inotify device.
> + */
> +struct inotify_device * 
> +inotify_init(void (*callback)(struct inotify_event *, const char *, void *))
> +{
> +       struct inotify_device *dev;
> +
> +       dev = kmalloc(sizeof(struct inotify_device), GFP_KERNEL);
> +       if (unlikely(!dev))
> +               return NULL;
> +
> +       idr_init(&dev->idr);
> +       INIT_LIST_HEAD(&dev->events);
> +       INIT_LIST_HEAD(&dev->watches);
> +       init_waitqueue_head(&dev->wq);
> +       sema_init(&dev->sem, 1);
> +       dev->event_count = 0;
> +       dev->queue_size = 0;
> +       dev->max_events = inotify_max_queued_events;
> +       dev->user = NULL;       /* set in sys_inotify_init */
> +       dev->last_wd = 0;
> +       dev->callback = callback;
> +       atomic_set(&dev->count, 0);
> +       get_inotify_dev(dev);
> +
> +       return dev;
> +}
> +EXPORT_SYMBOL_GPL(inotify_init);
> +
> +/*
> + * inotify_free - clean up and free an inotify device.
> + */
> +int inotify_free(struct inotify_device *dev)
> +{
> +       /*
> +        * Destroy all of the watches on this device.  Unfortunately, not very
> +        * pretty.  We cannot do a simple iteration over the list, because we
> +        * do not know the inode until we iterate to the watch.  But we need to
> +        * hold inode->inotify_sem before dev->sem.  The following works.
> +        */
> +       while (1) {
> +               struct inotify_watch *watch;
> +               struct list_head *watches;
> +               struct inode *inode;
> +
> +               down(&dev->sem);
> +               watches = &dev->watches;
> +               if (list_empty(watches)) {
> +                       up(&dev->sem);
> +                       break;
> +               }
> +               watch = list_entry(watches->next, struct inotify_watch, d_list);
> +               get_inotify_watch(watch);
> +               up(&dev->sem);
> +
> +               inode = watch->inode;
> +               down(&inode->inotify_sem);
> +               down(&dev->sem);
> +               remove_watch_no_event(watch, dev);
> +               up(&dev->sem);
> +               up(&inode->inotify_sem);
> +               put_inotify_watch(watch);
> +       }
> +
> +       /* destroy all of the events on this device */
> +       down(&dev->sem);
> +       while (!list_empty(&dev->events))
> +               inotify_dev_event_dequeue(dev);
> +       up(&dev->sem);
> +
> +       /* free this device: the put matching the get in inotify_init() */
> +       put_inotify_dev(dev);
> +
> +       return 0;
> +}
> +EXPORT_SYMBOL_GPL(inotify_free);
> +
> +/*
> + * inotify_add_watch - add a watch to this inotify instance.
> + */
> +s32 inotify_add_watch(struct inotify_device *dev, struct inode *inode, 
> +                     u32 mask, void *callback_arg)
> +{
> +       struct inotify_watch *watch, *old;
> +       int ret;
> +
> +       down(&inode->inotify_sem);

What happens if the inode is deleted when attempting to hold down the semaphore?
Perhaps a comment reminding the caller of this function they must hold the proper
inode locks around this function?.

> +       down(&dev->sem);
> +
> +       /* don't let consumers set invalid bits: we don't want flags set */
> +       mask &= IN_ALL_EVENTS;
> +       if (unlikely(!mask)) {
> +               ret = -EINVAL;
> +               goto out;
> +       }
> +
> +       /*
> +        * Handle the case of re-adding a watch on an (inode,dev) pair that we
> +        * are already watching.  We just update the mask and callback_arg and
> +        * return its wd.
> +        */
> +       old = inode_find_dev(inode, dev);
> +       if (unlikely(old)) {
> +               old->mask = mask;
> +               old->callback_arg = callback_arg;

You can leak memory here, right?  Maybe there needs to be a old->callback_free
field that, when set, knows how to clean up old->callback_arg, before pointing it
else where.  Not sure we can assume that the callback_arg is always going to be
statically allocated.

> +               ret = old->wd;
> +               goto out;
> +       }
> +
> +       watch = create_watch(dev, mask, inode, callback_arg);
> +       if (unlikely(IS_ERR(watch))) {
> +               ret = PTR_ERR(watch);
> +               goto out;
> +       }
> +
> +       /* Add the watch to the device's and the inode's list */
> +       list_add(&watch->d_list, &dev->watches);
> +       list_add(&watch->i_list, &inode->inotify_watches);
> +       ret = watch->wd;
> +out:
> +       up(&dev->sem);
> +       up(&inode->inotify_sem);
> +       return ret;
> +}
> +EXPORT_SYMBOL_GPL(inotify_add_watch);
> +
> +/*
> + * inotify_ignore - remove a given wd from this inotify instance.
> + *
> + * Can sleep.
> + */
> +int inotify_ignore(struct inotify_device *dev, s32 wd)
> +{
> +       struct inotify_watch *watch;
> +       struct inode *inode;
> +
> +       down(&dev->sem);
> +       watch = idr_find(&dev->idr, wd);
> +       if (unlikely(!watch)) {
> +               up(&dev->sem);
> +               return -EINVAL;
> +       }
> +       get_inotify_watch(watch);
> +       inode = watch->inode;
> +       up(&dev->sem);
> +
> +       down(&inode->inotify_sem);
> +       down(&dev->sem);
> +
> +       /* make sure that we did not race */
> +       watch = idr_find(&dev->idr, wd);
> +       if (likely(watch))
> +               remove_watch(watch, dev);
> +
> +       up(&dev->sem);
> +       up(&inode->inotify_sem);
> +       put_inotify_watch(watch);
> +
> +       return 0;
> +}
> +EXPORT_SYMBOL_GPL(inotify_ignore);
> +
> +/* Kernel producer API */
>  
>  /**
>   * inotify_inode_queue_event - queue an event to all watches on this inode

Since the purpose of this patch is to add a kernel API to Inotify, I think it should
be as clean as possible.   With this in mind, I think that since we _might_ be calling
inotify_callback_event that the calling function not be inotify_inode_queue_event;
it's not all that intuitive.  Instead, I think this function should be renamed to 
something like, inotify_inode_handle_event...

> @@ -508,7 +704,12 @@
>                         struct inotify_device *dev = watch->dev;
>                         get_inotify_watch(watch);
>                         down(&dev->sem);
> -                       inotify_dev_queue_event(dev, watch, mask, cookie, name);
> +                       if (dev->callback)
> +                               inotify_callback_event(dev, watch, mask, 
> +                                                      cookie, name);
> +                       else
> +                               inotify_dev_queue_event(dev, watch, mask,
> +                                                       cookie, name);
>                         if (watch_mask & IN_ONESHOT)
>                                 remove_watch_no_event(watch, dev);
>                         up(&dev->sem);
> @@ -622,7 +823,12 @@
>                 list_for_each_entry_safe(watch, next_w, watches, i_list) {
>                         struct inotify_device *dev = watch->dev;
>                         down(&dev->sem);
> -                       inotify_dev_queue_event(dev, watch, IN_UNMOUNT,0,NULL);
> +                       if (dev->callback)
> +                               inotify_callback_event(dev, watch, IN_UNMOUNT, 
> +                                                      0, NULL);
> +                       else
> +                               inotify_dev_queue_event(dev, watch, IN_UNMOUNT,
> +                                                       0, NULL);
>                         remove_watch(watch, dev);
>                         up(&dev->sem);
>                 }
> @@ -748,83 +954,7 @@
>  
>  static int inotify_release(struct inode *ignored, struct file *file)
>  {
> -       struct inotify_device *dev = file->private_data;
> -
> -       /*
> -        * Destroy all of the watches on this device.  Unfortunately, not very
> -        * pretty.  We cannot do a simple iteration over the list, because we
> -        * do not know the inode until we iterate to the watch.  But we need to
> -        * hold inode->inotify_sem before dev->sem.  The following works.
> -        */
> -       while (1) {
> -               struct inotify_watch *watch;
> -               struct list_head *watches;
> -               struct inode *inode;
> -
> -               down(&dev->sem);
> -               watches = &dev->watches;
> -               if (list_empty(watches)) {
> -                       up(&dev->sem);
> -                       break;
> -               }
> -               watch = list_entry(watches->next, struct inotify_watch, d_list);
> -               get_inotify_watch(watch);
> -               up(&dev->sem);
> -
> -               inode = watch->inode;
> -               down(&inode->inotify_sem);
> -               down(&dev->sem);
> -               remove_watch_no_event(watch, dev);
> -               up(&dev->sem);
> -               up(&inode->inotify_sem);
> -               put_inotify_watch(watch);
> -       }
> -
> -       /* destroy all of the events on this device */
> -       down(&dev->sem);
> -       while (!list_empty(&dev->events))
> -               inotify_dev_event_dequeue(dev);
> -       up(&dev->sem);
> -
> -       /* free this device: the put matching the get in inotify_init() */
> -       put_inotify_dev(dev);
> -
> -       return 0;
> -}
> -
> -/*
> - * inotify_ignore - remove a given wd from this inotify instance.
> - *
> - * Can sleep.
> - */
> -static int inotify_ignore(struct inotify_device *dev, s32 wd)
> -{
> -       struct inotify_watch *watch;
> -       struct inode *inode;
> -
> -       down(&dev->sem);
> -       watch = idr_find(&dev->idr, wd);
> -       if (unlikely(!watch)) {
> -               up(&dev->sem);
> -               return -EINVAL;
> -       }
> -       get_inotify_watch(watch);
> -       inode = watch->inode;
> -       up(&dev->sem);
> -
> -       down(&inode->inotify_sem);
> -       down(&dev->sem);
> -
> -       /* make sure that we did not race */
> -       watch = idr_find(&dev->idr, wd);
> -       if (likely(watch))
> -               remove_watch(watch, dev);
> -
> -       up(&dev->sem);
> -       up(&inode->inotify_sem);
> -       put_inotify_watch(watch);
> -
> -       return 0;
> +       return inotify_free(file->private_data);
>  }
>  
>  static long inotify_ioctl(struct file *file, unsigned int cmd,
> @@ -878,11 +1008,14 @@
>                 goto out_free_uid;
>         }
>  
> -       dev = kmalloc(sizeof(struct inotify_device), GFP_KERNEL);
> +       dev = inotify_init(NULL);
>         if (unlikely(!dev)) {
>                 ret = -ENOMEM;
>                 goto out_free_uid;
>         }
> +
> +       dev->user = user;
> +       atomic_inc(&user->inotify_devs);
>  
>         filp->f_op = &inotify_fops;
>         filp->f_vfsmnt = mntget(inotify_mnt);
> @@ -892,20 +1025,6 @@
>         filp->f_flags = O_RDONLY;
>         filp->private_data = dev;
>  
> -       idr_init(&dev->idr);
> -       INIT_LIST_HEAD(&dev->events);
> -       INIT_LIST_HEAD(&dev->watches);
> -       init_waitqueue_head(&dev->wq);
> -       sema_init(&dev->sem, 1);
> -       dev->event_count = 0;
> -       dev->queue_size = 0;
> -       dev->max_events = inotify_max_queued_events;
> -       dev->user = user;
> -       dev->last_wd = 0;
> -       atomic_set(&dev->count, 0);
> -
> -       get_inotify_dev(dev);
> -       atomic_inc(&user->inotify_devs);
>         fd_install(fd, filp);
>  
>         return fd;
> @@ -919,7 +1038,6 @@
>  
>  asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask)
>  {
> -       struct inotify_watch *watch, *old;
>         struct inode *inode;
>         struct inotify_device *dev;
>         struct nameidata nd;
> @@ -938,46 +1056,14 @@
>  
>         ret = find_inode(path, &nd);
>         if (unlikely(ret))
> -               goto fput_and_out;
> +               goto out;
>  
>         /* inode held in place by reference to nd; dev by fget on fd */
>         inode = nd.dentry->d_inode;
>         dev = filp->private_data;
>  
> -       down(&inode->inotify_sem);
> -       down(&dev->sem);
> -
> -       /* don't let user-space set invalid bits: we don't want flags set */
> -       mask &= IN_ALL_EVENTS;
> -       if (unlikely(!mask)) {
> -               ret = -EINVAL;
> -               goto out;
> -       }
> -
> -       /*
> -        * Handle the case of re-adding a watch on an (inode,dev) pair that we
> -        * are already watching.  We just update the mask and return its wd.
> -        */
> -       old = inode_find_dev(inode, dev);
> -       if (unlikely(old)) {
> -               old->mask = mask;
> -               ret = old->wd;
> -               goto out;
> -       }
> -
> -       watch = create_watch(dev, mask, inode);
> -       if (unlikely(IS_ERR(watch))) {
> -               ret = PTR_ERR(watch);
> -               goto out;
> -       }
> -
> -       /* Add the watch to the device's and the inode's list */
> -       list_add(&watch->d_list, &dev->watches);
> -       list_add(&watch->i_list, &inode->inotify_watches);
> -       ret = watch->wd;
> +       ret = inotify_add_watch(dev, inode, mask, NULL);
>  out:
> -       up(&dev->sem);
> -       up(&inode->inotify_sem);
>         path_release(&nd);
>  fput_and_out:
>         fput_light(filp, fput_needed);
> diff -r 8ecff93e704a -r 58e1301e9661 include/linux/inotify.h
> --- a/include/linux/inotify.h   Thu Aug 18 19:53:59 2005
> +++ b/include/linux/inotify.h   Thu Aug 18 23:19:52 2005
> @@ -14,6 +14,9 @@
>   *
>   * When you are watching a directory, you will receive the filename for events
>   * such as IN_CREATE, IN_DELETE, IN_OPEN, IN_CLOSE, ..., relative to the wd.
> + *
> + * When using inotify from the kernel, len will always be zero.  Instead you
> + * should check the path for non-NULL in your callback.
>   */
>  struct inotify_event {
>         __s32           wd;             /* watch descriptor */
> @@ -68,6 +71,17 @@
>  
>  #ifdef CONFIG_INOTIFY
>  
> +/* Kernel consumer API */
> +
> +extern struct inotify_device *inotify_init(void (*)(struct inotify_event *, 
> +                                                   const char *, void *));
> +extern int inotify_free(struct inotify_device *);
> +extern __s32 inotify_add_watch(struct inotify_device *, struct inode *, __u32,
> +                              void *);
> +extern int inotify_ignore(struct inotify_device *, __s32);
> +
> +/* Kernel producer API */
> +
>  extern void inotify_inode_queue_event(struct inode *, __u32, __u32,
>                                       const char *);
>  extern void inotify_dentry_parent_queue_event(struct dentry *, __u32, __u32,
> @@ -77,6 +91,8 @@
>  extern u32 inotify_get_cookie(void);
>  
>  #else
> +
> +/* Kernel producer API stubs */
>  
>  static inline void inotify_inode_queue_event(struct inode *inode,
>                                              __u32 mask, __u32 cookie,




More information about the Linux-audit mailing list