[Cluster-devel] [GFS2 PATCH 2/2] GFS2: Use resizable hash table for glocks

Andreas Gruenbacher agruenba at redhat.com
Mon Aug 24 13:16:00 UTC 2015


Bob,

2015-07-09 20:25 GMT+02:00 Bob Peterson <rpeterso at redhat.com>:
> This patch changes the glock hash table from a normal hash table to
> a resizable hash table, which scales better. This also simplifies
> a lot of code.
> ---
>  fs/gfs2/glock.c  | 282 +++++++++++++++++++++----------------------------------
>  fs/gfs2/incore.h |   4 +-
>  2 files changed, 111 insertions(+), 175 deletions(-)
>
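(For anyone following along who has not used the resizable hash table API before: the calling pattern the patch moves to looks roughly like the sketch below. This is not GFS2 code; the demo_* struct and names are made up for illustration, and leaving .hashfn unset should fall back to the default jhash over key_len bytes.)

    #include <linux/rhashtable.h>

    /* Hypothetical object keyed by a fixed-size key embedded in it. */
    struct demo_obj {
            u64 key;                        /* lookup key */
            struct rhash_head node;         /* hash table linkage */
    };

    static const struct rhashtable_params demo_params = {
            .key_len     = sizeof(u64),
            .key_offset  = offsetof(struct demo_obj, key),
            .head_offset = offsetof(struct demo_obj, node),
    };

    static struct rhashtable demo_table;

    /* rhashtable_init(&demo_table, &demo_params) once at startup,
     * rhashtable_destroy(&demo_table) at teardown. */

    static int demo_add(struct demo_obj *obj)
    {
            /* Returns -EEXIST if an object with the same key is already hashed. */
            return rhashtable_lookup_insert_fast(&demo_table, &obj->node,
                                                 demo_params);
    }

    static struct demo_obj *demo_find(u64 key)
    {
            /* Caller must hold rcu_read_lock(); result is RCU-protected. */
            return rhashtable_lookup_fast(&demo_table, &key, demo_params);
    }
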
> diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
> index 25e0389..813de00 100644
> --- a/fs/gfs2/glock.c
> +++ b/fs/gfs2/glock.c
> @@ -34,6 +34,7 @@
>  #include <linux/percpu.h>
>  #include <linux/list_sort.h>
>  #include <linux/lockref.h>
> +#include <linux/rhashtable.h>
>
>  #include "gfs2.h"
>  #include "incore.h"
> @@ -51,8 +52,12 @@
>
>  struct gfs2_glock_iter {
>         int hash;                       /* hash bucket index           */
> -       unsigned nhash;                 /* Index within current bucket */
>         struct gfs2_sbd *sdp;           /* incore superblock           */
> +};
> +
> +struct gfs2_glock_rht_iter {
> +       struct gfs2_sbd *sdp;           /* incore superblock           */
> +       struct rhashtable_iter hti;     /* rhashtable iterator         */
>         struct gfs2_glock *gl;          /* current glock struct        */
>         loff_t last_pos;                /* last position               */
>  };
> @@ -70,44 +75,20 @@ static DEFINE_SPINLOCK(lru_lock);
>
>  #define GFS2_GL_HASH_SHIFT      15
>  #define GFS2_GL_HASH_SIZE       (1 << GFS2_GL_HASH_SHIFT)
> -#define GFS2_GL_HASH_MASK       (GFS2_GL_HASH_SIZE - 1)
> -
> -static struct hlist_bl_head gl_hash_table[GFS2_GL_HASH_SIZE];
> -static struct dentry *gfs2_root;
> -
> -/**
> - * gl_hash() - Turn glock number into hash bucket number
> - * @lock: The glock number
> - *
> - * Returns: The number of the corresponding hash bucket
> - */
> -
> -static unsigned int gl_hash(const struct gfs2_sbd *sdp,
> -                           const struct lm_lockname *name)
> -{
> -       unsigned int h;
> -
> -       h = jhash(&name->ln_number, sizeof(u64), 0);
> -       h = jhash(&name->ln_type, sizeof(unsigned int), h);
> -       h = jhash(&sdp, sizeof(struct gfs2_sbd *), h);
> -       h &= GFS2_GL_HASH_MASK;
> -
> -       return h;
> -}
>
> -static inline void spin_lock_bucket(unsigned int hash)
> -{
> -       hlist_bl_lock(&gl_hash_table[hash]);
> -}
> +struct rhashtable_params ht_parms = {
> +       .nelem_hint = GFS2_GL_HASH_SIZE * 3 / 4,
> +       .key_len = sizeof(struct lm_lockname),
> +       .key_offset = offsetof(struct gfs2_glock, gl_name),
> +       .head_offset = offsetof(struct gfs2_glock, gl_node),
> +       .hashfn = jhash,
> +};
>
> -static inline void spin_unlock_bucket(unsigned int hash)
> -{
> -       hlist_bl_unlock(&gl_hash_table[hash]);
> -}
> +static struct rhashtable gl_hash_table;
>
> -static void gfs2_glock_dealloc(struct rcu_head *rcu)
> +void gfs2_glock_free(struct gfs2_glock *gl)
>  {
> -       struct gfs2_glock *gl = container_of(rcu, struct gfs2_glock, gl_rcu);
> +       struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
>
>         if (gl->gl_ops->go_flags & GLOF_ASPACE) {
>                 kmem_cache_free(gfs2_glock_aspace_cachep, gl);
> @@ -115,13 +96,6 @@ static void gfs2_glock_dealloc(struct rcu_head *rcu)
>                 kfree(gl->gl_lksb.sb_lvbptr);
>                 kmem_cache_free(gfs2_glock_cachep, gl);
>         }
> -}
> -
> -void gfs2_glock_free(struct gfs2_glock *gl)
> -{
> -       struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
> -
> -       call_rcu(&gl->gl_rcu, gfs2_glock_dealloc);
>         if (atomic_dec_and_test(&sdp->sd_glock_disposal))
>                 wake_up(&sdp->sd_glock_wait);
>  }
> @@ -202,9 +176,8 @@ void gfs2_glock_put(struct gfs2_glock *gl)
>
>         gfs2_glock_remove_from_lru(gl);
>         spin_unlock(&gl->gl_lockref.lock);
> -       spin_lock_bucket(gl->gl_hash);
> -       hlist_bl_del_rcu(&gl->gl_list);
> -       spin_unlock_bucket(gl->gl_hash);
> +       if (gl->gl_node.next != NULL)
> +               rhashtable_remove_fast(&gl_hash_table, &gl->gl_node, ht_parms);
>         GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders));
>         GLOCK_BUG_ON(gl, mapping && mapping->nrpages);
>         trace_gfs2_glock_put(gl);
> @@ -212,30 +185,6 @@ void gfs2_glock_put(struct gfs2_glock *gl)
>  }
>
>  /**
> - * search_bucket() - Find struct gfs2_glock by lock number
> - * @bucket: the bucket to search
> - * @name: The lock name
> - *
> - * Returns: NULL, or the struct gfs2_glock with the requested number
> - */
> -
> -static struct gfs2_glock *search_bucket(unsigned int hash,
> -                                       const struct lm_lockname *name)
> -{
> -       struct gfs2_glock *gl;
> -       struct hlist_bl_node *h;
> -
> -       hlist_bl_for_each_entry_rcu(gl, h, &gl_hash_table[hash], gl_list) {
> -               if (!lm_name_equal(&gl->gl_name, name))
> -                       continue;
> -               if (lockref_get_not_dead(&gl->gl_lockref))
> -                       return gl;
> -       }
> -
> -       return NULL;
> -}
> -
> -/**
>   * may_grant - check if its ok to grant a new lock
>   * @gl: The glock
>   * @gh: The lock request which we wish to grant
> @@ -704,14 +653,14 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
>         struct lm_lockname name = { .ln_number = number,
>                                     .ln_type = glops->go_type,
>                                     .ln_sbd = sdp };
> -       struct gfs2_glock *gl, *tmp;
> -       unsigned int hash = gl_hash(sdp, &name);
> +       struct gfs2_glock *gl, *tmp = NULL;
>         struct address_space *mapping;
>         struct kmem_cache *cachep;
> +       int ret, tries = 0;
>
> -       rcu_read_lock();
> -       gl = search_bucket(hash, &name);
> -       rcu_read_unlock();
> +       gl = rhashtable_lookup_fast(&gl_hash_table, &name, ht_parms);
> +       if (gl && !lockref_get_not_dead(&gl->gl_lockref))
> +               gl = NULL;
>
>         *glp = gl;
>         if (gl)
> @@ -738,13 +687,13 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
>         }
>
>         atomic_inc(&sdp->sd_glock_disposal);
> +       gl->gl_node.next = NULL;
>         gl->gl_flags = 0;
>         gl->gl_name = name;
>         gl->gl_lockref.count = 1;
>         gl->gl_state = LM_ST_UNLOCKED;
>         gl->gl_target = LM_ST_UNLOCKED;
>         gl->gl_demote_state = LM_ST_EXCLUSIVE;
> -       gl->gl_hash = hash;
>         gl->gl_ops = glops;
>         gl->gl_dstamp = ktime_set(0, 0);
>         preempt_disable();
> @@ -769,22 +718,34 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
>                 mapping->writeback_index = 0;
>         }
>
> -       spin_lock_bucket(hash);
> -       tmp = search_bucket(hash, &name);
> -       if (tmp) {
> -               spin_unlock_bucket(hash);
> -               kfree(gl->gl_lksb.sb_lvbptr);
> -               kmem_cache_free(cachep, gl);
> -               atomic_dec(&sdp->sd_glock_disposal);
> -               gl = tmp;
> -       } else {
> -               hlist_bl_add_head_rcu(&gl->gl_list, &gl_hash_table[hash]);
> -               spin_unlock_bucket(hash);
> +again:
> +       ret = rhashtable_lookup_insert_fast(&gl_hash_table, &gl->gl_node,
> +                                           ht_parms);
> +       if (ret == 0) {
> +               *glp = gl;
> +               return 0;
>         }
>
> -       *glp = gl;
> +       if (ret == -EEXIST) {
> +               ret = 0;
> +               tmp = rhashtable_lookup_fast(&gl_hash_table, &name, ht_parms);
> +               if (tmp == NULL || !lockref_get_not_dead(&tmp->gl_lockref)) {
> +                       if (++tries < 100) {
> +                               cond_resched();
> +                               goto again;
> +                       }
> +                       tmp = NULL;
> +                       ret = -ENOMEM;
> +               }
> +       } else {
> +               WARN_ON_ONCE(ret);
> +       }
> +       kfree(gl->gl_lksb.sb_lvbptr);
> +       kmem_cache_free(cachep, gl);
> +       atomic_dec(&sdp->sd_glock_disposal);
> +       *glp = tmp;
>
> -       return 0;
> +       return ret;
>  }
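To spell out the retry logic here in isolation: rhashtable_lookup_insert_fast() refuses to insert when an entry with the same key is already hashed, so on -EEXIST the caller has to look the winner up again and may find it already dying (lockref dead but not yet unhashed), which is what the bounded retry above is for. A stripped-down version of that pattern, reusing the hypothetical demo_* names from the earlier sketch, with demo_get_ref() standing in for lockref_get_not_dead():

    /* Insert obj, or return an existing live entry with the same key. */
    static struct demo_obj *demo_insert_or_reuse(struct demo_obj *obj)
    {
            struct demo_obj *old;
            int ret;

            for (;;) {
                    ret = rhashtable_lookup_insert_fast(&demo_table, &obj->node,
                                                        demo_params);
                    if (ret == 0)
                            return obj;             /* we won the race */
                    if (ret != -EEXIST)
                            return NULL;            /* hard error, e.g. -ENOMEM */

                    rcu_read_lock();
                    old = rhashtable_lookup_fast(&demo_table, &obj->key,
                                                 demo_params);
                    if (old && demo_get_ref(old)) { /* hypothetical refcount grab */
                            rcu_read_unlock();
                            return old;             /* reuse the existing entry */
                    }
                    rcu_read_unlock();
                    /* Existing entry is dead but not yet unhashed; retry. */
                    cond_resched();
            }
    }
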
>
>  /**
> @@ -1460,31 +1421,24 @@ static struct shrinker glock_shrinker = {
>   *
>   */
>
> -static void examine_bucket(glock_examiner examiner, const struct gfs2_sbd *sdp,
> -                         unsigned int hash)
> +static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp)
>  {
>         struct gfs2_glock *gl;
> -       struct hlist_bl_head *head = &gl_hash_table[hash];
> -       struct hlist_bl_node *pos;
> +       struct rhash_head *pos, *next;
> +       const struct bucket_table *tbl;
> +       int i;
>
> -       rcu_read_lock();
> -       hlist_bl_for_each_entry_rcu(gl, pos, head, gl_list) {
> -               if ((gl->gl_name.ln_sbd == sdp) && lockref_get_not_dead(&gl->gl_lockref))
> -                       examiner(gl);
> +       tbl = rht_dereference_rcu(gl_hash_table.tbl, &gl_hash_table);
> +       for (i = 0; i < tbl->size; i++) {
> +               rht_for_each_entry_safe(gl, pos, next, tbl, i, gl_node) {
> +                       if ((gl->gl_name.ln_sbd == sdp) &&
> +                           lockref_get_not_dead(&gl->gl_lockref))
> +                               examiner(gl);
> +               }
>         }
> -       rcu_read_unlock();
>         cond_resched();
>  }
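One note for readers on the bucket walk above: rht_dereference_rcu() and the per-bucket iteration rely on RCU to keep the bucket table alive, so the usual shape of such a walk, as I understand the rhashtable API, brackets it with rcu_read_lock()/rcu_read_unlock(), roughly as in this sketch (again using the hypothetical demo_* names, with a made-up visit() callback):

    /* Sketch: visit every entry, tolerating concurrent inserts/removals. */
    static void demo_walk(void (*visit)(struct demo_obj *))
    {
            const struct bucket_table *tbl;
            struct rhash_head *pos;
            struct demo_obj *obj;
            unsigned int i;

            rcu_read_lock();
            tbl = rht_dereference_rcu(demo_table.tbl, &demo_table);
            for (i = 0; i < tbl->size; i++) {
                    rht_for_each_entry_rcu(obj, pos, tbl, i, node)
                            visit(obj);
            }
            rcu_read_unlock();
            /* As far as I understand, a plain bucket walk like this can miss
             * entries being relinked by a concurrent resize; the
             * rhashtable_walk_* iterator is the interface meant for that. */
    }
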
>
> -static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp)
> -{
> -       unsigned x;
> -
> -       for (x = 0; x < GFS2_GL_HASH_SIZE; x++)
> -               examine_bucket(examiner, sdp, x);
> -}
> -
> -
>  /**
>   * thaw_glock - thaw out a glock which has an unprocessed reply waiting
>   * @gl: The glock to thaw
> @@ -1802,20 +1756,24 @@ static int gfs2_sbstats_seq_show(struct seq_file *seq, void *iter_ptr)
>
>  int __init gfs2_glock_init(void)
>  {
> -       unsigned i;
> -       for(i = 0; i < GFS2_GL_HASH_SIZE; i++) {
> -               INIT_HLIST_BL_HEAD(&gl_hash_table[i]);
> -       }
> +       int ret;
> +
> +       ret = rhashtable_init(&gl_hash_table, &ht_parms);
> +       if (ret < 0)
> +               return ret;
>
>         glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM |
>                                           WQ_HIGHPRI | WQ_FREEZABLE, 0);
> -       if (!glock_workqueue)
> +       if (!glock_workqueue) {
> +               rhashtable_destroy(&gl_hash_table);
>                 return -ENOMEM;
> +       }
>         gfs2_delete_workqueue = alloc_workqueue("delete_workqueue",
>                                                 WQ_MEM_RECLAIM | WQ_FREEZABLE,
>                                                 0);
>         if (!gfs2_delete_workqueue) {
>                 destroy_workqueue(glock_workqueue);
> +               rhashtable_destroy(&gl_hash_table);
>                 return -ENOMEM;
>         }
>
> @@ -1827,72 +1785,36 @@ int __init gfs2_glock_init(void)
>  void gfs2_glock_exit(void)
>  {
>         unregister_shrinker(&glock_shrinker);
> +       rhashtable_destroy(&gl_hash_table);
>         destroy_workqueue(glock_workqueue);
>         destroy_workqueue(gfs2_delete_workqueue);
>  }
>
> -static inline struct gfs2_glock *glock_hash_chain(unsigned hash)
> +static void gfs2_glock_iter_next(struct gfs2_glock_rht_iter *gi)
>  {
> -       return hlist_bl_entry(hlist_bl_first_rcu(&gl_hash_table[hash]),
> -                             struct gfs2_glock, gl_list);
> -}
> -
> -static inline struct gfs2_glock *glock_hash_next(struct gfs2_glock *gl)
> -{
> -       return hlist_bl_entry(rcu_dereference(gl->gl_list.next),
> -                             struct gfs2_glock, gl_list);
> -}
> -
> -static int gfs2_glock_iter_next(struct gfs2_glock_iter *gi)
> -{
> -       struct gfs2_glock *gl;
> -
>         do {
> -               gl = gi->gl;
> -               if (gl) {
> -                       gi->gl = glock_hash_next(gl);
> -                       gi->nhash++;
> -               } else {
> -                       if (gi->hash >= GFS2_GL_HASH_SIZE) {
> -                               rcu_read_unlock();
> -                               return 1;
> -                       }
> -                       gi->gl = glock_hash_chain(gi->hash);
> -                       gi->nhash = 0;
> -               }
> -               while (gi->gl == NULL) {
> -                       gi->hash++;
> -                       if (gi->hash >= GFS2_GL_HASH_SIZE) {
> -                               rcu_read_unlock();
> -                               return 1;
> -                       }
> -                       gi->gl = glock_hash_chain(gi->hash);
> -                       gi->nhash = 0;
> -               }
> +               gi->gl = rhashtable_walk_next(&gi->hti);
>         /* Skip entries for other sb and dead entries */
> -       } while (gi->sdp != gi->gl->gl_name.ln_sbd ||
> -                __lockref_is_dead(&gi->gl->gl_lockref));
> -
> -       return 0;
> +       } while ((gi->gl) && ((gi->sdp != gi->gl->gl_name.ln_sbd) ||
> +                             __lockref_is_dead(&gi->gl->gl_lockref)));
>  }
>
>  static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos)
>  {
> -       struct gfs2_glock_iter *gi = seq->private;
> +       struct gfs2_glock_rht_iter *gi = seq->private;
>         loff_t n = *pos;
> +       int ret;
>
>         if (gi->last_pos <= *pos)
> -               n = gi->nhash + (*pos - gi->last_pos);
> -       else
> -               gi->hash = 0;
> +               n = (*pos - gi->last_pos);
>
> -       gi->nhash = 0;
> -       rcu_read_lock();
> +       ret = rhashtable_walk_start(&gi->hti);
> +       if (ret)
> +               return NULL;
>
>         do {
> -               if (gfs2_glock_iter_next(gi))
> -                       return NULL;
> -       } while (n--);
> +               gfs2_glock_iter_next(gi);
> +       } while (gi->gl && n--);
>
>         gi->last_pos = *pos;
>         return gi->gl;
> @@ -1901,23 +1823,20 @@ static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos)
>  static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr,
>                                  loff_t *pos)
>  {
> -       struct gfs2_glock_iter *gi = seq->private;
> +       struct gfs2_glock_rht_iter *gi = seq->private;
>
>         (*pos)++;
>         gi->last_pos = *pos;
> -       if (gfs2_glock_iter_next(gi))
> -               return NULL;
> -
> +       gfs2_glock_iter_next(gi);
>         return gi->gl;
>  }
>
>  static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr)
>  {
> -       struct gfs2_glock_iter *gi = seq->private;
> +       struct gfs2_glock_rht_iter *gi = seq->private;
>
> -       if (gi->gl)
> -               rcu_read_unlock();
>         gi->gl = NULL;
> +       rhashtable_walk_stop(&gi->hti);
>  }
>
>  static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr)
> @@ -1981,29 +1900,46 @@ static const struct seq_operations gfs2_sbstats_seq_ops = {
>  static int gfs2_glocks_open(struct inode *inode, struct file *file)
>  {
>         int ret = seq_open_private(file, &gfs2_glock_seq_ops,
> -                                  sizeof(struct gfs2_glock_iter));
> +                                  sizeof(struct gfs2_glock_rht_iter));
>         if (ret == 0) {
>                 struct seq_file *seq = file->private_data;
> -               struct gfs2_glock_iter *gi = seq->private;
> +               struct gfs2_glock_rht_iter *gi = seq->private;
> +
>                 gi->sdp = inode->i_private;
> +               gi->last_pos = 0;
>                 seq->buf = kmalloc(GFS2_SEQ_GOODSIZE, GFP_KERNEL | __GFP_NOWARN);
>                 if (seq->buf)
>                         seq->size = GFS2_SEQ_GOODSIZE;
> +               gi->gl = NULL;
> +               ret = rhashtable_walk_init(&gl_hash_table, &gi->hti);
>         }
>         return ret;
>  }
>
> +static int gfs2_glocks_release(struct inode *inode, struct file *file)
> +{
> +       struct seq_file *seq = file->private_data;
> +       struct gfs2_glock_rht_iter *gi = seq->private;
> +
> +       gi->gl = NULL;
> +       rhashtable_walk_exit(&gi->hti);
> +       return seq_release_private(inode, file);
> +}
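Since the iterator calls are now spread across open/start/next/stop/release, here is my understanding of the intended lifecycle of the walk API in one place, as a condensed sketch (error handling simplified, demo_* names as before; IS_ERR() catches the ERR_PTR(-EAGAIN) that rhashtable_walk_next() returns when a resize happens mid-walk):

    static void demo_dump(void)
    {
            struct rhashtable_iter iter;
            struct demo_obj *obj;

            rhashtable_walk_init(&demo_table, &iter);   /* pairs with _exit() */

            rhashtable_walk_start(&iter);   /* takes rcu_read_lock(); may return
                                             * -EAGAIN if the table was resized
                                             * since _init(), harmless here */

            while ((obj = rhashtable_walk_next(&iter)) != NULL) {
                    if (IS_ERR(obj))        /* resize happened mid-walk */
                            continue;
                    pr_info("key %llu\n", (unsigned long long)obj->key);
            }

            rhashtable_walk_stop(&iter);    /* drops rcu_read_lock() */
            rhashtable_walk_exit(&iter);
    }
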
> +
>  static int gfs2_glstats_open(struct inode *inode, struct file *file)
>  {
>         int ret = seq_open_private(file, &gfs2_glstats_seq_ops,
> -                                  sizeof(struct gfs2_glock_iter));
> +                                  sizeof(struct gfs2_glock_rht_iter));
>         if (ret == 0) {
>                 struct seq_file *seq = file->private_data;
> -               struct gfs2_glock_iter *gi = seq->private;
> +               struct gfs2_glock_rht_iter *gi = seq->private;
>                 gi->sdp = inode->i_private;
> +               gi->last_pos = 0;
>                 seq->buf = kmalloc(GFS2_SEQ_GOODSIZE, GFP_KERNEL | __GFP_NOWARN);
>                 if (seq->buf)
>                         seq->size = GFS2_SEQ_GOODSIZE;
> +               gi->gl = NULL;
> +               ret = rhashtable_walk_init(&gl_hash_table, &gi->hti);
>         }
>         return ret;
>  }
> @@ -2025,7 +1961,7 @@ static const struct file_operations gfs2_glocks_fops = {
>         .open    = gfs2_glocks_open,
>         .read    = seq_read,
>         .llseek  = seq_lseek,
> -       .release = seq_release_private,
> +       .release = gfs2_glocks_release,
>  };
>
>  static const struct file_operations gfs2_glstats_fops = {
> @@ -2033,7 +1969,7 @@ static const struct file_operations gfs2_glstats_fops = {
>         .open    = gfs2_glstats_open,
>         .read    = seq_read,
>         .llseek  = seq_lseek,
> -       .release = seq_release_private,
> +       .release = gfs2_glocks_release,
>  };
>
>  static const struct file_operations gfs2_sbstats_fops = {
> diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
> index 4de7853..0d63715 100644
> --- a/fs/gfs2/incore.h
> +++ b/fs/gfs2/incore.h
> @@ -22,6 +22,7 @@
>  #include <linux/ktime.h>
>  #include <linux/percpu.h>
>  #include <linux/lockref.h>
> +#include <linux/rhashtable.h>
>
>  #define DIO_WAIT       0x00000010
>  #define DIO_METADATA   0x00000020
> @@ -342,7 +343,6 @@ struct gfs2_glock {
>                      gl_req:2,          /* State in last dlm request */
>                      gl_reply:8;        /* Last reply from the dlm */
>
> -       unsigned int gl_hash;
>         unsigned long gl_demote_time; /* time of first demote request */
>         long gl_hold_time;
>         struct list_head gl_holders;
> @@ -368,7 +368,7 @@ struct gfs2_glock {
>                         loff_t end;
>                 } gl_vm;
>         };
> -       struct rcu_head gl_rcu;
> +       struct rhash_head gl_node;
>  };
>
>  #define GFS2_MIN_LVB_SIZE 32   /* Min size of LVB that gfs2 supports */
> --
> 2.1.0
>



