[Cluster-devel] [GFS2 PATCH 2/2] GFS2: Use resizable hash table for glocks
Andreas Gruenbacher
agruenba at redhat.com
Mon Aug 24 13:16:00 UTC 2015
Bob,
2015-07-09 20:25 GMT+02:00 Bob Peterson <rpeterso at redhat.com>:
> This patch converts the glock hash table from a fixed-size hash table
> to a resizable hash table (rhashtable), which scales better. This
> also simplifies a lot of code.
> ---
> fs/gfs2/glock.c | 282 +++++++++++++++++++++----------------------------------
> fs/gfs2/incore.h | 4 +-
> 2 files changed, 111 insertions(+), 175 deletions(-)
>
> diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
> index 25e0389..813de00 100644
> --- a/fs/gfs2/glock.c
> +++ b/fs/gfs2/glock.c
> @@ -34,6 +34,7 @@
> #include <linux/percpu.h>
> #include <linux/list_sort.h>
> #include <linux/lockref.h>
> +#include <linux/rhashtable.h>
>
> #include "gfs2.h"
> #include "incore.h"
> @@ -51,8 +52,12 @@
>
> struct gfs2_glock_iter {
> int hash; /* hash bucket index */
> - unsigned nhash; /* Index within current bucket */
> struct gfs2_sbd *sdp; /* incore superblock */
> +};
> +
> +struct gfs2_glock_rht_iter {
> + struct gfs2_sbd *sdp; /* incore superblock */
> + struct rhashtable_iter hti; /* rhashtable iterator */
> struct gfs2_glock *gl; /* current glock struct */
> loff_t last_pos; /* last position */
> };
> @@ -70,44 +75,20 @@ static DEFINE_SPINLOCK(lru_lock);
>
> #define GFS2_GL_HASH_SHIFT 15
> #define GFS2_GL_HASH_SIZE (1 << GFS2_GL_HASH_SHIFT)
> -#define GFS2_GL_HASH_MASK (GFS2_GL_HASH_SIZE - 1)
> -
> -static struct hlist_bl_head gl_hash_table[GFS2_GL_HASH_SIZE];
> -static struct dentry *gfs2_root;
> -
> -/**
> - * gl_hash() - Turn glock number into hash bucket number
> - * @lock: The glock number
> - *
> - * Returns: The number of the corresponding hash bucket
> - */
> -
> -static unsigned int gl_hash(const struct gfs2_sbd *sdp,
> - const struct lm_lockname *name)
> -{
> - unsigned int h;
> -
> - h = jhash(&name->ln_number, sizeof(u64), 0);
> - h = jhash(&name->ln_type, sizeof(unsigned int), h);
> - h = jhash(&sdp, sizeof(struct gfs2_sbd *), h);
> - h &= GFS2_GL_HASH_MASK;
> -
> - return h;
> -}
>
> -static inline void spin_lock_bucket(unsigned int hash)
> -{
> - hlist_bl_lock(&gl_hash_table[hash]);
> -}
> +struct rhashtable_params ht_parms = {
> + .nelem_hint = GFS2_GL_HASH_SIZE * 3 / 4,
> + .key_len = sizeof(struct lm_lockname),
> + .key_offset = offsetof(struct gfs2_glock, gl_name),
> + .head_offset = offsetof(struct gfs2_glock, gl_node),
> + .hashfn = jhash,
> +};
>
> -static inline void spin_unlock_bucket(unsigned int hash)
> -{
> - hlist_bl_unlock(&gl_hash_table[hash]);
> -}
> +static struct rhashtable gl_hash_table;
>
> -static void gfs2_glock_dealloc(struct rcu_head *rcu)
> +void gfs2_glock_free(struct gfs2_glock *gl)
> {
> - struct gfs2_glock *gl = container_of(rcu, struct gfs2_glock, gl_rcu);
> + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
>
> if (gl->gl_ops->go_flags & GLOF_ASPACE) {
> kmem_cache_free(gfs2_glock_aspace_cachep, gl);
> @@ -115,13 +96,6 @@ static void gfs2_glock_dealloc(struct rcu_head *rcu)
> kfree(gl->gl_lksb.sb_lvbptr);
> kmem_cache_free(gfs2_glock_cachep, gl);
> }
> -}
> -
> -void gfs2_glock_free(struct gfs2_glock *gl)
> -{
> - struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
> -
> - call_rcu(&gl->gl_rcu, gfs2_glock_dealloc);
> if (atomic_dec_and_test(&sdp->sd_glock_disposal))
> wake_up(&sdp->sd_glock_wait);
> }
> @@ -202,9 +176,8 @@ void gfs2_glock_put(struct gfs2_glock *gl)
>
> gfs2_glock_remove_from_lru(gl);
> spin_unlock(&gl->gl_lockref.lock);
> - spin_lock_bucket(gl->gl_hash);
> - hlist_bl_del_rcu(&gl->gl_list);
> - spin_unlock_bucket(gl->gl_hash);
> + if (gl->gl_node.next != NULL)
> + rhashtable_remove_fast(&gl_hash_table, &gl->gl_node, ht_parms);
> GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders));
> GLOCK_BUG_ON(gl, mapping && mapping->nrpages);
> trace_gfs2_glock_put(gl);
> @@ -212,30 +185,6 @@ void gfs2_glock_put(struct gfs2_glock *gl)
> }
>
> /**
> - * search_bucket() - Find struct gfs2_glock by lock number
> - * @bucket: the bucket to search
> - * @name: The lock name
> - *
> - * Returns: NULL, or the struct gfs2_glock with the requested number
> - */
> -
> -static struct gfs2_glock *search_bucket(unsigned int hash,
> - const struct lm_lockname *name)
> -{
> - struct gfs2_glock *gl;
> - struct hlist_bl_node *h;
> -
> - hlist_bl_for_each_entry_rcu(gl, h, &gl_hash_table[hash], gl_list) {
> - if (!lm_name_equal(&gl->gl_name, name))
> - continue;
> - if (lockref_get_not_dead(&gl->gl_lockref))
> - return gl;
> - }
> -
> - return NULL;
> -}
> -
> -/**
> * may_grant - check if its ok to grant a new lock
> * @gl: The glock
> * @gh: The lock request which we wish to grant
> @@ -704,14 +653,14 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
> struct lm_lockname name = { .ln_number = number,
> .ln_type = glops->go_type,
> .ln_sbd = sdp };
> - struct gfs2_glock *gl, *tmp;
> - unsigned int hash = gl_hash(sdp, &name);
> + struct gfs2_glock *gl, *tmp = NULL;
> struct address_space *mapping;
> struct kmem_cache *cachep;
> + int ret, tries = 0;
>
> - rcu_read_lock();
> - gl = search_bucket(hash, &name);
> - rcu_read_unlock();
> + gl = rhashtable_lookup_fast(&gl_hash_table, &name, ht_parms);
> + if (gl && !lockref_get_not_dead(&gl->gl_lockref))
> + gl = NULL;
>
> *glp = gl;
> if (gl)
> @@ -738,13 +687,13 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
> }
>
> atomic_inc(&sdp->sd_glock_disposal);
> + gl->gl_node.next = NULL;
> gl->gl_flags = 0;
> gl->gl_name = name;
> gl->gl_lockref.count = 1;
> gl->gl_state = LM_ST_UNLOCKED;
> gl->gl_target = LM_ST_UNLOCKED;
> gl->gl_demote_state = LM_ST_EXCLUSIVE;
> - gl->gl_hash = hash;
> gl->gl_ops = glops;
> gl->gl_dstamp = ktime_set(0, 0);
> preempt_disable();
> @@ -769,22 +718,34 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
> mapping->writeback_index = 0;
> }
>
> - spin_lock_bucket(hash);
> - tmp = search_bucket(hash, &name);
> - if (tmp) {
> - spin_unlock_bucket(hash);
> - kfree(gl->gl_lksb.sb_lvbptr);
> - kmem_cache_free(cachep, gl);
> - atomic_dec(&sdp->sd_glock_disposal);
> - gl = tmp;
> - } else {
> - hlist_bl_add_head_rcu(&gl->gl_list, &gl_hash_table[hash]);
> - spin_unlock_bucket(hash);
> +again:
> + ret = rhashtable_lookup_insert_fast(&gl_hash_table, &gl->gl_node,
> + ht_parms);
> + if (ret == 0) {
> + *glp = gl;
> + return 0;
> }
>
> - *glp = gl;
> + if (ret == -EEXIST) {
> + ret = 0;
> + tmp = rhashtable_lookup_fast(&gl_hash_table, &name, ht_parms);
> + if (tmp == NULL || !lockref_get_not_dead(&tmp->gl_lockref)) {
> + if (++tries < 100) {
> + cond_resched();
> + goto again;
> + }
> + tmp = NULL;
> + ret = -ENOMEM;
> + }
> + } else {
> + WARN_ON_ONCE(ret);
> + }
> + kfree(gl->gl_lksb.sb_lvbptr);
> + kmem_cache_free(cachep, gl);
> + atomic_dec(&sdp->sd_glock_disposal);
> + *glp = tmp;
>
> - return 0;
> + return ret;
> }
>
> /**
> @@ -1460,31 +1421,24 @@ static struct shrinker glock_shrinker = {
> *
> */
>
> -static void examine_bucket(glock_examiner examiner, const struct gfs2_sbd *sdp,
> - unsigned int hash)
> +static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp)
> {
> struct gfs2_glock *gl;
> - struct hlist_bl_head *head = &gl_hash_table[hash];
> - struct hlist_bl_node *pos;
> + struct rhash_head *pos, *next;
> + const struct bucket_table *tbl;
> + int i;
>
> - rcu_read_lock();
> - hlist_bl_for_each_entry_rcu(gl, pos, head, gl_list) {
> - if ((gl->gl_name.ln_sbd == sdp) && lockref_get_not_dead(&gl->gl_lockref))
> - examiner(gl);
> + tbl = rht_dereference_rcu(gl_hash_table.tbl, &gl_hash_table);
> + for (i = 0; i < tbl->size; i++) {
> + rht_for_each_entry_safe(gl, pos, next, tbl, i, gl_node) {
> + if ((gl->gl_name.ln_sbd == sdp) &&
> + lockref_get_not_dead(&gl->gl_lockref))
> + examiner(gl);
> + }
> }
> - rcu_read_unlock();
> cond_resched();
> }
>
> -static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp)
> -{
> - unsigned x;
> -
> - for (x = 0; x < GFS2_GL_HASH_SIZE; x++)
> - examine_bucket(examiner, sdp, x);
> -}
> -
> -
> /**
> * thaw_glock - thaw out a glock which has an unprocessed reply waiting
> * @gl: The glock to thaw
> @@ -1802,20 +1756,24 @@ static int gfs2_sbstats_seq_show(struct seq_file *seq, void *iter_ptr)
>
> int __init gfs2_glock_init(void)
> {
> - unsigned i;
> - for(i = 0; i < GFS2_GL_HASH_SIZE; i++) {
> - INIT_HLIST_BL_HEAD(&gl_hash_table[i]);
> - }
> + int ret;
> +
> + ret = rhashtable_init(&gl_hash_table, &ht_parms);
> + if (ret < 0)
> + return ret;
>
> glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM |
> WQ_HIGHPRI | WQ_FREEZABLE, 0);
> - if (!glock_workqueue)
> + if (!glock_workqueue) {
> + rhashtable_destroy(&gl_hash_table);
> return -ENOMEM;
> + }
> gfs2_delete_workqueue = alloc_workqueue("delete_workqueue",
> WQ_MEM_RECLAIM | WQ_FREEZABLE,
> 0);
> if (!gfs2_delete_workqueue) {
> destroy_workqueue(glock_workqueue);
> + rhashtable_destroy(&gl_hash_table);
> return -ENOMEM;
> }
>
> @@ -1827,72 +1785,36 @@ int __init gfs2_glock_init(void)
> void gfs2_glock_exit(void)
> {
> unregister_shrinker(&glock_shrinker);
> + rhashtable_destroy(&gl_hash_table);
> destroy_workqueue(glock_workqueue);
> destroy_workqueue(gfs2_delete_workqueue);
> }
>
> -static inline struct gfs2_glock *glock_hash_chain(unsigned hash)
> +static void gfs2_glock_iter_next(struct gfs2_glock_rht_iter *gi)
> {
> - return hlist_bl_entry(hlist_bl_first_rcu(&gl_hash_table[hash]),
> - struct gfs2_glock, gl_list);
> -}
> -
> -static inline struct gfs2_glock *glock_hash_next(struct gfs2_glock *gl)
> -{
> - return hlist_bl_entry(rcu_dereference(gl->gl_list.next),
> - struct gfs2_glock, gl_list);
> -}
> -
> -static int gfs2_glock_iter_next(struct gfs2_glock_iter *gi)
> -{
> - struct gfs2_glock *gl;
> -
> do {
> - gl = gi->gl;
> - if (gl) {
> - gi->gl = glock_hash_next(gl);
> - gi->nhash++;
> - } else {
> - if (gi->hash >= GFS2_GL_HASH_SIZE) {
> - rcu_read_unlock();
> - return 1;
> - }
> - gi->gl = glock_hash_chain(gi->hash);
> - gi->nhash = 0;
> - }
> - while (gi->gl == NULL) {
> - gi->hash++;
> - if (gi->hash >= GFS2_GL_HASH_SIZE) {
> - rcu_read_unlock();
> - return 1;
> - }
> - gi->gl = glock_hash_chain(gi->hash);
> - gi->nhash = 0;
> - }
> + gi->gl = rhashtable_walk_next(&gi->hti);
> /* Skip entries for other sb and dead entries */
> - } while (gi->sdp != gi->gl->gl_name.ln_sbd ||
> - __lockref_is_dead(&gi->gl->gl_lockref));
> -
> - return 0;
> + } while ((gi->gl) && ((gi->sdp != gi->gl->gl_name.ln_sbd) ||
> + __lockref_is_dead(&gi->gl->gl_lockref)));
> }
>
> static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos)
> {
> - struct gfs2_glock_iter *gi = seq->private;
> + struct gfs2_glock_rht_iter *gi = seq->private;
> loff_t n = *pos;
> + int ret;
>
> if (gi->last_pos <= *pos)
> - n = gi->nhash + (*pos - gi->last_pos);
> - else
> - gi->hash = 0;
> + n = (*pos - gi->last_pos);
>
> - gi->nhash = 0;
> - rcu_read_lock();
> + ret = rhashtable_walk_start(&gi->hti);
> + if (ret)
> + return NULL;
>
> do {
> - if (gfs2_glock_iter_next(gi))
> - return NULL;
> - } while (n--);
> + gfs2_glock_iter_next(gi);
> + } while (gi->gl && n--);
>
> gi->last_pos = *pos;
> return gi->gl;
> @@ -1901,23 +1823,20 @@ static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos)
> static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr,
> loff_t *pos)
> {
> - struct gfs2_glock_iter *gi = seq->private;
> + struct gfs2_glock_rht_iter *gi = seq->private;
>
> (*pos)++;
> gi->last_pos = *pos;
> - if (gfs2_glock_iter_next(gi))
> - return NULL;
> -
> + gfs2_glock_iter_next(gi);
> return gi->gl;
> }
>
> static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr)
> {
> - struct gfs2_glock_iter *gi = seq->private;
> + struct gfs2_glock_rht_iter *gi = seq->private;
>
> - if (gi->gl)
> - rcu_read_unlock();
> gi->gl = NULL;
> + rhashtable_walk_stop(&gi->hti);
> }
>
> static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr)
> @@ -1981,29 +1900,46 @@ static const struct seq_operations gfs2_sbstats_seq_ops = {
> static int gfs2_glocks_open(struct inode *inode, struct file *file)
> {
> int ret = seq_open_private(file, &gfs2_glock_seq_ops,
> - sizeof(struct gfs2_glock_iter));
> + sizeof(struct gfs2_glock_rht_iter));
> if (ret == 0) {
> struct seq_file *seq = file->private_data;
> - struct gfs2_glock_iter *gi = seq->private;
> + struct gfs2_glock_rht_iter *gi = seq->private;
> +
> gi->sdp = inode->i_private;
> + gi->last_pos = 0;
> seq->buf = kmalloc(GFS2_SEQ_GOODSIZE, GFP_KERNEL | __GFP_NOWARN);
> if (seq->buf)
> seq->size = GFS2_SEQ_GOODSIZE;
> + gi->gl = NULL;
> + ret = rhashtable_walk_init(&gl_hash_table, &gi->hti);
> }
> return ret;
> }
>
> +static int gfs2_glocks_release(struct inode *inode, struct file *file)
> +{
> + struct seq_file *seq = file->private_data;
> + struct gfs2_glock_rht_iter *gi = seq->private;
> +
> + gi->gl = NULL;
> + rhashtable_walk_exit(&gi->hti);
> + return seq_release_private(inode, file);
> +}
> +
> static int gfs2_glstats_open(struct inode *inode, struct file *file)
> {
> int ret = seq_open_private(file, &gfs2_glstats_seq_ops,
> - sizeof(struct gfs2_glock_iter));
> + sizeof(struct gfs2_glock_rht_iter));
> if (ret == 0) {
> struct seq_file *seq = file->private_data;
> - struct gfs2_glock_iter *gi = seq->private;
> + struct gfs2_glock_rht_iter *gi = seq->private;
> gi->sdp = inode->i_private;
> + gi->last_pos = 0;
> seq->buf = kmalloc(GFS2_SEQ_GOODSIZE, GFP_KERNEL | __GFP_NOWARN);
> if (seq->buf)
> seq->size = GFS2_SEQ_GOODSIZE;
> + gi->gl = NULL;
> + ret = rhashtable_walk_init(&gl_hash_table, &gi->hti);
> }
> return ret;
> }
> @@ -2025,7 +1961,7 @@ static const struct file_operations gfs2_glocks_fops = {
> .open = gfs2_glocks_open,
> .read = seq_read,
> .llseek = seq_lseek,
> - .release = seq_release_private,
> + .release = gfs2_glocks_release,
> };
>
> static const struct file_operations gfs2_glstats_fops = {
> @@ -2033,7 +1969,7 @@ static const struct file_operations gfs2_glstats_fops = {
> .open = gfs2_glstats_open,
> .read = seq_read,
> .llseek = seq_lseek,
> - .release = seq_release_private,
> + .release = gfs2_glocks_release,
> };
>
> static const struct file_operations gfs2_sbstats_fops = {
> diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
> index 4de7853..0d63715 100644
> --- a/fs/gfs2/incore.h
> +++ b/fs/gfs2/incore.h
> @@ -22,6 +22,7 @@
> #include <linux/ktime.h>
> #include <linux/percpu.h>
> #include <linux/lockref.h>
> +#include <linux/rhashtable.h>
>
> #define DIO_WAIT 0x00000010
> #define DIO_METADATA 0x00000020
> @@ -342,7 +343,6 @@ struct gfs2_glock {
> gl_req:2, /* State in last dlm request */
> gl_reply:8; /* Last reply from the dlm */
>
> - unsigned int gl_hash;
> unsigned long gl_demote_time; /* time of first demote request */
> long gl_hold_time;
> struct list_head gl_holders;
> @@ -368,7 +368,7 @@ struct gfs2_glock {
> loff_t end;
> } gl_vm;
> };
> - struct rcu_head gl_rcu;
> + struct rhash_head gl_node;
> };
>
> #define GFS2_MIN_LVB_SIZE 32 /* Min size of LVB that gfs2 supports */
> --
> 2.1.0
>
More information about the Cluster-devel mailing list