[Linux-cluster] FW: [PATCH] More comments for GFS files

Cahill, Ben M ben.m.cahill at intel.com
Mon Sep 27 21:42:36 UTC 2004


 

-----Original Message-----
From: Cahill, Ben M 
Sent: Thursday, September 23, 2004 4:12 PM
To: RedHat Cluster (linux-cluster at redhat.com)
Subject: [PATCH] More comments for GFS files

Hi all,

Below please find a patch for more comments in some files in
gfs-kernel/src/gfs:

dio.c
file.c
gfs_ioctl.h
incore.h
log.c
lops.c
lvb.h
rgrp.c

The focus was on incore.h.

These were diffed against Thursday's CVS, and I've built and run GFS
after applying the patches, so things should hopefully apply cleanly.

-- Ben --

Opinions are mine, not Intel's




diff -ru cvs/cluster/gfs-kernel/src/gfs/dio.c build_092304/cluster/gfs-kernel/src/gfs/dio.c
--- cvs/cluster/gfs-kernel/src/gfs/dio.c	2004-06-24 04:53:27.000000000 -0400
+++ build_092304/cluster/gfs-kernel/src/gfs/dio.c	2004-09-23 14:18:00.229937128 -0400
@@ -1078,6 +1078,9 @@
  * gfs_sync_meta - sync all the buffers in a filesystem
  * @sdp: the filesystem
  *
+ * Flush metadata blocks to on-disk journal, then
+ * Flush metadata blocks (now in AIL) to on-disk in-place locations
+ * Periodically keep checking until done (AIL empty)
  */
 
 void
diff -ru cvs/cluster/gfs-kernel/src/gfs/file.c build_092304/cluster/gfs-kernel/src/gfs/file.c
--- cvs/cluster/gfs-kernel/src/gfs/file.c	2004-06-24 04:53:27.000000000 -0400
+++ build_092304/cluster/gfs-kernel/src/gfs/file.c	2004-09-23 14:18:09.964457256 -0400
@@ -199,15 +199,18 @@
 	char **p = (char **)buf;
 	int error = 0;
 
+	/* the dinode block always gets journaled */
 	if (bh->b_blocknr == ip->i_num.no_addr) {
 		GFS_ASSERT_INODE(!new, ip,);
 		gfs_trans_add_bh(ip->i_gl, bh);
 		memcpy(bh->b_data + offset, *p, size);
+	/* data blocks get journaled only for special files */
 	} else if (gfs_is_jdata(ip)) {
 		gfs_trans_add_bh(ip->i_gl, bh);
 		memcpy(bh->b_data + offset, *p, size);
 		if (new)
 			gfs_buffer_clear_ends(bh, offset, size, TRUE);
+	/* non-journaled data blocks get written to in-place disk blocks */
 	} else {
 		memcpy(bh->b_data + offset, *p, size);
 		if (new)
@@ -240,11 +243,13 @@
 	char **p = (char **)buf;
 	int error = 0;
 
+	/* the dinode block always gets journaled */
 	if (bh->b_blocknr == ip->i_num.no_addr) {
 		GFS_ASSERT_INODE(!new, ip,);
 		gfs_trans_add_bh(ip->i_gl, bh);
 		if (copy_from_user(bh->b_data + offset, *p, size))
 			error = -EFAULT;
+	/* data blocks get journaled only for special files */
 	} else if (gfs_is_jdata(ip)) {
 		gfs_trans_add_bh(ip->i_gl, bh);
 		if (copy_from_user(bh->b_data + offset, *p, size))
@@ -254,6 +259,7 @@
 			if (error)
 				memset(bh->b_data + offset, 0, size);
 		}
+	/* non-journaled data blocks get written to in-place disk blocks */
 	} else {
 		if (copy_from_user(bh->b_data + offset, *p, size))
 			error = -EFAULT;
diff -ru cvs/cluster/gfs-kernel/src/gfs/gfs_ioctl.h build_092304/cluster/gfs-kernel/src/gfs/gfs_ioctl.h
--- cvs/cluster/gfs-kernel/src/gfs/gfs_ioctl.h	2004-09-13 18:48:45.000000000 -0400
+++ build_092304/cluster/gfs-kernel/src/gfs/gfs_ioctl.h	2004-09-23 13:32:21.518284584 -0400
@@ -131,18 +131,21 @@
 	unsigned int gt_demote_secs;
 	unsigned int gt_incore_log_blocks;
 	unsigned int gt_jindex_refresh_secs;
+
+	/* how often various daemons run (seconds) */
 	unsigned int gt_depend_secs;
-	unsigned int gt_scand_secs;
-	unsigned int gt_recoverd_secs;
-	unsigned int gt_logd_secs;
-	unsigned int gt_quotad_secs;
-	unsigned int gt_inoded_secs;
-	unsigned int gt_quota_simul_sync;
-	unsigned int gt_quota_warn_period;
+	unsigned int gt_scand_secs;       /* find unused glocks and inodes */
+	unsigned int gt_recoverd_secs;    /* recover journal of crashed node */
+	unsigned int gt_logd_secs;        /* update log tail as AIL flushes */
+	unsigned int gt_quotad_secs;      /* sync changes to quota file, clean*/
+	unsigned int gt_inoded_secs;      /* toss unused inodes */
+
+	unsigned int gt_quota_simul_sync; /* max # quotavals to sync at once */
+	unsigned int gt_quota_warn_period; /* secs between quota warn msgs */
 	unsigned int gt_atime_quantum;
-	unsigned int gt_quota_quantum;
-	unsigned int gt_quota_scale_num;
-	unsigned int gt_quota_scale_den;
+	unsigned int gt_quota_quantum;    /* secs between syncs to quota file */
+	unsigned int gt_quota_scale_num;  /* numerator */
+	unsigned int gt_quota_scale_den;  /* denominator */
 	unsigned int gt_quota_enforce;
 	unsigned int gt_quota_account;
 	unsigned int gt_new_files_jdata;
diff -ru cvs/cluster/gfs-kernel/src/gfs/incore.h build_092304/cluster/gfs-kernel/src/gfs/incore.h
--- cvs/cluster/gfs-kernel/src/gfs/incore.h	2004-09-13 18:48:45.000000000 -0400
+++ build_092304/cluster/gfs-kernel/src/gfs/incore.h	2004-09-23 14:58:06.330154296 -0400
@@ -11,20 +11,28 @@
 
************************************************************************
*******
 
************************************************************************
******/
 
+/*
+ *  In-core (memory/RAM) structures.
+ *  These do not appear on-disk.  See gfs_ondisk.h for on-disk
structures.
+ */
+
 #ifndef __INCORE_DOT_H__
 #define __INCORE_DOT_H__
 
+/*  flags used in function call parameters  */
+
 #define DIO_NEW           (0x00000001)
-#define DIO_FORCE         (0x00000002)
-#define DIO_CLEAN         (0x00000004)
-#define DIO_DIRTY         (0x00000008)
-#define DIO_START         (0x00000010)
-#define DIO_WAIT          (0x00000020)
-#define DIO_METADATA      (0x00000040)
-#define DIO_DATA          (0x00000080)
+#define DIO_FORCE         (0x00000002)  /* force read of block from
disk */
+#define DIO_CLEAN         (0x00000004)  /* don't write to disk */
+#define DIO_DIRTY         (0x00000008)  /* data changed, must write to
disk */
+#define DIO_START         (0x00000010)  /* start disk read or write */
+#define DIO_WAIT          (0x00000020)  /* wait for disk r/w to
complete */
+
+#define DIO_METADATA      (0x00000040)  /* process glock's protected
metadata */
+#define DIO_DATA          (0x00000080)  /* process glock's protected
filedata */
 #define DIO_INVISIBLE     (0x00000100)
-#define DIO_CHECK         (0x00000200)
-#define DIO_ALL           (0x00000400)
+#define DIO_CHECK         (0x00000200)  /* make sure glock's AIL is
empty */
+#define DIO_ALL           (0x00000400)  /* flush all AIL transactions
to disk */
 
 /*  Structure prototypes  */
 
@@ -98,6 +106,7 @@
 	void (*lo_after_scan) (struct gfs_sbd * sdp, unsigned int jid,
 			       unsigned int pass);
 
+	/* type of element (glock/buf/unlinked/quota) */
 	char *lo_name;
 };
 
@@ -107,227 +116,351 @@
  */
 
 struct gfs_log_element {
-	struct gfs_log_operations *le_ops;
+	struct gfs_log_operations *le_ops; /* vector of functions */
 
-	struct gfs_trans *le_trans;
-	struct list_head le_list;
+	struct gfs_trans *le_trans;     /* we're part of this
transaction */
+	struct list_head le_list;       /* link to transaction's element
list */
 };
 
+/*
+ * Meta-header cache structure.
+ * One for each metadata block that we've read from disk, and are still
using.
+ * In-core superblock structure hosts the actual cache.
+ * Also, each resource group keeps a list of cached blocks within its
scope.
+ */
 struct gfs_meta_header_cache {
-	struct list_head mc_list_hash;
-	struct list_head mc_list_single;
-	struct list_head mc_list_rgd;
+	/* Links to various lists */
+	struct list_head mc_list_hash;   /* superblock's hashed list */
+	struct list_head mc_list_single; /* superblock's single list */
+	struct list_head mc_list_rgd;    /* resource group's list */
 
-	uint64_t mc_block;
-	struct gfs_meta_header mc_mh;
+	uint64_t mc_block;               /* block # (in-place address)
*/
+	struct gfs_meta_header mc_mh;    /* payload: the block's
meta-header */
 };
 
+/*
+ * Dependency cache structure.
+ * In-core superblock structure hosts the actual cache.
+ * Also, each resource group keeps a list of dependency blocks within
its scope.
+ */
 struct gfs_depend {
-	struct list_head gd_list_hash;
-	struct list_head gd_list_rgd;
+	/* Links to various lists */
+	struct list_head gd_list_hash;  /* superblock's hashed list */
+	struct list_head gd_list_rgd;   /* resource group's list */
 
-	struct gfs_rgrpd *gd_rgd;
-	uint64_t gd_formal_ino;
-	unsigned long gd_time;
+	struct gfs_rgrpd *gd_rgd;       /* resource group descriptor */
+	uint64_t gd_formal_ino;         /* inode ID */
+	unsigned long gd_time;          /* time (jiffies) when put on
list */
 };
 
 /*
- *  Structure containing information about the allocation bitmaps.
- *  There are one of these for each fs block that the bitmap for
- *  the resource group header covers.
+ *  Block allocation bitmap descriptor structure.
+ *  One of these for each fs block that contains bitmap data
+ *    (i.e. the resource group header blocks and their following bitmap
blocks).
+ *  Each allocatable fs data block is represented by 2 bits (4 alloc
states).
  */
 
 struct gfs_bitmap {
-	uint32_t bi_offset;	/* The offset in the buffer of the first
byte */
-	uint32_t bi_start;	/* The position of the first byte in
this block */
-	uint32_t bi_len;	/* The number of bytes in this block */
+	uint32_t bi_offset;  /* Byte offset of bitmap within this bit
block
+	                        (non-zero only for an rgrp header block)
*/
+	uint32_t bi_start;   /* Data block (rgrp scope, 32-bit)
represented
+	                        by the first bit-pair in this bit block
*/
+	uint32_t bi_len;     /* The number of bitmap bytes in this bit
block */
 };
 
 /*
- *  Structure containing information Resource Groups
+ *  Resource Group (Rgrp) descriptor structure.
+ *  There is one of these for each resource (block) group in the fs.
+ *  The filesystem is divided into a number of resource groups to allow
+ *    simultaneous block alloc operations by a number of nodes.
  */
 
 struct gfs_rgrpd {
-	struct list_head rd_list;	/* Link with superblock */
-	struct list_head rd_list_mru;
-	struct list_head rd_recent;	/* Recently used rgrps */
+	/* Links to superblock lists */
+	struct list_head rd_list;       /* on-disk-order list of all
rgrps */
+	struct list_head rd_list_mru;   /* Most Recently Used list of
all rgs */
+	struct list_head rd_recent;     /* recently used rgrps */
 
-	struct gfs_glock *rd_gl;	/* Glock for rgrp */
+	struct gfs_glock *rd_gl;        /* Glock for this rgrp */
 
-	unsigned long rd_flags;
+	unsigned long rd_flags;         /* ?? */
 
-	struct gfs_rindex rd_ri;	/* Resource Index structure */
-	struct gfs_rgrp rd_rg;	        /* Resource Group structure */
-	uint64_t rd_rg_vn;
+	struct gfs_rindex rd_ri;        /* Resource Index (on-disk)
structure */
+	struct gfs_rgrp rd_rg;          /* Resource Group (on-disk)
structure */
+	uint64_t rd_rg_vn;              /* version #: if != glock's gl_vn,
+	                                   we need to read rgrp from disk */
 
-	struct gfs_bitmap *rd_bits;
-	struct buffer_head **rd_bh;
+	/* Block alloc bitmap cache */
+	struct gfs_bitmap *rd_bits;     /* Array of block bitmap
descriptors */
+	struct buffer_head **rd_bh;     /* Array of ptrs to block bitmap
bh's */
 
-	uint32_t rd_last_alloc_data;
-	uint32_t rd_last_alloc_meta;
+	/* Block allocation strategy, rgrp scope. Start at these blocks
when
+	 * searching for next data/meta block to alloc */
+	uint32_t rd_last_alloc_data;    /* most recent data block
allocated */
+	uint32_t rd_last_alloc_meta;    /* most recent meta block
allocated */
 
-	struct list_head rd_mhc;
-	struct list_head rd_depend;
+	struct list_head rd_mhc;        /* cached meta-headers for this
rgrp */
+	struct list_head rd_depend;     /* dependency elements */
 
-	struct gfs_sbd *rd_sbd;
+	struct gfs_sbd *rd_sbd;		/* fs incore superblock (fs
instance) */
 };
 
 /*
  *  Per-buffer data
+ *  One of these is attached as GFS private data to each fs block's
buffer_head.
+ *  These also link into the Active Items Lists (AIL) (buffers flushed
to
+ *    on-disk log, but not yet flushed to on-disk in-place locations)
attached
+ *    to transactions and glocks.
  */
 
 struct gfs_bufdata {
-	struct buffer_head *bd_bh;	/* struct buffer_head which this
struct belongs to */
-	struct gfs_glock *bd_gl;	/* Pointer to Glock struct for
this bh */
+	struct buffer_head *bd_bh;  /* we belong to this Linux
buffer_head */
+	struct gfs_glock *bd_gl;    /* this glock protects buffer's
payload */
 
 	struct gfs_log_element bd_new_le;
 	struct gfs_log_element bd_incore_le;
 
-	char *bd_frozen;
-	struct semaphore bd_lock;
+	char *bd_frozen;            /* "frozen" copy of buffer's data */
+	struct semaphore bd_lock;   /* protects access to this structure
*/
 
-	unsigned int bd_pinned;	                /* Pin count */
-	struct list_head bd_ail_tr_list;	/* List of buffers
hanging off tr_ail_bufs */
-	struct list_head bd_ail_gl_list;	/* List of buffers
hanging off gl_ail_bufs */
+	/* "pin" means keep buffer in RAM, don't write to disk (yet) */
+	unsigned int bd_pinned;	         /* recursive pin count */
+	struct list_head bd_ail_tr_list; /* link to transaction's AIL
list */
+	struct list_head bd_ail_gl_list; /* link to glock's AIL list */
 };
 
 /*
  *  Glock operations
+ *  One set of operations for each glock, the set selected by type of
glock.
+ *  These functions get called at various points in a glock's lifetime.
+ *  "xmote" = promote (lock) a glock at inter-node level.
+ *  "th" = top half, "bh" = bottom half
  */
 
 struct gfs_glock_operations {
+
+	/* before acquiring a lock at inter-node level */
 	void (*go_xmote_th) (struct gfs_glock * gl, unsigned int state,
 			     int flags);
+
+	/* after acquiring a lock at inter-node level */
 	void (*go_xmote_bh) (struct gfs_glock * gl);
+
+	/* before releasing a lock at inter-node level, calls go_sync
*/
 	void (*go_drop_th) (struct gfs_glock * gl);
+
+	/* after releasing a lock at inter-node level, calls go_inval
*/
 	void (*go_drop_bh) (struct gfs_glock * gl);
+
+	/* sync dirty data to disk before releasing an inter-node lock
+	 * (another node needs to read the updated data from disk) */
 	void (*go_sync) (struct gfs_glock * gl, int flags);
+
+	/* invalidate local data just after releasing an inter-node lock
+	 * (another node may change the on-disk data, so it's no good to
us) */
 	void (*go_inval) (struct gfs_glock * gl, int flags);
+
+	/* lock-type-specific check to see if it's okay to unlock a
glock */
 	int (*go_demote_ok) (struct gfs_glock * gl);
+
+	/* after locking at local process level */
 	int (*go_lock) (struct gfs_glock * gl, int flags);
+
+	/* before unlocking at local process level */
 	void (*go_unlock) (struct gfs_glock * gl, int flags);
+
+	/* after receiving a callback: another node needs the lock */
 	void (*go_callback) (struct gfs_glock * gl, unsigned int state);
+
 	void (*go_greedy) (struct gfs_glock * gl);
-	int go_type;
+
+	/* lock type: locks with same lock # (usually an fs block #),
+	 *   but different types, are different locks */
+	int go_type;    /* glock type */
 };
 
-/*  Actions  */
-#define HIF_MUTEX               (0)
-#define HIF_PROMOTE             (1)
-#define HIF_DEMOTE              (2)
-#define HIF_GREEDY              (3)
+/*
+ *  Glock holder structure
+ *  These coordinate the use, within this node, of an acquired
inter-node lock.
+ *  One for each holder of a glock.  A glock may be shared within a
node by
+ *    several processes, or even by several recursive requests from the
same
+ *    process.  Each is a separate "holder".  To be shared locally, the
glock
+ *    must be in "SHARED" or "DEFERRED" state at inter-node level,
which means
+ *    that processes on other nodes might also read the protected
entity.
+ *  When a process needs to manipulate a lock, it requests it via one
of
+ *    these holder structures.  If the request cannot be satisfied
immediately,
+ *    the holder structure gets queued on one of these glock lists:
+ *    1) waiters1, for gaining exclusive access to the glock structure.
+ *    2) waiters2, for locking (promoting) or unlocking (demoting) a
lock.
+ *       This may require changing lock state at inter-node level.
+ *  When holding a lock, gfs_holder struct stays on glock's holder
list.
+ *  See gfs-kernel/src/harness/lm_interface.h for gh_state (LM_ST_...)
+ *    and gh_flags (LM_FLAG...) fields.
+ *  Also see glock.h for gh_flags field (GL_...) flags.
+ */
+/*  Action requests  */
+#define HIF_MUTEX       (0)  /* exclusive access to glock struct */
+#define HIF_PROMOTE     (1)  /* change lock to more restrictive state
*/
+#define HIF_DEMOTE      (2)  /* change lock to less restrictive state
*/
+#define HIF_GREEDY      (3)
 
 /*  States  */
-#define HIF_ALLOCED             (4)
-#define HIF_DEALLOC             (5)
-#define HIF_HOLDER              (6)
-#define HIF_FIRST               (7)
-#define HIF_WAKEUP              (8)
-#define HIF_RECURSE             (9)
+#define HIF_ALLOCED     (4)  /* holder structure is or was in use */
+#define HIF_DEALLOC     (5)  /* holder structure no longer in use */
+#define HIF_HOLDER      (6)  /* we have been granted a hold on the lock
*/
+#define HIF_FIRST       (7)  /* we are first on glock's holder list */
+#define HIF_WAKEUP      (8)  /* wake us up when request is satisfied */
+#define HIF_RECURSE     (9)  /* recursive locks on same glock by same
process */
 
 struct gfs_holder {
-	struct list_head gh_list;
+	struct list_head gh_list;      /* link to one of glock's holder
lists */
 
-	struct gfs_glock *gh_gl;
-	struct task_struct *gh_owner;
-	unsigned int gh_state;
-	int gh_flags;
-
-	int gh_error;
-	unsigned long gh_iflags;
-	struct completion gh_wait;
+	struct gfs_glock *gh_gl;       /* glock that we're holding */
+	struct task_struct *gh_owner;  /* Linux process that is the
holder */
+
+	/* request to change lock state */
+	unsigned int gh_state;         /* LM_ST_... requested lock state
*/
+	int gh_flags;                  /* GL_... or LM_FLAG_... req
modifiers */
+
+	int gh_error;                  /* GLR_... CANCELLED or TRYFAILED
*/
+	unsigned long gh_iflags;       /* HIF_... see above */
+	struct completion gh_wait;     /* wait for completion of ... */
 };
 
 /*
  *  Glock Structure
- */
-
-#define GLF_PLUG                (0)
-#define GLF_LOCK                (1)
-#define GLF_STICKY              (2)
+ *  One for each inter-node lock held by this node.
+ *  A glock is a local representation/abstraction of an inter-node
lock.
+ *    Inter-node locks are managed by a "lock module" which plugs in to
the
+ *    lock harness / glock interface (see gfs-kernel/harness).
Different
+ *    lock modules support different lock protocols (e.g. GULM, GDLM,
no_lock).
+ *  A glock may have one or more holders within a node.  See gfs_holder
above.
+ *  Glocks are managed within a hash table hosted by the in-core
superblock.
+ *  After all holders have released a glock, it will stay in the hash
table
+ *    cache for a certain time (gt_prefetch_secs), during which the
inter-node
+ *    lock will not be released unless another node needs the lock.
This
+ *    provides better performance in case this node needs the glock
again soon.
+ *  Each glock has an associated vector of lock-type-specific "glops"
functions
+ *    which are called at important times during the life of a glock,
and
+ *    which define the type of lock (e.g. dinode, rgrp, non-disk, etc).
+ *    See gfs_glock_operations above.
+ *  A glock, at inter-node scope, is identified by the following
dimensions:
+ *    1)  lock number (usually a block # for on-disk protected
entities,
+ *           or a fixed assigned number for non-disk locks, e.g.
MOUNT).
+ *    2)  lock type (actually, the type of entity protected by the
lock).
+ *    3)  lock namespace, to support multiple GFS filesystems
simultaneously.
+ *           Namespace (usually cluster:filesystem) is specified when
mounting.
+ *           See man page for gfs_mount.
+ *  Glocks require support of Lock Value Blocks (LVBs) by the
inter-node lock
+ *    manager.  LVBs are small (32-byte) chunks of data associated with
a given
+ *    lock, that can be quickly shared between cluster nodes.  Used for
certain
+ *    purposes such as sharing an rgroup's block usage statistics
without
+ *    requiring the overhead of:
+ *      -- sync-to-disk by one node, then a
+ *      -- read from disk by another node.
+ *  
+ */
+
+#define GLF_PLUG                (0)  /* dummy */
+#define GLF_LOCK                (1)  /* exclusive access to glock
structure */
+#define GLF_STICKY              (2)  /* permanent lock, used sparingly
*/
 #define GLF_PREFETCH            (3)
 #define GLF_SYNC                (4)
 #define GLF_DIRTY               (5)
-#define GLF_LVB_INVALID         (6)
+#define GLF_LVB_INVALID         (6)  /* LVB does not contain valid data
*/
 #define GLF_SKIP_WAITERS2       (7)
 #define GLF_GREEDY              (8)
 
 struct gfs_glock {
-	struct list_head gl_list;
-	unsigned long gl_flags;
-	struct lm_lockname gl_name;
-	atomic_t gl_count;
-
-	spinlock_t gl_spin;
-
-	unsigned int gl_state;
-	struct list_head gl_holders;
-	struct list_head gl_waiters1;	/*  HIF_MUTEX  */
-	struct list_head gl_waiters2;	/*  HIF_DEMOTE, HIF_GREEDY  */
-	struct list_head gl_waiters3;	/*  HIF_PROMOTE  */
+	struct list_head gl_list;    /* link to superblock's hash table
*/
+	unsigned long gl_flags;      /* GLF_... see above */
+	struct lm_lockname gl_name;  /* lock number and lock type */
+	atomic_t gl_count;           /* recursive access/usage count */
+
+	spinlock_t gl_spin;          /* protects some members of this
struct */
+
+	/* lock state reflects inter-node manager's lock state */
+	unsigned int gl_state;       /* LM_ST_... see
harness/lm_interface.h */
+
+	/* lists of gfs_holders */
+	struct list_head gl_holders;  /* all current holders of the
glock */
+	struct list_head gl_waiters1; /* wait for excl. access to glock
struct*/
+	struct list_head gl_waiters2; /* HIF_DEMOTE, HIF_GREEDY */
+	struct list_head gl_waiters3; /* HIF_PROMOTE */
 
-	struct gfs_glock_operations *gl_ops;
+	struct gfs_glock_operations *gl_ops; /* function vector, defines
type */
 
 	struct gfs_holder *gl_req_gh;
 	gfs_glop_bh_t gl_req_bh;
 
-	lm_lock_t *gl_lock;
-	char *gl_lvb;
-	atomic_t gl_lvb_count;
-
-	uint64_t gl_vn;
-	unsigned long gl_stamp;
-	void *gl_object;
+	lm_lock_t *gl_lock;       /* lock module's private lock data */
+	char *gl_lvb;             /* Lock Value Block */
+	atomic_t gl_lvb_count;    /* LVB recursive usage (hold/unhold)
count */
+
+	uint64_t gl_vn;           /* incremented when protected data
changes */
+	unsigned long gl_stamp;   /* glock cache retention timer */
+	void *gl_object;          /* the protected entity (e.g. a
dinode) */
 
 	struct gfs_log_element gl_new_le;
 	struct gfs_log_element gl_incore_le;
 
-	struct gfs_gl_hash_bucket *gl_bucket;
-	struct list_head gl_reclaim;
+	struct gfs_gl_hash_bucket *gl_bucket; /* our bucket in hash
table */
+	struct list_head gl_reclaim;          /* link to "reclaim" list
*/
 
-	struct gfs_sbd *gl_sbd;
+	struct gfs_sbd *gl_sbd;               /* superblock (fs
instance) */
 
-	struct inode *gl_aspace;
-	struct list_head gl_dirty_buffers;
-	struct list_head gl_ail_bufs;
+	struct inode *gl_aspace;              /* Linux VFS inode */
+	struct list_head gl_dirty_buffers;    /* ?? */
+	struct list_head gl_ail_bufs;         /* AIL buffers protected
by us */
 };
 
 /*
  *  In-Place Reservation structure
+ *  Coordinates allocation of "in-place" (as opposed to journal) fs
blocks,
+ *     which contain persistent inode/file/directory data and metadata.
+ *     These blocks are the allocatable blocks within resource groups
(i.e.
+ *     not including rgrp header and block alloc bitmap blocks).
+ *  gfs_inplace_reserve() calculates a fulfillment plan for allocating
blocks,
+ *     based on block statistics in the resource group headers.
+ *  Then, gfs_blkalloc() or gfs_metaalloc() walks the block alloc
bitmaps
+ *     to do the actual allocation.
  */
 
 struct gfs_alloc {
-	/*  Quota stuff  */
-
-	unsigned int al_qd_num;
-	struct gfs_quota_data *al_qd[4];
-	struct gfs_holder al_qd_ghs[4];
-
-	/* Filled in by the caller to gfs_inplace_reserve() */
-
-	uint32_t al_requested_di;
-	uint32_t al_requested_meta;
-	uint32_t al_requested_data;
-
-	/* Filled in by gfs_inplace_reserve() */
-
-	char *al_file;
-	unsigned int al_line;
-	struct gfs_holder al_ri_gh;
-	struct gfs_holder al_rgd_gh;
-	struct gfs_rgrpd *al_rgd;
-	uint32_t al_reserved_meta;
-	uint32_t al_reserved_data;
-
-	/* Filled in by gfs_blkalloc() */
-
-	uint32_t al_alloced_di;
-	uint32_t al_alloced_meta;
-	uint32_t al_alloced_data;
+	/*
+	 *  Up to 4 quotas (including an inode's user and group quotas)
+	 *  can track changes in block allocation
+	 */
+
+	unsigned int al_qd_num;          /* # of quotas tracking changes
*/
+	struct gfs_quota_data *al_qd[4]; /* ptrs to quota structures */
+	struct gfs_holder al_qd_ghs[4];  /* holders for quota glocks */
+
+	/* Request, filled in by the caller to gfs_inplace_reserve() */
+
+	uint32_t al_requested_di;     /* number of dinodes to reserve */
+	uint32_t al_requested_meta;   /* number of metadata blocks to
reserve */
+	uint32_t al_requested_data;   /* number of data blocks to
reserve */
+
+	/* Fulfillment plan, filled in by gfs_inplace_reserve() */
+
+	char *al_file;                /* debug info, .c file making
request */
+	unsigned int al_line;         /* debug info, line of code making
req */
+	struct gfs_holder al_ri_gh;   /* glock holder for resource grp
index */
+	struct gfs_holder al_rgd_gh;  /* glock holder for al_rgd rgrp */
+	struct gfs_rgrpd *al_rgd;     /* resource group from which to
alloc */
+	uint32_t al_reserved_meta;    /* alloc this # meta blocks from
al_rgd */
+	uint32_t al_reserved_data;    /* alloc this # data blocks from
al_rgd */
+
+	/* Actual alloc, filled in by gfs_blkalloc()/gfs_metaalloc(),
etc. */
+
+	uint32_t al_alloced_di;       /* # dinode blocks allocated */
+	uint32_t al_alloced_meta;     /* # meta blocks allocated */
+	uint32_t al_alloced_data;     /* # data blocks allocated */
 
 	/* Dinode allocation crap */
 
-	struct gfs_unlinked *al_ul;
+	struct gfs_unlinked *al_ul;   /* unlinked dinode log entry */
 };
 
 /*
@@ -339,27 +472,32 @@
 #define GIF_SW_PAGED            (2)
 
 struct gfs_inode {
-	struct gfs_inum i_num;
+	struct gfs_inum i_num;   /* formal inode # and block address */
 
-	atomic_t i_count;
-	unsigned long i_flags;
+	atomic_t i_count;        /* recursive usage (get/put) count */
+	unsigned long i_flags;   /* GIF_...  see above */
 
-	uint64_t i_vn;
-	struct gfs_dinode i_di;
+	uint64_t i_vn;           /* version #: if different from glock's
vn,
+	                            we need to read inode from disk */
+	struct gfs_dinode i_di;  /* dinode (on-disk) structure */
 
-	struct gfs_glock *i_gl;
-	struct gfs_sbd *i_sbd;
-	struct inode *i_vnode;
+	struct gfs_glock *i_gl;  /* this glock protects this inode */
+	struct gfs_sbd *i_sbd;   /* superblock (fs instance structure)
*/
+	struct inode *i_vnode;   /* Linux VFS inode structure */
 
-	struct gfs_holder i_iopen_gh;
+	struct gfs_holder i_iopen_gh;  /* glock holder for # inode opens
lock */
 
-	struct gfs_alloc *i_alloc;
-	uint64_t i_last_rg_alloc;
+	/* block allocation strategy, inode scope */
+	struct gfs_alloc *i_alloc; /* in-place block reservation
structure */
+	uint64_t i_last_rg_alloc;  /* most recent block alloc was from this rgrp */
 
-	struct task_struct *i_creat_task;
-	pid_t i_creat_pid;
+	/* Linux process that originally created this inode */
+	struct task_struct *i_creat_task; /* Linux "current" task struct
*/
+	pid_t i_creat_pid;                /* Linux process ID
current->pid */
 
-	spinlock_t i_lock;
+	spinlock_t i_lock;                /* protects this structure */
+
+	/* cache of most-recently used buffers in indirect addressing
chain */
 	struct buffer_head *i_cache[GFS_MAX_META_HEIGHT];
 
 	unsigned int i_greedy;
@@ -378,8 +516,8 @@
 	struct semaphore f_fl_lock;
 	struct gfs_holder f_fl_gh;
 
-	struct gfs_inode *f_inode;
-	struct file *f_vfile;
+	struct gfs_inode *f_inode;        /* incore GFS inode */
+	struct file *f_vfile;             /* Linux file struct */
 };
 
 /*
@@ -393,112 +531,143 @@
 #define ULF_LOCK                (4)
 
 struct gfs_unlinked {
-	struct list_head ul_list;
-	unsigned int ul_count;
+	struct list_head ul_list;    /* link to superblock's
sd_unlinked_list */
+	unsigned int ul_count;       /* usage count */
 
-	struct gfs_inum ul_inum;
-	unsigned long ul_flags;
+	struct gfs_inum ul_inum;     /* formal inode #, block addr */
+	unsigned long ul_flags;      /* ULF_... */
 
-	struct gfs_log_element ul_new_le;
-	struct gfs_log_element ul_incore_le;
-	struct gfs_log_element ul_ondisk_le;
+	struct gfs_log_element ul_new_le;    /* new, not yet committed
*/
+	struct gfs_log_element ul_incore_le; /* committed to incore log
*/
+	struct gfs_log_element ul_ondisk_le; /* committed to ondisk log
*/
 };
 
 /*
  *  Quota log element
+ *  One for each logged change in a block alloc value affecting a given
quota.
+ *  Only one of these for a given quota within a given transaction;
+ *    multiple changes, within one transaction, for a given quota will
be
+ *    combined into one log element.
  */
 
 struct gfs_quota_le {
-	struct gfs_log_element ql_le;
+	/* Log element maps us to a particular set of log operations
functions,
+	 *    and to a particular transaction */
+	struct gfs_log_element ql_le;    /* generic log element
structure */
 
-	struct gfs_quota_data *ql_data;
-	struct list_head ql_data_list;
+	struct gfs_quota_data *ql_data;  /* the quota we're changing */
+	struct list_head ql_data_list;   /* link to quota's log element
list */
 
-	int64_t ql_change;
+	int64_t ql_change;           /* # of blocks alloc'd (+) or freed
(-) */
 };
 
-#define QDF_USER                (0)
-#define QDF_OD_LIST             (1)
-#define QDF_LOCK                (2)
+/*
+ *  Quota structure
+ *  One for each user or group quota.
+ *  Summarizes all block allocation activity for a given quota, and
supports
+ *    recording updates of current block alloc values in GFS' special
quota
+ *    file, including the journaling of these updates, encompassing
+ *    multiple transactions and log dumps.
+ */
+
+#define QDF_USER                (0)   /* user (1) vs. group (0) quota
*/
+#define QDF_OD_LIST             (1)   /* waiting for sync to quota file
*/
+#define QDF_LOCK                (2)   /* protects access to this
structure */
 
 struct gfs_quota_data {
-	struct list_head qd_list;
-	unsigned int qd_count;
+	struct list_head qd_list;     /* Link to superblock's
sd_quota_list */
+	unsigned int qd_count;        /* usage/reference count */
 
-	uint32_t qd_id;
-	unsigned long qd_flags;
+	uint32_t qd_id;               /* user or group ID number */
+	unsigned long qd_flags;       /* QDF_... */
 
-	struct list_head qd_le_list;
+	/* this list is for non-log-dump transactions */
+	struct list_head qd_le_list;  /* List of gfs_quota_le log
elements */
 
-	int64_t qd_change_new;
-	int64_t qd_change_ic;
-	int64_t qd_change_od;
-	int64_t qd_change_sync;
+	/* summary of block alloc changes affecting this quota, in
various
+	 * stages of logging & syncing changes to the special quota file
*/
+	int64_t qd_change_new;  /* new, not yet committed to in-core
log*/
+	int64_t qd_change_ic;   /* committed to in-core log */
+	int64_t qd_change_od;   /* committed to on-disk log */
+	int64_t qd_change_sync; /* being synced to the in-place quota
file */
 
-	struct gfs_quota_le qd_ondisk_ql;
-	uint64_t qd_sync_gen;
+	struct gfs_quota_le qd_ondisk_ql; /* log element for log dump */
+	uint64_t qd_sync_gen;         /* sync-to-quota-file generation #
*/
 
-	struct gfs_glock *qd_gl;
-	struct gfs_quota_lvb qd_qb;
+	/* glock provides protection for quota, *and* provides
+	 * lock value block (LVB) communication, between nodes, of
current
+	 * quota values.  Shared lock -> LVB read.  EX lock -> LVB
write. */
+	struct gfs_glock *qd_gl;      /* glock for this quota */
+	struct gfs_quota_lvb qd_qb;   /* LVB (limit/warn/value) */
 
-	unsigned long qd_last_warn;
+	unsigned long qd_last_warn;   /* jiffies of last warning to user
*/
 };
 
+/*
+ * Log Buffer descriptor structure
+ * One for each fs block buffer recorded in the log
+ */
 struct gfs_log_buf {
-	struct list_head lb_list;
+	/* link to one of the transaction structure's lists */
+	struct list_head lb_list;      /* link to tr_free_bufs or
tr_list */
 
 	struct buffer_head lb_bh;
 	struct buffer_head *lb_unlock;
 };
 
 /*
- *  Transaction structures
+ *  Transaction structure
+ *  One for each transaction
+ *  This coordinates the logging and flushing of written metadata.
  */
 
 #define TRF_LOG_DUMP            (0x00000001)
 
 struct gfs_trans {
-	struct list_head tr_list;
+
+	/* link to various lists */
+	struct list_head tr_list;      /* superblk's incore trans or AIL
list*/
 
 	/* Initial creation stuff */
 
-	char *tr_file;
-	unsigned int tr_line;
+	char *tr_file;                 /* debug info: .c file creating
trans */
+	unsigned int tr_line;          /* debug info: codeline creating
trans */
 
-	unsigned int tr_mblks_asked;	/* Number of log blocks asked to
be reserved */
-	unsigned int tr_eblks_asked;
-	unsigned int tr_seg_reserved;	/* Number of segments reserved
*/
+	/* reservations for on-disk space in journal */
+	unsigned int tr_mblks_asked;   /* # of meta log blocks requested
*/
+	unsigned int tr_eblks_asked;   /* # of extra log blocks
requested */
+	unsigned int tr_seg_reserved;  /* # of segments actually
reserved */
 
-	struct gfs_holder *tr_t_gh;
+	struct gfs_holder *tr_t_gh;    /* glock holder for this
transaction */
 
 	/* Stuff filled in during creation */
 
-	unsigned int tr_flags;
-	struct list_head tr_elements;
+	unsigned int tr_flags;         /* TRF_... */
+	struct list_head tr_elements;  /* List of this trans' log
elements */
 
 	/* Stuff modified during the commit */
 
-	unsigned int tr_num_free_bufs;
+	unsigned int tr_num_free_bufs; /* # of free gfs_log_buf structs */
 	struct list_head tr_free_bufs;
-	unsigned int tr_num_free_bmem;
+	unsigned int tr_num_free_bmem; /* # of free fs-block-size buffers */
 	struct list_head tr_free_bmem;
 
-	uint64_t tr_log_head;	        /* The current log head */
-	uint64_t tr_first_head;	        /* First header block */
+	uint64_t tr_log_head;          /* The current log head */
+	uint64_t tr_first_head;	       /* First header block */
 
-	struct list_head tr_bufs;	/* List of buffers going to the
log */
+	struct list_head tr_bufs;      /* List of buffers going to the
log */
 
-	/* Stuff that's part of the AIL */
+	/* Stuff that's part of the Active Items List (AIL) */
 
-	struct list_head tr_ail_bufs;
+	struct list_head tr_ail_bufs;  /* List of buffers on AIL list */
 
-	/* Private data for different log element types */
+	/* # log elements of various types on tr_elements list */
 
-	unsigned int tr_num_gl;
-	unsigned int tr_num_buf;
-	unsigned int tr_num_iul;
-	unsigned int tr_num_ida;
-	unsigned int tr_num_q;
+	unsigned int tr_num_gl;        /* glocks */
+	unsigned int tr_num_buf;       /* buffers */
+	unsigned int tr_num_iul;       /* unlinked inodes */
+	unsigned int tr_num_ida;       /* de-allocated inodes */
+	unsigned int tr_num_q;         /* quotas */
 };
 
 /*
@@ -511,153 +680,201 @@
 } __attribute__ ((__aligned__(SMP_CACHE_BYTES)));
 
 /*
- *  Super Block Data Structure  (One per filesystem)
- */
+ *  "Super Block" Data Structure
+ *  One per mounted filesystem.
+ *  This is the big instance structure that ties everything together
for
+ *    a given mounted filesystem.  Each GFS mount has its own,
supporting
+ *    mounts of multiple GFS filesystems on each node.
+ *  Pointer to this is usually seen as "sdp" throughout code.
+ *  This is a very large structure, as structures go, in part because
it
+ *    contains arrays of hash buckets for various in-core caches.
+ */
+
+/* sd_flags */
+
+#define SDF_JOURNAL_LIVE        (0)  /* journaling is active (fs is
writeable)*/
+
+/* daemon run (1) / stop (0) flags */
+#define SDF_SCAND_RUN           (1)  /* put unused glocks on reclaim
queue */
+#define SDF_GLOCKD_RUN          (2)  /* reclaim (dealloc) unused glocks
*/
+#define SDF_RECOVERD_RUN        (3)  /* recover journal of a crashed
node */
+#define SDF_LOGD_RUN            (4)  /* update log tail after AIL
flushed */
+#define SDF_QUOTAD_RUN          (5)  /* sync quota changes to file,
cleanup */
+#define SDF_INODED_RUN          (6)  /* deallocate unlinked inodes */
+
+/* (re)mount options from Linux VFS */
+#define SDF_NOATIME             (7)  /* don't change access time */
+#define SDF_ROFS                (8)  /* read-only mode (no journal) */
 
-#define SDF_JOURNAL_LIVE        (0)
-#define SDF_SCAND_RUN           (1)
-#define SDF_GLOCKD_RUN          (2)
-#define SDF_RECOVERD_RUN        (3)
-#define SDF_LOGD_RUN            (4)
-#define SDF_QUOTAD_RUN          (5)
-#define SDF_INODED_RUN          (6)
-#define SDF_NOATIME             (7)
-#define SDF_ROFS                (8)
+/* journal log dump support */
 #define SDF_NEED_LOG_DUMP       (9)
 #define SDF_FOUND_UL_DUMP       (10)
 #define SDF_FOUND_Q_DUMP        (11)
-#define SDF_IN_LOG_DUMP         (12)
+#define SDF_IN_LOG_DUMP         (12) /* serializes log dumps */
+
 
-#define GFS_GL_HASH_SHIFT       (13)
+/* constants for various in-core caches */
+
+/* glock cache */
+#define GFS_GL_HASH_SHIFT       (13)    /* # hash buckets = 8K */
 #define GFS_GL_HASH_SIZE        (1 << GFS_GL_HASH_SHIFT)
 #define GFS_GL_HASH_MASK        (GFS_GL_HASH_SIZE - 1)
 
-#define GFS_MHC_HASH_SHIFT      (10)
+/* meta header cache */
+#define GFS_MHC_HASH_SHIFT      (10)    /* # hash buckets = 1K */
 #define GFS_MHC_HASH_SIZE       (1 << GFS_MHC_HASH_SHIFT)
 #define GFS_MHC_HASH_MASK       (GFS_MHC_HASH_SIZE - 1)
 
-#define GFS_DEPEND_HASH_SHIFT   (10)
+/* dependency cache */
+#define GFS_DEPEND_HASH_SHIFT   (10)    /* # hash buckets = 1K */
 #define GFS_DEPEND_HASH_SIZE    (1 << GFS_DEPEND_HASH_SHIFT)
 #define GFS_DEPEND_HASH_MASK    (GFS_DEPEND_HASH_SIZE - 1)
 
 struct gfs_sbd {
-	struct gfs_sb sd_sb;	        /* Super Block */
+	struct gfs_sb sd_sb;            /* GFS on-disk Super Block image
*/
 
-	struct super_block *sd_vfs;	/* FS's device independent sb */
+	struct super_block *sd_vfs;     /* Linux VFS device independent
sb */
 
-	struct gfs_args sd_args;
-	unsigned long sd_flags;
+	struct gfs_args sd_args;        /* Mount arguments */
+	unsigned long sd_flags;         /* SDF_... see above */
 
-	struct gfs_tune sd_tune;	/* FS tuning structure */
+	struct gfs_tune sd_tune;	/* Filesystem tuning structure
*/
 
 	/* Resource group stuff */
 
-	struct gfs_inode *sd_riinode;	/* rindex inode */
-	uint64_t sd_riinode_vn;	/* Version number of the resource index
inode */
-
-	struct list_head sd_rglist;	/* List of resource groups */
-	struct semaphore sd_rindex_lock;
-
-	struct list_head sd_rg_mru_list;	/* List of resource
groups in MRU order */
-	spinlock_t sd_rg_mru_lock;	/* Lock for MRU list */
-	struct list_head sd_rg_recent;	/* Recently used rgrps */
-	spinlock_t sd_rg_recent_lock;
-	struct gfs_rgrpd *sd_rg_forward;	/* Next new rgrp to try
for allocation */
-	spinlock_t sd_rg_forward_lock;
+	struct gfs_inode *sd_riinode;	/* Resource Index (rindex) inode
*/
+	uint64_t sd_riinode_vn;	        /* Resource Index version #
(detects
+	                                   whether new rgrps have been
added) */
+
+	struct list_head sd_rglist;	/* List of all resource groups,
*/
+	struct semaphore sd_rindex_lock;/*     on-disk order */
+	struct list_head sd_rg_mru_list;/* List of resource groups, */
+	spinlock_t sd_rg_mru_lock;      /*     most-recently-used (MRU)
order */
+	struct list_head sd_rg_recent;	/* List of rgrps from which
blocks */
+	spinlock_t sd_rg_recent_lock;   /*     were recently allocated
*/
+	struct gfs_rgrpd *sd_rg_forward;/* Next rgrp from which to
attempt */
+	spinlock_t sd_rg_forward_lock;  /*     a block alloc */
 
-	unsigned int sd_rgcount;	/* Count of resource groups */
+	unsigned int sd_rgcount;	/* Total # of resource groups */
 
 	/*  Constants computed on mount  */
 
-	uint32_t sd_fsb2bb;
-	uint32_t sd_fsb2bb_shift;	/* Shift FS Block numbers to the
left by
-					   this to get buffer cache
blocks  */
-	uint32_t sd_diptrs;	/* Number of pointers in a dinode */
-	uint32_t sd_inptrs;	/* Number of pointers in a indirect
block */
-	uint32_t sd_jbsize;	/* Size of a journaled data block */
-	uint32_t sd_hash_bsize;	/* sizeof(exhash block) */
+	/* "bb" == "basic block" == 512Byte sector */
+	uint32_t sd_fsb2bb;             /* # 512B basic blocks in a FS
block */
+	uint32_t sd_fsb2bb_shift;       /* Shift sector # to the right
by 
+	                                   this to get FileSystem block
addr */
+	uint32_t sd_diptrs;     /* Max # of block pointers in a dinode
*/
+	uint32_t sd_inptrs;     /* Max # of block pointers in an
indirect blk */
+	uint32_t sd_jbsize;     /* Payload size (bytes) of a journaled
metadata
+	                               block (GFS journals all meta
blocks) */
+	uint32_t sd_hash_bsize; /* sizeof(exhash block) */
 	uint32_t sd_hash_bsize_shift;
-	uint32_t sd_hash_ptrs;	/* Number of points in a hash block */
-	uint32_t sd_max_dirres;	/* Maximum space needed to add a
directory entry */
-	uint32_t sd_max_height;	/* Maximum height of a file's metadata
tree */
+	uint32_t sd_hash_ptrs;  /* Number of pointers in a hash block */
+	uint32_t sd_max_dirres; /* Max blocks needed to add a directory
entry */
+	uint32_t sd_max_height;	/* Max height of a file's indir addr
tree */
 	uint64_t sd_heightsize[GFS_MAX_META_HEIGHT];
-	uint32_t sd_max_jheight;	/* Maximum height of a journaled
file's metadata tree */
+	uint32_t sd_max_jheight; /* Max hgt, journaled file's indir addr
tree */
 	uint64_t sd_jheightsize[GFS_MAX_META_HEIGHT];
 
 	/*  Lock Stuff  */
 
+	/* glock cache (all glocks currently held by this node for this
fs) */
 	struct gfs_gl_hash_bucket sd_gl_hash[GFS_GL_HASH_SIZE];
 
-	struct list_head sd_reclaim_list;
+	/* glock reclaim support for scand and glockd */
+	struct list_head sd_reclaim_list;   /* list of glocks to reclaim
*/
 	spinlock_t sd_reclaim_lock;
 	wait_queue_head_t sd_reclaim_wchan;
-	atomic_t sd_reclaim_count;
+	atomic_t sd_reclaim_count;          /* # glocks on reclaim list
*/
 
-	struct lm_lockstruct sd_lockstruct;
+	/* lock module tells us if we're first-to-mount, 
+	 *    which journal to use, etc. */
+	struct lm_lockstruct sd_lockstruct; /* info provided by lock
module */
 
-	struct list_head sd_mhc[GFS_MHC_HASH_SIZE];
-	struct list_head sd_mhc_single;
+	/*  Other caches */
+
+	/* meta-header cache (incore copies of on-disk meta headers)*/
+	struct list_head sd_mhc[GFS_MHC_HASH_SIZE]; /* hash buckets */
+	struct list_head sd_mhc_single;     /* non-hashed list of all
MHCs */
 	spinlock_t sd_mhc_lock;
-	atomic_t sd_mhc_count;
+	atomic_t sd_mhc_count;              /* # MHCs in cache */
 
-	struct list_head sd_depend[GFS_DEPEND_HASH_SIZE];
+	/* dependency cache */
+	struct list_head sd_depend[GFS_DEPEND_HASH_SIZE];  /* hash
buckets */
 	spinlock_t sd_depend_lock;
-	atomic_t sd_depend_count;
+	atomic_t sd_depend_count;           /* # dependencies in cache
*/
 
-	struct gfs_holder sd_live_gh;
+	/* LIVE inter-node lock indicates that fs is mounted on at least
+	 * one node */
+	struct gfs_holder sd_live_gh;       /* glock holder for LIVE
lock */
 
+	/* for quiescing the filesystem */
 	struct gfs_holder sd_freeze_gh;
 	struct semaphore sd_freeze_lock;
 	unsigned int sd_freeze_count;
 
 	/*  Inode Stuff  */
 
-	struct gfs_inode *sd_rooti;	/* FS's root inode */
+	struct gfs_inode *sd_rooti;         /* FS's root inode */
 
-	struct gfs_glock *sd_rename_gl;	/* rename glock */
+	/* only 1 node at a time may rename (e.g. mv) a file or dir */
+	struct gfs_glock *sd_rename_gl;     /* rename glock */
 
 	/*  Daemon stuff  */
 
-	struct task_struct *sd_scand_process;
-	unsigned int sd_glockd_num;
+	/* scan for glocks and inodes to toss from memory */
+	struct task_struct *sd_scand_process; /* scand places on reclaim
list*/
+	unsigned int sd_glockd_num;    /* # of glockd procs to do
reclaiming*/
+
+	/* recover journal of a crashed node */
 	struct task_struct *sd_recoverd_process;
+
+	/* update log tail as AIL gets flushed to in-place on-disk
blocks */
 	struct task_struct *sd_logd_process;
+
+	/* sync quota updates to disk, and clean up unused quota structs
*/
 	struct task_struct *sd_quotad_process;
+
+	/* clean up unused inode structures */
 	struct task_struct *sd_inoded_process;
 
+	/* support for starting/stopping daemons */
 	struct semaphore sd_thread_lock;
 	struct completion sd_thread_completion;
 
 	/*  Log stuff  */
 
-	struct gfs_glock *sd_trans_gl;	/* transaction glock */
+	/* transaction lock protects journal replay (recovery) */
+	struct gfs_glock *sd_trans_gl;	/* transaction glock structure
*/
 
-	struct gfs_inode *sd_jiinode;	/* jindex inode */
-	uint64_t sd_jiinode_vn;	/* Version number of the journal index
inode */
+	struct gfs_inode *sd_jiinode;	/* journal index inode */
+	uint64_t sd_jiinode_vn;         /* journal index version #
(detects
+	                                   if new journals have been
added) */
 
 	unsigned int sd_journals;	/* Number of journals in the FS
*/
-	struct gfs_jindex *sd_jindex;	/* Array of Jindex structures
describing this FS's journals */
+	struct gfs_jindex *sd_jindex;	/* Array of journal descriptors
*/
 	struct semaphore sd_jindex_lock;
-	unsigned long sd_jindex_refresh_time;
+	unsigned long sd_jindex_refresh_time; /* poll for new journals
(secs) */
 
-	struct gfs_jindex sd_jdesc;	/* Jindex structure describing
this machine's journal */
-	struct gfs_holder sd_journal_gh;	/* the glock for this
machine's journal */
+	struct gfs_jindex sd_jdesc;	 /* this machine's journal
descriptor */
+	struct gfs_holder sd_journal_gh; /* this machine's journal glock
*/
 
 	uint64_t sd_sequence;	/* Assigned to xactions in order they
commit */
 	uint64_t sd_log_head;	/* Block number of next journal write */
 	uint64_t sd_log_wrap;
 
 	spinlock_t sd_log_seg_lock;
-	unsigned int sd_log_seg_free;	/* Free segments in the log */
+	unsigned int sd_log_seg_free;	/* # of free segments in the log
*/
 	struct list_head sd_log_seg_list;
 	wait_queue_head_t sd_log_seg_wait;
 
-	struct list_head sd_log_ail;	/* struct gfs_trans structures
that form the Active Items List 
-					   "next" is the head, "prev" is
the tail  */
-
-	struct list_head sd_log_incore;	/* transactions that have been
commited incore (but not ondisk)
-					   "next" is the newest, "prev"
is the oldest  */
-	unsigned int sd_log_buffers;	/* Number of buffers in the
incore log */
+	/* "Active Items List" of transactions that have been flushed to
+	 * on-disk log, and are waiting for flush to in-place on-disk
blocks */
+	struct list_head sd_log_ail;	/* "next" is head, "prev" is
tail */
+
+	/* Transactions committed incore, but not yet flushed to on-disk
log */
+	struct list_head sd_log_incore;	/* "next" is newest, "prev" is
oldest */
+	unsigned int sd_log_buffers;	/* # of buffers in the incore
log */
 
 	struct semaphore sd_log_lock;	/* Lock for access to log values
*/
 
@@ -674,16 +891,17 @@
 
 	/*  quota crap  */
 
-	struct list_head sd_quota_list;
+	struct list_head sd_quota_list; /* list of all gfs_quota_data
structs */
 	spinlock_t sd_quota_lock;
 
-	atomic_t sd_quota_count;
-	atomic_t sd_quota_od_count;
+	atomic_t sd_quota_count;        /* # quotas on sd_quota_list */
+	atomic_t sd_quota_od_count;     /* # quotas waiting for sync to
+	                                   special on-disk quota file */
 
-	struct gfs_inode *sd_qinode;
+	struct gfs_inode *sd_qinode;    /* special on-disk quota file */
 
-	uint64_t sd_quota_sync_gen;
-	unsigned long sd_quota_sync_time;
+	uint64_t sd_quota_sync_gen;     /* generation, incr when sync to
file */
+	unsigned long sd_quota_sync_time; /* jiffies, last sync to quota
file */
 
 	/*  license crap  */
 
diff -ru cvs/cluster/gfs-kernel/src/gfs/log.c
build_092304/cluster/gfs-kernel/src/gfs/log.c
--- cvs/cluster/gfs-kernel/src/gfs/log.c	2004-07-12
15:22:44.000000000 -0400
+++ build_092304/cluster/gfs-kernel/src/gfs/log.c	2004-09-23
14:18:29.406501616 -0400
@@ -134,7 +134,8 @@
 /**
  * gfs_ail_start - Start I/O on the AIL
  * @sdp: the filesystem
- * @flags:
+ * @flags:  DIO_ALL -- flush *all* AIL transactions to disk
+ *          default -- flush first-on-list AIL transaction to disk
  *
  */
 
@@ -1207,7 +1208,7 @@
 		LO_CLEAN_DUMP(sdp, le);
 	}
 
-	/* If there isn't anything the AIL, we won't get back the log
+	/* If there isn't anything in the AIL, we won't get back the log
 	   space we reserved unless we do it ourselves. */
 
 	if (list_empty(&sdp->sd_log_ail)) {
diff -ru cvs/cluster/gfs-kernel/src/gfs/lops.c
build_092304/cluster/gfs-kernel/src/gfs/lops.c
--- cvs/cluster/gfs-kernel/src/gfs/lops.c	2004-06-24
04:53:28.000000000 -0400
+++ build_092304/cluster/gfs-kernel/src/gfs/lops.c	2004-09-23
14:18:41.725628824 -0400
@@ -442,6 +442,13 @@
  * @blkno: the location of the log's copy of the block
  *
  * Returns: 0 on success, -EXXX on failure
+ *
+ * Read in-place block from disk
+ * Read log (journal) block from disk
+ * Compare generation numbers
+ * Copy log block to in-place block on-disk if:
+ *   log generation # > in-place generation #
+ *   OR generation #s are ==, but data contained in block is different
(corrupt)
  */
 
 static int
diff -ru cvs/cluster/gfs-kernel/src/gfs/lvb.h
build_092304/cluster/gfs-kernel/src/gfs/lvb.h
--- cvs/cluster/gfs-kernel/src/gfs/lvb.h	2004-06-24
04:53:28.000000000 -0400
+++ build_092304/cluster/gfs-kernel/src/gfs/lvb.h	2004-09-23
14:19:09.962336192 -0400
@@ -11,26 +11,44 @@
 
************************************************************************
*******
 
************************************************************************
******/
 
+/*
+ * Formats of Lock Value Blocks (LVBs) for various types of locks.
+ * These 32-byte data chunks can be shared quickly between nodes
+ *   via the inter-node lock manager (via LAN instead of on-disk).
+ */
+
 #ifndef __LVB_DOT_H__
 #define __LVB_DOT_H__
 
 #define GFS_MIN_LVB_SIZE (32)
 
+/*
+ * Resource Group block allocation statistics
+ * Each resource group lock contains one of these in its LVB.
+ * Used for sharing approximate current statistics for statfs.
+ * Not used for actual block allocation.
+ */
 struct gfs_rgrp_lvb {
-	uint32_t rb_magic;
-	uint32_t rb_free;
-	uint32_t rb_useddi;
-	uint32_t rb_freedi;
-	uint32_t rb_usedmeta;
-	uint32_t rb_freemeta;
+	uint32_t rb_magic;      /* GFS_MAGIC sanity check value */
+	uint32_t rb_free;       /* # free data blocks */
+	uint32_t rb_useddi;     /* # used dinode blocks */
+	uint32_t rb_freedi;     /* # free dinode blocks */
+	uint32_t rb_usedmeta;   /* # used metadata blocks */
+	uint32_t rb_freemeta;   /* # free metadata blocks */
 };
 
+/*
+ * Quota
+ * Each quota lock contains one of these in its LVB.
+ * Keeps track of block allocation limits and current block allocation
+ *   for either a cluster-wide user or a cluster-wide group.
+ */
 struct gfs_quota_lvb {
-	uint32_t qb_magic;
+	uint32_t qb_magic;      /* GFS_MAGIC sanity check value */
 	uint32_t qb_pad;
-	uint64_t qb_limit;
-	uint64_t qb_warn;
-	int64_t qb_value;
+	uint64_t qb_limit;      /* hard limit of # blocks to alloc */
+	uint64_t qb_warn;       /* warn user when alloc is above this #
*/
+	int64_t qb_value;       /* current # blocks allocated */
 };
 
 /*  Translation functions  */
diff -ru cvs/cluster/gfs-kernel/src/gfs/rgrp.c
build_092304/cluster/gfs-kernel/src/gfs/rgrp.c
--- cvs/cluster/gfs-kernel/src/gfs/rgrp.c	2004-06-24
04:53:28.000000000 -0400
+++ build_092304/cluster/gfs-kernel/src/gfs/rgrp.c	2004-09-23
14:18:56.703351864 -0400
@@ -372,6 +372,7 @@
 
 	memset(count, 0, 4 * sizeof(uint32_t));
 
+	/* count # blocks in each of 4 possible allocation states */
 	for (buf = 0; buf < length; buf++) {
 		bits = &rgd->rd_bits[buf];
 		for (x = 0; x < 4; x++)
@@ -531,6 +532,7 @@
  * gfs_compute_bitstructs - Compute the bitmap sizes
  * @rgd: The resource group descriptor
  *
+ * Calculates bitmap descriptors, one for each block that contains
bitmap data
  */
 
 static void
@@ -538,7 +540,7 @@
 {
 	struct gfs_sbd *sdp = rgd->rd_sbd;
 	struct gfs_bitmap *bits;
-	uint32_t length = rgd->rd_ri.ri_length;
+	uint32_t length = rgd->rd_ri.ri_length; /* # blocks in hdr &
bitmap */
 	uint32_t bytes_left, bytes;
 	int x;
 
@@ -550,21 +552,25 @@
 	for (x = 0; x < length; x++) {
 		bits = &rgd->rd_bits[x];
 
+		/* small rgrp; bitmap stored completely in header block
*/
 		if (length == 1) {
 			bytes = bytes_left;
 			bits->bi_offset = sizeof(struct gfs_rgrp);
 			bits->bi_start = 0;
 			bits->bi_len = bytes;
+		/* header block */
 		} else if (x == 0) {
 			bytes = sdp->sd_sb.sb_bsize - sizeof(struct
gfs_rgrp);
 			bits->bi_offset = sizeof(struct gfs_rgrp);
 			bits->bi_start = 0;
 			bits->bi_len = bytes;
+		/* last block */
 		} else if (x + 1 == length) {
 			bytes = bytes_left;
 			bits->bi_offset = sizeof(struct
gfs_meta_header);
 			bits->bi_start = rgd->rd_ri.ri_bitbytes -
bytes_left;
 			bits->bi_len = bytes;
+		/* other blocks */
 		} else {
 			bytes = sdp->sd_sb.sb_bsize - sizeof(struct
gfs_meta_header);
 			bits->bi_offset = sizeof(struct
gfs_meta_header);
@@ -855,10 +861,12 @@
  * @rgd: the RG data
  * @al: the struct gfs_alloc structure describing the reservation
  *
- * Sets the $ir_datares field in @res.
- * Sets the $ir_metares field in @res.
+ * If there's room for the requested blocks to be allocated from the
RG:
+ *   Sets the $al_reserved_data field in @al.
+ *   Sets the $al_reserved_meta field in @al.
+ *   Sets the $al_rgd field in @al.
  *
- * Returns: 1 on success, 0 on failure
+ * Returns: 1 on success (it fits), 0 on failure (it doesn't fit)
  */
 
 static int
@@ -900,7 +908,7 @@
 }
 
 /**
- * recent_rgrp_first - get first RG from recent list
+ * recent_rgrp_first - get first RG from "recent" list
  * @sdp: The GFS superblock
  * @rglast: address of the rgrp used last
  *
@@ -939,7 +947,7 @@
 }
 
 /**
- * recent_rgrp_next - get next RG from recent list
+ * recent_rgrp_next - get next RG from "recent" list
  * @cur_rgd: current rgrp
  *
  * Returns: The next rgrp in the recent list
@@ -978,7 +986,7 @@
 }
 
 /**
- * recent_rgrp_remove - remove an RG from recent list
+ * recent_rgrp_remove - remove an RG from "recent" list
  * @rgd: The rgrp to remove
  *
  */
@@ -992,9 +1000,14 @@
 }
 
 /**
- * recent_rgrp_add - add an RG to recent list
+ * recent_rgrp_add - add an RG to tail of "recent" list
  * @new_rgd: The rgrp to add
  *
+ * Before adding, make sure that:
+ *   1) it's not already on the list
+ *   2) there's still room for more entries
+ * The capacity limit imposed on the "recent" list is basically a
node's "share"
+ *   of rgrps within a cluster, i.e. (total # rgrps) / (# nodes
(journals))
  */
 
 static void







More information about the Linux-cluster mailing list