[Cluster-devel] cluster/gfs/gfs_fsck bitmap.c block_list.c ini ...

rpeterso at sourceware.org rpeterso at sourceware.org
Wed Sep 20 14:04:05 UTC 2006


CVSROOT:	/cvs/cluster
Module name:	cluster
Branch: 	STABLE
Changes by:	rpeterso at sourceware.org	2006-09-20 14:04:04

Modified files:
	gfs/gfs_fsck   : bitmap.c block_list.c initialize.c 

Log message:
	This is the fix for bugzilla 200883: gfs_fsck segfaults.
	The problem was that gfs_fsck was running out of memory
	for in-core bitmaps when run on very large file systems.
	For example, a 45TB file system requires about 11GB of memory.  This fix
	does not make it possible to fsck such a file system; it only makes
	gfs_fsck exit gracefully, telling the user why it cannot continue and
	approximately how much additional memory is needed.

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/bitmap.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.1.2.1&r2=1.1.2.1.6.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/block_list.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.1.2.2&r2=1.1.2.2.6.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/initialize.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.1.2.4.4.2.2.2&r2=1.1.2.4.4.2.2.3

--- cluster/gfs/gfs_fsck/bitmap.c	2005/02/15 17:17:52	1.1.2.1
+++ cluster/gfs/gfs_fsck/bitmap.c	2006/09/20 14:04:04	1.1.2.1.6.1
@@ -54,14 +54,14 @@
 	if(!(bmap->map = malloc(sizeof(char) * bmap->mapsize))) {
 		log_err("Unable to allocate bitmap of size %"PRIu64"\n",
 			bmap->mapsize);
-		return ENOMEM;
+		return -ENOMEM;
 	}
 	if(!memset(bmap->map, 0, sizeof(char) * bmap->mapsize)) {
 		log_err("Unable to zero bitmap of size %"PRIu64"\n",
 			bmap->mapsize);
 		free(bmap->map);
 		bmap->map = NULL;
-		return ENOMEM;
+		return -ENOMEM;
 	}
 	log_debug("Allocated bitmap of size %"PRIu64
 		  " with %d chunks per byte\n",
--- cluster/gfs/gfs_fsck/block_list.c	2005/02/28 15:46:45	1.1.2.2
+++ cluster/gfs/gfs_fsck/block_list.c	2006/09/20 14:04:04	1.1.2.2.6.1
@@ -52,6 +52,7 @@
 struct block_list *block_list_create(uint64_t size, enum block_list_type type)
 {
 	struct block_list *il;
+	uint64_t addl_mem_needed = 0L;
 	log_info("Creating a block list of size %"PRIu64"...\n", size);
 
 	if ((il = malloc(sizeof(*il)))) {
@@ -64,25 +65,46 @@
 		switch(type) {
 		case gbmap:
 			if(bitmap_create(&il->list.gbmap.group_map, size, 4)) {
+				/* Note on addl_mem_needed: We've tried to allocate ram   */
+				/* for our bitmaps, but we failed.  The fs is too big.    */
+				/* We should tell them how much to allocate.  This first  */
+				/* bitmap is the biggest, but we need three more smaller  */
+				/* for the code that immediately follows.  I'm rounding   */
+				/* up to twice the memory for this bitmap, even though    */
+				/* it's actually 1 + 3/4.  That will allow for future     */
+				/* mallocs that happen after this point in the code.      */
+				/* For the bad_map, we have two more to go (total of 3)   */
+				/* but again I'm rounding it up to 4 smaller ones.        */
+				/* For the dup_map, I'm rounding from 2 to 3, and for     */
+				/* eattr_map, I'm rounding up from 1 to 2.                */
+				addl_mem_needed = il->list.gbmap.group_map.mapsize * 2;
 				stack;
 				free(il);
 				il = NULL;
 			}
-			if(bitmap_create(&il->list.gbmap.bad_map, size, 1)) {
+			else if(bitmap_create(&il->list.gbmap.bad_map, size, 1)) {
+				addl_mem_needed = il->list.gbmap.group_map.mapsize * 4;
 				stack;
 				free(il);
 				il = NULL;
 			}
-			if(bitmap_create(&il->list.gbmap.dup_map, size, 1)) {
+			else if(bitmap_create(&il->list.gbmap.dup_map, size, 1)) {
+				addl_mem_needed = il->list.gbmap.group_map.mapsize * 3;
 				stack;
 				free(il);
 				il = NULL;
 			}
-			if(bitmap_create(&il->list.gbmap.eattr_map, size, 1)) {
+			else if(bitmap_create(&il->list.gbmap.eattr_map, size, 1)) {
+				addl_mem_needed = il->list.gbmap.group_map.mapsize * 2;
 				stack;
 				free(il);
 				il = NULL;
 			}
+			if (addl_mem_needed) {
+				log_err("This system doesn't have enough memory + swap space to fsck this file system.\n");
+				log_err("Additional memory needed is approximately: %ldMB\n", addl_mem_needed / 1048576);
+				log_err("Please increase your swap space by that amount and run gfs_fsck again.\n");
+			}
 			break;
 		default:
 			log_crit("Block list type %d not implemented\n",
--- cluster/gfs/gfs_fsck/initialize.c	2006/06/21 14:21:21	1.1.2.4.4.2.2.2
+++ cluster/gfs/gfs_fsck/initialize.c	2006/09/20 14:04:04	1.1.2.4.4.2.2.3
@@ -343,6 +343,8 @@
 	}
 
 	sdp->bl = block_list_create(sdp->last_fs_block+1, gbmap);
+	if (!sdp->bl)
+		goto fail;
 
 	return 0;
 




More information about the Cluster-devel mailing list