[Crash-utility] patch to add vmss memory regions support

Dyno (Hongjun) Fu hfu at vmware.com
Thu Mar 26 22:55:18 UTC 2015


Dave,

  An assert I added caused the crash: Workstation VM memory has more block items than I expected.
I fixed it in the patch and tested it against all the dumps I have. I will try to provide a 4GB memory dump later.

ESX VM 3G Memory
================
- Group: memory pos=0x1f6f6 size=0xc000090c ------------------------------------
align_mask[0, 0]                         => 0x00ffff
regionsCount                             => 0x000000
Memory[0, 0]                             => BLOCK, pos=0x20000, size=0xc0000000

ESX VM 4G Memory
================
- Group: memory pos=0x1f6f6 size=0x10000090c -----------------------------------
align_mask[0, 0]                         => 0x00ffff
regionsCount                             => 0x000002
regionPageNum[0]                         => 0x000000
regionPPN[0]                             => 0x000000
regionSize[0]                            => 0x0c0000
regionPageNum[1]                         => 0x0c0000
regionPPN[1]                             => 0x100000
regionSize[1]                            => 0x040000
Memory[0, 0]                             => BLOCK, pos=0x20000, size=0x100000000

WS VM Memory
============
- Group: memory pos=0x93b1 size=0x10098 ----------------------------------------
align_mask[0, 0]                         => 0x00ffff
regionsCount                             => 0x000000
hotSetSize                               => 0x040000
hotSet                                   => BLOCK, pos=0x9405, size=0x8000
MainMemPageZeroStateSize                 => 0x040000
MainMemKnownZero                         => BLOCK, pos=0x11447, size=0x8000

rgds,
Dyno

On 3/26/15 1:12 PM, Dave Anderson wrote:
> 
> 
> ----- Original Message -----
>> Dave,
>>   updated the patch and please review. thanks.
>>   - the page_size/page_shift problem.
>>   - change type cast to union.
>>   - the read_vmware_vmss() regression.
>>
>> rgds,
>> Dyno
> 
> 
> Dyno,
> 
> Since you cannot make any additional sample vmss2core-generated
> dumpfiles available to me, I can only test this latest patch
> on the two dumpfile/kernel pairs that you gave me in February:
>   
>   vmlinux-2.6.32-431.el6 CentOS6.5-11bd56db.vmss
> 
> and
> 
>   vmlinux-3.13.0-39-generic Ubuntu1404_64bit-65993542.vmss 
>   (with its companion Ubuntu1404_64bit-65993542.vmem file)
> 
> The CentOS kernel works OK with your patch:
>   
>   $ crash vmlinux-2.6.32-431.el6 CentOS6.5-11bd56db.vmss
>   
>   crash 7.1.1rc13
>   Copyright (C) 2002-2014  Red Hat, Inc.
>   Copyright (C) 2004, 2005, 2006, 2010  IBM Corporation
>   Copyright (C) 1999-2006  Hewlett-Packard Co
>   Copyright (C) 2005, 2006, 2011, 2012  Fujitsu Limited
>   Copyright (C) 2006, 2007  VA Linux Systems Japan K.K.
>   Copyright (C) 2005, 2011  NEC Corporation
>   Copyright (C) 1999, 2002, 2007  Silicon Graphics, Inc.
>   Copyright (C) 1999, 2000, 2001, 2002  Mission Critical Linux, Inc.
>   This program is free software, covered by the GNU General Public License,
>   and you are welcome to change it and/or distribute copies of it under
>   certain conditions.  Enter "help copying" to see the conditions.
>   This program has absolutely no warranty.  Enter "help warranty" for details.
>    
>   GNU gdb (GDB) 7.6
>   Copyright (C) 2013 Free Software Foundation, Inc.
>   License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
>   This is free software: you are free to change and redistribute it.
>   There is NO WARRANTY, to the extent permitted by law.  Type "show copying"
>   and "show warranty" for details.
>   This GDB was configured as "x86_64-unknown-linux-gnu"...
>   
>         KERNEL: vmlinux-2.6.32-431.el6            
>       DUMPFILE: CentOS6.5-11bd56db.vmss
>           CPUS: 4
>           DATE: Tue Feb  3 18:22:03 2015
>         UPTIME: 00:01:06
>   LOAD AVERAGE: 0.71, 0.22, 0.07
>          TASKS: 302
>       NODENAME: promd-1s-dhcp37.eng.vmware.com
>        RELEASE: 2.6.32-431.el6.x86_64
>        VERSION: #1 SMP Fri Nov 22 03:15:09 UTC 2013
>        MACHINE: x86_64  (2394 Mhz)
>         MEMORY: 511.5 MB
>          PANIC: ""
>            PID: 0
>        COMMAND: "swapper"
>           TASK: ffffffff81a8d020  (1 of 4)  [THREAD_INFO: ffffffff81a00000]
>            CPU: 0
>          STATE: TASK_RUNNING (ACTIVE)
>        WARNING: panic task not found
>   
>   crash> 
>   
> But the Ubuntu1404_64bit-65993542.vmss fails miserably:
> 
>   $ crash Ubuntu1404_64bit-65993542.vmss vmlinux-3.13.0-39-generic
>   
>   crash 7.1.1rc13
>   Copyright (C) 2002-2014  Red Hat, Inc.
>   Copyright (C) 2004, 2005, 2006, 2010  IBM Corporation
>   Copyright (C) 1999-2006  Hewlett-Packard Co
>   Copyright (C) 2005, 2006, 2011, 2012  Fujitsu Limited
>   Copyright (C) 2006, 2007  VA Linux Systems Japan K.K.
>   Copyright (C) 2005, 2011  NEC Corporation
>   Copyright (C) 1999, 2002, 2007  Silicon Graphics, Inc.
>   Copyright (C) 1999, 2000, 2001, 2002  Mission Critical Linux, Inc.
>   This program is free software, covered by the GNU General Public License,
>   and you are welcome to change it and/or distribute copies of it under
>   certain conditions.  Enter "help copying" to see the conditions.
>   This program has absolutely no warranty.  Enter "help warranty" for details.
>    
>   crash: vmware_vmss.c:169: vmware_vmss_init: Assertion `__extension__ ({ size_t __s1_len, __s2_len; (__builtin_constant_p (name) && __builtin_constant_p ("Memory") && (__s1_len = strlen (name), __s2_len = strlen ("Memory"), (!((size_t)(const void *)((name) + 1) - (size_t)(const void *)(name) == 1) || __s1_len >= 4) && (!((size_t)(const void *)(("Memory") + 1) - (size_t)(const void *)("Memory") == 1) || __s2_len >= 4)) ? __builtin_strcmp (name, "Memory") : (__builtin_constant_p (name) && ((size_t)(const void *)((name) + 1) - (size_t)(const void *)(name) == 1) && (__s1_len = strlen (name), __s1_len < 4) ? (__builtin_constant_p ("Memory") && ((size_t)(const void *)(("Memory") + 1) - (size_t)(const void *)("Memory") == 1) ? __builtin_strcmp (name, "Memory") : (__extension__ ({ __const unsigned char *__s2 = (__const unsigned char *) (__const char *) ("Memory"); register int __result = (((__const unsigned char *) (__const char *) (name))[0] - __s2[0]); if (__s1_len > 0 && __result == 0) 
{ __result = (((__const unsigned char *) (__const char *) (name))[1] - __s2[1]); if (__s1_len > 1 && __result == 0) { __result = (((__const unsigned char *) (__const char *) (name))[2] - __s2[2]); if (__s1_len > 2 && __result == 0) __result = (((__const unsigned char *) (__const char *) (name))[3] - __s2[3]); } } __result; }))) : (__builtin_constant_p ("Memory") && ((size_t)(const void *)(("Memory") + 1) - (size_t)(const void *)("Memory") == 1) && (__s2_len = strlen ("Memory"), __s2_len < 4) ? (__builtin_constant_p (name) && ((size_t)(const void *)((name) + 1) - (size_t)(const void *)(name) == 1) ? __builtin_strcmp (name, "Memory") : (__extension__ ({ __const unsigned char *__s1 = (__const unsigned char *) (__const char *) (name); register int __result = __s1[0] - ((__const unsigned char *) (__const char *) ("Memory"))[0]; if (__s2_len > 0 && __result == 0) { __result = (__s1[1] - ((__const unsigned char *) (__const char *) ("Memory"))[1]); if (__s2_len > 1 && __result == 0) { __resul
t = (__s1[2] - ((__const unsigned char *) (__const char *) ("Memory"))[2]); if (__s2_len > 2 && __result == 0) __result = (__s1[3] - ((__const unsigned char *) (__const char *) ("Memory"))[3]); } } __result; }))) : __builtin_strcmp (name, "Memory")))); }) == 0' failed.
>   Aborted (core dumped)
>   $ 
>   
> Note that crash-7.1.0 works OK:
>   
>   $ /usr/bin/crash Ubuntu1404_64bit-65993542.vmss vmlinux-3.13.0-39-generic
>   
>   crash 7.1.0
>   Copyright (C) 2002-2014  Red Hat, Inc.
>   Copyright (C) 2004, 2005, 2006, 2010  IBM Corporation
>   Copyright (C) 1999-2006  Hewlett-Packard Co
>   Copyright (C) 2005, 2006, 2011, 2012  Fujitsu Limited
>   Copyright (C) 2006, 2007  VA Linux Systems Japan K.K.
>   Copyright (C) 2005, 2011  NEC Corporation
>   Copyright (C) 1999, 2002, 2007  Silicon Graphics, Inc.
>   Copyright (C) 1999, 2000, 2001, 2002  Mission Critical Linux, Inc.
>   This program is free software, covered by the GNU General Public License,
>   and you are welcome to change it and/or distribute copies of it under
>   certain conditions.  Enter "help copying" to see the conditions.
>   This program has absolutely no warranty.  Enter "help warranty" for details.
>    
>   vmw: Memory dump is not part of this vmss file.
>   vmw: Try to locate the companion vmem file ...
>   vmw: vmem file: Ubuntu1404_64bit-65993542.vmem
>   
>   GNU gdb (GDB) 7.6
>   Copyright (C) 2013 Free Software Foundation, Inc.
>   License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
>   This is free software: you are free to change and redistribute it.
>   There is NO WARRANTY, to the extent permitted by law.  Type "show copying"
>   and "show warranty" for details.
>   This GDB was configured as "x86_64-unknown-linux-gnu"...
>   
>         KERNEL: vmlinux-3.13.0-39-generic         
>       DUMPFILE: Ubuntu1404_64bit-65993542.vmss
>           CPUS: 1
>           DATE: Thu Nov 13 14:10:53 2014
>         UPTIME: 2 days, 03:40:33
>   LOAD AVERAGE: 0.00, 0.01, 0.05
>          TASKS: 669
>       NODENAME: ubuntu
>        RELEASE: 3.13.0-39-generic
>        VERSION: #66-Ubuntu SMP Tue Oct 28 13:30:27 UTC 2014
>        MACHINE: x86_64  (2693 Mhz)
>         MEMORY: 1 GB
>          PANIC: ""
>            PID: 0
>        COMMAND: "swapper/0"
>           TASK: ffffffff81c15480  [THREAD_INFO: ffffffff81c00000]
>            CPU: 0
>          STATE: TASK_RUNNING 
>        WARNING: panic task not found
>   
>   crash> 
>   
> Anyway, besides fixing whatever the problem is, please remove the assert()
> calls entirely.  They did not exist in the original vmware_vmss.c file,
> and shouldn't be added now.
> 
> assert() is not used by the top-level crash sources (except for qemu.c 
> and qemu-load.c, which came from a 3rd party, and I didn't feel like changing 
> all of them).  Instead, please use the crash convention by testing for the
> anomaly, and then send an appropriate error message to error(FATAL, ...), 
> which will kill the crash session if it's during session initialization, 
> or kill the current command if it's during crash runtime.
> 
> Thanks,
>   Dave
> 
-------------- next part --------------
commit 5640c23ff0cf68ce34eb93bdd02077f7ca1a4ea9
Author: Dyno (Hongjun) Fu <hfu at vmware.com>
Date:   Tue Mar 24 13:47:46 2015 -0700

    add vmss memory regions support
    
    There may be holes in the guest OS physical address space,
    reserved for PCI etc.  In that case the memory dump is divided
    into regions so that these holes are skipped.
    
    On vSphere 5.5, a RHEL 6.5 memory dump larger than 3GB is such a
    case.

diff --git a/vmware_vmss.c b/vmware_vmss.c
index aeb1c1d..a484df2 100644
--- a/vmware_vmss.c
+++ b/vmware_vmss.c
@@ -21,6 +21,10 @@
 
 #define LOGPRX "vmw: "
 
+/* VMware only supports X86/X86_64 virtual machines. */
+#define VMW_PAGE_SIZE (4096)
+#define VMW_PAGE_SHIFT (12)
+
 static vmssdata vmss = { 0 };
 
 int
@@ -139,8 +143,7 @@ vmware_vmss_init(char *filename, FILE *ofp)
 				break;
 			}
 			name[nameLen] = 0;
-			DEBUG_PARSE_PRINT((vmss.ofp, LOGPRX"\t Item %20s",
-					   name));
+			DEBUG_PARSE_PRINT((vmss.ofp, LOGPRX"\t Item %20s", name));
 
 			nindx = TAG_NINDX(tag);
 			if (nindx > 3) {
@@ -187,47 +190,52 @@ vmware_vmss_init(char *filename, FILE *ofp)
 					break;
 				}
 
-				/* The things that we really care about...*/
-				if (strcmp(grps[i].name, "memory") == 0 &&
-				    strcmp(name, "Memory") == 0) {
+				if (strcmp(name, "Memory") == 0) {
+					/* The things that we really care about...*/
 					vmss.memoffset = blockpos;
 					vmss.memsize = nbytesinmem;
-				}
-
-				DEBUG_PARSE_PRINT((vmss.ofp, "\t=> %sBLOCK: position=%#llx size=%#llx memsize=%#llx\n",
-						  compressed ? "COMPRESSED " : "",
-						  (ulonglong)blockpos, (ulonglong)nbytes, (ulonglong)nbytesinmem));
 
+					DEBUG_PARSE_PRINT((vmss.ofp, "\t=> %sBLOCK: position=%#llx size=%#llx memsize=%#llx\n",
+							  compressed ? "COMPRESSED " : "",
+							  (ulonglong)blockpos, (ulonglong)nbytes, (ulonglong)nbytesinmem));
+					assert (!compressed);
+				}
 			} else {
-				uint8_t val[TAG_VALSIZE_MASK];
+				union {
+					uint8_t val[TAG_VALSIZE_MASK];
+					uint32_t val32;
+				} u;
 				unsigned k;
 				unsigned valsize = TAG_VALSIZE(tag);
 				uint64_t blockpos = ftell(vmss.dfp);
 
 				DEBUG_PARSE_PRINT((vmss.ofp, "\t=> position=%#llx size=%#x: ", (ulonglong)blockpos, valsize));
-				if (fread(val, sizeof(val[0]), valsize, vmss.dfp) != valsize) {
+				if (fread(u.val, sizeof(u.val[0]), valsize, vmss.dfp) != valsize) {
 					fprintf(vmss.ofp, LOGPRX"Cannot read item.\n");
 					break;
 				}
 				for (k = 0; k < valsize; k++) {
 					/* Assume Little Endian */
-					DEBUG_PARSE_PRINT((vmss.ofp, "%02X", val[valsize - k - 1]));
+					DEBUG_PARSE_PRINT((vmss.ofp, "%02X", u.val[valsize - k - 1]));
 				}
 
 				if (strcmp(grps[i].name, "memory") == 0) {
 					if (strcmp(name, "regionsCount") == 0) {
-						vmss.regionscount = (uint32_t) *val;
-						if (vmss.regionscount != 0) {
-							fprintf(vmss.ofp, LOGPRX"regionsCount=%d (!= 0) NOT TESTED!",
-							        vmss.regionscount);
-						}
+						vmss.regionscount = u.val32;
+						assert(vmss.regionscount <= MAX_REGIONS);
+					}
+				        if (strcmp(name, "regionPageNum") == 0) {
+						vmss.regions[idx[0]].startpagenum = u.val32;
+					}
+					if (strcmp(name, "regionPPN") == 0) {
+						vmss.regions[idx[0]].startppn = u.val32;
+					}
+					if (strcmp(name, "regionSize") == 0) {
+						vmss.regions[idx[0]].size = u.val32;
 					}
 					if (strcmp(name, "align_mask") == 0) {
-						vmss.alignmask = (uint32_t) *val;
-						if (vmss.alignmask != 0xff) {
-							fprintf(vmss.ofp, LOGPRX"align_mask=%d (!= 0xff) NOT TESTED!",
-							        vmss.regionscount);
-						}
+						vmss.alignmask = u.val32;
+						assert(vmss.alignmask == 0xFFFF);
 					}
 				}
 
@@ -272,26 +280,35 @@ vmware_vmss_init(char *filename, FILE *ofp)
 
 uint vmware_vmss_page_size(void)
 {
-	return 4096;
+	return VMW_PAGE_SIZE;
 }
 
 int
 read_vmware_vmss(int fd, void *bufptr, int cnt, ulong addr, physaddr_t paddr)
 {
-	uint64_t pos = vmss.memoffset + paddr;
+	uint64_t pos = paddr;
+
+	if (vmss.regionscount > 0) {
+		/* Memory is divided into regions and there are holes between them. */
+		uint32_t ppn = (uint32_t) (pos >> VMW_PAGE_SHIFT);
+	        int i;
+
+		for (i = 0; i < vmss.regionscount; i++) {
+			if (ppn < vmss.regions[i].startppn)
+				break;
 
-	if (pos + cnt > vmss.memoffset + vmss.memsize) {
-		cnt -= ((pos + cnt) - (vmss.memoffset + vmss.memsize));
-		if (cnt < 0) {
-			error(INFO, LOGPRX"Read beyond the end of file! paddr=%#lx\n",
-			      paddr);
+			/* skip holes. */
+			pos -= ((vmss.regions[i].startppn - vmss.regions[i].startpagenum)
+				<< VMW_PAGE_SHIFT);
 		}
 	}
+	assert(pos + cnt <= vmss.memsize);
 
+	pos += vmss.memoffset;
         if (fseek(vmss.dfp, pos, SEEK_SET) != 0)
 		return SEEK_ERROR;
 
-        if (fread(bufptr, 1 , cnt, vmss.dfp) != cnt)
+	if (fread(bufptr, 1, cnt, vmss.dfp) != cnt)
 		return READ_ERROR;
 
 	return cnt;
diff --git a/vmware_vmss.h b/vmware_vmss.h
index dcbde2d..3f46188 100644
--- a/vmware_vmss.h
+++ b/vmware_vmss.h
@@ -82,6 +82,14 @@ struct cptgroupdesc {
 };
 typedef struct cptgroupdesc	cptgroupdesc;
 
+struct memregion {
+   uint32_t startpagenum;
+   uint32_t startppn;
+   uint32_t size;
+};
+typedef struct memregion	memregion;
+
+#define MAX_REGIONS	3
 struct vmssdata {
 	int32_t	cpt64bit;
 	FILE	*dfp;
@@ -89,6 +97,7 @@ struct vmssdata {
 	/* about the memory */
 	uint32_t	alignmask;
 	uint32_t	regionscount;
+        memregion	regions[MAX_REGIONS];
 	uint64_t	memoffset;
 	uint64_t	memsize;
 };


More information about the Crash-utility mailing list