<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2//EN">
<HTML>
<HEAD>
<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-1">
<META NAME="Generator" CONTENT="MS Exchange Server version 6.5.7654.12">
<TITLE>RE: [Crash-utility] Crash faults when determining panic task</TITLE>
</HEAD>
<BODY>
<!-- Converted from text/plain format -->

<P><FONT SIZE=2>Hi Dave,<BR>
<BR>
I hope I have captured everything you asked for here. If remote debugging over e-mail is too tedious, I can arrange to post a vmlinux/vmcore on our FTP site (roughly 600MB together).<BR>
<BR>
<BR>
*** Set up some breakpoints to watch bt->machdep:<BR>
<BR>
get_netdump_regs_x86_64(struct bt_info *bt, ulong *ripp, ulong *rspp)<BR>
{<BR>
        ...<BR>
<BR>
        if (((NETDUMP_DUMPFILE() || KDUMP_DUMPFILE()) &&<BR>
              VALID_STRUCT(user_regs_struct) && (bt->task == tt->panic_task)) ||<BR>
              (KDUMP_DUMPFILE() && (kt->flags & DWARF_UNWIND) &&<BR>
              (bt->flags & BT_DUMPFILE_SEARCH))) {<BR>
                ...<BR>
2287            bt->machdep = (void *)user_regs;<BR>
                ...<BR>
<BR>
        if (ELF_NOTES_VALID() &&<BR>
            (bt->flags & BT_DUMPFILE_SEARCH) && DISKDUMP_DUMPFILE() &&<BR>
            (note = (Elf64_Nhdr *)<BR>
             diskdump_get_prstatus_percpu(bt->tc->processor))) {<BR>
                ...<BR>
2306            bt->machdep = (void *)user_regs;<BR>
                ...<BR>
<BR>
(gdb) break get_netdump_regs_x86_64<BR>
Breakpoint 1 at 0x519740: file netdump.c, line 2238.<BR>
(gdb) break netdump.c:2287<BR>
Breakpoint 2 at 0x519970: file netdump.c, line 2287.<BR>
(gdb) break netdump.c:2306<BR>
Breakpoint 3 at 0x5199e7: file netdump.c, line 2306.<BR>
(gdb) r<BR>
<BR>
please wait... (determining panic task)        <BR>
Breakpoint 1, get_netdump_regs_x86_64 (bt=0x7fffffffcd70, ripp=0x7fffffffcce0,<BR>
    rspp=0x7fffffffcce8) at netdump.c:2238<BR>
2238    {<BR>
(gdb) c<BR>
Continuing.<BR>
<BR>
Breakpoint 3, get_netdump_regs_x86_64 (bt=0x7fffffffcd70, ripp=0x7fffffffcce0,<BR>
    rspp=0x7fffffffcce8) at netdump.c:2306<BR>
2306                    bt->machdep = (void *)user_regs;<BR>
(gdb) p user_regs<BR>
$1 = 0xd14084 ""<BR>
(gdb) c<BR>
Continuing.<BR>
<BR>
Breakpoint 1, get_netdump_regs_x86_64 (bt=0x7fffffffcd70, ripp=0x7fffffffcce0,<BR>
    rspp=0x7fffffffcce8) at netdump.c:2238<BR>
2238    {<BR>
(gdb) c<BR>
Continuing.<BR>
<BR>
Program received signal SIGSEGV, Segmentation fault.<BR>
x86_64_get_dumpfile_stack_frame (rsp=0x7fffffffcce8, rip=0x7fffffffcce0,<BR>
    bt_in=0x7fffffffcd70) at x86_64.c:4183<BR>
4183                    ur_rip = ULONG(user_regs +<BR>
<BR>
<BR>
*** So in its second invocation, get_netdump_regs_x86_64() never sets bt->machdep (only breakpoint 1 fired)<BR>
<BR>
*** Let's see what diskdump_get_prstatus_percpu() is returning<BR>
<BR>
(gdb) break diskdump_get_prstatus_percpu<BR>
Breakpoint 1 at 0x526070: file diskdump.c, line 1451.<BR>
(gdb) r<BR>
please wait... (determining panic task)        <BR>
Breakpoint 1, diskdump_get_prstatus_percpu (cpu=0) at diskdump.c:1451<BR>
1451            return dd->nt_prstatus_percpu[cpu];<BR>
(gdb) display dd->nt_prstatus_percpu[0]@16<BR>
1: dd->nt_prstatus_percpu[0]@16 = {0xd1c000, 0x0, 0x0, 0xd26472, 0xbf35ab2,<BR>
  0xd26472, 0x200000012, 0xd1c850, 0xd1c600, 0x1010000012b,<BR>
  0xffffffff814e4fa0, 0x14e4fa0, 0x4270, 0x0, 0x0, 0x0}<BR>
(gdb) c<BR>
Continuing.<BR>
<BR>
Breakpoint 1, diskdump_get_prstatus_percpu (cpu=1) at diskdump.c:1451<BR>
1451            return dd->nt_prstatus_percpu[cpu];<BR>
1: dd->nt_prstatus_percpu[0]@16 = {0xd1c000, 0x0, 0x0, 0xd26472, 0xbf35ab2,<BR>
  0xd26472, 0x200000012, 0xd1c850, 0xd1c600, 0x1010000012b,<BR>
  0xffffffff814e4fa0, 0x14e4fa0, 0x4270, 0x0, 0x0, 0x0}<BR>
<BR>
<BR>
*** See crash -d1 vmlinux vmcore output at the bottom of the mail, particularly the part that says...<BR>
<BR>
crash: page excluded: kernel virtual address: ffffffff81bb3b00  type: "cpu number (per_cpu)"<BR>
crash: get_cpus_present: present: 16<BR>
<BR>
<BR>
<BR>
*** Bogus note->n_descsz value<BR>
*** Apply first patch to get us further into ELF Note processing<BR>
<BR>
From inside netdump.c :: get_regs_from_note() at the point of the fault, I don't see dd->nt_prstatus[], because dd is now of type *diskdump_data...  However, the *note passed in can be found in dd->nt_prstatus_percpu[]...<BR>
<BR>
please wait... (determining panic task)        <BR>
Program received signal SIGSEGV, Segmentation fault.<BR>
get_regs_from_note (note=0xd26472 "\b", ip=0x7fffffffc590, sp=0x7fffffffc598)<BR>
    at netdump.c:2221<BR>
2221            *sp = ULONG(user_regs + offset_sp);<BR>
(gdb) p/x *((Elf64_Nhdr *)note)<BR>
$1 = {n_namesz = 0x8, n_descsz = 0xccf80000, n_type = 0x8}<BR>
(gdb) p dd->nt_prstatus_percpu[0]@16<BR>
$2 = {0xd1c000, 0x0, 0x0, 0xd26472, 0xbf35ab2, 0xd26472, 0x200000012,<BR>
  0xd1c850, 0xd1c600, 0x1010000012b, 0xffffffff814e4fa0, 0x14e4fa0, 0x4270,<BR>
  0x0, 0x0, 0x0}<BR>
(gdb) ptype dd<BR>
type = struct diskdump_data {<BR>
    char *filename;<BR>
    ulong flags;<BR>
    int dfd;<BR>
    FILE *ofp;<BR>
    int machine_type;<BR>
    struct disk_dump_header *header;<BR>
    struct disk_dump_sub_header *sub_header;<BR>
    struct kdump_sub_header *sub_header_kdump;<BR>
    size_t data_offset;<BR>
    int block_size;<BR>
    int block_shift;<BR>
    char *bitmap;<BR>
    int bitmap_len;<BR>
    char *dumpable_bitmap;<BR>
    int byte;<BR>
    int bit;<BR>
    char *compressed_page;<BR>
    char *curbufptr;<BR>
    unsigned char *notes_buf;<BR>
    void **nt_prstatus_percpu;<BR>
    uint num_prstatus_notes;<BR>
    struct page_cache_hdr page_cache_hdr[16];<BR>
    char *page_cache_buf;<BR>
    int evict_index;<BR>
    ulong evictions;<BR>
    ulong cached_reads;<BR>
    ulong *valid_pages;<BR>
    ulong accesses;<BR>
} *<BR>
<BR>
<BR>
<BR>
*** Unpatched crash -d1 vmlinux vmcore output:<BR>
<BR>
crash 5.1.8<BR>
Copyright (C) 2002-2011  Red Hat, Inc.<BR>
Copyright (C) 2004, 2005, 2006  IBM Corporation<BR>
Copyright (C) 1999-2006  Hewlett-Packard Co<BR>
Copyright (C) 2005, 2006  Fujitsu Limited<BR>
Copyright (C) 2006, 2007  VA Linux Systems Japan K.K.<BR>
Copyright (C) 2005  NEC Corporation<BR>
Copyright (C) 1999, 2002, 2007  Silicon Graphics, Inc.<BR>
Copyright (C) 1999, 2000, 2001, 2002  Mission Critical Linux, Inc.<BR>
This program is free software, covered by the GNU General Public License,<BR>
and you are welcome to change it and/or distribute copies of it under<BR>
certain conditions.  Enter "help copying" to see the conditions.<BR>
This program has absolutely no warranty.  Enter "help warranty" for details.<BR>
<BR>
compressed kdump: header->utsname.machine: x86_64<BR>
diskdump_data:<BR>
          filename: vmcore<BR>
             flags: 6 (KDUMP_CMPRS_LOCAL|ERROR_EXCLUDED)<BR>
               dfd: 3<BR>
               ofp: 0<BR>
      machine_type: 62 (EM_X86_64)<BR>
<BR>
            header: 2cc1fe0<BR>
           signature: "KDUMP   "<BR>
      header_version: 4<BR>
             utsname:<BR>
               sysname: Linux<BR>
              nodename: bahamut.mno.stratus.com<BR>
               release: 2.6.32-131.0.15.el6.exp10.bz16586.x86_64<BR>
               version: #1 SMP Thu Jun 16 13:13:45 EDT 2011<BR>
               machine: x86_64<BR>
            domainname: sraeng<BR>
           timestamp:<BR>
                tv_sec: 4e4fe6e3<BR>
               tv_usec: 0<BR>
              status: 0 ()<BR>
          block_size: 4096<BR>
        sub_hdr_size: 1<BR>
       bitmap_blocks: 288<BR>
           max_mapnr: 4718592<BR>
    total_ram_blocks: 0<BR>
       device_blocks: 0<BR>
      written_blocks: 0<BR>
         current_cpu: 0<BR>
             nr_cpus: 1<BR>
      tasks[nr_cpus]: 0<BR>
<BR>
        sub_header: 0 (n/a)<BR>
<BR>
  sub_header_kdump: 2cc2ff0<BR>
           phys_base: 0<BR>
          dump_level: 31 (0x1f) (DUMP_EXCLUDE_ZERO|DUMP_EXCLUDE_CACHE|DUMP_EXCLUDE_CACHE_PRI|DUMP_EXCLUDE_USER_DATA|DUMP_EXCLUDE_FREE)<BR>
   offset_vmcoreinfo: 11bc<BR>
     size_vmcoreinfo: 1392<BR>
                      OSRELEASE=2.6.32-131.0.15.el6.exp10.bz16586.x86_64<BR>
                      PAGESIZE=4096<BR>
                      SYMBOL(init_uts_ns)=ffffffff81a2e8c0<BR>
                      SYMBOL(node_online_map)=ffffffff81ba0860<BR>
                      SYMBOL(swapper_pg_dir)=ffffffff81a25000<BR>
                      SYMBOL(_stext)=ffffffff81000198<BR>
                      SYMBOL(vmlist)=ffffffff81ee60b8<BR>
                      SYMBOL(mem_section)=ffffffff81ef03c0<BR>
                      LENGTH(mem_section)=4096<BR>
                      SIZE(mem_section)=32<BR>
                      OFFSET(mem_section.section_mem_map)=0<BR>
                      SIZE(page)=56<BR>
                      SIZE(pglist_data)=212416<BR>
                      SIZE(zone)=34496<BR>
                      SIZE(free_area)=88<BR>
                      SIZE(list_head)=16<BR>
                      SIZE(nodemask_t)=64<BR>
                      OFFSET(page.flags)=0<BR>
                      OFFSET(page._count)=8<BR>
                      OFFSET(page.mapping)=24<BR>
                      OFFSET(page.lru)=40<BR>
                      OFFSET(pglist_data.node_zones)=0<BR>
                      OFFSET(pglist_data.nr_zones)=212288<BR>
                      OFFSET(pglist_data.node_start_pfn)=212312<BR>
                      OFFSET(pglist_data.node_spanned_pages)=212328<BR>
                      OFFSET(pglist_data.node_id)=212336<BR>
                      OFFSET(zone.free_area)=32864<BR>
                      OFFSET(zone.vm_stat)=34032<BR>
                      OFFSET(zone.spanned_pages)=34344<BR>
                      OFFSET(free_area.free_list)=0<BR>
                      OFFSET(list_head.next)=0<BR>
                      OFFSET(list_head.prev)=8<BR>
                      OFFSET(vm_struct.addr)=8<BR>
                      LENGTH(zone.free_area)=11<BR>
                      SYMBOL(log_buf)=ffffffff81a37210<BR>
                      SYMBOL(log_end)=ffffffff81d5b820<BR>
                      SYMBOL(log_buf_len)=ffffffff81a37208<BR>
                      SYMBOL(logged_chars)=ffffffff81ddb920<BR>
                      LENGTH(free_area.free_list)=5<BR>
                      NUMBER(NR_FREE_PAGES)=0<BR>
                      NUMBER(PG_lru)=5<BR>
                      NUMBER(PG_private)=11<BR>
                      NUMBER(PG_swapcache)=16<BR>
                      SYMBOL(phys_base)=ffffffff81a2d010<BR>
                      SYMBOL(init_level4_pgt)=ffffffff81a25000<BR>
                      SYMBOL(node_data)=ffffffff81b9cda0<BR>
                      LENGTH(node_data)=512<BR>
                      CRASHTIME=1313859299<BR>
         offset_note: 1040<BR>
           size_note: 1780<BR>
  num_prstatus_notes: 1<BR>
           notes_buf: 2cc4000<BR>
            notes[0]: 2cc4000<BR>
  NT_PRSTATUS_offset: 1040<BR>
<BR>
       data_offset: 122000<BR>
        block_size: 4096<BR>
       block_shift: 12<BR>
            bitmap: 7fa5296fc010<BR>
        bitmap_len: 1179648<BR>
   dumpable_bitmap: 7fa528890010<BR>
              byte: 0<BR>
               bit: 0<BR>
   compressed_page: 2cdeb30<BR>
         curbufptr: 0<BR>
<BR>
 page_cache_hdr[0]:<BR>
            pg_flags: 0 ()<BR>
             pg_addr: 0<BR>
           pg_bufptr: 2cceb20<BR>
        pg_hit_count: 0<BR>
 page_cache_hdr[1]:<BR>
            pg_flags: 0 ()<BR>
             pg_addr: 0<BR>
           pg_bufptr: 2ccfb20<BR>
        pg_hit_count: 0<BR>
 page_cache_hdr[2]:<BR>
            pg_flags: 0 ()<BR>
             pg_addr: 0<BR>
           pg_bufptr: 2cd0b20<BR>
        pg_hit_count: 0<BR>
 page_cache_hdr[3]:<BR>
            pg_flags: 0 ()<BR>
             pg_addr: 0<BR>
           pg_bufptr: 2cd1b20<BR>
        pg_hit_count: 0<BR>
 page_cache_hdr[4]:<BR>
            pg_flags: 0 ()<BR>
             pg_addr: 0<BR>
           pg_bufptr: 2cd2b20<BR>
        pg_hit_count: 0<BR>
 page_cache_hdr[5]:<BR>
            pg_flags: 0 ()<BR>
             pg_addr: 0<BR>
           pg_bufptr: 2cd3b20<BR>
        pg_hit_count: 0<BR>
 page_cache_hdr[6]:<BR>
            pg_flags: 0 ()<BR>
             pg_addr: 0<BR>
           pg_bufptr: 2cd4b20<BR>
        pg_hit_count: 0<BR>
 page_cache_hdr[7]:<BR>
            pg_flags: 0 ()<BR>
             pg_addr: 0<BR>
           pg_bufptr: 2cd5b20<BR>
        pg_hit_count: 0<BR>
 page_cache_hdr[8]:<BR>
            pg_flags: 0 ()<BR>
             pg_addr: 0<BR>
           pg_bufptr: 2cd6b20<BR>
        pg_hit_count: 0<BR>
 page_cache_hdr[9]:<BR>
            pg_flags: 0 ()<BR>
             pg_addr: 0<BR>
           pg_bufptr: 2cd7b20<BR>
        pg_hit_count: 0<BR>
page_cache_hdr[10]:<BR>
            pg_flags: 0 ()<BR>
             pg_addr: 0<BR>
           pg_bufptr: 2cd8b20<BR>
        pg_hit_count: 0<BR>
page_cache_hdr[11]:<BR>
            pg_flags: 0 ()<BR>
             pg_addr: 0<BR>
           pg_bufptr: 2cd9b20<BR>
        pg_hit_count: 0<BR>
page_cache_hdr[12]:<BR>
            pg_flags: 0 ()<BR>
             pg_addr: 0<BR>
           pg_bufptr: 2cdab20<BR>
        pg_hit_count: 0<BR>
page_cache_hdr[13]:<BR>
            pg_flags: 0 ()<BR>
             pg_addr: 0<BR>
           pg_bufptr: 2cdbb20<BR>
        pg_hit_count: 0<BR>
page_cache_hdr[14]:<BR>
            pg_flags: 0 ()<BR>
             pg_addr: 0<BR>
           pg_bufptr: 2cdcb20<BR>
        pg_hit_count: 0<BR>
page_cache_hdr[15]:<BR>
            pg_flags: 0 ()<BR>
             pg_addr: 0<BR>
           pg_bufptr: 2cddb20<BR>
        pg_hit_count: 0<BR>
<BR>
    page_cache_buf: 2cceb20<BR>
       evict_index: 0<BR>
         evictions: 0<BR>
          accesses: 0<BR>
      cached_reads: 0<BR>
       valid_pages: 2ccc710<BR>
crash: pv_init_ops exists: ARCH_PVOPS<BR>
compressed kdump: phys_base: 0<BR>
gdb vmlinux<BR>
GNU gdb (GDB) 7.0<BR>
Copyright (C) 2009 Free Software Foundation, Inc.<BR>
License GPLv3+: GNU GPL version 3 or later &lt;<A HREF="http://gnu.org/licenses/gpl.html">http://gnu.org/licenses/gpl.html</A>&gt;<BR>
This is free software: you are free to change and redistribute it.<BR>
There is NO WARRANTY, to the extent permitted by law.  Type "show copying"<BR>
and "show warranty" for details.<BR>
This GDB was configured as "x86_64-unknown-linux-gnu"...<BR>
<BR>
cpu_possible_map: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15<BR>
cpu_present_map: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15<BR>
cpu_online_map: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15<BR>
base kernel version: 2.6.32<BR>
verify_namelist:<BR>
dumpfile /proc/version:<BR>
Linux version 2.6.32-131.0.15.el6.exp10.bz16586.x86_64 (root@druk.mno.stratus.com) (gcc version 4.4.5 20110214 (Red Hat 4.4.5-6) (GCC) ) #1 SMP Thu Jun 16 13:13:45 EDT 2011<BR>
vmlinux:<BR>
Linux version 2.6.32-131.0.15.el6.exp10.bz16586.x86_64 (root@druk.mno.stratus.com) (gcc version 4.4.5 20110214 (Red Hat 4.4.5-6) (GCC) ) #1 SMP Thu Jun 16 13:13:45 EDT 2011<BR>
<BR>
crash: page excluded: kernel virtual address: ffffffff81bb3b00  type: "cpu number (per_cpu)"<BR>
crash: get_cpus_present: present: 16<BR>
crash: page excluded: kernel virtual address: ffffffff81bb3b00  type: "cpu number (per_cpu)"<BR>
crash: get_cpus_present: present: 16<BR>
IRQ stack link register: undetermined<BR>
PAGESIZE=4096<BR>
mem_section_size = 32768<BR>
NR_SECTION_ROOTS = 4096<BR>
NR_MEM_SECTIONS = 524288<BR>
SECTIONS_PER_ROOT = 128<BR>
SECTION_ROOT_MASK = 0x7f<BR>
PAGES_PER_SECTION = 32768<BR>
node_online_map: [3, 0, 0, 0, 0, 0, 0, 0] -> nodes online: 2<BR>
node_table[0]:<BR>
             id: 0<BR>
          pgdat: ffff880000020040<BR>
           size: 0<BR>
        present: 0<BR>
        mem_map: ffffea0000000000<BR>
    start_paddr: 0<BR>
    start_mapnr: 0<BR>
WARNING: sparsemem: invalid section number: 137438888923<BR>
WARNING: sparsemem: invalid section number: 137438888923<BR>
crash: invalid kernel virtual address: 0  type: "readstring characters"<BR>
crash: invalid kernel virtual address: 0  type: "readstring characters"<BR>
node_table[1]:<BR>
             id: 1<BR>
          pgdat: ffff880280000040<BR>
           size: 2097152<BR>
        present: 2097152<BR>
        mem_map: ffffea0008c00000<BR>
    start_paddr: 280000000<BR>
    start_mapnr: 2621440<BR>
NOTE: page_hash_table does not exist in this kernel<BR>
^Mplease wait... (gathering kmem slab cache data)<BR>
kmem_cache_downsize: SIZE(kmem_cache_s): 36968 cache_cache.buffer_size: 32896<BR>
kmem_cache_downsize: nr_node_ids: 2<BR>
^M                                                ^MNOTE: unwind_table structure has changed, or does not exist in this kernel<BR>
init_unwind_table: DWARF_UNWIND_EH_FRAME<BR>
^Mplease wait... (gathering module symbol data)^M                                              ^M^Mplease wait... (gathering task table data)^M                                           ^Mcrash: get_cpus_online: online: 16<BR>
^Mplease wait... (determining panic task)<BR>
crash: get_active_set_panic_task: failed<BR>
<BR>
<BR>
Thanks,<BR>
<BR>
-- Joe Lawrence</FONT>
</P>

</BODY>
</HTML>