[Crash-utility] [PATCH v3 2/2] arm/arm64: read elf notes for qemu generated cores

Dave Anderson anderson at redhat.com
Mon Nov 23 18:48:54 UTC 2015



----- Original Message -----
> On Mon, Nov 23, 2015 at 11:35:38AM -0500, Dave Anderson wrote:
> > 
> > 
> > ----- Original Message -----
> > > > > Would you like me to spin a v4 with this condition added? Or, since
> > > > > it
> > > > > actually seems to be addressing a non-qemu-generated dump issue, then
> > > > > maybe you just want to submit it as a new patch on top of the qemu
> > > > > patch?
> > > > 
> > > > No, I'm cleaning it up now -- there are a couple other gotcha's
> > > > w/respect
> > > > to backtracing, since we're now going to keep the
> > > > ms->panic_task_regs[cpus]
> > > > array intact, even if there's no note in some of them.  I'm going to
> > > > make
> > > > sure it's zero-filled in the not-found case, and I have to make sure
> > > > that
> > > > it's recognized on both architectures.  I'll post the the updated
> > > > additions
> > > > to arm.c and arm64.c so that you can do a quick test on a legitimate
> > > > QEMU dump.
> > > > 
> > > > Might be Monday, I'm not sure...
> > > 
> > > Sounds good to me :-)
> > > 
> > > Thanks,
> > > drew
> > 
> > Drew,
> > 
> > Can you sanity-test the attached patch?  If it works OK for you, I'll post
> > it as-is.
> 
> Works. I tested on both arm and aarch64. I still think we should commit
> it as two patches though. The qemu one, with its commit message focused
> on qemu, and then this one, with its commit message focused on handling
> missing prstatus for some cpus for unknown reasons. Actually, I don't
> think it should even be possible to be missing prstatus for some cpus in
> the qemu case (qemu will always populate them all), so the "dumpfiles
> created with qemu..." comment might need some tweaking as well, since the
> else-continue is being added to its if-block.
> 
> Thanks,
> drew

All right, I'll post your v3 patch as-is, and make a follow-up patch
with my additions (plus a tweak to the comment).

Thanks,
  Dave

> 
> > 
> > Thanks,
> >   Dave
> > 
> 
> > diff --git a/arm.c b/arm.c
> > index ffc7c06..114a6ae 100644
> > --- a/arm.c
> > +++ b/arm.c
> > @@ -580,8 +580,8 @@ arm_get_crash_notes(void)
> >  
> >  	buf = GETBUF(SIZE(note_buf));
> >  
> > -	if (!(panic_task_regs = malloc(kt->cpus*sizeof(*panic_task_regs))))
> > -		error(FATAL, "cannot malloc panic_task_regs space\n");
> > +	if (!(panic_task_regs = calloc((size_t)kt->cpus,
> > sizeof(*panic_task_regs))))
> > +		error(FATAL, "cannot calloc panic_task_regs space\n");
> >  	
> >  	for  (i=0;i<kt->cpus;i++) {
> >  
> > @@ -597,6 +597,32 @@ arm_get_crash_notes(void)
> >  		note = (Elf32_Nhdr *)buf;
> >  		p = buf + sizeof(Elf32_Nhdr);
> >  
> > +		/*
> > +		 * dumpfiles created with qemu won't have crash_notes, but there will
> > +		 * be elf notes.
> > +		 */
> > +		if (note->n_namesz == 0 && (DISKDUMP_DUMPFILE() || KDUMP_DUMPFILE())) {
> > +			if (DISKDUMP_DUMPFILE())
> > +				note = diskdump_get_prstatus_percpu(i);
> > +			else if (KDUMP_DUMPFILE())
> > +				note = netdump_get_prstatus_percpu(i);
> > +			if (note) {
> > +				/*
> > +				 * SIZE(note_buf) accounts for a "final note", which is a
> > +				 * trailing empty elf note header.
> > +				 */
> > +				long notesz = SIZE(note_buf) - sizeof(Elf32_Nhdr);
> > +
> > +				if (sizeof(Elf32_Nhdr) + roundup(note->n_namesz, 4) +
> > +				    note->n_descsz == notesz)
> > +					BCOPY((char *)note, buf, notesz);
> > +			} else {
> > +				error(WARNING,
> > +					"cannot find NT_PRSTATUS note for cpu: %d\n", i);
> > +				continue;
> > +			}
> > +		}
> > +
> >  		if (note->n_type != NT_PRSTATUS) {
> >  			error(WARNING, "invalid note (n_type != NT_PRSTATUS)\n");
> >  			goto fail;
> > @@ -835,6 +861,9 @@ arm_back_trace(struct bt_info *bt)
> >  static void
> >  arm_back_trace_cmd(struct bt_info *bt)
> >  {
> > +	if (bt->flags & BT_REGS_NOT_FOUND)
> > +		return;
> > +
> >  	if (kt->flags & DWARF_UNWIND)
> >  		unwind_backtrace(bt);
> >  	else
> > @@ -1301,12 +1330,13 @@ arm_get_dumpfile_stack_frame(struct bt_info *bt,
> > ulong *nip, ulong *ksp)
> >  {
> >  	const struct machine_specific *ms = machdep->machspec;
> >  
> > -	if (!ms->crash_task_regs)
> > +	if (!ms->crash_task_regs ||
> > +	    (!ms->crash_task_regs[bt->tc->processor].ARM_pc &&
> > +	     !ms->crash_task_regs[bt->tc->processor].ARM_sp)) {
> > +		bt->flags |= BT_REGS_NOT_FOUND;
> >  		return FALSE;
> > +	}
> >  
> > -	if (!is_task_active(bt->task))
> > -		return FALSE;
> > -
> >  	/*
> >  	 * We got registers for panic task from crash_notes. Just return them.
> >  	 */
> > @@ -1339,11 +1369,9 @@ arm_get_stack_frame(struct bt_info *bt, ulong *pcp,
> > ulong *spp)
> >  	else
> >  		ret = arm_get_frame(bt, &ip, &sp);
> >  
> > -	if (!ret) {
> > +	if (!ret)
> >  		error(WARNING, "cannot determine starting stack frame for task %lx\n",
> >  			bt->task);
> > -		return;
> > -	}
> >  
> >  	if (pcp)
> >  		*pcp = ip;
> > diff --git a/arm64.c b/arm64.c
> > index 06b45c2..2304a4c 100644
> > --- a/arm64.c
> > +++ b/arm64.c
> > @@ -1181,6 +1181,7 @@ arm64_back_trace_cmd(struct bt_info *bt)
> >  		stackframe.pc = bt->hp->eip ?
> >  			bt->hp->eip : GET_STACK_ULONG(bt->hp->esp);
> >  		stackframe.sp = bt->hp->esp + 8;
> > +		bt->flags &= ~BT_REGS_NOT_FOUND;
> >  	} else {
> >  		stackframe.sp = bt->stkptr;
> >  		stackframe.pc = bt->instptr;
> > @@ -1197,6 +1198,9 @@ arm64_back_trace_cmd(struct bt_info *bt)
> >  		return;
> >          }
> >  
> > +	if (bt->flags & BT_REGS_NOT_FOUND)
> > +		return;
> > +
> >  	if (!(bt->flags & BT_KDUMP_ADJUST)) {
> >  		if (bt->flags & BT_USER_SPACE)
> >  			goto complete_user;
> > @@ -1336,8 +1340,12 @@ arm64_get_dumpfile_stackframe(struct bt_info *bt,
> > struct arm64_stackframe *frame
> >  	struct machine_specific *ms = machdep->machspec;
> >  	struct arm64_pt_regs *ptregs;
> >  
> > -	if (!ms->panic_task_regs)
> > +	if (!ms->panic_task_regs ||
> > +	    (!ms->panic_task_regs[bt->tc->processor].sp &&
> > +	     !ms->panic_task_regs[bt->tc->processor].pc)) {
> > +		bt->flags |= BT_REGS_NOT_FOUND;
> >  		return FALSE;
> > +	}
> >  
> >  	ptregs = &ms->panic_task_regs[bt->tc->processor];
> >  	frame->sp = ptregs->sp;
> > @@ -1371,19 +1379,17 @@ static void
> >  arm64_get_stack_frame(struct bt_info *bt, ulong *pcp, ulong *spp)
> >  {
> >  	int ret;
> > -	struct arm64_stackframe stackframe;
> > +	struct arm64_stackframe stackframe = { 0 };
> >  
> >  	if (DUMPFILE() && is_task_active(bt->task))
> >  		ret = arm64_get_dumpfile_stackframe(bt, &stackframe);
> >  	else
> >  		ret = arm64_get_stackframe(bt, &stackframe);
> >  
> > -	if (!ret) {
> > +	if (!ret)
> >  		error(WARNING,
> >  			"cannot determine starting stack frame for task %lx\n",
> >  				bt->task);
> > -		return;
> > -	}
> >  
> >  	bt->frameptr = stackframe.fp;
> >  	if (pcp)
> > @@ -1811,8 +1817,8 @@ arm64_get_crash_notes(void)
> >  
> >  	buf = GETBUF(SIZE(note_buf));
> >  
> > -	if (!(ms->panic_task_regs = malloc(kt->cpus * sizeof(struct
> > arm64_pt_regs))))
> > -		error(FATAL, "cannot malloc panic_task_regs space\n");
> > +	if (!(ms->panic_task_regs = calloc((size_t)kt->cpus, sizeof(struct
> > arm64_pt_regs))))
> > +		error(FATAL, "cannot calloc panic_task_regs space\n");
> >  	
> >  	for  (i = 0; i < kt->cpus; i++) {
> >  
> > @@ -1828,6 +1834,32 @@ arm64_get_crash_notes(void)
> >  		note = (Elf64_Nhdr *)buf;
> >  		p = buf + sizeof(Elf64_Nhdr);
> >  
> > +		/*
> > +		 * dumpfiles created with qemu won't have crash_notes, but there will
> > +		 * be elf notes.
> > +		 */
> > +		if (note->n_namesz == 0 && (DISKDUMP_DUMPFILE() || KDUMP_DUMPFILE())) {
> > +			if (DISKDUMP_DUMPFILE())
> > +				note = diskdump_get_prstatus_percpu(i);
> > +			else if (KDUMP_DUMPFILE())
> > +				note = netdump_get_prstatus_percpu(i);
> > +			if (note) {
> > +				/*
> > +				 * SIZE(note_buf) accounts for a "final note", which is a
> > +				 * trailing empty elf note header.
> > +				 */
> > +				long notesz = SIZE(note_buf) - sizeof(Elf64_Nhdr);
> > +
> > +				if (sizeof(Elf64_Nhdr) + roundup(note->n_namesz, 4) +
> > +				    note->n_descsz == notesz)
> > +					BCOPY((char *)note, buf, notesz);
> > +			} else {
> > +				error(WARNING,
> > +					"cannot find NT_PRSTATUS note for cpu: %d\n", i);
> > +				continue;
> > +			}
> > +		}
> > +
> >  		if (note->n_type != NT_PRSTATUS) {
> >  			error(WARNING, "invalid note (n_type != NT_PRSTATUS)\n");
> >  			goto fail;
> > diff --git a/defs.h b/defs.h
> > index 8e96461..dc79b9b 100644
> > --- a/defs.h
> > +++ b/defs.h
> > @@ -5048,6 +5048,7 @@ ulong cpu_map_addr(const char *type);
> >  #define BT_EFRAME_TARGET   (0x800000000000ULL)
> >  #define BT_CPUMASK        (0x1000000000000ULL)
> >  #define BT_SHOW_ALL_REGS  (0x2000000000000ULL)
> > +#define BT_REGS_NOT_FOUND (0x4000000000000ULL)
> >  #define BT_SYMBOL_OFFSET   (BT_SYMBOLIC_ARGS)
> >  
> >  #define BT_REF_HEXVAL         (0x1)
> > @@ -5721,6 +5722,7 @@ void dump_registers_for_qemu_mem_dump(void);
> >  void kdump_backup_region_init(void);
> >  void display_regs_from_elf_notes(int, FILE *);
> >  void display_ELF_note(int, int, void *, FILE *);
> > +void *netdump_get_prstatus_percpu(int);
> >  #define PRSTATUS_NOTE (1)
> >  #define QEMU_NOTE     (2)
> >  
> > diff --git a/netdump.c b/netdump.c
> > index bfa818f..3ff49f0 100644
> > --- a/netdump.c
> > +++ b/netdump.c
> > @@ -2345,6 +2345,27 @@ dump_Elf64_Nhdr(Elf64_Off offset, int store)
> >  	return len;
> >  }
> >  
> > +void *
> > +netdump_get_prstatus_percpu(int cpu)
> > +{
> > +	int online;
> > +
> > +	if ((cpu < 0) || (cpu >= nd->num_prstatus_notes))
> > +		return NULL;
> > +
> > +	/*
> > +	 * If no cpu mapping was done, then there must be
> > +	 * a one-to-one relationship between the number
> > +	 * of online cpus and the number of notes.
> > +	 */
> > +	if ((online = get_cpus_online()) &&
> > +	    (online == kt->cpus) &&
> > +	    (online != nd->num_prstatus_notes))
> > +		return NULL;
> > +
> > +	return nd->nt_prstatus_percpu[cpu];
> > +}
> > +
> >  /*
> >   *  Send the request to the proper architecture hander.
> >   */
> 
> 




More information about the Crash-utility mailing list