[Crash-utility] [PATCH v3 2/2] arm/arm64: read elf notes for qemu generated cores
Dave Anderson
anderson at redhat.com
Mon Nov 23 18:48:54 UTC 2015
----- Original Message -----
> On Mon, Nov 23, 2015 at 11:35:38AM -0500, Dave Anderson wrote:
> >
> >
> > ----- Original Message -----
> > > > > Would you like me to spin a v4 with this condition added? Or, since
> > > > > it
> > > > > actually seems to be addressing a non-qemu-generated dump issue, then
> > > > > maybe you just want to submit it as a new patch on top of the qemu
> > > > > patch?
> > > >
> > > > No, I'm cleaning it up now -- there are a couple other gotcha's
> > > > w/respect
> > > > to backtracing, since we're now going to keep the
> > > > ms->panic_task_regs[cpus]
> > > > array intact, even if there's no note in some of them. I'm going to
> > > > make
> > > > sure it's zero-filled in the not-found case, and I have to make sure
> > > > that
> > > > it's recognized on both architectures. I'll post the the updated
> > > > additions
> > > > to arm.c and arm64.c so that you can do a quick test on a legitimate
> > > > QEMU dump.
> > > >
> > > > Might be Monday, I'm not sure...
> > >
> > > Sounds good to me :-)
> > >
> > > Thanks,
> > > drew
> >
> > Drew,
> >
> > Can you sanity-test the attached patch? If it works OK for you, I'll post
> > it as-is.
>
> Works. I tested on both arm and aarch64. I still think we should commit
> it as two patches though. The qemu one, with its commit message focused
> on qemu, and then this one, with its commit message focused on handling
> missing prstatus for some cpus for unknown reasons. Actually, I don't
> think it should even be possible to be missing prstatus for some cpus in
> the qemu case (qemu will always populate them all), so the "dumpfiles
> created with qemu..." comment might need some tweaking as well, since the
> else-continue is being added to its if-block.
>
> Thanks,
> drew
All right, I'll post your v3 patch as-is, and make a follow-up patch
with my additions (plus a tweak to the comment).
Thanks,
Dave
>
> >
> > Thanks,
> > Dave
> >
>
> > diff --git a/arm.c b/arm.c
> > index ffc7c06..114a6ae 100644
> > --- a/arm.c
> > +++ b/arm.c
> > @@ -580,8 +580,8 @@ arm_get_crash_notes(void)
> >
> > buf = GETBUF(SIZE(note_buf));
> >
> > - if (!(panic_task_regs = malloc(kt->cpus*sizeof(*panic_task_regs))))
> > - error(FATAL, "cannot malloc panic_task_regs space\n");
> > + if (!(panic_task_regs = calloc((size_t)kt->cpus,
> > sizeof(*panic_task_regs))))
> > + error(FATAL, "cannot calloc panic_task_regs space\n");
> >
> > for (i=0;i<kt->cpus;i++) {
> >
> > @@ -597,6 +597,32 @@ arm_get_crash_notes(void)
> > note = (Elf32_Nhdr *)buf;
> > p = buf + sizeof(Elf32_Nhdr);
> >
> > + /*
> > + * dumpfiles created with qemu won't have crash_notes, but there will
> > + * be elf notes.
> > + */
> > + if (note->n_namesz == 0 && (DISKDUMP_DUMPFILE() || KDUMP_DUMPFILE())) {
> > + if (DISKDUMP_DUMPFILE())
> > + note = diskdump_get_prstatus_percpu(i);
> > + else if (KDUMP_DUMPFILE())
> > + note = netdump_get_prstatus_percpu(i);
> > + if (note) {
> > + /*
> > + * SIZE(note_buf) accounts for a "final note", which is a
> > + * trailing empty elf note header.
> > + */
> > + long notesz = SIZE(note_buf) - sizeof(Elf32_Nhdr);
> > +
> > + if (sizeof(Elf32_Nhdr) + roundup(note->n_namesz, 4) +
> > + note->n_descsz == notesz)
> > + BCOPY((char *)note, buf, notesz);
> > + } else {
> > + error(WARNING,
> > + "cannot find NT_PRSTATUS note for cpu: %d\n", i);
> > + continue;
> > + }
> > + }
> > +
> > if (note->n_type != NT_PRSTATUS) {
> > error(WARNING, "invalid note (n_type != NT_PRSTATUS)\n");
> > goto fail;
> > @@ -835,6 +861,9 @@ arm_back_trace(struct bt_info *bt)
> > static void
> > arm_back_trace_cmd(struct bt_info *bt)
> > {
> > + if (bt->flags & BT_REGS_NOT_FOUND)
> > + return;
> > +
> > if (kt->flags & DWARF_UNWIND)
> > unwind_backtrace(bt);
> > else
> > @@ -1301,12 +1330,13 @@ arm_get_dumpfile_stack_frame(struct bt_info *bt,
> > ulong *nip, ulong *ksp)
> > {
> > const struct machine_specific *ms = machdep->machspec;
> >
> > - if (!ms->crash_task_regs)
> > + if (!ms->crash_task_regs ||
> > + (!ms->crash_task_regs[bt->tc->processor].ARM_pc &&
> > + !ms->crash_task_regs[bt->tc->processor].ARM_sp)) {
> > + bt->flags |= BT_REGS_NOT_FOUND;
> > return FALSE;
> > + }
> >
> > - if (!is_task_active(bt->task))
> > - return FALSE;
> > -
> > /*
> > * We got registers for panic task from crash_notes. Just return them.
> > */
> > @@ -1339,11 +1369,9 @@ arm_get_stack_frame(struct bt_info *bt, ulong *pcp,
> > ulong *spp)
> > else
> > ret = arm_get_frame(bt, &ip, &sp);
> >
> > - if (!ret) {
> > + if (!ret)
> > error(WARNING, "cannot determine starting stack frame for task %lx\n",
> > bt->task);
> > - return;
> > - }
> >
> > if (pcp)
> > *pcp = ip;
> > diff --git a/arm64.c b/arm64.c
> > index 06b45c2..2304a4c 100644
> > --- a/arm64.c
> > +++ b/arm64.c
> > @@ -1181,6 +1181,7 @@ arm64_back_trace_cmd(struct bt_info *bt)
> > stackframe.pc = bt->hp->eip ?
> > bt->hp->eip : GET_STACK_ULONG(bt->hp->esp);
> > stackframe.sp = bt->hp->esp + 8;
> > + bt->flags &= ~BT_REGS_NOT_FOUND;
> > } else {
> > stackframe.sp = bt->stkptr;
> > stackframe.pc = bt->instptr;
> > @@ -1197,6 +1198,9 @@ arm64_back_trace_cmd(struct bt_info *bt)
> > return;
> > }
> >
> > + if (bt->flags & BT_REGS_NOT_FOUND)
> > + return;
> > +
> > if (!(bt->flags & BT_KDUMP_ADJUST)) {
> > if (bt->flags & BT_USER_SPACE)
> > goto complete_user;
> > @@ -1336,8 +1340,12 @@ arm64_get_dumpfile_stackframe(struct bt_info *bt,
> > struct arm64_stackframe *frame
> > struct machine_specific *ms = machdep->machspec;
> > struct arm64_pt_regs *ptregs;
> >
> > - if (!ms->panic_task_regs)
> > + if (!ms->panic_task_regs ||
> > + (!ms->panic_task_regs[bt->tc->processor].sp &&
> > + !ms->panic_task_regs[bt->tc->processor].pc)) {
> > + bt->flags |= BT_REGS_NOT_FOUND;
> > return FALSE;
> > + }
> >
> > ptregs = &ms->panic_task_regs[bt->tc->processor];
> > frame->sp = ptregs->sp;
> > @@ -1371,19 +1379,17 @@ static void
> > arm64_get_stack_frame(struct bt_info *bt, ulong *pcp, ulong *spp)
> > {
> > int ret;
> > - struct arm64_stackframe stackframe;
> > + struct arm64_stackframe stackframe = { 0 };
> >
> > if (DUMPFILE() && is_task_active(bt->task))
> > ret = arm64_get_dumpfile_stackframe(bt, &stackframe);
> > else
> > ret = arm64_get_stackframe(bt, &stackframe);
> >
> > - if (!ret) {
> > + if (!ret)
> > error(WARNING,
> > "cannot determine starting stack frame for task %lx\n",
> > bt->task);
> > - return;
> > - }
> >
> > bt->frameptr = stackframe.fp;
> > if (pcp)
> > @@ -1811,8 +1817,8 @@ arm64_get_crash_notes(void)
> >
> > buf = GETBUF(SIZE(note_buf));
> >
> > - if (!(ms->panic_task_regs = malloc(kt->cpus * sizeof(struct
> > arm64_pt_regs))))
> > - error(FATAL, "cannot malloc panic_task_regs space\n");
> > + if (!(ms->panic_task_regs = calloc((size_t)kt->cpus, sizeof(struct
> > arm64_pt_regs))))
> > + error(FATAL, "cannot calloc panic_task_regs space\n");
> >
> > for (i = 0; i < kt->cpus; i++) {
> >
> > @@ -1828,6 +1834,32 @@ arm64_get_crash_notes(void)
> > note = (Elf64_Nhdr *)buf;
> > p = buf + sizeof(Elf64_Nhdr);
> >
> > + /*
> > + * dumpfiles created with qemu won't have crash_notes, but there will
> > + * be elf notes.
> > + */
> > + if (note->n_namesz == 0 && (DISKDUMP_DUMPFILE() || KDUMP_DUMPFILE())) {
> > + if (DISKDUMP_DUMPFILE())
> > + note = diskdump_get_prstatus_percpu(i);
> > + else if (KDUMP_DUMPFILE())
> > + note = netdump_get_prstatus_percpu(i);
> > + if (note) {
> > + /*
> > + * SIZE(note_buf) accounts for a "final note", which is a
> > + * trailing empty elf note header.
> > + */
> > + long notesz = SIZE(note_buf) - sizeof(Elf64_Nhdr);
> > +
> > + if (sizeof(Elf64_Nhdr) + roundup(note->n_namesz, 4) +
> > + note->n_descsz == notesz)
> > + BCOPY((char *)note, buf, notesz);
> > + } else {
> > + error(WARNING,
> > + "cannot find NT_PRSTATUS note for cpu: %d\n", i);
> > + continue;
> > + }
> > + }
> > +
> > if (note->n_type != NT_PRSTATUS) {
> > error(WARNING, "invalid note (n_type != NT_PRSTATUS)\n");
> > goto fail;
> > diff --git a/defs.h b/defs.h
> > index 8e96461..dc79b9b 100644
> > --- a/defs.h
> > +++ b/defs.h
> > @@ -5048,6 +5048,7 @@ ulong cpu_map_addr(const char *type);
> > #define BT_EFRAME_TARGET (0x800000000000ULL)
> > #define BT_CPUMASK (0x1000000000000ULL)
> > #define BT_SHOW_ALL_REGS (0x2000000000000ULL)
> > +#define BT_REGS_NOT_FOUND (0x4000000000000ULL)
> > #define BT_SYMBOL_OFFSET (BT_SYMBOLIC_ARGS)
> >
> > #define BT_REF_HEXVAL (0x1)
> > @@ -5721,6 +5722,7 @@ void dump_registers_for_qemu_mem_dump(void);
> > void kdump_backup_region_init(void);
> > void display_regs_from_elf_notes(int, FILE *);
> > void display_ELF_note(int, int, void *, FILE *);
> > +void *netdump_get_prstatus_percpu(int);
> > #define PRSTATUS_NOTE (1)
> > #define QEMU_NOTE (2)
> >
> > diff --git a/netdump.c b/netdump.c
> > index bfa818f..3ff49f0 100644
> > --- a/netdump.c
> > +++ b/netdump.c
> > @@ -2345,6 +2345,27 @@ dump_Elf64_Nhdr(Elf64_Off offset, int store)
> > return len;
> > }
> >
> > +void *
> > +netdump_get_prstatus_percpu(int cpu)
> > +{
> > + int online;
> > +
> > + if ((cpu < 0) || (cpu >= nd->num_prstatus_notes))
> > + return NULL;
> > +
> > + /*
> > + * If no cpu mapping was done, then there must be
> > + * a one-to-one relationship between the number
> > + * of online cpus and the number of notes.
> > + */
> > + if ((online = get_cpus_online()) &&
> > + (online == kt->cpus) &&
> > + (online != nd->num_prstatus_notes))
> > + return NULL;
> > +
> > + return nd->nt_prstatus_percpu[cpu];
> > +}
> > +
> > /*
> > * Send the request to the proper architecture hander.
> > */
>
>
More information about the Crash-utility
mailing list