[Crash-utility] [PATCH v2 4/6] ppc64: handle backtrace when CPU is in an emergency stack
lijiang
lijiang at redhat.com
Tue Jul 5 01:18:59 UTC 2022
On Mon, Jul 4, 2022 at 4:39 PM HAGIO KAZUHITO(萩尾 一仁) <k-hagio-ab at nec.com>
wrote:
> On 2022/07/04 14:25, Hari Bathini wrote:
> > A CPU could be in an emergency stack when it is running in real mode
> > or any special scenario like TM bad thing. Also, there are dedicated
> > emergency stacks for machine check and system reset interrupt. Right
> > now, no backtrace is provided if a CPU is in any of these stacks.
> > This change ensures backtrace is processed appropriately even when
> > a CPU is in any one of these emergency stacks. Also, if stack info
> > cannot be found, print that message always instead of only when
> > verbose logs are enabled.
> >
> > Signed-off-by: Hari Bathini <hbathini at linux.ibm.com>
> > ---
> >
> > Changes in v2:
> > * Avoid using variable length array for paca_ptrs.
> >
> >
> > defs.h | 12 ++++
> > ppc64.c | 203 ++++++++++++++++++++++++++++++++++++++++++++++++++++----
> > 2 files changed, 203 insertions(+), 12 deletions(-)
> >
> > diff --git a/defs.h b/defs.h
> > index d1d3ea9..9b1b69a 100644
> > --- a/defs.h
> > +++ b/defs.h
> > @@ -6288,6 +6288,13 @@ struct ppc64_elf_prstatus {
> >
> > #ifdef PPC64
> >
> > +enum emergency_stack_type {
> > + NONE_STACK = 0,
> > + EMERGENCY_STACK,
> > + NMI_EMERGENCY_STACK,
> > + MC_EMERGENCY_STACK
> > +};
> > +
> > struct ppc64_opal {
> > uint64_t base;
> > uint64_t entry;
> > @@ -6307,6 +6314,11 @@ struct machine_specific {
> > char *hwstackbuf;
> > uint hwstacksize;
> >
> > + /* Emergency stacks */
> > + ulong *emergency_sp;
> > + ulong *nmi_emergency_sp;
> > + ulong *mc_emergency_sp;
> > +
> > uint l4_index_size;
> > uint l3_index_size;
> > uint l2_index_size;
> > diff --git a/ppc64.c b/ppc64.c
> > index 0a3aa5f..8ea91c2 100644
> > --- a/ppc64.c
> > +++ b/ppc64.c
> > @@ -48,6 +48,10 @@ static ulong ppc64_get_stackbase(ulong);
> > static ulong ppc64_get_stacktop(ulong);
> > void ppc64_compiler_warning_stub(void);
> > static ulong ppc64_in_irqstack(ulong);
> > +static enum emergency_stack_type ppc64_in_emergency_stack(int cpu,
> ulong addr,
> > + bool verbose);
> > +static void ppc64_set_bt_emergency_stack(enum emergency_stack_type type,
> > + struct bt_info *bt);
> > static char * ppc64_check_eframe(struct ppc64_pt_regs *);
> > static void ppc64_print_eframe(char *, struct ppc64_pt_regs *,
> > struct bt_info *);
> > @@ -56,6 +60,7 @@ static int ppc64_paca_percpu_offset_init(int);
> > static void ppc64_init_cpu_info(void);
> > static int ppc64_get_cpu_map(void);
> > static void ppc64_clear_machdep_cache(void);
> > +static void ppc64_init_paca_info(void);
> > static void ppc64_vmemmap_init(void);
> > static int ppc64_get_kvaddr_ranges(struct vaddr_range *);
> > static uint get_ptetype(ulong pte);
> > @@ -692,6 +697,8 @@ ppc64_init(int when)
> > error(FATAL, "cannot malloc hwirqstack
> buffer space.");
> > }
> >
> > + ppc64_init_paca_info();
> > +
> > if (!machdep->hz) {
> > machdep->hz = HZ;
> > if (THIS_KERNEL_VERSION >= LINUX(2,6,0))
> > @@ -1204,6 +1211,70 @@ ppc64_kvtop(struct task_context *tc, ulong kvaddr,
> > return ppc64_vtop(kvaddr, (ulong *)vt->kernel_pgd[0],
> paddr, verbose);
> > }
> >
> > +static void
> > +ppc64_init_paca_info(void)
> > +{
> > + struct machine_specific *ms = machdep->machspec;
> > + ulong *paca_ptr;
> > + int i;
> > +
> > + if (!(paca_ptrs = (ulong *)calloc(kt->cpus, sizeof(ulong))))
>
> s/paca_ptrs/paca_ptr/
>
> ppc64.c: In function ‘ppc64_init_paca_info’:
> ppc64.c:1219:8: error: ‘paca_ptrs’ undeclared (first use in this
> function); did you mean ‘paca_ptr’?
> if (!(paca_ptrs = (ulong *)calloc(kt->cpus, sizeof(ulong))))
> ^~~~~~~~~
> paca_ptr
> ppc64.c:1219:8: note: each undeclared identifier is reported only once for
> each function it appears in
>
> > + error(FATAL, "cannot malloc paca pointers space.\n");
> > +
> > + /* Get paca pointers for all CPUs. */
> > + if (symbol_exists("paca_ptrs")) {
> > + ulong paca_loc;
> > +
> > + readmem(symbol_value("paca_ptrs"), KVADDR, &paca_loc,
> sizeof(void *),
> > + "paca double pointer", FAULT_ON_ERROR);
> > + readmem(paca_loc, KVADDR, paca_ptr, sizeof(void *) *
> kt->cpus,
> > + "paca pointers", FAULT_ON_ERROR);
> > + } else if (symbol_exists("paca") &&
> > + (get_symbol_type("paca", NULL, NULL) == TYPE_CODE_PTR))
> {
> > + readmem(symbol_value("paca"), KVADDR, paca_ptr,
> sizeof(void *) * kt->cpus,
> > + "paca pointers", FAULT_ON_ERROR);
> > + } else {
> > + free(paca_ptrs);
>
> Ditto.
>
> > + return;
> > + }
> > +
> > + /* Initialize emergency stacks info. */
> > + if (MEMBER_EXISTS("paca_struct", "emergency_sp")) {
> > + ulong offset = MEMBER_OFFSET("paca_struct",
> "emergency_sp");
> > +
> > + if (!(ms->emergency_sp = (ulong *)calloc(kt->cpus,
> sizeof(ulong))))
> > + error(FATAL, "cannot malloc emergency stack
> space.\n");
> > + for (i = 0; i < kt->cpus; i++)
> > + readmem(paca_ptr[i] + offset, KVADDR,
> &ms->emergency_sp[i],
> > + sizeof(void *), "paca->emergency_sp",
> > + FAULT_ON_ERROR);
> > + }
> > +
> > + if (MEMBER_EXISTS("paca_struct", "nmi_emergency_sp")) {
> > + ulong offset = MEMBER_OFFSET("paca_struct",
> "nmi_emergency_sp");
> > +
> > + if (!(ms->nmi_emergency_sp = (ulong *)calloc(kt->cpus,
> sizeof(ulong))))
> > + error(FATAL, "cannot malloc NMI emergency stack
> space.\n");
> > + for (i = 0; i < kt->cpus; i++)
> > + readmem(paca_ptr[i] + offset, KVADDR,
> &ms->nmi_emergency_sp[i],
> > + sizeof(void *), "paca->nmi_emergency_sp",
> > + FAULT_ON_ERROR);
> > + }
> > +
> > + if (MEMBER_EXISTS("paca_struct", "mc_emergency_sp")) {
> > + ulong offset = MEMBER_OFFSET("paca_struct",
> "mc_emergency_sp");
> > +
> > + if (!(ms->mc_emergency_sp = (ulong *)calloc(kt->cpus,
> sizeof(ulong))))
> > + error(FATAL, "cannot malloc machine check
> emergency stack space.\n");
> > + for (i = 0; i < kt->cpus; i++)
> > + readmem(paca_ptr[i] + offset, KVADDR,
> &ms->mc_emergency_sp[i],
> > + sizeof(void *), "paca->mc_emergency_sp",
> > + FAULT_ON_ERROR);
> > + }
> > +
> > + free(paca_ptrs);
>
> Ditto.
>
>
Thank you for the patchset, Hari.
Aside from the compilation issues noted above, the other changes look good to
me and the tests pass, so:
for the v2: ACK.
The paca_ptrs/paca_ptr typos can be fixed when merging; no need to respin for this.
>
>
Kazu, can you help add the following kernel commits to the patch log? They will
help track the history of the change.
[1] 729b0f715371 ("powerpc/book3s: Introduce exclusive emergency stack for
machine check exception.")
[2] b1ee8a3de579 ("powerpc/64s: Dedicated system reset interrupt stack")
Thanks.
Lianbo
Thanks,
> Kazu
>
>
> > +}
> > +
> > /*
> > * Verify that the kernel has made the vmemmap list available,
> > * and if so, stash the relevant data required to make vtop
> > @@ -1755,6 +1826,11 @@ ppc64_eframe_search(struct bt_info *bt_in)
> > addr = bt->stackbase +
> > roundup(SIZE(thread_info), sizeof(ulong));
> > } else if (!INSTACK(addr, bt)) {
> > + enum emergency_stack_type estype;
> > +
> > + if ((estype =
> ppc64_in_emergency_stack(bt->tc->processor, addr, false)))
> > + ppc64_set_bt_emergency_stack(estype, bt);
> > +
> > /*
> > * If the user specified SP is in HW interrupt
> stack
> > * (only for tasks running on other CPUs and in 2.4
> > @@ -1856,6 +1932,84 @@ ppc64_in_irqstack(ulong addr)
> > return 0;
> > }
> >
> > +/*
> > + * Check if the CPU is running in any of its emergency stacks.
> > + * Returns
> > + * NONE_STACK : if input is invalid or addr is not within
> any emergency stack.
> > + * EMERGENCY_STACK : if the addr is within emergency stack.
> > + * NMI_EMERGENCY_STACK : if the addr is within NMI emergency stack.
> > + * MC_EMERGENCY_STACK : if the addr is within machine check
> emergency stack.
> > + */
> > +static enum emergency_stack_type
> > +ppc64_in_emergency_stack(int cpu, ulong addr, bool verbose)
> > +{
> > + struct machine_specific *ms = machdep->machspec;
> > + ulong base, top;
> > +
> > + if (cpu < 0 || cpu >= kt->cpus)
> > + return NONE_STACK;
> > +
> > + if (ms->emergency_sp) {
> > + top = ms->emergency_sp[cpu];
> > + base = top - STACKSIZE();
> > + if (addr >= base && addr < top) {
> > + if (verbose)
> > + fprintf(fp, "---<Emergency Stack>---\n");
> > + return EMERGENCY_STACK;
> > + }
> > + }
> > +
> > + if (ms->nmi_emergency_sp) {
> > + top = ms->nmi_emergency_sp[cpu];
> > + base = top - STACKSIZE();
> > + if (addr >= base && addr < top) {
> > + if (verbose)
> > + fprintf(fp, "---<NMI Emergency
> Stack>---\n");
> > + return NMI_EMERGENCY_STACK;
> > + }
> > + }
> > +
> > + if (ms->mc_emergency_sp) {
> > + top = ms->mc_emergency_sp[cpu];
> > + base = top - STACKSIZE();
> > + if (addr >= base && addr < top) {
> > + if (verbose)
> > + fprintf(fp, "---<Machine Check Emergency
> Stack>---\n");
> > + return MC_EMERGENCY_STACK;
> > + }
> > + }
> > +
> > + return NONE_STACK;
> > +}
> > +
> > +static void
> > +ppc64_set_bt_emergency_stack(enum emergency_stack_type type, struct
> bt_info *bt)
> > +{
> > + struct machine_specific *ms = machdep->machspec;
> > + ulong top;
> > +
> > + switch (type) {
> > + case EMERGENCY_STACK:
> > + top = ms->emergency_sp[bt->tc->processor];
> > + break;
> > + case NMI_EMERGENCY_STACK:
> > + top = ms->nmi_emergency_sp[bt->tc->processor];
> > + break;
> > + case MC_EMERGENCY_STACK:
> > + top = ms->mc_emergency_sp[bt->tc->processor];
> > + break;
> > + default:
> > + top = 0;
> > + break;
> > + }
> > +
> > + if (top) {
> > + bt->stackbase = top - STACKSIZE();
> > + bt->stacktop = top;
> > + alter_stackbuf(bt);
> > + }
> > +}
> > +
> > /*
> > * Unroll a kernel stack.
> > */
> > @@ -1936,10 +2090,13 @@ ppc64_back_trace_cmd(struct bt_info *bt)
> > static void
> > ppc64_back_trace(struct gnu_request *req, struct bt_info *bt)
> > {
> > - int frame = 0;
> > - ulong lr = 0; /* hack...need to pass in initial lr reg */
> > + enum emergency_stack_type estype;
> > ulong newpc = 0, newsp, marker;
> > + int c = bt->tc->processor;
> > + ulong nmi_sp = 0;
> > int eframe_found;
> > + int frame = 0;
> > + ulong lr = 0; /* hack...need to pass in initial lr reg */
> >
> > if (!INSTACK(req->sp, bt)) {
> > ulong irqstack;
> > @@ -1949,6 +2106,10 @@ ppc64_back_trace(struct gnu_request *req, struct
> bt_info *bt)
> > bt->stackbase = irqstack;
> > bt->stacktop = bt->stackbase + STACKSIZE();
> > alter_stackbuf(bt);
> > + } else if ((estype = ppc64_in_emergency_stack(c, req->sp,
> true))) {
> > + if (estype == NMI_EMERGENCY_STACK)
> > + nmi_sp = req->sp;
> > + ppc64_set_bt_emergency_stack(estype, bt);
> > } else if (ms->hwintrstack) {
> > bt->stacktop = ms->hwintrstack[bt->tc->processor] +
> > sizeof(ulong);
> > @@ -1957,9 +2118,7 @@ ppc64_back_trace(struct gnu_request *req, struct
> bt_info *bt)
> > bt->stackbuf = ms->hwstackbuf;
> > alter_stackbuf(bt);
> > } else {
> > - if (CRASHDEBUG(1)) {
> > - fprintf(fp, "cannot find the stack
> info.\n");
> > - }
> > + fprintf(fp, "cannot find the stack info.\n");
> > return;
> > }
> > }
> > @@ -1989,13 +2148,20 @@ ppc64_back_trace(struct gnu_request *req, struct
> bt_info *bt)
> > newsp =
> > *(ulong *)&bt->stackbuf[newsp -
> bt->stackbase];
> > if (!INSTACK(newsp, bt)) {
> > - /*
> > - * Switch HW interrupt stack to process's
> stack.
> > - */
> > - bt->stackbase = GET_STACKBASE(bt->task);
> > - bt->stacktop = GET_STACKTOP(bt->task);
> > - alter_stackbuf(bt);
> > - }
> > + if ((estype = ppc64_in_emergency_stack(c,
> newsp, true))) {
> > + if (!nmi_sp && estype ==
> NMI_EMERGENCY_STACK)
> > + nmi_sp = newsp;
> > +
> ppc64_set_bt_emergency_stack(estype, bt);
> > + } else {
> > + /*
> > + * Switch HW interrupt stack or
> emergency stack
> > + * to process's stack.
> > + */
> > + bt->stackbase =
> GET_STACKBASE(bt->task);
> > + bt->stacktop =
> GET_STACKTOP(bt->task);
> > + alter_stackbuf(bt);
> > + }
> > + }
> > if (IS_KVADDR(newsp) && INSTACK(newsp, bt))
> > newpc = *(ulong *)&bt->stackbuf[newsp + 16
> -
> > bt->stackbase];
> > @@ -2039,6 +2205,16 @@ ppc64_back_trace(struct gnu_request *req, struct
> bt_info *bt)
> > }
> > }
> >
> > + /*
> > + * NMI stack may not be re-entrant. In so, an SP in the
> NMI stack
> > + * is likely to point back to an SP within the NMI stack,
> in case
> > + * of a nested NMI.
> > + */
> > + if (nmi_sp && nmi_sp == newsp) {
> > + fprintf(fp, "---<Nested NMI>---\n");
> > + break;
> > + }
> > +
> > /*
> > * Some Linux 3.7 kernel threads have been seen to have
> > * their end-of-trace stack linkage pointer pointing
> > @@ -2416,6 +2592,9 @@ ppc64_get_dumpfile_stack_frame(struct bt_info
> *bt_in, ulong *nip, ulong *ksp)
> > pt_regs = (struct ppc64_pt_regs *)bt->machdep;
> > ur_nip = pt_regs->nip;
> > ur_ksp = pt_regs->gpr[1];
> > + /* Print the collected regs for panic task. */
> > + ppc64_print_regs(pt_regs);
> > + ppc64_print_nip_lr(pt_regs, 1);
> > } else if ((pc->flags & KDUMP) ||
> > ((pc->flags & DISKDUMP) &&
> > (*diskdump_flags & KDUMP_CMPRS_LOCAL))) {
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://listman.redhat.com/archives/crash-utility/attachments/20220705/87fa82e9/attachment-0001.htm>
More information about the Crash-utility
mailing list