[Crash-utility] [PATCH v2 4/6] ppc64: handle backtrace when CPU is in an emergency stack

lijiang lijiang at redhat.com
Tue Jul 5 01:18:59 UTC 2022


On Mon, Jul 4, 2022 at 4:39 PM HAGIO KAZUHITO(萩尾 一仁) <k-hagio-ab at nec.com>
wrote:

> On 2022/07/04 14:25, Hari Bathini wrote:
> > A CPU could be in an emergency stack when it is running in real mode
> > or any special scenario like TM bad thing. Also, there are dedicated
> > emergency stacks for machine check and system reset interrupt. Right
> > now, no backtrace is provided if a CPU is in any of these stacks.
> > This change ensures backtrace is processed appropriately even when
> > a CPU is in any one of these emergency stacks. Also, if stack info
> > cannot be found, print that message always instead of only when
> > verbose logs are enabled.
> >
> > Signed-off-by: Hari Bathini <hbathini at linux.ibm.com>
> > ---
> >
> > Changes in v2:
> > * Avoid using variable length array for paca_ptrs.
> >
> >
> >   defs.h  |  12 ++++
> >   ppc64.c | 203 ++++++++++++++++++++++++++++++++++++++++++++++++++++----
> >   2 files changed, 203 insertions(+), 12 deletions(-)
> >
> > diff --git a/defs.h b/defs.h
> > index d1d3ea9..9b1b69a 100644
> > --- a/defs.h
> > +++ b/defs.h
> > @@ -6288,6 +6288,13 @@ struct ppc64_elf_prstatus {
> >
> >   #ifdef PPC64
> >
> > +enum emergency_stack_type {
> > +     NONE_STACK              = 0,
> > +     EMERGENCY_STACK,
> > +     NMI_EMERGENCY_STACK,
> > +     MC_EMERGENCY_STACK
> > +};
> > +
> >   struct ppc64_opal {
> >       uint64_t base;
> >       uint64_t entry;
> > @@ -6307,6 +6314,11 @@ struct machine_specific {
> >           char *hwstackbuf;
> >           uint hwstacksize;
> >
> > +     /* Emergency stacks */
> > +     ulong *emergency_sp;
> > +     ulong *nmi_emergency_sp;
> > +     ulong *mc_emergency_sp;
> > +
> >       uint l4_index_size;
> >       uint l3_index_size;
> >       uint l2_index_size;
> > diff --git a/ppc64.c b/ppc64.c
> > index 0a3aa5f..8ea91c2 100644
> > --- a/ppc64.c
> > +++ b/ppc64.c
> > @@ -48,6 +48,10 @@ static ulong ppc64_get_stackbase(ulong);
> >   static ulong ppc64_get_stacktop(ulong);
> >   void ppc64_compiler_warning_stub(void);
> >   static ulong ppc64_in_irqstack(ulong);
> > +static enum emergency_stack_type ppc64_in_emergency_stack(int cpu,
> ulong addr,
> > +                                                       bool verbose);
> > +static void ppc64_set_bt_emergency_stack(enum emergency_stack_type type,
> > +                                      struct bt_info *bt);
> >   static char * ppc64_check_eframe(struct ppc64_pt_regs *);
> >   static void ppc64_print_eframe(char *, struct ppc64_pt_regs *,
> >               struct bt_info *);
> > @@ -56,6 +60,7 @@ static int ppc64_paca_percpu_offset_init(int);
> >   static void ppc64_init_cpu_info(void);
> >   static int ppc64_get_cpu_map(void);
> >   static void ppc64_clear_machdep_cache(void);
> > +static void ppc64_init_paca_info(void);
> >   static void ppc64_vmemmap_init(void);
> >   static int ppc64_get_kvaddr_ranges(struct vaddr_range *);
> >   static uint get_ptetype(ulong pte);
> > @@ -692,6 +697,8 @@ ppc64_init(int when)
> >                               error(FATAL, "cannot malloc hwirqstack
> buffer space.");
> >               }
> >
> > +             ppc64_init_paca_info();
> > +
> >               if (!machdep->hz) {
> >                       machdep->hz = HZ;
> >                       if (THIS_KERNEL_VERSION >= LINUX(2,6,0))
> > @@ -1204,6 +1211,70 @@ ppc64_kvtop(struct task_context *tc, ulong kvaddr,
> >               return ppc64_vtop(kvaddr, (ulong *)vt->kernel_pgd[0],
> paddr, verbose);
> >   }
> >
> > +static void
> > +ppc64_init_paca_info(void)
> > +{
> > +     struct machine_specific *ms = machdep->machspec;
> > +     ulong *paca_ptr;
> > +     int i;
> > +
> > +     if (!(paca_ptrs = (ulong *)calloc(kt->cpus, sizeof(ulong))))
>
> s/paca_ptrs/paca_ptr/
>
> ppc64.c: In function ‘ppc64_init_paca_info’:
> ppc64.c:1219:8: error: ‘paca_ptrs’ undeclared (first use in this
> function); did you mean ‘paca_ptr’?
>    if (!(paca_ptrs = (ulong *)calloc(kt->cpus, sizeof(ulong))))
>          ^~~~~~~~~
>          paca_ptr
> ppc64.c:1219:8: note: each undeclared identifier is reported only once for
> each function it appears in
>
> > +             error(FATAL, "cannot malloc paca pointers space.\n");
> > +
> > +     /* Get paca pointers for all CPUs. */
> > +     if (symbol_exists("paca_ptrs")) {
> > +             ulong paca_loc;
> > +
> > +             readmem(symbol_value("paca_ptrs"), KVADDR, &paca_loc,
> sizeof(void *),
> > +                     "paca double pointer", FAULT_ON_ERROR);
> > +             readmem(paca_loc, KVADDR, paca_ptr, sizeof(void *) *
> kt->cpus,
> > +                     "paca pointers", FAULT_ON_ERROR);
> > +     } else if (symbol_exists("paca") &&
> > +                (get_symbol_type("paca", NULL, NULL) == TYPE_CODE_PTR))
> {
> > +             readmem(symbol_value("paca"), KVADDR, paca_ptr,
> sizeof(void *) * kt->cpus,
> > +                     "paca pointers", FAULT_ON_ERROR);
> > +     } else {
> > +             free(paca_ptrs);
>
> Ditto.
>
> > +             return;
> > +     }
> > +
> > +     /* Initialize emergency stacks info. */
> > +     if (MEMBER_EXISTS("paca_struct", "emergency_sp")) {
> > +             ulong offset = MEMBER_OFFSET("paca_struct",
> "emergency_sp");
> > +
> > +             if (!(ms->emergency_sp = (ulong *)calloc(kt->cpus,
> sizeof(ulong))))
> > +                     error(FATAL, "cannot malloc emergency stack
> space.\n");
> > +             for (i = 0; i < kt->cpus; i++)
> > +                     readmem(paca_ptr[i] + offset, KVADDR,
> &ms->emergency_sp[i],
> > +                             sizeof(void *), "paca->emergency_sp",
> > +                             FAULT_ON_ERROR);
> > +     }
> > +
> > +     if (MEMBER_EXISTS("paca_struct", "nmi_emergency_sp")) {
> > +             ulong offset = MEMBER_OFFSET("paca_struct",
> "nmi_emergency_sp");
> > +
> > +             if (!(ms->nmi_emergency_sp = (ulong *)calloc(kt->cpus,
> sizeof(ulong))))
> > +                     error(FATAL, "cannot malloc NMI emergency stack
> space.\n");
> > +             for (i = 0; i < kt->cpus; i++)
> > +                     readmem(paca_ptr[i] + offset, KVADDR,
> &ms->nmi_emergency_sp[i],
> > +                             sizeof(void *), "paca->nmi_emergency_sp",
> > +                             FAULT_ON_ERROR);
> > +     }
> > +
> > +     if (MEMBER_EXISTS("paca_struct", "mc_emergency_sp")) {
> > +             ulong offset = MEMBER_OFFSET("paca_struct",
> "mc_emergency_sp");
> > +
> > +             if (!(ms->mc_emergency_sp = (ulong *)calloc(kt->cpus,
> sizeof(ulong))))
> > +                     error(FATAL, "cannot malloc machine check
> emergency stack space.\n");
> > +             for (i = 0; i < kt->cpus; i++)
> > +                     readmem(paca_ptr[i] + offset, KVADDR,
> &ms->mc_emergency_sp[i],
> > +                             sizeof(void *), "paca->mc_emergency_sp",
> > +                             FAULT_ON_ERROR);
> > +     }
> > +
> > +     free(paca_ptrs);
>
> Ditto.
>
>
Thank you for the patchset, Hari.

Aside from the compilation issues, other changes look good to me and tests
are ok, so:
for the v2: ACK.

These typos can be fixed when merging, so there is no need to respin for this.
>
>
Kazu, could you add the following kernel commits to the patch log? It will
help track the history of the change.
[1] 729b0f715371 ("powerpc/book3s: Introduce exclusive emergency stack for
machine check exception.")
[2] b1ee8a3de579 ("powerpc/64s: Dedicated system reset interrupt stack")

Thanks.
Lianbo

Thanks,
> Kazu
>
>
> > +}
> > +
> >   /*
> >    *  Verify that the kernel has made the vmemmap list available,
> >    *  and if so, stash the relevant data required to make vtop
> > @@ -1755,6 +1826,11 @@ ppc64_eframe_search(struct bt_info *bt_in)
> >                       addr = bt->stackbase +
> >                               roundup(SIZE(thread_info), sizeof(ulong));
> >               } else if (!INSTACK(addr, bt)) {
> > +                     enum emergency_stack_type estype;
> > +
> > +                     if ((estype =
> ppc64_in_emergency_stack(bt->tc->processor, addr, false)))
> > +                             ppc64_set_bt_emergency_stack(estype, bt);
> > +
> >                       /*
> >                        * If the user specified SP is in HW interrupt
> stack
> >                        * (only for tasks running on other CPUs and in 2.4
> > @@ -1856,6 +1932,84 @@ ppc64_in_irqstack(ulong addr)
> >       return 0;
> >   }
> >
> > +/*
> > + * Check if the CPU is running in any of its emergency stacks.
> > + * Returns
> > + *   NONE_STACK          : if input is invalid or addr is not within
> any emergency stack.
> > + *   EMERGENCY_STACK     : if the addr is within emergency stack.
> > + *   NMI_EMERGENCY_STACK : if the addr is within NMI emergency stack.
> > + *   MC_EMERGENCY_STACK  : if the addr is within machine check
> emergency stack.
> > + */
> > +static enum emergency_stack_type
> > +ppc64_in_emergency_stack(int cpu, ulong addr, bool verbose)
> > +{
> > +     struct machine_specific *ms = machdep->machspec;
> > +     ulong base, top;
> > +
> > +     if (cpu < 0  || cpu >= kt->cpus)
> > +             return NONE_STACK;
> > +
> > +     if (ms->emergency_sp) {
> > +             top = ms->emergency_sp[cpu];
> > +             base =  top - STACKSIZE();
> > +             if (addr >= base && addr < top) {
> > +                     if (verbose)
> > +                             fprintf(fp, "---<Emergency Stack>---\n");
> > +                     return EMERGENCY_STACK;
> > +             }
> > +     }
> > +
> > +     if (ms->nmi_emergency_sp) {
> > +             top = ms->nmi_emergency_sp[cpu];
> > +             base = top - STACKSIZE();
> > +             if (addr >= base && addr < top) {
> > +                     if (verbose)
> > +                             fprintf(fp, "---<NMI Emergency
> Stack>---\n");
> > +                     return NMI_EMERGENCY_STACK;
> > +             }
> > +     }
> > +
> > +     if (ms->mc_emergency_sp) {
> > +             top = ms->mc_emergency_sp[cpu];
> > +             base =  top - STACKSIZE();
> > +             if (addr >= base && addr < top) {
> > +                     if (verbose)
> > +                             fprintf(fp, "---<Machine Check Emergency
> Stack>---\n");
> > +                     return MC_EMERGENCY_STACK;
> > +             }
> > +     }
> > +
> > +     return NONE_STACK;
> > +}
> > +
> > +static void
> > +ppc64_set_bt_emergency_stack(enum emergency_stack_type type, struct
> bt_info *bt)
> > +{
> > +     struct machine_specific *ms = machdep->machspec;
> > +     ulong top;
> > +
> > +     switch (type) {
> > +     case EMERGENCY_STACK:
> > +             top = ms->emergency_sp[bt->tc->processor];
> > +             break;
> > +     case NMI_EMERGENCY_STACK:
> > +             top = ms->nmi_emergency_sp[bt->tc->processor];
> > +             break;
> > +     case MC_EMERGENCY_STACK:
> > +             top = ms->mc_emergency_sp[bt->tc->processor];
> > +             break;
> > +     default:
> > +             top = 0;
> > +             break;
> > +     }
> > +
> > +     if (top) {
> > +             bt->stackbase = top - STACKSIZE();
> > +             bt->stacktop = top;
> > +             alter_stackbuf(bt);
> > +     }
> > +}
> > +
> >   /*
> >    *  Unroll a kernel stack.
> >    */
> > @@ -1936,10 +2090,13 @@ ppc64_back_trace_cmd(struct bt_info *bt)
> >   static void
> >   ppc64_back_trace(struct gnu_request *req, struct bt_info *bt)
> >   {
> > -     int frame = 0;
> > -     ulong lr = 0; /* hack...need to pass in initial lr reg */
> > +     enum emergency_stack_type estype;
> >       ulong newpc = 0, newsp, marker;
> > +     int c = bt->tc->processor;
> > +     ulong nmi_sp = 0;
> >       int eframe_found;
> > +     int frame = 0;
> > +     ulong lr = 0; /* hack...need to pass in initial lr reg */
> >
> >       if (!INSTACK(req->sp, bt)) {
> >               ulong irqstack;
> > @@ -1949,6 +2106,10 @@ ppc64_back_trace(struct gnu_request *req, struct
> bt_info *bt)
> >                       bt->stackbase = irqstack;
> >                       bt->stacktop = bt->stackbase + STACKSIZE();
> >                       alter_stackbuf(bt);
> > +             } else if ((estype = ppc64_in_emergency_stack(c, req->sp,
> true))) {
> > +                     if (estype == NMI_EMERGENCY_STACK)
> > +                             nmi_sp = req->sp;
> > +                     ppc64_set_bt_emergency_stack(estype, bt);
> >               } else if (ms->hwintrstack) {
> >                       bt->stacktop = ms->hwintrstack[bt->tc->processor] +
> >                               sizeof(ulong);
> > @@ -1957,9 +2118,7 @@ ppc64_back_trace(struct gnu_request *req, struct
> bt_info *bt)
> >                       bt->stackbuf = ms->hwstackbuf;
> >                       alter_stackbuf(bt);
> >               } else {
> > -                     if (CRASHDEBUG(1)) {
> > -                             fprintf(fp, "cannot find the stack
> info.\n");
> > -                     }
> > +                     fprintf(fp, "cannot find the stack info.\n");
> >                       return;
> >               }
> >       }
> > @@ -1989,13 +2148,20 @@ ppc64_back_trace(struct gnu_request *req, struct
> bt_info *bt)
> >                               newsp =
> >                               *(ulong *)&bt->stackbuf[newsp -
> bt->stackbase];
> >                       if (!INSTACK(newsp, bt)) {
> > -                             /*
> > -                             * Switch HW interrupt stack to process's
> stack.
> > -                             */
> > -                             bt->stackbase = GET_STACKBASE(bt->task);
> > -                             bt->stacktop = GET_STACKTOP(bt->task);
> > -                             alter_stackbuf(bt);
> > -                     }
> > +                             if ((estype = ppc64_in_emergency_stack(c,
> newsp, true))) {
> > +                                     if (!nmi_sp && estype ==
> NMI_EMERGENCY_STACK)
> > +                                             nmi_sp = newsp;
> > +
>  ppc64_set_bt_emergency_stack(estype, bt);
> > +                             } else {
> > +                                     /*
> > +                                      * Switch HW interrupt stack or
> emergency stack
> > +                                      * to process's stack.
> > +                                      */
> > +                                     bt->stackbase =
> GET_STACKBASE(bt->task);
> > +                                     bt->stacktop =
> GET_STACKTOP(bt->task);
> > +                                     alter_stackbuf(bt);
> > +                             }
> > +                     }
> >                       if (IS_KVADDR(newsp) && INSTACK(newsp, bt))
> >                               newpc = *(ulong *)&bt->stackbuf[newsp + 16
> -
> >                                               bt->stackbase];
> > @@ -2039,6 +2205,16 @@ ppc64_back_trace(struct gnu_request *req, struct
> bt_info *bt)
> >                       }
> >               }
> >
> > +             /*
> > +              * NMI stack may not be re-entrant. If so, an SP in the
> NMI stack
> > +              * is likely to point back to an SP within the NMI stack,
> in case
> > +              * of a nested NMI.
> > +              */
> > +             if (nmi_sp && nmi_sp == newsp) {
> > +                     fprintf(fp, "---<Nested NMI>---\n");
> > +                     break;
> > +             }
> > +
> >               /*
> >                * Some Linux 3.7 kernel threads have been seen to have
> >                * their end-of-trace stack linkage pointer pointing
> > @@ -2416,6 +2592,9 @@ ppc64_get_dumpfile_stack_frame(struct bt_info
> *bt_in, ulong *nip, ulong *ksp)
> >               pt_regs = (struct ppc64_pt_regs *)bt->machdep;
> >               ur_nip = pt_regs->nip;
> >               ur_ksp = pt_regs->gpr[1];
> > +             /* Print the collected regs for panic task. */
> > +             ppc64_print_regs(pt_regs);
> > +             ppc64_print_nip_lr(pt_regs, 1);
> >       } else if ((pc->flags & KDUMP) ||
> >                  ((pc->flags & DISKDUMP) &&
> >                   (*diskdump_flags & KDUMP_CMPRS_LOCAL))) {
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://listman.redhat.com/archives/crash-utility/attachments/20220705/87fa82e9/attachment-0001.htm>


More information about the Crash-utility mailing list