[Crash-utility] Problem in bt for ARM64

Dave Anderson anderson at redhat.com
Mon Oct 16 19:33:45 UTC 2017


Hi Takahiro,

One question about a segment of your patch that I can't test because
I don't have a 4.14 dumpfile.  Here in arm64_switch_stack(), you have
conditionalized the display of the exception frame:

@@ -2669,7 +2753,9 @@ arm64_switch_stack(struct bt_info *bt, struct arm64_stackframe *frame, FILE *ofp
        if (frame->fp == 0)
                return USER_MODE;

-       arm64_print_exception_frame(bt, frame->sp, KERNEL_MODE, ofp);
+       if (!(machdep->flags & UNW_4_14))
+               arm64_print_exception_frame(bt, frame->sp, KERNEL_MODE, ofp);
+
        return KERNEL_MODE;
 }

I don't understand -- what happens with a UNW_4_14 dumpfile?

Thanks,
  Dave




----- Original Message -----
> 
> Hi Takahiro,
> 
> Welcome back!  It's great to get you back in the fold again...
> 
> I just ran a quick test of your patch on a set of sample dumpfiles,
> and only found one issue.  It was on a 4.2 QEMU dump where all of the
> active tasks were running in user-space, and all of their "bt" commands
> fail immediately with a "zero-size" allocation message.  As it turns out,
> that was due to this GETBUF() call in arm64_in_kdump_text_on_irq_stack():
> 
> static int
> arm64_in_kdump_text_on_irq_stack(struct bt_info *bt)
> {
>         int cpu;
>         ulong stackbase;
>         char *stackbuf;
>         ulong *ptr, *start, *base;
>         struct machine_specific *ms;
> 
>         if ((machdep->flags & (IRQ_STACKS|KDUMP_ENABLED)) !=
>         (IRQ_STACKS|KDUMP_ENABLED))
>                 return FALSE;
> 
>         ms = machdep->machspec;
>         cpu = bt->tc->processor;
>         stackbase = ms->irq_stacks[cpu];
>         stackbuf = GETBUF(ms->irq_stack_size);  <===
> ...
> 
> ms->irq_stack_size is zero because that kernel version does not have IRQ
> stacks.
> As it turns out, the problem is that your reworked arm64_irq_stack_init() is
> setting
> the IRQ_STACKS flag unconditionally at the bottom of the function.  Moving
> the
> flag setting up into the two if-else segments fixes it.
> 
> I'll check out the patches in detail next week, but this looks good.
> 
> Thanks,
>   Dave
> 
>    
> 
> 
> 
> 
> 
> 
> 
> ----- Original Message -----
> > Dave,
> > 
> > On Fri, Sep 22, 2017 at 03:06:00PM -0400, Dave Anderson wrote:
> > > 
> > > Jan,
> > > 
> > > I went back to creating a machdep->machspec->user_eframe_offset value
> > > to be able to account for both the 4.7 and the upcoming 4.14 pt_regs
> > > changes:
> > > 
> > >   https://github.com/crash-utility/crash/commit/c975008e61121ef8785622c3bc26964da8fe0deb
> > >  
> > > Again, though, note that "bt" does not work with 4.14.
> > 
> > Even with your latest changes in 7.2.0, "bt" still has some issues:
> > a.register dump at exception frame doesn't have correct values
> >   (due to added stackframe[] in pt_regs)
> > b. tracing irq stack to process stack fails
> >   (due to irq-stack implementation changes and VMAP_STACK)
> > c."bt -o" seems to have been broken for a while
> > 
> > Attached is my tentative patch, which hopefully addresses (a) and (b).
> > While it is still far from perfect, it may help give you a heads-up.
> > 
> > Thanks,
> > -Takahiro AKASHI
> > 
> > ===8<===
> > >From 156ec115b2a436a0738908153d676f8eeed84cb1 Mon Sep 17 00:00:00 2001
> > From: AKASHI Takahiro <takahiro.akashi at linaro.org>
> > Date: Thu, 12 Oct 2017 10:46:34 +0900
> > Subject: [PATCH] arm64: backtrace for v4.14
> > 
> > ---
> >  arm64.c | 172
> >  ++++++++++++++++++++++++++++++++++++++++++++++++++--------------
> >  1 file changed, 136 insertions(+), 36 deletions(-)
> > 
> > diff --git a/arm64.c b/arm64.c
> > index 20c5d34..22c8556 100644
> > --- a/arm64.c
> > +++ b/arm64.c
> > @@ -72,6 +72,7 @@ static void arm64_cmd_mach(void);
> >  static void arm64_display_machine_stats(void);
> >  static int arm64_get_smp_cpus(void);
> >  static void arm64_clear_machdep_cache(void);
> > +static int arm64_on_process_stack(struct bt_info *, ulong);
> >  static int arm64_in_alternate_stack(int, ulong);
> >  static int arm64_on_irq_stack(int, ulong);
> >  static void arm64_set_irq_stack(struct bt_info *);
> > @@ -1336,29 +1337,60 @@ arm64_irq_stack_init(void)
> >  	req = &request;
> >  	struct machine_specific *ms = machdep->machspec;
> >  
> > -	if (!symbol_exists("irq_stack") ||
> > -	    !(sp = per_cpu_symbol_search("irq_stack")) ||
> > -	    !get_symbol_type("irq_stack", NULL, req) ||
> > -	    (req->typecode != TYPE_CODE_ARRAY) ||
> > -	    (req->target_typecode != TYPE_CODE_INT))
> > -		return;
> > +	if (!(ms->irq_stacks = (ulong *)malloc((size_t)(kt->cpus
> > +						* sizeof(ulong)))))
> > +		error(FATAL, "cannot malloc irq_stack addresses\n");
> >  
> > -	if (CRASHDEBUG(1)) {
> > -		fprintf(fp, "irq_stack: \n");
> > -		fprintf(fp, "  type: %s\n",
> > -			(req->typecode == TYPE_CODE_ARRAY) ? "TYPE_CODE_ARRAY" : "other");
> > -		fprintf(fp, "  target_typecode: %s\n",
> > -			req->target_typecode == TYPE_CODE_INT ? "TYPE_CODE_INT" : "other");
> > -		fprintf(fp, "  target_length: %ld\n", req->target_length);
> > -		fprintf(fp, "  length: %ld\n", req->length);
> > -	}
> > +	if (symbol_exists("irq_stack") &&
> > +	    (sp = per_cpu_symbol_search("irq_stack")) &&
> > +	    get_symbol_type("irq_stack", NULL, req)) {
> > +		/* before v4.14 or CONFIG_VMAP_STACK disabled */
> > +		if (CRASHDEBUG(1)) {
> > +			fprintf(fp, "irq_stack: \n");
> > +			fprintf(fp, "  type: %s\n",
> > +				(req->typecode == TYPE_CODE_ARRAY) ?
> > +						"TYPE_CODE_ARRAY" : "other");
> > +			fprintf(fp, "  target_typecode: %s\n",
> > +				req->target_typecode == TYPE_CODE_INT ?
> > +						"TYPE_CODE_INT" : "other");
> > +			fprintf(fp, "  target_length: %ld\n",
> > +						req->target_length);
> > +			fprintf(fp, "  length: %ld\n", req->length);
> > +		}
> >  
> > -	ms->irq_stack_size = req->length;
> > -	if (!(ms->irq_stacks = (ulong *)malloc((size_t)(kt->cpus *
> > sizeof(ulong)))))
> > -		error(FATAL, "cannot malloc irq_stack addresses\n");
> > +		ms->irq_stack_size = req->length;
> > +
> > +		for (i = 0; i < kt->cpus; i++)
> > +			ms->irq_stacks[i] = kt->__per_cpu_offset[i] + sp->value;
> > +	} else if (symbol_exists("irq_stack_ptr") &&
> > +		   (sp = per_cpu_symbol_search("irq_stack_ptr")) &&
> > +		   get_symbol_type("irq_stack_ptr", NULL, req)) {
> > +		/* v4.14 and later with CONFIG_VMAP_STACK enabled */
> > +		if (CRASHDEBUG(1)) {
> > +			fprintf(fp, "irq_stack_ptr: \n");
> > +			fprintf(fp, "  type: %x, %s\n",
> > +				(int)req->typecode,
> > +				(req->typecode == TYPE_CODE_PTR) ?
> > +						"TYPE_CODE_PTR" : "other");
> > +			fprintf(fp, "  target_typecode: %x, %s\n",
> > +				(int)req->target_typecode,
> > +				req->target_typecode == TYPE_CODE_INT ?
> > +						"TYPE_CODE_INT" : "other");
> > +			fprintf(fp, "  target_length: %ld\n",
> > +						req->target_length);
> > +			fprintf(fp, "  length: %ld\n", req->length);
> > +		}
> > +
> > +		ms->irq_stack_size = 16384;
> > +
> > +		for (i = 0; i < kt->cpus; i++) {
> > +			ulong p;
> >  
> > -	for (i = 0; i < kt->cpus; i++)
> > -		ms->irq_stacks[i] = kt->__per_cpu_offset[i] + sp->value;
> > +			p = kt->__per_cpu_offset[i] + sp->value;
> > +			readmem(p, KVADDR, &(ms->irq_stacks[i]), sizeof(ulong),
> > +					"IRQ stack pointer", RETURN_ON_ERROR);
> > +		}
> > +	}
> >  
> >  	machdep->flags |= IRQ_STACKS;
> >  }
> > @@ -1750,11 +1782,20 @@ arm64_display_full_frame(struct bt_info *bt, ulong
> > sp)
> >  	if (bt->frameptr == sp)
> >  		return;
> >  
> > -	if (!INSTACK(sp, bt) || !INSTACK(bt->frameptr, bt)) {
> > -		if (sp == 0)
> > -			sp = bt->stacktop - USER_EFRAME_OFFSET;
> > -		else
> > -			return;
> > +	if (INSTACK(bt->frameptr, bt)) {
> > +		if (INSTACK(sp, bt)) {
> > +			/* normal case */;
> > +		} else {
> > +			if (sp == 0)
> > +				/* interrupt in user mode */
> > +				sp = bt->stacktop - USER_EFRAME_OFFSET;
> > +			else
> > +				/* interrupt in kernel mode */
> > +				sp = bt->stacktop;
> > +		}
> > +	} else {
> > +		error(WARNING, "full display ?\n");
> > +		return;
> >  	}
> >  
> >  	words = (sp - bt->frameptr) / sizeof(ulong);
> > @@ -1873,6 +1914,25 @@ arm64_unwind_frame(struct bt_info *bt, struct
> > arm64_stackframe *frame)
> >  	 *  orig_sp = IRQ_STACK_TO_TASK_STACK(irq_stack_ptr);   (pt_regs pointer
> >  	 on
> >  	 process stack)
> >  	 */
> >  	if (machdep->flags & IRQ_STACKS) {
> > +		if (machdep->flags & UNW_4_14) {
> > +			if ((bt->flags & BT_IRQSTACK) &&
> > +			    !arm64_on_irq_stack(bt->tc->processor, frame->fp)) {
> > +				if (arm64_on_process_stack(bt, frame->fp)) {
> > +					arm64_set_process_stack(bt);
> > +
> > +					frame->sp = frame->fp - SIZE(pt_regs) + 16;
> > +					/* for switch_stack */
> > +					/* fp still points to irq stack */
> > +					bt->bptr = fp;
> > +					/* for display_full_frame */
> > +					/* sp points to process stack */
> > +					bt->frameptr = frame->sp;
> > +				} else {
> > +					/* irq -> user */
> > +					return FALSE;
> > +				}
> > +			}
> > +		} else { /* !UNW_4_14 */
> >  		ms = machdep->machspec;
> >  		irq_stack_ptr = ms->irq_stacks[bt->tc->processor] + ms->irq_stack_size -
> >  		16;
> >  
> > @@ -1896,6 +1956,7 @@ arm64_unwind_frame(struct bt_info *bt, struct
> > arm64_stackframe *frame)
> >  				return FALSE;
> >  			}
> >  		}
> > +		} /* UNW_4_14 */
> >  	}
> >  
> >  	return TRUE;
> > @@ -2086,10 +2147,17 @@ arm64_unwind_frame_v2(struct bt_info *bt, struct
> > arm64_stackframe *frame,
> >  			 * We are on process stack. Just add a faked frame
> >  			 */
> >  
> > -			if (!arm64_on_irq_stack(bt->tc->processor, ext_frame.fp))
> > -				frame->sp = ext_frame.fp
> > -					    - sizeof(struct arm64_pt_regs);
> > -			else {
> > +			if (!arm64_on_irq_stack(bt->tc->processor, ext_frame.fp)) {
> > +				if (MEMBER_EXISTS("pt_regs", "stackframe")) {
> > +					frame->sp = ext_frame.fp
> > +						    - sizeof(struct arm64_pt_regs) - 16;
> > +					frame->fp = ext_frame.fp;
> > +				} else {
> > +					frame->sp = ext_frame.fp
> > +						    - sizeof(struct arm64_pt_regs);
> > +					frame->fp = frame->sp;
> > +				}
> > +			} else {
> >  				/*
> >  				 * FIXME: very exceptional case
> >  				 * We are already back on process stack, but
> > @@ -2109,10 +2177,10 @@ arm64_unwind_frame_v2(struct bt_info *bt, struct
> > arm64_stackframe *frame,
> >  				 * Really ugly
> >  				 */
> >  				frame->sp = frame->fp + 0x20;
> > +				frame->fp = frame->sp;
> >  				fprintf(ofp, " (Next exception frame might be wrong)\n");
> >  			}
> >  
> > -			frame->fp = frame->sp;
> >  		} else {
> >  			/* We are on IRQ stack */
> >  
> > @@ -2122,9 +2190,15 @@ arm64_unwind_frame_v2(struct bt_info *bt, struct
> > arm64_stackframe *frame,
> >  			if (ext_frame.fp != irq_stack_ptr) {
> >  				/* (2) Just add a faked frame */
> >  
> > -				frame->sp = ext_frame.fp
> > -					    - sizeof(struct arm64_pt_regs);
> > -				frame->fp = frame->sp;
> > +				if (MEMBER_EXISTS("pt_regs", "stackframe")) {
> > +					frame->sp = ext_frame.fp
> > +						    - sizeof(struct arm64_pt_regs);
> > +					frame->fp = ext_frame.fp;
> > +				} else {
> > +					frame->sp = ext_frame.fp
> > +						    - sizeof(struct arm64_pt_regs) - 16;
> > +					frame->fp = frame->sp;
> > +				}
> >  			} else {
> >  				/*
> >  				 * (3)
> > @@ -2304,11 +2378,19 @@ arm64_back_trace_cmd(struct bt_info *bt)
> >  		if (arm64_in_exception_text(bt->instptr) && INSTACK(stackframe.fp, bt))
> >  		{
> >  			if (!(bt->flags & BT_IRQSTACK) ||
> >  			    (((stackframe.sp + SIZE(pt_regs)) < bt->stacktop)))
> > -				exception_frame = stackframe.fp - SIZE(pt_regs);
> > +			{
> > +				if (MEMBER_EXISTS("pt_regs", "stackframe"))
> > +					/* v4.14 or later */
> > +					exception_frame = stackframe.fp
> > +							  - SIZE(pt_regs) + 16;
> > +				else
> > +					exception_frame = stackframe.fp
> > +							  - SIZE(pt_regs);
> > +			}
> >  		}
> >  
> >  		if ((bt->flags & BT_IRQSTACK) &&
> > -		    !arm64_on_irq_stack(bt->tc->processor, stackframe.sp)) {
> > +		    !arm64_on_irq_stack(bt->tc->processor, stackframe.fp)) {
> >  			bt->flags &= ~BT_IRQSTACK;
> >  			if (arm64_switch_stack(bt, &stackframe, ofp) == USER_MODE)
> >  				break;
> > @@ -2424,6 +2506,8 @@ user_space:
> >  		 * otherwise show an exception frame.
> >  		 * Since exception entry code doesn't have a real
> >  		 * stackframe, we fake a dummy frame here.
> > +		 * Note: Since we have a real stack frame in pt_regs,
> > +		 * We no longer need a dummy frame on v4.14 or later.
> >  		 */
> >  		if (!arm64_in_exp_entry(stackframe.pc))
> >  			continue;
> > @@ -2669,7 +2753,9 @@ arm64_switch_stack(struct bt_info *bt, struct
> > arm64_stackframe *frame, FILE *ofp
> >  	if (frame->fp == 0)
> >  		return USER_MODE;
> >  
> > -	arm64_print_exception_frame(bt, frame->sp, KERNEL_MODE, ofp);
> > +	if (!(machdep->flags & UNW_4_14))
> > +		arm64_print_exception_frame(bt, frame->sp, KERNEL_MODE, ofp);
> > +
> >  	return KERNEL_MODE;
> >  }
> >  
> > @@ -3362,6 +3448,20 @@ arm64_clear_machdep_cache(void) {
> >  	return;
> >  }
> >  
> > +static int
> > +arm64_on_process_stack(struct bt_info *bt, ulong stkptr)
> > +{
> > +	ulong stackbase, stacktop;
> > +
> > +	stackbase = GET_STACKBASE(bt->task);
> > +	stacktop = GET_STACKTOP(bt->task);
> > +
> > +	if ((stkptr >= stackbase) && (stkptr < stacktop))
> > +		return TRUE;
> > +
> > +	return FALSE;
> > +}
> > +
> >  static int
> >  arm64_on_irq_stack(int cpu, ulong stkptr)
> >  {
> > --
> > 2.14.1
> > 
> > --
> > Crash-utility mailing list
> > Crash-utility at redhat.com
> > https://www.redhat.com/mailman/listinfo/crash-utility
> > 
> 




More information about the Crash-utility mailing list