[Crash-utility] [PATCH] x86_64: Add Linux 5.8+ exception functions to check exception frame

HAGIO KAZUHITO(萩尾 一仁) k-hagio-ab at nec.com
Thu Apr 15 10:06:01 UTC 2021


Thanks for the comments.
-----Original Message-----
> 在 2021年04月06日 16:46, HAGIO KAZUHITO(萩尾 一仁) 写道:
> > Fix for 'bt' command and options on Linux 5.8-rc1 or later kernels
> > that contain merge commit 076f14be7fc942e112c94c841baec44124275cd0.
> > The merged patches changed the name of exception functions that
> > have been used by the crash utility to check the exception frame.
> > Without the patch, the command and options cannot display it.
> >
> > Before:
> >   crash> bt
> >   PID: 8752   TASK: ffff8f80cb244380  CPU: 2   COMMAND: "insmod"
> >    #0 [ffffa3e40187f9f8] machine_kexec at ffffffffab25d267
> >    #1 [ffffa3e40187fa48] __crash_kexec at ffffffffab38e2ed
> >    #2 [ffffa3e40187fb10] crash_kexec at ffffffffab38f1dd
> >    #3 [ffffa3e40187fb28] oops_end at ffffffffab222cbd
> >    #4 [ffffa3e40187fb48] do_trap at ffffffffab21fea1
> >    #5 [ffffa3e40187fb90] do_error_trap at ffffffffab21ff75
> >    #6 [ffffa3e40187fbd0] exc_invalid_op at ffffffffabb76a2c
> >    #7 [ffffa3e40187fbf0] asm_exc_invalid_op at ffffffffabc00a72
> >    #8 [ffffa3e40187fc78] init_module at ffffffffc042b018 [invalid]
> >    #9 [ffffa3e40187fca0] init_module at ffffffffc042b018 [invalid]
> >   #10 [ffffa3e40187fca8] do_one_initcall at ffffffffab202806
> >   #11 [ffffa3e40187fd18] do_init_module at ffffffffab3888ba
> >   #12 [ffffa3e40187fd38] load_module at ffffffffab38afde
> >
> > After:
> >   crash> bt
> >   PID: 8752   TASK: ffff8f80cb244380  CPU: 2   COMMAND: "insmod"
> >    #0 [ffffa3e40187f9f8] machine_kexec at ffffffffab25d267
> >    #1 [ffffa3e40187fa48] __crash_kexec at ffffffffab38e2ed
> >    #2 [ffffa3e40187fb10] crash_kexec at ffffffffab38f1dd
> >    #3 [ffffa3e40187fb28] oops_end at ffffffffab222cbd
> >    #4 [ffffa3e40187fb48] do_trap at ffffffffab21fea1
> >    #5 [ffffa3e40187fb90] do_error_trap at ffffffffab21ff75
> >    #6 [ffffa3e40187fbd0] exc_invalid_op at ffffffffabb76a2c
> >    #7 [ffffa3e40187fbf0] asm_exc_invalid_op at ffffffffabc00a72
> >       [exception RIP: init_module+24]
> >       RIP: ffffffffc042b018  RSP: ffffa3e40187fca8  RFLAGS: 00010246
> >       RAX: 000000000000001c  RBX: 0000000000000000  RCX: 0000000000000000
> >       RDX: 0000000000000000  RSI: ffff8f80fbd18000  RDI: ffff8f80fbd18000
> >       RBP: ffffffffc042b000   R8: 000000000000029d   R9: 000000000000002c
> >       R10: 0000000000000000  R11: ffffa3e40187fb58  R12: ffffffffc042d018
> >       R13: ffffa3e40187fdf0  R14: ffffffffc042d000  R15: ffffa3e40187fe90
> >       ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
> >    #8 [ffffa3e40187fca0] init_module at ffffffffc042b018 [invalid]
> >    #9 [ffffa3e40187fca8] do_one_initcall at ffffffffab202806
> >   #10 [ffffa3e40187fd18] do_init_module at ffffffffab3888ba
> >   #11 [ffffa3e40187fd38] load_module at ffffffffab38afde
> >
> > Signed-off-by: Kazuhito Hagio <k-hagio-ab at nec.com>
> > ---
> >  defs.h   |  1 +
> >  x86_64.c | 43 ++++++++++++++++++++++++++++++++++++++++---
> >  2 files changed, 41 insertions(+), 3 deletions(-)
> >
> > diff --git a/defs.h b/defs.h
> > index c29b3fa..f9c711c 100644
> > --- a/defs.h
> > +++ b/defs.h
> > @@ -6026,6 +6026,7 @@ struct machine_specific {
> >  	ulong cpu_entry_area_start;
> >  	ulong cpu_entry_area_end;
> >  	ulong page_offset_force;
> > +	char **exception_functions;
> >  };
> >
> >  #define KSYMS_START    (0x1)
> > diff --git a/x86_64.c b/x86_64.c
> > index f5b2f7b..3f4a969 100644
> > --- a/x86_64.c
> > +++ b/x86_64.c
> > @@ -139,6 +139,9 @@ static void orc_dump(ulong);
> >
> >  struct machine_specific x86_64_machine_specific = { 0 };
> >
> > +static const char *exception_functions_orig[];
> > +static const char *exception_functions_5_8[];
> > +
> >  /*
> >   *  Do all necessary machine-specific setup here.  This is called several
> >   *  times during initialization.
> > @@ -735,6 +738,12 @@ x86_64_init(int when)
> >  		STRUCT_SIZE_INIT(percpu_data, "percpu_data");
> >
> >  		GART_init();
> > +
> > +		if (kernel_symbol_exists("asm_exc_divide_error"))
> 
> This symbol may also change in the future, so I'm not sure whether it would be better to use the kernel
> version instead. That would be consistent with the name of exception_functions_5_8. For example:
> 
> if (THIS_KERNEL_VERSION >= LINUX(5,8,0))
> ...

Hmm, but the symbol names in exception_functions_5_8 have to be updated anyway
when they change, and the symbol search also provides "backport" compatibility.
So I thought this way would be better.

> 
> 
> > +			machdep->machspec->exception_functions = (char **)exception_functions_5_8;
> > +		else
> > +			machdep->machspec->exception_functions = (char **)exception_functions_orig;
> > +
> >  		break;
> >
> >  	case POST_VM:
> > @@ -1104,6 +1113,12 @@ x86_64_dump_machdep_table(ulong arg)
> >  		fprintf(fp, "%016lx\n", (ulong)ms->cpu_entry_area_end);
> >  	else
> >  		fprintf(fp, "(unused)\n");
> > +
> > +	fprintf(fp, "      excpetion_functions: ");
> > +	if (ms->exception_functions == (char **)exception_functions_5_8)
> > +		fprintf(fp, "excpetion_functions_5_8\n");
> > +	else
> > +		fprintf(fp, "excpetion_functions_orig\n");
> >  }
> >
> >  /*
> > @@ -3086,7 +3101,7 @@ text_lock_function(char *name, struct bt_info *bt, ulong locktext)
> >   * zeroentry xen_debug do_debug
> >   * zeroentry xen_int3 do_int3
> >  */
> > -static const char *exception_functions[] = {
> > +static const char *exception_functions_orig[] = {
> >  	"invalid_TSS",
> >  	"segment_not_present",
> >  	"alignment_check",
> > @@ -3109,6 +3124,28 @@ static const char *exception_functions[] = {
> >  	NULL,
> >  };
> >
> > +static const char *exception_functions_5_8[] = {
> > +	"asm_exc_invalid_tss",
> > +	"asm_exc_segment_not_present",
> > +	"asm_exc_alignment_check",
> > +	"asm_exc_general_protection",
> > +	"asm_exc_page_fault",
> > +	"asm_exc_divide_error",
> > +	"asm_exc_overflow",
> > +	"asm_exc_bounds",
> > +	"asm_exc_invalid_op",
> > +	"asm_exc_device_not_available",
> > +	"asm_exc_coproc_segment_overrun",
> > +	"asm_exc_spurious_interrupt_bug",
> > +	"asm_exc_coprocessor_error",
> > +	"asm_exc_simd_coprocessor_error",
> > +	"xen_asm_exc_stack_segment",
> > +	"xen_asm_exc_xen_hypervisor_callback",
> > +	"xen_asm_exc_debug"
> 
> As for asm_exc_debug, it seems that it is no longer a Xen-specific exception. Can we remove the
> "xen_" prefix and use "asm_exc_debug" instead?

Thanks, will fix.

> 
> BTW: I don't see asm_exc_nmi and asm_exc_double_fault in the above exception table. Are they missing
> or unnecessary?

My understanding is that for interrupts/exceptions like NMI and
double fault, which have their own dedicated exception stacks, the
crash utility does not need their functions in the table to print
the exception frame:

crash> bt
PID: 0      TASK: ffffffffbca14840  CPU: 0   COMMAND: "swapper/0"
 #0 [fffffe000000bd20] machine_kexec at ffffffffbb064a1e
 #1 [fffffe000000bd70] __crash_kexec at ffffffffbb18ebaa
 #2 [fffffe000000be30] panic at ffffffffbb0dc5eb
 #3 [fffffe000000beb0] unknown_nmi_error.cold at ffffffffbb02c6b1
 #4 [fffffe000000bec8] exc_nmi at ffffffffbbb63e87
 #5 [fffffe000000bef0] end_repeat_nmi at ffffffffbbc014db
    [exception RIP: native_safe_halt+14]
    RIP: ffffffffbbb7384e  RSP: ffffffffbca03ea0  RFLAGS: 00000246
    RAX: ffffffffbbb73480  RBX: 0000000000000000  RCX: 0000000000000001
    RDX: 0000000000000000  RSI: 0000000000000083  RDI: 0000000000000000
    RBP: 0000000000000000   R8: ffff9e1b7bc1d5e0   R9: 0000000000000201
    R10: 00000e565d5115ef  R11: 0000000001dafec8  R12: 0000000000000000
    R13: 0000000000000000  R14: 00000000000000a5  R15: 0000000000000000
    ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
--- <NMI exception stack> ---
 #6 [ffffffffbca03ea0] native_safe_halt at ffffffffbbb7384e

In fact, the old exception function table does not have the functions
for NMI and double fault either.

Thanks,
Kazu

> 
> 
> Thanks.
> Lianbo
> 
> > +	"xen_asm_exc_int3"
> > +	NULL,
> > +};
> > +
> >  /*
> >   *  print one entry of a stack trace
> >   */
> > @@ -3185,8 +3222,8 @@ x86_64_print_stack_entry(struct bt_info *bt, FILE *ofp, int level,
> >  	if ((THIS_KERNEL_VERSION >= LINUX(2,6,29)) &&
> >  	    (eframe_check == -1) && offset &&
> >  	    !(bt->flags & (BT_EXCEPTION_FRAME|BT_START|BT_SCHEDULE))) {
> > -		for (i = 0; exception_functions[i]; i++) {
> > -			if (STREQ(name, exception_functions[i])) {
> > +		for (i = 0; machdep->machspec->exception_functions[i]; i++) {
> > +			if (STREQ(name, machdep->machspec->exception_functions[i])) {
> >  				eframe_check = 8;
> >  				break;
> >  			}
> >





More information about the Crash-utility mailing list