[Crash-utility] [PATCH] x86_64: Add Linux 5.8+ exception functions to check exception frame

lijiang lijiang at redhat.com
Thu Apr 15 11:46:31 UTC 2021


在 2021年04月15日 18:06, HAGIO KAZUHITO(萩尾 一仁) 写道:
> Thanks for the comments.
> -----Original Message-----
>> 在 2021年04月06日 16:46, HAGIO KAZUHITO(萩尾 一仁) 写道:
>>> Fix for 'bt' command and options on Linux 5.8-rc1 or later kernels
>>> that contain merge commit 076f14be7fc942e112c94c841baec44124275cd0.
>>> The merged patches changed the name of exception functions that
>>> have been used by the crash utility to check the exception frame.
>>> Without the patch, the command and options cannot display it.
>>>
>>> Before:
>>>   crash> bt
>>>   PID: 8752   TASK: ffff8f80cb244380  CPU: 2   COMMAND: "insmod"
>>>    #0 [ffffa3e40187f9f8] machine_kexec at ffffffffab25d267
>>>    #1 [ffffa3e40187fa48] __crash_kexec at ffffffffab38e2ed
>>>    #2 [ffffa3e40187fb10] crash_kexec at ffffffffab38f1dd
>>>    #3 [ffffa3e40187fb28] oops_end at ffffffffab222cbd
>>>    #4 [ffffa3e40187fb48] do_trap at ffffffffab21fea1
>>>    #5 [ffffa3e40187fb90] do_error_trap at ffffffffab21ff75
>>>    #6 [ffffa3e40187fbd0] exc_invalid_op at ffffffffabb76a2c
>>>    #7 [ffffa3e40187fbf0] asm_exc_invalid_op at ffffffffabc00a72
>>>    #8 [ffffa3e40187fc78] init_module at ffffffffc042b018 [invalid]
>>>    #9 [ffffa3e40187fca0] init_module at ffffffffc042b018 [invalid]
>>>   #10 [ffffa3e40187fca8] do_one_initcall at ffffffffab202806
>>>   #11 [ffffa3e40187fd18] do_init_module at ffffffffab3888ba
>>>   #12 [ffffa3e40187fd38] load_module at ffffffffab38afde
>>>
>>> After:
>>>   crash> bt
>>>   PID: 8752   TASK: ffff8f80cb244380  CPU: 2   COMMAND: "insmod"
>>>    #0 [ffffa3e40187f9f8] machine_kexec at ffffffffab25d267
>>>    #1 [ffffa3e40187fa48] __crash_kexec at ffffffffab38e2ed
>>>    #2 [ffffa3e40187fb10] crash_kexec at ffffffffab38f1dd
>>>    #3 [ffffa3e40187fb28] oops_end at ffffffffab222cbd
>>>    #4 [ffffa3e40187fb48] do_trap at ffffffffab21fea1
>>>    #5 [ffffa3e40187fb90] do_error_trap at ffffffffab21ff75
>>>    #6 [ffffa3e40187fbd0] exc_invalid_op at ffffffffabb76a2c
>>>    #7 [ffffa3e40187fbf0] asm_exc_invalid_op at ffffffffabc00a72
>>>       [exception RIP: init_module+24]
>>>       RIP: ffffffffc042b018  RSP: ffffa3e40187fca8  RFLAGS: 00010246
>>>       RAX: 000000000000001c  RBX: 0000000000000000  RCX: 0000000000000000
>>>       RDX: 0000000000000000  RSI: ffff8f80fbd18000  RDI: ffff8f80fbd18000
>>>       RBP: ffffffffc042b000   R8: 000000000000029d   R9: 000000000000002c
>>>       R10: 0000000000000000  R11: ffffa3e40187fb58  R12: ffffffffc042d018
>>>       R13: ffffa3e40187fdf0  R14: ffffffffc042d000  R15: ffffa3e40187fe90
>>>       ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
>>>    #8 [ffffa3e40187fca0] init_module at ffffffffc042b018 [invalid]
>>>    #9 [ffffa3e40187fca8] do_one_initcall at ffffffffab202806
>>>   #10 [ffffa3e40187fd18] do_init_module at ffffffffab3888ba
>>>   #11 [ffffa3e40187fd38] load_module at ffffffffab38afde
>>>
>>> Signed-off-by: Kazuhito Hagio <k-hagio-ab at nec.com>
>>> ---
>>>  defs.h   |  1 +
>>>  x86_64.c | 43 ++++++++++++++++++++++++++++++++++++++++---
>>>  2 files changed, 41 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/defs.h b/defs.h
>>> index c29b3fa..f9c711c 100644
>>> --- a/defs.h
>>> +++ b/defs.h
>>> @@ -6026,6 +6026,7 @@ struct machine_specific {
>>>  	ulong cpu_entry_area_start;
>>>  	ulong cpu_entry_area_end;
>>>  	ulong page_offset_force;
>>> +	char **exception_functions;
>>>  };
>>>
>>>  #define KSYMS_START    (0x1)
>>> diff --git a/x86_64.c b/x86_64.c
>>> index f5b2f7b..3f4a969 100644
>>> --- a/x86_64.c
>>> +++ b/x86_64.c
>>> @@ -139,6 +139,9 @@ static void orc_dump(ulong);
>>>
>>>  struct machine_specific x86_64_machine_specific = { 0 };
>>>
>>> +static const char *exception_functions_orig[];
>>> +static const char *exception_functions_5_8[];
>>> +
>>>  /*
>>>   *  Do all necessary machine-specific setup here.  This is called several
>>>   *  times during initialization.
>>> @@ -735,6 +738,12 @@ x86_64_init(int when)
>>>  		STRUCT_SIZE_INIT(percpu_data, "percpu_data");
>>>
>>>  		GART_init();
>>> +
>>> +		if (kernel_symbol_exists("asm_exc_divide_error"))
>>
>> This symbol may also change in the future, so I wonder whether it would be better to check the kernel
>> version instead. That would also be consistent with the name exception_functions_5_8. For example:
>>
>> if (THIS_KERNEL_VERSION >= LINUX(5,8,0))
>> ...
> 
> Hmm, but the symbol names in exception_functions_5_8 have to be updated anyway
> if the kernel changes them, and the symbol search can provide "backport" compatibility.
> So I thought this way would be better.
> 
Seems reasonable.

>>
>>
>>> +			machdep->machspec->exception_functions = (char **)exception_functions_5_8;
>>> +		else
>>> +			machdep->machspec->exception_functions = (char **)exception_functions_orig;
>>> +
>>>  		break;
>>>
>>>  	case POST_VM:
>>> @@ -1104,6 +1113,12 @@ x86_64_dump_machdep_table(ulong arg)
>>>  		fprintf(fp, "%016lx\n", (ulong)ms->cpu_entry_area_end);
>>>  	else
>>>  		fprintf(fp, "(unused)\n");
>>> +
>>> +	fprintf(fp, "      excpetion_functions: ");
>>> +	if (ms->exception_functions == (char **)exception_functions_5_8)
>>> +		fprintf(fp, "excpetion_functions_5_8\n");
>>> +	else
>>> +		fprintf(fp, "excpetion_functions_orig\n");
>>>  }
>>>
>>>  /*
>>> @@ -3086,7 +3101,7 @@ text_lock_function(char *name, struct bt_info *bt, ulong locktext)
>>>   * zeroentry xen_debug do_debug
>>>   * zeroentry xen_int3 do_int3
>>>  */
>>> -static const char *exception_functions[] = {
>>> +static const char *exception_functions_orig[] = {
>>>  	"invalid_TSS",
>>>  	"segment_not_present",
>>>  	"alignment_check",
>>> @@ -3109,6 +3124,28 @@ static const char *exception_functions[] = {
>>>  	NULL,
>>>  };
>>>
>>> +static const char *exception_functions_5_8[] = {
>>> +	"asm_exc_invalid_tss",
>>> +	"asm_exc_segment_not_present",
>>> +	"asm_exc_alignment_check",
>>> +	"asm_exc_general_protection",
>>> +	"asm_exc_page_fault",
>>> +	"asm_exc_divide_error",
>>> +	"asm_exc_overflow",
>>> +	"asm_exc_bounds",
>>> +	"asm_exc_invalid_op",
>>> +	"asm_exc_device_not_available",
>>> +	"asm_exc_coproc_segment_overrun",
>>> +	"asm_exc_spurious_interrupt_bug",
>>> +	"asm_exc_coprocessor_error",
>>> +	"asm_exc_simd_coprocessor_error",
>>> +	"xen_asm_exc_stack_segment",
>>> +	"xen_asm_exc_xen_hypervisor_callback",
>>> +	"xen_asm_exc_debug"
>>
>> It seems that asm_exc_debug is no longer a Xen-specific exception. Can we remove the "xen_" prefix
>> and use "asm_exc_debug" instead?
> 
> Thanks, will fix.
> >>
>> BTW: I don't see asm_exc_nmi and asm_exc_double_fault in the above exception table. Are they missing
>> or unnecessary?
> 
> My understanding is that for interrupts/exceptions that have their own
> exception stack, like NMI and double fault, the crash
> utility does not need their functions in the table to print the
> exception frame:
> 
OK, that answered my questions. Thanks for the explanation.

> crash> bt
> PID: 0      TASK: ffffffffbca14840  CPU: 0   COMMAND: "swapper/0"
>  #0 [fffffe000000bd20] machine_kexec at ffffffffbb064a1e
>  #1 [fffffe000000bd70] __crash_kexec at ffffffffbb18ebaa
>  #2 [fffffe000000be30] panic at ffffffffbb0dc5eb
>  #3 [fffffe000000beb0] unknown_nmi_error.cold at ffffffffbb02c6b1
>  #4 [fffffe000000bec8] exc_nmi at ffffffffbbb63e87
>  #5 [fffffe000000bef0] end_repeat_nmi at ffffffffbbc014db
>     [exception RIP: native_safe_halt+14]
>     RIP: ffffffffbbb7384e  RSP: ffffffffbca03ea0  RFLAGS: 00000246
>     RAX: ffffffffbbb73480  RBX: 0000000000000000  RCX: 0000000000000001
>     RDX: 0000000000000000  RSI: 0000000000000083  RDI: 0000000000000000
>     RBP: 0000000000000000   R8: ffff9e1b7bc1d5e0   R9: 0000000000000201
>     R10: 00000e565d5115ef  R11: 0000000001dafec8  R12: 0000000000000000
>     R13: 0000000000000000  R14: 00000000000000a5  R15: 0000000000000000
>     ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
> --- <NMI exception stack> ---
>  #6 [ffffffffbca03ea0] native_safe_halt at ffffffffbbb7384e
> 
> Actually the old exception function table does not have the functions
> for NMI and double fault.
> 
> Thanks,
> Kazu
> 
>>
>>
>> Thanks.
>> Lianbo
>>
>>> +	"xen_asm_exc_int3"
>>> +	NULL,
>>> +};
>>> +
>>>  /*
>>>   *  print one entry of a stack trace
>>>   */
>>> @@ -3185,8 +3222,8 @@ x86_64_print_stack_entry(struct bt_info *bt, FILE *ofp, int level,
>>>  	if ((THIS_KERNEL_VERSION >= LINUX(2,6,29)) &&
>>>  	    (eframe_check == -1) && offset &&
>>>  	    !(bt->flags & (BT_EXCEPTION_FRAME|BT_START|BT_SCHEDULE))) {
>>> -		for (i = 0; exception_functions[i]; i++) {
>>> -			if (STREQ(name, exception_functions[i])) {
>>> +		for (i = 0; machdep->machspec->exception_functions[i]; i++) {
>>> +			if (STREQ(name, machdep->machspec->exception_functions[i])) {
>>>  				eframe_check = 8;
>>>  				break;
>>>  			}
>>>
> 




More information about the Crash-utility mailing list