[Crash-utility] [PATCH] Bugfix and optimization for ARM64 getting crash notes

Dave Anderson anderson at redhat.com
Tue Dec 10 16:51:12 UTC 2019



----- Original Message -----
> From: chenqiwu <chenqiwu at xiaomi.com>
> 
> 1) ARM64 call arm64_get_crash_notes() to retrieve active task
> registers when POST_VM before calling map_cpus_to_prstatus()
> to remap the NT_PRSTATUS elf notes to the online cpus. It's
> better to call arm64_get_crash_notes() when POST_INIT.
> 2) arm64_get_crash_notes() check the sanity of NT_PRSTATUS notes
> only for online cpus. If one cpu contains invalid note, it's
> better to continue finding the crash notes for other online cpus.
> So we can extract the backtraces for the online cpus which contain
> valid note by using command "bt -a".
> 3) map_cpus_to_prstatus() remap the NT_PRSTATUS notes only to the
> online cpus. Make sure there must be a one-to-one relationship
> between the number of online cpus and the number of notes.

The code in map_cpus_to_prstatus() and map_cpus_to_prstatus_kdump_cmprs()
has been in place forever.  Both the nd->nt_prstatus_percpu[] and 
dd->nt_prstatus_percpu[] arrays are per-cpu regardless whether
they are online or offline.  However, since kdump only creates 
NT_PRSTATUS notes for on-line cpus, the "i" index is needed for
each cpu, and the "j" index is needed for the existing NT_PRSTATUS
notes.  If a cpu is offline, its nt_prstatus_percpu[] entry will be 
zeroed out.  

I'm not arguing that the arm64 online-cpu handling may be suspect, but
your patch should not be making changes to architectural-neutral code
unless the issue affects all architectures.  So please leave those
two functions alone.

Dave


> 
> Signed-off-by: chenqiwu <chenqiwu at xiaomi.com>
> ---
>  arm64.c    | 49 +++++++++++++++++++++++++++++--------------------
>  diskdump.c |  9 +++------
>  netdump.c  |  4 ++--
>  3 files changed, 34 insertions(+), 28 deletions(-)
> 
> diff --git a/arm64.c b/arm64.c
> index 233029d..cbad461 100644
> --- a/arm64.c
> +++ b/arm64.c
> @@ -458,7 +458,7 @@ arm64_init(int when)
>  		arm64_stackframe_init()
>  		break;
>  
> -	case POST_VM:
> +	case POST_INIT:
>  		/*
>  		 * crash_notes contains machine specific information about the
>  		 * crash. In particular, it contains CPU registers at the time
> @@ -3587,7 +3587,7 @@ arm64_get_crash_notes(void)
>  	ulong offset;
>  	char *buf, *p;
>  	ulong *notes_ptrs;
> -	ulong i;
> +	ulong i, j;
>  
>  	if (!symbol_exists("crash_notes"))
>  		return FALSE;
> @@ -3620,12 +3620,12 @@ arm64_get_crash_notes(void)
>  	if (!(ms->panic_task_regs = calloc((size_t)kt->cpus, sizeof(struct
>  	arm64_pt_regs))))
>  		error(FATAL, "cannot calloc panic_task_regs space\n");
>  	
> -	for  (i = 0; i < kt->cpus; i++) {
> -
> +	for  (i = 0, j = 0; i < kt->cpus; i++) {
>  		if (!readmem(notes_ptrs[i], KVADDR, buf, SIZE(note_buf),
>  		    "note_buf_t", RETURN_ON_ERROR)) {
> -			error(WARNING, "failed to read note_buf_t\n");
> -			goto fail;
> +			error(WARNING, "cpu#%d: failed to read note_buf_t\n", i);
> +			++j;
> +			continue;
>  		}
>  
>  		/*
> @@ -3655,19 +3655,29 @@ arm64_get_crash_notes(void)
>  				    note->n_descsz == notesz)
>  					BCOPY((char *)note, buf, notesz);
>  			} else {
> -				error(WARNING,
> -					"cannot find NT_PRSTATUS note for cpu: %d\n", i);
> +				if (CRASHDEBUG(1))
> +					error(WARNING,
> +						"cpu#%d: cannot find NT_PRSTATUS note\n", i);
> +				++j;
>  				continue;
>  			}
>  		}
>  
> +		/*
> +		 * Check the sanity of NT_PRSTATUS note only for each online cpu.
> +		 * If this cpu has invalid note, continue to find the crash notes
> +		 * for other online cpus.
> +		 */
>  		if (note->n_type != NT_PRSTATUS) {
> -			error(WARNING, "invalid note (n_type != NT_PRSTATUS)\n");
> -			goto fail;
> +			error(WARNING, "cpu#%d: invalid note (n_type != NT_PRSTATUS)\n", i);
> +			++j;
> +			continue;
>  		}
> -		if (p[0] != 'C' || p[1] != 'O' || p[2] != 'R' || p[3] != 'E') {
> -			error(WARNING, "invalid note (name != \"CORE\"\n");
> -			goto fail;
> +
> +		if (!STRNEQ(p, "CORE")) {
> +			error(WARNING, "cpu#%d: invalid note (name != \"CORE\")\n", i);
> +			++j;
> +			continue;
>  		}
>  
>  		/*
> @@ -3684,14 +3694,13 @@ arm64_get_crash_notes(void)
>  
>  	FREEBUF(buf);
>  	FREEBUF(notes_ptrs);
> -	return TRUE;
>  
> -fail:
> -	FREEBUF(buf);
> -	FREEBUF(notes_ptrs);
> -	free(ms->panic_task_regs);
> -	ms->panic_task_regs = NULL;
> -	return FALSE;
> +	if (j == kt->cpus) {
> +		free(ms->panic_task_regs);
> +		ms->panic_task_regs = NULL;
> +		return FALSE;
> +	}
> +	return TRUE;
>  }
>  
>  static void
> diff --git a/diskdump.c b/diskdump.c
> index e88243e..12d8e9c 100644
> --- a/diskdump.c
> +++ b/diskdump.c
> @@ -130,12 +130,9 @@ map_cpus_to_prstatus_kdump_cmprs(void)
>  	 */
>  	nrcpus = (kt->kernel_NR_CPUS ? kt->kernel_NR_CPUS : NR_CPUS);
>  
> -	for (i = 0, j = 0; i < nrcpus; i++) {
> -		if (in_cpu_map(ONLINE_MAP, i)) {
> -			dd->nt_prstatus_percpu[i] = nt_ptr[j++];
> -			dd->num_prstatus_notes =
> -				MAX(dd->num_prstatus_notes, i+1);
> -		}
> +	for (i = 0; i < nrcpus; i++) {
> +		if (in_cpu_map(ONLINE_MAP, i))
> +			dd->nt_prstatus_percpu[i] = nt_ptr[i];
>  	}
>  
>  	FREEBUF(nt_ptr);
> diff --git a/netdump.c b/netdump.c
> index 406416a..849638a 100644
> --- a/netdump.c
> +++ b/netdump.c
> @@ -97,9 +97,9 @@ map_cpus_to_prstatus(void)
>  	 */
>  	nrcpus = (kt->kernel_NR_CPUS ? kt->kernel_NR_CPUS : NR_CPUS);
>  
> -	for (i = 0, j = 0; i < nrcpus; i++) {
> +	for (i = 0; i < nrcpus; i++) {
>  		if (in_cpu_map(ONLINE_MAP, i))
> -			nd->nt_prstatus_percpu[i] = nt_ptr[j++];
> +			nd->nt_prstatus_percpu[i] = nt_ptr[i];
>  	}
>  
>  	FREEBUF(nt_ptr);
> --
> 1.9.1
> 
> 




More information about the Crash-utility mailing list