[Crash-utility] [PATCH v2] Improve the performance of symbol searching for kernel modules

Philipp Rudo prudo at redhat.com
Mon Aug 30 15:49:32 UTC 2021


Hi Tao,

great patch. I have some comments and questions though.

On Sun, 22 Aug 2021 09:51:12 +0800
Tao Liu <ltao at redhat.com> wrote:

> Currently the sequence for crash searching a symbol is: 1) kernel symname
> hash table, 2) iterate all kernel symbols, 3) iterate all kernel modules
> and their symbols. In the worst case, if a non-exist symbol been searched,
> all 3 stages will be went through. The time consuming status for each stage
> is like:
> 
>     stage 1         stage 2         stage 3
>     0.007000(ms)    0.593000(ms)    2.421000(ms)
> 
> stage 3 takes too much time when comparing to stage 1. So let's introduce a
> symname hash table for kernel modules to improve the performance of symbol
> searching.
> 
> Signed-off-by: Tao Liu <ltao at redhat.com>
> ---
> v1 -> v2:
> - Removed unused variables within the modified functions.
> 
> ---
>  defs.h    |   1 +
>  kernel.c  |   1 +
>  symbols.c | 189 +++++++++++++++++++++++++++++++-----------------------
>  3 files changed, 111 insertions(+), 80 deletions(-)
> 
> diff --git a/defs.h b/defs.h
> index eb1c71b..58b8546 100644
> --- a/defs.h
> +++ b/defs.h
> @@ -2751,6 +2751,7 @@ struct symbol_table_data {
>          double val_hash_searches;
>          double val_hash_iterations;
>          struct syment *symname_hash[SYMNAME_HASH];
> +        struct syment *mod_symname_hash[SYMNAME_HASH];
   ^^^^^^^^
   there are quite some whitespace damages throughout the patch. Most
   of them seem to origin from old code that gets copy & pasted. It
   would be great if you could fix them on the lines you are touching.

>  	struct symbol_namespace kernel_namespace;
>  	struct syment *ext_module_symtable;
>  	struct syment *ext_module_symend;
> diff --git a/kernel.c b/kernel.c
> index 36fdea2..c56cc34 100644
> --- a/kernel.c
> +++ b/kernel.c
> @@ -4663,6 +4663,7 @@ reinit_modules(void)
>          kt->mods_installed = 0;
>  	clear_text_value_cache();
>  
> +	memset(st->mod_symname_hash, 0, sizeof(st->mod_symname_hash));
>          module_init();
>  }
>  
> diff --git a/symbols.c b/symbols.c
> index bf6d94d..9b64734 100644
> --- a/symbols.c
> +++ b/symbols.c
> @@ -64,8 +64,9 @@ static int namespace_ctl(int, struct symbol_namespace *, void *, void *);
>  static void symval_hash_init(void);
>  static struct syment *symval_hash_search(ulong);
>  static void symname_hash_init(void);
> -static void symname_hash_install(struct syment *);
> -static struct syment *symname_hash_search(char *);
> +static void symname_hash_install(struct syment *[], struct syment *);
> +static void symname_hash_remove(struct syment *[], struct syment *);
> +static struct syment *symname_hash_search(struct syment *[], char *, int (*)(struct syment *, char *));
>  static void gnu_qsort(bfd *, void *, long, unsigned int, asymbol *, asymbol *);
>  static int check_gnu_debuglink(bfd *);
>  static int separate_debug_file_exists(const char *, unsigned long, int *);
> @@ -1119,7 +1120,7 @@ symname_hash_init(void)
>          struct syment *sp;
>  
>          for (sp = st->symtable; sp < st->symend; sp++) 
> -		symname_hash_install(sp);
> +		symname_hash_install(st->symname_hash, sp);
>  
>  	if ((sp = symbol_search("__per_cpu_start")))
>  		st->__per_cpu_start = sp->value;
> @@ -1127,21 +1128,48 @@ symname_hash_init(void)
>  		st->__per_cpu_end = sp->value;
>  }
>  
> +static void
> +mod_symtable_hash_install_range(struct syment *from, struct syment *to)
> +{
> +        struct syment *sp;
> +
> +	for (sp = from; sp <= to; sp++) {
> +		if (sp != NULL) {
> +			symname_hash_install(st->mod_symname_hash, sp);
> +		}
> +	}
> +}
> +
> +static void
> +mod_symtable_hash_remove_range(struct syment *from, struct syment *to)
> +{
> +        struct syment *sp;
> +
> +	for (sp = from; sp <= to; sp++) {
> +		if (sp != NULL) {
> +			symname_hash_remove(st->mod_symname_hash, sp);
> +		}
> +	}
> +}

1. 'if (sp)' is the same like 'if (sp != NULL)' but shorter.
   That's why personally I prefer the first version :)

2. Wouldn't it make sense to move this check to the beginning of
   symname_hash_{install,remove}? Then also the other users like
   symname_hash_init would benefit.

>  /*
>   *  Install a single static kernel symbol into the symname_hash.
>   */
>  static void
> -symname_hash_install(struct syment *spn)
> +symname_hash_install(struct syment *table[], struct syment *spn)
>  {
>  	struct syment *sp;
>          int index;
>  
>          index = SYMNAME_HASH_INDEX(spn->name);
> +	index = index > 0 ? index : -index;

true, in theory index could be negative which would be really bad. But
isn't that an independent problem from the rest of the patch? If so
this change should go in an extra patch.
Furthermore the fix should be done in SYMNAME_HASH_INDEX so that all of
its users are fixed. The way I see it this should be enough (using abs
from stdlib.h)

  #define SYMNAME_HASH_INDEX(name) \
- ((name[0] ^ (name[strlen(name)-1] * name[strlen(name)/2])) % SYMNAME_HASH)
+ (abs((name[0] ^ (name[strlen(name)-1] * name[strlen(name)/2])) % SYMNAME_HASH))



>  	spn->cnt = 1;
>  
> -        if ((sp = st->symname_hash[index]) == NULL) 
> -        	st->symname_hash[index] = spn;
> -	else {
> +        if ((sp = table[index]) == NULL) {
> +		table[index] = spn;
> +		spn->name_hash_next = NULL;

similar to above, isn't explicitly setting spn->name_hash_next = NULL
an independent problem from the rest of the changes and thus should go
to a separate patch?

> +	} else {
>  		while (sp) {
>  	        	if (STREQ(sp->name, spn->name)) {
>  	                	sp->cnt++;
> @@ -1151,23 +1179,67 @@ symname_hash_install(struct syment *spn)
>  				sp = sp->name_hash_next;
>  			else {
>  				sp->name_hash_next = spn;
> +				spn->name_hash_next = NULL;
>  				break;
>  			}
>  		}
>  	}
>  }
>  
> +static void
> +symname_hash_remove(struct syment *table[], struct syment *spn)
> +{
> +	struct syment *sp, **tmp;
> +        int index, first_encounter = 1;
> +
> +        index = SYMNAME_HASH_INDEX(spn->name);
> +	index = index > 0 ? index : -index;
> +
> +        if ((sp = table[index]) == NULL)
> +		return;
> +
> +	for (tmp = &table[index], sp = table[index]; sp; ) {
> +		if (STREQ(sp->name, spn->name)) {
> +			if (sp != spn) {
> +				sp->cnt--;
> +				spn->cnt--;
> +			} else if (!first_encounter) {
> +				sp->cnt--;
> +			} else {
> +				*tmp = sp->name_hash_next;
> +				first_encounter = 0;
> +
> +				tmp = &(*tmp)->name_hash_next;
> +				sp = sp->name_hash_next;
> +				spn->name_hash_next = NULL;
> +				continue;
> +			}
> +		}
> +		tmp = &sp->name_hash_next;
> +		sp = sp->name_hash_next;

What do you need tmp for? The way I see it you only assign to it but
never really use it.

> +	}
> +}
> +
>  /*
>   *  Static kernel symbol value search
>   */
>  static struct syment *
> -symname_hash_search(char *name)
> +symname_hash_search(struct syment *table[], char *name,
> +		int (*skip_condition)(struct syment *, char *))

this line should be indented to match the open parentheses after the
function name.

>  {
>  	struct syment *sp;
> +	int index;
>  
> -        sp = st->symname_hash[SYMNAME_HASH_INDEX(name)];
> +	index = SYMNAME_HASH_INDEX(name);
> +	index = index > 0 ? index : -index;
> +        sp = table[index];
>  
>  	while (sp) {
> +		if (skip_condition && skip_condition(sp, name)) {
> +			sp = sp->name_hash_next;
> +			continue;
> +		}
> +
>  		if (STREQ(sp->name, name)) 
>  			return sp;
>  		sp = sp->name_hash_next;
> @@ -1595,6 +1667,7 @@ store_module_symbols_v1(ulong total, int mods_installed)
>  				lm->mod_symend = sp;
>  			}
>  		}
> +		mod_symtable_hash_install_range(lm->mod_symtable, lm->mod_symend);
>  	}
>  
>  	st->flags |= MODULE_SYMS;
> @@ -2075,6 +2148,8 @@ store_module_symbols_v2(ulong total, int mods_installed)
>  				lm->mod_init_symend = sp;
>  			}
>  		}
> +		mod_symtable_hash_install_range(lm->mod_symtable, lm->mod_symend);
> +		mod_symtable_hash_install_range(lm->mod_init_symtable, lm->mod_init_symend);
>  	}
>  
>  	st->flags |= MODULE_SYMS;
> @@ -4482,6 +4557,16 @@ symbol_query(char *s, char *print_pad, struct syment **spp)
>  	return(cnt);
>  }
>  
> +static int
> +skip_symbols(struct syment *sp, char *s)
> +{
> +	int pseudos, skip = 0;
> +	pseudos = (strstr(s, "_MODULE_START_") || strstr(s, "_MODULE_END_") ||
> +		strstr(s, "_MODULE_INIT_START_") || strstr(s, "_MODULE_INIT_END_"));
> +	if (!pseudos && MODULE_PSEUDO_SYMBOL(sp))
> +		skip = 1;
> +	return skip;
> +}

It took really long for me to wrap my head around what is happening
here but in the end I'm pretty sure that the extra filtering is
unnecessary and can simply be dropped without problem. To be fair
what you are doing seems correct it's just by cleaning up the code the
problem became more obvious...

Let's see what is happening here:

1) strstr returns a pointer to the start of the second string if is is
   contained in the first one and NULL otherwise. This means 'pseudos'
   is true if 's' contains any of the _MODULE_* strings, i.e. if s is a
   pseudo symbol.

2) MODULE_PSEUDO_SYMBOL does basically the same only that it checks
   'sp->name' instead of 's' and enforces that the "_MODULE_*" strings
   are at the beginning of the symbol name not just contained in it.

Let's look at the different cases

3.1) both 's' and 'sp' are proper, i.e. no pseudo, symbols
     This means skip_symbols returns false so symname_hash_search
     doesn't skip the symbol but compares 's' to 'sp->name' to check if
     'sp' is the symbol you are searching for. This is basically the
     case you want.

3.2) both 's' and 'sp' are pseudo symbols
     Again skip_symbols returns false and symname_hash_search compares
     's' with 'sp->name' to check if 'sp' is the symbol you are
     searching for. I'm not entirely sure if this way
     symname_hash_search is intended to be used but I also don't see a
     reason why it shouldn't be done.

3.3) 's' is a pseudo and 'sp' a proper symbol
     same like 3.2).

3.4) 's' is a proper symbol and 'sp' a psudo symbol
     here skip_symbols returns true and symname_hash_search skips this
     case.

So the only case that is filtered out is 3.4 in which 's' must not
contain any '_MODULES_*' while 'sp->name' has to start with one. But
that's something a simple STREQ can handle like in case 3.3. So what's
the point in having this extra filtering?

>  /*
>   *  Return the syment of a symbol.
> @@ -4489,58 +4574,16 @@ symbol_query(char *s, char *print_pad, struct syment **spp)
>  struct syment *
>  symbol_search(char *s)
>  {
> -	int i;
> -        struct syment *sp_hashed, *sp, *sp_end;
> -	struct load_module *lm;
> -	int pseudos, search_init;
> +        struct syment *sp_hashed, *sp;
>  
> -	sp_hashed = symname_hash_search(s);
> +	sp_hashed = symname_hash_search(st->symname_hash, s, NULL);
>  
>          for (sp = sp_hashed ? sp_hashed : st->symtable; sp < st->symend; sp++) {
>                  if (STREQ(s, sp->name)) 
>                          return(sp);
>          }
>  
> -	pseudos = (strstr(s, "_MODULE_START_") || strstr(s, "_MODULE_END_"));
> -	search_init = FALSE;
> -
> -        for (i = 0; i < st->mods_installed; i++) {
> -                lm = &st->load_modules[i];
> -		if (lm->mod_flags & MOD_INIT)
> -			search_init = TRUE;
> -		sp = lm->mod_symtable;
> -                sp_end = lm->mod_symend;
> -
> -                for ( ; sp <= sp_end; sp++) {
> -                	if (!pseudos && MODULE_PSEUDO_SYMBOL(sp))
> -                        	continue;
> -                	if (STREQ(s, sp->name))
> -                        	return(sp);
> -                }
> -        }
> -
> -	if (!search_init)
> -		return((struct syment *)NULL);
> -
> -	pseudos = (strstr(s, "_MODULE_INIT_START_") || strstr(s, "_MODULE_INIT_END_"));
> -
> -	for (i = 0; i < st->mods_installed; i++) {
> -		lm = &st->load_modules[i];
> -		if (!lm->mod_init_symtable)
> -			continue;
> -		sp = lm->mod_init_symtable;
> -		sp_end = lm->mod_init_symend;
> -
> -		for ( ; sp < sp_end; sp++) {
> -			if (!pseudos && MODULE_PSEUDO_SYMBOL(sp))
> -				continue;
> -
> -			if (STREQ(s, sp->name))
> -				return(sp);
> -		}
> -	}
> -
> -        return((struct syment *)NULL);
> +	return symname_hash_search(st->mod_symname_hash, s, skip_symbols);
>  }
>  
>  /*
> @@ -5432,33 +5475,13 @@ value_symbol(ulong value)
>  int
>  symbol_exists(char *symbol)
>  {
> -	int i;
> -        struct syment *sp, *sp_end;
> -	struct load_module *lm;
> +        struct syment *sp;
>  
> -	if ((sp = symname_hash_search(symbol)))
> +	if ((sp = symname_hash_search(st->symname_hash, symbol, NULL)))
>  		return TRUE;
>  
> -        for (i = 0; i < st->mods_installed; i++) {
> -                lm = &st->load_modules[i];
> -		sp = lm->mod_symtable;
> -                sp_end = lm->mod_symend;
> -
> -                for ( ; sp < sp_end; sp++) {
> -                	if (STREQ(symbol, sp->name))
> -                        	return(TRUE);
> -                }
> -
> -		if (lm->mod_init_symtable) {
> -			sp = lm->mod_init_symtable;
> -			sp_end = lm->mod_init_symend;
> -	
> -			for ( ; sp < sp_end; sp++) {
> -				if (STREQ(symbol, sp->name))
> -					return(TRUE);
> -			}
> -		}
> -	}
> +	if ((sp = symname_hash_search(st->mod_symname_hash, symbol, NULL)))
> +		return TRUE;
>  
>          return(FALSE);
>  }

Isn't this function basically identical to symbol_search and thus can
be abbreviated to

	return !!(symbol_search(symbol));

> @@ -5515,7 +5538,7 @@ kernel_symbol_exists(char *symbol)
>  {
>  	struct syment *sp;
>  
> -        if ((sp = symname_hash_search(symbol)))
> +        if ((sp = symname_hash_search(st->symname_hash, symbol, NULL)))
>                  return TRUE;
>  	else
>          	return FALSE;

same like above. This could be abbreviated to

	return !!(symname_hash_search(st->symname_hash, symbol, NULL));

> @@ -5527,7 +5550,7 @@ kernel_symbol_exists(char *symbol)
>  struct syment *
>  kernel_symbol_search(char *symbol)
>  {
> -	return symname_hash_search(symbol);
> +	return symname_hash_search(st->symname_hash, symbol, NULL);
>  }
>  
>  /*
> @@ -12464,8 +12487,10 @@ store_load_module_symbols(bfd *bfd, int dynamic, void *minisyms,
>  		error(INFO, "%s: last symbol: %s is not _MODULE_END_%s?\n",
>  			lm->mod_name, lm->mod_load_symend->name, lm->mod_name);
>  
> +	mod_symtable_hash_remove_range(lm->mod_symtable, lm->mod_symend);
>          lm->mod_symtable = lm->mod_load_symtable;
>          lm->mod_symend = lm->mod_load_symend;
> +	mod_symtable_hash_install_range(lm->mod_symtable, lm->mod_symend);
>  
>  	lm->mod_flags &= ~MOD_EXT_SYMS;
>  	lm->mod_flags |= MOD_LOAD_SYMS;
> @@ -12495,6 +12520,7 @@ delete_load_module(ulong base_addr)
>          			req->name = lm->mod_namelist;
>          			gdb_interface(req); 
>  			}
> +			mod_symtable_hash_remove_range(lm->mod_symtable, lm->mod_symend);
>  			if (lm->mod_load_symtable) {
>                          	free(lm->mod_load_symtable);
>                                  namespace_ctl(NAMESPACE_FREE,
> @@ -12504,6 +12530,7 @@ delete_load_module(ulong base_addr)
>  				unlink_module(lm);
>  			lm->mod_symtable = lm->mod_ext_symtable;
>  			lm->mod_symend = lm->mod_ext_symend;
> +			mod_symtable_hash_install_range(lm->mod_symtable, lm->mod_symend);
>  			lm->mod_flags &= ~(MOD_LOAD_SYMS|MOD_REMOTE|MOD_NOPATCH);
>  			lm->mod_flags |= MOD_EXT_SYMS;
>  			lm->mod_load_symtable = NULL;
> @@ -12532,6 +12559,7 @@ delete_load_module(ulong base_addr)
>                          	req->name = lm->mod_namelist;
>                          	gdb_interface(req);
>  			}
> +			mod_symtable_hash_remove_range(lm->mod_symtable, lm->mod_symend);
>  			if (lm->mod_load_symtable) {
>                          	free(lm->mod_load_symtable);
>  				namespace_ctl(NAMESPACE_FREE,
> @@ -12541,6 +12569,7 @@ delete_load_module(ulong base_addr)
>  				unlink_module(lm);
>  			lm->mod_symtable = lm->mod_ext_symtable;
>  			lm->mod_symend = lm->mod_ext_symend;
> +			mod_symtable_hash_install_range(lm->mod_symtable, lm->mod_symend);
>                          lm->mod_flags &= ~(MOD_LOAD_SYMS|MOD_REMOTE|MOD_NOPATCH);
>                          lm->mod_flags |= MOD_EXT_SYMS;
>                          lm->mod_load_symtable = NULL;

I must admit I don't understand how the last two functions work, so I'm
relying on Kazu to comment on those.

Thanks
Philipp




More information about the Crash-utility mailing list