<div dir="ltr"><div dir="ltr">On Thu, May 11, 2023 at 12:35 PM HAGIO KAZUHITO(萩尾 一仁) <<a href="mailto:k-hagio-ab@nec.com" target="_blank">k-hagio-ab@nec.com</a>> wrote:<br></div><div class="gmail_quote"><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">Support:<br>
- sym -l<br>
- sym -M<br>
- sym -m module<br>
<br>
Signed-off-by: Kazuhito Hagio <<a href="mailto:k-hagio-ab@nec.com" target="_blank">k-hagio-ab@nec.com</a>><br>
---<br>
symbols.c | 234 +++++++++++++++++++++++++++++++++++++++++++++++++++---<br>
1 file changed, 225 insertions(+), 9 deletions(-)<br>
<br>
diff --git a/symbols.c b/symbols.c<br>
index 88849833bada..669fa2e2f3da 100644<br>
--- a/symbols.c<br>
+++ b/symbols.c<br>
@@ -103,9 +103,16 @@ static void free_structure(struct struct_elem *);<br>
static unsigned char is_right_brace(const char *);<br>
static struct struct_elem *find_node(struct struct_elem *, char *);<br>
static void dump_node(struct struct_elem *, char *, unsigned char, unsigned char);<br>
-static int _in_module(ulong, struct load_module *, int);<br>
+<br>
+static int module_mem_type(ulong, struct load_module *);<br>
static ulong module_mem_end(ulong, struct load_module *);<br>
+static int _in_module(ulong, struct load_module *, int);<br>
+struct syment *value_search_module_v2(ulong, ulong *);<br>
<br>
+static const char *module_start_tags[];<br>
+static const char *module_start_strs[];<br>
+static const char *module_end_tags[];<br>
+static const char *module_end_strs[];<br>
<br>
/*<br>
* structure/union printing stuff<br>
@@ -1270,10 +1277,14 @@ symname_hash_search(struct syment *table[], char *name)<br>
* Output for sym -[lL] command.<br>
*/<br>
<br>
+#define MODULE_PSEUDO_SYMBOL(sp) (STRNEQ((sp)->name, "_MODULE_"))<br>
+<br></blockquote><div><br></div><div>Good understanding. But I'm concerned if the name "_MODULE_" is too short to distinguish kernel symbols from (pseudo)module symbols, although I do not see any kernel symbols with the prefix "_MODULE_".</div><div><br></div><div># objdump -t vmlinux|grep _MODULE_<br></div><div><br></div><div>And in crash-utility, most of the time it uses the strncmp()/strcmp() to compare pseudo symbols, but sometimes it also uses strstr() to search for the pseudo symbols. Maybe the following symbols may not be loaded by crash-utility, only some strings.</div><div># objdump -s vmlinux |grep _MODULE_<br> 015a20 52494659 494e475f 4d4f4455 4c455f53 RIFYING_MODULE_S<br> 03ee70 574e5f4d 4f44554c 455f5349 474e4154 WN_MODULE_SIGNAT<br> 03f180 444f574e 5f4d4f44 554c455f 50415241 DOWN_MODULE_PARA<br> 1bb530 45544854 4f4f4c5f 4d4f4455 4c455f50 ETHTOOL_MODULE_P<br> 1bba20 4f4f4c5f 4d4f4455 4c455f50 4f574552 OOL_MODULE_POWER<br> 1bba80 54455f4d 4f44554c 455f434d 49535f4e TE_MODULE_CMIS_N<br></div><div>...</div><div><br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
+/*<br>
#define MODULE_PSEUDO_SYMBOL(sp) \<br>
((STRNEQ((sp)->name, "_MODULE_START_") || STRNEQ((sp)->name, "_MODULE_END_")) || \<br>
(STRNEQ((sp)->name, "_MODULE_INIT_START_") || STRNEQ((sp)->name, "_MODULE_INIT_END_")) || \<br>
(STRNEQ((sp)->name, "_MODULE_SECTION_")))<br>
+*/<br>
<br>
#define MODULE_START(sp) (STRNEQ((sp)->name, "_MODULE_START_"))<br>
#define MODULE_END(sp) (STRNEQ((sp)->name, "_MODULE_END_"))<br>
@@ -1282,6 +1293,93 @@ symname_hash_search(struct syment *table[], char *name)<br>
#define MODULE_SECTION_START(sp) (STRNEQ((sp)->name, "_MODULE_SECTION_START"))<br>
#define MODULE_SECTION_END(sp) (STRNEQ((sp)->name, "_MODULE_SECTION_END"))<br>
<br>
+#define MODULE_MEM_START(sp,i) (STRNEQ((sp)->name, module_start_tags[i]))<br>
+#define MODULE_MEM_END(sp,i) (STRNEQ((sp)->name, module_end_tags[i]))<br>
+<br>
+/* For 6.4 and later */<br>
+static void<br>
+module_symbol_dump(char *module)<br>
+{<br>
+ int i, j, start, percpu_syms;<br>
+ struct syment *sp, *sp_end;<br></blockquote><div><br></div><div>Indent issues.</div><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
+ struct load_module *lm;<br>
+ const char *p1, *p2;<br>
+<br>
+#define TBD 1<br>
+#define DISPLAYED 2<br>
+<br>
+ for (i = 0; i < st->mods_installed; i++) {<br>
+<br>
+ lm = &st->load_modules[i];<br>
+ if (module && !STREQ(module, lm->mod_name))<br>
+ continue;<br>
+<br>
+ if (received_SIGINT() || output_closed())<br>
+ return;<br>
+<br></blockquote><div><br></div><div>The variable "j" should be defined as enum mod_mem_type.</div><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
+ for (j = MOD_TEXT; j < MOD_MEM_NUM_TYPES; j++) {<br>
+<br>
+ if (!lm->symtable[j])<br>
+ continue;<br>
+<br>
+ sp = lm->symtable[j];<br>
+ sp_end = lm->symend[j];<br>
+ percpu_syms = 0;<br>
+<br></blockquote><div> </div><div>The variable start is set to FALSE, looks strange, why not just set it to zero?</div><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
+ for (start = FALSE; sp <= sp_end; sp++) {<br>
+ /* TODO<br>
+ if (IN_MODULE_PERCPU(sp->value, lm)) {<br>
+ if (percpu_syms == DISPLAYED)<br>
+ continue;<br>
+ if (!start) {<br>
+ percpu_syms = TBD;<br>
+ continue;<br>
+ }<br>
+ dump_percpu_symbols(lm);<br>
+ percpu_syms = DISPLAYED;<br>
+ }<br>
+ */<br>
+ if (MODULE_PSEUDO_SYMBOL(sp)) {<br>
+ if (MODULE_SECTION_START(sp)) {<br>
+ p1 = sp->name + strlen("_MODULE_SECTION_START ");<br>
+ p2 = "section start";<br>
+ } else if (MODULE_SECTION_END(sp)) {<br>
+ p1 = sp->name + strlen("_MODULE_SECTION_END ");<br>
+ p2 = "section end";<br>
+ } else if (STRNEQ(sp->name, module_start_tags[j])) {<br></blockquote><div><br></div><div>MODULE_MEM_START(sp, i) </div><div><br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
+ p1 = module_start_strs[j];<br>
+ p2 = sp->name + strlen(module_start_tags[j]);<br>
+ start = TRUE;<br>
+ } else if (STRNEQ(sp->name, module_end_tags[j])) {<br></blockquote><div><br></div><div>MODULE_MEM_END(sp, i)</div><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
+ p1 = module_end_strs[j];<br>
+ p2 = sp->name + strlen(module_end_tags[j]);<br>
+ /* TODO<br>
+ if (MODULE_PERCPU_SYMS_LOADED(lm) &&<br>
+ !percpu_syms) {<br>
+ dump_percpu_symbols(lm);<br>
+ percpu_syms = DISPLAYED;<br>
+ }<br>
+ */<br>
+ } else {<br>
+ p1 = "unknown tag";<br>
+ p2 = sp->name;<br>
+ }<br>
+<br>
+ fprintf(fp, "%lx %s: %s\n", sp->value, p1, p2);<br>
+<br>
+ /* TODO<br>
+ if (percpu_syms == TBD) {<br>
+ dump_percpu_symbols(lm);<br>
+ percpu_syms = DISPLAYED;<br>
+ }<br>
+ */<br>
+ } else<br>
+ show_symbol(sp, 0, SHOW_RADIX());<br>
+ }<br>
+ }<br>
+ }<br>
+}<br>
+<br>
static void<br>
symbol_dump(ulong flags, char *module)<br>
{<br>
@@ -1304,6 +1402,11 @@ symbol_dump(ulong flags, char *module)<br>
if (!(flags & MODULE_SYMS))<br>
return;<br>
<br>
+ if (MODULE_MEMORY()) { /* 6.4 and later */<br>
+ module_symbol_dump(module);<br>
+ return;<br>
+ }<br>
+<br>
for (i = 0; i < st->mods_installed; i++) {<br>
<br>
lm = &st->load_modules[i];<br>
@@ -1808,6 +1911,24 @@ static const char *module_end_tags[] = {<br>
"_MODULE_INIT_DATA_END_",<br>
"_MODULE_INIT_RODATA_END_"<br>
};<br>
+static const char *module_start_strs[] = {<br>
+ "MODULE TEXT START",<br>
+ "MODULE DATA START",<br>
+ "MODULE RODATA START",<br>
+ "MODULE RO_AFTER_INIT START",<br>
+ "MODULE INIT_TEXT START",<br>
+ "MODULE INIT_DATA START",<br>
+ "MODULE INIT_RODATA START"<br>
+};<br>
+static const char *module_end_strs[] = {<br>
+ "MODULE TEXT END",<br>
+ "MODULE DATA END",<br>
+ "MODULE RODATA END",<br>
+ "MODULE RO_AFTER_INIT END",<br>
+ "MODULE INIT_TEXT END",<br>
+ "MODULE INIT_DATA END",<br>
+ "MODULE INIT_RODATA END"<br>
+};<br>
<br></blockquote><div><br></div><div>As I mentioned in the [PATCH 01/15], similarly the above strings are in pairs, so they can be defined as one array or macro substitution.</div><div><br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
/*<br>
* Linux 6.4 introduced module.mem memory layout<br>
@@ -5278,6 +5399,85 @@ module_symbol(ulong value,<br>
return FALSE;<br>
}<br>
<br>
+/* Linux 6.4 and later */<br>
+struct syment *<br>
+value_search_module_v2(ulong value, ulong *offset)<br>
+{<br>
+ int i, j;<br>
+ struct syment *sp, *sp_end, *spnext, *splast;<br></blockquote><div><br></div><div>Indent issues. </div><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
+ struct load_module *lm;<br>
+<br>
+ for (i = 0; i < st->mods_installed; i++) {<br>
+ lm = &st->load_modules[i];<br>
+<br>
+ if (!IN_MODULE(value, lm) && !IN_MODULE_INIT(value, lm))<br>
+ continue;<br>
+<br></blockquote><div> </div><div>The variable "j" should be defined as enum mod_mem_type.</div><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
+ for (j = MOD_TEXT; j < MOD_MEM_NUM_TYPES; j++) {<br>
+ sp = lm->symtable[j];<br>
+ sp_end = lm->symend[j];<br>
+<br>
+ if (value < sp->value)<br>
+ continue;<br>
+<br>
+ splast = NULL;<br>
+ for ( ; sp <= sp_end; sp++) {<br>
+ /* TODO<br>
+ if (machine_type("ARM64") &&<br>
+ IN_MODULE_PERCPU(sp->value, lm) &&<br>
+ !IN_MODULE_PERCPU(value, lm))<br>
+ continue;<br>
+ */<br>
+<br></blockquote><div><br></div><div>Currently, I'm still building the kernel for aarch64, will check it next time.</div><div><br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
+ if (value == sp->value) {<br>
+ if (MODULE_MEM_END(sp, j))<br>
+ break;<br>
+<br>
+ if (MODULE_PSEUDO_SYMBOL(sp)) {<br>
+ spnext = sp + 1;<br>
+ if (MODULE_PSEUDO_SYMBOL(spnext))<br>
+ continue;<br>
+ if (spnext->value == value)<br>
+ sp = spnext;<br>
+ }<br>
+ /* TODO: probably not needed anymore? */<br></blockquote><div><br></div><div>It's hard to track the change log for an old kernel and crash-utility. But for the kernel 6.4 and later, it could be safe to drop it, crash won't go into this code path.</div><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
+ if (is_insmod_builtin(lm, sp)) {<br>
+ spnext = sp+1;<br>
+ if ((spnext < sp_end) &&<br>
+ (value == spnext->value))<br>
+ sp = spnext;<br>
+ }<br>
+ if (sp->name[0] == '.') {<br>
+ spnext = sp+1;<br>
+ if (spnext->value == value)<br>
+ sp = spnext;<br>
+ }<br>
+ if (offset)<br>
+ *offset = 0;<br>
+ return sp;<br>
+ }<br>
+<br>
+ if (sp->value > value) {<br>
+ sp = splast ? splast : sp - 1;<br>
+ if (offset)<br>
+ *offset = value - sp->value;<br>
+ return sp;<br>
+ }<br>
+<br></blockquote><div><br></div><div>Why is it needed? The splast will also store the "__insmod_"-type symbols?</div><div><br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
+ if (!MODULE_PSEUDO_SYMBOL(sp)) {<br>
+ if (is_insmod_builtin(lm, sp)) {<br>
+ if (!splast || (sp->value > splast->value))<br>
+ splast = sp;<br>
+ } else<br>
+ splast = sp;<br>
+ }<br>
+ }<br>
+ }<br>
+ }<br>
+<br>
+ return NULL;<br>
+}<br>
+<br>
struct syment *<br>
value_search_module(ulong value, ulong *offset)<br>
{<br>
@@ -5286,6 +5486,9 @@ value_search_module(ulong value, ulong *offset)<br>
struct load_module *lm;<br>
int search_init_sections, search_init;<br>
<br>
+ if (MODULE_MEMORY()) /* Linux 6.4 and later */<br>
+ return value_search_module_v2(value, offset);<br>
+<br></blockquote><div> </div><div>Here, the suffix _v2 also looks fine to me, but if it can be aligned with the kernel version suffix, that will keep the same code style as the [patch 01/15].</div><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
search_init = FALSE;<br>
search_init_sections = 0;<br>
<br>
@@ -13958,19 +14161,32 @@ _in_module(ulong addr, struct load_module *lm, int type)<br>
return ((addr >= base) && (addr < (base + size)));<br>
}<br>
<br>
-/* Returns the end address of the module memory region. */<br>
-static ulong<br>
-module_mem_end(ulong addr, struct load_module *lm)<br>
+/* Returns module memory type, otherwise MOD_INVALID(-1) */<br>
+static int<br>
+module_mem_type(ulong addr, struct load_module *lm)<br>
{<br>
- ulong base, end;<br>
+ ulong base;<br>
int i;<br></blockquote><div><br></div><div>The variable "i" should be defined as enum mod_mem_type, the compiler will be helpful to check for any potential errors(warnings). </div><div><br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
+<br>
for (i = MOD_TEXT; i < MOD_MEM_NUM_TYPES; i++) {<br>
base = lm->mem[i].base;<br>
if (!base)<br>
continue;<br></blockquote><div><br></div><div>Kernel usually tends to check the lm->mem[i].size instead of lm->mem[i].base, for example:</div><div><br></div><div>ulong size;</div><div>...</div><div>size = lm->mem[i].size;</div><div>if (!size)</div><div> continue;</div><div><br></div><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
- end = base + lm->mem[i].size;<br>
- if ((addr >= base) && (addr < end))<br>
- return end;<br>
+ if ((addr >= base) && (addr < base + lm->mem[i].size))<br>
+ return i;<br></blockquote><div><br></div><div>base = lm->mem[i].base;</div><div>if ((addr >= base) && (addr < base + size))</div><div><br></div><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
}<br>
- return 0;<br>
+<br>
+ return MOD_INVALID;<br>
+}<br>
+<br>
+/* Returns the end address of the module memory region. */<br>
+static ulong<br>
+module_mem_end(ulong addr, struct load_module *lm)<br>
+{<br>
+ int type = module_mem_type(addr, lm);<br>
+<br></blockquote><div><br></div><div>The module_mem_type() will ensure that the type does not exceed the range of type, just like the code comment. So no need to check for the condition "type >= MOD_MEM_NUM_TYPES" as below.</div><div> </div><div>Thanks</div><div>Lianbo</div><div><br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
+ if (type == MOD_INVALID || type >= MOD_MEM_NUM_TYPES)<br>
+ return 0;<br>
+<br>
+ return lm->mem[type].base + lm->mem[type].size;<br>
}<br>
-- <br>
2.31.1<br>
<br>
</blockquote></div></div>