From 043f599d0fa02e9f81cd2187b27dff3bacfecdf6 Mon Sep 17 00:00:00 2001 From: panfengyun Date: Tue, 9 Sep 2014 13:26:22 +0800 Subject: [PATCH] improve ps performance When using ps command in the system which contains numerous tasks, especially RHEL 7, it will spend too much time to get the data of tasks. We collect an output of ps command as part of summary information of crash dump, and in a catastrophic case, crash dump could have a huge number of tasks due to the bug causing the system panic. But it's a problem if ps command needs to spend several hours to finish execution. After investigation, what we found is, because of the following kernel commit and a fix in crash 7.0.4, we need a lot of time to wait for ps command to calculate rss. kernel commit: commit 34e55232e59f7b19050267a05ff1226e5cd122a5 Author: KAMEZAWA Hiroyuki Date: Fri Mar 5 13:41:40 2010 -0800 mm: avoid false sharing of mm_counter ... crash fix: 7.0.4 - Fix for the "ps" command's display of per-task RSS and %MEM values in Linux 2.6.34 and later kernels in which SPLIT_RSS_COUNTING is enabled. Without the patch, the values are only taken from each task's mm_struct.rss_stat structure, which may contain stale values because they may not be synchronized with the RSS values stored in each per-thread task_struct.rss_stat structure; this may lead to invalid or slightly low RSS values, and worst-case, the %MEM value may show garbage percentage values. (vinayakm.list@gmail.com) So: 1. we fix rss calculation in get_task_mem_usage() to improve performance of ps command. 2. And we also fix the part of getting enumerators MM_FILEPAGES and MM_ANONPAGES which also speeds up ps command. The 1st fix improves the performance of ps mostly. We also have some tests. 
please check the following data: OS process number without fix fix all 2 parts --------------------------------------------------------------------- RHEL6 ~100k ~180s ~70s RHEL7 ~100k ~51870s(~14h) ~58s Signed-off-by: panfengyun --- crash-7.0.7/defs.h | 10 ++++++ crash-7.0.7/main.c | 1 + crash-7.0.7/memory.c | 75 ++++++++++++++++++++++++++++++++++++++++---------- crash-7.0.7/task.c | 49 ++++++++++++++++++++++++++++++++ 4 files changed, 120 insertions(+), 15 deletions(-) diff --git a/crash-7.0.7/defs.h b/crash-7.0.7/defs.h index 44df6ae..2fb3a4c 100755 --- a/crash-7.0.7/defs.h +++ b/crash-7.0.7/defs.h @@ -761,10 +761,18 @@ struct task_context { /* context stored for each task */ struct task_context *tc_next; }; +struct tgid_task_context{ /* tgid and task stored for each task */ + ulong tgid; + ulong task; +}; + struct task_table { /* kernel/local task table data */ struct task_context *current; struct task_context *context_array; + struct tgid_task_context *ttc_array; void (*refresh_task_table)(void); + long filepages; + long anonpages; ulong flags; ulong task_start; ulong task_end; @@ -4834,6 +4842,8 @@ ulong generic_get_stackbase(ulong); ulong generic_get_stacktop(ulong); void dump_task_table(int); void sort_context_array(void); +void sort_tgid_task_context_array(void); +int sort_by_tgid(const void *, const void *); int in_irq_ctx(ulonglong, int, ulong); /* diff --git a/crash-7.0.7/main.c b/crash-7.0.7/main.c index b0524d2..3d0485c 100755 --- a/crash-7.0.7/main.c +++ b/crash-7.0.7/main.c @@ -773,6 +773,7 @@ reattempt: } else if (!(pc->flags & MINIMAL_MODE)) { tt->refresh_task_table(); sort_context_array(); + sort_tgid_task_context_array(); } } if (!STREQ(pc->curcmd, pc->program_name)) diff --git a/crash-7.0.7/memory.c b/crash-7.0.7/memory.c index c97dd39..49b86f7 100755 --- a/crash-7.0.7/memory.c +++ b/crash-7.0.7/memory.c @@ -225,6 +225,7 @@ static int next_module_vaddr(ulong, ulong *); static int next_identity_mapping(ulong, ulong *); static int 
vm_area_page_dump(ulong, ulong, ulong, ulong, ulong, struct reference *); +void set_pages(void); static int dump_swap_info(ulong, ulong *, ulong *); static void swap_info_init(void); static char *get_swapdev(ulong, char *); @@ -1066,6 +1067,7 @@ vm_init(void) page_flags_init(); + set_pages(); vt->flags |= VM_INIT; } @@ -4072,6 +4074,31 @@ in_user_stack(ulong task, ulong vaddr) } /* + * set the const value of filepages and anonpages + * according to MM_FILEPAGES and MM_ANONPAGES. + */ +void +set_pages(void) +{ + long anonpages, filepages; + if (VALID_MEMBER(mm_struct_rss)) + return; + if (VALID_MEMBER(mm_struct_rss_stat)) + { + if (!enumerator_value("MM_FILEPAGES", &filepages) || + !enumerator_value("MM_ANONPAGES", &anonpages)) + { + filepages = 0; + anonpages = 1; + } + tt->filepages = filepages; + tt->anonpages = anonpages; + } else { + return; + } +} + +/* * Fill in the task_mem_usage structure with the RSS, virtual memory size, * percent of physical memory being used, and the mm_struct address. 
*/ @@ -4108,11 +4135,8 @@ get_task_mem_usage(ulong task, struct task_mem_usage *tm) if (VALID_MEMBER(mm_struct_rss_stat)) { long anonpages, filepages; - if (!enumerator_value("MM_FILEPAGES", &filepages) || - !enumerator_value("MM_ANONPAGES", &anonpages)) { - filepages = 0; - anonpages = 1; - } + anonpages = tt->anonpages; + filepages = tt->filepages; rss += LONG(tt->mm_struct + OFFSET(mm_struct_rss_stat) + OFFSET(mm_rss_stat_count) + @@ -4125,19 +4149,36 @@ get_task_mem_usage(ulong task, struct task_mem_usage *tm) /* Check whether SPLIT_RSS_COUNTING is enabled */ if (VALID_MEMBER(task_struct_rss_stat)) { - int i, sync_rss; - ulong tgid; - struct task_context *tc1; + int sync_rss; + struct tgid_task_context *ttc_array, *ttc, *first, *last; + + ttc_array = tt->ttc_array; + if (!(ttc = (struct tgid_task_context *) + malloc(sizeof(struct tgid_task_context)))) + error(FATAL, "cannot malloc ttc at get_task_mem_usage()\n"); + ttc->tgid = task_tgid(task); + + ttc = (struct tgid_task_context *)bsearch(ttc, ttc_array, RUNNING_TASKS(), + sizeof(struct tgid_task_context), sort_by_tgid); + if (NULL == ttc) + error(FATAL, + "cannot search the tgid_task_context int ttc_array : %lx\n",task); - tgid = task_tgid(task); + /* find the first element which have same tgid */ + first = ttc; + while ((first > ttc_array) && ((first -1 )->tgid == first->tgid)) + first--; - tc1 = FIRST_CONTEXT(); - for (i = 0; i < RUNNING_TASKS(); i++, tc1++) { - if (task_tgid(tc1->task) != tgid) - continue; + /* find the last element which have same tgid */ + last = ttc; + while ((last < (ttc_array + (RUNNING_TASKS() - 1))) && + (last->tgid == (last + 1)->tgid)) + last++; + while (first <= last) + { /* count 0 -> filepages */ - if (!readmem(tc1->task + + if (!readmem(first->task + OFFSET(task_struct_rss_stat) + OFFSET(task_rss_stat_count), KVADDR, &sync_rss, @@ -4149,7 +4190,7 @@ get_task_mem_usage(ulong task, struct task_mem_usage *tm) rss += sync_rss; /* count 1 -> anonpages */ - if (!readmem(tc1->task + + 
if (!readmem(first->task + OFFSET(task_struct_rss_stat) + OFFSET(task_rss_stat_count) + sizeof(int), @@ -4160,6 +4201,10 @@ get_task_mem_usage(ulong task, struct task_mem_usage *tm) continue; rss += sync_rss; + + if(first == last) + break; + first++; } } diff --git a/crash-7.0.7/task.c b/crash-7.0.7/task.c index 75b1964..ce7ca59 100755 --- a/crash-7.0.7/task.c +++ b/crash-7.0.7/task.c @@ -493,6 +493,7 @@ task_init(void) } sort_context_array(); + sort_tgid_task_context_array(); if (pc->flags & SILENT) initialize_task_state(); @@ -639,6 +640,11 @@ allocate_task_space(int cnt) malloc(cnt * sizeof(struct task_context)))) error(FATAL, "cannot malloc context array (%d tasks)", cnt); + if (!(tt->ttc_array = (struct tgid_task_context *) + malloc(cnt * sizeof(struct tgid_task_context)))) + error(FATAL, "cannot malloc ttc array (%d tasks)", + cnt); + } else { if (!(tt->task_local = (void *) realloc(tt->task_local, cnt * sizeof(void *)))) @@ -652,6 +658,13 @@ allocate_task_space(int cnt) error(FATAL, "%scannot realloc context array (%d tasks)", (pc->flags & RUNTIME) ? "" : "\n", cnt); + + if (!(tt->ttc_array = (struct tgid_task_context *) + realloc(tt->ttc_array, + cnt * sizeof(struct tgid_task_context)))) + error(FATAL, + "%scannot realloc ttc array (%d tasks)", + (pc->flags & RUNTIME) ? 
"" : "\n", cnt); } } @@ -2281,8 +2294,10 @@ store_context(struct task_context *tc, ulong task, char *tp) int *processor_addr; ulong *parent_addr; ulong *mm_addr; + ulong *tgid; int has_cpu; int do_verify; + struct tgid_task_context *ttc; processor_addr = NULL; @@ -2320,6 +2335,7 @@ store_context(struct task_context *tc, ulong task, char *tp) else parent_addr = (ulong *)(tp + OFFSET(task_struct_parent)); mm_addr = (ulong *)(tp + OFFSET(task_struct_mm)); + tgid = (ulong *)(tp + OFFSET(task_struct_tgid)); has_cpu = task_has_cpu(task, tp); tc->pid = (ulong)(*pid_addr); @@ -2330,6 +2346,14 @@ store_context(struct task_context *tc, ulong task, char *tp) tc->task = task; tc->tc_next = NULL; + /* + * Fill a tgid_task_context structure with the data from tc. + */ + ttc = tt->ttc_array + tt->running_tasks; + ttc->tgid = *tgid; + ttc->task = task; + + if (do_verify && !verify_task(tc, do_verify)) { error(INFO, "invalid task address: %lx\n", tc->task); BZERO(tc, sizeof(struct task_context)); @@ -2445,6 +2469,30 @@ sort_context_array_by_last_run(void) } /* + * Set the tgid_task_context array by tgid number. + */ +void +sort_tgid_task_context_array(void) +{ + if (VALID_MEMBER(mm_struct_rss) || (!VALID_MEMBER(task_struct_rss_stat))) + return; + + qsort((void *)tt->ttc_array, (size_t)tt->running_tasks, + sizeof(struct tgid_task_context), sort_by_tgid); +} +int +sort_by_tgid(const void *arg1, const void *arg2) +{ + struct tgid_task_context *t1, *t2; + + t1 = (struct tgid_task_context *)arg1; + t2 = (struct tgid_task_context *)arg2; + + return (t1->tgid < t2->tgid ? -1 : + t1->tgid == t2->tgid ? 0 : 1); +} + +/* * Keep a stash of the last task_struct accessed. Chances are it will * be hit several times before the next task is accessed. 
*/ @@ -6577,6 +6625,7 @@ dump_task_table(int verbose) fprintf(fp, " .tc_next: %lx\n", (ulong)tc->tc_next); } fprintf(fp, " context_array: %lx\n", (ulong)tt->context_array); + fprintf(fp, " ttc_array: %lx\n", (ulong)tt->ttc_array); fprintf(fp, "refresh_task_table: "); if (tt->refresh_task_table == refresh_fixed_task_table) fprintf(fp, "refresh_fixed_task_table()\n"); -- 1.7.1