[Crash-utility] [PATCH 1/2] Fix cpu_slab freelist handling on SLUB
Dave Anderson
anderson at redhat.com
Sun Apr 17 20:00:37 UTC 2016
Can you show a before-and-after example of the "kmem -s" and "kmem -S"
output of a particular slab where your patch makes a difference?
Thanks,
Dave
----- Original Message -----
> Hi,
>
> SLUB cpu_slab has 2 freelist. One is cpu_slab->freelist for local
> cpu. One is cpu_slab->page->freelist for remote cpu.
>
> So, we have to check both freelists to get the full picture. Note that
> page->inuse only accounts for objects freed to page->freelist, not to
> cpu_slab->freelist.
>
> So the total number of free objects is
>
> (page->objects - page->inuse) + count(cpu_slab->freelist)
> ---
>
> memory.c | 213
> ++++++++++++++++++++++++++++----------------------------------
> 1 file changed, 99 insertions(+), 114 deletions(-)
>
> diff -puN memory.c~crash-slub-freelist-fix memory.c
> --- crash-64/memory.c~crash-slub-freelist-fix 2016-04-18 02:29:57.743774055
> +0900
> +++ crash-64-hirofumi/memory.c 2016-04-18 02:32:30.999515870 +0900
> @@ -17914,15 +17914,62 @@ bailout:
> FREEBUF(si->cache_buf);
> }
>
> +static ushort slub_page_objects(struct meminfo *si, ulong page)
> +{
> + ulong objects_vaddr;
> + ushort objects;
> +
> + /*
> + * Pre-2.6.27, the object count and order were fixed in the
> + * kmem_cache structure. Now they may change, say if a high
> + * order slab allocation fails, so the per-slab object count
> + * is kept in the slab.
> + */
> + if (VALID_MEMBER(page_objects)) {
> + objects_vaddr = page + OFFSET(page_objects);
> + if (si->flags & SLAB_BITFIELD)
> + objects_vaddr += sizeof(ushort);
> + if (!readmem(objects_vaddr, KVADDR, &objects,
> + sizeof(ushort), "page.objects", RETURN_ON_ERROR))
> + return 0;
> + /*
> + * Strip page.frozen bit.
> + */
> + if (si->flags & SLAB_BITFIELD) {
> + if (__BYTE_ORDER == __LITTLE_ENDIAN) {
> + objects <<= 1;
> + objects >>= 1;
> + }
> + if (__BYTE_ORDER == __BIG_ENDIAN)
> + objects >>= 1;
> + }
> +
> + if (CRASHDEBUG(1) && (objects != si->objects))
> + error(NOTE, "%s: slab: %lx oo objects: %ld "
> + "slab objects: %d\n",
> + si->curname, si->slab,
> + si->objects, objects);
> +
> + if (objects == (ushort)(-1)) {
> + error(INFO, "%s: slab: %lx invalid page.objects: -1\n",
> + si->curname, si->slab);
> + return 0;
> + }
> + } else
> + objects = (ushort)si->objects;
> +
> + return objects;
> +}
> +
> static short
> count_cpu_partial(struct meminfo *si, int cpu)
> {
> short cpu_partial_inuse, cpu_partial_objects, free_objects;
> - ulong cpu_partial, objects_vaddr;
> + ulong cpu_partial;
>
> free_objects = 0;
>
> - if (VALID_MEMBER(kmem_cache_cpu_partial)) {
> + if (VALID_MEMBER(kmem_cache_cpu_partial) && VALID_MEMBER(page_objects)) {
> readmem(ULONG(si->cache_buf + OFFSET(kmem_cache_cpu_slab)) +
> kt->__per_cpu_offset[cpu] + OFFSET(kmem_cache_cpu_partial),
> KVADDR, &cpu_partial, sizeof(ulong),
> @@ -17939,27 +17986,13 @@ count_cpu_partial(struct meminfo *si, in
> return 0;
> if (cpu_partial_inuse == -1)
> return 0;
> - if (VALID_MEMBER(page_objects)) {
> - objects_vaddr = cpu_partial + OFFSET(page_objects);
> - if (si->flags & SLAB_BITFIELD)
> - objects_vaddr += sizeof(ushort);
> - if (!readmem(objects_vaddr, KVADDR,
> - &cpu_partial_objects, sizeof(ushort),
> - "page.objects", RETURN_ON_ERROR))
> - return 0;
> - if (si->flags & SLAB_BITFIELD) {
> - if (__BYTE_ORDER == __LITTLE_ENDIAN) {
> - cpu_partial_objects <<= 1;
> - cpu_partial_objects >>= 1;
> - }
> - if (__BYTE_ORDER == __BIG_ENDIAN)
> - cpu_partial_objects >>= 1;
> - }
> - if (cpu_partial_objects == (short)(-1))
> - return 0;
> - free_objects +=
> - cpu_partial_objects - cpu_partial_inuse;
> - }
> +
> + cpu_partial_objects = slub_page_objects(si,
> + cpu_partial);
> + if (!cpu_partial_objects)
> + return 0;
> + free_objects += cpu_partial_objects - cpu_partial_inuse;
> +
> readmem(cpu_partial + OFFSET(page_next), KVADDR,
> &cpu_partial, sizeof(ulong), "page.next",
> RETURN_ON_ERROR);
> @@ -18011,14 +18044,12 @@ get_kmem_cache_slub_data(long cmd, struc
> KVADDR, &inuse, sizeof(short),
> "page inuse", RETURN_ON_ERROR))
> return FALSE;
> - if (!cpu_freelist)
> - if (!readmem(cpu_slab_ptr + OFFSET(page_freelist),
> - KVADDR, &cpu_freelist, sizeof(ulong),
> - "page freelist", RETURN_ON_ERROR))
> - return FALSE;
> + objects = slub_page_objects(si, cpu_slab_ptr);
> + if (!objects)
> + return FALSE;
>
> - free_objects +=
> - count_free_objects(si, cpu_freelist);
> + free_objects += objects - inuse;
> + free_objects += count_free_objects(si, cpu_freelist);
> free_objects += count_cpu_partial(si, i);
>
> if (!node_total_avail)
> @@ -18255,7 +18286,7 @@ static int
> do_slab_slub(struct meminfo *si, int verbose)
> {
> physaddr_t paddr;
> - ulong vaddr, objects_vaddr;
> + ulong vaddr;
> ushort inuse, objects;
> ulong freelist, cpu_freelist, cpu_slab_ptr;
> int i, free_objects, cpu_slab, is_free, node;
> @@ -18287,50 +18318,17 @@ do_slab_slub(struct meminfo *si, int ver
> if (!readmem(si->slab + OFFSET(page_freelist), KVADDR, &freelist,
> sizeof(void *), "page.freelist", RETURN_ON_ERROR))
> return FALSE;
> - /*
> - * Pre-2.6.27, the object count and order were fixed in the
> - * kmem_cache structure. Now they may change, say if a high
> - * order slab allocation fails, so the per-slab object count
> - * is kept in the slab.
> - */
> - if (VALID_MEMBER(page_objects)) {
> - objects_vaddr = si->slab + OFFSET(page_objects);
> - if (si->flags & SLAB_BITFIELD)
> - objects_vaddr += sizeof(ushort);
> - if (!readmem(objects_vaddr, KVADDR, &objects,
> - sizeof(ushort), "page.objects", RETURN_ON_ERROR))
> - return FALSE;
> - /*
> - * Strip page.frozen bit.
> - */
> - if (si->flags & SLAB_BITFIELD) {
> - if (__BYTE_ORDER == __LITTLE_ENDIAN) {
> - objects <<= 1;
> - objects >>= 1;
> - }
> - if (__BYTE_ORDER == __BIG_ENDIAN)
> - objects >>= 1;
> - }
> -
> - if (CRASHDEBUG(1) && (objects != si->objects))
> - error(NOTE, "%s: slab: %lx oo objects: %ld "
> - "slab objects: %d\n",
> - si->curname, si->slab,
> - si->objects, objects);
>
> - if (objects == (ushort)(-1)) {
> - error(INFO, "%s: slab: %lx invalid page.objects: -1\n",
> - si->curname, si->slab);
> - return FALSE;
> - }
> - } else
> - objects = (ushort)si->objects;
> + objects = slub_page_objects(si, si->slab);
> + if (!objects)
> + return FALSE;
>
> if (!verbose) {
> DUMP_SLAB_INFO_SLUB();
> return TRUE;
> }
>
> + cpu_freelist = 0;
> for (i = 0, cpu_slab = -1; i < kt->cpus; i++) {
> cpu_slab_ptr = get_cpu_slab_ptr(si, i, &cpu_freelist);
>
> @@ -18342,11 +18340,15 @@ do_slab_slub(struct meminfo *si, int ver
> * Later slub scheme uses the per-cpu freelist
> * so count the free objects by hand.
> */
> - if (cpu_freelist)
> - freelist = cpu_freelist;
> - if ((free_objects = count_free_objects(si, freelist)) < 0)
> + if ((free_objects = count_free_objects(si, cpu_freelist)) < 0)
> return FALSE;
> - inuse = si->objects - free_objects;
> + /*
> + * If the object is freed on foreign cpu, the
> + * object is linked to page->freelist.
> + */
> + if (freelist)
> + free_objects += objects - inuse;
> + inuse = objects - free_objects;
> break;
> }
> }
> @@ -18377,28 +18379,31 @@ do_slab_slub(struct meminfo *si, int ver
> for (p = vaddr; p < vaddr + objects * si->size; p += si->size) {
> hq_open();
> is_free = FALSE;
> - for (is_free = 0, q = freelist; q;
> - q = get_freepointer(si, (void *)q)) {
> + /* Search an object on both of freelist and cpu_freelist */
> + ulong lists[] = { freelist, cpu_freelist, };
> + for (int i = 0; i < sizeof(lists) / sizeof(lists[0]); i++) {
> + for (is_free = 0, q = lists[i]; q;
> + q = get_freepointer(si, (void *)q)) {
>
> - if (q == BADADDR) {
> - hq_close();
> - return FALSE;
> - }
> - if (q & PAGE_MAPPING_ANON)
> - break;
> - if (p == q) {
> - is_free = TRUE;
> - break;
> - }
> - if (!hq_enter(q)) {
> - hq_close();
> - error(INFO,
> - "%s: slab: %lx duplicate freelist object: %lx\n",
> - si->curname, si->slab, q);
> - return FALSE;
> + if (q == BADADDR) {
> + hq_close();
> + return FALSE;
> + }
> + if (q & PAGE_MAPPING_ANON)
> + break;
> + if (p == q) {
> + is_free = TRUE;
> + goto found_object;
> + }
> + if (!hq_enter(q)) {
> + hq_close();
> + error(INFO, "%s: slab: %lx duplicate freelist object: %lx\n",
> + si->curname, si->slab, q);
> + return FALSE;
> + }
> }
> -
> }
> + found_object:
> hq_close();
>
> if (si->flags & ADDRESS_SPECIFIED) {
> @@ -18677,7 +18682,7 @@ compound_head(ulong page)
> long
> count_partial(ulong node, struct meminfo *si, ulong *free)
> {
> - ulong list_head, next, last, objects_vaddr;
> + ulong list_head, next, last;
> short inuse, objects;
> ulong total_inuse;
> ulong count = 0;
> @@ -18708,31 +18713,11 @@ count_partial(ulong node, struct meminfo
> total_inuse += inuse;
>
> if (VALID_MEMBER(page_objects)) {
> - objects_vaddr = last + OFFSET(page_objects);
> - if (si->flags & SLAB_BITFIELD)
> - objects_vaddr += sizeof(ushort);
> - if (!readmem(objects_vaddr, KVADDR, &objects,
> - sizeof(ushort), "page.objects", RETURN_ON_ERROR)) {
> - hq_close();
> - return -1;
> - }
> -
> - if (si->flags & SLAB_BITFIELD) {
> - if (__BYTE_ORDER == __LITTLE_ENDIAN) {
> - objects <<= 1;
> - objects >>= 1;
> - }
> - if (__BYTE_ORDER == __BIG_ENDIAN)
> - objects >>= 1;
> - }
> -
> - if (objects == (short)(-1)) {
> - error(INFO, "%s: slab: %lx invalid page.objects: -1\n",
> - si->curname, last);
> + objects = slub_page_objects(si, last);
> + if (!objects) {
> hq_close();
> return -1;
> }
> -
> *free += objects - inuse;
> }
>
> _
>
More information about the Crash-utility
mailing list