[Crash-utility] [PATCH] ARM64 support for 3-level page tables with 64K pages

Dave Anderson anderson at redhat.com
Thu Jun 16 20:14:11 UTC 2016


Hi Jim,

The problem looks to be the use of PAGEOFFSET() to index into the 512-byte pgd
used for 3-level/64K-pages:

static int
arm64_vtop_3level_64k(ulong pgd, ulong vaddr, physaddr_t *paddr, int verbose)
{
        ulong *pgd_base, *pgd_ptr, pgd_val;
        ulong *pmd_base, *pmd_ptr, pmd_val;
        ulong *pte_base, *pte_ptr, pte_val;

        if (verbose)
                fprintf(fp, "PAGE DIRECTORY: %lx\n", pgd);

        pgd_base = (ulong *)pgd;
        FILL_PGD(pgd_base, KVADDR, PTRS_PER_PGD_L3_64K * sizeof(ulong));
        pgd_ptr = pgd_base + (((vaddr) >> PGDIR_SHIFT_L3_64K) & (PTRS_PER_PGD_L3_64K - 1));
        pgd_val = ULONG(machdep->pgd + PAGEOFFSET(pgd_ptr));
                                       ^^^^^^^^^^^^^^^^^^

For kernel vmalloc addresses, the kernel's "swapper_pg_dir" is used, which has a base
address that's aligned on a 64K page boundary.  So PAGEOFFSET() of the entry pointer
works as an offset into the cached pgd.

But for user-space addresses, the mm_struct->pgd is not 64K page aligned, so the
use of PAGEOFFSET() calculates an address that goes way beyond the end of the
512-byte aligned pgd:

  crash> for user vm -p | grep -e PGD -e "k "
         MM               PGD          RSS    TOTAL_VM
  ffff8003d61ec240  ffff8003d5b12c00  11904k   12992k 
         MM               PGD          RSS    TOTAL_VM
  ffff8003d60aec40  ffff8003dcb7ca00  4864k    88448k 
         MM               PGD          RSS    TOTAL_VM
  ffff8000dbf1dc80  ffff8003d69bca00  8704k    92928k 
         MM               PGD          RSS    TOTAL_VM
  ffff8000dbf1e700  ffff8003d69b4a00  8768k    17536k 
         MM               PGD          RSS    TOTAL_VM
  ffff8003d65c9300  ffff8000d4055600  5696k    16576k 
         MM               PGD          RSS    TOTAL_VM
  ffff8003d65c9300  ffff8000d4055600  5696k    16576k 
         MM               PGD          RSS    TOTAL_VM
  ffff8003d65c9840  ffff8000d4051800  1664k    2560k  
         MM               PGD          RSS    TOTAL_VM
  ffff8003d60a8340  ffff8003dcb7fa00  10944k  303552k 
         MM               PGD          RSS    TOTAL_VM
  ffff8003d65c8880  ffff8000d4052c00  19456k  353600k 
         MM               PGD          RSS    TOTAL_VM
  ffff8003d61ab7c0  ffff8003dc86b800  4864k    14016k 
         MM               PGD          RSS    TOTAL_VM
  ffff8000dbf18340  ffff8003d69b1400  7424k   157632k 
         MM               PGD          RSS    TOTAL_VM
  ffff8003d60a8340  ffff8003dcb7fa00  10944k  303552k 
         MM               PGD          RSS    TOTAL_VM
  ffff8003d60a8340  ffff8003dcb7fa00  10944k  303552k 
         MM               PGD          RSS    TOTAL_VM
  ffff8000dbf18340  ffff8003d69b1400  7424k   157632k 
         MM               PGD          RSS    TOTAL_VM
  ffff8000dbf18340  ffff8003d69b1400  7424k   157632k 
         MM               PGD          RSS    TOTAL_VM
  ffff8000dbf18340  ffff8003d69b1400  7424k   157632k 
         MM               PGD          RSS    TOTAL_VM
  ffff8000dbf18340  ffff8003d69b1400  7424k   157632k 
         MM               PGD          RSS    TOTAL_VM
  ffff8000dbf18340  ffff8003d69b1400  7424k   157632k 
         MM               PGD          RSS    TOTAL_VM
  ffff8003d61ab7c0  ffff8003dc86b800  4864k    14016k 
         MM               PGD          RSS    TOTAL_VM
  ffff8000c403b7c0  ffff8003d6597800  4928k    5888k  
         MM               PGD          RSS    TOTAL_VM
  ffff8003d59db7c0  ffff8003dcd18600  4352k    5248k  
         MM               PGD          RSS    TOTAL_VM
  ffff8003d61ead40  ffff8003d4107400  3776k    5760k  
         MM               PGD          RSS    TOTAL_VM
  ffff8000c403ad40  ffff8003d6592000  4224k    5120k  
         MM               PGD          RSS    TOTAL_VM
  ffff8003d60a78c0  ffff8003dcb72600  2496k   109888k 
         MM               PGD          RSS    TOTAL_VM
  ffff8003d61a8880  ffff8003dc86b200  2176k   109888k 
         MM               PGD          RSS    TOTAL_VM
  ffff8003d65c8880  ffff8000d4052c00  19456k  353600k 
         MM               PGD          RSS    TOTAL_VM
  ffff8003d65c8880  ffff8000d4052c00  19456k  353600k 
  ...

Dave



----- Original Message -----
> 
> ----- Original Message -----
> > > Adds ARM64 support for 3-level page tables with 64K pages and 48 VA bits.
> >
> > Nicely done, Jim.  Queued for crash-7.1.5:
> >
> >   https://github.com/crash-utility/crash/commit/ab91852f945bfecfa0bca6a42253fbecb38723db
> >
> > Thanks,
> >   Dave
> >
> 
> Hi Jim,
> 
> I just noticed today that your 3-level 64K patch does not work for user
> virtual address space.
> I haven't looked too deeply into it, but for example on a live system, all
> user virtual address
> vtop operations fail, all disk-backed user memory space shows the "FILE:"
> backing, and the
> anonymous space shows "(not mapped)":
> 
>   crash> help -m | grep VM
>                flags: 10400069
>                (KSYMS_START|VM_L3_64K|VMEMMAP|KDUMP_ENABLED|IRQ_STACKS|MACHDEP_BT_TEXT)
>   crash> sys | grep RELEASE
>      RELEASE: 4.5.0-0.38.el7.aarch64
>   crash> set
>       PID: 1212
>   COMMAND: "crash"
>      TASK: ffff8003d74f3f00  [THREAD_INFO: ffff8003d7454000]
>       CPU: 1
>     STATE: TASK_RUNNING (ACTIVE)
>   crash> vm -p
>   PID: 1212   TASK: ffff8003d74f3f00  CPU: 1   COMMAND: "crash"
>          MM               PGD          RSS    TOTAL_VM
>   ffff8000c40363c0  ffff8003db6a9200  211904k  355264k
>         VMA           START       END     FLAGS FILE
>   ffff8003de746d40     400000     a00000    875 /root/crash.git/crash
>   VIRTUAL     PHYSICAL
>   400000      FILE: /root/crash.git/crash  OFFSET: 0
>   410000      FILE: /root/crash.git/crash  OFFSET: 10000
>   420000      FILE: /root/crash.git/crash  OFFSET: 20000
>   430000      FILE: /root/crash.git/crash  OFFSET: 30000
>   440000      FILE: /root/crash.git/crash  OFFSET: 40000
>   450000      FILE: /root/crash.git/crash  OFFSET: 50000
>   ... [ cut ] ...
>         VMA           START       END     FLAGS FILE
>   ffff8003de745d70     a50000     b00000 100073
>   VIRTUAL     PHYSICAL
>   a50000      (not mapped)
>   a60000      (not mapped)
>   a70000      (not mapped)
>   a80000      (not mapped)
>   a90000      (not mapped)
>   aa0000      (not mapped)
>   ab0000      (not mapped)
>   ac0000      (not mapped)
>   ...
> 
> In all cases, the PGD value reads as 0 and therefore fails:
>   
>   crash> vtop 400000
>   VIRTUAL     PHYSICAL
>   400000      (not mapped)
>   
>   PAGE DIRECTORY: ffff8003db6a9200
>      PGD: ffff8003db6a9200 => 0
>   
>         VMA           START       END     FLAGS FILE
>   ffff8003de746d40     400000     a00000    875 /root/crash.git/crash
>   
>   FILE: /root/crash.git/crash  OFFSET: 0
>   
>   crash>
> 
> That is the correct PGD address, and when read, it looks like a valid PTE:
> 
>   crash> rd ffff8003db6a9200
>   ffff8003db6a9200:  00000043dee60003                    ....C...
>   crash>
> 
> vmalloc() addresses translate just fine, and since they use the same
> function,
> I'm not sure what's going on?  Did you ever check user-space translations?
> 
> Thanks,
>   Dave
> 
>   
>   
> > 
> > 
> > > ---
> > >  arm64.c | 126
> > >  ++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------
> > >  defs.h  |  28 +++++++++++----
> > >  2 files changed, 133 insertions(+), 21 deletions(-)
> > > 
> > > diff --git a/arm64.c b/arm64.c
> > > index f6ea7a1..d1c9c3e 100644
> > > --- a/arm64.c
> > > +++ b/arm64.c
> > > @@ -34,6 +34,7 @@ static void arm64_init_kernel_pgd(void);
> > >  static int arm64_kvtop(struct task_context *, ulong, physaddr_t *, int);
> > >  static int arm64_uvtop(struct task_context *, ulong, physaddr_t *, int);
> > >  static int arm64_vtop_2level_64k(ulong, ulong, physaddr_t *, int);
> > > +static int arm64_vtop_3level_64k(ulong, ulong, physaddr_t *, int);
> > >  static int arm64_vtop_3level_4k(ulong, ulong, physaddr_t *, int);
> > >  static ulong arm64_get_task_pgd(ulong);
> > >  static void arm64_irq_stack_init(void);
> > > @@ -188,15 +189,29 @@ arm64_init(int when)
> > >  			break;
> > >  
> > >  		case 65536:
> > > -			machdep->flags |= VM_L2_64K;
> > > -			machdep->ptrs_per_pgd = PTRS_PER_PGD_L2_64K;
> > > -			if ((machdep->pgd =
> > > -			    (char *)malloc(PTRS_PER_PGD_L2_64K * 8)) == NULL)
> > > -				error(FATAL, "cannot malloc pgd space.");
> > > -			if ((machdep->ptbl =
> > > -			    (char *)malloc(PTRS_PER_PTE_L2_64K * 8)) == NULL)
> > > -				error(FATAL, "cannot malloc ptbl space.");
> > > -			machdep->pmd = NULL;  /* not used */
> > > +			if (machdep->machspec->VA_BITS > PGDIR_SHIFT_L3_64K) {
> > > +				machdep->flags |= VM_L3_64K;
> > > +				machdep->ptrs_per_pgd = PTRS_PER_PGD_L3_64K;
> > > +				if ((machdep->pgd =
> > > +				    (char *)malloc(PTRS_PER_PGD_L3_64K * 8)) == NULL)
> > > +					error(FATAL, "cannot malloc pgd space.");
> > > +				if ((machdep->pmd =
> > > +				    (char *)malloc(PTRS_PER_PMD_L3_64K * 8)) == NULL)
> > > +					error(FATAL, "cannot malloc pmd space.");
> > > +				if ((machdep->ptbl =
> > > +				    (char *)malloc(PTRS_PER_PTE_L3_64K * 8)) == NULL)
> > > +					error(FATAL, "cannot malloc ptbl space.");
> > > +			} else {
> > > +				machdep->flags |= VM_L2_64K;
> > > +				machdep->ptrs_per_pgd = PTRS_PER_PGD_L2_64K;
> > > +				if ((machdep->pgd =
> > > +				    (char *)malloc(PTRS_PER_PGD_L2_64K * 8)) == NULL)
> > > +					error(FATAL, "cannot malloc pgd space.");
> > > +				if ((machdep->ptbl =
> > > +				    (char *)malloc(PTRS_PER_PTE_L2_64K * 8)) == NULL)
> > > +					error(FATAL, "cannot malloc ptbl space.");
> > > +				machdep->pmd = NULL;  /* not used */
> > > +			}
> > >  			machdep->pud = NULL;  /* not used */
> > >  			break;
> > >  
> > > @@ -379,6 +394,8 @@ arm64_dump_machdep_table(ulong arg)
> > >  		fprintf(fp, "%sPHYS_OFFSET", others++ ? "|" : "");
> > >  	if (machdep->flags & VM_L2_64K)
> > >  		fprintf(fp, "%sVM_L2_64K", others++ ? "|" : "");
> > > +	if (machdep->flags & VM_L3_64K)
> > > +		fprintf(fp, "%sVM_L3_64K", others++ ? "|" : "");
> > >  	if (machdep->flags & VM_L3_4K)
> > >  		fprintf(fp, "%sVM_L3_4K", others++ ? "|" : "");
> > >  	if (machdep->flags & VMEMMAP)
> > > @@ -410,10 +427,14 @@ arm64_dump_machdep_table(ulong arg)
> > >  	fprintf(fp, "     processor_speed: arm64_processor_speed()\n");
> > >  	fprintf(fp, "               uvtop: arm64_uvtop()->%s()\n",
> > >  		machdep->flags & VM_L3_4K ?
> > > -		"arm64_vtop_3level_4k" : "arm64_vtop_2level_64k");
> > > +		"arm64_vtop_3level_4k" :
> > > +		machdep->flags & VM_L3_64K ?
> > > +		"arm64_vtop_3level_64k" : "arm64_vtop_2level_64k");
> > >  	fprintf(fp, "               kvtop: arm64_kvtop()->%s()\n",
> > >  		machdep->flags & VM_L3_4K ?
> > > -		"arm64_vtop_3level_4k" : "arm64_vtop_2level_64k");
> > > +		"arm64_vtop_3level_4k" :
> > > +		machdep->flags & VM_L3_64K ?
> > > +		"arm64_vtop_3level_64k" : "arm64_vtop_2level_64k");
> > >  	fprintf(fp, "        get_task_pgd: arm64_get_task_pgd()\n");
> > >  	fprintf(fp, "            dump_irq: generic_dump_irq()\n");
> > >  	fprintf(fp, "     get_stack_frame: arm64_get_stack_frame()\n");
> > > @@ -719,10 +740,12 @@ arm64_kvtop(struct task_context *tc, ulong kvaddr,
> > > physaddr_t *paddr, int verbos
> > >  	kernel_pgd = vt->kernel_pgd[0];
> > >  	*paddr = 0;
> > >  
> > > -	switch (machdep->flags & (VM_L2_64K|VM_L3_4K))
> > > +	switch (machdep->flags & (VM_L2_64K|VM_L3_64K|VM_L3_4K))
> > >  	{
> > >  	case VM_L2_64K:
> > >  		return arm64_vtop_2level_64k(kernel_pgd, kvaddr, paddr, verbose);
> > > +	case VM_L3_64K:
> > > +		return arm64_vtop_3level_64k(kernel_pgd, kvaddr, paddr, verbose);
> > >  	case VM_L3_4K:
> > >  		return arm64_vtop_3level_4k(kernel_pgd, kvaddr, paddr, verbose);
> > >  	default:
> > > @@ -740,10 +763,12 @@ arm64_uvtop(struct task_context *tc, ulong uvaddr,
> > > physaddr_t *paddr, int verbos
> > >  
> > >  	*paddr = 0;
> > >  
> > > -	switch (machdep->flags & (VM_L2_64K|VM_L3_4K))
> > > +	switch (machdep->flags & (VM_L2_64K|VM_L3_64K|VM_L3_4K))
> > >  	{
> > >  	case VM_L2_64K:
> > >  		return arm64_vtop_2level_64k(user_pgd, uvaddr, paddr, verbose);
> > > +	case VM_L3_64K:
> > > +		return arm64_vtop_3level_64k(user_pgd, uvaddr, paddr, verbose);
> > >  	case VM_L3_4K:
> > >  		return arm64_vtop_3level_4k(user_pgd, uvaddr, paddr, verbose);
> > >  	default:
> > > @@ -820,6 +845,78 @@ no_page:
> > >  	return FALSE;
> > >  }
> > >  
> > > +static int
> > > +arm64_vtop_3level_64k(ulong pgd, ulong vaddr, physaddr_t *paddr, int
> > > verbose)
> > > +{
> > > +	ulong *pgd_base, *pgd_ptr, pgd_val;
> > > +	ulong *pmd_base, *pmd_ptr, pmd_val;
> > > +	ulong *pte_base, *pte_ptr, pte_val;
> > > +
> > > +        if (verbose)
> > > +                fprintf(fp, "PAGE DIRECTORY: %lx\n", pgd);
> > > +
> > > +	pgd_base = (ulong *)pgd;
> > > +	FILL_PGD(pgd_base, KVADDR, PTRS_PER_PGD_L3_64K * sizeof(ulong));
> > > +	pgd_ptr = pgd_base + (((vaddr) >> PGDIR_SHIFT_L3_64K) &
> > > (PTRS_PER_PGD_L3_64K - 1));
> > > +        pgd_val = ULONG(machdep->pgd + PAGEOFFSET(pgd_ptr));
> > > +        if (verbose)
> > > +                fprintf(fp, "   PGD: %lx => %lx\n", (ulong)pgd_ptr,
> > > pgd_val);
> > > +	if (!pgd_val)
> > > +		goto no_page;
> > > +
> > > +	/*
> > > +	 * #define __PAGETABLE_PUD_FOLDED
> > > +	 */
> > > +
> > > +	pmd_base = (ulong *)PTOV(pgd_val & PHYS_MASK & (s32)machdep->pagemask);
> > > +	FILL_PMD(pmd_base, KVADDR, PTRS_PER_PMD_L3_64K * sizeof(ulong));
> > > +	pmd_ptr = pmd_base + (((vaddr) >> PMD_SHIFT_L3_64K) &
> > > (PTRS_PER_PMD_L3_64K
> > > - 1));
> > > +        pmd_val = ULONG(machdep->pmd + PAGEOFFSET(pmd_ptr));
> > > +        if (verbose)
> > > +                fprintf(fp, "   PMD: %lx => %lx\n", (ulong)pmd_ptr,
> > > pmd_val);
> > > +	if (!pmd_val)
> > > +		goto no_page;
> > > +
> > > +	if ((pmd_val & PMD_TYPE_MASK) == PMD_TYPE_SECT) {
> > > +		ulong sectionbase = (pmd_val & SECTION_PAGE_MASK_512MB) & PHYS_MASK;
> > > +		if (verbose) {
> > > +			fprintf(fp, "  PAGE: %lx  (512MB)\n\n", sectionbase);
> > > +			arm64_translate_pte(pmd_val, 0, 0);
> > > +		}
> > > +		*paddr = sectionbase + (vaddr & ~SECTION_PAGE_MASK_512MB);
> > > +		return TRUE;
> > > +	}
> > > +
> > > +	pte_base = (ulong *)PTOV(pmd_val & PHYS_MASK & (s32)machdep->pagemask);
> > > +	FILL_PTBL(pte_base, KVADDR, PTRS_PER_PTE_L3_64K * sizeof(ulong));
> > > +	pte_ptr = pte_base + (((vaddr) >> machdep->pageshift) &
> > > (PTRS_PER_PTE_L3_64K - 1));
> > > +        pte_val = ULONG(machdep->ptbl + PAGEOFFSET(pte_ptr));
> > > +        if (verbose)
> > > +                fprintf(fp, "   PTE: %lx => %lx\n", (ulong)pte_ptr,
> > > pte_val);
> > > +	if (!pte_val)
> > > +		goto no_page;
> > > +
> > > +	if (pte_val & PTE_VALID) {
> > > +		*paddr = (PAGEBASE(pte_val) & PHYS_MASK) + PAGEOFFSET(vaddr);
> > > +		if (verbose) {
> > > +			fprintf(fp, "  PAGE: %lx\n\n", PAGEBASE(*paddr));
> > > +			arm64_translate_pte(pte_val, 0, 0);
> > > +		}
> > > +	} else {
> > > +		if (IS_UVADDR(vaddr, NULL))
> > > +			*paddr = pte_val;
> > > +		if (verbose) {
> > > +			fprintf(fp, "\n");
> > > +			arm64_translate_pte(pte_val, 0, 0);
> > > +		}
> > > +		goto no_page;
> > > +	}
> > > +
> > > +	return TRUE;
> > > +no_page:
> > > +	return FALSE;
> > > +}
> > > +
> > >  static int
> > >  arm64_vtop_3level_4k(ulong pgd, ulong vaddr, physaddr_t *paddr, int
> > >  verbose)
> > >  {
> > > @@ -2348,9 +2445,10 @@ arm64_calc_virtual_memory_ranges(void)
> > >  
> > >  	STRUCT_SIZE_INIT(page, "page");
> > >  
> > > -        switch (machdep->flags & (VM_L2_64K|VM_L3_4K))
> > > +        switch (machdep->flags & (VM_L2_64K|VM_L3_64K|VM_L3_4K))
> > >          {
> > >          case VM_L2_64K:
> > > +        case VM_L3_64K:
> > >  		PUD_SIZE = PGDIR_SIZE_L2_64K;
> > >  		break;
> > >          case VM_L3_4K:
> > > diff --git a/defs.h b/defs.h
> > > index 56ae06c..d1b49d0 100644
> > > --- a/defs.h
> > > +++ b/defs.h
> > > @@ -2815,7 +2815,7 @@ typedef u64 pte_t;
> > >  
> > >  typedef signed int s32;
> > >  
> > > -/*
> > > +/*
> > >   * 3-levels / 4K pages
> > >   */
> > >  #define PTRS_PER_PGD_L3_4K   (512)
> > > @@ -2823,10 +2823,23 @@ typedef signed int s32;
> > >  #define PTRS_PER_PTE_L3_4K   (512)
> > >  #define PGDIR_SHIFT_L3_4K    (30)
> > >  #define PGDIR_SIZE_L3_4K     ((1UL) << PGDIR_SHIFT_L3_4K)
> > > -#define PGDIR_MASK_L3 4K     (~(PGDIR_SIZE_L3_4K-1))
> > > +#define PGDIR_MASK_L3_4K     (~(PGDIR_SIZE_L3_4K-1))
> > >  #define PMD_SHIFT_L3_4K      (21)
> > > -#define PMD_SIZE_L3_4K       (1UL << PMD_SHIFT_4K)
> > > -#define PMD_MASK_L3 4K       (~(PMD_SIZE_4K-1))
> > > +#define PMD_SIZE_L3_4K       (1UL << PMD_SHIFT_L3_4K)
> > > +#define PMD_MASK_L3_4K       (~(PMD_SIZE_L3_4K-1))
> > > +
> > > +/*
> > > + * 3-levels / 64K pages
> > > + */
> > > +#define PTRS_PER_PGD_L3_64K  (64)
> > > +#define PTRS_PER_PMD_L3_64K  (8192)
> > > +#define PTRS_PER_PTE_L3_64K  (8192)
> > > +#define PGDIR_SHIFT_L3_64K   (42)
> > > +#define PGDIR_SIZE_L3_64K    ((1UL) << PGDIR_SHIFT_L3_64K)
> > > +#define PGDIR_MASK_L3_64K    (~(PGDIR_SIZE_L3_64K-1))
> > > +#define PMD_SHIFT_L3_64K     (29)
> > > +#define PMD_SIZE_L3_64K      (1UL << PMD_SHIFT_L3_64K)
> > > +#define PMD_MASK_L3_64K      (~(PMD_SIZE_L3_64K-1))
> > >  
> > >  /*
> > >   * 2-levels / 64K pages
> > > @@ -2868,9 +2881,10 @@ typedef signed int s32;
> > >  #define KSYMS_START   (0x1)
> > >  #define PHYS_OFFSET   (0x2)
> > >  #define VM_L2_64K     (0x4)
> > > -#define VM_L3_4K      (0x8)
> > > -#define KDUMP_ENABLED (0x10)
> > > -#define IRQ_STACKS    (0x20)
> > > +#define VM_L3_64K     (0x8)
> > > +#define VM_L3_4K      (0x10)
> > > +#define KDUMP_ENABLED (0x20)
> > > +#define IRQ_STACKS    (0x40)
> > >  
> > >  /*
> > >   * sources: Documentation/arm64/memory.txt
> > > --
> > > 2.1.4
> > > 
> > > --
> > > Crash-utility mailing list
> > > Crash-utility redhat com
> > > https://www.redhat.com/mailman/listinfo/crash-utility
> > >




More information about the Crash-utility mailing list