[Crash-utility] [PATCH] ARM64 support for 3-level page tables with 64K pages

Dave Anderson anderson at redhat.com
Wed Jun 15 20:50:42 UTC 2016


----- Original Message -----
> > Adds ARM64 support for 3-level page tables with 64K pages and 48 VA bits.
>
> Nicely done, Jim.  Queued for crash-7.1.5:
>
>   https://github.com/crash-utility/crash/commit/ab91852f945bfecfa0bca6a42253fbecb38723db
>
> Thanks,
>   Dave
>

Hi Jim,

I just noticed today that your 3-level 64K patch does not work for user virtual address space.
I haven't looked too deeply into it, but on a live system, for example, all user virtual address
vtop operations fail: all disk-backed user memory shows only its "FILE:" backing, and all
anonymous memory shows "(not mapped)":

  crash> help -m | grep VM
               flags: 10400069 (KSYMS_START|VM_L3_64K|VMEMMAP|KDUMP_ENABLED|IRQ_STACKS|MACHDEP_BT_TEXT)
  crash> sys | grep RELEASE
     RELEASE: 4.5.0-0.38.el7.aarch64
  crash> set
      PID: 1212
  COMMAND: "crash"
     TASK: ffff8003d74f3f00  [THREAD_INFO: ffff8003d7454000]
      CPU: 1
    STATE: TASK_RUNNING (ACTIVE)
  crash> vm -p
  PID: 1212   TASK: ffff8003d74f3f00  CPU: 1   COMMAND: "crash"
         MM               PGD          RSS    TOTAL_VM
  ffff8000c40363c0  ffff8003db6a9200  211904k  355264k 
        VMA           START       END     FLAGS FILE
  ffff8003de746d40     400000     a00000    875 /root/crash.git/crash
  VIRTUAL     PHYSICAL        
  400000      FILE: /root/crash.git/crash  OFFSET: 0
  410000      FILE: /root/crash.git/crash  OFFSET: 10000
  420000      FILE: /root/crash.git/crash  OFFSET: 20000
  430000      FILE: /root/crash.git/crash  OFFSET: 30000
  440000      FILE: /root/crash.git/crash  OFFSET: 40000
  450000      FILE: /root/crash.git/crash  OFFSET: 50000
  ... [ cut ] ...
        VMA           START       END     FLAGS FILE
  ffff8003de745d70     a50000     b00000 100073 
  VIRTUAL     PHYSICAL        
  a50000      (not mapped)
  a60000      (not mapped)
  a70000      (not mapped)
  a80000      (not mapped)
  a90000      (not mapped)
  aa0000      (not mapped)
  ab0000      (not mapped)
  ac0000      (not mapped)
  ...

In all cases, the PGD entry reads as 0, and therefore the translation fails:
  
  crash> vtop 400000
  VIRTUAL     PHYSICAL        
  400000      (not mapped)
  
  PAGE DIRECTORY: ffff8003db6a9200
     PGD: ffff8003db6a9200 => 0
  
        VMA           START       END     FLAGS FILE
  ffff8003de746d40     400000     a00000    875 /root/crash.git/crash
  
  FILE: /root/crash.git/crash  OFFSET: 0
  
  crash>

That is the correct PGD address, and when read, it looks like a valid PTE:

  crash> rd ffff8003db6a9200
  ffff8003db6a9200:  00000043dee60003                    ....C...
  crash>

vmalloc() addresses translate just fine, and since they use the same function,
I'm not sure what's going on.  Did you ever check user-space translations?

Thanks,
  Dave

  
  
> 
> 
> > ---
> >  arm64.c | 126
> >  ++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------
> >  defs.h  |  28 +++++++++++----
> >  2 files changed, 133 insertions(+), 21 deletions(-)
> > 
> > diff --git a/arm64.c b/arm64.c
> > index f6ea7a1..d1c9c3e 100644
> > --- a/arm64.c
> > +++ b/arm64.c
> > @@ -34,6 +34,7 @@ static void arm64_init_kernel_pgd(void);
> >  static int arm64_kvtop(struct task_context *, ulong, physaddr_t *, int);
> >  static int arm64_uvtop(struct task_context *, ulong, physaddr_t *, int);
> >  static int arm64_vtop_2level_64k(ulong, ulong, physaddr_t *, int);
> > +static int arm64_vtop_3level_64k(ulong, ulong, physaddr_t *, int);
> >  static int arm64_vtop_3level_4k(ulong, ulong, physaddr_t *, int);
> >  static ulong arm64_get_task_pgd(ulong);
> >  static void arm64_irq_stack_init(void);
> > @@ -188,15 +189,29 @@ arm64_init(int when)
> >  			break;
> >  
> >  		case 65536:
> > -			machdep->flags |= VM_L2_64K;
> > -			machdep->ptrs_per_pgd = PTRS_PER_PGD_L2_64K;
> > -			if ((machdep->pgd =
> > -			    (char *)malloc(PTRS_PER_PGD_L2_64K * 8)) == NULL)
> > -				error(FATAL, "cannot malloc pgd space.");
> > -			if ((machdep->ptbl =
> > -			    (char *)malloc(PTRS_PER_PTE_L2_64K * 8)) == NULL)
> > -				error(FATAL, "cannot malloc ptbl space.");
> > -			machdep->pmd = NULL;  /* not used */
> > +			if (machdep->machspec->VA_BITS > PGDIR_SHIFT_L3_64K) {
> > +				machdep->flags |= VM_L3_64K;
> > +				machdep->ptrs_per_pgd = PTRS_PER_PGD_L3_64K;
> > +				if ((machdep->pgd =
> > +				    (char *)malloc(PTRS_PER_PGD_L3_64K * 8)) == NULL)
> > +					error(FATAL, "cannot malloc pgd space.");
> > +				if ((machdep->pmd =
> > +				    (char *)malloc(PTRS_PER_PMD_L3_64K * 8)) == NULL)
> > +					error(FATAL, "cannot malloc pmd space.");
> > +				if ((machdep->ptbl =
> > +				    (char *)malloc(PTRS_PER_PTE_L3_64K * 8)) == NULL)
> > +					error(FATAL, "cannot malloc ptbl space.");
> > +			} else {
> > +				machdep->flags |= VM_L2_64K;
> > +				machdep->ptrs_per_pgd = PTRS_PER_PGD_L2_64K;
> > +				if ((machdep->pgd =
> > +				    (char *)malloc(PTRS_PER_PGD_L2_64K * 8)) == NULL)
> > +					error(FATAL, "cannot malloc pgd space.");
> > +				if ((machdep->ptbl =
> > +				    (char *)malloc(PTRS_PER_PTE_L2_64K * 8)) == NULL)
> > +					error(FATAL, "cannot malloc ptbl space.");
> > +				machdep->pmd = NULL;  /* not used */
> > +			}
> >  			machdep->pud = NULL;  /* not used */
> >  			break;
> >  
> > @@ -379,6 +394,8 @@ arm64_dump_machdep_table(ulong arg)
> >  		fprintf(fp, "%sPHYS_OFFSET", others++ ? "|" : "");
> >  	if (machdep->flags & VM_L2_64K)
> >  		fprintf(fp, "%sVM_L2_64K", others++ ? "|" : "");
> > +	if (machdep->flags & VM_L3_64K)
> > +		fprintf(fp, "%sVM_L3_64K", others++ ? "|" : "");
> >  	if (machdep->flags & VM_L3_4K)
> >  		fprintf(fp, "%sVM_L3_4K", others++ ? "|" : "");
> >  	if (machdep->flags & VMEMMAP)
> > @@ -410,10 +427,14 @@ arm64_dump_machdep_table(ulong arg)
> >  	fprintf(fp, "     processor_speed: arm64_processor_speed()\n");
> >  	fprintf(fp, "               uvtop: arm64_uvtop()->%s()\n",
> >  		machdep->flags & VM_L3_4K ?
> > -		"arm64_vtop_3level_4k" : "arm64_vtop_2level_64k");
> > +		"arm64_vtop_3level_4k" :
> > +		machdep->flags & VM_L3_64K ?
> > +		"arm64_vtop_3level_64k" : "arm64_vtop_2level_64k");
> >  	fprintf(fp, "               kvtop: arm64_kvtop()->%s()\n",
> >  		machdep->flags & VM_L3_4K ?
> > -		"arm64_vtop_3level_4k" : "arm64_vtop_2level_64k");
> > +		"arm64_vtop_3level_4k" :
> > +		machdep->flags & VM_L3_64K ?
> > +		"arm64_vtop_3level_64k" : "arm64_vtop_2level_64k");
> >  	fprintf(fp, "        get_task_pgd: arm64_get_task_pgd()\n");
> >  	fprintf(fp, "            dump_irq: generic_dump_irq()\n");
> >  	fprintf(fp, "     get_stack_frame: arm64_get_stack_frame()\n");
> > @@ -719,10 +740,12 @@ arm64_kvtop(struct task_context *tc, ulong kvaddr,
> > physaddr_t *paddr, int verbos
> >  	kernel_pgd = vt->kernel_pgd[0];
> >  	*paddr = 0;
> >  
> > -	switch (machdep->flags & (VM_L2_64K|VM_L3_4K))
> > +	switch (machdep->flags & (VM_L2_64K|VM_L3_64K|VM_L3_4K))
> >  	{
> >  	case VM_L2_64K:
> >  		return arm64_vtop_2level_64k(kernel_pgd, kvaddr, paddr, verbose);
> > +	case VM_L3_64K:
> > +		return arm64_vtop_3level_64k(kernel_pgd, kvaddr, paddr, verbose);
> >  	case VM_L3_4K:
> >  		return arm64_vtop_3level_4k(kernel_pgd, kvaddr, paddr, verbose);
> >  	default:
> > @@ -740,10 +763,12 @@ arm64_uvtop(struct task_context *tc, ulong uvaddr,
> > physaddr_t *paddr, int verbos
> >  
> >  	*paddr = 0;
> >  
> > -	switch (machdep->flags & (VM_L2_64K|VM_L3_4K))
> > +	switch (machdep->flags & (VM_L2_64K|VM_L3_64K|VM_L3_4K))
> >  	{
> >  	case VM_L2_64K:
> >  		return arm64_vtop_2level_64k(user_pgd, uvaddr, paddr, verbose);
> > +	case VM_L3_64K:
> > +		return arm64_vtop_3level_64k(user_pgd, uvaddr, paddr, verbose);
> >  	case VM_L3_4K:
> >  		return arm64_vtop_3level_4k(user_pgd, uvaddr, paddr, verbose);
> >  	default:
> > @@ -820,6 +845,78 @@ no_page:
> >  	return FALSE;
> >  }
> >  
> > +static int
> > +arm64_vtop_3level_64k(ulong pgd, ulong vaddr, physaddr_t *paddr, int
> > verbose)
> > +{
> > +	ulong *pgd_base, *pgd_ptr, pgd_val;
> > +	ulong *pmd_base, *pmd_ptr, pmd_val;
> > +	ulong *pte_base, *pte_ptr, pte_val;
> > +
> > +        if (verbose)
> > +                fprintf(fp, "PAGE DIRECTORY: %lx\n", pgd);
> > +
> > +	pgd_base = (ulong *)pgd;
> > +	FILL_PGD(pgd_base, KVADDR, PTRS_PER_PGD_L3_64K * sizeof(ulong));
> > +	pgd_ptr = pgd_base + (((vaddr) >> PGDIR_SHIFT_L3_64K) &
> > (PTRS_PER_PGD_L3_64K - 1));
> > +        pgd_val = ULONG(machdep->pgd + PAGEOFFSET(pgd_ptr));
> > +        if (verbose)
> > +                fprintf(fp, "   PGD: %lx => %lx\n", (ulong)pgd_ptr,
> > pgd_val);
> > +	if (!pgd_val)
> > +		goto no_page;
> > +
> > +	/*
> > +	 * #define __PAGETABLE_PUD_FOLDED
> > +	 */
> > +
> > +	pmd_base = (ulong *)PTOV(pgd_val & PHYS_MASK & (s32)machdep->pagemask);
> > +	FILL_PMD(pmd_base, KVADDR, PTRS_PER_PMD_L3_64K * sizeof(ulong));
> > +	pmd_ptr = pmd_base + (((vaddr) >> PMD_SHIFT_L3_64K) & (PTRS_PER_PMD_L3_64K
> > - 1));
> > +        pmd_val = ULONG(machdep->pmd + PAGEOFFSET(pmd_ptr));
> > +        if (verbose)
> > +                fprintf(fp, "   PMD: %lx => %lx\n", (ulong)pmd_ptr,
> > pmd_val);
> > +	if (!pmd_val)
> > +		goto no_page;
> > +
> > +	if ((pmd_val & PMD_TYPE_MASK) == PMD_TYPE_SECT) {
> > +		ulong sectionbase = (pmd_val & SECTION_PAGE_MASK_512MB) & PHYS_MASK;
> > +		if (verbose) {
> > +			fprintf(fp, "  PAGE: %lx  (512MB)\n\n", sectionbase);
> > +			arm64_translate_pte(pmd_val, 0, 0);
> > +		}
> > +		*paddr = sectionbase + (vaddr & ~SECTION_PAGE_MASK_512MB);
> > +		return TRUE;
> > +	}
> > +
> > +	pte_base = (ulong *)PTOV(pmd_val & PHYS_MASK & (s32)machdep->pagemask);
> > +	FILL_PTBL(pte_base, KVADDR, PTRS_PER_PTE_L3_64K * sizeof(ulong));
> > +	pte_ptr = pte_base + (((vaddr) >> machdep->pageshift) &
> > (PTRS_PER_PTE_L3_64K - 1));
> > +        pte_val = ULONG(machdep->ptbl + PAGEOFFSET(pte_ptr));
> > +        if (verbose)
> > +                fprintf(fp, "   PTE: %lx => %lx\n", (ulong)pte_ptr,
> > pte_val);
> > +	if (!pte_val)
> > +		goto no_page;
> > +
> > +	if (pte_val & PTE_VALID) {
> > +		*paddr = (PAGEBASE(pte_val) & PHYS_MASK) + PAGEOFFSET(vaddr);
> > +		if (verbose) {
> > +			fprintf(fp, "  PAGE: %lx\n\n", PAGEBASE(*paddr));
> > +			arm64_translate_pte(pte_val, 0, 0);
> > +		}
> > +	} else {
> > +		if (IS_UVADDR(vaddr, NULL))
> > +			*paddr = pte_val;
> > +		if (verbose) {
> > +			fprintf(fp, "\n");
> > +			arm64_translate_pte(pte_val, 0, 0);
> > +		}
> > +		goto no_page;
> > +	}
> > +
> > +	return TRUE;
> > +no_page:
> > +	return FALSE;
> > +}
> > +
> >  static int
> >  arm64_vtop_3level_4k(ulong pgd, ulong vaddr, physaddr_t *paddr, int verbose)
> >  {
> > @@ -2348,9 +2445,10 @@ arm64_calc_virtual_memory_ranges(void)
> >  
> >  	STRUCT_SIZE_INIT(page, "page");
> >  
> > -        switch (machdep->flags & (VM_L2_64K|VM_L3_4K))
> > +        switch (machdep->flags & (VM_L2_64K|VM_L3_64K|VM_L3_4K))
> >          {
> >          case VM_L2_64K:
> > +        case VM_L3_64K:
> >  		PUD_SIZE = PGDIR_SIZE_L2_64K;
> >  		break;
> >          case VM_L3_4K:
> > diff --git a/defs.h b/defs.h
> > index 56ae06c..d1b49d0 100644
> > --- a/defs.h
> > +++ b/defs.h
> > @@ -2815,7 +2815,7 @@ typedef u64 pte_t;
> >  
> >  typedef signed int s32;
> >  
> > -/*
> > +/*
> >   * 3-levels / 4K pages
> >   */
> >  #define PTRS_PER_PGD_L3_4K   (512)
> > @@ -2823,10 +2823,23 @@ typedef signed int s32;
> >  #define PTRS_PER_PTE_L3_4K   (512)
> >  #define PGDIR_SHIFT_L3_4K    (30)
> >  #define PGDIR_SIZE_L3_4K     ((1UL) << PGDIR_SHIFT_L3_4K)
> > -#define PGDIR_MASK_L3 4K     (~(PGDIR_SIZE_L3_4K-1))
> > +#define PGDIR_MASK_L3_4K     (~(PGDIR_SIZE_L3_4K-1))
> >  #define PMD_SHIFT_L3_4K      (21)
> > -#define PMD_SIZE_L3_4K       (1UL << PMD_SHIFT_4K)
> > -#define PMD_MASK_L3 4K       (~(PMD_SIZE_4K-1))
> > +#define PMD_SIZE_L3_4K       (1UL << PMD_SHIFT_L3_4K)
> > +#define PMD_MASK_L3_4K       (~(PMD_SIZE_L3_4K-1))
> > +
> > +/*
> > + * 3-levels / 64K pages
> > + */
> > +#define PTRS_PER_PGD_L3_64K  (64)
> > +#define PTRS_PER_PMD_L3_64K  (8192)
> > +#define PTRS_PER_PTE_L3_64K  (8192)
> > +#define PGDIR_SHIFT_L3_64K   (42)
> > +#define PGDIR_SIZE_L3_64K    ((1UL) << PGDIR_SHIFT_L3_64K)
> > +#define PGDIR_MASK_L3_64K    (~(PGDIR_SIZE_L3_64K-1))
> > +#define PMD_SHIFT_L3_64K     (29)
> > +#define PMD_SIZE_L3_64K      (1UL << PMD_SHIFT_L3_64K)
> > +#define PMD_MASK_L3_64K      (~(PMD_SIZE_L3_64K-1))
> >  
> >  /*
> >   * 2-levels / 64K pages
> > @@ -2868,9 +2881,10 @@ typedef signed int s32;
> >  #define KSYMS_START   (0x1)
> >  #define PHYS_OFFSET   (0x2)
> >  #define VM_L2_64K     (0x4)
> > -#define VM_L3_4K      (0x8)
> > -#define KDUMP_ENABLED (0x10)
> > -#define IRQ_STACKS    (0x20)
> > +#define VM_L3_64K     (0x8)
> > +#define VM_L3_4K      (0x10)
> > +#define KDUMP_ENABLED (0x20)
> > +#define IRQ_STACKS    (0x40)
> >  
> >  /*
> >   * sources: Documentation/arm64/memory.txt
> > --
> > 2.1.4
> > 
> > --
> > Crash-utility mailing list
> > Crash-utility redhat com
> > https://www.redhat.com/mailman/listinfo/crash-utility
> > 




More information about the Crash-utility mailing list