[Crash-utility] Re: [PATCH] s390: Make page table functions more generic
Dave Anderson
anderson at redhat.com
Thu Nov 15 18:50:46 UTC 2007
Michael Holzheu wrote:
> Hi Dave,
>
> For s390(x) kernels the page table allocation method will be changed.
> Instead of 3 levels, it will now be possible to allocate 4 levels.
>
> The current implementation of the page table walk functions in crash
> makes assumptions about how the page tables are allocated by the kernel.
> For example, three levels are hard-coded.
>
> This patch changes that: the page table walk is now done only according
> to the s390 architecture, without assumptions about the implementation in
> the kernel.
>
> So both old and new kernels are supported.
>
Hi Michael,
I have complete faith in you... ;-)
And this certainly simplifies things considerably, which
is always good.
But -- can you give me a warm-and-fuzzy feeling by confirming
that you tested this on a RHEL kernel? And that "make warn"
compiles cleanly?
Thanks,
Dave
> ---
>
> s390.c | 144 +++++++++++++++++++++++-------------------------
> s390x.c | 191 ++++++++++++++++++++++++++--------------------------------------
> 2 files changed, 151 insertions(+), 184 deletions(-)
>
> diff -Naur crash-4.0-4.8/s390.c crash-4.0-4.8-page-table-walk/s390.c
> --- crash-4.0-4.8/s390.c 2007-10-30 16:51:54.000000000 +0100
> +++ crash-4.0-4.8-page-table-walk/s390.c 2007-11-15 15:44:07.000000000 +0100
> @@ -21,17 +21,6 @@
> #define S390_WORD_SIZE 4
> #define S390_ADDR_MASK 0x7fffffff
>
> -#define S390_PAGE_SHIFT 12
> -#define S390_PAGE_SIZE (1UL << S390_PAGE_SHIFT)
> -#define S390_PAGE_MASK (~(S390_PAGE_SIZE-1))
> -
> -#define S390_PGDIR_SHIFT 20
> -#define S390_PGDIR_SIZE (1UL << S390_PGDIR_SHIFT)
> -#define S390_PGDIR_MASK (~(S390_PGDIR_SIZE-1))
> -
> -#define S390_PTRS_PER_PGD 2048
> -#define S390_PTRS_PER_PTE 256
> -
> #define S390_PMD_BASE_MASK (~((1UL<<6)-1))
> #define S390_PT_BASE_MASK S390_PMD_BASE_MASK
> #define S390_PAGE_BASE_MASK (~((1UL<<12)-1))
> @@ -44,26 +33,10 @@
> #define S390_PAGE_INVALID 0x400 /* HW invalid */
> #define S390_PAGE_INVALID_MASK 0x601ULL /* for linux 2.6 */
> #define S390_PAGE_INVALID_NONE 0x401ULL /* for linux 2.6 */
> -#define S390_PAGE_TABLE_LEN 0xf /* only full page-tables */
> -#define S390_PAGE_TABLE_INV 0x20 /* invalid page-table */
>
> #define S390_PTE_INVALID_MASK 0x80000900
> #define S390_PTE_INVALID(x) ((x) & S390_PTE_INVALID_MASK)
>
> -#define S390_PMD_INVALID_MASK 0x80000000
> -#define S390_PMD_INVALID(x) ((x) & S390_PMD_INVALID_MASK)
> -
> -/* pgd/pmd/pte query macros */
> -#define s390_pmd_none(x) ((x) & S390_PAGE_TABLE_INV)
> -#define s390_pmd_bad(x) (((x) & (~S390_PMD_BASE_MASK & \
> - ~S390_PAGE_TABLE_INV)) != \
> - S390_PAGE_TABLE_LEN)
> -
> -#define s390_pte_none(x) (((x) & (S390_PAGE_INVALID | S390_RO_S390 | \
> - S390_PAGE_PRESENT)) == \
> - S390_PAGE_INVALID)
> -
> -
> #define ASYNC_STACK_SIZE STACKSIZE() // can be 4096 or 8192
> #define KERNEL_STACK_SIZE STACKSIZE() // can be 4096 or 8192
>
> @@ -73,8 +46,6 @@
> * declarations of static functions
> */
> static void s390_print_lowcore(char*, struct bt_info*,int);
> -static unsigned long s390_pgd_offset(unsigned long, unsigned long);
> -static unsigned long s390_pte_offset(unsigned long, unsigned long);
> static int s390_kvtop(struct task_context *, ulong, physaddr_t *, int);
> static int s390_uvtop(struct task_context *, ulong, physaddr_t *, int);
> static int s390_vtop(unsigned long, ulong, physaddr_t*, int);
> @@ -292,60 +263,87 @@
> /*
> * page table traversal functions
> */
> -static unsigned long
> -s390_pgd_offset(unsigned long pgd_base, unsigned long vaddr)
> -{
> - unsigned long pgd_off, pmd_base;
>
> - pgd_off = ((vaddr >> S390_PGDIR_SHIFT) & (S390_PTRS_PER_PGD - 1))
> - * S390_WORD_SIZE;
> - readmem(pgd_base + pgd_off, PHYSADDR, &pmd_base,sizeof(long),
> - "pgd_base",FAULT_ON_ERROR);
> - return pmd_base;
> -}
> -
> -unsigned long s390_pte_offset(unsigned long pte_base, unsigned long vaddr)
> +/* Segment table traversal function */
> +static ulong _kl_sg_table_deref_s390(ulong vaddr, ulong table, int len)
> {
> - unsigned pte_off, pte_val;
> + ulong offset, entry;
> +
> + offset = ((vaddr >> 20) & 0x7ffUL) * 4;
> + if (offset >= (len + 1)*64)
> + /* Offset is over the table limit. */
> + return 0;
> + readmem(table + offset, KVADDR, &entry, sizeof(entry), "entry",
> + FAULT_ON_ERROR);
>
> - pte_off = ((vaddr >> S390_PAGE_SHIFT) & (S390_PTRS_PER_PTE - 1))
> - * S390_WORD_SIZE;
> - readmem(pte_base + pte_off, PHYSADDR, &pte_val, sizeof(long),
> - "pte_val",FAULT_ON_ERROR);
> - return pte_val;
> + /*
> + * Check if the segment table entry could be read and doesn't have
> + * any of the reserved bits set.
> + */
> + if (entry & 0x80000000UL)
> + return 0;
> + /* Check if the segment table entry has the invalid bit set. */
> + if (entry & 0x40UL)
> + return 0;
> + /* Segment table entry is valid and well formed. */
> + return entry;
> +}
> +
> +/* Page table traversal function */
> +static ulong _kl_pg_table_deref_s390(ulong vaddr, ulong table, int len)
> +{
> + ulong offset, entry;
> +
> + offset = ((vaddr >> 12) & 0xffUL) * 4;
> + if (offset >= (len + 1)*64)
> + /* Offset is over the table limit. */
> + return 0;
> + readmem(table + offset, KVADDR, &entry, sizeof(entry), "entry",
> + FAULT_ON_ERROR);
> + /*
> + * Check if the page table entry could be read and doesn't have
> + * any of the reserved bits set.
> + */
> + if (entry & 0x80000900UL)
> + return 0;
> + /* Check if the page table entry has the invalid bit set. */
> + if (entry & 0x400UL)
> + return 0;
> + /* Page table entry is valid and well formed. */
> + return entry;
> }
>
> -/*
> - * Generic vtop function for user and kernel addresses
> - */
> +/* lookup virtual address in page tables */
> static int
> -s390_vtop(unsigned long pgd_base, ulong kvaddr, physaddr_t *paddr, int verbose)
> +s390_vtop(unsigned long table, ulong vaddr, physaddr_t *phys_addr, int verbose)
> {
> - unsigned pte_base, pte_val;
> + ulong entry, paddr;
> + int len;
>
> - /* get the pgd entry */
> - pte_base = s390_pgd_offset(pgd_base,kvaddr);
> - if(S390_PMD_INVALID(pte_base) ||
> - s390_pmd_bad(pte_base) ||
> - s390_pmd_none(pte_base)) {
> - *paddr = 0;
> - return FALSE;
> - }
> - /* get the pte */
> - pte_base = pte_base & S390_PT_BASE_MASK;
> - pte_val = s390_pte_offset(pte_base,kvaddr);
> - if(S390_PTE_INVALID(pte_val) ||
> - s390_pte_none(pte_val)){
> - *paddr = 0;
> + /*
> + * Get the segment table entry.
> + * We assume that the segment table length field in the asce
> + * is set to the maximum value of 127 (which translates to
> + * a segment table with 2048 entries) and that the addressing
> + * mode is 31 bit.
> + */
> + entry = _kl_sg_table_deref_s390(vaddr, table, 127);
> + if (!entry)
> return FALSE;
> - }
> - if(!s390_pte_present(pte_val)){
> - /* swapped out */
> - *paddr = pte_val;
> + table = entry & 0x7ffffc00UL;
> + len = entry & 0xfUL;
> +
> + /* Get the page table entry */
> + entry = _kl_pg_table_deref_s390(vaddr, table, len);
> + if (!entry)
> return FALSE;
> - }
> - *paddr = (pte_val & S390_PAGE_BASE_MASK) |
> - (kvaddr & (~(S390_PAGE_MASK)));
> +
> + /* Isolate the page origin from the page table entry. */
> + paddr = entry & 0x7ffff000UL;
> +
> + /* Add the page offset and return the final value. */
> + *phys_addr = paddr + (vaddr & 0xfffUL);
> +
> return TRUE;
> }
>
> diff -Naur crash-4.0-4.8/s390x.c crash-4.0-4.8-page-table-walk/s390x.c
> --- crash-4.0-4.8/s390x.c 2007-10-30 16:51:54.000000000 +0100
> +++ crash-4.0-4.8-page-table-walk/s390x.c 2007-11-15 15:44:33.000000000 +0100
> @@ -20,24 +20,6 @@
>
> #define S390X_WORD_SIZE 8
>
> -#define S390X_PAGE_SHIFT 12
> -#define S390X_PAGE_SIZE (1ULL << S390X_PAGE_SHIFT)
> -#define S390X_PAGE_MASK (~(S390X_PAGE_SIZE-1))
> -
> -#define S390X_PGDIR_SHIFT 31
> -#define S390X_PGDIR_SIZE (1ULL << S390X_PGDIR_SHIFT)
> -#define S390X_PGDIR_MASK (~(S390X_PGDIR_SIZE-1))
> -
> -#define S390X_PMD_SHIFT 20
> -#define S390X_PMD_SIZE (1ULL << S390X_PMD_SHIFT)
> -#define S390X_PMD_MASK (~(S390X_PMD_SIZE-1))
> -
> -#define S390X_PTRS_PER_PGD 2048
> -#define S390X_PTRS_PER_PMD 2048
> -#define S390X_PTRS_PER_PTE 256
> -
> -#define S390X_PMD_BASE_MASK (~((1ULL<<12)-1))
> -#define S390X_PT_BASE_MASK (~((1ULL<<11)-1))
> #define S390X_PAGE_BASE_MASK (~((1ULL<<12)-1))
>
> /* Flags used in entries of page dirs and page tables.
> @@ -48,37 +30,11 @@
> #define S390X_PAGE_INVALID 0x400ULL /* HW invalid */
> #define S390X_PAGE_INVALID_MASK 0x601ULL /* for linux 2.6 */
> #define S390X_PAGE_INVALID_NONE 0x401ULL /* for linux 2.6 */
> -#define S390X_PMD_ENTRY_INV 0x20ULL /* invalid segment table entry */
> -#define S390X_PGD_ENTRY_INV 0x20ULL /* invalid region table entry */
> -#define S390X_PMD_ENTRY 0x00
> -#define S390X_PGD_ENTRY_FIRST 0x05 /* first part of pmd is valid */
> -#define S390X_PGD_ENTRY_SECOND 0xc7 /* second part of pmd is valid */
> -#define S390X_PGD_ENTRY_FULL 0x07 /* complete pmd is valid */
>
> /* bits 52, 55 must contain zeroes in a pte */
> #define S390X_PTE_INVALID_MASK 0x900ULL
> #define S390X_PTE_INVALID(x) ((x) & S390X_PTE_INVALID_MASK)
>
> -/* pgd/pmd/pte query macros */
> -#define s390x_pgd_none(x) ((x) & S390X_PGD_ENTRY_INV)
> -#define s390x_pgd_bad(x) !( (((x) & S390X_PGD_ENTRY_FIRST) == \
> - S390X_PGD_ENTRY_FIRST) || \
> - (((x) & S390X_PGD_ENTRY_SECOND) == \
> - S390X_PGD_ENTRY_SECOND) || \
> - (((x) & S390X_PGD_ENTRY_FULL) == \
> - S390X_PGD_ENTRY_FULL))
> -
> -#define s390x_pmd_none(x) ((x) & S390X_PMD_ENTRY_INV)
> -#define s390x_pmd_bad(x) (((x) & (~S390X_PT_BASE_MASK & \
> - ~S390X_PMD_ENTRY_INV)) != \
> - S390X_PMD_ENTRY)
> -
> -#define s390x_pte_none(x) (((x) & (S390X_PAGE_INVALID | \
> - S390X_PAGE_RO | \
> - S390X_PAGE_PRESENT)) == \
> - S390X_PAGE_INVALID)
> -
> -
> #define ASYNC_STACK_SIZE STACKSIZE() // can be 8192 or 16384
> #define KERNEL_STACK_SIZE STACKSIZE() // can be 8192 or 16384
>
> @@ -88,9 +44,6 @@
> * declarations of static functions
> */
> static void s390x_print_lowcore(char*, struct bt_info*,int);
> -static unsigned long s390x_pgd_offset(unsigned long, unsigned long);
> -static unsigned long s390x_pmd_offset(unsigned long, unsigned long);
> -static unsigned long s390x_pte_offset(unsigned long, unsigned long);
> static int s390x_kvtop(struct task_context *, ulong, physaddr_t *, int);
> static int s390x_uvtop(struct task_context *, ulong, physaddr_t *, int);
> static int s390x_vtop(unsigned long, ulong, physaddr_t*, int);
> @@ -304,81 +257,97 @@
> }
> }
>
> -/*
> +/*
> * page table traversal functions
> */
> -unsigned long s390x_pgd_offset(unsigned long pgd_base, unsigned long vaddr)
> -{
> - unsigned long pgd_off, pmd_base;
> -
> - pgd_off = ((vaddr >> S390X_PGDIR_SHIFT) &
> - (S390X_PTRS_PER_PGD - 1)) * 8;
> - readmem(pgd_base + pgd_off, PHYSADDR, &pmd_base, sizeof(long),
> - "pmd_base",FAULT_ON_ERROR);
> -
> - return pmd_base;
> -}
>
> -unsigned long s390x_pmd_offset(unsigned long pmd_base, unsigned long vaddr)
> -{
> - unsigned long pmd_off, pte_base;
> -
> - pmd_off = ((vaddr >> S390X_PMD_SHIFT) & (S390X_PTRS_PER_PMD - 1))
> - * 8;
> - readmem(pmd_base + pmd_off, PHYSADDR, &pte_base, sizeof(long),
> - "pte_base",FAULT_ON_ERROR);
> - return pte_base;
> -}
> -
> -unsigned long s390x_pte_offset(unsigned long pte_base, unsigned long vaddr)
> -{
> - unsigned long pte_off, pte_val;
> -
> - pte_off = ((vaddr >> S390X_PAGE_SHIFT) & (S390X_PTRS_PER_PTE - 1))
> - * 8;
> - readmem(pte_base + pte_off, PHYSADDR, &pte_val, sizeof(long),
> - "pte_val",FAULT_ON_ERROR);
> - return pte_val;
> +/* Region or segment table traversal function */
> +static ulong _kl_rsg_table_deref_s390x(ulong vaddr, ulong table,
> + int len, int level)
> +{
> + ulong offset, entry;
> +
> + offset = ((vaddr >> (11*level + 20)) & 0x7ffULL) * 8;
> + if (offset >= (len + 1)*4096)
> + /* Offset is over the table limit. */
> + return 0;
> + readmem(table + offset, KVADDR, &entry, sizeof(entry), "entry",
> + FAULT_ON_ERROR);
> + /*
> + * Check if the segment table entry could be read and doesn't have
> + * any of the reserved bits set.
> + */
> + if ((entry & 0xcULL) != (level << 2))
> + return 0;
> + /* Check if the region table entry has the invalid bit set. */
> + if (entry & 0x40ULL)
> + return 0;
> + /* Region table entry is valid and well formed. */
> + return entry;
> }
>
> -/*
> - * Generic vtop function for user and kernel addresses
> - */
> -static int
> -s390x_vtop(unsigned long pgd_base, ulong kvaddr, physaddr_t *paddr, int verbose)
> +/* Page table traversal function */
> +static ulong _kl_pg_table_deref_s390x(ulong vaddr, ulong table)
> {
> - unsigned long pmd_base, pte_base, pte_val;
> + ulong offset, entry;
>
> - /* get the pgd entry */
> - pmd_base = s390x_pgd_offset(pgd_base,kvaddr);
> - if(s390x_pgd_bad(pmd_base) ||
> - s390x_pgd_none(pmd_base)){
> - *paddr = 0;
> + offset = ((vaddr >> 12) & 0xffULL) * 8;
> + readmem(table + offset, KVADDR, &entry, sizeof(entry), "entry",
> + FAULT_ON_ERROR);
> + /*
> + * Check if the page table entry could be read and doesn't have
> + * any of the reserved bits set.
> + */
> + if (entry & 0x900ULL)
> + return 0;
> + /* Check if the page table entry has the invalid bit set. */
> + if (entry & 0x400ULL)
> + return 0;
> + /* Page table entry is valid and well formed. */
> + return entry;
> +}
> +
> +/* lookup virtual address in page tables */
> +int s390x_vtop(ulong table, ulong vaddr, physaddr_t *phys_addr, int verbose)
> +{
> + ulong entry, paddr;
> + int level, len;
> +
> + /*
> + * Walk the region and segment tables.
> + * We assume that the table length field in the asce is set to the
> + * maximum value of 3 (which translates to a region first, region
> + * second, region third or segment table with 2048 entries) and that
> + * the addressing mode is 64 bit.
> + */
> + len = 3;
> + /* Read the first entry to find the number of page table levels. */
> + readmem(table, KVADDR, &entry, sizeof(entry), "entry", FAULT_ON_ERROR);
> + level = (entry & 0xcULL) >> 2;
> + if ((vaddr >> (31 + 11*level)) != 0ULL) {
> + /* Address too big for the number of page table levels. */
> return FALSE;
> }
> - /* get the pmd */
> - pmd_base = pmd_base & S390X_PMD_BASE_MASK;
> - pte_base = s390x_pmd_offset(pmd_base,kvaddr);
> - if(s390x_pmd_bad(pte_base) ||
> - s390x_pmd_none(pte_base)) {
> - *paddr = 0;
> - return FALSE;
> + while (level >= 0) {
> + entry = _kl_rsg_table_deref_s390x(vaddr, table, len, level);
> + if (!entry)
> + return 0;
> + table = entry & ~0xfffULL;
> + len = entry & 0x3ULL;
> + level--;
> }
> - /* get the pte */
> - pte_base = pte_base & S390X_PT_BASE_MASK;
> - pte_val = s390x_pte_offset(pte_base,kvaddr);
> - if (S390X_PTE_INVALID(pte_val) ||
> - s390x_pte_none(pte_val)){
> - *paddr = 0;
> - return FALSE;
> - }
> - if(!s390x_pte_present(pte_val)){
> - /* swapped out */
> - *paddr = pte_val;
> +
> + /* Get the page table entry */
> + entry = _kl_pg_table_deref_s390x(vaddr, entry & ~0x7ffULL);
> + if (!entry)
> return FALSE;
> - }
> - *paddr = (pte_val & S390X_PAGE_BASE_MASK) |
> - (kvaddr & (~(S390X_PAGE_MASK)));
> +
> + /* Isolate the page origin from the page table entry. */
> + paddr = entry & ~0xfffULL;
> +
> + /* Add the page offset and return the final value. */
> + *phys_addr = paddr + (vaddr & 0xfffULL);
> +
> return TRUE;
> }
>
>
>
>
More information about the Crash-utility
mailing list