From 94f7ff36e521674a02145a3ff04b659c40122ba3 Mon Sep 17 00:00:00 2001 From: Sergey Temerkhanov Date: Wed, 14 Oct 2015 09:55:45 -0700 Subject: [PATCH] armv8: New MMU setup code allowing to use 48+ bits PA/VA This patch adds code which sets up 2-level page tables on ARM64 thus extending available VA space. CPUs implementing 64k translation granule are able to use direct PA-VA mapping of the whole 48 bit address space. It also adds the ability to reset the SCTRL register at the very beginning of execution to avoid interference from stale mappings set up by early firmware/loaders/etc. Signed-off-by: Sergey Temerkhanov Signed-off-by: Radha Mohan Chintakuntla --- arch/arm/cpu/armv8/cache_v8.c | 77 +++++++++++++++++++++++++++++++++++ arch/arm/cpu/armv8/start.S | 36 +++++++++++++++++ arch/arm/include/asm/armv8/mmu.h | 82 +++++++++++++++++++++++++++++++++++--- arch/arm/include/asm/global_data.h | 3 ++ arch/arm/include/asm/system.h | 7 ++++ doc/README.arm64 | 35 +++++++++++++--- 6 files changed, 228 insertions(+), 12 deletions(-) diff --git a/arch/arm/cpu/armv8/cache_v8.c b/arch/arm/cpu/armv8/cache_v8.c index 53bac3b..71f0020 100644 --- a/arch/arm/cpu/armv8/cache_v8.c +++ b/arch/arm/cpu/armv8/cache_v8.c @@ -12,6 +12,69 @@ DECLARE_GLOBAL_DATA_PTR; #ifndef CONFIG_SYS_DCACHE_OFF + +#ifdef CONFIG_SYS_FULL_VA +static void set_ptl1_entry(u64 index, u64 ptl2_entry) +{ + u64 *pgd = (u64 *)gd->arch.tlb_addr; + u64 value; + + value = ptl2_entry | PTL1_TYPE_TABLE; + pgd[index] = value; +} + +static void set_ptl2_block(u64 ptl1, u64 bfn, u64 address, u64 memory_attrs) +{ + u64 *pmd = (u64 *)ptl1; + u64 value; + + value = address | PTL2_TYPE_BLOCK | PTL2_BLOCK_AF; + value |= memory_attrs; + pmd[bfn] = value; +} + +static struct mm_region mem_map[] = CONFIG_SYS_MEM_MAP; + +#define PTL1_ENTRIES CONFIG_SYS_PTL1_ENTRIES +#define PTL2_ENTRIES CONFIG_SYS_PTL2_ENTRIES + +static void setup_pgtables(void) +{ + int l1_e, l2_e; + unsigned long pmd = 0; + unsigned long address; + + /* Setup the PMD 
pointers */ + for (l1_e = 0; l1_e < CONFIG_SYS_MEM_MAP_SIZE; l1_e++) { + gd->arch.pmd_addr[l1_e] = gd->arch.tlb_addr + + PTL1_ENTRIES * sizeof(u64); + gd->arch.pmd_addr[l1_e] += PTL2_ENTRIES * sizeof(u64) * l1_e; + gd->arch.pmd_addr[l1_e] = ALIGN(gd->arch.pmd_addr[l1_e], + 0x10000UL); + } + + /* Setup the page tables */ + for (l1_e = 0; l1_e < PTL1_ENTRIES; l1_e++) { + if (mem_map[pmd].base == + (uintptr_t)l1_e << PTL2_BITS) { + set_ptl1_entry(l1_e, gd->arch.pmd_addr[pmd]); + + for (l2_e = 0; l2_e < PTL2_ENTRIES; l2_e++) { + address = mem_map[pmd].base + + (uintptr_t)l2_e * BLOCK_SIZE; + set_ptl2_block(gd->arch.pmd_addr[pmd], l2_e, + address, mem_map[pmd].attrs); + } + + pmd++; + } else { + set_ptl1_entry(l1_e, 0); + } + } +} + +#else + inline void set_pgtable_section(u64 *page_table, u64 index, u64 section, u64 memory_type, u64 attribute) { @@ -30,14 +93,24 @@ inline void set_pgtable_table(u64 *page_table, u64 index, u64 *table_addr) value = (u64)table_addr | PMD_TYPE_TABLE; page_table[index] = value; } +#endif /* to activate the MMU we need to set up virtual memory */ __weak void mmu_setup(void) { +#ifndef CONFIG_SYS_FULL_VA bd_t *bd = gd->bd; u64 *page_table = (u64 *)gd->arch.tlb_addr, i, j; +#endif int el; +#ifdef CONFIG_SYS_FULL_VA + unsigned long coreid = read_mpidr() & CONFIG_COREID_MASK; + + /* Set up page tables only on BSP */ + if (coreid == BSP_COREID) + setup_pgtables(); +#else /* Setup an identity-mapping for all spaces */ for (i = 0; i < (PGTABLE_SIZE >> 3); i++) { set_pgtable_section(page_table, i, i << SECTION_SHIFT, @@ -55,6 +128,7 @@ __weak void mmu_setup(void) } } +#endif /* load TTBR0 */ el = current_el(); if (el == 1) { @@ -154,6 +228,7 @@ u64 *__weak arch_get_page_table(void) { return NULL; } +#ifndef CONFIG_SYS_FULL_VA void mmu_set_region_dcache_behaviour(phys_addr_t start, size_t size, enum dcache_option option) { @@ -179,6 +254,8 @@ void mmu_set_region_dcache_behaviour(phys_addr_t start, size_t size, flush_dcache_range(start, end); asm 
volatile("dsb sy"); } +#endif + #else /* CONFIG_SYS_DCACHE_OFF */ void invalidate_dcache_all(void) diff --git a/arch/arm/cpu/armv8/start.S b/arch/arm/cpu/armv8/start.S index da45d98..2ee60d6 100644 --- a/arch/arm/cpu/armv8/start.S +++ b/arch/arm/cpu/armv8/start.S @@ -43,6 +43,9 @@ _bss_end_ofs: .quad __bss_end - _start reset: +#ifdef CONFIG_SYS_RESET_SCTRL + bl reset_sctrl +#endif /* * Could be EL3/EL2/EL1, Initial State: * Little Endian, MMU Disabled, i/dCache Disabled @@ -99,6 +102,39 @@ master_cpu: bl _main +#ifdef CONFIG_SYS_RESET_SCTRL +reset_sctrl: + switch_el x1, 3f, 2f, 1f +3: + mrs x0, sctlr_el3 + b 0f +2: + mrs x0, sctlr_el2 + b 0f +1: + mrs x0, sctlr_el1 + +0: + ldr x1, =0xfdfffffa + and x0, x0, x1 + + switch_el x1, 6f, 5f, 4f +6: + msr sctlr_el3, x0 + b 7f +5: + msr sctlr_el2, x0 + b 7f +4: + msr sctlr_el1, x0 + +7: + dsb sy + isb + b __asm_invalidate_tlb_all + ret +#endif + /*-----------------------------------------------------------------------*/ WEAK(apply_core_errata) diff --git a/arch/arm/include/asm/armv8/mmu.h b/arch/arm/include/asm/armv8/mmu.h index 2e2a3a8..897f010 100644 --- a/arch/arm/include/asm/armv8/mmu.h +++ b/arch/arm/include/asm/armv8/mmu.h @@ -21,7 +21,13 @@ * The following definitions are related each other, shoud be * calculated specifically. */ + +#ifndef CONFIG_SYS_FULL_VA #define VA_BITS (42) /* 42 bits virtual address */ +#else +#define VA_BITS CONFIG_SYS_VA_BITS +#define PTL2_BITS CONFIG_SYS_PTL2_BITS +#endif /* PAGE_SHIFT determines the page size */ #undef PAGE_SIZE @@ -30,11 +36,18 @@ #define PAGE_MASK (~(PAGE_SIZE-1)) /* - * section address mask and size definitions. + * block/section address mask and size definitions. 
*/ +#ifndef CONFIG_SYS_FULL_VA #define SECTION_SHIFT 29 #define SECTION_SIZE (UL(1) << SECTION_SHIFT) #define SECTION_MASK (~(SECTION_SIZE-1)) +#else +#define BLOCK_SHIFT CONFIG_SYS_BLOCK_SHIFT +#define BLOCK_SIZE (UL(1) << BLOCK_SHIFT) +#define BLOCK_MASK (~(BLOCK_SIZE-1)) +#endif + /***************************************************************/ /* @@ -46,15 +59,54 @@ #define MT_NORMAL_NC 3 #define MT_NORMAL 4 -#define MEMORY_ATTRIBUTES ((0x00 << (MT_DEVICE_NGNRNE*8)) | \ - (0x04 << (MT_DEVICE_NGNRE*8)) | \ - (0x0c << (MT_DEVICE_GRE*8)) | \ - (0x44 << (MT_NORMAL_NC*8)) | \ - (UL(0xff) << (MT_NORMAL*8))) +#define MEMORY_ATTRIBUTES ((0x00 << (MT_DEVICE_NGNRNE * 8)) | \ + (0x04 << (MT_DEVICE_NGNRE * 8)) | \ + (0x0c << (MT_DEVICE_GRE * 8)) | \ + (0x44 << (MT_NORMAL_NC * 8)) | \ + (UL(0xff) << (MT_NORMAL * 8))) /* * Hardware page table definitions. * + */ + +#ifdef CONFIG_SYS_FULL_VA +/* + * Level 1 descriptor (PGD). + */ + +#define PTL1_TYPE_MASK (3 << 0) +#define PTL1_TYPE_TABLE (3 << 0) + +#define PTL1_TABLE_PXN (1UL << 59) +#define PTL1_TABLE_XN (1UL << 60) +#define PTL1_TABLE_AP (1UL << 61) +#define PTL1_TABLE_NS (1UL << 63) + + +/* + * Level 2 descriptor (PMD). + */ + +#define PTL2_TYPE_MASK (3 << 0) +#define PTL2_TYPE_FAULT (0 << 0) +#define PTL2_TYPE_TABLE (3 << 0) +#define PTL2_TYPE_BLOCK (1 << 0) + +/* + * Block + */ +#define PTL2_MEMTYPE(x) ((x) << 2) +#define PTL2_BLOCK_NON_SHARE (0 << 8) +#define PTL2_BLOCK_OUTER_SHARE (2 << 8) +#define PTL2_BLOCK_INNER_SHARE (3 << 8) +#define PTL2_BLOCK_AF (1 << 10) +#define PTL2_BLOCK_NG (1 << 11) +#define PTL2_BLOCK_PXN (UL(1) << 53) +#define PTL2_BLOCK_UXN (UL(1) << 54) + +#else +/* * Level 2 descriptor (PMD). 
*/ #define PMD_TYPE_MASK (3 << 0) @@ -74,6 +126,8 @@ #define PMD_SECT_PXN (UL(1) << 53) #define PMD_SECT_UXN (UL(1) << 54) +#endif + /* * AttrIndx[2:0] */ @@ -100,9 +154,16 @@ #define TCR_TG0_4K (0 << 14) #define TCR_TG0_64K (1 << 14) #define TCR_TG0_16K (2 << 14) + +#ifndef CONFIG_SYS_FULL_VA #define TCR_EL1_IPS_BITS (UL(3) << 32) /* 42 bits physical address */ #define TCR_EL2_IPS_BITS (3 << 16) /* 42 bits physical address */ #define TCR_EL3_IPS_BITS (3 << 16) /* 42 bits physical address */ +#else +#define TCR_EL1_IPS_BITS CONFIG_SYS_TCR_EL1_IPS_BITS +#define TCR_EL2_IPS_BITS CONFIG_SYS_TCR_EL2_IPS_BITS +#define TCR_EL3_IPS_BITS CONFIG_SYS_TCR_EL3_IPS_BITS +#endif /* PTWs cacheable, inner/outer WBWA and inner shareable */ #define TCR_FLAGS (TCR_TG0_64K | \ @@ -116,6 +177,7 @@ #define TCR_EL3_RSVD (1 << 31 | 1 << 23) #ifndef __ASSEMBLY__ +#ifndef CONFIG_SYS_FULL_VA void set_pgtable_section(u64 *page_table, u64 index, u64 section, u64 memory_type, @@ -123,6 +185,7 @@ void set_pgtable_section(u64 *page_table, u64 index, void set_pgtable_table(u64 *page_table, u64 index, u64 *table_addr); +#endif static inline void set_ttbr_tcr_mair(int el, u64 table, u64 tcr, u64 attr) { asm volatile("dsb sy"); @@ -143,5 +206,12 @@ static inline void set_ttbr_tcr_mair(int el, u64 table, u64 tcr, u64 attr) } asm volatile("isb"); } + +struct mm_region { + u64 base; + u64 size; + u64 attrs; +}; #endif + #endif /* _ASM_ARMV8_MMU_H_ */ diff --git a/arch/arm/include/asm/global_data.h b/arch/arm/include/asm/global_data.h index bd27281..dcfa098 100644 --- a/arch/arm/include/asm/global_data.h +++ b/arch/arm/include/asm/global_data.h @@ -38,6 +38,9 @@ struct arch_global_data { unsigned long long timer_reset_value; #if !(defined(CONFIG_SYS_ICACHE_OFF) && defined(CONFIG_SYS_DCACHE_OFF)) unsigned long tlb_addr; +#if defined(CONFIG_SYS_FULL_VA) + unsigned long pmd_addr[CONFIG_SYS_PTL1_ENTRIES]; +#endif unsigned long tlb_size; #endif diff --git a/arch/arm/include/asm/system.h 
b/arch/arm/include/asm/system.h index 5d66fa0..393e7af 100644 --- a/arch/arm/include/asm/system.h +++ b/arch/arm/include/asm/system.h @@ -14,7 +14,12 @@ #define CR_WXN (1 << 19) /* Write Permision Imply XN */ #define CR_EE (1 << 25) /* Exception (Big) Endian */ +#ifndef CONFIG_SYS_FULL_VA #define PGTABLE_SIZE (0x10000) +#else +#define PGTABLE_SIZE CONFIG_SYS_PGTABLE_SIZE +#endif + /* 2MB granularity */ #define MMU_SECTION_SHIFT 21 #define MMU_SECTION_SIZE (1 << MMU_SECTION_SHIFT) @@ -148,7 +153,9 @@ void flush_l3_cache(void); #define CR_AFE (1 << 29) /* Access flag enable */ #define CR_TE (1 << 30) /* Thumb exception enable */ +#ifndef PGTABLE_SIZE #define PGTABLE_SIZE (4096 * 4) +#endif /* * This is used to ensure the compiler did actually allocate the register we diff --git a/doc/README.arm64 b/doc/README.arm64 index 75586db..f32108f 100644 --- a/doc/README.arm64 +++ b/doc/README.arm64 @@ -36,11 +36,34 @@ Notes 6. CONFIG_ARM64 instead of CONFIG_ARMV8 is used to distinguish aarch64 and aarch32 specific codes. +7. CONFIG_SYS_FULL_VA is used to enable 2-level page tables. 
For cores + supporting 64k pages it allows usage of full 48+ bit virtual/physical addresses + + Enabling this option requires the following ones to be defined: + - CONFIG_SYS_MEM_MAP - an array of 'struct mm_region' describing the + system memory map (base, size, attributes) + - CONFIG_SYS_MEM_MAP_SIZE - number of entries in CONFIG_SYS_MEM_MAP + - CONFIG_SYS_PTL1_ENTRIES - number of 1st level page table entries + - CONFIG_SYS_PTL2_ENTRIES - number of 2nd level page table entries + for the largest CONFIG_SYS_MEM_MAP entry + - CONFIG_COREID_MASK - the mask value used to get the core ID from the + MPIDR_EL1 register + - CONFIG_SYS_PTL2_BITS - number of bits addressed by the 2nd level + page tables + - CONFIG_SYS_BLOCK_SHIFT - number of bits addressed by a single block + entry from L2 page tables + - CONFIG_SYS_PGTABLE_SIZE - total size of the page table + - CONFIG_SYS_TCR_EL{1,2,3}_IPS_BITS - the IPS field of the TCR_EL{1,2,3} + + + + Contributor =========== - Tom Rini - Scott Wood - York Sun - Simon Glass - Sharma Bhupesh - Rob Herring + Tom Rini + Scott Wood + York Sun + Simon Glass + Sharma Bhupesh + Rob Herring + Sergey Temerkhanov