Patrick Rudolph has uploaded this change for review. ( https://review.coreboot.org/24970 )
Change subject: arch/arm64/armv8/mmu: Add support for 48bit VA
......................................................................
arch/arm64/armv8/mmu: Add support for 48bit VA
The VA space needs to be extended to support 48bit, as on Cavium SoCs the MMIO starts at 1 << 47.
The following changes were done:
* Add a Kconfig to define the VA space at platform level and support a
  64KB granule size for VA > 39.
* Decide between the 4KB and the 64KB granule depending on the supported
  VA space.
* Add helper functions for the different page level sizes.
* Add asserts to make sure that the address is within the VA space.
* Don't use BLOCK descriptors at level 1 when not on the 4KB granule.
Tested on a Cavium SoC. Needs testing on other platforms.
Change-Id: I89e6a4809b6b725c3945bad7fce82b0dfee7c262
Signed-off-by: Patrick Rudolph <patrick.rudolph@9elements.com>
---
M src/arch/arm64/armv8/Kconfig
M src/arch/arm64/armv8/mmu.c
M src/arch/arm64/include/armv8/arch/mmu.h
3 files changed, 120 insertions(+), 34 deletions(-)
git pull ssh://review.coreboot.org:29418/coreboot refs/changes/70/24970/1
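For orientation while reading the diff below: with a 4KB granule each table level resolves 9 bits on top of the 12-bit page offset, so a three-level walk covers at most 39 VA bits; with a 64KB granule L2/L3 resolve 13 bits each and L1 resolves 6 bits on top of a 16-bit offset, which reaches the 48 bits Cavium needs. A minimal standalone C sketch of that arithmetic (not part of the patch; all names are local to the example):

  #include <assert.h>
  #include <stdio.h>

  /* Maximum VA bits a 3-level walk can cover: page-offset bits plus the
   * bits resolved at L3, L2 and L1. */
  static unsigned int max_va_bits(unsigned int granule_shift, unsigned int l1,
                                  unsigned int l2, unsigned int l3)
  {
          return granule_shift + l3 + l2 + l1;
  }

  int main(void)
  {
          assert(max_va_bits(12, 9, 9, 9) == 39);   /* 4KB granule  */
          assert(max_va_bits(16, 6, 13, 13) == 48); /* 64KB granule */
          printf("4KB granule covers 39 VA bits, 64KB covers 48\n");
          return 0;
  }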
diff --git a/src/arch/arm64/armv8/Kconfig b/src/arch/arm64/armv8/Kconfig
index 2070bea..1797727 100644
--- a/src/arch/arm64/armv8/Kconfig
+++ b/src/arch/arm64/armv8/Kconfig
@@ -25,3 +25,10 @@
           All ARMv8 implementations are downwards-compatible, so this does
           not need to be changed unless specific features (e.g. new
           instructions) are used by the SoC's coreboot code.
+
+config ARCH_MMU_VA_BITS
+        int
+        default 33
+        help
+          Specify the amount of virtual address space that has to be supported
+          by the MMU.
\ No newline at end of file
diff --git a/src/arch/arm64/armv8/mmu.c b/src/arch/arm64/armv8/mmu.c
index 55bd703..9dfbab1 100644
--- a/src/arch/arm64/armv8/mmu.c
+++ b/src/arch/arm64/armv8/mmu.c
@@ -81,11 +81,43 @@
         return attr;
 }
+static size_t bits_per_level(const size_t level)
+{
+        switch (level) {
+        case 1:
+                return BITS_RESOLVED_PER_LVL_1;
+        case 2:
+                return BITS_RESOLVED_PER_LVL_2;
+        case 3:
+                return BITS_RESOLVED_PER_LVL_3;
+        default:
+                die("Invalid page table level\n");
+                return 0;
+        }
+}
+
+static size_t addr_shift_level(const size_t level)
+{
+        switch (level) {
+        case 1:
+                return L1_ADDR_SHIFT;
+        case 2:
+                return L2_ADDR_SHIFT;
+        case 3:
+                return L3_ADDR_SHIFT;
+        default:
+                die("Invalid page table level\n");
+                return 0;
+        }
+}
+
 /* Func : setup_new_table
  * Desc : Get next free table from TTB and set it up to match old parent entry.
  */
-static uint64_t *setup_new_table(uint64_t desc, size_t xlat_size)
+static uint64_t *setup_new_table(uint64_t desc, size_t xlat_size, size_t level)
 {
+        const size_t shift = bits_per_level(level);
+
         while (next_free_table[0] != UNUSED_DESC) {
                 next_free_table += GRANULE_SIZE/sizeof(*next_free_table);
                 if (_ettb - (u8 *)next_free_table <= 0)
@@ -95,7 +127,7 @@
         void *frame_base = (void *)(desc & XLAT_ADDR_MASK);
         printk(BIOS_DEBUG, "Backing address range [%p:%p) with new page"
                " table @%p\n", frame_base, frame_base +
-               (xlat_size << BITS_RESOLVED_PER_LVL), next_free_table);
+               (xlat_size << shift), next_free_table);
         if (!desc) {
                 memset(next_free_table, 0, GRANULE_SIZE);
@@ -118,12 +150,12 @@
  * Desc: Check if the table entry is a valid descriptor. If not, initialize new
  * table, update the entry and return the table addr. If valid, return the addr
  */
-static uint64_t *get_next_level_table(uint64_t *ptr, size_t xlat_size)
+static uint64_t *get_next_level_table(uint64_t *ptr, size_t xlat_size, size_t level)
 {
         uint64_t desc = *ptr;
         if ((desc & DESC_MASK) != TABLE_DESC) {
-                uint64_t *new_table = setup_new_table(desc, xlat_size);
+                uint64_t *new_table = setup_new_table(desc, xlat_size, level);
                 desc = ((uint64_t)new_table) | TABLE_DESC;
                 *ptr = desc;
         }
@@ -151,18 +183,21 @@
         /* L1 table lookup
          * If VA has bits more than L2 can resolve, lookup starts at L1
          * Assumption: we don't need L0 table in coreboot */
-        if (BITS_PER_VA > L1_ADDR_SHIFT) {
+        if (CONFIG_ARCH_MMU_VA_BITS > L1_ADDR_SHIFT) {
                 if ((size >= L1_XLAT_SIZE) &&
-                    IS_ALIGNED(base_addr, (1UL << L1_ADDR_SHIFT))) {
-                        /* If block address is aligned and size is greater than
+                    IS_ALIGNED(base_addr, (1UL << L1_ADDR_SHIFT)) &&
+                    (GRANULE_SIZE_SHIFT == 12)) {
+                        /*
+                         * If block address is aligned and size is greater than
                          * or equal to size addressed by each L1 entry, we can
-                         * directly store a block desc */
+                         * directly store a block desc
+                         */
                         desc = base_addr | BLOCK_DESC | attr;
                         table[l1_index] = desc;
                         /* L2 lookup is not required */
                         return L1_XLAT_SIZE;
                 }
-                table = get_next_level_table(&table[l1_index], L2_XLAT_SIZE);
+                table = get_next_level_table(&table[l1_index], L2_XLAT_SIZE, 2);
         }
         /* L2 table lookup
@@ -180,7 +215,7 @@
         }

         /* L2 entry stores a table descriptor */
-        table = get_next_level_table(&table[l2_index], L3_XLAT_SIZE);
+        table = get_next_level_table(&table[l2_index], L3_XLAT_SIZE, 3);

         /* L3 table lookup */
         desc = base_addr | PAGE_DESC | attr;
@@ -202,19 +237,20 @@
  * Desc : Returns the page table entry governing a specific address. */
 static uint64_t get_pte(void *addr)
 {
-        int shift = BITS_PER_VA > L1_ADDR_SHIFT ? L1_ADDR_SHIFT : L2_ADDR_SHIFT;
+        size_t level = CONFIG_ARCH_MMU_VA_BITS > L1_ADDR_SHIFT ? 1 : 2;
+        size_t shift = addr_shift_level(level);
+
         uint64_t *pte = (uint64_t *)_ttb;
         while (1) {
-                int index = ((uintptr_t)addr >> shift) &
-                            ((1UL << BITS_RESOLVED_PER_LVL) - 1);
+                int index = ((uintptr_t)addr >> shift) & ((1UL << bits_per_level(level)) - 1);

                 if ((pte[index] & DESC_MASK) != TABLE_DESC ||
                     shift <= GRANULE_SIZE_SHIFT)
                         return pte[index];

                 pte = (uint64_t *)(pte[index] & XLAT_ADDR_MASK);
-                shift -= BITS_RESOLVED_PER_LVL;
+                shift = addr_shift_level(++level);
         }
 }
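An aside on the reworked walk: with the 64KB granule the shifts visited are 42, 29 and 16, and the levels resolve 6, 13 and 13 index bits respectively, matching the values this patch defines in mmu.h. A standalone sketch of that index arithmetic for an address above 1 << 47 (the Cavium case from the commit message; the sample address and names are illustrative only):

  #include <stdint.h>
  #include <stdio.h>

  int main(void)
  {
          /* Per-level shift and resolved bits for the 64KB granule. */
          const unsigned int shift[] = { 42, 29, 16 };
          const unsigned int bits[]  = { 6, 13, 13 };
          /* Illustrative MMIO address above 1 << 47. */
          uint64_t addr = (1ULL << 47) | 0x12340000ULL;

          for (int lvl = 0; lvl < 3; lvl++)
                  printf("L%d index = %llu\n", lvl + 1,
                         (unsigned long long)((addr >> shift[lvl]) &
                                              ((1ULL << bits[lvl]) - 1)));
          return 0;
  }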
@@ -229,6 +265,9 @@
         uint64_t base_addr = (uintptr_t)start;
         uint64_t temp_size = size;

+        assert(base_addr < (1UL << CONFIG_ARCH_MMU_VA_BITS));
+        assert(size < (1UL << CONFIG_ARCH_MMU_VA_BITS));
+
         printk(BIOS_INFO, "Mapping address range [%p:%p) as ",
                start, start + size);
         print_tag(BIOS_INFO, tag);
@@ -252,13 +291,15 @@
  */
 void mmu_init(void)
 {
+        uint32_t flags;
+
         /* Initially mark all table slots unused (first PTE == UNUSED_DESC). */
         uint64_t *table = (uint64_t *)_ttb;
         for (; _ettb - (u8 *)table > 0; table += GRANULE_SIZE/sizeof(*table))
                 table[0] = UNUSED_DESC;

         /* Initialize the root table (L1) to be completely unmapped. */
-        uint64_t *root = setup_new_table(INVALID_DESC, L1_XLAT_SIZE);
+        uint64_t *root = setup_new_table(INVALID_DESC, L1_XLAT_SIZE, 1);
         assert((u8 *)root == _ttb);

         /* Initialize TTBR */
@@ -268,9 +309,35 @@
         raw_write_mair_el3(MAIR_ATTRIBUTES);

         /* Initialize TCR flags */
-        raw_write_tcr_el3(TCR_TOSZ | TCR_IRGN0_NM_WBWAC | TCR_ORGN0_NM_WBWAC |
-                          TCR_SH0_IS | TCR_TG0_4KB | TCR_PS_64GB |
-                          TCR_TBI_USED);
+        flags = TCR_TOSZ | TCR_IRGN0_NM_WBWAC | TCR_ORGN0_NM_WBWAC |
+                TCR_SH0_IS | TCR_TBI_USED;
+
+        if (CONFIG_ARCH_MMU_VA_BITS <= 32)
+                flags |= TCR_PS_4GB;
+        else if (CONFIG_ARCH_MMU_VA_BITS <= 36)
+                flags |= TCR_PS_64GB;
+        else if (CONFIG_ARCH_MMU_VA_BITS <= 40)
+                flags |= TCR_PS_1TB;
+        else if (CONFIG_ARCH_MMU_VA_BITS <= 42)
+                flags |= TCR_PS_4TB;
+        else if (CONFIG_ARCH_MMU_VA_BITS <= 44)
+                flags |= TCR_PS_16TB;
+        else
+                flags |= TCR_PS_256TB;
+
+        switch (GRANULE_SIZE_SHIFT) {
+        case 12:
+                flags |= TCR_TG0_4KB;
+                break;
+        case 14:
+                flags |= TCR_TG0_16KB;
+                break;
+        case 16:
+                flags |= TCR_TG0_64KB;
+                break;
+        }
+
+        raw_write_tcr_el3(flags);
 }
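As a concrete check of the new TCR programming: a platform selecting CONFIG_ARCH_MMU_VA_BITS=48 (the Cavium case) gets T0SZ = 64 - 48 = 16, falls through to TCR_PS_256TB, and picks TCR_TG0_64KB from the granule switch. A small standalone sketch that mirrors the selection above (flag names are printed as strings; nothing here touches real registers):

  #include <stdio.h>

  /* Mirrors the VA-bits -> physical-size selection added to mmu_init(). */
  static const char *ps_flag(int va_bits)
  {
          if (va_bits <= 32)
                  return "TCR_PS_4GB";
          if (va_bits <= 36)
                  return "TCR_PS_64GB";
          if (va_bits <= 40)
                  return "TCR_PS_1TB";
          if (va_bits <= 42)
                  return "TCR_PS_4TB";
          if (va_bits <= 44)
                  return "TCR_PS_16TB";
          return "TCR_PS_256TB";
  }

  int main(void)
  {
          const int va_bits = 48;       /* e.g. a Cavium-style configuration */
          const int granule_shift = 16; /* 64KB granule, per the mmu.h change */

          printf("T0SZ = %d\n", 64 - va_bits);       /* 16 */
          printf("PS   = %s\n", ps_flag(va_bits));   /* TCR_PS_256TB */
          printf("TG0  = %s\n", granule_shift == 12 ? "TCR_TG0_4KB" :
                 granule_shift == 14 ? "TCR_TG0_16KB" : "TCR_TG0_64KB");
          return 0;
  }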

 void mmu_enable(void)
diff --git a/src/arch/arm64/include/armv8/arch/mmu.h b/src/arch/arm64/include/armv8/arch/mmu.h
index a812073..a21c3b7 100644
--- a/src/arch/arm64/include/armv8/arch/mmu.h
+++ b/src/arch/arm64/include/armv8/arch/mmu.h
@@ -69,25 +69,37 @@
 /* XLAT Table Init Attributes */
 #define VA_START                   0x0
-#define BITS_PER_VA                33
+
+#if (CONFIG_ARCH_MMU_VA_BITS <= 39)
 /* Granule size of 4KB is being used */
 #define GRANULE_SIZE_SHIFT         12
-#define GRANULE_SIZE               (1 << GRANULE_SIZE_SHIFT)
-#define XLAT_ADDR_MASK             ((1UL << BITS_PER_VA) - GRANULE_SIZE)
-#define GRANULE_SIZE_MASK          ((1 << GRANULE_SIZE_SHIFT) - 1)
-
-#define BITS_RESOLVED_PER_LVL   (GRANULE_SIZE_SHIFT - 3)
-#define L1_ADDR_SHIFT           (GRANULE_SIZE_SHIFT + BITS_RESOLVED_PER_LVL * 2)
-#define L2_ADDR_SHIFT           (GRANULE_SIZE_SHIFT + BITS_RESOLVED_PER_LVL * 1)
-#define L3_ADDR_SHIFT           (GRANULE_SIZE_SHIFT + BITS_RESOLVED_PER_LVL * 0)
-
-#if BITS_PER_VA > L1_ADDR_SHIFT + BITS_RESOLVED_PER_LVL
-  #error "BITS_PER_VA too large (we don't have L0 table support)"
+#else
+/* Granule size of 64KB is being used */
+#define GRANULE_SIZE_SHIFT         16
 #endif

-#define L1_ADDR_MASK     (((1UL << BITS_RESOLVED_PER_LVL) - 1) << L1_ADDR_SHIFT)
-#define L2_ADDR_MASK     (((1UL << BITS_RESOLVED_PER_LVL) - 1) << L2_ADDR_SHIFT)
-#define L3_ADDR_MASK     (((1UL << BITS_RESOLVED_PER_LVL) - 1) << L3_ADDR_SHIFT)
+#define GRANULE_SIZE               (1 << GRANULE_SIZE_SHIFT)
+#define XLAT_ADDR_MASK             ((1UL << CONFIG_ARCH_MMU_VA_BITS) - GRANULE_SIZE)
+#define GRANULE_SIZE_MASK          ((1 << GRANULE_SIZE_SHIFT) - 1)
+
+#if GRANULE_SIZE_SHIFT == 12
+#define BITS_RESOLVED_PER_LVL_1    9
+#define BITS_RESOLVED_PER_LVL_2    9
+#define BITS_RESOLVED_PER_LVL_3    9
+#elif GRANULE_SIZE_SHIFT == 16
+#define BITS_RESOLVED_PER_LVL_1    6
+#define BITS_RESOLVED_PER_LVL_2    13
+#define BITS_RESOLVED_PER_LVL_3    13
+#endif
+
+#define L1_ADDR_SHIFT   (GRANULE_SIZE_SHIFT + BITS_RESOLVED_PER_LVL_2 \
+                         + BITS_RESOLVED_PER_LVL_3)
+#define L2_ADDR_SHIFT   (GRANULE_SIZE_SHIFT + BITS_RESOLVED_PER_LVL_3)
+#define L3_ADDR_SHIFT   (GRANULE_SIZE_SHIFT)
+
+#define L1_ADDR_MASK    (((1UL << BITS_RESOLVED_PER_LVL_1) - 1) << L1_ADDR_SHIFT)
+#define L2_ADDR_MASK    (((1UL << BITS_RESOLVED_PER_LVL_2) - 1) << L2_ADDR_SHIFT)
+#define L3_ADDR_MASK    (((1UL << BITS_RESOLVED_PER_LVL_3) - 1) << L3_ADDR_SHIFT)
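Plugging the two granule configurations into these macros: GRANULE_SIZE_SHIFT = 12 yields L1/L2/L3 shifts of 30/21/12 (a three-level walk tops out at 30 + 9 = 39 VA bits, hence the #if boundary above), while GRANULE_SIZE_SHIFT = 16 yields 42/29/16 and reaches 42 + 6 = 48 bits. A standalone compile-time check of that arithmetic (not part of the header):

  /* Standalone sanity check of the shift arithmetic for both granules. */
  _Static_assert((12 + 9 + 9) == 30 && (12 + 9) == 21,
                 "4KB granule: L1/L2_ADDR_SHIFT are 30 and 21");
  _Static_assert((16 + 13 + 13) == 42 && (16 + 13) == 29,
                 "64KB granule: L1/L2_ADDR_SHIFT are 42 and 29");
  _Static_assert((12 + 9 + 9) + 9 == 39 && (16 + 13 + 13) + 6 == 48,
                 "three-level walks cover 39 (4KB) and 48 (64KB) VA bits");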

 /* These macros give the size of the region addressed by each entry of a xlat
    table at any given level */
@@ -113,7 +125,7 @@
                                 (0xffUL << (BLOCK_INDEX_MEM_NORMAL*8)))
 /* TCR attributes */
-#define TCR_TOSZ                        (64 - BITS_PER_VA)
+#define TCR_TOSZ                        (64 - CONFIG_ARCH_MMU_VA_BITS)

 #define TCR_IRGN0_SHIFT                 8
 #define TCR_IRGN0_NM_NC                 (0x00 << TCR_IRGN0_SHIFT)