Patrick Rudolph has uploaded this change for review.

arch/arm64/armv8/mmu: Add support for 48-bit VA

The VA space needs to be extended to 48 bits, since on Cavium SoCs the
MMIO window starts at 1 << 47 (i.e. at 128TiB).

The following changes were made:
* Add a Kconfig option to define the VA space at platform level and
support a 64KB granule size for VA widths above 39 bits.
* Decide between the 4KB and the 64KB granule depending on the
supported VA space (see the sketch below).
* Add helper functions for the different page level sizes.
* Add asserts to make sure that the mapped range is within the VA space.
* Don't use BLOCK descriptors in level 1 when not on the 4KB granule.

Tested on a Cavium SoC.
Needs testing on other platforms.
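
The granule choice and the starting translation level follow directly
from the configured VA width. As an illustration only (this sketch is
not part of the change, and all names are local to it), the selection
logic boils down to:

  #include <stdio.h>

  int main(void)
  {
          for (int va_bits = 33; va_bits <= 48; va_bits++) {
                  /* VA widths up to 39 bits fit three levels of 4KB
                     granule; beyond that, switch to the 64KB granule. */
                  int granule_shift = (va_bits <= 39) ? 12 : 16;
                  /* A full table resolves (granule_shift - 3) bits. */
                  int bits_per_lvl = granule_shift - 3;
                  int l1_shift = granule_shift + 2 * bits_per_lvl;
                  /* As in map_range(): start the walk at L1 only if the
                     VA is wider than what L2 alone can resolve. */
                  int start_level = (va_bits > l1_shift) ? 1 : 2;

                  printf("VA %2d: %2uKB granule, walk starts at L%d\n",
                         va_bits, 1u << (granule_shift - 10), start_level);
          }
          return 0;
  }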

Change-Id: I89e6a4809b6b725c3945bad7fce82b0dfee7c262
Signed-off-by: Patrick Rudolph <patrick.rudolph@9elements.com>
---
M src/arch/arm64/armv8/Kconfig
M src/arch/arm64/armv8/mmu.c
M src/arch/arm64/include/armv8/arch/mmu.h
3 files changed, 120 insertions(+), 34 deletions(-)

git pull ssh://review.coreboot.org:29418/coreboot refs/changes/70/24970/1
diff --git a/src/arch/arm64/armv8/Kconfig b/src/arch/arm64/armv8/Kconfig
index 2070bea..1797727 100644
--- a/src/arch/arm64/armv8/Kconfig
+++ b/src/arch/arm64/armv8/Kconfig
@@ -25,3 +25,10 @@
All ARMv8 implementations are downwards-compatible, so this does not
need to be changed unless specific features (e.g. new instructions)
are used by the SoC's coreboot code.
+
+config ARCH_MMU_VA_BITS
+ int
+ default 33
+ help
+ Specify the number of virtual address bits that have to be supported
+ by the MMU.
\ No newline at end of file
diff --git a/src/arch/arm64/armv8/mmu.c b/src/arch/arm64/armv8/mmu.c
index 55bd703..9dfbab1 100644
--- a/src/arch/arm64/armv8/mmu.c
+++ b/src/arch/arm64/armv8/mmu.c
@@ -81,11 +81,43 @@
return attr;
}

+static size_t bits_per_level(const size_t level)
+{
+ switch (level) {
+ case 1:
+ return BITS_RESOLVED_PER_LVL_1;
+ case 2:
+ return BITS_RESOLVED_PER_LVL_2;
+ case 3:
+ return BITS_RESOLVED_PER_LVL_3;
+ default:
+ die("Invalid page table level\n");
+ return 0;
+ }
+}
+
+static size_t addr_shift_level(const size_t level)
+{
+ switch (level) {
+ case 1:
+ return L1_ADDR_SHIFT;
+ case 2:
+ return L2_ADDR_SHIFT;
+ case 3:
+ return L3_ADDR_SHIFT;
+ default:
+ die("Invalid page table level\n");
+ return 0;
+ }
+}
+
/* Func : setup_new_table
* Desc : Get next free table from TTB and set it up to match old parent entry.
*/
-static uint64_t *setup_new_table(uint64_t desc, size_t xlat_size)
+static uint64_t *setup_new_table(uint64_t desc, size_t xlat_size, size_t level)
{
+ const size_t shift = bits_per_level(level);
+
while (next_free_table[0] != UNUSED_DESC) {
next_free_table += GRANULE_SIZE/sizeof(*next_free_table);
if (_ettb - (u8 *)next_free_table <= 0)
@@ -95,7 +127,7 @@
void *frame_base = (void *)(desc & XLAT_ADDR_MASK);
printk(BIOS_DEBUG, "Backing address range [%p:%p) with new page"
" table @%p\n", frame_base, frame_base +
- (xlat_size << BITS_RESOLVED_PER_LVL), next_free_table);
+ (xlat_size << shift), next_free_table);

if (!desc) {
memset(next_free_table, 0, GRANULE_SIZE);
@@ -118,12 +150,12 @@
* Desc: Check if the table entry is a valid descriptor. If not, initialize new
* table, update the entry and return the table addr. If valid, return the addr
*/
-static uint64_t *get_next_level_table(uint64_t *ptr, size_t xlat_size)
+static uint64_t *get_next_level_table(uint64_t *ptr, size_t xlat_size, size_t level)
{
uint64_t desc = *ptr;

if ((desc & DESC_MASK) != TABLE_DESC) {
- uint64_t *new_table = setup_new_table(desc, xlat_size);
+ uint64_t *new_table = setup_new_table(desc, xlat_size, level);
desc = ((uint64_t)new_table) | TABLE_DESC;
*ptr = desc;
}
@@ -151,18 +183,21 @@
/* L1 table lookup
* If VA has bits more than L2 can resolve, lookup starts at L1
* Assumption: we don't need L0 table in coreboot */
- if (BITS_PER_VA > L1_ADDR_SHIFT) {
+ if (CONFIG_ARCH_MMU_VA_BITS > L1_ADDR_SHIFT) {
if ((size >= L1_XLAT_SIZE) &&
- IS_ALIGNED(base_addr, (1UL << L1_ADDR_SHIFT))) {
- /* If block address is aligned and size is greater than
+ IS_ALIGNED(base_addr, (1UL << L1_ADDR_SHIFT)) &&
+ (GRANULE_SIZE_SHIFT == 12)) {
+ /*
+ * If block address is aligned and size is greater than
* or equal to size addressed by each L1 entry, we can
- * directly store a block desc */
+ * directly store a block desc
+ */
desc = base_addr | BLOCK_DESC | attr;
table[l1_index] = desc;
/* L2 lookup is not required */
return L1_XLAT_SIZE;
}
- table = get_next_level_table(&table[l1_index], L2_XLAT_SIZE);
+ table = get_next_level_table(&table[l1_index], L2_XLAT_SIZE, 2);
}

/* L2 table lookup
@@ -180,7 +215,7 @@
}

/* L2 entry stores a table descriptor */
- table = get_next_level_table(&table[l2_index], L3_XLAT_SIZE);
+ table = get_next_level_table(&table[l2_index], L3_XLAT_SIZE, 3);

/* L3 table lookup */
desc = base_addr | PAGE_DESC | attr;
@@ -202,19 +237,20 @@
* Desc : Returns the page table entry governing a specific address. */
static uint64_t get_pte(void *addr)
{
- int shift = BITS_PER_VA > L1_ADDR_SHIFT ? L1_ADDR_SHIFT : L2_ADDR_SHIFT;
+ size_t level = CONFIG_ARCH_MMU_VA_BITS > L1_ADDR_SHIFT ? 1 : 2;
+ size_t shift = addr_shift_level(level);
+
uint64_t *pte = (uint64_t *)_ttb;

while (1) {
- int index = ((uintptr_t)addr >> shift) &
- ((1UL << BITS_RESOLVED_PER_LVL) - 1);
+ int index = ((uintptr_t)addr >> shift) & ((1UL << bits_per_level(level)) - 1);

if ((pte[index] & DESC_MASK) != TABLE_DESC ||
shift <= GRANULE_SIZE_SHIFT)
return pte[index];

pte = (uint64_t *)(pte[index] & XLAT_ADDR_MASK);
- shift -= BITS_RESOLVED_PER_LVL;
+ shift = addr_shift_level(++level);
}
}

@@ -229,6 +265,9 @@
uint64_t base_addr = (uintptr_t)start;
uint64_t temp_size = size;

+ assert(base_addr < (1UL << CONFIG_ARCH_MMU_VA_BITS));
+ assert(base_addr + size <= (1UL << CONFIG_ARCH_MMU_VA_BITS));
+
printk(BIOS_INFO, "Mapping address range [%p:%p) as ",
start, start + size);
print_tag(BIOS_INFO, tag);
@@ -252,13 +291,15 @@
*/
void mmu_init(void)
{
+ uint32_t flags;
+
/* Initially mark all table slots unused (first PTE == UNUSED_DESC). */
uint64_t *table = (uint64_t *)_ttb;
for (; _ettb - (u8 *)table > 0; table += GRANULE_SIZE/sizeof(*table))
table[0] = UNUSED_DESC;

/* Initialize the root table (L1) to be completely unmapped. */
- uint64_t *root = setup_new_table(INVALID_DESC, L1_XLAT_SIZE);
+ uint64_t *root = setup_new_table(INVALID_DESC, L1_XLAT_SIZE, 1);
assert((u8 *)root == _ttb);

/* Initialize TTBR */
@@ -268,9 +309,35 @@
raw_write_mair_el3(MAIR_ATTRIBUTES);

/* Initialize TCR flags */
- raw_write_tcr_el3(TCR_TOSZ | TCR_IRGN0_NM_WBWAC | TCR_ORGN0_NM_WBWAC |
- TCR_SH0_IS | TCR_TG0_4KB | TCR_PS_64GB |
- TCR_TBI_USED);
+ flags = TCR_TOSZ | TCR_IRGN0_NM_WBWAC | TCR_ORGN0_NM_WBWAC |
+ TCR_SH0_IS | TCR_TBI_USED;
+
+ if (CONFIG_ARCH_MMU_VA_BITS <= 32)
+ flags |= TCR_PS_4GB;
+ else if (CONFIG_ARCH_MMU_VA_BITS <= 36)
+ flags |= TCR_PS_64GB;
+ else if (CONFIG_ARCH_MMU_VA_BITS <= 40)
+ flags |= TCR_PS_1TB;
+ else if (CONFIG_ARCH_MMU_VA_BITS <= 42)
+ flags |= TCR_PS_4TB;
+ else if (CONFIG_ARCH_MMU_VA_BITS <= 44)
+ flags |= TCR_PS_16TB;
+ else
+ flags |= TCR_PS_256TB;
+
+ switch (GRANULE_SIZE_SHIFT) {
+ case 12:
+ flags |= TCR_TG0_4KB;
+ break;
+ case 14:
+ flags |= TCR_TG0_16KB;
+ break;
+ case 16:
+ flags |= TCR_TG0_64KB;
+ break;
+ }
+
+ raw_write_tcr_el3(flags);
}

void mmu_enable(void)
diff --git a/src/arch/arm64/include/armv8/arch/mmu.h b/src/arch/arm64/include/armv8/arch/mmu.h
index a812073..a21c3b7 100644
--- a/src/arch/arm64/include/armv8/arch/mmu.h
+++ b/src/arch/arm64/include/armv8/arch/mmu.h
@@ -69,25 +69,37 @@
/* XLAT Table Init Attributes */

#define VA_START 0x0
-#define BITS_PER_VA 33
+
+#if (CONFIG_ARCH_MMU_VA_BITS <= 39)
/* Granule size of 4KB is being used */
#define GRANULE_SIZE_SHIFT 12
-#define GRANULE_SIZE (1 << GRANULE_SIZE_SHIFT)
-#define XLAT_ADDR_MASK ((1UL << BITS_PER_VA) - GRANULE_SIZE)
-#define GRANULE_SIZE_MASK ((1 << GRANULE_SIZE_SHIFT) - 1)
-
-#define BITS_RESOLVED_PER_LVL (GRANULE_SIZE_SHIFT - 3)
-#define L1_ADDR_SHIFT (GRANULE_SIZE_SHIFT + BITS_RESOLVED_PER_LVL * 2)
-#define L2_ADDR_SHIFT (GRANULE_SIZE_SHIFT + BITS_RESOLVED_PER_LVL * 1)
-#define L3_ADDR_SHIFT (GRANULE_SIZE_SHIFT + BITS_RESOLVED_PER_LVL * 0)
-
-#if BITS_PER_VA > L1_ADDR_SHIFT + BITS_RESOLVED_PER_LVL
- #error "BITS_PER_VA too large (we don't have L0 table support)"
+#else
+/* Granule size of 64KB is being used */
+#define GRANULE_SIZE_SHIFT 16
#endif

-#define L1_ADDR_MASK (((1UL << BITS_RESOLVED_PER_LVL) - 1) << L1_ADDR_SHIFT)
-#define L2_ADDR_MASK (((1UL << BITS_RESOLVED_PER_LVL) - 1) << L2_ADDR_SHIFT)
-#define L3_ADDR_MASK (((1UL << BITS_RESOLVED_PER_LVL) - 1) << L3_ADDR_SHIFT)
+#define GRANULE_SIZE (1 << GRANULE_SIZE_SHIFT)
+#define XLAT_ADDR_MASK ((1UL << CONFIG_ARCH_MMU_VA_BITS) - GRANULE_SIZE)
+#define GRANULE_SIZE_MASK ((1 << GRANULE_SIZE_SHIFT) - 1)
+
+#if GRANULE_SIZE_SHIFT == 12
+#define BITS_RESOLVED_PER_LVL_1 9
+#define BITS_RESOLVED_PER_LVL_2 9
+#define BITS_RESOLVED_PER_LVL_3 9
+#elif GRANULE_SIZE_SHIFT == 16
+#define BITS_RESOLVED_PER_LVL_1 6
+#define BITS_RESOLVED_PER_LVL_2 13
+#define BITS_RESOLVED_PER_LVL_3 13
+#endif
+
+#define L1_ADDR_SHIFT (GRANULE_SIZE_SHIFT + BITS_RESOLVED_PER_LVL_2 \
+ + BITS_RESOLVED_PER_LVL_3)
+#define L2_ADDR_SHIFT (GRANULE_SIZE_SHIFT + BITS_RESOLVED_PER_LVL_3)
+#define L3_ADDR_SHIFT (GRANULE_SIZE_SHIFT)
+
+#define L1_ADDR_MASK (((1UL << BITS_RESOLVED_PER_LVL_1) - 1) << L1_ADDR_SHIFT)
+#define L2_ADDR_MASK (((1UL << BITS_RESOLVED_PER_LVL_2) - 1) << L2_ADDR_SHIFT)
+#define L3_ADDR_MASK (((1UL << BITS_RESOLVED_PER_LVL_3) - 1) << L3_ADDR_SHIFT)

/* These macros give the size of the region addressed by each entry of a xlat
table at any given level */
@@ -113,7 +125,7 @@
(0xffUL << (BLOCK_INDEX_MEM_NORMAL*8)))

/* TCR attributes */
-#define TCR_TOSZ (64 - BITS_PER_VA)
+#define TCR_TOSZ (64 - CONFIG_ARCH_MMU_VA_BITS)

#define TCR_IRGN0_SHIFT 8
#define TCR_IRGN0_NM_NC (0x00 << TCR_IRGN0_SHIFT)

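As a quick sanity check of the 64KB-granule constants introduced in
mmu.h (again a standalone sketch, not part of the change): with a
16-bit granule shift each full table level resolves 13 bits, so an L3
entry maps 64KB, an L2 entry 512MB and an L1 entry 4TB, which leaves 6
bits to resolve at L1 for a 48-bit VA:

  #include <assert.h>

  #define GRANULE_SIZE_SHIFT      16
  #define BITS_RESOLVED_PER_LVL_2 13
  #define BITS_RESOLVED_PER_LVL_3 13

  #define L1_ADDR_SHIFT (GRANULE_SIZE_SHIFT + BITS_RESOLVED_PER_LVL_2 \
                         + BITS_RESOLVED_PER_LVL_3)
  #define L2_ADDR_SHIFT (GRANULE_SIZE_SHIFT + BITS_RESOLVED_PER_LVL_3)

  int main(void)
  {
          assert(GRANULE_SIZE_SHIFT == 16);  /* 64KB pages at L3 */
          assert(L2_ADDR_SHIFT == 29);       /* 512MB per L2 entry */
          assert(L1_ADDR_SHIFT == 42);       /* 4TB per L1 entry */
          assert(48 - L1_ADDR_SHIFT == 6);   /* = BITS_RESOLVED_PER_LVL_1 */
          return 0;
  }
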
To view, visit change 24970. To unsubscribe, or for help writing mail filters, visit settings.

Gerrit-Project: coreboot
Gerrit-Branch: master
Gerrit-MessageType: newchange
Gerrit-Change-Id: I89e6a4809b6b725c3945bad7fce82b0dfee7c262
Gerrit-Change-Number: 24970
Gerrit-PatchSet: 1
Gerrit-Owner: Patrick Rudolph <patrick.rudolph@9elements.com>