Isaac Christensen (isaac.christensen@se-eng.com) just uploaded a new patch set to gerrit, which you can find at http://review.coreboot.org/6993
-gerrit
commit 4cbf25f8eca3a12bbfec5b015953c0fc2b69c877
Author: Julius Werner <jwerner@chromium.org>
Date:   Tue Jan 21 20:11:22 2014 -0800
arm: Redesign, clarify and clean up cache related code
This patch changes several cache-related pieces to be cleaner, faster or more correct. The largest change is removing the old arm_invalidate_caches() function and the surrounding bootblock code that initialized SCTLR, and replacing them with an all-assembly function that takes care of cache and SCTLR initialization to bring the system to a known state. It runs without a stack and before coreboot makes any write accesses, so that it is as compatible as possible with whatever state the system was left in by preceding code. This also finally fixes the dreaded icache bug that wasted hundreds of milliseconds during boot.
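As a minimal sketch of the resulting calling convention (the load_stage() call and entry pointer are hypothetical placeholders; cache_sync_instructions() is the API introduced by this patch):

    /* After copying a new stage into RAM, one call performs all required
     * maintenance: clean the dcache, invalidate the icache (and, architecturally,
     * the branch predictor), then DSB/ISB before jumping to the new code. */
    void *entry = load_stage("fallback/ramstage");  /* hypothetical loader call */
    cache_sync_instructions();
    ((void (*)(void))entry)();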
Old-Change-Id: I7bb4995af8184f6383f8e3b1b870b0662bde8bd4
Signed-off-by: Julius Werner <jwerner@chromium.org>
Reviewed-on: https://chromium-review.googlesource.com/183890
(cherry picked from commit 07a35925dc957919bf88dfc90515971a36e81b97)
nyan_big: apply cache-related changes from nyan
This applies the same changes from 07a3592 that were applied to nyan.
Old-Change-Id: Idcbe85436d7a2f65fcd751954012eb5f4bec0b6c
Reviewed-on: https://chromium-review.googlesource.com/184551
Commit-Queue: David Hendricks <dhendrix@chromium.org>
Tested-by: David Hendricks <dhendrix@chromium.org>
Reviewed-by: David Hendricks <dhendrix@chromium.org>
(cherry picked from commit 4af27f02614da41c611aee2c6d175b1b948428ea)
Squashed the follow-up patch for nyan_big into the original patch.
Change-Id: Id14aef7846355ea2da496e55da227b635aca409e
Signed-off-by: Isaac Christensen <isaac.christensen@se-eng.com>
---
 payloads/libpayload/arch/arm/cache.c         | 78 ++++-----------------
 payloads/libpayload/include/arm/arch/cache.h | 19 +------
 payloads/libpayload/include/x86/arch/cache.h |  2 +-
 src/arch/arm/armv4/cache.c                   |  7 +--
 src/arch/arm/armv7/bootblock.S               |  2 +
 src/arch/arm/armv7/bootblock_simple.c        | 17 ------
 src/arch/arm/armv7/cache.c                   | 79 ++++------------------
 src/arch/arm/armv7/cpu.S                     | 31 +++++++++++
 src/arch/arm/armv7/mmu.c                     | 13 +++--
 src/arch/arm/include/armv4/arch/cache.h      |  7 +--
 src/arch/arm/include/armv7/arch/cache.h      | 19 +------
 src/arch/arm/stages.c                        | 18 ++-----
 src/mainboard/google/daisy/mainboard.c       |  1 -
 src/mainboard/google/nyan/romstage.c         | 23 +++-----
 src/mainboard/google/nyan_big/romstage.c     | 23 +++-----
 src/mainboard/google/peach_pit/mainboard.c   |  1 -
 src/soc/samsung/exynos5420/wakeup.c          |  1 -
 17 files changed, 94 insertions(+), 247 deletions(-)
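Before the individual hunks, the net change to the public cache API can be summarized roughly as follows (a condensed sketch assembled from the header changes below, not an additional hunk):

    /* arch/cache.h before this patch */
    void icache_invalidate_all(void);  /* icache invalidate all (level per CSSELR) */
    void arm_invalidate_caches(void);  /* invalidate all caches on ARM */

    /* arch/cache.h after this patch */
    /* perform all icache/dcache maintenance needed after loading new code */
    void cache_sync_instructions(void);

    /*
     * New assembly entry point in armv7/cpu.S: brings caches/SCTLR to a known
     * state right after gaining control of the CPU. It runs without a stack and
     * clobbers r4/r5, so callers invoke it as "bl arm_init_caches" from the
     * bootblock and from the romstage main() stubs added below.
     */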
diff --git a/payloads/libpayload/arch/arm/cache.c b/payloads/libpayload/arch/arm/cache.c
index defe640..63715bc 100644
--- a/payloads/libpayload/arch/arm/cache.c
+++ b/payloads/libpayload/arch/arm/cache.c
@@ -38,26 +38,9 @@
 void tlb_invalidate_all(void)
 {
-	/*
-	 * FIXME: ARMv7 Architecture Ref. Manual claims that the distinction
-	 * instruction vs. data TLBs is deprecated in ARMv7, however this does
-	 * not seem to be the case as of Cortex-A15.
-	 */
+	/* TLBIALL includes dTLB and iTLB on systems that have them. */
 	tlbiall();
-	dtlbiall();
-	itlbiall();
-	isb();
 	dsb();
-}
-
-void icache_invalidate_all(void)
-{
-	/*
-	 * icache can be entirely invalidated with one operation.
-	 * Note: If branch predictors are architecturally-visible, ICIALLU
-	 * also performs a BPIALL operation (B2-1283 in arch manual)
-	 */
-	iciallu();
 	isb();
 }
@@ -135,6 +118,11 @@ void dcache_invalidate_by_mva(void const *addr, size_t len)
 	dcache_op_mva(addr, len, OP_DCIMVAC);
 }
+/*
+ * CAUTION: This implementation assumes that coreboot never uses non-identity
+ * page tables for pages containing executed code. If you ever want to violate
+ * this assumption, have fun figuring out the associated problems on your own.
+ */
 void dcache_mmu_disable(void)
 {
 	uint32_t sctlr;
@@ -150,58 +138,14 @@ void dcache_mmu_enable(void)
 	uint32_t sctlr;
 	sctlr = read_sctlr();
-	dcache_clean_invalidate_all();
 	sctlr |= SCTLR_C | SCTLR_M;
 	write_sctlr(sctlr);
 }
-void arm_invalidate_caches(void)
+void cache_sync_instructions(void)
 {
-	uint32_t clidr;
-	int level;
-
-	/* Invalidate branch predictor */
-	bpiall();
-
-	/* Iterate thru each cache identified in CLIDR and invalidate */
-	clidr = read_clidr();
-	for (level = 0; level < 7; level++) {
-		unsigned int ctype = (clidr >> (level * 3)) & 0x7;
-		uint32_t csselr;
-
-		switch(ctype) {
-		case 0x0:
-			/* no cache */
-			break;
-		case 0x1:
-			/* icache only */
-			csselr = (level << 1) | 1;
-			write_csselr(csselr);
-			icache_invalidate_all();
-			break;
-		case 0x2:
-		case 0x4:
-			/* dcache only or unified cache */
-			csselr = level << 1;
-			write_csselr(csselr);
-			dcache_invalidate_all();
-			break;
-		case 0x3:
-			/* separate icache and dcache */
-			csselr = (level << 1) | 1;
-			write_csselr(csselr);
-			icache_invalidate_all();
-
-			csselr = level << 1;
-			write_csselr(csselr);
-			dcache_invalidate_all();
-			break;
-		default:
-			/* reserved */
-			break;
-		}
-	}
-
-	/* Invalidate TLB */
-	tlb_invalidate_all();
+	dcache_clean_all();	/* includes trailing DSB (in assembly) */
+	iciallu();		/* includes BPIALLU (architecturally) */
+	dsb();
+	isb();
 }
diff --git a/payloads/libpayload/include/arm/arch/cache.h b/payloads/libpayload/include/arm/arch/cache.h
index 5210dfe..470eb55 100644
--- a/payloads/libpayload/include/arm/arch/cache.h
+++ b/payloads/libpayload/include/arm/arch/cache.h
@@ -93,18 +93,6 @@ static inline void isb(void)
  * Low-level TLB maintenance operations
  */
-/* invalidate entire data TLB */
-static inline void dtlbiall(void)
-{
-	asm volatile ("mcr p15, 0, %0, c8, c6, 0" : : "r" (0) : "memory");
-}
-
-/* invalidate entire instruction TLB */
-static inline void itlbiall(void)
-{
-	asm volatile ("mcr p15, 0, %0, c8, c5, 0" : : "r" (0));
-}
-
 /* invalidate entire unified TLB */
 static inline void tlbiall(void)
 {
@@ -313,8 +301,8 @@ void dcache_mmu_disable(void);
 /* dcache and MMU enable */
 void dcache_mmu_enable(void);
-/* icache invalidate all (on current level given by CSSELR) */
-void icache_invalidate_all(void);
+/* perform all icache/dcache maintenance needed after loading new code */
+void cache_sync_instructions(void);
 /* tlb invalidate all */
 void tlb_invalidate_all(void);
@@ -323,9 +311,6 @@ void tlb_invalidate_all(void);
  * Generalized setup/init functions
  */
-/* invalidate all caches on ARM */
-void arm_invalidate_caches(void);
-
 /* mmu initialization (set page table address, set permissions, etc) */
 void mmu_init(void);
diff --git a/payloads/libpayload/include/x86/arch/cache.h b/payloads/libpayload/include/x86/arch/cache.h
index 3960488..ffefcdb 100644
--- a/payloads/libpayload/include/x86/arch/cache.h
+++ b/payloads/libpayload/include/x86/arch/cache.h
@@ -41,6 +41,6 @@
 #define dcache_invalidate_by_mva(addr, len)
 #define dcache_clean_invalidate_all()
 #define dcache_clean_invalidate_by_mva(addr, len)
-#define icache_invalidate_all()
+#define cache_sync_instructions()
 #endif
diff --git a/src/arch/arm/armv4/cache.c b/src/arch/arm/armv4/cache.c
index e5cf293..140beee 100644
--- a/src/arch/arm/armv4/cache.c
+++ b/src/arch/arm/armv4/cache.c
@@ -39,10 +39,6 @@ void tlb_invalidate_all(void)
 {
 }
-void icache_invalidate_all(void)
-{
-}
-
 void dcache_clean_all(void)
 {
 }
@@ -82,11 +78,10 @@ void dcache_mmu_disable(void)
 {
 }
-
 void dcache_mmu_enable(void)
 {
 }
-void arm_invalidate_caches(void)
+void cache_sync_instructions(void)
 {
 }
diff --git a/src/arch/arm/armv7/bootblock.S b/src/arch/arm/armv7/bootblock.S
index 4258caf..9ed5d54 100644
--- a/src/arch/arm/armv7/bootblock.S
+++ b/src/arch/arm/armv7/bootblock.S
@@ -46,6 +46,8 @@ ENDPROC(_start)
 .thumb
 ENTRY(_thumb_start)
+	bl	arm_init_caches
+
 	/*
 	 * From Cortex-A Series Programmer's Guide:
 	 * Only CPU 0 performs initialization. Other CPUs go into WFI
diff --git a/src/arch/arm/armv7/bootblock_simple.c b/src/arch/arm/armv7/bootblock_simple.c
index 5cd5970..248ea4e 100644
--- a/src/arch/arm/armv7/bootblock_simple.c
+++ b/src/arch/arm/armv7/bootblock_simple.c
@@ -32,23 +32,6 @@ void main(void)
 {
 	const char *stage_name = "fallback/romstage";
 	void *entry;
-	uint32_t sctlr;
-
-	/* Globally disable MMU, caches, and branch prediction (these should
-	 * be disabled by default on reset) */
-	sctlr = read_sctlr();
-	sctlr &= ~(SCTLR_M | SCTLR_C | SCTLR_Z | SCTLR_I);
-	write_sctlr(sctlr);
-
-	arm_invalidate_caches();
-
-	/*
-	 * Re-enable icache and branch prediction. MMU and dcache will be
-	 * set up later.
-	 */
-	sctlr = read_sctlr();
-	sctlr |= SCTLR_Z | SCTLR_I;
-	write_sctlr(sctlr);
 	bootblock_cpu_init();
 	bootblock_mainboard_init();
diff --git a/src/arch/arm/armv7/cache.c b/src/arch/arm/armv7/cache.c
index 7db86c8..31819f7 100644
--- a/src/arch/arm/armv7/cache.c
+++ b/src/arch/arm/armv7/cache.c
@@ -37,26 +37,9 @@
 void tlb_invalidate_all(void)
 {
-	/*
-	 * FIXME: ARMv7 Architecture Ref. Manual claims that the distinction
-	 * instruction vs. data TLBs is deprecated in ARMv7, however this does
-	 * not seem to be the case as of Cortex-A15.
-	 */
+	/* TLBIALL includes dTLB and iTLB on systems that have them. */
 	tlbiall();
-	dtlbiall();
-	itlbiall();
-	isb();
 	dsb();
-}
-
-void icache_invalidate_all(void)
-{
-	/*
-	 * icache can be entirely invalidated with one operation.
-	 * Note: If branch predictors are architecturally-visible, ICIALLU
-	 * also performs a BPIALL operation (B2-1283 in arch manual)
-	 */
-	iciallu();
 	isb();
 }
@@ -133,6 +116,11 @@ void dcache_invalidate_by_mva(void const *addr, size_t len)
 	dcache_op_mva(addr, len, OP_DCIMVAC);
 }
+/*
+ * CAUTION: This implementation assumes that coreboot never uses non-identity
+ * page tables for pages containing executed code. If you ever want to violate
+ * this assumption, have fun figuring out the associated problems on your own.
+ */
 void dcache_mmu_disable(void)
 {
 	uint32_t sctlr;
@@ -143,64 +131,19 @@ void dcache_mmu_disable(void)
 	write_sctlr(sctlr);
 }
-
 void dcache_mmu_enable(void)
 {
 	uint32_t sctlr;
 	sctlr = read_sctlr();
-	dcache_clean_invalidate_all();
 	sctlr |= SCTLR_C | SCTLR_M;
 	write_sctlr(sctlr);
 }
-void arm_invalidate_caches(void)
+void cache_sync_instructions(void)
 {
-	uint32_t clidr;
-	int level;
-
-	/* Invalidate branch predictor */
-	bpiall();
-
-	/* Iterate thru each cache identified in CLIDR and invalidate */
-	clidr = read_clidr();
-	for (level = 0; level < 7; level++) {
-		unsigned int ctype = (clidr >> (level * 3)) & 0x7;
-		uint32_t csselr;
-
-		switch(ctype) {
-		case 0x0:
-			/* no cache */
-			break;
-		case 0x1:
-			/* icache only */
-			csselr = (level << 1) | 1;
-			write_csselr(csselr);
-			icache_invalidate_all();
-			break;
-		case 0x2:
-		case 0x4:
-			/* dcache only or unified cache */
-			csselr = level << 1;
-			write_csselr(csselr);
-			dcache_invalidate_all();
-			break;
-		case 0x3:
-			/* separate icache and dcache */
-			csselr = (level << 1) | 1;
-			write_csselr(csselr);
-			icache_invalidate_all();
-
-			csselr = level << 1;
-			write_csselr(csselr);
-			dcache_invalidate_all();
-			break;
-		default:
-			/* reserved */
-			break;
-		}
-	}
-
-	/* Invalidate TLB */
-	tlb_invalidate_all();
+	dcache_clean_all();	/* includes trailing DSB (in assembly) */
+	iciallu();		/* includes BPIALLU (architecturally) */
+	dsb();
+	isb();
 }
diff --git a/src/arch/arm/armv7/cpu.S b/src/arch/arm/armv7/cpu.S
index 29a19e7..5738116 100644
--- a/src/arch/arm/armv7/cpu.S
+++ b/src/arch/arm/armv7/cpu.S
@@ -104,6 +104,37 @@
 	bx	lr
 .endm
+/*
+ * Bring an ARM processor we just gained control of (e.g. from IROM) into a
+ * known state regarding caches/SCTLR. Completely cleans and invalidates
+ * icache/dcache, disables MMU and dcache (if active), and enables unaligned
+ * accesses, icache and branch prediction (if inactive). Clobbers r4 and r5.
+ */
+ENTRY(arm_init_caches)
+	/* r4: SCTLR, return address: r5 (stay valid for the whole function) */
+	mov	r5, lr
+	mrc	p15, 0, r4, c1, c0, 0
+
+	/* Activate ICache (12) and Branch Prediction (11) already for speed */
+	orr	r4, # (1 << 11) | (1 << 12)
+	mcr	p15, 0, r4, c1, c0, 0
+
+	/* Flush and invalidate dcache in ascending order */
+	bl	dcache_clean_invalidate_all
+
+	/* Deactivate MMU (0), Alignment Check (1) and DCache (2) */
+	and	r4, # ~(1 << 0) & ~(1 << 1) & ~(1 << 2)
+	mcr	p15, 0, r4, c1, c0, 0
+
+	/* Invalidate icache and TLB for good measure */
+	mcr	p15, 0, r0, c7, c5, 0
+	mcr	p15, 0, r0, c8, c7, 0
+	dsb
+	isb
+
+	bx	r5
+ENDPROC(arm_init_caches)
+
 ENTRY(dcache_invalidate_all)
 	dcache_apply_all crm=c6
 ENDPROC(dcache_invalidate_all)
diff --git a/src/arch/arm/armv7/mmu.c b/src/arch/arm/armv7/mmu.c
index cc915a6..d710030 100644
--- a/src/arch/arm/armv7/mmu.c
+++ b/src/arch/arm/armv7/mmu.c
@@ -106,11 +106,18 @@ void mmu_config_range(unsigned long start_mb, unsigned long size_mb,
 	for (i = start_mb; i < start_mb + size_mb; i++)
 		writel((i << 20) | attr, &ttb_entry[i]);
-	/* Flush the page table entries, and old translations from the TLB. */
-	for (i = start_mb; i < start_mb + size_mb; i++) {
+	/* Flush the page table entries from the dcache. */
+	for (i = start_mb; i < start_mb + size_mb; i++)
 		dccmvac((uintptr_t)&ttb_entry[i]);
+
+	dsb();
+
+	/* Invalidate the TLB entries. */
+	for (i = start_mb; i < start_mb + size_mb; i++)
 		tlbimvaa(i*MiB);
-	}
+
+	dsb();
+	isb();
 }
 void mmu_init(void)
diff --git a/src/arch/arm/include/armv4/arch/cache.h b/src/arch/arm/include/armv4/arch/cache.h
index 6a3f593..e41ff9a 100644
--- a/src/arch/arm/include/armv4/arch/cache.h
+++ b/src/arch/arm/include/armv4/arch/cache.h
@@ -66,8 +66,8 @@ void dcache_mmu_disable(void);
 /* dcache and MMU enable */
 void dcache_mmu_enable(void);
-/* icache invalidate all (on current level given by CSSELR) */
-void icache_invalidate_all(void);
+/* perform all icache/dcache maintenance needed after loading new code */
+void cache_sync_instructions(void);
 /* tlb invalidate all */
 void tlb_invalidate_all(void);
@@ -76,9 +76,6 @@ void tlb_invalidate_all(void);
  * Generalized setup/init functions
  */
-/* invalidate all caches on ARM */
-void arm_invalidate_caches(void);
-
 /* mmu initialization (set page table address, set permissions, etc) */
 void mmu_init(void);
diff --git a/src/arch/arm/include/armv7/arch/cache.h b/src/arch/arm/include/armv7/arch/cache.h
index 5210dfe..470eb55 100644
--- a/src/arch/arm/include/armv7/arch/cache.h
+++ b/src/arch/arm/include/armv7/arch/cache.h
@@ -93,18 +93,6 @@ static inline void isb(void)
  * Low-level TLB maintenance operations
  */
-/* invalidate entire data TLB */
-static inline void dtlbiall(void)
-{
-	asm volatile ("mcr p15, 0, %0, c8, c6, 0" : : "r" (0) : "memory");
-}
-
-/* invalidate entire instruction TLB */
-static inline void itlbiall(void)
-{
-	asm volatile ("mcr p15, 0, %0, c8, c5, 0" : : "r" (0));
-}
-
 /* invalidate entire unified TLB */
 static inline void tlbiall(void)
 {
@@ -313,8 +301,8 @@ void dcache_mmu_disable(void);
 /* dcache and MMU enable */
 void dcache_mmu_enable(void);
-/* icache invalidate all (on current level given by CSSELR) */
-void icache_invalidate_all(void);
+/* perform all icache/dcache maintenance needed after loading new code */
+void cache_sync_instructions(void);
 /* tlb invalidate all */
 void tlb_invalidate_all(void);
@@ -323,9 +311,6 @@ void tlb_invalidate_all(void);
  * Generalized setup/init functions
  */
-/* invalidate all caches on ARM */
-void arm_invalidate_caches(void);
-
 /* mmu initialization (set page table address, set permissions, etc) */
 void mmu_init(void);
diff --git a/src/arch/arm/stages.c b/src/arch/arm/stages.c
index 38d1b19..47f13fa 100644
--- a/src/arch/arm/stages.c
+++ b/src/arch/arm/stages.c
@@ -20,12 +20,7 @@
 /*
  * This file contains entry/exit functions for each stage during coreboot
  * execution (bootblock entry and ramstage exit will depend on external
- * loading.
- *
- * Unlike other files, this one should be compiled with a -m option to
- * specify a pre-determined instruction set. This is to ensure consistency
- * in the CPU operating mode (ARM or Thumb) when hand-off between stages
- * occurs.
+ * loading).
  *
  * Entry points must be placed at the location the previous stage jumps
  * to (the lowest address in the stage image). This is done by giving
@@ -49,13 +44,10 @@ void stage_entry(void)
 void stage_exit(void *addr)
 {
 	void (*doit)(void) = addr;
-	/* make sure any code we installed is written to memory. Not all ARM have
-	 * unified caches.
-	 */
-	dcache_clean_all();
-	/* Because most stages copy code to memory, it's a safe and hygienic thing
-	 * to flush the icache here.
+	/*
+	 * Most stages load code so we need to sync caches here. Should maybe
+	 * go into cbfs_load_stage() instead...
 	 */
-	icache_invalidate_all();
+	cache_sync_instructions();
 	doit();
 }
diff --git a/src/mainboard/google/daisy/mainboard.c b/src/mainboard/google/daisy/mainboard.c
index 0adadb6..8a252b1 100644
--- a/src/mainboard/google/daisy/mainboard.c
+++ b/src/mainboard/google/daisy/mainboard.c
@@ -334,7 +334,6 @@ static void mainboard_enable(device_t dev)
 	mmu_config_range(DRAM_START, DRAM_SIZE, DCACHE_WRITEBACK);
 	mmu_config_range(DMA_START >> 20, DMA_SIZE >> 20, DCACHE_OFF);
 	mmu_config_range(DRAM_END, 4096 - DRAM_END, DCACHE_OFF);
-	dcache_invalidate_all();
 	dcache_mmu_enable();
 	const unsigned epll_hz = 192000000;
diff --git a/src/mainboard/google/nyan/romstage.c b/src/mainboard/google/nyan/romstage.c
index ea40388..8d8e6a9 100644
--- a/src/mainboard/google/nyan/romstage.c
+++ b/src/mainboard/google/nyan/romstage.c
@@ -72,25 +72,12 @@ static void configure_l2actlr(void)
 	write_l2actlr(val);
 }
-void main(void)
+static void __attribute__((noinline)) romstage(void)
 {
 #if CONFIG_COLLECT_TIMESTAMPS
 	uint64_t romstage_start_time = timestamp_get();
 #endif
-	// Globally disable MMU, caches and branch prediction (these should
-	// already be disabled by default on reset).
-	uint32_t sctlr = read_sctlr();
-	sctlr &= ~(SCTLR_M | SCTLR_C | SCTLR_Z | SCTLR_I);
-	write_sctlr(sctlr);
-
-	arm_invalidate_caches();
-
-	// Renable icache and branch prediction.
-	sctlr = read_sctlr();
-	sctlr |= SCTLR_Z | SCTLR_I;
-	write_sctlr(sctlr);
-
 	configure_l2ctlr();
 	configure_l2actlr();
@@ -104,7 +91,6 @@ void main(void)
 			 CONFIG_DRAM_DMA_SIZE >> 20, DCACHE_OFF);
 	mmu_config_range(DRAM_END, 4096 - DRAM_END, DCACHE_OFF);
 	mmu_disable_range(0, 1);
-	dcache_invalidate_all();
 	dcache_mmu_enable();
 	/* For quality of the user experience, it's important to get
@@ -140,3 +126,10 @@ void main(void)
 #endif
 	stage_exit(entry);
 }
+
+/* Stub to force arm_init_caches to the top, before any stack/memory accesses */
+void main(void)
+{
+	asm ("bl arm_init_caches" ::: "r0","r1","r2","r3","r4","r5","ip");
+	romstage();
+}
diff --git a/src/mainboard/google/nyan_big/romstage.c b/src/mainboard/google/nyan_big/romstage.c
index 0e71020..c013d11 100644
--- a/src/mainboard/google/nyan_big/romstage.c
+++ b/src/mainboard/google/nyan_big/romstage.c
@@ -72,25 +72,12 @@ static void configure_l2actlr(void)
 	write_l2actlr(val);
 }
-void main(void)
+static void __attribute__((noinline)) romstage(void)
 {
 #if CONFIG_COLLECT_TIMESTAMPS
 	uint64_t romstage_start_time = timestamp_get();
 #endif
-	// Globally disable MMU, caches and branch prediction (these should
-	// already be disabled by default on reset).
-	uint32_t sctlr = read_sctlr();
-	sctlr &= ~(SCTLR_M | SCTLR_C | SCTLR_Z | SCTLR_I);
-	write_sctlr(sctlr);
-
-	arm_invalidate_caches();
-
-	// Renable icache and branch prediction.
-	sctlr = read_sctlr();
-	sctlr |= SCTLR_Z | SCTLR_I;
-	write_sctlr(sctlr);
-
 	configure_l2ctlr();
 	configure_l2actlr();
@@ -104,7 +91,6 @@ void main(void)
 			 CONFIG_DRAM_DMA_SIZE >> 20, DCACHE_OFF);
 	mmu_config_range(DRAM_END, 4096 - DRAM_END, DCACHE_OFF);
 	mmu_disable_range(0, 1);
-	dcache_invalidate_all();
 	dcache_mmu_enable();
 	/* For quality of the user experience, it's important to get
@@ -140,3 +126,10 @@ void main(void)
 #endif
 	stage_exit(entry);
 }
+
+/* Stub to force arm_init_caches to the top, before any stack/memory accesses */
+void main(void)
+{
+	asm ("bl arm_init_caches" ::: "r0","r1","r2","r3","r4","r5","ip");
+	romstage();
+}
diff --git a/src/mainboard/google/peach_pit/mainboard.c b/src/mainboard/google/peach_pit/mainboard.c
index 1fb441d..706447a 100644
--- a/src/mainboard/google/peach_pit/mainboard.c
+++ b/src/mainboard/google/peach_pit/mainboard.c
@@ -469,7 +469,6 @@ static void mainboard_enable(device_t dev)
 	/* set up caching for the DRAM */
 	mmu_config_range(DRAM_START, DRAM_SIZE, DCACHE_WRITEBACK);
 	mmu_config_range(DMA_START >> 20, DMA_SIZE >> 20, DCACHE_OFF);
-	tlb_invalidate_all();
 	const unsigned epll_hz = 192000000;
 	const unsigned sample_rate = 48000;
diff --git a/src/soc/samsung/exynos5420/wakeup.c b/src/soc/samsung/exynos5420/wakeup.c
index 753afd9..a240717 100644
--- a/src/soc/samsung/exynos5420/wakeup.c
+++ b/src/soc/samsung/exynos5420/wakeup.c
@@ -29,7 +29,6 @@ void wakeup(void)
 	power_init(); /* Ensure ps_hold_setup() for early wakeup. */
 	dcache_mmu_disable();
-	icache_invalidate_all();
 	power_exit_wakeup();
 	/* Should never return. If we do, reset. */
 	power_reset();