David Hendricks (dhendrix@chromium.org) just uploaded a new patch set to gerrit, which you can find at http://review.coreboot.org/2729
-gerrit
commit a1223c4ea1ca094341e18ec1737e27b2ce22b2a7 Author: David Hendricks dhendrix@chromium.org Date: Thu Mar 14 15:24:57 2013 -0700
armv7/exynos/snow: set up caches properly
** do not submit **
This (hopefully) sets up caches more carefully than we were doing before. This patch needs a bit more testing before going in.
TODO:
- Get rid of the imported CP15ISB and isb() macros. They are wrong for ARMv7 -- u-boot assumed that the code would be compiled with -march=armv5 (see the comment in armv7.h).
- Clean up. Most of this is now done in a generic manner in bootblock_simple, so a lot of the earlier code can go away.
- Set the cache policy explicitly before re-enabling the caches.
- Replace the imported cache routines (and use a BSD license to match libpayload?).
Change-Id: I7390981190e3213f4e1431f8e56746545c5cc7c9 Signed-off-by: David Hendricks dhendrix@chromium.org --- src/arch/armv7/bootblock_simple.c | 242 ++++++++++++++++++++++++++++++ src/arch/armv7/include/system.h | 18 ++- src/arch/armv7/lib/Makefile.inc | 1 + src/arch/armv7/lib/cache-cp15.c | 3 +- src/arch/armv7/lib/cache_v7.c | 2 +- src/cpu/samsung/exynos5250/Makefile.inc | 1 - src/cpu/samsung/exynos5250/bootblock.c | 2 + src/cpu/samsung/exynos5250/exynos_cache.c | 8 +- src/mainboard/google/snow/ramstage.c | 15 -- src/mainboard/google/snow/romstage.c | 54 ++++--- 10 files changed, 296 insertions(+), 50 deletions(-)
diff --git a/src/arch/armv7/bootblock_simple.c b/src/arch/armv7/bootblock_simple.c index 0132b87..c5d900d 100644 --- a/src/arch/armv7/bootblock_simple.c +++ b/src/arch/armv7/bootblock_simple.c @@ -24,9 +24,233 @@ #include <arch/stages.h> #include <cbfs.h> #include <console/console.h> +#include <system.h>
#include "stages.c"
+/* FIXME: gross hack to get around src/include/lib.h's romcc limitation. Note: + * returns 0 in the log2(0) case. */ +static unsigned long log2(unsigned long u) +{ + int i = 0; + + while (u >>= 1) + i++; + + return i; +} + +#if 0 +/* + * Set/clear program flow prediction and return the previous state. + */ +static int config_branch_prediction(int set_cr_z) +{ + unsigned int cr; + + /* System Control Register: 11th bit Z Branch prediction enable */ + cr = get_cr(); + set_cr(set_cr_z ? cr | CR_Z : cr & ~CR_Z); + + return cr & CR_Z; +} +#endif + +static inline uint32_t read_clidr(void) +{ + uint32_t val = 0; + asm volatile ("mrc p15, 1, %0, c0, c0, 1" : "=r" (val)); + return val; +} + +static inline uint32_t read_ccsidr(void) +{ + uint32_t val = 0; + asm volatile ("mrc p15, 1, %0, c0, c0, 0" : "=r" (val)); + return val; +} + +static inline uint32_t read_csselr(void) +{ + uint32_t val = 0; + asm volatile ("mrc p15, 2, %0, c0, c0, 0" : "=r" (val)); + return val; +} + +/* Write to Cache Size Selection Register (CSSELR) */ +static inline void write_csselr(uint32_t val) +{ + /* + * Bits [3:1] - Cache level + 1 (0b000 = L1, 0b110 = L7, 0b111 is rsvd) + * Bit 0 - 0 = data or unified cache, 1 = instruction cache + */ + asm volatile ("mcr p15, 2, %0, c0, c0, 0" : : "r" (val)); + isb(); /* ISB to sync the change in case CCSIDR follows */ +} + +/* Branch predictor invalidate all */ +static inline void bpiall(void) +{ + asm volatile ("mcr p15, 0, %0, c7, c5, 6" : : "r" (0)); +} + +/* Invalidate entire unified TLB */ +static inline void tlbiall(void) +{ + asm volatile ("mcr p15, 0, %0, c8, c7, 0" : : "r" (0)); +} + +/* Instruction cache invalidate all by PoU */ +static inline void iciallu(void) +{ + asm volatile ("mcr p15, 0, %0, c7, c5, 0" : : "r" (0)); +} + +/* Data cache invalidate all by set/way */ +static inline void dcisw(uint32_t val) +{ + asm volatile ("mcr p15, 0, %0, c7, c6, 2" : : "r" (val)); +} + +static inline void dsb(void) +{ + asm volatile ("dsb"); +} + +/* FIXME: 
replace isb() in system.h */ +static inline void myisb(void) +{ + asm volatile ("isb"); +} + +static void clear_icache(unsigned int level) +{ + uint32_t ccselr; + + ccselr = (level << 1) | 1; + write_csselr(ccselr); + + /* icache can be entirely invalidated with one operation. + * Note: If branch predictors are architecturally-visible, ICIALLU + * also performs a BPIALL operation (B2-1283 in arch manual) + */ + iciallu(); + isb(); +} + +#define bitmask(high, low) ((1UL << (high)) + \ + ((1UL << (high)) - 1) - ((1UL << (low)) - 1)) + +static void clear_dcache(unsigned int level) +{ + uint32_t ccsidr, ccselr; + unsigned int associativity, num_sets, linesize_bytes; + unsigned int set, way; + + ccselr = level << 1; + write_csselr(ccselr); + + /* + * dcache must be invalidated by set/way for portability since virtual + * memory mapping is system-defined. The number of sets and + * associativity is given by CCSIDR. We'll use DCISW to invalidate the + * dcache. + */ + ccsidr = read_ccsidr(); + + /* FIXME: rounding up required here? */ + num_sets = ((ccsidr & bitmask(27, 13)) >> 13) + 1; + associativity = ((ccsidr & bitmask(12, 3)) >> 3) + 1; + /* FIXME: do we need to use CTR.DminLine here? */ + linesize_bytes = (1 << ((ccsidr & 0x7) + 2)) * 4; + + /* + * Set/way operations require an interesting bit packing. 
See section + * B4-35 in the ARMv7 Architecture Reference Manual: + * + * A: Log2(associativity) + * B: L+S + * L: Log2(linesize) + * S: Log2(num_sets) + * + * The bits are packed as follows: + * 31 31-A B B-1 L L-1 4 3 1 0 + * |---|-------------|--------|-------|-----|-| + * |Way| zeros | Set | zeros |level|0| + * |---|-------------|--------|-------|-----|-| + */ + for (way = 0; way < associativity; way++) { + for (set = 0; set < num_sets; set++) { + uint32_t val = 0; + val |= way << (32 - log2(associativity)); + val |= set << log2(linesize_bytes); + val |= level << 1; + dcisw(val); + } + } + + dsb(); +} + +static void armv7_invalidate_caches(void) +{ + uint32_t clidr; + int level; + + /* + * From Cortex-A Series Programmers guide (Example 15-3): + * 1. Disable MMU + * 2. Disable L1 caches (example disables i-cache then d-cache) + * 3. Invalidate L1 caches (same order) + * 4. Invalidate TLB + * + * L2 cache setup will be done by SoC-specific code. MMU setup + * needs to be done after DRAM init in board-specific code. + */ + + /* Invalidate branch predictor */ + bpiall(); + + /* + * Iterate thru each cache identified in CLIDR and invalidate. + */ + clidr = read_clidr(); + for (level = 0; level < 7; level++) { + unsigned int ctype = (clidr >> (level * 3)) & 0x7; + switch(ctype) { + case 0x0: + /* no cache */ + break; + case 0x1: + /* icache only */ + clear_icache(level); + break; + case 0x2: + case 0x4: + /* dcache only or unified cache */ + clear_dcache(level); + break; + case 0x3: + /* separate icache and dcache */ + clear_icache(level); + clear_dcache(level); + break; + default: + /* reserved */ + break; + } + } + + /* Invalidate TLB */ + v7_inval_tlb(); + /* FIXME: ARMv7 Architecture Ref. Manual claims that the distinction + * instruction vs. data TLBs is deprecated in ARMv7. But that doesn't + * really seem true for Cortex-A15? 
*/ +// itlbiall(); +// dtlbiall(); +// tlbiall(); +} + static int boot_cpu(void) { /* @@ -41,6 +265,24 @@ void main(void) { const char *stage_name = "fallback/romstage"; void *entry; + uint32_t sctlr; + + /* Globally disable MMU, caches, and branch prediction (these should + * be disabled by default on reset) */ + sctlr = get_cr(); + sctlr &= ~(CR_M | CR_C | CR_Z | CR_I); + set_cr(sctlr); + + armv7_invalidate_caches(); + + /* + * Re-enable caches and branch prediction. MMU will be set up later. + * Note: If booting from USB, we need to disable branch prediction + * before copying from USB into RAM (FIXME: why?) + */ + sctlr = get_cr(); + sctlr |= CR_C | CR_Z | CR_I; + set_cr(sctlr);
if (boot_cpu()) { bootblock_cpu_init(); diff --git a/src/arch/armv7/include/system.h b/src/arch/armv7/include/system.h index 053df8d..fdafd15 100644 --- a/src/arch/armv7/include/system.h +++ b/src/arch/armv7/include/system.h @@ -61,7 +61,21 @@ static inline void set_cr(unsigned int val) { asm volatile("mcr p15, 0, %0, c1, c0, 0 @ set CR" : : "r" (val) : "cc"); - isb(); + isb(); /* ref: B3.10.2 of ARM Arch Ref. Manual for ARMv7 */ +} + +static inline unsigned int get_acr(void) +{ + unsigned int val; + asm("mrc p15, 0, %0, c1, c0, 1 @ get ACR" : "=r" (val) : : "cc"); + return val; +} + +static inline void set_acr(unsigned int val) +{ + asm volatile("mcr p15, 0, %0, c1, c0, 1 @ set ACR" + : : "r" (val) : "cc"); + isb(); /* ref: B3.10.2 of ARM Arch Ref. Manual for ARMv7 */ }
/* options available for data cache on each page */ @@ -97,6 +111,8 @@ void mmu_page_table_flush(unsigned long start, unsigned long stop);
void mmu_setup(unsigned long start, unsigned long size);
+void v7_inval_tlb(void); + void arm_init_before_mmu(void);
/* diff --git a/src/arch/armv7/lib/Makefile.inc b/src/arch/armv7/lib/Makefile.inc index 508f776..75e6752 100644 --- a/src/arch/armv7/lib/Makefile.inc +++ b/src/arch/armv7/lib/Makefile.inc @@ -14,6 +14,7 @@ ramstage-y += div0.c #ramstage-y += memcpy.S #ramstage-y += memset.S ramstage-y += syslib.c +ramstage-y += cache-cp15.c ramstage-y += cache_v7.c
#FIXME(dhendrix): should this be a config option? diff --git a/src/arch/armv7/lib/cache-cp15.c b/src/arch/armv7/lib/cache-cp15.c index e08ea57..32f3c79 100644 --- a/src/arch/armv7/lib/cache-cp15.c +++ b/src/arch/armv7/lib/cache-cp15.c @@ -123,8 +123,7 @@ inline void mmu_setup(unsigned long start, unsigned long size_mb) int i; u32 reg;
- arm_init_before_mmu(); - +// arm_init_before_mmu(); /* Set up an identity-mapping for all 4GB, rw for everyone */ for (i = 0; i < 4096; i++) set_section_dcache(i, DCACHE_OFF); diff --git a/src/arch/armv7/lib/cache_v7.c b/src/arch/armv7/lib/cache_v7.c index 31072c7..1764351 100644 --- a/src/arch/armv7/lib/cache_v7.c +++ b/src/arch/armv7/lib/cache_v7.c @@ -226,7 +226,7 @@ static void v7_dcache_maint_range(u32 start, u32 stop, u32 range_op) }
/* Invalidate TLB */ -static void v7_inval_tlb(void) +void v7_inval_tlb(void) { /* Invalidate entire unified TLB */ asm volatile ("mcr p15, 0, %0, c8, c7, 0" : : "r" (0)); diff --git a/src/cpu/samsung/exynos5250/Makefile.inc b/src/cpu/samsung/exynos5250/Makefile.inc index 2774b12..961b719 100644 --- a/src/cpu/samsung/exynos5250/Makefile.inc +++ b/src/cpu/samsung/exynos5250/Makefile.inc @@ -30,7 +30,6 @@ ramstage-y += power.c ramstage-y += soc.c ramstage-$(CONFIG_CONSOLE_SERIAL_UART) += uart.c ramstage-y += cpu.c -ramstage-y += exynos_cache.c
#ramstage-$(CONFIG_SATA_AHCI) += sata.c
diff --git a/src/cpu/samsung/exynos5250/bootblock.c b/src/cpu/samsung/exynos5250/bootblock.c index 949468f..4995dc2 100644 --- a/src/cpu/samsung/exynos5250/bootblock.c +++ b/src/cpu/samsung/exynos5250/bootblock.c @@ -17,6 +17,8 @@ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
+#include <armv7.h> + void bootblock_cpu_init(void); void bootblock_cpu_init(void) { diff --git a/src/cpu/samsung/exynos5250/exynos_cache.c b/src/cpu/samsung/exynos5250/exynos_cache.c index 7f4effe..87eded5 100644 --- a/src/cpu/samsung/exynos5250/exynos_cache.c +++ b/src/cpu/samsung/exynos5250/exynos_cache.c @@ -34,15 +34,11 @@ enum l2_cache_params { };
-/* FIXME(dhendrix): maybe move this to a romstage-specific file? */ -#ifdef __PRE_RAM__ void enable_caches(void) { - /* Enable D-cache. I-cache is already enabled in start.S */ - /* can't use it anyway -- it has dependencies we have to fix. */ - //dcache_enable(); + icache_enable(CONFIG_SYS_SDRAM_BASE, CONFIG_DRAM_SIZE_MB); + dcache_enable(CONFIG_SYS_SDRAM_BASE, CONFIG_DRAM_SIZE_MB); } -#endif
/* * Set L2 cache parameters diff --git a/src/mainboard/google/snow/ramstage.c b/src/mainboard/google/snow/ramstage.c index e4d53cf..687f9b1 100644 --- a/src/mainboard/google/snow/ramstage.c +++ b/src/mainboard/google/snow/ramstage.c @@ -23,22 +23,10 @@ #include <cpu/samsung/exynos5250/clk.h> #include <cpu/samsung/exynos5250/power.h>
-#include <system.h> /* FIXME: for testing cache */ -static void cp_delay(void) -{ - volatile int i; - - /* copro seems to need some delay between reading and writing */ - for (i = 0; i < 100; i++) - nop(); - asm volatile("" : : : "memory"); -} - static inline uint32_t read_clidr(void) { uint32_t val = 0; asm volatile ("mrc p15, 1, %0, c0, c0, 1" : "=r" (val)); - isb(); return val; }
@@ -46,7 +34,6 @@ static inline uint32_t read_ccsidr(void) { uint32_t val = 0; asm volatile ("mrc p15, 1, %0, c0, c0, 0" : "=r" (val)); - isb(); return val; }
@@ -54,7 +41,6 @@ static inline uint32_t read_csselr(void) { uint32_t val = 0; asm volatile ("mrc p15, 2, %0, c0, c0, 0" : "=r" (val)); - isb(); return val; }
@@ -66,7 +52,6 @@ static inline void write_csselr(uint32_t val) * Bit 0 - 0 = data or unified cache, 1 = instruction cache */ asm volatile ("mcr p15, 2, %0, c0, c0, 0" : : "r" (val)); - isb(); }
#ifndef __mask diff --git a/src/mainboard/google/snow/romstage.c b/src/mainboard/google/snow/romstage.c index ea2feec..0a46f2d 100644 --- a/src/mainboard/google/snow/romstage.c +++ b/src/mainboard/google/snow/romstage.c @@ -20,6 +20,7 @@ #include <types.h> #include <system.h>
+#include <armv7.h> #include <cache.h> #include <cbfs.h> #include <common.h> @@ -52,20 +53,6 @@ static int board_wakeup_permitted(void) } #endif
-/* - * Set/clear program flow prediction and return the previous state. - */ -static int config_branch_prediction(int set_cr_z) -{ - unsigned int cr; - - /* System Control Register: 11th bit Z Branch prediction enable */ - cr = get_cr(); - set_cr(set_cr_z ? cr | CR_Z : cr & ~CR_Z); - - return cr & CR_Z; -} - static void initialize_s5p_mshc(void) { /* MMC0: Fixed, 8 bit mode, connected with GPIO. */ @@ -88,19 +75,33 @@ static void graphics(void) exynos_pinmux_config(PERIPH_ID_DPHPD, 0); }
+static inline void dsb(void) +{ + asm volatile ("dsb"); +} + +/* FIXME: replace isb() from system.h */ +static inline void my_isb(void) +{ + asm volatile ("isb"); +} + void main(void) { struct mem_timings *mem; struct arm_clk_ratios *arm_ratios; int ret; void *entry; - - /* FIXME: if we boot from USB, we need to disable branch prediction - * before copying from USB into RAM */ - config_branch_prediction(1); + unsigned int cr;
clock_set_rate(PERIPH_ID_SPI1, 50000000); /* set spi clock to 50Mhz */
+ /* + * FIXME: Do necessary I2C init so low-level PMIC code doesn't need to. + * Also, we should only call power_init() on cold boot. + */ + power_init(); + /* Clock must be initialized before console_init, otherwise you may need * to re-initialize serial console drivers again. */ mem = get_mem_timings(); @@ -109,12 +110,6 @@ void main(void)
console_init();
- /* - * FIXME: Do necessary I2C init so low-level PMIC code doesn't need to. - * Also, we should only call power_init() on cold boot. - */ - power_init(); - if (!mem) { printk(BIOS_CRIT, "Unable to auto-detect memory timings\n"); while(1); @@ -132,7 +127,18 @@ void main(void) while(1); }
+ /* Set up MMU and caches */ mmu_setup(CONFIG_SYS_SDRAM_BASE, CONFIG_DRAM_SIZE_MB); +// cr = get_cr(); +// cr |= CR_C | CR_I; +// set_cr(cr); + + /* Enable D-side prefetch */ + cr = get_acr(); + cr |= (1 << 2); + set_acr(cr); + dsb(); + my_isb();
initialize_s5p_mshc();