Paul Menzel (paulepanter(a)users.sourceforge.net) just uploaded a new patch set to gerrit, which you can find at http://review.coreboot.org/6969
-gerrit
commit 20b8079f1ae5ea6588e95c6029cef3aeb0d37fe1
Author: Isaac Christensen <isaac.christensen(a)se-eng.com>
Date: Wed Sep 24 14:59:32 2014 -0600
x86/mtrr: Enable MTRRs before enabling caching
Fix up the following commit by enabling the MTRRs before enabling caching:
7756fe7 x86: Minimize work done with the caches disabled in mtrr functions.
Also fix two typos in comments.
Change-Id: If751b815f9dab781fc38c898cf692f0940c57695
Signed-off-by: Isaac Christensen <isaac.christensen(a)se-eng.com>
---
src/cpu/x86/mtrr/mtrr.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/src/cpu/x86/mtrr/mtrr.c b/src/cpu/x86/mtrr/mtrr.c
index e0392f7..69cd2d2 100644
--- a/src/cpu/x86/mtrr/mtrr.c
+++ b/src/cpu/x86/mtrr/mtrr.c
@@ -51,8 +51,8 @@
#define OS_MTRRS 2
#define MTRRS (BIOS_MTRRS + OS_MTRRS)
/*
- * Static storage size for variable MTRRs. Its sized sufficiently large to
- * handle different types of CPUs. Empiricially, 16 variable MTRRs has not
+ * Static storage size for variable MTRRs. It's sized sufficiently large to
+ * handle different types of CPUs. Empirically, 16 variable MTRRs has not
* yet been observed.
*/
#define NUM_MTRR_STATIC_STORAGE 16
@@ -769,7 +769,7 @@ static void commit_var_mtrrs(const struct var_mtrr_solution *sol)
{
int i;
- /* Write out the variable MTTRs. */
+ /* Write out the variable MTRRs. */
disable_cache();
for (i = 0; i < sol->num_used; i++) {
wrmsr(MTRRphysBase_MSR(i), sol->regs[i].base);
@@ -778,6 +778,7 @@ static void commit_var_mtrrs(const struct var_mtrr_solution *sol)
/* Clear the ones that are unused. */
for (; i < total_mtrrs; i++)
clear_var_mtrr(i);
+ enable_var_mtrr(sol->mtrr_default_type);
enable_cache();
}
@@ -800,7 +801,6 @@ void x86_setup_var_mtrrs(unsigned int address_bits, unsigned int above4gb)
}
commit_var_mtrrs(sol);
- enable_var_mtrr(sol->mtrr_default_type);
}
void x86_setup_mtrrs(void)
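For context, commit_var_mtrrs() after this change reads roughly as the sketch below, reconstructed from the hunks above. The MTRRphysMask_MSR() write sits in elided context lines and is assumed unchanged, so treat this as a sketch rather than the verbatim file:

static void commit_var_mtrrs(const struct var_mtrr_solution *sol)
{
	int i;

	/* Write out the variable MTRRs. */
	disable_cache();
	for (i = 0; i < sol->num_used; i++) {
		wrmsr(MTRRphysBase_MSR(i), sol->regs[i].base);
		wrmsr(MTRRphysMask_MSR(i), sol->regs[i].mask);
	}
	/* Clear the ones that are unused. */
	for (; i < total_mtrrs; i++)
		clear_var_mtrr(i);
	/* Moved here by this patch: the MTRRs are switched on while the
	 * cache is still disabled, rather than after enable_cache() has
	 * already run in x86_setup_var_mtrrs(). */
	enable_var_mtrr(sol->mtrr_default_type);
	enable_cache();
}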
Isaac Christensen (isaac.christensen(a)se-eng.com) just uploaded a new patch set to gerrit, which you can find at http://review.coreboot.org/6931
-gerrit
commit aa2eb4347c4a19458a5e33429c064d49b7171085
Author: Julius Werner <jwerner(a)chromium.org>
Date: Mon Jan 13 11:13:23 2014 -0800
arm: Import armv7_dcache_wbinv_all function from NetBSD
This patch pulls in NetBSD's full cache flushing algorithm for ARM, to
replace our old, slow and slightly overzealous C-only implementation.
It's a beautiful piece of code that manages to run on only caller-saved
registers (meaning it doesn't need to write to memory) in a very tight
loop, and it's BSD-licensed to boot (which we need for libpayload).
Unfortunately, it's also not quite correct, but I can fix that. I'm pulling
in the original as a separate commit to make it more obvious which changes
are mine.
Change-Id: I7a71c9e570866a6e25f756cb09ae2b6445048d83
Signed-off-by: Julius Werner <jwerner(a)chromium.org>
Reviewed-on: https://chromium-review.googlesource.com/183878
Reviewed-by: Stefan Reinauer <reinauer(a)google.com>
Reviewed-by: Vincent Palatin <vpalatin(a)chromium.org>
Reviewed-by: David Hendricks <dhendrix(a)chromium.org>
(cherry picked from commit 4698467320613d7ddc39714f40aacbc990af9399)
Signed-off-by: Isaac Christensen <isaac.christensen(a)se-eng.com>
---
src/arch/arm/armv7/cpu.S | 99 ++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 99 insertions(+)
diff --git a/src/arch/arm/armv7/cpu.S b/src/arch/arm/armv7/cpu.S
new file mode 100644
index 0000000..42e2354
--- /dev/null
+++ b/src/arch/arm/armv7/cpu.S
@@ -0,0 +1,99 @@
+/*
+ * Optimized assembly for low-level CPU operations on ARMv7 processors.
+ *
+ * Cache flushing code based off sys/arch/arm/arm/cpufunc_asm_armv7.S in NetBSD
+ *
+ * Copyright (c) 2010 Per Odlund <per.odlund(a)armagedon.se>
+ * Copyright (c) 2014 Google Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * These work very hard to not push registers onto the stack and to limit themselves
+ * to use r0-r3 and ip.
+ */
+
+/* * LINTSTUB: void armv7_dcache_wbinv_all(void); */
+ENTRY_NP(armv7_dcache_wbinv_all)
+ mrc p15, 1, r0, c0, c0, 1 @ read CLIDR
+ ands r3, r0, #0x07000000
+ beq .Ldone_wbinv
+ lsr r3, r3, #23 @ left align loc (low 4 bits)
+
+ mov r1, #0
+.Lstart_wbinv:
+ add r2, r3, r3, lsr #1 @ r2 = level * 3 / 2
+ mov r1, r0, lsr r2 @ r1 = cache type
+ bfc r1, #3, #28
+ cmp r1, #2 @ is it data or i&d?
+ blt .Lnext_level_wbinv @ nope, skip level
+
+ mcr p15, 2, r3, c0, c0, 0 @ select cache level
+ isb
+ mrc p15, 1, r0, c0, c0, 0 @ read CCSIDR
+
+ ubfx ip, r0, #0, #3 @ get linesize from CCSIDR
+ add ip, ip, #4 @ apply bias
+ ubfx r2, r0, #13, #15 @ get numsets - 1 from CCSIDR
+ lsl r2, r2, ip @ shift to set position
+ orr r3, r3, r2 @ merge set into way/set/level
+ mov r1, #1
+ lsl r1, r1, ip @ r1 = set decr
+
+ ubfx ip, r0, #3, #10 @ get numways - 1 from [to be discarded] CCSIDR
+ clz r2, ip @ number of bits to MSB of way
+ lsl ip, ip, r2 @ shift by that into way position
+ mov r0, #1 @
+ lsl r2, r0, r2 @ r2 now contains the way decr
+ mov r0, r3 @ get sets/level (no way yet)
+ orr r3, r3, ip @ merge way into way/set/level
+ bfc r0, #0, #4 @ clear low 4 bits (level) to get numset - 1
+ sub r2, r2, r0 @ subtract from way decr
+
+ /* r3 = ways/sets/level, r2 = way decr, r1 = set decr, r0 and ip are free */
+1: mcr p15, 0, r3, c7, c14, 2 @ writeback and invalidate line
+ cmp r3, #15 @ are we done with this level (way/set == 0)
+ bls .Lnext_level_wbinv @ yes, go to next level
+ lsl r0, r3, #10 @ clear way bits leaving only set/level bits
+ lsr r0, r0, #4 @ clear level bits leaving only set bits
+ subne r3, r3, r1 @ non-zero?, decrement set #
+ subeq r3, r3, r2 @ zero?, decrement way # and restore set count
+ b 1b
+
+.Lnext_level_wbinv:
+ mrc p15, 1, r0, c0, c0, 1 @ read CLIDR
+ and ip, r0, #0x07000000 @ narrow to LoC
+ lsr ip, ip, #23 @ left align LoC (low 4 bits)
+ add r3, r3, #2 @ go to next level
+ cmp r3, ip @ compare
+ blt .Lstart_wbinv @ not done, next level (r0 == CLIDR)
+
+.Ldone_wbinv:
+ mov r0, #0 @ default back to cache level 0
+ mcr p15, 2, r0, c0, c0, 0 @ select cache level
+ dsb
+ isb
+ bx lr
+END(armv7_dcache_wbinv_all)
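For reference, the value this loop feeds to the MCR instruction (DCCISW, i.e. clean and invalidate data cache line by set/way) follows the ARMv7 set/way operand encoding. Below is a rough C model of how the shifts above derive it from CCSIDR, using a hypothetical helper name that is not part of the patch:

#include <stdint.h>

/* Model of the operand assembled in r3 above. CCSIDR fields per the
 * ARMv7 ARM: LineSize in bits [2:0] (log2(bytes) - 4, hence the +4
 * "bias"), Associativity - 1 in bits [12:3], NumSets - 1 in [27:13].
 * Assumes a cache with more than one way so the clz is well defined. */
static uint32_t set_way_operand(uint32_t ccsidr, uint32_t level,
                                uint32_t way, uint32_t set)
{
	uint32_t set_shift = (ccsidr & 0x7) + 4;      /* "apply bias" above */
	uint32_t max_way = (ccsidr >> 3) & 0x3ff;     /* numways - 1 */
	uint32_t way_shift = __builtin_clz(max_way);  /* ways live in the MSBs */

	return (way << way_shift) | (set << set_shift) | (level << 1);
}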
Isaac Christensen (isaac.christensen(a)se-eng.com) just uploaded a new patch set to gerrit, which you can find at http://review.coreboot.org/6932
-gerrit
commit 0833a3566ba686595bfae103b812d3da79f149b7
Author: Julius Werner <jwerner(a)chromium.org>
Date: Wed Jan 15 14:13:25 2014 -0800
arm: Fix up new cache flush algorithm and replace dcache_*_all() with it
This patch fixes the remaining few bugs in our shiny new set/way/level
cache iteration algorithm to actually make it work: it makes it start
from cache level 0 (previously it would always start at LoC and be
"done" instantly), fixes up the two shifts that isolate the set bits at
the end (which didn't seem to account for the fact that the first shift
affects the second), and throws an S bit on that last shift so that it
actually affects the conditionals after it.
In addition, it also moves the next_level block to the top so that we can
share (and thus eliminate) some code at initialization, and turns the
whole thing into a thrice-instantiated macro to create functions that
fit our existing interface.
Change-Id: I1338a589cbb37d74ea6e7a3d4f67ff827e24edbe
Signed-off-by: Julius Werner <jwerner(a)chromium.org>
Reviewed-on: https://chromium-review.googlesource.com/183879
Reviewed-by: Stefan Reinauer <reinauer(a)google.com>
Reviewed-by: David Hendricks <dhendrix(a)chromium.org>
(cherry picked from commit 6d94f8330191c316fe093ddb5288329453da8a4b)
Signed-off-by: Isaac Christensen <isaac.christensen(a)se-eng.com>
---
payloads/libpayload/arch/arm/Makefile.inc | 2 +-
payloads/libpayload/arch/arm/cache.c | 125 ------------------------------
payloads/libpayload/arch/arm/cpu.S | 117 ++++++++++++++++++++++++++++
src/arch/arm/armv7/Makefile.inc | 3 +
src/arch/arm/armv7/cache.c | 125 ------------------------------
src/arch/arm/armv7/cpu.S | 72 ++++++++++-------
6 files changed, 166 insertions(+), 278 deletions(-)
diff --git a/payloads/libpayload/arch/arm/Makefile.inc b/payloads/libpayload/arch/arm/Makefile.inc
index 53a52df..0078ffe 100644
--- a/payloads/libpayload/arch/arm/Makefile.inc
+++ b/payloads/libpayload/arch/arm/Makefile.inc
@@ -35,5 +35,5 @@ libc-y += timer.c coreboot.c util.S
libc-y += virtual.c
libc-y += memcpy.S memset.S memmove.S
libc-y += exception_asm.S exception.c
-libc-y += cache.c
+libc-y += cache.c cpu.S
libcbfs-$(CONFIG_LP_CBFS) += dummy_media.c
diff --git a/payloads/libpayload/arch/arm/cache.c b/payloads/libpayload/arch/arm/cache.c
index 4c222ea..defe640 100644
--- a/payloads/libpayload/arch/arm/cache.c
+++ b/payloads/libpayload/arch/arm/cache.c
@@ -36,21 +36,6 @@
#include <arch/cache.h>
#include <arch/virtual.h>
-#define bitmask(high, low) ((1UL << (high)) + \
- ((1UL << (high)) - 1) - ((1UL << (low)) - 1))
-
-/* Basic log2() implementation. Note: log2(0) is 0 for our purposes. */
-/* FIXME: src/include/lib.h is difficult to work with due to romcc */
-static unsigned long log2(unsigned long u)
-{
- int i = 0;
-
- while (u >>= 1)
- i++;
-
- return i;
-}
-
void tlb_invalidate_all(void)
{
/*
@@ -85,116 +70,6 @@ enum dcache_op {
OP_DCIMVAC,
};
-/*
- * Do a dcache operation on entire cache by set/way. This is done for
- * portability because mapping of memory address to cache location is
- * implementation defined (See note on "Requirements for operations by
- * set/way" in arch ref. manual).
- */
-static void dcache_op_set_way(enum dcache_op op)
-{
- uint32_t ccsidr;
- unsigned int associativity, num_sets, linesize_bytes;
- unsigned int set, way;
- unsigned int level;
-
- level = (read_csselr() >> 1) & 0x7;
-
- /*
- * dcache must be invalidated by set/way for portability since virtual
- * memory mapping is system-defined. The number of sets and
- * associativity is given by CCSIDR. We'll use DCISW to invalidate the
- * dcache.
- */
- ccsidr = read_ccsidr();
-
- /* FIXME: rounding up required here? */
- num_sets = ((ccsidr & bitmask(27, 13)) >> 13) + 1;
- associativity = ((ccsidr & bitmask(12, 3)) >> 3) + 1;
- /* FIXME: do we need to use CTR.DminLine here? */
- linesize_bytes = (1 << ((ccsidr & 0x7) + 2)) * 4;
-
- dsb();
-
- /*
- * Set/way operations require an interesting bit packing. See section
- * B4-35 in the ARMv7 Architecture Reference Manual:
- *
- * A: Log2(associativity)
- * B: L+S
- * L: Log2(linesize)
- * S: Log2(num_sets)
- *
- * The bits are packed as follows:
- * 31 31-A B B-1 L L-1 4 3 1 0
- * |---|-------------|--------|-------|-----|-|
- * |Way| zeros | Set | zeros |level|0|
- * |---|-------------|--------|-------|-----|-|
- */
- for (way = 0; way < associativity; way++) {
- for (set = 0; set < num_sets; set++) {
- uint32_t val = 0;
- val |= way << (32 - log2(associativity));
- val |= set << log2(linesize_bytes);
- val |= level << 1;
- switch(op) {
- case OP_DCCISW:
- dccisw(val);
- break;
- case OP_DCISW:
- dcisw(val);
- break;
- case OP_DCCSW:
- dccsw(val);
- break;
- default:
- break;
- }
- }
- }
- isb();
-}
-
-static void dcache_foreach(enum dcache_op op)
-{
- uint32_t clidr;
- int level;
-
- clidr = read_clidr();
- for (level = 0; level < 7; level++) {
- unsigned int ctype = (clidr >> (level * 3)) & 0x7;
- uint32_t csselr;
-
- switch(ctype) {
- case 0x2:
- case 0x3:
- case 0x4:
- csselr = level << 1;
- write_csselr(csselr);
- dcache_op_set_way(op);
- break;
- default:
- /* no cache, icache only, or reserved */
- break;
- }
- }
-}
-
-void dcache_clean_all(void)
-{
- dcache_foreach(OP_DCCSW);
-}
-
-void dcache_clean_invalidate_all(void)
-{
- dcache_foreach(OP_DCCISW);
-}
-
-void dcache_invalidate_all(void)
-{
- dcache_foreach(OP_DCISW);
-}
-
unsigned int dcache_line_bytes(void)
{
uint32_t ccsidr;
diff --git a/payloads/libpayload/arch/arm/cpu.S b/payloads/libpayload/arch/arm/cpu.S
new file mode 100644
index 0000000..29a19e7
--- /dev/null
+++ b/payloads/libpayload/arch/arm/cpu.S
@@ -0,0 +1,117 @@
+/*
+ * Optimized assembly for low-level CPU operations on ARMv7 processors.
+ *
+ * Cache flushing code based off sys/arch/arm/arm/cpufunc_asm_armv7.S in NetBSD
+ *
+ * Copyright (c) 2010 Per Odlund <per.odlund(a)armagedon.se>
+ * Copyright (c) 2014 Google Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <arch/asm.h>
+
+/*
+ * Dcache invalidations by set/way work by passing a [way:sbz:set:sbz:level:0]
+ * bitfield in a register to the appropriate MCR instruction. This algorithm
+ * works by initializing a bitfield with the highest-numbered set and way, and
+ * generating a "set decrement" and a "way decrement". The former just contains
+ * the LSB of the set field, but the latter contains the LSB of the way field
+ * minus the highest valid set field... such that when you subtract it from a
+ * [way:0:level] field you end up with a [way - 1:highest_set:level] field
+ * through the magic of double subtraction. It's quite ingenious, really.
+ * Takes care to only use r0-r3 and ip so it's perfectly ABI-compatible without
+ * needing to write to memory.
+ */
+
+.macro dcache_apply_all crm
+ dsb
+ mov r3, #-2 @ initialize level so that we start at 0
+
+1: @next_level
+ add r3, r3, #2 @ increment level
+
+ mrc p15, 1, r0, c0, c0, 1 @ read CLIDR
+ and ip, r0, #0x07000000 @ narrow to LoC
+ lsr ip, ip, #23 @ left align LoC (low 4 bits)
+ cmp r3, ip @ compare
+ bge 3f @done @ else fall through (r0 == CLIDR)
+
+ add r2, r3, r3, lsr #1 @ r2 = (level << 1) * 3 / 2
+ mov r1, r0, lsr r2 @ r1 = cache type
+ bfc r1, #3, #28
+ cmp r1, #2 @ is it data or i&d?
+ blt 1b @next_level @ nope, skip level
+
+ mcr p15, 2, r3, c0, c0, 0 @ select cache level
+ isb
+ mrc p15, 1, r0, c0, c0, 0 @ read CCSIDR
+
+ ubfx ip, r0, #0, #3 @ get linesize from CCSIDR
+ add ip, ip, #4 @ apply bias
+ ubfx r2, r0, #13, #15 @ get numsets - 1 from CCSIDR
+ lsl r2, r2, ip @ shift to set position
+ orr r3, r3, r2 @ merge set into way/set/level
+ mov r1, #1
+ lsl r1, r1, ip @ r1 = set decr
+
+ ubfx ip, r0, #3, #10 @ get numways - 1 from [to be discarded] CCSIDR
+ clz r2, ip @ number of bits to MSB of way
+ lsl ip, ip, r2 @ shift by that into way position
+ mov r0, #1
+ lsl r2, r0, r2 @ r2 now contains the way decr
+ mov r0, r3 @ get sets/level (no way yet)
+ orr r3, r3, ip @ merge way into way/set/level
+ bfc r0, #0, #4 @ clear low 4 bits (level) to get numset - 1
+ sub r2, r2, r0 @ subtract from way decr
+
+ /* r3 = ways/sets/level, r2 = way decr, r1 = set decr, r0 and ip are free */
+2: mcr p15, 0, r3, c7, \crm, 2 @ writeback and/or invalidate line
+ cmp r3, #15 @ are we done with this level (way/set == 0)
+ bls 1b @next_level @ yes, go to next level
+ lsr r0, r3, #4 @ clear level bits leaving only way/set bits
+ lsls r0, r0, #14 @ clear way bits leaving only set bits
+ subne r3, r3, r1 @ non-zero?, decrement set #
+ subeq r3, r3, r2 @ zero?, decrement way # and restore set count
+ b 2b
+
+3: @done
+ mov r0, #0 @ default back to cache level 0
+ mcr p15, 2, r0, c0, c0, 0 @ select cache level
+ dsb
+ isb
+ bx lr
+.endm
+
+ENTRY(dcache_invalidate_all)
+ dcache_apply_all crm=c6
+ENDPROC(dcache_invalidate_all)
+
+ENTRY(dcache_clean_all)
+ dcache_apply_all crm=c10
+ENDPROC(dcache_clean_all)
+
+ENTRY(dcache_clean_invalidate_all)
+ dcache_apply_all crm=c14
+ENDPROC(dcache_clean_invalidate_all)
diff --git a/src/arch/arm/armv7/Makefile.inc b/src/arch/arm/armv7/Makefile.inc
index 033d498..c46fb39 100644
--- a/src/arch/arm/armv7/Makefile.inc
+++ b/src/arch/arm/armv7/Makefile.inc
@@ -34,6 +34,7 @@ bootblock-$(CONFIG_BOOTBLOCK_SIMPLE) += bootblock_simple.c
endif
bootblock-y += cache.c
+bootblock-y += cpu.S
bootblock-$(CONFIG_BOOTBLOCK_CONSOLE) += exception.c
bootblock-$(CONFIG_BOOTBLOCK_CONSOLE) += exception_asm.S
bootblock-y += mmu.c
@@ -50,6 +51,7 @@ endif # CONFIG_ARCH_BOOTBLOCK_ARMV7
ifeq ($(CONFIG_ARCH_ROMSTAGE_ARMV7),y)
romstage-y += cache.c
+romstage-y += cpu.S
romstage-y += exception.c
romstage-y += exception_asm.S
romstage-y += mmu.c
@@ -66,6 +68,7 @@ endif # CONFIG_ARCH_ROMSTAGE_ARMV7
ifeq ($(CONFIG_ARCH_RAMSTAGE_ARMV7),y)
ramstage-y += cache.c
+ramstage-y += cpu.S
ramstage-y += exception.c
ramstage-y += exception_asm.S
ramstage-y += mmu.c
diff --git a/src/arch/arm/armv7/cache.c b/src/arch/arm/armv7/cache.c
index 4ee2687..7db86c8 100644
--- a/src/arch/arm/armv7/cache.c
+++ b/src/arch/arm/armv7/cache.c
@@ -35,21 +35,6 @@
#include <arch/cache.h>
-#define bitmask(high, low) ((1UL << (high)) + \
- ((1UL << (high)) - 1) - ((1UL << (low)) - 1))
-
-/* Basic log2() implementation. Note: log2(0) is 0 for our purposes. */
-/* FIXME: src/include/lib.h is difficult to work with due to romcc */
-static unsigned long log2(unsigned long u)
-{
- int i = 0;
-
- while (u >>= 1)
- i++;
-
- return i;
-}
-
void tlb_invalidate_all(void)
{
/*
@@ -84,116 +69,6 @@ enum dcache_op {
OP_DCIMVAC,
};
-/*
- * Do a dcache operation on entire cache by set/way. This is done for
- * portability because mapping of memory address to cache location is
- * implementation defined (See note on "Requirements for operations by
- * set/way" in arch ref. manual).
- */
-static void dcache_op_set_way(enum dcache_op op)
-{
- uint32_t ccsidr;
- unsigned int associativity, num_sets, linesize_bytes;
- unsigned int set, way;
- unsigned int level;
-
- level = (read_csselr() >> 1) & 0x7;
-
- /*
- * dcache must be invalidated by set/way for portability since virtual
- * memory mapping is system-defined. The number of sets and
- * associativity is given by CCSIDR. We'll use DCISW to invalidate the
- * dcache.
- */
- ccsidr = read_ccsidr();
-
- /* FIXME: rounding up required here? */
- num_sets = ((ccsidr & bitmask(27, 13)) >> 13) + 1;
- associativity = ((ccsidr & bitmask(12, 3)) >> 3) + 1;
- /* FIXME: do we need to use CTR.DminLine here? */
- linesize_bytes = (1 << ((ccsidr & 0x7) + 2)) * 4;
-
- dsb();
-
- /*
- * Set/way operations require an interesting bit packing. See section
- * B4-35 in the ARMv7 Architecture Reference Manual:
- *
- * A: Log2(associativity)
- * B: L+S
- * L: Log2(linesize)
- * S: Log2(num_sets)
- *
- * The bits are packed as follows:
- * 31 31-A B B-1 L L-1 4 3 1 0
- * |---|-------------|--------|-------|-----|-|
- * |Way| zeros | Set | zeros |level|0|
- * |---|-------------|--------|-------|-----|-|
- */
- for (way = 0; way < associativity; way++) {
- for (set = 0; set < num_sets; set++) {
- uint32_t val = 0;
- val |= way << (32 - log2(associativity));
- val |= set << log2(linesize_bytes);
- val |= level << 1;
- switch(op) {
- case OP_DCCISW:
- dccisw(val);
- break;
- case OP_DCISW:
- dcisw(val);
- break;
- case OP_DCCSW:
- dccsw(val);
- break;
- default:
- break;
- }
- }
- }
- isb();
-}
-
-static void dcache_foreach(enum dcache_op op)
-{
- uint32_t clidr;
- int level;
-
- clidr = read_clidr();
- for (level = 0; level < 7; level++) {
- unsigned int ctype = (clidr >> (level * 3)) & 0x7;
- uint32_t csselr;
-
- switch(ctype) {
- case 0x2:
- case 0x3:
- case 0x4:
- csselr = level << 1;
- write_csselr(csselr);
- dcache_op_set_way(op);
- break;
- default:
- /* no cache, icache only, or reserved */
- break;
- }
- }
-}
-
-void dcache_clean_all(void)
-{
- dcache_foreach(OP_DCCSW);
-}
-
-void dcache_clean_invalidate_all(void)
-{
- dcache_foreach(OP_DCCISW);
-}
-
-void dcache_invalidate_all(void)
-{
- dcache_foreach(OP_DCISW);
-}
-
unsigned int dcache_line_bytes(void)
{
uint32_t ccsidr;
diff --git a/src/arch/arm/armv7/cpu.S b/src/arch/arm/armv7/cpu.S
index 42e2354..29a19e7 100644
--- a/src/arch/arm/armv7/cpu.S
+++ b/src/arch/arm/armv7/cpu.S
@@ -30,25 +30,39 @@
* SUCH DAMAGE.
*/
+#include <arch/asm.h>
+
/*
- * These work very hard to not push registers onto the stack and to limit themselves
- * to use r0-r3 and ip.
+ * Dcache invalidations by set/way work by passing a [way:sbz:set:sbz:level:0]
+ * bitfield in a register to the appropriate MCR instruction. This algorithm
+ * works by initializing a bitfield with the highest-numbered set and way, and
+ * generating a "set decrement" and a "way decrement". The former just contains
+ * the LSB of the set field, but the latter contains the LSB of the way field
+ * minus the highest valid set field... such that when you subtract it from a
+ * [way:0:level] field you end up with a [way - 1:highest_set:level] field
+ * through the magic of double subtraction. It's quite ingenious, really.
+ * Takes care to only use r0-r3 and ip so it's perfectly ABI-compatible without
+ * needing to write to memory.
*/
-/* * LINTSTUB: void armv7_dcache_wbinv_all(void); */
-ENTRY_NP(armv7_dcache_wbinv_all)
+.macro dcache_apply_all crm
+ dsb
+ mov r3, #-2 @ initialize level so that we start at 0
+
+1: @next_level
+ add r3, r3, #2 @ increment level
+
mrc p15, 1, r0, c0, c0, 1 @ read CLIDR
- ands r3, r0, #0x07000000
- beq .Ldone_wbinv
- lsr r3, r3, #23 @ left align loc (low 4 bits)
+ and ip, r0, #0x07000000 @ narrow to LoC
+ lsr ip, ip, #23 @ left align LoC (low 4 bits)
+ cmp r3, ip @ compare
+ bge 3f @done @ else fall through (r0 == CLIDR)
- mov r1, #0
-.Lstart_wbinv:
- add r2, r3, r3, lsr #1 @ r2 = level * 3 / 2
+ add r2, r3, r3, lsr #1 @ r2 = (level << 1) * 3 / 2
mov r1, r0, lsr r2 @ r1 = cache type
bfc r1, #3, #28
cmp r1, #2 @ is it data or i&d?
- blt .Lnext_level_wbinv @ nope, skip level
+ blt 1b @next_level @ nope, skip level
mcr p15, 2, r3, c0, c0, 0 @ select cache level
isb
@@ -65,7 +79,7 @@ ENTRY_NP(armv7_dcache_wbinv_all)
ubfx ip, r0, #3, #10 @ get numways - 1 from [to be discarded] CCSIDR
clz r2, ip @ number of bits to MSB of way
lsl ip, ip, r2 @ shift by that into way position
- mov r0, #1 @
+ mov r0, #1
lsl r2, r0, r2 @ r2 now contains the way decr
mov r0, r3 @ get sets/level (no way yet)
orr r3, r3, ip @ merge way into way/set/level
@@ -73,27 +87,31 @@ ENTRY_NP(armv7_dcache_wbinv_all)
sub r2, r2, r0 @ subtract from way decr
/* r3 = ways/sets/level, r2 = way decr, r1 = set decr, r0 and ip are free */
-1: mcr p15, 0, r3, c7, c14, 2 @ writeback and invalidate line
+2: mcr p15, 0, r3, c7, \crm, 2 @ writeback and/or invalidate line
cmp r3, #15 @ are we done with this level (way/set == 0)
- bls .Lnext_level_wbinv @ yes, go to next level
- lsl r0, r3, #10 @ clear way bits leaving only set/level bits
- lsr r0, r0, #4 @ clear level bits leaving only set bits
+ bls 1b @next_level @ yes, go to next level
+ lsr r0, r3, #4 @ clear level bits leaving only way/set bits
+ lsls r0, r0, #14 @ clear way bits leaving only set bits
subne r3, r3, r1 @ non-zero?, decrement set #
subeq r3, r3, r2 @ zero?, decrement way # and restore set count
- b 1b
+ b 2b
-.Lnext_level_wbinv:
- mrc p15, 1, r0, c0, c0, 1 @ read CLIDR
- and ip, r0, #0x07000000 @ narrow to LoC
- lsr ip, ip, #23 @ left align LoC (low 4 bits)
- add r3, r3, #2 @ go to next level
- cmp r3, ip @ compare
- blt .Lstart_wbinv @ not done, next level (r0 == CLIDR)
-
-.Ldone_wbinv:
+3: @done
mov r0, #0 @ default back to cache level 0
mcr p15, 2, r0, c0, c0, 0 @ select cache level
dsb
isb
bx lr
-END(armv7_dcache_wbinv_all)
+.endm
+
+ENTRY(dcache_invalidate_all)
+ dcache_apply_all crm=c6
+ENDPROC(dcache_invalidate_all)
+
+ENTRY(dcache_clean_all)
+ dcache_apply_all crm=c10
+ENDPROC(dcache_clean_all)
+
+ENTRY(dcache_clean_invalidate_all)
+ dcache_apply_all crm=c14
+ENDPROC(dcache_clean_invalidate_all)
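To close the loop on the "double subtraction" described in the macro's comment block: subtracting the way decrement from a value whose set field has reached zero borrows out of the way field and reloads the set field with its maximum in one step. Here is a self-contained C model with made-up cache geometry (none of these names are coreboot API):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	const unsigned level = 0, set_shift = 6, way_shift = 30;
	const uint32_t num_sets = 4, num_ways = 4;  /* tiny, for a short trace */
	const uint32_t set_decr = 1u << set_shift;
	/* way decr = LSB of way field minus highest valid set field */
	const uint32_t way_decr = (1u << way_shift) -
				  ((num_sets - 1) << set_shift);
	const uint32_t set_mask = (num_sets - 1) << set_shift;
	uint32_t v = ((num_ways - 1) << way_shift) |
		     ((num_sets - 1) << set_shift) | (level << 1);

	for (;;) {
		/* where the asm would issue MCR p15, 0, v, c7, crm, 2 */
		printf("way=%u set=%u\n", (unsigned)(v >> way_shift),
		       (unsigned)((v & set_mask) >> set_shift));
		if (v <= 15)             /* way and set both zero: level done */
			break;
		if (v & set_mask)
			v -= set_decr;   /* set non-zero: next set */
		else
			v -= way_decr;   /* set wrapped: previous way, and the
					    borrow restores set to num_sets-1 */
	}
	return 0;
}

Running it walks way 3..0 with set 3..0 inside each way, mirroring the asm loop between label 2: and the subne/subeq pair.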